Extractor rewrite
This commit is contained in:
44
Cargo.lock
generated
44
Cargo.lock
generated
@@ -2491,6 +2491,7 @@ dependencies = [
|
|||||||
"pile-config",
|
"pile-config",
|
||||||
"pile-dataset",
|
"pile-dataset",
|
||||||
"pile-toolbox",
|
"pile-toolbox",
|
||||||
|
"pile-value",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"tokio",
|
"tokio",
|
||||||
@@ -2515,26 +2516,13 @@ dependencies = [
|
|||||||
name = "pile-dataset"
|
name = "pile-dataset"
|
||||||
version = "0.0.1"
|
version = "0.0.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-trait",
|
|
||||||
"aws-sdk-s3",
|
|
||||||
"axum",
|
"axum",
|
||||||
"blake3",
|
|
||||||
"chrono",
|
"chrono",
|
||||||
"epub",
|
|
||||||
"id3",
|
|
||||||
"image",
|
|
||||||
"itertools 0.14.0",
|
|
||||||
"kamadak-exif",
|
|
||||||
"mime",
|
|
||||||
"mime_guess",
|
|
||||||
"pdf",
|
|
||||||
"pdfium-render",
|
|
||||||
"pile-config",
|
"pile-config",
|
||||||
"pile-flac",
|
|
||||||
"pile-toolbox",
|
"pile-toolbox",
|
||||||
|
"pile-value",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"smartstring",
|
|
||||||
"tantivy",
|
"tantivy",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
"tokio",
|
"tokio",
|
||||||
@@ -2543,7 +2531,6 @@ dependencies = [
|
|||||||
"tracing",
|
"tracing",
|
||||||
"utoipa",
|
"utoipa",
|
||||||
"utoipa-swagger-ui",
|
"utoipa-swagger-ui",
|
||||||
"walkdir",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2568,6 +2555,33 @@ dependencies = [
|
|||||||
"tokio",
|
"tokio",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pile-value"
|
||||||
|
version = "0.0.1"
|
||||||
|
dependencies = [
|
||||||
|
"async-trait",
|
||||||
|
"aws-sdk-s3",
|
||||||
|
"blake3",
|
||||||
|
"chrono",
|
||||||
|
"epub",
|
||||||
|
"id3",
|
||||||
|
"image",
|
||||||
|
"kamadak-exif",
|
||||||
|
"mime",
|
||||||
|
"mime_guess",
|
||||||
|
"pdf",
|
||||||
|
"pdfium-render",
|
||||||
|
"pile-config",
|
||||||
|
"pile-flac",
|
||||||
|
"serde_json",
|
||||||
|
"smartstring",
|
||||||
|
"tokio",
|
||||||
|
"tokio-stream",
|
||||||
|
"toml",
|
||||||
|
"tracing",
|
||||||
|
"walkdir",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pin-project-lite"
|
name = "pin-project-lite"
|
||||||
version = "0.2.16"
|
version = "0.2.16"
|
||||||
|
|||||||
@@ -67,6 +67,7 @@ pile-toolbox = { path = "crates/pile-toolbox" }
|
|||||||
pile-config = { path = "crates/pile-config" }
|
pile-config = { path = "crates/pile-config" }
|
||||||
pile-flac = { path = "crates/pile-flac" }
|
pile-flac = { path = "crates/pile-flac" }
|
||||||
pile-dataset = { path = "crates/pile-dataset" }
|
pile-dataset = { path = "crates/pile-dataset" }
|
||||||
|
pile-value = { path = "crates/pile-value" }
|
||||||
|
|
||||||
# Clients & servers
|
# Clients & servers
|
||||||
tantivy = "0.25.0"
|
tantivy = "0.25.0"
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use std::{collections::HashMap, fmt::Debug, path::PathBuf};
|
use std::{collections::HashMap, fmt::Debug, path::PathBuf};
|
||||||
|
|
||||||
mod post;
|
|
||||||
pub use post::*;
|
|
||||||
|
|
||||||
mod misc;
|
mod misc;
|
||||||
pub use misc::*;
|
pub use misc::*;
|
||||||
|
|
||||||
@@ -40,10 +37,6 @@ pub struct DatasetConfig {
|
|||||||
|
|
||||||
/// Where to find this field
|
/// Where to find this field
|
||||||
pub source: HashMap<Label, Source>,
|
pub source: HashMap<Label, Source>,
|
||||||
|
|
||||||
/// How to post-process this field
|
|
||||||
#[serde(default)]
|
|
||||||
pub post: Vec<FieldSpecPost>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Deserialize)]
|
#[derive(Debug, Clone, Deserialize)]
|
||||||
@@ -100,10 +93,6 @@ pub struct FieldSpec {
|
|||||||
|
|
||||||
/// How to find this field in a data entry
|
/// How to find this field in a data entry
|
||||||
pub path: Vec<ObjectPath>,
|
pub path: Vec<ObjectPath>,
|
||||||
|
|
||||||
/// How to post-process this field
|
|
||||||
#[serde(default)]
|
|
||||||
pub post: Vec<FieldSpecPost>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
|
||||||
|
|||||||
@@ -1,18 +0,0 @@
|
|||||||
use serde::Deserialize;
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
|
|
||||||
#[serde(untagged)]
|
|
||||||
pub enum FieldSpecPost {
|
|
||||||
TrimSuffix { trim_suffix: String },
|
|
||||||
TrimPrefix { trim_prefix: String },
|
|
||||||
SetCase { case: Case },
|
|
||||||
Join { join: String },
|
|
||||||
NotEmpty { notempty: bool },
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
|
|
||||||
#[serde(rename_all = "lowercase")]
|
|
||||||
pub enum Case {
|
|
||||||
Lower,
|
|
||||||
Upper,
|
|
||||||
}
|
|
||||||
@@ -10,37 +10,23 @@ workspace = true
|
|||||||
[dependencies]
|
[dependencies]
|
||||||
pile-config = { workspace = true }
|
pile-config = { workspace = true }
|
||||||
pile-toolbox = { workspace = true }
|
pile-toolbox = { workspace = true }
|
||||||
pile-flac = { workspace = true }
|
pile-value = { workspace = true }
|
||||||
|
|
||||||
serde_json = { workspace = true }
|
serde_json = { workspace = true }
|
||||||
itertools = { workspace = true }
|
|
||||||
walkdir = { workspace = true }
|
|
||||||
tantivy = { workspace = true }
|
tantivy = { workspace = true }
|
||||||
tracing = { workspace = true }
|
tracing = { workspace = true }
|
||||||
chrono = { workspace = true }
|
chrono = { workspace = true }
|
||||||
toml = { workspace = true }
|
toml = { workspace = true }
|
||||||
thiserror = { workspace = true }
|
thiserror = { workspace = true }
|
||||||
smartstring = { workspace = true }
|
|
||||||
blake3 = { workspace = true }
|
|
||||||
epub = { workspace = true }
|
|
||||||
kamadak-exif = { workspace = true }
|
|
||||||
pdf = { workspace = true }
|
|
||||||
pdfium-render = { workspace = true, optional = true }
|
|
||||||
image = { workspace = true, optional = true }
|
|
||||||
id3 = { workspace = true }
|
|
||||||
tokio = { workspace = true }
|
tokio = { workspace = true }
|
||||||
tokio-stream = { workspace = true }
|
tokio-stream = { workspace = true }
|
||||||
async-trait = { workspace = true }
|
|
||||||
aws-sdk-s3 = { workspace = true }
|
|
||||||
mime = { workspace = true }
|
|
||||||
mime_guess = { workspace = true }
|
|
||||||
serde = { workspace = true }
|
|
||||||
|
|
||||||
|
serde = { workspace = true, optional = true }
|
||||||
axum = { workspace = true, optional = true }
|
axum = { workspace = true, optional = true }
|
||||||
utoipa = { workspace = true, optional = true }
|
utoipa = { workspace = true, optional = true }
|
||||||
utoipa-swagger-ui = { workspace = true, optional = true }
|
utoipa-swagger-ui = { workspace = true, optional = true }
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = []
|
default = []
|
||||||
pdfium = ["dep:pdfium-render", "dep:image"]
|
pdfium = ["pile-value/pdfium"]
|
||||||
axum = ["dep:axum", "dep:utoipa", "dep:utoipa-swagger-ui"]
|
axum = ["dep:axum", "dep:utoipa", "dep:utoipa-swagger-ui", "dep:serde"]
|
||||||
|
|||||||
@@ -1,6 +1,10 @@
|
|||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use pile_config::{ConfigToml, Label, Source, objectpath::ObjectPath};
|
use pile_config::{ConfigToml, Label, Source, objectpath::ObjectPath};
|
||||||
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
||||||
|
use pile_value::{
|
||||||
|
source::{DataSource, DirDataSource, S3DataSource, misc::path_ts_earliest},
|
||||||
|
value::{Item, PileValue},
|
||||||
|
};
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use std::{collections::HashMap, io::ErrorKind, path::PathBuf, sync::Arc, time::Instant};
|
use std::{collections::HashMap, io::ErrorKind, path::PathBuf, sync::Arc, time::Instant};
|
||||||
use tantivy::{Executor, Index, IndexWriter, TantivyError, collector::TopDocs};
|
use tantivy::{Executor, Index, IndexWriter, TantivyError, collector::TopDocs};
|
||||||
@@ -9,13 +13,7 @@ use tokio::task::JoinSet;
|
|||||||
use tokio_stream::{StreamExt, wrappers::ReceiverStream};
|
use tokio_stream::{StreamExt, wrappers::ReceiverStream};
|
||||||
use tracing::{debug, info, trace, warn};
|
use tracing::{debug, info, trace, warn};
|
||||||
|
|
||||||
use crate::{
|
use crate::index::{DbFtsIndex, FtsLookupResult};
|
||||||
DataSource, Item, PileValue,
|
|
||||||
extract::MetaExtractor,
|
|
||||||
index::{DbFtsIndex, FtsLookupResult},
|
|
||||||
path_ts_earliest,
|
|
||||||
source::{DirDataSource, S3DataSource},
|
|
||||||
};
|
|
||||||
|
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum DatasetError {
|
pub enum DatasetError {
|
||||||
@@ -183,11 +181,12 @@ impl Datasets {
|
|||||||
let Some(item) = self.get(source, key).await else {
|
let Some(item) = self.get(source, key).await else {
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
};
|
};
|
||||||
let extractor = MetaExtractor::new(&item);
|
|
||||||
let root = PileValue::ObjectExtractor(Arc::new(extractor));
|
let item = PileValue::Item(item);
|
||||||
let Some(value) = root.query(path).await? else {
|
let Some(value) = item.query(path).await? else {
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(Some(value.to_json().await?))
|
Ok(Some(value.to_json().await?))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,165 +0,0 @@
|
|||||||
use pile_config::Label;
|
|
||||||
use std::{collections::HashMap, sync::Arc};
|
|
||||||
|
|
||||||
mod flac;
|
|
||||||
pub use flac::*;
|
|
||||||
|
|
||||||
mod id3;
|
|
||||||
pub use id3::*;
|
|
||||||
|
|
||||||
mod fs;
|
|
||||||
pub use fs::*;
|
|
||||||
|
|
||||||
mod epub;
|
|
||||||
pub use epub::*;
|
|
||||||
|
|
||||||
mod exif;
|
|
||||||
pub use exif::*;
|
|
||||||
|
|
||||||
mod pdf;
|
|
||||||
pub use pdf::*;
|
|
||||||
|
|
||||||
mod toml;
|
|
||||||
pub use toml::*;
|
|
||||||
|
|
||||||
mod map;
|
|
||||||
pub use map::*;
|
|
||||||
|
|
||||||
mod sidecar;
|
|
||||||
pub use sidecar::*;
|
|
||||||
|
|
||||||
use crate::{Item, PileValue};
|
|
||||||
|
|
||||||
/// An attachment that extracts metadata from an [Item].
|
|
||||||
///
|
|
||||||
/// Metadata is exposed as an immutable map of {label: value},
|
|
||||||
/// much like a json object.
|
|
||||||
#[async_trait::async_trait]
|
|
||||||
pub trait ObjectExtractor: Send + Sync {
|
|
||||||
/// Get the field at `name` from `item`.
|
|
||||||
/// - returns `None` if `name` is not a valid field
|
|
||||||
/// - returns `Some(Null)` if `name` is not available
|
|
||||||
async fn field(&self, name: &pile_config::Label) -> Result<Option<PileValue>, std::io::Error>;
|
|
||||||
|
|
||||||
/// Return all fields in this extractor.
|
|
||||||
/// `Self::field` must return [Some] for all these keys
|
|
||||||
/// and [None] for all others.
|
|
||||||
async fn fields(&self) -> Result<Vec<Label>, std::io::Error>;
|
|
||||||
|
|
||||||
/// Convert this to a JSON value.
|
|
||||||
async fn to_json(&self) -> Result<serde_json::Value, std::io::Error> {
|
|
||||||
let keys = self.fields().await?;
|
|
||||||
let mut map = serde_json::Map::new();
|
|
||||||
for k in &keys {
|
|
||||||
let v = match self.field(k).await? {
|
|
||||||
Some(x) => x,
|
|
||||||
None => continue,
|
|
||||||
};
|
|
||||||
map.insert(k.to_string(), Box::pin(v.to_json()).await?);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(serde_json::Value::Object(map))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// An attachment that extracts metadata from an [Item].
|
|
||||||
///
|
|
||||||
/// Metadata is exposed as an immutable list of values.
|
|
||||||
#[async_trait::async_trait]
|
|
||||||
pub trait ListExtractor: Send + Sync {
|
|
||||||
/// Get the item at index `idx`.
|
|
||||||
/// Indices start at zero, and must be consecutive.
|
|
||||||
/// - returns `None` if `idx` is out of range
|
|
||||||
/// - returns `Some(Null)` if `None` is at `idx`
|
|
||||||
async fn get(&self, idx: usize) -> Result<Option<PileValue>, std::io::Error>;
|
|
||||||
|
|
||||||
async fn len(&self) -> Result<usize, std::io::Error>;
|
|
||||||
|
|
||||||
async fn is_empty(&self) -> Result<bool, std::io::Error> {
|
|
||||||
Ok(self.len().await? == 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Convert this list to a JSON value.
|
|
||||||
async fn to_json(&self) -> Result<serde_json::Value, std::io::Error> {
|
|
||||||
let len = self.len().await?;
|
|
||||||
let mut list = Vec::with_capacity(len);
|
|
||||||
for i in 0..len {
|
|
||||||
#[expect(clippy::expect_used)]
|
|
||||||
let v = self
|
|
||||||
.get(i)
|
|
||||||
.await?
|
|
||||||
.expect("value must be present according to length");
|
|
||||||
list.push(Box::pin(v.to_json()).await?);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(serde_json::Value::Array(list))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct MetaExtractor {
|
|
||||||
inner: MapExtractor,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl MetaExtractor {
|
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
pub fn new(item: &Item) -> Self {
|
|
||||||
let inner = MapExtractor {
|
|
||||||
inner: HashMap::from([
|
|
||||||
(
|
|
||||||
Label::new("flac").unwrap(),
|
|
||||||
crate::PileValue::ObjectExtractor(Arc::new(FlacExtractor::new(item))),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
Label::new("id3").unwrap(),
|
|
||||||
crate::PileValue::ObjectExtractor(Arc::new(Id3Extractor::new(item))),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
Label::new("fs").unwrap(),
|
|
||||||
crate::PileValue::ObjectExtractor(Arc::new(FsExtractor::new(item))),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
Label::new("epub").unwrap(),
|
|
||||||
crate::PileValue::ObjectExtractor(Arc::new(EpubExtractor::new(item))),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
Label::new("exif").unwrap(),
|
|
||||||
crate::PileValue::ObjectExtractor(Arc::new(ExifExtractor::new(item))),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
Label::new("pdf").unwrap(),
|
|
||||||
crate::PileValue::ObjectExtractor(Arc::new(PdfExtractor::new(item))),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
Label::new("toml").unwrap(),
|
|
||||||
crate::PileValue::ObjectExtractor(Arc::new(TomlExtractor::new(item))),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
Label::new("sidecar").unwrap(),
|
|
||||||
crate::PileValue::ObjectExtractor(Arc::new(SidecarExtractor::new(item))),
|
|
||||||
),
|
|
||||||
]),
|
|
||||||
};
|
|
||||||
|
|
||||||
Self { inner }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
|
||||||
impl ObjectExtractor for MetaExtractor {
|
|
||||||
async fn field(&self, name: &pile_config::Label) -> Result<Option<PileValue>, std::io::Error> {
|
|
||||||
self.inner.field(name).await
|
|
||||||
}
|
|
||||||
|
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
|
||||||
return Ok(vec![
|
|
||||||
Label::new("flac").unwrap(),
|
|
||||||
Label::new("id3").unwrap(),
|
|
||||||
Label::new("fs").unwrap(),
|
|
||||||
Label::new("epub").unwrap(),
|
|
||||||
Label::new("exif").unwrap(),
|
|
||||||
Label::new("pdf").unwrap(),
|
|
||||||
Label::new("sidecar").unwrap(),
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,95 +0,0 @@
|
|||||||
use image::ImageFormat;
|
|
||||||
use pdfium_render::prelude::*;
|
|
||||||
use pile_config::Label;
|
|
||||||
use std::{
|
|
||||||
collections::HashMap,
|
|
||||||
io::{BufReader, Cursor},
|
|
||||||
sync::{Arc, OnceLock},
|
|
||||||
};
|
|
||||||
use tracing::trace;
|
|
||||||
|
|
||||||
use crate::{Item, PileValue, SyncReadBridge, extract::ObjectExtractor};
|
|
||||||
|
|
||||||
pub struct PdfCoverExtractor {
|
|
||||||
item: Item,
|
|
||||||
output: OnceLock<HashMap<Label, PileValue>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl PdfCoverExtractor {
|
|
||||||
pub fn new(item: &Item) -> Self {
|
|
||||||
Self {
|
|
||||||
item: item.clone(),
|
|
||||||
output: OnceLock::new(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn get_inner(&self) -> Result<&HashMap<Label, PileValue>, std::io::Error> {
|
|
||||||
if let Some(x) = self.output.get() {
|
|
||||||
return Ok(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
let reader = SyncReadBridge::new_current(self.item.read().await?);
|
|
||||||
let cover = tokio::task::spawn_blocking(move || {
|
|
||||||
let mut bytes = Vec::new();
|
|
||||||
std::io::Read::read_to_end(&mut BufReader::new(reader), &mut bytes)?;
|
|
||||||
|
|
||||||
let pdfium = Pdfium::default();
|
|
||||||
|
|
||||||
let document = pdfium
|
|
||||||
.load_pdf_from_byte_slice(&bytes, None)
|
|
||||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
|
|
||||||
|
|
||||||
let render_config = PdfRenderConfig::new().set_target_width(1024);
|
|
||||||
|
|
||||||
let page = document
|
|
||||||
.pages()
|
|
||||||
.get(0)
|
|
||||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
|
|
||||||
|
|
||||||
let image = page
|
|
||||||
.render_with_config(&render_config)
|
|
||||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?
|
|
||||||
.as_image();
|
|
||||||
|
|
||||||
let mut png_bytes = Vec::new();
|
|
||||||
image
|
|
||||||
.write_to(&mut Cursor::new(&mut png_bytes), ImageFormat::Png)
|
|
||||||
.map_err(|e| std::io::Error::other(e.to_string()))?;
|
|
||||||
|
|
||||||
Ok::<_, std::io::Error>(png_bytes)
|
|
||||||
})
|
|
||||||
.await
|
|
||||||
.map_err(std::io::Error::other)?;
|
|
||||||
|
|
||||||
let output = match cover {
|
|
||||||
Ok(data) => {
|
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
let label = Label::new("cover").unwrap();
|
|
||||||
HashMap::from([(
|
|
||||||
label,
|
|
||||||
PileValue::Blob {
|
|
||||||
mime: mime::IMAGE_PNG,
|
|
||||||
bytes: Arc::new(data),
|
|
||||||
},
|
|
||||||
)])
|
|
||||||
}
|
|
||||||
Err(error) => {
|
|
||||||
trace!(message = "Could not render pdf cover", ?error, key = ?self.item.key());
|
|
||||||
HashMap::new()
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
return Ok(self.output.get_or_init(|| output));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
|
||||||
impl ObjectExtractor for PdfCoverExtractor {
|
|
||||||
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
|
|
||||||
Ok(self.get_inner().await?.get(name).cloned())
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
|
||||||
Ok(self.get_inner().await?.keys().cloned().collect())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,9 +1,6 @@
|
|||||||
use itertools::Itertools;
|
use pile_config::{ConfigToml, DatasetFts, Label};
|
||||||
use pile_config::{Case, ConfigToml, DatasetFts, FieldSpecPost, Label};
|
use pile_value::value::{Item, PileValue};
|
||||||
use std::{
|
use std::{path::PathBuf, sync::LazyLock};
|
||||||
path::PathBuf,
|
|
||||||
sync::{Arc, LazyLock},
|
|
||||||
};
|
|
||||||
use tantivy::{
|
use tantivy::{
|
||||||
DocAddress, Index, ReloadPolicy, TantivyDocument, TantivyError,
|
DocAddress, Index, ReloadPolicy, TantivyDocument, TantivyError,
|
||||||
collector::Collector,
|
collector::Collector,
|
||||||
@@ -12,8 +9,6 @@ use tantivy::{
|
|||||||
};
|
};
|
||||||
use tracing::{debug, trace, warn};
|
use tracing::{debug, trace, warn};
|
||||||
|
|
||||||
use crate::{Item, PileValue, extract::MetaExtractor};
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct FtsLookupResult {
|
pub struct FtsLookupResult {
|
||||||
pub score: f32,
|
pub score: f32,
|
||||||
@@ -76,11 +71,11 @@ impl DbFtsIndex {
|
|||||||
doc.add_text(self.schema.get_field("_meta_source")?, item.source_name());
|
doc.add_text(self.schema.get_field("_meta_source")?, item.source_name());
|
||||||
doc.add_text(self.schema.get_field("_meta_key")?, key);
|
doc.add_text(self.schema.get_field("_meta_key")?, key);
|
||||||
|
|
||||||
let extractor = PileValue::ObjectExtractor(Arc::new(MetaExtractor::new(item)));
|
let item = PileValue::Item(item.clone());
|
||||||
|
|
||||||
let mut empty = true;
|
let mut empty = true;
|
||||||
for name in self.fts_cfg().fields.keys() {
|
for name in self.fts_cfg().fields.keys() {
|
||||||
let x = self.get_field(&extractor, name).await?;
|
let x = self.get_field(&item, name).await?;
|
||||||
|
|
||||||
let val = match x {
|
let val = match x {
|
||||||
Some(x) => x,
|
Some(x) => x,
|
||||||
@@ -135,13 +130,6 @@ impl DbFtsIndex {
|
|||||||
x => x.clone(),
|
x => x.clone(),
|
||||||
};
|
};
|
||||||
|
|
||||||
for post in &field.post {
|
|
||||||
val = match apply(post, &val) {
|
|
||||||
Some(x) => x,
|
|
||||||
None => return Ok(None),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
val = match val {
|
val = match val {
|
||||||
PileValue::String(x) => return Ok(Some(x.to_string())),
|
PileValue::String(x) => return Ok(Some(x.to_string())),
|
||||||
@@ -186,6 +174,15 @@ impl DbFtsIndex {
|
|||||||
continue 'outer;
|
continue 'outer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PileValue::Item(_) => {
|
||||||
|
trace!(
|
||||||
|
message = "Skipping field, is item",
|
||||||
|
field = field_name.to_string(),
|
||||||
|
?path,
|
||||||
|
);
|
||||||
|
continue 'outer;
|
||||||
|
}
|
||||||
|
|
||||||
PileValue::ListExtractor(_) => {
|
PileValue::ListExtractor(_) => {
|
||||||
trace!(
|
trace!(
|
||||||
message = "Skipping field, is ListExtractor",
|
message = "Skipping field, is ListExtractor",
|
||||||
@@ -296,104 +293,3 @@ impl DbFtsIndex {
|
|||||||
return Ok(out);
|
return Ok(out);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn apply(post: &FieldSpecPost, val: &PileValue) -> Option<PileValue> {
|
|
||||||
Some(match post {
|
|
||||||
FieldSpecPost::NotEmpty { notempty: false } => val.clone(),
|
|
||||||
FieldSpecPost::NotEmpty { notempty: true } => match val {
|
|
||||||
PileValue::Null => return None,
|
|
||||||
PileValue::String(x) if x.is_empty() => return None,
|
|
||||||
PileValue::Array(x) if x.is_empty() => return None,
|
|
||||||
x => x.clone(),
|
|
||||||
},
|
|
||||||
|
|
||||||
FieldSpecPost::SetCase { case: Case::Lower } => match val {
|
|
||||||
PileValue::Null => return None,
|
|
||||||
PileValue::U64(_) => return None,
|
|
||||||
PileValue::I64(_) => return None,
|
|
||||||
PileValue::Blob { .. } => return None,
|
|
||||||
PileValue::ObjectExtractor(_) => return None,
|
|
||||||
PileValue::ListExtractor(_) => return None,
|
|
||||||
PileValue::String(x) => PileValue::String(Arc::new(x.as_str().to_lowercase().into())),
|
|
||||||
|
|
||||||
PileValue::Array(x) => PileValue::Array(Arc::new(
|
|
||||||
x.iter().map(|x| apply(post, x)).collect::<Option<_>>()?,
|
|
||||||
)),
|
|
||||||
},
|
|
||||||
|
|
||||||
FieldSpecPost::SetCase { case: Case::Upper } => match val {
|
|
||||||
PileValue::Null => return None,
|
|
||||||
PileValue::U64(_) => return None,
|
|
||||||
PileValue::I64(_) => return None,
|
|
||||||
PileValue::Blob { .. } => return None,
|
|
||||||
PileValue::ObjectExtractor(_) => return None,
|
|
||||||
PileValue::ListExtractor(_) => return None,
|
|
||||||
PileValue::String(x) => PileValue::String(Arc::new(x.as_str().to_uppercase().into())),
|
|
||||||
|
|
||||||
PileValue::Array(x) => PileValue::Array(Arc::new(
|
|
||||||
x.iter()
|
|
||||||
.map(|x| apply(post, x))
|
|
||||||
.collect::<Option<Vec<_>>>()?,
|
|
||||||
)),
|
|
||||||
},
|
|
||||||
|
|
||||||
FieldSpecPost::TrimSuffix { trim_suffix } => match val {
|
|
||||||
PileValue::Null => return None,
|
|
||||||
PileValue::U64(_) => return None,
|
|
||||||
PileValue::I64(_) => return None,
|
|
||||||
PileValue::Blob { .. } => return None,
|
|
||||||
PileValue::ObjectExtractor(_) => return None,
|
|
||||||
PileValue::ListExtractor(_) => return None,
|
|
||||||
|
|
||||||
PileValue::String(x) => PileValue::String(Arc::new(
|
|
||||||
x.strip_suffix(trim_suffix).unwrap_or(x.as_str()).into(),
|
|
||||||
)),
|
|
||||||
|
|
||||||
PileValue::Array(x) => PileValue::Array(Arc::new(
|
|
||||||
x.iter()
|
|
||||||
.map(|x| apply(post, x))
|
|
||||||
.collect::<Option<Vec<_>>>()?,
|
|
||||||
)),
|
|
||||||
},
|
|
||||||
|
|
||||||
FieldSpecPost::TrimPrefix { trim_prefix } => match val {
|
|
||||||
PileValue::Null => return None,
|
|
||||||
PileValue::U64(_) => return None,
|
|
||||||
PileValue::I64(_) => return None,
|
|
||||||
PileValue::Blob { .. } => return None,
|
|
||||||
PileValue::ObjectExtractor(_) => return None,
|
|
||||||
PileValue::ListExtractor(_) => return None,
|
|
||||||
|
|
||||||
PileValue::String(x) => PileValue::String(Arc::new(
|
|
||||||
x.strip_prefix(trim_prefix).unwrap_or(x.as_str()).into(),
|
|
||||||
)),
|
|
||||||
|
|
||||||
PileValue::Array(x) => PileValue::Array(Arc::new(
|
|
||||||
x.iter()
|
|
||||||
.map(|x| apply(post, x))
|
|
||||||
.collect::<Option<Vec<_>>>()?,
|
|
||||||
)),
|
|
||||||
},
|
|
||||||
|
|
||||||
FieldSpecPost::Join { join } => match val {
|
|
||||||
PileValue::Null => return None,
|
|
||||||
PileValue::U64(_) => return None,
|
|
||||||
PileValue::I64(_) => return None,
|
|
||||||
PileValue::Blob { .. } => return None,
|
|
||||||
PileValue::ObjectExtractor(_) => return None,
|
|
||||||
PileValue::ListExtractor(_) => return None,
|
|
||||||
|
|
||||||
PileValue::String(x) => PileValue::String(x.clone()),
|
|
||||||
|
|
||||||
PileValue::Array(x) => PileValue::String(Arc::new(
|
|
||||||
x.iter()
|
|
||||||
.map(|x| apply(post, x))
|
|
||||||
.map(|x| x.and_then(|x| x.as_str().map(|x| x.to_owned())))
|
|
||||||
.collect::<Option<Vec<_>>>()?
|
|
||||||
.into_iter()
|
|
||||||
.join(join)
|
|
||||||
.into(),
|
|
||||||
)),
|
|
||||||
},
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,21 +1,7 @@
|
|||||||
mod traits;
|
|
||||||
pub use traits::*;
|
|
||||||
|
|
||||||
mod misc;
|
|
||||||
pub use misc::*;
|
|
||||||
|
|
||||||
mod dataset;
|
mod dataset;
|
||||||
pub use dataset::{Dataset, DatasetError, Datasets};
|
pub use dataset::{Dataset, DatasetError, Datasets};
|
||||||
|
|
||||||
mod item;
|
|
||||||
pub use item::*;
|
|
||||||
|
|
||||||
mod value;
|
|
||||||
pub use value::*;
|
|
||||||
|
|
||||||
pub mod extract;
|
|
||||||
pub mod index;
|
pub mod index;
|
||||||
pub mod source;
|
|
||||||
|
|
||||||
#[cfg(feature = "axum")]
|
#[cfg(feature = "axum")]
|
||||||
pub mod serve;
|
pub mod serve;
|
||||||
|
|||||||
@@ -5,12 +5,13 @@ use axum::{
|
|||||||
response::{IntoResponse, Response},
|
response::{IntoResponse, Response},
|
||||||
};
|
};
|
||||||
use pile_config::{Label, objectpath::ObjectPath};
|
use pile_config::{Label, objectpath::ObjectPath};
|
||||||
|
use pile_value::value::PileValue;
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use std::{sync::Arc, time::Instant};
|
use std::{sync::Arc, time::Instant};
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
use utoipa::ToSchema;
|
use utoipa::ToSchema;
|
||||||
|
|
||||||
use crate::{Datasets, PileValue, extract::MetaExtractor};
|
use crate::Datasets;
|
||||||
|
|
||||||
#[derive(Deserialize, ToSchema)]
|
#[derive(Deserialize, ToSchema)]
|
||||||
pub struct FieldQuery {
|
pub struct FieldQuery {
|
||||||
@@ -61,10 +62,8 @@ pub async fn get_field(
|
|||||||
return StatusCode::NOT_FOUND.into_response();
|
return StatusCode::NOT_FOUND.into_response();
|
||||||
};
|
};
|
||||||
|
|
||||||
let extractor = MetaExtractor::new(&item);
|
let item = PileValue::Item(item);
|
||||||
let root: PileValue = PileValue::ObjectExtractor(Arc::new(extractor));
|
let value = match item.query(&path).await {
|
||||||
|
|
||||||
let value = match root.query(&path).await {
|
|
||||||
Ok(Some(v)) => v,
|
Ok(Some(v)) => v,
|
||||||
Ok(None) => return StatusCode::NOT_FOUND.into_response(),
|
Ok(None) => return StatusCode::NOT_FOUND.into_response(),
|
||||||
Err(e) => return (StatusCode::INTERNAL_SERVER_ERROR, format!("{e:?}")).into_response(),
|
Err(e) => return (StatusCode::INTERNAL_SERVER_ERROR, format!("{e:?}")).into_response(),
|
||||||
|
|||||||
@@ -4,12 +4,13 @@ use axum::{
|
|||||||
response::{IntoResponse, Response},
|
response::{IntoResponse, Response},
|
||||||
};
|
};
|
||||||
use pile_config::Label;
|
use pile_config::Label;
|
||||||
|
use pile_value::value::AsyncReader;
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use std::{sync::Arc, time::Instant};
|
use std::{sync::Arc, time::Instant};
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
use utoipa::ToSchema;
|
use utoipa::ToSchema;
|
||||||
|
|
||||||
use crate::{AsyncReader, Datasets};
|
use crate::Datasets;
|
||||||
|
|
||||||
#[derive(Deserialize, ToSchema)]
|
#[derive(Deserialize, ToSchema)]
|
||||||
pub struct ItemQuery {
|
pub struct ItemQuery {
|
||||||
|
|||||||
@@ -1,5 +0,0 @@
|
|||||||
mod dir;
|
|
||||||
pub use dir::*;
|
|
||||||
|
|
||||||
mod s3;
|
|
||||||
pub use s3::*;
|
|
||||||
36
crates/pile-value/Cargo.toml
Normal file
36
crates/pile-value/Cargo.toml
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
[package]
|
||||||
|
name = "pile-value"
|
||||||
|
version = { workspace = true }
|
||||||
|
rust-version = { workspace = true }
|
||||||
|
edition = { workspace = true }
|
||||||
|
|
||||||
|
[lints]
|
||||||
|
workspace = true
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
pile-config = { workspace = true }
|
||||||
|
pile-flac = { workspace = true }
|
||||||
|
|
||||||
|
serde_json = { workspace = true }
|
||||||
|
walkdir = { workspace = true }
|
||||||
|
tracing = { workspace = true }
|
||||||
|
chrono = { workspace = true }
|
||||||
|
toml = { workspace = true }
|
||||||
|
smartstring = { workspace = true }
|
||||||
|
blake3 = { workspace = true }
|
||||||
|
epub = { workspace = true }
|
||||||
|
kamadak-exif = { workspace = true }
|
||||||
|
pdf = { workspace = true }
|
||||||
|
pdfium-render = { workspace = true, optional = true }
|
||||||
|
image = { workspace = true, optional = true }
|
||||||
|
id3 = { workspace = true }
|
||||||
|
tokio = { workspace = true }
|
||||||
|
tokio-stream = { workspace = true }
|
||||||
|
async-trait = { workspace = true }
|
||||||
|
aws-sdk-s3 = { workspace = true }
|
||||||
|
mime = { workspace = true }
|
||||||
|
mime_guess = { workspace = true }
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = []
|
||||||
|
pdfium = ["dep:pdfium-render", "dep:image"]
|
||||||
@@ -6,7 +6,10 @@ use std::{
|
|||||||
};
|
};
|
||||||
use tracing::trace;
|
use tracing::trace;
|
||||||
|
|
||||||
use crate::{Item, PileValue, SyncReadBridge, extract::ObjectExtractor};
|
use crate::{
|
||||||
|
extract::traits::ObjectExtractor,
|
||||||
|
value::{Item, PileValue, SyncReadBridge},
|
||||||
|
};
|
||||||
|
|
||||||
pub struct EpubMetaExtractor {
|
pub struct EpubMetaExtractor {
|
||||||
item: Item,
|
item: Item,
|
||||||
@@ -6,7 +6,10 @@ use std::{
|
|||||||
};
|
};
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
|
|
||||||
use crate::{Item, PileValue, SyncReadBridge, extract::ObjectExtractor};
|
use crate::{
|
||||||
|
extract::traits::ObjectExtractor,
|
||||||
|
value::{Item, PileValue, SyncReadBridge},
|
||||||
|
};
|
||||||
|
|
||||||
pub struct EpubTextExtractor {
|
pub struct EpubTextExtractor {
|
||||||
item: Item,
|
item: Item,
|
||||||
@@ -7,7 +7,10 @@ pub use epub_meta::*;
|
|||||||
mod epub_text;
|
mod epub_text;
|
||||||
pub use epub_text::*;
|
pub use epub_text::*;
|
||||||
|
|
||||||
use crate::{Item, PileValue, extract::ObjectExtractor};
|
use crate::{
|
||||||
|
extract::traits::ObjectExtractor,
|
||||||
|
value::{Item, PileValue},
|
||||||
|
};
|
||||||
|
|
||||||
pub struct EpubExtractor {
|
pub struct EpubExtractor {
|
||||||
text: Arc<EpubTextExtractor>,
|
text: Arc<EpubTextExtractor>,
|
||||||
@@ -6,7 +6,10 @@ use std::{
|
|||||||
};
|
};
|
||||||
use tracing::trace;
|
use tracing::trace;
|
||||||
|
|
||||||
use crate::{Item, PileValue, SyncReadBridge, extract::ObjectExtractor};
|
use crate::{
|
||||||
|
extract::traits::ObjectExtractor,
|
||||||
|
value::{Item, PileValue, SyncReadBridge},
|
||||||
|
};
|
||||||
|
|
||||||
pub struct ExifExtractor {
|
pub struct ExifExtractor {
|
||||||
item: Item,
|
item: Item,
|
||||||
@@ -8,8 +8,8 @@ use std::{
|
|||||||
};
|
};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
Item, PileValue, SyncReadBridge,
|
extract::traits::{ListExtractor, ObjectExtractor},
|
||||||
extract::{ListExtractor, ObjectExtractor},
|
value::{Item, PileValue, SyncReadBridge},
|
||||||
};
|
};
|
||||||
|
|
||||||
pub struct FlacImagesExtractor {
|
pub struct FlacImagesExtractor {
|
||||||
@@ -5,7 +5,10 @@ use std::{
|
|||||||
sync::{Arc, OnceLock},
|
sync::{Arc, OnceLock},
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::{Item, PileValue, extract::ObjectExtractor};
|
use crate::{
|
||||||
|
extract::traits::ObjectExtractor,
|
||||||
|
value::{Item, PileValue},
|
||||||
|
};
|
||||||
|
|
||||||
pub struct FsExtractor {
|
pub struct FsExtractor {
|
||||||
item: Item,
|
item: Item,
|
||||||
@@ -7,7 +7,10 @@ use std::{
|
|||||||
sync::{Arc, OnceLock},
|
sync::{Arc, OnceLock},
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::{Item, PileValue, SyncReadBridge, extract::ObjectExtractor};
|
use crate::{
|
||||||
|
extract::traits::ObjectExtractor,
|
||||||
|
value::{Item, PileValue, SyncReadBridge},
|
||||||
|
};
|
||||||
|
|
||||||
pub struct Id3Extractor {
|
pub struct Id3Extractor {
|
||||||
item: Item,
|
item: Item,
|
||||||
99
crates/pile-value/src/extract/item/mod.rs
Normal file
99
crates/pile-value/src/extract/item/mod.rs
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
mod flac;
|
||||||
|
use std::{collections::HashMap, sync::Arc};
|
||||||
|
|
||||||
|
pub use flac::*;
|
||||||
|
|
||||||
|
mod id3;
|
||||||
|
pub use id3::*;
|
||||||
|
|
||||||
|
mod fs;
|
||||||
|
pub use fs::*;
|
||||||
|
|
||||||
|
mod epub;
|
||||||
|
pub use epub::*;
|
||||||
|
|
||||||
|
mod exif;
|
||||||
|
pub use exif::*;
|
||||||
|
|
||||||
|
mod pdf;
|
||||||
|
pub use pdf::*;
|
||||||
|
|
||||||
|
mod toml;
|
||||||
|
use pile_config::Label;
|
||||||
|
pub use toml::*;
|
||||||
|
|
||||||
|
mod sidecar;
|
||||||
|
pub use sidecar::*;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
extract::{misc::MapExtractor, traits::ObjectExtractor},
|
||||||
|
value::{Item, PileValue},
|
||||||
|
};
|
||||||
|
|
||||||
|
pub struct ItemExtractor {
|
||||||
|
inner: MapExtractor,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ItemExtractor {
|
||||||
|
#[expect(clippy::unwrap_used)]
|
||||||
|
pub fn new(item: &Item) -> Self {
|
||||||
|
let inner = MapExtractor {
|
||||||
|
inner: HashMap::from([
|
||||||
|
(
|
||||||
|
Label::new("flac").unwrap(),
|
||||||
|
PileValue::ObjectExtractor(Arc::new(FlacExtractor::new(item))),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
Label::new("id3").unwrap(),
|
||||||
|
PileValue::ObjectExtractor(Arc::new(Id3Extractor::new(item))),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
Label::new("fs").unwrap(),
|
||||||
|
PileValue::ObjectExtractor(Arc::new(FsExtractor::new(item))),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
Label::new("epub").unwrap(),
|
||||||
|
PileValue::ObjectExtractor(Arc::new(EpubExtractor::new(item))),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
Label::new("exif").unwrap(),
|
||||||
|
PileValue::ObjectExtractor(Arc::new(ExifExtractor::new(item))),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
Label::new("pdf").unwrap(),
|
||||||
|
PileValue::ObjectExtractor(Arc::new(PdfExtractor::new(item))),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
Label::new("toml").unwrap(),
|
||||||
|
PileValue::ObjectExtractor(Arc::new(TomlExtractor::new(item))),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
Label::new("sidecar").unwrap(),
|
||||||
|
PileValue::ObjectExtractor(Arc::new(SidecarExtractor::new(item))),
|
||||||
|
),
|
||||||
|
]),
|
||||||
|
};
|
||||||
|
|
||||||
|
Self { inner }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
impl ObjectExtractor for ItemExtractor {
|
||||||
|
async fn field(&self, name: &pile_config::Label) -> Result<Option<PileValue>, std::io::Error> {
|
||||||
|
self.inner.field(name).await
|
||||||
|
}
|
||||||
|
|
||||||
|
#[expect(clippy::unwrap_used)]
|
||||||
|
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
||||||
|
return Ok(vec![
|
||||||
|
Label::new("flac").unwrap(),
|
||||||
|
Label::new("id3").unwrap(),
|
||||||
|
Label::new("fs").unwrap(),
|
||||||
|
Label::new("epub").unwrap(),
|
||||||
|
Label::new("exif").unwrap(),
|
||||||
|
Label::new("pdf").unwrap(),
|
||||||
|
Label::new("sidecar").unwrap(),
|
||||||
|
]);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,11 +1,6 @@
|
|||||||
use pile_config::Label;
|
use pile_config::Label;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
#[cfg(feature = "pdfium")]
|
|
||||||
mod pdf_cover;
|
|
||||||
#[cfg(feature = "pdfium")]
|
|
||||||
pub use pdf_cover::*;
|
|
||||||
|
|
||||||
#[cfg(feature = "pdfium")]
|
#[cfg(feature = "pdfium")]
|
||||||
mod pdf_pages;
|
mod pdf_pages;
|
||||||
#[cfg(feature = "pdfium")]
|
#[cfg(feature = "pdfium")]
|
||||||
@@ -17,14 +12,15 @@ pub use pdf_meta::*;
|
|||||||
mod pdf_text;
|
mod pdf_text;
|
||||||
pub use pdf_text::*;
|
pub use pdf_text::*;
|
||||||
|
|
||||||
use crate::{Item, PileValue, extract::ObjectExtractor};
|
use crate::{
|
||||||
|
extract::traits::ObjectExtractor,
|
||||||
|
value::{Item, PileValue},
|
||||||
|
};
|
||||||
|
|
||||||
pub struct PdfExtractor {
|
pub struct PdfExtractor {
|
||||||
text: Arc<PdfTextExtractor>,
|
text: Arc<PdfTextExtractor>,
|
||||||
meta: Arc<PdfMetaExtractor>,
|
meta: Arc<PdfMetaExtractor>,
|
||||||
#[cfg(feature = "pdfium")]
|
#[cfg(feature = "pdfium")]
|
||||||
cover: Arc<PdfCoverExtractor>,
|
|
||||||
#[cfg(feature = "pdfium")]
|
|
||||||
pages: Arc<PdfPagesExtractor>,
|
pages: Arc<PdfPagesExtractor>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -34,8 +30,6 @@ impl PdfExtractor {
|
|||||||
text: Arc::new(PdfTextExtractor::new(item)),
|
text: Arc::new(PdfTextExtractor::new(item)),
|
||||||
meta: Arc::new(PdfMetaExtractor::new(item)),
|
meta: Arc::new(PdfMetaExtractor::new(item)),
|
||||||
#[cfg(feature = "pdfium")]
|
#[cfg(feature = "pdfium")]
|
||||||
cover: Arc::new(PdfCoverExtractor::new(item)),
|
|
||||||
#[cfg(feature = "pdfium")]
|
|
||||||
pages: Arc::new(PdfPagesExtractor::new(item)),
|
pages: Arc::new(PdfPagesExtractor::new(item)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -48,8 +42,6 @@ impl ObjectExtractor for PdfExtractor {
|
|||||||
"text" => self.text.field(name).await,
|
"text" => self.text.field(name).await,
|
||||||
"meta" => Ok(Some(PileValue::ObjectExtractor(self.meta.clone()))),
|
"meta" => Ok(Some(PileValue::ObjectExtractor(self.meta.clone()))),
|
||||||
#[cfg(feature = "pdfium")]
|
#[cfg(feature = "pdfium")]
|
||||||
"cover" => self.cover.field(name).await,
|
|
||||||
#[cfg(feature = "pdfium")]
|
|
||||||
"pages" => Ok(Some(PileValue::ListExtractor(self.pages.clone()))),
|
"pages" => Ok(Some(PileValue::ListExtractor(self.pages.clone()))),
|
||||||
_ => Ok(None),
|
_ => Ok(None),
|
||||||
}
|
}
|
||||||
@@ -8,8 +8,10 @@ use std::{
|
|||||||
};
|
};
|
||||||
use tracing::trace;
|
use tracing::trace;
|
||||||
|
|
||||||
use crate::extract::ObjectExtractor;
|
use crate::{
|
||||||
use crate::{Item, PileValue, SyncReadBridge};
|
extract::traits::ObjectExtractor,
|
||||||
|
value::{Item, PileValue, SyncReadBridge},
|
||||||
|
};
|
||||||
|
|
||||||
pub struct PdfMetaExtractor {
|
pub struct PdfMetaExtractor {
|
||||||
item: Item,
|
item: Item,
|
||||||
@@ -6,7 +6,10 @@ use std::{
|
|||||||
};
|
};
|
||||||
use tracing::trace;
|
use tracing::trace;
|
||||||
|
|
||||||
use crate::{Item, PileValue, SyncReadBridge, extract::ListExtractor};
|
use crate::{
|
||||||
|
extract::traits::ListExtractor,
|
||||||
|
value::{Item, PileValue, SyncReadBridge},
|
||||||
|
};
|
||||||
|
|
||||||
pub struct PdfPagesExtractor {
|
pub struct PdfPagesExtractor {
|
||||||
item: Item,
|
item: Item,
|
||||||
@@ -8,8 +8,10 @@ use std::{
|
|||||||
};
|
};
|
||||||
use tracing::trace;
|
use tracing::trace;
|
||||||
|
|
||||||
use crate::extract::ObjectExtractor;
|
use crate::{
|
||||||
use crate::{Item, PileValue, SyncReadBridge};
|
extract::traits::ObjectExtractor,
|
||||||
|
value::{Item, PileValue, SyncReadBridge},
|
||||||
|
};
|
||||||
|
|
||||||
pub struct PdfTextExtractor {
|
pub struct PdfTextExtractor {
|
||||||
item: Item,
|
item: Item,
|
||||||
@@ -1,9 +1,10 @@
|
|||||||
use pile_config::Label;
|
use pile_config::Label;
|
||||||
use std::sync::OnceLock;
|
use std::sync::OnceLock;
|
||||||
|
|
||||||
|
use super::TomlExtractor;
|
||||||
use crate::{
|
use crate::{
|
||||||
Item, PileValue,
|
extract::traits::ObjectExtractor,
|
||||||
extract::{ObjectExtractor, TomlExtractor},
|
value::{Item, PileValue},
|
||||||
};
|
};
|
||||||
|
|
||||||
pub struct SidecarExtractor {
|
pub struct SidecarExtractor {
|
||||||
@@ -4,7 +4,10 @@ use std::{
|
|||||||
sync::{Arc, OnceLock},
|
sync::{Arc, OnceLock},
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::{AsyncReader, Item, PileValue, extract::ObjectExtractor};
|
use crate::{
|
||||||
|
extract::traits::ObjectExtractor,
|
||||||
|
value::{AsyncReader, Item, PileValue},
|
||||||
|
};
|
||||||
|
|
||||||
fn toml_to_pile(value: toml::Value) -> PileValue {
|
fn toml_to_pile(value: toml::Value) -> PileValue {
|
||||||
match value {
|
match value {
|
||||||
24
crates/pile-value/src/extract/misc/list.rs
Normal file
24
crates/pile-value/src/extract/misc/list.rs
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use crate::{extract::traits::ListExtractor, value::PileValue};
|
||||||
|
|
||||||
|
pub struct ArrayExtractor {
|
||||||
|
inner: Arc<Vec<PileValue>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ArrayExtractor {
|
||||||
|
pub fn new(inner: Arc<Vec<PileValue>>) -> Self {
|
||||||
|
Self { inner }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
impl ListExtractor for ArrayExtractor {
|
||||||
|
async fn get(&self, idx: usize) -> Result<Option<PileValue>, std::io::Error> {
|
||||||
|
Ok(self.inner.get(idx).cloned())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn len(&self) -> Result<usize, std::io::Error> {
|
||||||
|
Ok(self.inner.len())
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,10 +1,11 @@
|
|||||||
use pile_config::Label;
|
use pile_config::Label;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use crate::{PileValue, extract::ObjectExtractor};
|
use crate::{extract::traits::ObjectExtractor, value::PileValue};
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
pub struct MapExtractor {
|
pub struct MapExtractor {
|
||||||
pub(crate) inner: HashMap<Label, PileValue>,
|
pub inner: HashMap<Label, PileValue>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
8
crates/pile-value/src/extract/misc/mod.rs
Normal file
8
crates/pile-value/src/extract/misc/mod.rs
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
mod list;
|
||||||
|
pub use list::*;
|
||||||
|
|
||||||
|
mod vec;
|
||||||
|
pub use vec::*;
|
||||||
|
|
||||||
|
mod map;
|
||||||
|
pub use map::*;
|
||||||
17
crates/pile-value/src/extract/misc/vec.rs
Normal file
17
crates/pile-value/src/extract/misc/vec.rs
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
use crate::{extract::traits::ListExtractor, value::PileValue};
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct VecExtractor {
|
||||||
|
pub inner: Vec<PileValue>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
impl ListExtractor for VecExtractor {
|
||||||
|
async fn get(&self, idx: usize) -> Result<Option<PileValue>, std::io::Error> {
|
||||||
|
Ok(self.inner.get(idx).cloned())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn len(&self) -> Result<usize, std::io::Error> {
|
||||||
|
Ok(self.inner.len())
|
||||||
|
}
|
||||||
|
}
|
||||||
4
crates/pile-value/src/extract/mod.rs
Normal file
4
crates/pile-value/src/extract/mod.rs
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
pub mod item;
|
||||||
|
pub mod misc;
|
||||||
|
pub mod string;
|
||||||
|
pub mod traits;
|
||||||
51
crates/pile-value/src/extract/string.rs
Normal file
51
crates/pile-value/src/extract/string.rs
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
use pile_config::Label;
|
||||||
|
use smartstring::{LazyCompact, SmartString};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use crate::{extract::traits::ObjectExtractor, value::PileValue};
|
||||||
|
|
||||||
|
pub struct StringExtractor {
|
||||||
|
item: Arc<SmartString<LazyCompact>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StringExtractor {
|
||||||
|
pub fn new(item: &Arc<SmartString<LazyCompact>>) -> Self {
|
||||||
|
Self { item: item.clone() }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
impl ObjectExtractor for StringExtractor {
|
||||||
|
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
|
||||||
|
Ok(match name.as_str() {
|
||||||
|
"trim" => Some(PileValue::String(Arc::new(
|
||||||
|
self.item.as_str().trim().into(),
|
||||||
|
))),
|
||||||
|
|
||||||
|
"upper" => Some(PileValue::String(Arc::new(
|
||||||
|
self.item.as_str().to_lowercase().into(),
|
||||||
|
))),
|
||||||
|
|
||||||
|
"lower" => Some(PileValue::String(Arc::new(
|
||||||
|
self.item.as_str().to_uppercase().into(),
|
||||||
|
))),
|
||||||
|
|
||||||
|
"nonempty" => Some(match self.item.is_empty() {
|
||||||
|
true => PileValue::Null,
|
||||||
|
false => PileValue::String(self.item.clone()),
|
||||||
|
}),
|
||||||
|
|
||||||
|
_ => None,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[expect(clippy::unwrap_used)]
|
||||||
|
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
||||||
|
return Ok(vec![
|
||||||
|
Label::new("trim").unwrap(),
|
||||||
|
Label::new("upper").unwrap(),
|
||||||
|
Label::new("lower").unwrap(),
|
||||||
|
Label::new("nonempty").unwrap(),
|
||||||
|
]);
|
||||||
|
}
|
||||||
|
}
|
||||||
68
crates/pile-value/src/extract/traits.rs
Normal file
68
crates/pile-value/src/extract/traits.rs
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
/// An attachment that extracts metadata from an [Item].
|
||||||
|
///
|
||||||
|
/// Metadata is exposed as an immutable map of {label: value},
|
||||||
|
/// much like a json object.
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
pub trait ObjectExtractor: Send + Sync {
|
||||||
|
/// Get the field at `name` from `item`.
|
||||||
|
/// - returns `None` if `name` is not a valid field
|
||||||
|
/// - returns `Some(Null)` if `name` is not available
|
||||||
|
async fn field(
|
||||||
|
&self,
|
||||||
|
name: &pile_config::Label,
|
||||||
|
) -> Result<Option<crate::value::PileValue>, std::io::Error>;
|
||||||
|
|
||||||
|
/// Return all fields in this extractor.
|
||||||
|
/// `Self::field` must return [Some] for all these keys
|
||||||
|
/// and [None] for all others.
|
||||||
|
async fn fields(&self) -> Result<Vec<pile_config::Label>, std::io::Error>;
|
||||||
|
|
||||||
|
/// Convert this to a JSON value.
|
||||||
|
async fn to_json(&self) -> Result<serde_json::Value, std::io::Error> {
|
||||||
|
let keys = self.fields().await?;
|
||||||
|
let mut map = serde_json::Map::new();
|
||||||
|
for k in &keys {
|
||||||
|
let v = match self.field(k).await? {
|
||||||
|
Some(x) => x,
|
||||||
|
None => continue,
|
||||||
|
};
|
||||||
|
map.insert(k.to_string(), Box::pin(v.to_json()).await?);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(serde_json::Value::Object(map))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An attachment that extracts metadata from an [Item].
|
||||||
|
///
|
||||||
|
/// Metadata is exposed as an immutable list of values.
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
pub trait ListExtractor: Send + Sync {
|
||||||
|
/// Get the item at index `idx`.
|
||||||
|
/// Indices start at zero, and must be consecutive.
|
||||||
|
/// - returns `None` if `idx` is out of range
|
||||||
|
/// - returns `Some(Null)` if `None` is at `idx`
|
||||||
|
async fn get(&self, idx: usize) -> Result<Option<crate::value::PileValue>, std::io::Error>;
|
||||||
|
|
||||||
|
async fn len(&self) -> Result<usize, std::io::Error>;
|
||||||
|
|
||||||
|
async fn is_empty(&self) -> Result<bool, std::io::Error> {
|
||||||
|
Ok(self.len().await? == 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert this list to a JSON value.
|
||||||
|
async fn to_json(&self) -> Result<serde_json::Value, std::io::Error> {
|
||||||
|
let len = self.len().await?;
|
||||||
|
let mut list = Vec::with_capacity(len);
|
||||||
|
for i in 0..len {
|
||||||
|
#[expect(clippy::expect_used)]
|
||||||
|
let v = self
|
||||||
|
.get(i)
|
||||||
|
.await?
|
||||||
|
.expect("value must be present according to length");
|
||||||
|
list.push(Box::pin(v.to_json()).await?);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(serde_json::Value::Array(list))
|
||||||
|
}
|
||||||
|
}
|
||||||
3
crates/pile-value/src/lib.rs
Normal file
3
crates/pile-value/src/lib.rs
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
pub mod extract;
|
||||||
|
pub mod source;
|
||||||
|
pub mod value;
|
||||||
@@ -4,7 +4,10 @@ use std::{path::PathBuf, sync::Arc};
|
|||||||
use tokio_stream::wrappers::ReceiverStream;
|
use tokio_stream::wrappers::ReceiverStream;
|
||||||
use walkdir::WalkDir;
|
use walkdir::WalkDir;
|
||||||
|
|
||||||
use crate::{DataSource, Item, path_ts_latest};
|
use crate::{
|
||||||
|
source::{DataSource, misc::path_ts_latest},
|
||||||
|
value::Item,
|
||||||
|
};
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct DirDataSource {
|
pub struct DirDataSource {
|
||||||
@@ -1,15 +1,24 @@
|
|||||||
|
mod dir;
|
||||||
|
pub use dir::*;
|
||||||
|
|
||||||
|
mod s3;
|
||||||
|
pub use s3::*;
|
||||||
|
|
||||||
|
pub mod misc;
|
||||||
|
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use tokio_stream::wrappers::ReceiverStream;
|
use tokio_stream::wrappers::ReceiverStream;
|
||||||
|
|
||||||
use crate::Item;
|
|
||||||
|
|
||||||
/// A read-only set of [Item]s.
|
/// A read-only set of [Item]s.
|
||||||
pub trait DataSource {
|
pub trait DataSource {
|
||||||
/// Get an item from this datasource
|
/// Get an item from this datasource
|
||||||
fn get(&self, key: &str) -> impl Future<Output = Result<Option<Item>, std::io::Error>> + Send;
|
fn get(
|
||||||
|
&self,
|
||||||
|
key: &str,
|
||||||
|
) -> impl Future<Output = Result<Option<crate::value::Item>, std::io::Error>> + Send;
|
||||||
|
|
||||||
/// Iterate over all items in this source in an arbitrary order
|
/// Iterate over all items in this source in an arbitrary order
|
||||||
fn iter(&self) -> ReceiverStream<Result<Item, std::io::Error>>;
|
fn iter(&self) -> ReceiverStream<Result<crate::value::Item, std::io::Error>>;
|
||||||
|
|
||||||
/// Return the time of the latest change to the data in this source
|
/// Return the time of the latest change to the data in this source
|
||||||
fn latest_change(
|
fn latest_change(
|
||||||
@@ -5,7 +5,7 @@ use smartstring::{LazyCompact, SmartString};
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tokio_stream::wrappers::ReceiverStream;
|
use tokio_stream::wrappers::ReceiverStream;
|
||||||
|
|
||||||
use crate::{DataSource, Item};
|
use crate::{source::DataSource, value::Item};
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct S3DataSource {
|
pub struct S3DataSource {
|
||||||
105
crates/pile-value/src/value/item.rs
Normal file
105
crates/pile-value/src/value/item.rs
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
use mime::Mime;
|
||||||
|
use smartstring::{LazyCompact, SmartString};
|
||||||
|
use std::{fs::File, path::PathBuf, sync::Arc};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
source::{DirDataSource, S3DataSource},
|
||||||
|
value::{ItemReader, S3Reader},
|
||||||
|
};
|
||||||
|
|
||||||
|
//
|
||||||
|
// MARK: item
|
||||||
|
//
|
||||||
|
|
||||||
|
/// A cheaply-clonable pointer to an item in a dataset
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub enum Item {
|
||||||
|
File {
|
||||||
|
source: Arc<DirDataSource>,
|
||||||
|
mime: Mime,
|
||||||
|
|
||||||
|
path: PathBuf,
|
||||||
|
sidecar: Option<Box<Item>>,
|
||||||
|
},
|
||||||
|
|
||||||
|
S3 {
|
||||||
|
source: Arc<S3DataSource>,
|
||||||
|
mime: Mime,
|
||||||
|
|
||||||
|
key: SmartString<LazyCompact>,
|
||||||
|
sidecar: Option<Box<Item>>,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Item {
|
||||||
|
/// Open the item for reading. For S3, performs a HEAD request to determine
|
||||||
|
/// the object size.
|
||||||
|
pub async fn read(&self) -> Result<ItemReader, std::io::Error> {
|
||||||
|
Ok(match self {
|
||||||
|
Self::File { path, .. } => ItemReader::File(File::open(path)?),
|
||||||
|
|
||||||
|
Self::S3 { source, key, .. } => {
|
||||||
|
let head = source
|
||||||
|
.client
|
||||||
|
.head_object()
|
||||||
|
.bucket(source.bucket.as_str())
|
||||||
|
.key(key.as_str())
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(std::io::Error::other)?;
|
||||||
|
|
||||||
|
let size = head.content_length().unwrap_or(0) as u64;
|
||||||
|
|
||||||
|
ItemReader::S3(S3Reader {
|
||||||
|
client: source.client.clone(),
|
||||||
|
bucket: source.bucket.clone(),
|
||||||
|
key: key.to_owned(),
|
||||||
|
cursor: 0,
|
||||||
|
size,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn source_name(&self) -> &pile_config::Label {
|
||||||
|
match self {
|
||||||
|
Self::File { source, .. } => &source.name,
|
||||||
|
Self::S3 { source, .. } => &source.name,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[expect(clippy::expect_used)]
|
||||||
|
pub fn key(&self) -> SmartString<LazyCompact> {
|
||||||
|
match self {
|
||||||
|
Self::File { path, .. } => path.to_str().expect("path is not utf-8").into(),
|
||||||
|
Self::S3 { key, .. } => key.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn hash(&self) -> Result<blake3::Hash, std::io::Error> {
|
||||||
|
match self {
|
||||||
|
Self::File { path, .. } => {
|
||||||
|
let mut hasher = blake3::Hasher::new();
|
||||||
|
let mut file = std::fs::File::open(path)?;
|
||||||
|
std::io::copy(&mut file, &mut hasher)?;
|
||||||
|
return Ok(hasher.finalize());
|
||||||
|
}
|
||||||
|
|
||||||
|
Self::S3 { .. } => todo!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn mime(&self) -> &Mime {
|
||||||
|
match self {
|
||||||
|
Self::File { mime, .. } => mime,
|
||||||
|
Self::S3 { mime, .. } => mime,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn sidecar(&self) -> Option<&Self> {
|
||||||
|
match self {
|
||||||
|
Self::File { sidecar, .. } => sidecar.as_ref().map(|x| &**x),
|
||||||
|
Self::S3 { sidecar, .. } => sidecar.as_ref().map(|x| &**x),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
9
crates/pile-value/src/value/mod.rs
Normal file
9
crates/pile-value/src/value/mod.rs
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
mod item;
|
||||||
|
pub use item::*;
|
||||||
|
|
||||||
|
mod readers;
|
||||||
|
pub use readers::*;
|
||||||
|
|
||||||
|
#[expect(clippy::module_inception)]
|
||||||
|
mod value;
|
||||||
|
pub use value::*;
|
||||||
@@ -1,114 +1,13 @@
|
|||||||
use mime::Mime;
|
|
||||||
use smartstring::{LazyCompact, SmartString};
|
use smartstring::{LazyCompact, SmartString};
|
||||||
use std::{
|
use std::{
|
||||||
fs::File,
|
fs::File,
|
||||||
io::{Read, Seek, SeekFrom},
|
io::{Read, Seek, SeekFrom},
|
||||||
path::PathBuf,
|
|
||||||
sync::Arc,
|
sync::Arc,
|
||||||
};
|
};
|
||||||
use tokio::runtime::Handle;
|
use tokio::runtime::Handle;
|
||||||
|
|
||||||
use crate::source::{DirDataSource, S3DataSource};
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// MARK: item
|
// MARK: traits
|
||||||
//
|
|
||||||
|
|
||||||
/// A cheaply-clonable pointer to an item in a dataset
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub enum Item {
|
|
||||||
File {
|
|
||||||
source: Arc<DirDataSource>,
|
|
||||||
mime: Mime,
|
|
||||||
|
|
||||||
path: PathBuf,
|
|
||||||
sidecar: Option<Box<Item>>,
|
|
||||||
},
|
|
||||||
|
|
||||||
S3 {
|
|
||||||
source: Arc<S3DataSource>,
|
|
||||||
mime: Mime,
|
|
||||||
|
|
||||||
key: SmartString<LazyCompact>,
|
|
||||||
sidecar: Option<Box<Item>>,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Item {
|
|
||||||
/// Open the item for reading. For S3, performs a HEAD request to determine
|
|
||||||
/// the object size.
|
|
||||||
pub async fn read(&self) -> Result<ItemReader, std::io::Error> {
|
|
||||||
Ok(match self {
|
|
||||||
Self::File { path, .. } => ItemReader::File(File::open(path)?),
|
|
||||||
|
|
||||||
Self::S3 { source, key, .. } => {
|
|
||||||
let head = source
|
|
||||||
.client
|
|
||||||
.head_object()
|
|
||||||
.bucket(source.bucket.as_str())
|
|
||||||
.key(key.as_str())
|
|
||||||
.send()
|
|
||||||
.await
|
|
||||||
.map_err(std::io::Error::other)?;
|
|
||||||
|
|
||||||
let size = head.content_length().unwrap_or(0) as u64;
|
|
||||||
|
|
||||||
ItemReader::S3(S3Reader {
|
|
||||||
client: source.client.clone(),
|
|
||||||
bucket: source.bucket.clone(),
|
|
||||||
key: key.to_owned(),
|
|
||||||
cursor: 0,
|
|
||||||
size,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn source_name(&self) -> &pile_config::Label {
|
|
||||||
match self {
|
|
||||||
Self::File { source, .. } => &source.name,
|
|
||||||
Self::S3 { source, .. } => &source.name,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[expect(clippy::expect_used)]
|
|
||||||
pub fn key(&self) -> SmartString<LazyCompact> {
|
|
||||||
match self {
|
|
||||||
Self::File { path, .. } => path.to_str().expect("path is not utf-8").into(),
|
|
||||||
Self::S3 { key, .. } => key.clone(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn hash(&self) -> Result<blake3::Hash, std::io::Error> {
|
|
||||||
match self {
|
|
||||||
Self::File { path, .. } => {
|
|
||||||
let mut hasher = blake3::Hasher::new();
|
|
||||||
let mut file = std::fs::File::open(path)?;
|
|
||||||
std::io::copy(&mut file, &mut hasher)?;
|
|
||||||
return Ok(hasher.finalize());
|
|
||||||
}
|
|
||||||
|
|
||||||
Self::S3 { .. } => todo!(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn mime(&self) -> &Mime {
|
|
||||||
match self {
|
|
||||||
Self::File { mime, .. } => mime,
|
|
||||||
Self::S3 { mime, .. } => mime,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn sidecar(&self) -> Option<&Self> {
|
|
||||||
match self {
|
|
||||||
Self::File { sidecar, .. } => sidecar.as_ref().map(|x| &**x),
|
|
||||||
Self::S3 { sidecar, .. } => sidecar.as_ref().map(|x| &**x),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// MARK: reader
|
|
||||||
//
|
//
|
||||||
|
|
||||||
pub trait AsyncReader: Send {
|
pub trait AsyncReader: Send {
|
||||||
@@ -210,11 +109,11 @@ impl AsyncSeekReader for ItemReader {
|
|||||||
//
|
//
|
||||||
|
|
||||||
pub struct S3Reader {
|
pub struct S3Reader {
|
||||||
client: Arc<aws_sdk_s3::Client>,
|
pub client: Arc<aws_sdk_s3::Client>,
|
||||||
bucket: SmartString<LazyCompact>,
|
pub bucket: SmartString<LazyCompact>,
|
||||||
key: SmartString<LazyCompact>,
|
pub key: SmartString<LazyCompact>,
|
||||||
cursor: u64,
|
pub cursor: u64,
|
||||||
size: u64,
|
pub size: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl AsyncReader for S3Reader {
|
impl AsyncReader for S3Reader {
|
||||||
@@ -4,7 +4,15 @@ use serde_json::{Map, Value};
|
|||||||
use smartstring::{LazyCompact, SmartString};
|
use smartstring::{LazyCompact, SmartString};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use crate::extract::{ListExtractor, ObjectExtractor};
|
use crate::{
|
||||||
|
extract::{
|
||||||
|
item::ItemExtractor,
|
||||||
|
misc::{ArrayExtractor, MapExtractor, VecExtractor},
|
||||||
|
string::StringExtractor,
|
||||||
|
traits::{ListExtractor, ObjectExtractor},
|
||||||
|
},
|
||||||
|
value::Item,
|
||||||
|
};
|
||||||
|
|
||||||
/// An immutable, cheaply-clonable, lazily-computed value.
|
/// An immutable, cheaply-clonable, lazily-computed value.
|
||||||
/// Very similar to [serde_json::Value].
|
/// Very similar to [serde_json::Value].
|
||||||
@@ -30,6 +38,9 @@ pub enum PileValue {
|
|||||||
|
|
||||||
/// A lazily-computed array
|
/// A lazily-computed array
|
||||||
ListExtractor(Arc<dyn ListExtractor>),
|
ListExtractor(Arc<dyn ListExtractor>),
|
||||||
|
|
||||||
|
/// A pointer to an item in this dataset
|
||||||
|
Item(Item),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Clone for PileValue {
|
impl Clone for PileValue {
|
||||||
@@ -46,11 +57,40 @@ impl Clone for PileValue {
|
|||||||
mime: mime.clone(),
|
mime: mime.clone(),
|
||||||
bytes: bytes.clone(),
|
bytes: bytes.clone(),
|
||||||
},
|
},
|
||||||
|
Self::Item(i) => Self::Item(i.clone()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PileValue {
|
impl PileValue {
|
||||||
|
pub fn object_extractor(&self) -> Arc<dyn ObjectExtractor> {
|
||||||
|
match self {
|
||||||
|
Self::Null => Arc::new(MapExtractor::default()),
|
||||||
|
Self::U64(_) => Arc::new(MapExtractor::default()),
|
||||||
|
Self::I64(_) => Arc::new(MapExtractor::default()),
|
||||||
|
Self::Array(_) => Arc::new(MapExtractor::default()),
|
||||||
|
Self::String(s) => Arc::new(StringExtractor::new(s)),
|
||||||
|
Self::Blob { .. } => Arc::new(MapExtractor::default()),
|
||||||
|
Self::ListExtractor(_) => Arc::new(MapExtractor::default()),
|
||||||
|
Self::ObjectExtractor(e) => e.clone(),
|
||||||
|
Self::Item(i) => Arc::new(ItemExtractor::new(i)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn list_extractor(&self) -> Arc<dyn ListExtractor> {
|
||||||
|
match self {
|
||||||
|
Self::Null => Arc::new(VecExtractor::default()),
|
||||||
|
Self::U64(_) => Arc::new(VecExtractor::default()),
|
||||||
|
Self::I64(_) => Arc::new(VecExtractor::default()),
|
||||||
|
Self::Array(a) => Arc::new(ArrayExtractor::new(a.clone())),
|
||||||
|
Self::String(_) => Arc::new(VecExtractor::default()),
|
||||||
|
Self::Blob { .. } => Arc::new(VecExtractor::default()),
|
||||||
|
Self::ListExtractor(e) => e.clone(),
|
||||||
|
Self::ObjectExtractor(_) => Arc::new(VecExtractor::default()),
|
||||||
|
Self::Item(_) => Arc::new(VecExtractor::default()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn query(&self, query: &ObjectPath) -> Result<Option<Self>, std::io::Error> {
|
pub async fn query(&self, query: &ObjectPath) -> Result<Option<Self>, std::io::Error> {
|
||||||
let mut out: Option<PileValue> = Some(self.clone());
|
let mut out: Option<PileValue> = Some(self.clone());
|
||||||
|
|
||||||
@@ -58,50 +98,41 @@ impl PileValue {
|
|||||||
match s {
|
match s {
|
||||||
PathSegment::Root => out = Some(self.clone()),
|
PathSegment::Root => out = Some(self.clone()),
|
||||||
PathSegment::Field(field) => {
|
PathSegment::Field(field) => {
|
||||||
out = match out {
|
let e = match out.map(|x| x.object_extractor()) {
|
||||||
None => return Ok(None),
|
Some(e) => e,
|
||||||
Some(Self::Null) => None,
|
None => {
|
||||||
Some(Self::U64(_)) => None,
|
out = None;
|
||||||
Some(Self::I64(_)) => None,
|
continue;
|
||||||
Some(Self::Array(_)) => None,
|
}
|
||||||
Some(Self::String(_)) => None,
|
};
|
||||||
Some(Self::Blob { .. }) => None,
|
|
||||||
Some(Self::ListExtractor(_)) => None,
|
out = e.field(field).await?;
|
||||||
Some(Self::ObjectExtractor(e)) => e.field(field).await?,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PathSegment::Index(idx) => {
|
PathSegment::Index(idx) => {
|
||||||
out = match &out {
|
let e = match out.map(|x| x.list_extractor()) {
|
||||||
None => return Ok(None),
|
Some(e) => e,
|
||||||
Some(Self::Null) => None,
|
None => {
|
||||||
Some(Self::U64(_)) => None,
|
out = None;
|
||||||
Some(Self::I64(_)) => None,
|
continue;
|
||||||
Some(Self::Blob { .. }) => None,
|
|
||||||
Some(Self::Array(v)) => {
|
|
||||||
let idx = if *idx >= 0 {
|
|
||||||
usize::try_from(*idx).ok()
|
|
||||||
} else {
|
|
||||||
usize::try_from(v.len() as i64 - idx).ok()
|
|
||||||
};
|
|
||||||
|
|
||||||
idx.and_then(|idx| v.get(idx)).cloned()
|
|
||||||
}
|
}
|
||||||
Some(Self::String(_)) => None,
|
};
|
||||||
Some(Self::ObjectExtractor(_)) => None,
|
|
||||||
Some(Self::ListExtractor(e)) => {
|
|
||||||
let idx = if *idx >= 0 {
|
|
||||||
usize::try_from(*idx).ok()
|
|
||||||
} else {
|
|
||||||
usize::try_from(e.len().await? as i64 - idx).ok()
|
|
||||||
};
|
|
||||||
|
|
||||||
match idx {
|
let idx = if *idx >= 0 {
|
||||||
Some(idx) => e.get(idx).await?,
|
usize::try_from(*idx).ok()
|
||||||
None => None,
|
} else {
|
||||||
}
|
usize::try_from(e.len().await? as i64 - idx).ok()
|
||||||
|
};
|
||||||
|
|
||||||
|
let idx = match idx {
|
||||||
|
Some(idx) => idx,
|
||||||
|
None => {
|
||||||
|
out = None;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
|
|
||||||
|
out = e.get(idx).await?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -127,7 +158,8 @@ impl PileValue {
|
|||||||
Self::Array(x) => (!x.is_empty()).then(|| Value::Number(1u64.into())),
|
Self::Array(x) => (!x.is_empty()).then(|| Value::Number(1u64.into())),
|
||||||
Self::ListExtractor(x) => (!x.is_empty().await?).then(|| Value::Number(1u64.into())),
|
Self::ListExtractor(x) => (!x.is_empty().await?).then(|| Value::Number(1u64.into())),
|
||||||
|
|
||||||
Self::ObjectExtractor(e) => {
|
Self::ObjectExtractor(_) | Self::Item(_) => {
|
||||||
|
let e = self.object_extractor();
|
||||||
let keys = e.fields().await?;
|
let keys = e.fields().await?;
|
||||||
let mut map = Map::new();
|
let mut map = Map::new();
|
||||||
for k in &keys {
|
for k in &keys {
|
||||||
@@ -160,22 +192,27 @@ impl PileValue {
|
|||||||
Self::Null => Value::Null,
|
Self::Null => Value::Null,
|
||||||
Self::U64(x) => Value::Number((*x).into()),
|
Self::U64(x) => Value::Number((*x).into()),
|
||||||
Self::I64(x) => Value::Number((*x).into()),
|
Self::I64(x) => Value::Number((*x).into()),
|
||||||
|
Self::String(x) => Value::String(x.to_string()),
|
||||||
|
|
||||||
// TODO: replace with something meaningful
|
// TODO: replace with something meaningful?
|
||||||
Self::Blob { mime, bytes } => {
|
Self::Blob { mime, bytes } => {
|
||||||
Value::String(format!("<Blob ({mime}, {} bytes)>", bytes.len()))
|
Value::String(format!("<Blob ({mime}, {} bytes)>", bytes.len()))
|
||||||
}
|
}
|
||||||
Self::String(x) => Value::String(x.to_string()),
|
|
||||||
|
|
||||||
Self::Array(x) => {
|
#[expect(clippy::expect_used)]
|
||||||
|
Self::Array(_) | Self::ListExtractor(_) => {
|
||||||
|
let e = self.list_extractor();
|
||||||
|
let len = e.len().await?;
|
||||||
let mut arr = Vec::new();
|
let mut arr = Vec::new();
|
||||||
for item in &**x {
|
for i in 0..len {
|
||||||
arr.push(Box::pin(item.to_json()).await?);
|
let v = e.get(i).await?.expect("item must be present");
|
||||||
|
arr.push(Box::pin(v.to_json()).await?);
|
||||||
}
|
}
|
||||||
Value::Array(arr)
|
Value::Array(arr)
|
||||||
}
|
}
|
||||||
|
|
||||||
Self::ObjectExtractor(e) => {
|
Self::ObjectExtractor(_) | Self::Item(_) => {
|
||||||
|
let e = self.object_extractor();
|
||||||
let keys = e.fields().await?;
|
let keys = e.fields().await?;
|
||||||
let mut map = Map::new();
|
let mut map = Map::new();
|
||||||
for k in &keys {
|
for k in &keys {
|
||||||
@@ -187,8 +224,6 @@ impl PileValue {
|
|||||||
}
|
}
|
||||||
Value::Object(map)
|
Value::Object(map)
|
||||||
}
|
}
|
||||||
|
|
||||||
Self::ListExtractor(e) => e.to_json().await?,
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -10,6 +10,7 @@ workspace = true
|
|||||||
[dependencies]
|
[dependencies]
|
||||||
pile-toolbox = { workspace = true }
|
pile-toolbox = { workspace = true }
|
||||||
pile-dataset = { workspace = true, features = ["axum", "pdfium"] }
|
pile-dataset = { workspace = true, features = ["axum", "pdfium"] }
|
||||||
|
pile-value = { workspace = true, features = ["pdfium"] }
|
||||||
pile-config = { workspace = true }
|
pile-config = { workspace = true }
|
||||||
|
|
||||||
tracing = { workspace = true }
|
tracing = { workspace = true }
|
||||||
|
|||||||
@@ -1,10 +1,12 @@
|
|||||||
use anyhow::{Context, Result};
|
use anyhow::{Context, Result};
|
||||||
use clap::Args;
|
use clap::Args;
|
||||||
use pile_config::{Label, Source};
|
use pile_config::{Label, Source};
|
||||||
use pile_dataset::index::DbFtsIndex;
|
use pile_dataset::{Datasets, index::DbFtsIndex};
|
||||||
use pile_dataset::source::DirDataSource;
|
|
||||||
use pile_dataset::{DataSource, Datasets, Item, PileValue, extract::MetaExtractor};
|
|
||||||
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
||||||
|
use pile_value::{
|
||||||
|
source::{DataSource, DirDataSource},
|
||||||
|
value::{Item, PileValue},
|
||||||
|
};
|
||||||
use std::{path::PathBuf, sync::Arc};
|
use std::{path::PathBuf, sync::Arc};
|
||||||
use tokio_stream::StreamExt;
|
use tokio_stream::StreamExt;
|
||||||
use tracing::{info, warn};
|
use tracing::{info, warn};
|
||||||
@@ -72,11 +74,9 @@ impl CliCmd for AnnotateCommand {
|
|||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
|
|
||||||
let meta = MetaExtractor::new(&item);
|
let item = PileValue::Item(item.clone());
|
||||||
let extractor = PileValue::ObjectExtractor(Arc::new(meta));
|
|
||||||
|
|
||||||
let Some(value) =
|
let Some(value) =
|
||||||
index.get_field(&extractor, &field).await.with_context(|| {
|
index.get_field(&item, &field).await.with_context(|| {
|
||||||
format!("while extracting field from {}", path.display())
|
format!("while extracting field from {}", path.display())
|
||||||
})?
|
})?
|
||||||
else {
|
else {
|
||||||
|
|||||||
@@ -1,9 +1,10 @@
|
|||||||
use anyhow::{Context, Result};
|
use anyhow::{Context, Result};
|
||||||
use clap::Args;
|
use clap::Args;
|
||||||
use pile_dataset::{Datasets, PileValue, extract::MetaExtractor};
|
use pile_dataset::Datasets;
|
||||||
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
||||||
|
use pile_value::value::PileValue;
|
||||||
use serde_json::{Map, Value};
|
use serde_json::{Map, Value};
|
||||||
use std::{path::PathBuf, sync::Arc, time::Instant};
|
use std::{path::PathBuf, time::Instant};
|
||||||
use tokio::task::JoinSet;
|
use tokio::task::JoinSet;
|
||||||
use tokio_stream::StreamExt;
|
use tokio_stream::StreamExt;
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
@@ -93,9 +94,8 @@ impl CliCmd for FieldsCommand {
|
|||||||
item_result.with_context(|| format!("while reading source {name}"))?;
|
item_result.with_context(|| format!("while reading source {name}"))?;
|
||||||
let name = name.clone();
|
let name = name.clone();
|
||||||
join_set.spawn(async move {
|
join_set.spawn(async move {
|
||||||
let meta = MetaExtractor::new(&item);
|
let item = PileValue::Item(item);
|
||||||
let value = PileValue::ObjectExtractor(Arc::new(meta));
|
let result = item.count_fields().await.with_context(|| {
|
||||||
let result = value.count_fields().await.with_context(|| {
|
|
||||||
format!("while counting fields in source {name}")
|
format!("while counting fields in source {name}")
|
||||||
})?;
|
})?;
|
||||||
Ok(result.and_then(|v| {
|
Ok(result.and_then(|v| {
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
use anyhow::{Context, Result};
|
use anyhow::{Context, Result};
|
||||||
use clap::Args;
|
use clap::Args;
|
||||||
use pile_config::objectpath::ObjectPath;
|
use pile_config::objectpath::ObjectPath;
|
||||||
use pile_dataset::{Datasets, PileValue, extract::MetaExtractor};
|
use pile_dataset::Datasets;
|
||||||
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
||||||
|
use pile_value::value::PileValue;
|
||||||
use std::{path::PathBuf, str::FromStr, sync::Arc};
|
use std::{path::PathBuf, str::FromStr, sync::Arc};
|
||||||
use tokio::task::JoinSet;
|
use tokio::task::JoinSet;
|
||||||
use tokio_stream::StreamExt;
|
use tokio_stream::StreamExt;
|
||||||
@@ -79,9 +80,8 @@ impl CliCmd for ListCommand {
|
|||||||
let invert = self.invert;
|
let invert = self.invert;
|
||||||
|
|
||||||
join_set.spawn(async move {
|
join_set.spawn(async move {
|
||||||
let meta = MetaExtractor::new(&item);
|
let item = PileValue::Item(item);
|
||||||
let root = PileValue::ObjectExtractor(Arc::new(meta));
|
let value = item.query(&path).await?;
|
||||||
let value = root.query(&path).await?;
|
|
||||||
|
|
||||||
let is_present =
|
let is_present =
|
||||||
matches!(value, Some(v) if !matches!(v, PileValue::Null));
|
matches!(value, Some(v) if !matches!(v, PileValue::Null));
|
||||||
|
|||||||
@@ -1,9 +1,10 @@
|
|||||||
use anyhow::{Context, Result};
|
use anyhow::{Context, Result};
|
||||||
use clap::Args;
|
use clap::Args;
|
||||||
use pile_config::{Label, objectpath::ObjectPath};
|
use pile_config::{Label, objectpath::ObjectPath};
|
||||||
use pile_dataset::{Datasets, PileValue, extract::MetaExtractor};
|
use pile_dataset::Datasets;
|
||||||
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
||||||
use std::{path::PathBuf, sync::Arc};
|
use pile_value::value::PileValue;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
use crate::{CliCmd, GlobalContext};
|
use crate::{CliCmd, GlobalContext};
|
||||||
|
|
||||||
@@ -54,9 +55,8 @@ impl CliCmd for ProbeCommand {
|
|||||||
anyhow::anyhow!("{:?} not found in source {:?}", self.key, self.source)
|
anyhow::anyhow!("{:?} not found in source {:?}", self.key, self.source)
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
let value = PileValue::ObjectExtractor(Arc::new(MetaExtractor::new(&item)));
|
let item = PileValue::Item(item);
|
||||||
value
|
item.to_json()
|
||||||
.to_json()
|
|
||||||
.await
|
.await
|
||||||
.with_context(|| format!("while extracting {}", self.key))?
|
.with_context(|| format!("while extracting {}", self.key))?
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user