Filter by mime
This commit is contained in:
@@ -7,7 +7,7 @@ use std::{
|
||||
use tracing::trace;
|
||||
|
||||
use crate::{
|
||||
extract::traits::ObjectExtractor,
|
||||
extract::traits::{ExtractState, ObjectExtractor},
|
||||
value::{Item, PileValue, SyncReadBridge},
|
||||
};
|
||||
|
||||
@@ -82,6 +82,7 @@ impl EpubMetaExtractor {
|
||||
impl ObjectExtractor for EpubMetaExtractor {
|
||||
async fn field(
|
||||
&self,
|
||||
state: &ExtractState,
|
||||
name: &Label,
|
||||
args: Option<&str>,
|
||||
) -> Result<Option<PileValue>, std::io::Error> {
|
||||
@@ -89,6 +90,10 @@ impl ObjectExtractor for EpubMetaExtractor {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if !state.ignore_mime && self.item.mime().essence_str() != "application/epub+zip" {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
Ok(self.get_inner().await?.get(name).cloned())
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ use std::{
|
||||
use tracing::trace;
|
||||
|
||||
use crate::{
|
||||
extract::traits::ObjectExtractor,
|
||||
extract::traits::{ExtractState, ObjectExtractor},
|
||||
value::{Item, PileValue, SyncReadBridge},
|
||||
};
|
||||
|
||||
@@ -92,6 +92,7 @@ fn strip_html(html: &str) -> String {
|
||||
impl ObjectExtractor for EpubTextExtractor {
|
||||
async fn field(
|
||||
&self,
|
||||
state: &ExtractState,
|
||||
name: &Label,
|
||||
args: Option<&str>,
|
||||
) -> Result<Option<PileValue>, std::io::Error> {
|
||||
@@ -99,6 +100,10 @@ impl ObjectExtractor for EpubTextExtractor {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if !state.ignore_mime && self.item.mime().essence_str() != "application/epub+zip" {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
Ok(self.get_inner().await?.get(name).cloned())
|
||||
}
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ mod epub_text;
|
||||
pub use epub_text::*;
|
||||
|
||||
use crate::{
|
||||
extract::traits::ObjectExtractor,
|
||||
extract::traits::{ExtractState, ObjectExtractor},
|
||||
value::{Item, PileValue},
|
||||
};
|
||||
|
||||
@@ -30,13 +30,14 @@ impl EpubExtractor {
|
||||
impl ObjectExtractor for EpubExtractor {
|
||||
async fn field(
|
||||
&self,
|
||||
state: &ExtractState,
|
||||
name: &pile_config::Label,
|
||||
args: Option<&str>,
|
||||
) -> Result<Option<PileValue>, std::io::Error> {
|
||||
match (name.as_str(), args) {
|
||||
("text", args) => Ok(Some(
|
||||
self.text
|
||||
.field(name, args)
|
||||
.field(state, name, args)
|
||||
.await
|
||||
.map(|x| x.unwrap_or(PileValue::Null))?,
|
||||
)),
|
||||
|
||||
@@ -7,7 +7,7 @@ use std::{
|
||||
use tracing::trace;
|
||||
|
||||
use crate::{
|
||||
extract::traits::ObjectExtractor,
|
||||
extract::traits::{ExtractState, ObjectExtractor},
|
||||
value::{Item, PileValue, SyncReadBridge},
|
||||
};
|
||||
|
||||
@@ -87,6 +87,7 @@ fn tag_to_label(tag: &str) -> Option<Label> {
|
||||
impl ObjectExtractor for ExifExtractor {
|
||||
async fn field(
|
||||
&self,
|
||||
state: &ExtractState,
|
||||
name: &Label,
|
||||
args: Option<&str>,
|
||||
) -> Result<Option<PileValue>, std::io::Error> {
|
||||
@@ -100,6 +101,10 @@ impl ObjectExtractor for ExifExtractor {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if !state.ignore_mime && self.item.mime().type_() != mime::IMAGE {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
Ok(self.get_inner().await?.get(name).cloned())
|
||||
}
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ use std::{
|
||||
use tracing::trace;
|
||||
|
||||
use crate::{
|
||||
extract::traits::{ListExtractor, ObjectExtractor},
|
||||
extract::traits::{ExtractState, ListExtractor, ObjectExtractor},
|
||||
value::{Item, PileValue, SyncReadBridge},
|
||||
};
|
||||
|
||||
@@ -46,16 +46,32 @@ impl FlacImagesExtractor {
|
||||
|
||||
return Ok(count);
|
||||
}
|
||||
|
||||
/// Decides whether this extractor should process its item.
///
/// Returns `true` when `state.ignore_mime` is set. Otherwise returns `true`
/// only if the item's mime essence is `audio/flac` or `audio/x-flac`.
fn mime_ok(&self, state: &ExtractState) -> bool {
    // Short-circuit: the mime type is not inspected when filtering is disabled.
    state.ignore_mime
        || matches!(
            self.item.mime().essence_str(),
            "audio/flac" | "audio/x-flac"
        )
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl ListExtractor for FlacImagesExtractor {
|
||||
async fn get<'a>(&'a self, mut idx: usize) -> Result<Option<PileValue>, std::io::Error> {
|
||||
async fn get(
|
||||
&self,
|
||||
state: &ExtractState,
|
||||
mut idx: usize,
|
||||
) -> Result<Option<PileValue>, std::io::Error> {
|
||||
trace!(
|
||||
key = self.item.key().as_str(),
|
||||
"Getting index {idx} from FlacImagesExtractor",
|
||||
);
|
||||
|
||||
if !self.mime_ok(state) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let key = self.item.key();
|
||||
let reader = SyncReadBridge::new_current(self.item.read().await?);
|
||||
let image = tokio::task::spawn_blocking(move || {
|
||||
@@ -98,7 +114,11 @@ impl ListExtractor for FlacImagesExtractor {
|
||||
}))
|
||||
}
|
||||
|
||||
async fn len(&self) -> Result<usize, std::io::Error> {
|
||||
async fn len(&self, state: &ExtractState) -> Result<usize, std::io::Error> {
|
||||
if !self.mime_ok(state) {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
if let Some(x) = self.cached_count.get() {
|
||||
return Ok(*x);
|
||||
}
|
||||
@@ -178,12 +198,21 @@ impl FlacExtractor {
|
||||
|
||||
return Ok(self.output.get_or_init(|| output));
|
||||
}
|
||||
|
||||
/// Reports whether extraction may proceed for this item.
///
/// The mime check is bypassed entirely when `state.ignore_mime` is `true`.
/// When it is `false`, the item's mime essence must equal `audio/flac`
/// or `audio/x-flac`.
fn mime_ok(&self, state: &ExtractState) -> bool {
    state.ignore_mime || ["audio/flac", "audio/x-flac"].contains(&self.item.mime().essence_str())
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl ObjectExtractor for FlacExtractor {
|
||||
async fn field(
|
||||
&self,
|
||||
state: &ExtractState,
|
||||
name: &Label,
|
||||
args: Option<&str>,
|
||||
) -> Result<Option<PileValue>, std::io::Error> {
|
||||
@@ -191,6 +220,10 @@ impl ObjectExtractor for FlacExtractor {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if !self.mime_ok(state) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if name.as_str() == "images" {
|
||||
return Ok(Some(self.images.clone()));
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use crate::{
|
||||
extract::traits::ObjectExtractor,
|
||||
extract::traits::{ExtractState, ObjectExtractor},
|
||||
value::{Item, PileValue},
|
||||
};
|
||||
use pile_config::Label;
|
||||
@@ -88,6 +88,7 @@ impl FsExtractor {
|
||||
impl ObjectExtractor for FsExtractor {
|
||||
async fn field(
|
||||
&self,
|
||||
_state: &ExtractState,
|
||||
name: &Label,
|
||||
args: Option<&str>,
|
||||
) -> Result<Option<PileValue>, std::io::Error> {
|
||||
|
||||
@@ -9,7 +9,7 @@ use std::{
|
||||
use tracing::trace;
|
||||
|
||||
use crate::{
|
||||
extract::traits::ObjectExtractor,
|
||||
extract::traits::{ExtractState, ObjectExtractor},
|
||||
value::{Item, PileValue, SyncReadBridge},
|
||||
};
|
||||
|
||||
@@ -125,6 +125,7 @@ fn frame_id_to_field(id: &str) -> Cow<'static, str> {
|
||||
impl ObjectExtractor for Id3Extractor {
|
||||
async fn field(
|
||||
&self,
|
||||
state: &ExtractState,
|
||||
name: &Label,
|
||||
args: Option<&str>,
|
||||
) -> Result<Option<PileValue>, std::io::Error> {
|
||||
@@ -132,6 +133,10 @@ impl ObjectExtractor for Id3Extractor {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if !state.ignore_mime && self.item.mime().essence_str() != "audio/mpeg" {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
Ok(self.get_inner().await?.get(name).cloned())
|
||||
}
|
||||
|
||||
|
||||
@@ -26,7 +26,10 @@ mod sidecar;
|
||||
pub use sidecar::*;
|
||||
|
||||
use crate::{
|
||||
extract::{misc::MapExtractor, traits::ObjectExtractor},
|
||||
extract::{
|
||||
misc::MapExtractor,
|
||||
traits::{ExtractState, ObjectExtractor},
|
||||
},
|
||||
value::{Item, PileValue},
|
||||
};
|
||||
|
||||
@@ -82,10 +85,11 @@ impl ItemExtractor {
|
||||
impl ObjectExtractor for ItemExtractor {
|
||||
async fn field(
|
||||
&self,
|
||||
state: &ExtractState,
|
||||
name: &pile_config::Label,
|
||||
args: Option<&str>,
|
||||
) -> Result<Option<PileValue>, std::io::Error> {
|
||||
self.inner.field(name, args).await
|
||||
self.inner.field(state, name, args).await
|
||||
}
|
||||
|
||||
#[expect(clippy::unwrap_used)]
|
||||
|
||||
@@ -14,7 +14,7 @@ mod pdf_text;
|
||||
pub use pdf_text::*;
|
||||
|
||||
use crate::{
|
||||
extract::traits::ObjectExtractor,
|
||||
extract::traits::{ExtractState, ObjectExtractor},
|
||||
value::{Item, PileValue},
|
||||
};
|
||||
|
||||
@@ -40,6 +40,7 @@ impl PdfExtractor {
|
||||
impl ObjectExtractor for PdfExtractor {
|
||||
async fn field(
|
||||
&self,
|
||||
state: &ExtractState,
|
||||
name: &pile_config::Label,
|
||||
args: Option<&str>,
|
||||
) -> Result<Option<PileValue>, std::io::Error> {
|
||||
@@ -50,7 +51,7 @@ impl ObjectExtractor for PdfExtractor {
|
||||
);
|
||||
|
||||
match (name.as_str(), args) {
|
||||
("text", args) => self.text.field(name, args).await,
|
||||
("text", args) => self.text.field(state, name, args).await,
|
||||
("meta", None) => Ok(Some(PileValue::ObjectExtractor(self.meta.clone()))),
|
||||
#[cfg(feature = "pdfium")]
|
||||
("pages", None) => Ok(Some(PileValue::ListExtractor(self.pages.clone()))),
|
||||
|
||||
@@ -9,7 +9,7 @@ use std::{
|
||||
use tracing::trace;
|
||||
|
||||
use crate::{
|
||||
extract::traits::ObjectExtractor,
|
||||
extract::traits::{ExtractState, ObjectExtractor},
|
||||
value::{Item, PileValue, SyncReadBridge},
|
||||
};
|
||||
|
||||
@@ -124,12 +124,18 @@ fn format_date(d: &Date) -> String {
|
||||
impl ObjectExtractor for PdfMetaExtractor {
|
||||
async fn field(
|
||||
&self,
|
||||
state: &ExtractState,
|
||||
name: &Label,
|
||||
args: Option<&str>,
|
||||
) -> Result<Option<PileValue>, std::io::Error> {
|
||||
if args.is_some() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if !state.ignore_mime && self.item.mime().essence_str() != "application/pdf" {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
Ok(self.get_inner().await?.get(name).cloned())
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ use std::{
|
||||
use tracing::trace;
|
||||
|
||||
use crate::{
|
||||
extract::traits::ListExtractor,
|
||||
extract::traits::{ExtractState, ListExtractor},
|
||||
value::{Item, PileValue, SyncReadBridge},
|
||||
};
|
||||
|
||||
@@ -34,12 +34,20 @@ impl PdfPagesExtractor {
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl ListExtractor for PdfPagesExtractor {
|
||||
async fn get(&self, idx: usize) -> Result<Option<PileValue>, std::io::Error> {
|
||||
async fn get(
|
||||
&self,
|
||||
state: &ExtractState,
|
||||
idx: usize,
|
||||
) -> Result<Option<PileValue>, std::io::Error> {
|
||||
trace!(
|
||||
key = self.item.key().as_str(),
|
||||
"Getting index {idx} from PdfPagesExtractor",
|
||||
);
|
||||
|
||||
if !state.ignore_mime && self.item.mime().essence_str() != "application/pdf" {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let bytes = self.get_bytes().await?;
|
||||
let png = tokio::task::spawn_blocking(move || {
|
||||
let pdfium = Pdfium::default();
|
||||
@@ -81,7 +89,11 @@ impl ListExtractor for PdfPagesExtractor {
|
||||
Ok(Some(value))
|
||||
}
|
||||
|
||||
async fn len(&self) -> Result<usize, std::io::Error> {
|
||||
async fn len(&self, state: &ExtractState) -> Result<usize, std::io::Error> {
|
||||
if !state.ignore_mime && self.item.mime().essence_str() != "application/pdf" {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let bytes = self.get_bytes().await?;
|
||||
let count = tokio::task::spawn_blocking(move || {
|
||||
let pdfium = Pdfium::default();
|
||||
@@ -103,10 +115,10 @@ impl ListExtractor for PdfPagesExtractor {
|
||||
|
||||
// Override, extracting all pages is very slow,
|
||||
// and we can't display binary in json anyway
|
||||
async fn to_json(&self) -> Result<serde_json::Value, std::io::Error> {
|
||||
async fn to_json(&self, state: &ExtractState) -> Result<serde_json::Value, std::io::Error> {
|
||||
Ok(serde_json::Value::String(format!(
|
||||
"<PdfPages ({} pages)>",
|
||||
self.len().await?
|
||||
self.len(state).await?
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@ use std::{
|
||||
use tracing::trace;
|
||||
|
||||
use crate::{
|
||||
extract::traits::ObjectExtractor,
|
||||
extract::traits::{ExtractState, ObjectExtractor},
|
||||
value::{Item, PileValue, SyncReadBridge},
|
||||
};
|
||||
|
||||
@@ -104,6 +104,7 @@ impl PdfTextExtractor {
|
||||
impl ObjectExtractor for PdfTextExtractor {
|
||||
async fn field(
|
||||
&self,
|
||||
state: &ExtractState,
|
||||
name: &Label,
|
||||
args: Option<&str>,
|
||||
) -> Result<Option<PileValue>, std::io::Error> {
|
||||
@@ -111,6 +112,10 @@ impl ObjectExtractor for PdfTextExtractor {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if !state.ignore_mime && self.item.mime().essence_str() != "application/pdf" {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
Ok(self.get_inner().await?.get(name).cloned())
|
||||
}
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ use tracing::trace;
|
||||
|
||||
use super::TomlExtractor;
|
||||
use crate::{
|
||||
extract::traits::ObjectExtractor,
|
||||
extract::traits::{ExtractState, ObjectExtractor},
|
||||
value::{Item, PileValue},
|
||||
};
|
||||
|
||||
@@ -26,6 +26,7 @@ impl SidecarExtractor {
|
||||
impl ObjectExtractor for SidecarExtractor {
|
||||
async fn field(
|
||||
&self,
|
||||
state: &ExtractState,
|
||||
name: &Label,
|
||||
args: Option<&str>,
|
||||
) -> Result<Option<PileValue>, std::io::Error> {
|
||||
@@ -39,7 +40,7 @@ impl ObjectExtractor for SidecarExtractor {
|
||||
.output
|
||||
.get_or_init(|| self.item.sidecar().map(TomlExtractor::new))
|
||||
{
|
||||
Some(x) => Ok(x.field(name, args).await?),
|
||||
Some(x) => Ok(x.field(state, name, args).await?),
|
||||
None => Ok(Some(PileValue::Null)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@ use std::{
|
||||
};
|
||||
|
||||
use crate::{
|
||||
extract::traits::ObjectExtractor,
|
||||
extract::traits::{ExtractState, ObjectExtractor},
|
||||
value::{AsyncReader, Item, PileValue},
|
||||
};
|
||||
|
||||
@@ -64,6 +64,7 @@ impl TomlExtractor {
|
||||
impl ObjectExtractor for TomlExtractor {
|
||||
async fn field(
|
||||
&self,
|
||||
state: &ExtractState,
|
||||
name: &Label,
|
||||
args: Option<&str>,
|
||||
) -> Result<Option<PileValue>, std::io::Error> {
|
||||
@@ -71,6 +72,10 @@ impl ObjectExtractor for TomlExtractor {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if !state.ignore_mime && self.item.mime().type_() != mime::TEXT {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
Ok(self.get_inner().await?.get(name).cloned())
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::{extract::traits::ListExtractor, value::PileValue};
|
||||
use crate::{
|
||||
extract::traits::{ExtractState, ListExtractor},
|
||||
value::PileValue,
|
||||
};
|
||||
|
||||
pub struct ArrayExtractor {
|
||||
inner: Arc<Vec<PileValue>>,
|
||||
@@ -14,11 +17,15 @@ impl ArrayExtractor {
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl ListExtractor for ArrayExtractor {
|
||||
async fn get(&self, idx: usize) -> Result<Option<PileValue>, std::io::Error> {
|
||||
async fn get(
|
||||
&self,
|
||||
_state: &ExtractState,
|
||||
idx: usize,
|
||||
) -> Result<Option<PileValue>, std::io::Error> {
|
||||
Ok(self.inner.get(idx).cloned())
|
||||
}
|
||||
|
||||
async fn len(&self) -> Result<usize, std::io::Error> {
|
||||
async fn len(&self, _state: &ExtractState) -> Result<usize, std::io::Error> {
|
||||
Ok(self.inner.len())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
use pile_config::Label;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::{extract::traits::ObjectExtractor, value::PileValue};
|
||||
use crate::{
|
||||
extract::traits::{ExtractState, ObjectExtractor},
|
||||
value::PileValue,
|
||||
};
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct MapExtractor {
|
||||
@@ -12,6 +15,7 @@ pub struct MapExtractor {
|
||||
impl ObjectExtractor for MapExtractor {
|
||||
async fn field(
|
||||
&self,
|
||||
_state: &ExtractState,
|
||||
name: &Label,
|
||||
args: Option<&str>,
|
||||
) -> Result<Option<PileValue>, std::io::Error> {
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
use crate::{extract::traits::ListExtractor, value::PileValue};
|
||||
use crate::{
|
||||
extract::traits::{ExtractState, ListExtractor},
|
||||
value::PileValue,
|
||||
};
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct VecExtractor {
|
||||
@@ -7,11 +10,15 @@ pub struct VecExtractor {
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl ListExtractor for VecExtractor {
|
||||
async fn get(&self, idx: usize) -> Result<Option<PileValue>, std::io::Error> {
|
||||
async fn get(
|
||||
&self,
|
||||
_state: &ExtractState,
|
||||
idx: usize,
|
||||
) -> Result<Option<PileValue>, std::io::Error> {
|
||||
Ok(self.inner.get(idx).cloned())
|
||||
}
|
||||
|
||||
async fn len(&self) -> Result<usize, std::io::Error> {
|
||||
async fn len(&self, _state: &ExtractState) -> Result<usize, std::io::Error> {
|
||||
Ok(self.inner.len())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,10 @@ use pile_config::Label;
|
||||
use smartstring::{LazyCompact, SmartString};
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::{extract::traits::ObjectExtractor, value::PileValue};
|
||||
use crate::{
|
||||
extract::traits::{ExtractState, ObjectExtractor},
|
||||
value::PileValue,
|
||||
};
|
||||
|
||||
pub struct StringExtractor {
|
||||
item: Arc<SmartString<LazyCompact>>,
|
||||
@@ -18,6 +21,7 @@ impl StringExtractor {
|
||||
impl ObjectExtractor for StringExtractor {
|
||||
async fn field(
|
||||
&self,
|
||||
_state: &ExtractState,
|
||||
name: &Label,
|
||||
args: Option<&str>,
|
||||
) -> Result<Option<PileValue>, std::io::Error> {
|
||||
@@ -89,7 +93,10 @@ mod tests {
|
||||
|
||||
#[expect(clippy::unwrap_used)]
|
||||
async fn field(ext: &StringExtractor, name: &str, args: Option<&str>) -> Option<PileValue> {
|
||||
ext.field(&Label::new(name).unwrap(), args).await.unwrap()
|
||||
let state = ExtractState { ignore_mime: false };
|
||||
ext.field(&state, &Label::new(name).unwrap(), args)
|
||||
.await
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn string(v: Option<PileValue>) -> Option<String> {
|
||||
|
||||
@@ -1,3 +1,13 @@
|
||||
/// Options passed by reference to extractor calls
/// (`field`, `get`, `len`, `to_json`).
///
/// The derived [`Default`] leaves every option off, so mime pre-filtering
/// stays enabled: `ExtractState::default()` is `ExtractState { ignore_mime: false }`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ExtractState {
    /// If true, extract all fields from all items.
    /// Do not pre-filter using mime type.
    ///
    /// This may detect additional fields, but
    /// makes extraction take much longer.
    pub ignore_mime: bool,
}
|
||||
|
||||
/// An attachment that extracts metadata from an [Item].
|
||||
///
|
||||
/// Metadata is exposed as an immutable map of {label: value},
|
||||
@@ -15,6 +25,7 @@ pub trait ObjectExtractor: Send + Sync {
|
||||
/// this fn should return `Ok(Some(PileValue::Null))`.
|
||||
async fn field(
|
||||
&self,
|
||||
state: &ExtractState,
|
||||
name: &pile_config::Label,
|
||||
args: Option<&str>,
|
||||
) -> Result<Option<crate::value::PileValue>, std::io::Error>;
|
||||
@@ -25,15 +36,15 @@ pub trait ObjectExtractor: Send + Sync {
|
||||
async fn fields(&self) -> Result<Vec<pile_config::Label>, std::io::Error>;
|
||||
|
||||
/// Convert this to a JSON value.
|
||||
async fn to_json(&self) -> Result<serde_json::Value, std::io::Error> {
|
||||
async fn to_json(&self, state: &ExtractState) -> Result<serde_json::Value, std::io::Error> {
|
||||
let keys = self.fields().await?;
|
||||
let mut map = serde_json::Map::new();
|
||||
for k in &keys {
|
||||
let v = match self.field(k, None).await? {
|
||||
let v = match self.field(state, k, None).await? {
|
||||
Some(x) => x,
|
||||
None => continue,
|
||||
};
|
||||
map.insert(k.to_string(), Box::pin(v.to_json()).await?);
|
||||
map.insert(k.to_string(), Box::pin(v.to_json(state)).await?);
|
||||
}
|
||||
|
||||
Ok(serde_json::Value::Object(map))
|
||||
@@ -49,25 +60,25 @@ pub trait ListExtractor: Send + Sync {
|
||||
/// Indices start at zero, and must be consecutive.
|
||||
/// - returns `None` if `idx` is out of range
|
||||
/// - returns `Some(Null)` if `None` is at `idx`
|
||||
async fn get(&self, idx: usize) -> Result<Option<crate::value::PileValue>, std::io::Error>;
|
||||
async fn get(
|
||||
&self,
|
||||
state: &ExtractState,
|
||||
idx: usize,
|
||||
) -> Result<Option<crate::value::PileValue>, std::io::Error>;
|
||||
|
||||
async fn len(&self) -> Result<usize, std::io::Error>;
|
||||
|
||||
async fn is_empty(&self) -> Result<bool, std::io::Error> {
|
||||
Ok(self.len().await? == 0)
|
||||
}
|
||||
async fn len(&self, state: &ExtractState) -> Result<usize, std::io::Error>;
|
||||
|
||||
/// Convert this list to a JSON value.
|
||||
async fn to_json(&self) -> Result<serde_json::Value, std::io::Error> {
|
||||
let len = self.len().await?;
|
||||
async fn to_json(&self, state: &ExtractState) -> Result<serde_json::Value, std::io::Error> {
|
||||
let len = self.len(state).await?;
|
||||
let mut list = Vec::with_capacity(len);
|
||||
for i in 0..len {
|
||||
#[expect(clippy::expect_used)]
|
||||
let v = self
|
||||
.get(i)
|
||||
.get(state, i)
|
||||
.await?
|
||||
.expect("value must be present according to length");
|
||||
list.push(Box::pin(v.to_json()).await?);
|
||||
list.push(Box::pin(v.to_json(state)).await?);
|
||||
}
|
||||
|
||||
Ok(serde_json::Value::Array(list))
|
||||
|
||||
@@ -9,7 +9,7 @@ use crate::{
|
||||
item::ItemExtractor,
|
||||
misc::{ArrayExtractor, MapExtractor, VecExtractor},
|
||||
string::StringExtractor,
|
||||
traits::{ListExtractor, ObjectExtractor},
|
||||
traits::{ExtractState, ListExtractor, ObjectExtractor},
|
||||
},
|
||||
value::Item,
|
||||
};
|
||||
@@ -91,7 +91,11 @@ impl PileValue {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn query(&self, query: &ObjectPath) -> Result<Option<Self>, std::io::Error> {
|
||||
pub async fn query(
|
||||
&self,
|
||||
state: &ExtractState,
|
||||
query: &ObjectPath,
|
||||
) -> Result<Option<Self>, std::io::Error> {
|
||||
let mut out: Option<PileValue> = Some(self.clone());
|
||||
|
||||
for s in &query.segments {
|
||||
@@ -106,7 +110,7 @@ impl PileValue {
|
||||
}
|
||||
};
|
||||
|
||||
out = e.field(name, args.as_deref()).await?;
|
||||
out = e.field(state, name, args.as_deref()).await?;
|
||||
}
|
||||
|
||||
PathSegment::Index(idx) => {
|
||||
@@ -121,7 +125,7 @@ impl PileValue {
|
||||
let idx = if *idx >= 0 {
|
||||
usize::try_from(*idx).ok()
|
||||
} else {
|
||||
usize::try_from(e.len().await? as i64 - idx).ok()
|
||||
usize::try_from(e.len(state).await? as i64 - idx).ok()
|
||||
};
|
||||
|
||||
let idx = match idx {
|
||||
@@ -132,7 +136,7 @@ impl PileValue {
|
||||
}
|
||||
};
|
||||
|
||||
out = e.get(idx).await?;
|
||||
out = e.get(state, idx).await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -147,7 +151,10 @@ impl PileValue {
|
||||
/// - `ObjectExtractor` is recursed into; returns `Some(Object(map))` with
|
||||
/// only the fields that had data, or `None` if all fields were absent.
|
||||
/// - `Array` / `ListExtractor` are treated as opaque leaf values (not descended into).
|
||||
pub async fn count_fields(&self) -> Result<Option<Value>, std::io::Error> {
|
||||
pub async fn count_fields(
|
||||
&self,
|
||||
state: &ExtractState,
|
||||
) -> Result<Option<Value>, std::io::Error> {
|
||||
Ok(match self {
|
||||
Self::Null => None,
|
||||
|
||||
@@ -156,18 +163,18 @@ impl PileValue {
|
||||
}
|
||||
|
||||
Self::Array(x) => (!x.is_empty()).then(|| Value::Number(1u64.into())),
|
||||
Self::ListExtractor(x) => (!x.is_empty().await?).then(|| Value::Number(1u64.into())),
|
||||
Self::ListExtractor(x) => (x.len(state).await? > 0).then(|| Value::Number(1u64.into())),
|
||||
|
||||
Self::ObjectExtractor(_) | Self::Item(_) => {
|
||||
let e = self.object_extractor();
|
||||
let keys = e.fields().await?;
|
||||
let mut map = Map::new();
|
||||
for k in &keys {
|
||||
let v = match e.field(k, None).await? {
|
||||
let v = match e.field(state, k, None).await? {
|
||||
Some(x) => x,
|
||||
None => continue,
|
||||
};
|
||||
if let Some(counted) = Box::pin(v.count_fields()).await? {
|
||||
if let Some(counted) = Box::pin(v.count_fields(state)).await? {
|
||||
map.insert(k.to_string(), counted);
|
||||
}
|
||||
}
|
||||
@@ -187,7 +194,7 @@ impl PileValue {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn to_json(&self) -> Result<Value, std::io::Error> {
|
||||
pub async fn to_json(&self, state: &ExtractState) -> Result<Value, std::io::Error> {
|
||||
Ok(match self {
|
||||
Self::Null => Value::Null,
|
||||
Self::U64(x) => Value::Number((*x).into()),
|
||||
@@ -201,12 +208,12 @@ impl PileValue {
|
||||
|
||||
Self::Array(_) | Self::ListExtractor(_) => {
|
||||
let e = self.list_extractor();
|
||||
return e.to_json().await;
|
||||
return e.to_json(state).await;
|
||||
}
|
||||
|
||||
Self::ObjectExtractor(_) | Self::Item(_) => {
|
||||
let e = self.object_extractor();
|
||||
return e.to_json().await;
|
||||
return e.to_json(state).await;
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user