Improve arg parsing
Some checks failed
CI / Typos (push) Successful in 20s
CI / Build and test (push) Successful in 2m28s
CI / Clippy (push) Failing after 2m50s
CI / Build and test (all features) (push) Successful in 7m27s

This commit is contained in:
2026-03-11 12:54:02 -07:00
parent 8a9388020c
commit f3bb1a265e
19 changed files with 327 additions and 98 deletions

View File

@@ -85,7 +85,15 @@ impl EpubMetaExtractor {
#[async_trait::async_trait]
impl ObjectExtractor for EpubMetaExtractor {
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
async fn field(
&self,
name: &Label,
args: Option<&str>,
) -> Result<Option<PileValue>, std::io::Error> {
if args.is_some() {
return Ok(None);
}
Ok(self.get_inner().await?.get(name).cloned())
}

View File

@@ -95,7 +95,15 @@ fn strip_html(html: &str) -> String {
#[async_trait::async_trait]
impl ObjectExtractor for EpubTextExtractor {
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
async fn field(
&self,
name: &Label,
args: Option<&str>,
) -> Result<Option<PileValue>, std::io::Error> {
if args.is_some() {
return Ok(None);
}
Ok(self.get_inner().await?.get(name).cloned())
}

View File

@@ -28,10 +28,14 @@ impl EpubExtractor {
#[async_trait::async_trait]
impl ObjectExtractor for EpubExtractor {
async fn field(&self, name: &pile_config::Label) -> Result<Option<PileValue>, std::io::Error> {
match name.as_str() {
"text" => self.text.field(name).await,
"meta" => Ok(Some(PileValue::ObjectExtractor(self.meta.clone()))),
async fn field(
&self,
name: &pile_config::Label,
args: Option<&str>,
) -> Result<Option<PileValue>, std::io::Error> {
match (name.as_str(), args) {
("text", args) => self.text.field(name, args).await,
("meta", None) => Ok(Some(PileValue::ObjectExtractor(self.meta.clone()))),
_ => Ok(None),
}
}

View File

@@ -86,7 +86,15 @@ fn tag_to_label(tag: &str) -> Option<Label> {
#[async_trait::async_trait]
impl ObjectExtractor for ExifExtractor {
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
async fn field(
&self,
name: &Label,
args: Option<&str>,
) -> Result<Option<PileValue>, std::io::Error> {
if args.is_some() {
return Ok(None);
}
Ok(self.get_inner().await?.get(name).cloned())
}

View File

@@ -142,7 +142,15 @@ impl FlacExtractor {
#[async_trait::async_trait]
impl ObjectExtractor for FlacExtractor {
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
async fn field(
&self,
name: &Label,
args: Option<&str>,
) -> Result<Option<PileValue>, std::io::Error> {
if args.is_some() {
return Ok(None);
}
if name.as_str() == "images"
&& let Some(ref images) = self.images
{

View File

@@ -70,7 +70,15 @@ impl FsExtractor {
#[async_trait::async_trait]
impl ObjectExtractor for FsExtractor {
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
async fn field(
&self,
name: &Label,
args: Option<&str>,
) -> Result<Option<PileValue>, std::io::Error> {
if args.is_some() {
return Ok(None);
}
Ok(self.get_inner()?.get(name).cloned())
}

View File

@@ -123,7 +123,15 @@ fn frame_id_to_field(id: &str) -> Cow<'static, str> {
#[async_trait::async_trait]
impl ObjectExtractor for Id3Extractor {
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
async fn field(
&self,
name: &Label,
args: Option<&str>,
) -> Result<Option<PileValue>, std::io::Error> {
if args.is_some() {
return Ok(None);
}
Ok(self.get_inner().await?.get(name).cloned())
}

View File

@@ -80,8 +80,12 @@ impl ItemExtractor {
#[async_trait::async_trait]
impl ObjectExtractor for ItemExtractor {
async fn field(&self, name: &pile_config::Label) -> Result<Option<PileValue>, std::io::Error> {
self.inner.field(name).await
async fn field(
&self,
name: &pile_config::Label,
args: Option<&str>,
) -> Result<Option<PileValue>, std::io::Error> {
self.inner.field(name, args).await
}
#[expect(clippy::unwrap_used)]

View File

@@ -37,12 +37,16 @@ impl PdfExtractor {
#[async_trait::async_trait]
impl ObjectExtractor for PdfExtractor {
async fn field(&self, name: &pile_config::Label) -> Result<Option<PileValue>, std::io::Error> {
match name.as_str() {
"text" => self.text.field(name).await,
"meta" => Ok(Some(PileValue::ObjectExtractor(self.meta.clone()))),
async fn field(
&self,
name: &pile_config::Label,
args: Option<&str>,
) -> Result<Option<PileValue>, std::io::Error> {
match (name.as_str(), args) {
("text", args) => self.text.field(name, args).await,
("meta", None) => Ok(Some(PileValue::ObjectExtractor(self.meta.clone()))),
#[cfg(feature = "pdfium")]
"pages" => Ok(Some(PileValue::ListExtractor(self.pages.clone()))),
("pages", None) => Ok(Some(PileValue::ListExtractor(self.pages.clone()))),
_ => Ok(None),
}
}

View File

@@ -122,7 +122,14 @@ fn format_date(d: &Date) -> String {
#[async_trait::async_trait]
impl ObjectExtractor for PdfMetaExtractor {
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
async fn field(
&self,
name: &Label,
args: Option<&str>,
) -> Result<Option<PileValue>, std::io::Error> {
if args.is_some() {
return Ok(None);
}
Ok(self.get_inner().await?.get(name).cloned())
}

View File

@@ -102,7 +102,15 @@ impl PdfTextExtractor {
#[async_trait::async_trait]
impl ObjectExtractor for PdfTextExtractor {
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
async fn field(
&self,
name: &Label,
args: Option<&str>,
) -> Result<Option<PileValue>, std::io::Error> {
if args.is_some() {
return Ok(None);
}
Ok(self.get_inner().await?.get(name).cloned())
}

View File

@@ -23,12 +23,16 @@ impl SidecarExtractor {
#[async_trait::async_trait]
impl ObjectExtractor for SidecarExtractor {
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
async fn field(
&self,
name: &Label,
args: Option<&str>,
) -> Result<Option<PileValue>, std::io::Error> {
match self
.output
.get_or_init(|| self.item.sidecar().map(TomlExtractor::new))
{
Some(x) => Ok(x.field(name).await?),
Some(x) => Ok(x.field(name, args).await?),
None => Ok(Some(PileValue::Null)),
}
}

View File

@@ -68,7 +68,15 @@ impl TomlExtractor {
#[async_trait::async_trait]
impl ObjectExtractor for TomlExtractor {
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
async fn field(
&self,
name: &Label,
args: Option<&str>,
) -> Result<Option<PileValue>, std::io::Error> {
if args.is_some() {
return Ok(None);
}
Ok(self.get_inner().await?.get(name).cloned())
}

View File

@@ -10,7 +10,15 @@ pub struct MapExtractor {
#[async_trait::async_trait]
impl ObjectExtractor for MapExtractor {
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
async fn field(
&self,
name: &Label,
args: Option<&str>,
) -> Result<Option<PileValue>, std::io::Error> {
if args.is_some() {
return Ok(None);
}
Ok(self.inner.get(name).cloned())
}

View File

@@ -4,20 +4,6 @@ use std::sync::Arc;
use crate::{extract::traits::ObjectExtractor, value::PileValue};
/// Splits a field specifier of the form `name(args)` into its two parts.
///
/// The name is everything before the first `(`. Arguments are reported only
/// when the text after that `(` ends with `)`; the returned slice excludes
/// both the opening and the final closing parenthesis.
///
/// * `"trim"`     -> `("trim", None)`
/// * `"split(,)"` -> `("split", Some(","))`
/// * `"split(,"`  -> `("split", None)` (unclosed parenthesis: name only)
fn parse_name(s: &str) -> (&str, Option<&str>) {
    match s.split_once('(') {
        // `strip_suffix` already yields `None` when the closing `)` is missing.
        Some((name, rest)) => (name, rest.strip_suffix(')')),
        None => (s, None),
    }
}
pub struct StringExtractor {
item: Arc<SmartString<LazyCompact>>,
}
@@ -30,9 +16,12 @@ impl StringExtractor {
#[async_trait::async_trait]
impl ObjectExtractor for StringExtractor {
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
let (name, args) = parse_name(name.as_str());
Ok(match (name, args) {
async fn field(
&self,
name: &Label,
args: Option<&str>,
) -> Result<Option<PileValue>, std::io::Error> {
Ok(match (name.as_str(), args) {
("trim", None) => Some(PileValue::String(Arc::new(
self.item.as_str().trim().into(),
))),
@@ -98,8 +87,8 @@ mod tests {
}
#[expect(clippy::unwrap_used)]
async fn field(ext: &StringExtractor, name: &str) -> Option<PileValue> {
ext.field(&Label::new(name).unwrap()).await.unwrap()
async fn field(ext: &StringExtractor, name: &str, args: Option<&str>) -> Option<PileValue> {
ext.field(&Label::new(name).unwrap(), args).await.unwrap()
}
fn string(v: Option<PileValue>) -> Option<String> {
@@ -125,20 +114,20 @@ mod tests {
#[tokio::test]
async fn trim() {
assert_eq!(
string(field(&extractor(" hi "), "trim").await),
string(field(&extractor(" hi "), "trim", None).await),
Some("hi".into())
);
}
#[tokio::test]
async fn trim_no_args() {
assert!(field(&extractor("x"), "trim(foo)").await.is_none());
assert!(field(&extractor("x"), "trim", Some("foo")).await.is_none());
}
#[tokio::test]
async fn nonempty_with_content() {
assert!(matches!(
field(&extractor("hello"), "nonempty").await,
field(&extractor("hello"), "nonempty", None).await,
Some(PileValue::String(_))
));
}
@@ -146,7 +135,7 @@ mod tests {
#[tokio::test]
async fn nonempty_empty_string() {
assert!(matches!(
field(&extractor(""), "nonempty").await,
field(&extractor(""), "nonempty", None).await,
Some(PileValue::Null)
));
}
@@ -154,7 +143,7 @@ mod tests {
#[tokio::test]
async fn trimprefix_present() {
assert_eq!(
string(field(&extractor("foobar"), "trimprefix(foo)").await),
string(field(&extractor("foobar"), "trimprefix", Some("foo")).await),
Some("bar".into())
);
}
@@ -162,20 +151,24 @@ mod tests {
#[tokio::test]
async fn trimprefix_absent() {
assert_eq!(
string(field(&extractor("foobar"), "trimprefix(baz)").await),
string(field(&extractor("foobar"), "trimprefix", Some("baz")).await),
Some("foobar".into())
);
}
#[tokio::test]
async fn trimprefix_no_args() {
assert!(field(&extractor("foobar"), "trimprefix").await.is_none());
assert!(
field(&extractor("foobar"), "trimprefix", None)
.await
.is_none()
);
}
#[tokio::test]
async fn trimsuffix_present() {
assert_eq!(
string(field(&extractor("foobar"), "trimsuffix(bar)").await),
string(field(&extractor("foobar"), "trimsuffix", Some("bar")).await),
Some("foo".into())
);
}
@@ -183,7 +176,7 @@ mod tests {
#[tokio::test]
async fn trimsuffix_absent() {
assert_eq!(
string(field(&extractor("foobar"), "trimsuffix(baz)").await),
string(field(&extractor("foobar"), "trimsuffix", Some("baz")).await),
Some("foobar".into())
);
}
@@ -191,7 +184,7 @@ mod tests {
#[tokio::test]
async fn split_basic() {
assert_eq!(
array(field(&extractor("a,b,c"), "split(,)").await),
array(field(&extractor("a,b,c"), "split", Some(",")).await),
vec!["a", "b", "c"]
);
}
@@ -199,23 +192,18 @@ mod tests {
#[tokio::test]
async fn split_no_match() {
assert_eq!(
array(field(&extractor("abc"), "split(,)").await),
array(field(&extractor("abc"), "split", Some(",")).await),
vec!["abc"]
);
}
#[tokio::test]
async fn split_no_args() {
assert!(field(&extractor("abc"), "split").await.is_none());
}
#[tokio::test]
async fn split_unclosed_paren() {
assert!(field(&extractor("abc"), "split(,").await.is_none());
assert!(field(&extractor("abc"), "split", None).await.is_none());
}
#[tokio::test]
async fn unknown_field() {
assert!(field(&extractor("abc"), "bogus").await.is_none());
assert!(field(&extractor("abc"), "bogus", None).await.is_none());
}
}

View File

@@ -10,6 +10,7 @@ pub trait ObjectExtractor: Send + Sync {
async fn field(
&self,
name: &pile_config::Label,
args: Option<&str>,
) -> Result<Option<crate::value::PileValue>, std::io::Error>;
/// Return all fields in this extractor.
@@ -22,7 +23,7 @@ pub trait ObjectExtractor: Send + Sync {
let keys = self.fields().await?;
let mut map = serde_json::Map::new();
for k in &keys {
let v = match self.field(k).await? {
let v = match self.field(k, None).await? {
Some(x) => x,
None => continue,
};

View File

@@ -97,7 +97,7 @@ impl PileValue {
for s in &query.segments {
match s {
PathSegment::Root => out = Some(self.clone()),
PathSegment::Field(field) => {
PathSegment::Field { name, args } => {
let e = match out.map(|x| x.object_extractor()) {
Some(e) => e,
None => {
@@ -106,7 +106,7 @@ impl PileValue {
}
};
out = e.field(field).await?;
out = e.field(name, args.as_deref()).await?;
}
PathSegment::Index(idx) => {
@@ -163,7 +163,7 @@ impl PileValue {
let keys = e.fields().await?;
let mut map = Map::new();
for k in &keys {
let v = match e.field(k).await? {
let v = match e.field(k, None).await? {
Some(x) => x,
None => continue,
};
@@ -216,7 +216,7 @@ impl PileValue {
let keys = e.fields().await?;
let mut map = Map::new();
for k in &keys {
let v = match e.field(k).await? {
let v = match e.field(k, None).await? {
Some(x) => x,
None => continue,
};