Refactor errors
Some checks failed
CI / Typos (push) Successful in 28s
CI / Clippy (push) Failing after 1m21s
CI / Build and test (all features) (push) Successful in 4m18s
CI / Build and test (push) Successful in 6m10s

This commit is contained in:
2026-03-12 23:04:59 -07:00
parent 15e56d895c
commit 95a547045d
17 changed files with 192 additions and 161 deletions

1
Cargo.lock generated
View File

@@ -2559,6 +2559,7 @@ dependencies = [
name = "pile-value" name = "pile-value"
version = "0.0.2" version = "0.0.2"
dependencies = [ dependencies = [
"anyhow",
"async-trait", "async-trait",
"aws-sdk-s3", "aws-sdk-s3",
"blake3", "blake3",

View File

@@ -11,6 +11,7 @@ workspace = true
pile-config = { workspace = true } pile-config = { workspace = true }
pile-flac = { workspace = true } pile-flac = { workspace = true }
anyhow = { workspace = true }
serde_json = { workspace = true } serde_json = { workspace = true }
walkdir = { workspace = true } walkdir = { workspace = true }
tracing = { workspace = true } tracing = { workspace = true }

View File

@@ -29,16 +29,9 @@ impl EpubMetaExtractor {
return Ok(x); return Ok(x);
} }
let key = self.item.key();
let ext = key.as_str().rsplit('.').next();
if !matches!(ext, Some("epub")) {
return Ok(self.output.get_or_init(HashMap::new));
}
let reader = SyncReadBridge::new_current(self.item.read().await?); let reader = SyncReadBridge::new_current(self.item.read().await?);
let raw_meta = tokio::task::spawn_blocking(move || { let raw_meta = tokio::task::spawn_blocking(move || {
let doc = EpubDoc::from_reader(reader) let doc = EpubDoc::from_reader(reader)?;
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
let fields: &[&'static str] = &[ let fields: &[&'static str] = &[
"title", "title",
@@ -54,17 +47,19 @@ impl EpubMetaExtractor {
let meta: Vec<(&'static str, Option<String>)> = let meta: Vec<(&'static str, Option<String>)> =
fields.iter().map(|&key| (key, doc.mdata(key))).collect(); fields.iter().map(|&key| (key, doc.mdata(key))).collect();
Ok::<_, std::io::Error>(meta) Ok::<_, anyhow::Error>(meta)
}) })
.await .await?;
.map_err(std::io::Error::other)?;
let raw_meta = match raw_meta { let raw_meta = match raw_meta {
Ok(x) => x, Ok(x) => x,
Err(error) => match error.downcast::<std::io::Error>() {
Ok(x) => return Err(x),
Err(error) => { Err(error) => {
trace!(message = "Could not process epub", ?error, key = ?self.item.key()); trace!(message = "Could not process epub", ?error, key = ?self.item.key());
return Ok(self.output.get_or_init(HashMap::new)); return Ok(self.output.get_or_init(HashMap::new));
} }
},
}; };
let mut output: HashMap<Label, PileValue> = HashMap::new(); let mut output: HashMap<Label, PileValue> = HashMap::new();

View File

@@ -4,7 +4,7 @@ use std::{
collections::HashMap, collections::HashMap,
sync::{Arc, OnceLock}, sync::{Arc, OnceLock},
}; };
use tracing::debug; use tracing::trace;
use crate::{ use crate::{
extract::traits::ObjectExtractor, extract::traits::ObjectExtractor,
@@ -29,16 +29,9 @@ impl EpubTextExtractor {
return Ok(x); return Ok(x);
} }
let key = self.item.key();
let ext = key.as_str().rsplit('.').next();
if !matches!(ext, Some("epub")) {
return Ok(self.output.get_or_init(HashMap::new));
}
let reader = SyncReadBridge::new_current(self.item.read().await?); let reader = SyncReadBridge::new_current(self.item.read().await?);
let raw_text = tokio::task::spawn_blocking(move || { let raw_text = tokio::task::spawn_blocking(move || {
let mut doc = EpubDoc::from_reader(reader) let mut doc = EpubDoc::from_reader(reader)?;
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
let mut text_parts: Vec<String> = Vec::new(); let mut text_parts: Vec<String> = Vec::new();
@@ -51,17 +44,19 @@ impl EpubTextExtractor {
} }
} }
Ok::<_, std::io::Error>(text_parts.join(" ")) Ok::<_, anyhow::Error>(text_parts.join(" "))
}) })
.await .await?;
.map_err(std::io::Error::other)?;
let raw_text = match raw_text { let raw_text = match raw_text {
Ok(x) => x, Ok(x) => x,
Err(error) => match error.downcast::<std::io::Error>() {
Ok(x) => return Err(x),
Err(error) => { Err(error) => {
debug!(message = "Could not process epub", ?error, key = ?self.item.key()); trace!(message = "Could not process epub", ?error, key = ?self.item.key());
return Ok(self.output.get_or_init(HashMap::new)); return Ok(self.output.get_or_init(HashMap::new));
} }
},
}; };
#[expect(clippy::unwrap_used)] #[expect(clippy::unwrap_used)]

View File

@@ -34,7 +34,13 @@ impl ObjectExtractor for EpubExtractor {
args: Option<&str>, args: Option<&str>,
) -> Result<Option<PileValue>, std::io::Error> { ) -> Result<Option<PileValue>, std::io::Error> {
match (name.as_str(), args) { match (name.as_str(), args) {
("text", args) => self.text.field(name, args).await, ("text", args) => Ok(Some(
self.text
.field(name, args)
.await
.map(|x| x.unwrap_or(PileValue::Null))?,
)),
("meta", None) => Ok(Some(PileValue::ObjectExtractor(self.meta.clone()))), ("meta", None) => Ok(Some(PileValue::ObjectExtractor(self.meta.clone()))),
_ => Ok(None), _ => Ok(None),
} }

View File

@@ -32,9 +32,7 @@ impl ExifExtractor {
let reader = SyncReadBridge::new_current(self.item.read().await?); let reader = SyncReadBridge::new_current(self.item.read().await?);
let raw_fields = tokio::task::spawn_blocking(move || { let raw_fields = tokio::task::spawn_blocking(move || {
let mut br = BufReader::new(reader); let mut br = BufReader::new(reader);
let exif = exif::Reader::new() let exif = exif::Reader::new().read_from_container(&mut br)?;
.read_from_container(&mut br)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
let fields: Vec<(String, String)> = exif let fields: Vec<(String, String)> = exif
.fields() .fields()
@@ -46,13 +44,13 @@ impl ExifExtractor {
}) })
.collect(); .collect();
Ok::<_, std::io::Error>(fields) Ok::<_, exif::Error>(fields)
}) })
.await .await?;
.map_err(std::io::Error::other)?;
let raw_fields = match raw_fields { let raw_fields = match raw_fields {
Ok(x) => x, Ok(x) => x,
Err(exif::Error::Io(x)) => return Err(x),
Err(error) => { Err(error) => {
trace!(message = "Could not process exif", ?error, key = ?self.item.key()); trace!(message = "Could not process exif", ?error, key = ?self.item.key());
return Ok(self.output.get_or_init(HashMap::new)); return Ok(self.output.get_or_init(HashMap::new));
@@ -65,6 +63,7 @@ impl ExifExtractor {
let Some(label) = tag_to_label(&tag_name) else { let Some(label) = tag_to_label(&tag_name) else {
continue; continue;
}; };
// First occurrence wins (PRIMARY IFD comes before THUMBNAIL) // First occurrence wins (PRIMARY IFD comes before THUMBNAIL)
output output
.entry(label) .entry(label)
@@ -91,6 +90,12 @@ impl ObjectExtractor for ExifExtractor {
name: &Label, name: &Label,
args: Option<&str>, args: Option<&str>,
) -> Result<Option<PileValue>, std::io::Error> { ) -> Result<Option<PileValue>, std::io::Error> {
trace!(
?args,
key = self.item.key().as_str(),
"Getting field {name:?} from ExifExtractor",
);
if args.is_some() { if args.is_some() {
return Ok(None); return Ok(None);
} }

View File

@@ -1,11 +1,12 @@
use mime::Mime; use mime::Mime;
use pile_config::Label; use pile_config::Label;
use pile_flac::{FlacBlock, FlacReader}; use pile_flac::{FlacBlock, FlacDecodeError, FlacReader};
use std::{ use std::{
collections::HashMap, collections::HashMap,
io::BufReader, io::BufReader,
sync::{Arc, OnceLock}, sync::{Arc, OnceLock},
}; };
use tracing::trace;
use crate::{ use crate::{
extract::traits::{ListExtractor, ObjectExtractor}, extract::traits::{ListExtractor, ObjectExtractor},
@@ -31,16 +32,17 @@ impl FlacImagesExtractor {
let reader = FlacReader::new(BufReader::new(reader)); let reader = FlacReader::new(BufReader::new(reader));
let mut count = 0usize; let mut count = 0usize;
for block in reader { for block in reader {
match block.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))? { match block {
FlacBlock::AudioFrame(_) => break, Ok(FlacBlock::AudioFrame(_)) => break,
FlacBlock::Picture(_) => count += 1, Ok(FlacBlock::Picture(_)) => count += 1,
Err(FlacDecodeError::IoError(err)) => return Err(err),
Err(_) => return Ok(0),
_ => {} _ => {}
} }
} }
Ok::<_, std::io::Error>(count) Ok::<_, std::io::Error>(count)
}) })
.await .await??;
.map_err(std::io::Error::other)??;
return Ok(count); return Ok(count);
} }
@@ -49,14 +51,20 @@ impl FlacImagesExtractor {
#[async_trait::async_trait] #[async_trait::async_trait]
impl ListExtractor for FlacImagesExtractor { impl ListExtractor for FlacImagesExtractor {
async fn get<'a>(&'a self, mut idx: usize) -> Result<Option<PileValue>, std::io::Error> { async fn get<'a>(&'a self, mut idx: usize) -> Result<Option<PileValue>, std::io::Error> {
trace!(
key = self.item.key().as_str(),
"Getting index {idx} from FlacImagesExtractor",
);
let key = self.item.key();
let reader = SyncReadBridge::new_current(self.item.read().await?); let reader = SyncReadBridge::new_current(self.item.read().await?);
let image = tokio::task::spawn_blocking(move || { let image = tokio::task::spawn_blocking(move || {
let reader = FlacReader::new(BufReader::new(reader)); let reader = FlacReader::new(BufReader::new(reader));
let mut out: Option<(Mime, Vec<u8>)> = None; let mut out: Option<(Mime, Vec<u8>)> = None;
'blocks: for block in reader { 'blocks: for block in reader {
match block.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))? { match block {
FlacBlock::AudioFrame(_) => break, Ok(FlacBlock::AudioFrame(_)) => break,
FlacBlock::Picture(picture) => { Ok(FlacBlock::Picture(picture)) => {
if idx > 0 { if idx > 0 {
idx -= 1; idx -= 1;
continue; continue;
@@ -66,6 +74,16 @@ impl ListExtractor for FlacImagesExtractor {
break 'blocks; break 'blocks;
} }
Err(FlacDecodeError::IoError(err)) => return Err(err),
Err(error) => {
trace!(
message = "Could not parse FLAC images",
key = key.as_str(),
?error
);
return Ok(None);
}
_ => {} _ => {}
} }
} }
@@ -93,23 +111,15 @@ impl ListExtractor for FlacImagesExtractor {
pub struct FlacExtractor { pub struct FlacExtractor {
item: Item, item: Item,
output: OnceLock<HashMap<Label, PileValue>>, output: OnceLock<HashMap<Label, PileValue>>,
images: Option<PileValue>, images: PileValue,
} }
impl FlacExtractor { impl FlacExtractor {
pub fn new(item: &Item) -> Self { pub fn new(item: &Item) -> Self {
let is_flac = match item {
Item::File { path, .. } => path.to_str().unwrap_or_default().ends_with(".flac"),
Item::S3 { key, .. } => key.ends_with(".flac"),
};
let images =
is_flac.then(|| PileValue::ListExtractor(Arc::new(FlacImagesExtractor::new(item))));
Self { Self {
item: item.clone(), item: item.clone(),
output: OnceLock::new(), output: OnceLock::new(),
images, images: PileValue::ListExtractor(Arc::new(FlacImagesExtractor::new(item))),
} }
} }
@@ -118,54 +128,55 @@ impl FlacExtractor {
return Ok(x); return Ok(x);
} }
let key = match &self.item { trace!(
Item::File { path, .. } => path.to_str().unwrap_or_default().to_owned(), message = "Reading FLAC tags",
Item::S3 { key, .. } => key.to_string(), key = self.item.key().as_str()
}; );
if !key.ends_with(".flac") {
let _ = self.output.set(HashMap::new());
#[expect(clippy::unwrap_used)]
return Ok(self.output.get().unwrap());
}
let key = self.item.key();
let reader = SyncReadBridge::new_current(self.item.read().await?); let reader = SyncReadBridge::new_current(self.item.read().await?);
let raw_tags = tokio::task::spawn_blocking(move || { let output = tokio::task::spawn_blocking(move || {
let reader = FlacReader::new(BufReader::new(reader)); let reader = FlacReader::new(BufReader::new(reader));
let mut tags: Vec<(String, String)> = Vec::new();
for block in reader {
match block.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))? {
FlacBlock::VorbisComment(comment) => {
for (k, v) in comment.comment.comments {
tags.push((k.to_string().to_lowercase(), v.into()));
}
}
FlacBlock::AudioFrame(_) => break,
_ => {}
}
}
Ok::<_, std::io::Error>(tags)
})
.await
.map_err(std::io::Error::other)??;
let mut output: HashMap<Label, Vec<PileValue>> = HashMap::new(); let mut output: HashMap<Label, Vec<PileValue>> = HashMap::new();
for (k, v) in raw_tags {
if let Some(label) = Label::new(k) { for block in reader {
match block {
Ok(FlacBlock::AudioFrame(_)) => break,
Ok(FlacBlock::VorbisComment(comment)) => {
for (k, v) in comment.comment.comments {
if let Some(label) = Label::new(k.to_string().to_lowercase()) {
output output
.entry(label) .entry(label)
.or_default() .or_default()
.push(PileValue::String(Arc::new(v.into()))); .push(PileValue::String(Arc::new(v)));
} }
} }
}
Err(FlacDecodeError::IoError(err)) => return Err(err),
Err(error) => {
trace!(
message = "Could not parse FLAC metadata",
key = key.as_str(),
?error
);
return Ok(HashMap::new());
}
_ => {}
}
}
let output: HashMap<Label, PileValue> = output let output: HashMap<Label, PileValue> = output
.into_iter() .into_iter()
.map(|(k, v)| (k, PileValue::Array(Arc::new(v)))) .map(|(k, v)| (k, PileValue::Array(Arc::new(v))))
.collect(); .collect();
let _ = self.output.set(output); Ok::<HashMap<Label, PileValue>, std::io::Error>(output)
#[expect(clippy::unwrap_used)] })
return Ok(self.output.get().unwrap()); .await??;
return Ok(self.output.get_or_init(|| output));
} }
} }
@@ -180,20 +191,21 @@ impl ObjectExtractor for FlacExtractor {
return Ok(None); return Ok(None);
} }
if name.as_str() == "images" if name.as_str() == "images" {
&& let Some(ref images) = self.images return Ok(Some(self.images.clone()));
{
return Ok(Some(images.clone()));
} }
Ok(self.get_inner().await?.get(name).cloned()) Ok(self.get_inner().await?.get(name).cloned())
} }
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
let mut fields = self.get_inner().await?.keys().cloned().collect::<Vec<_>>();
if self.images.is_some() {
#[expect(clippy::unwrap_used)] #[expect(clippy::unwrap_used)]
fields.push(Label::new("images").unwrap()); async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
} Ok(self
Ok(fields) .get_inner()
.await?
.keys()
.cloned()
.chain([Label::new("images").unwrap()])
.collect::<Vec<_>>())
} }
} }

View File

@@ -1,3 +1,7 @@
use crate::{
extract::traits::ObjectExtractor,
value::{Item, PileValue},
};
use pile_config::Label; use pile_config::Label;
use std::{ use std::{
collections::HashMap, collections::HashMap,
@@ -5,11 +9,6 @@ use std::{
sync::{Arc, OnceLock}, sync::{Arc, OnceLock},
}; };
use crate::{
extract::traits::ObjectExtractor,
value::{Item, PileValue},
};
pub struct FsExtractor { pub struct FsExtractor {
item: Item, item: Item,
output: OnceLock<HashMap<Label, PileValue>>, output: OnceLock<HashMap<Label, PileValue>>,
@@ -32,6 +31,26 @@ impl FsExtractor {
return Ok(self.output.get_or_init(HashMap::new)); return Ok(self.output.get_or_init(HashMap::new));
}; };
let mut root = false;
let components = path
.components()
.map(|x| match x {
Component::CurDir => None,
Component::Normal(x) => x.to_str().map(|x| x.to_owned()),
Component::ParentDir => Some("..".to_owned()),
Component::RootDir => {
root = true;
None
}
Component::Prefix(x) => x.as_os_str().to_str().map(|x| x.to_owned()),
})
.collect::<Option<Vec<_>>>();
let mut path_str = components.as_ref().map(|x| x.join("/"));
if root {
path_str = path_str.map(|x| format!("/{x}"));
}
#[expect(clippy::unwrap_used)] #[expect(clippy::unwrap_used)]
let output = HashMap::from([ let output = HashMap::from([
( (
@@ -43,23 +62,20 @@ impl FsExtractor {
), ),
( (
Label::new("path").unwrap(), Label::new("path").unwrap(),
path.to_str() path_str
.map(|x| PileValue::String(Arc::new(x.into()))) .map(|x| PileValue::String(Arc::new(x.into())))
.unwrap_or(PileValue::Null), .unwrap_or(PileValue::Null),
), ),
( (
Label::new("segments").unwrap(), Label::new("segments").unwrap(),
path.components() components
.map(|x| match x { .map(|x| {
Component::CurDir => Some(".".to_owned()), PileValue::Array(Arc::new(
Component::Normal(x) => x.to_str().map(|x| x.to_owned()), x.iter()
Component::ParentDir => Some("..".to_owned()), .map(|x| PileValue::String(Arc::new(x.into())))
Component::RootDir => Some("/".to_owned()), .collect(),
Component::Prefix(x) => x.as_os_str().to_str().map(|x| x.to_owned()), ))
}) })
.map(|x| x.map(|x| PileValue::String(Arc::new(x.into()))))
.collect::<Option<Vec<_>>>()
.map(|v| PileValue::Array(Arc::new(v)))
.unwrap_or(PileValue::Null), .unwrap_or(PileValue::Null),
), ),
]); ]);

View File

@@ -6,6 +6,7 @@ use std::{
io::BufReader, io::BufReader,
sync::{Arc, OnceLock}, sync::{Arc, OnceLock},
}; };
use tracing::trace;
use crate::{ use crate::{
extract::traits::ObjectExtractor, extract::traits::ObjectExtractor,
@@ -30,32 +31,29 @@ impl Id3Extractor {
return Ok(x); return Ok(x);
} }
let key = self.item.key(); trace!(message = "Reading id3 tags", key = self.item.key().as_str());
let ext = key.as_str().rsplit('.').next();
if !matches!(ext, Some("mp3") | Some("aiff") | Some("aif") | Some("wav")) {
return Ok(self.output.get_or_init(HashMap::new));
}
let key = self.item.key();
let reader = SyncReadBridge::new_current(self.item.read().await?); let reader = SyncReadBridge::new_current(self.item.read().await?);
let tag = match tokio::task::spawn_blocking(move || Tag::read_from2(BufReader::new(reader))) let tag = match tokio::task::spawn_blocking(move || Tag::read_from2(BufReader::new(reader)))
.await .await
{ {
Ok(Ok(tag)) => tag, Ok(Ok(tag)) => tag,
Ok(Err(id3::Error { Err(e) => return Err(e.into()),
kind: id3::ErrorKind::NoTag,
..
})) => {
return Ok(self.output.get_or_init(HashMap::new));
}
Ok(Err(id3::Error { Ok(Err(id3::Error {
kind: id3::ErrorKind::Io(e), kind: id3::ErrorKind::Io(e),
.. ..
})) => return Err(e), })) => return Err(e),
Ok(Err(e)) => return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, e)), Ok(Err(error)) => {
Err(e) => return Err(e.into()), trace!(
message = "Could not parse id3 tags",
key = key.as_str(),
?error
);
return Ok(self.output.get_or_init(HashMap::new));
}
}; };
let mut output: HashMap<Label, Vec<PileValue>> = HashMap::new(); let mut output: HashMap<Label, Vec<PileValue>> = HashMap::new();

View File

@@ -1,5 +1,6 @@
use pile_config::Label; use pile_config::Label;
use std::sync::Arc; use std::sync::Arc;
use tracing::trace;
#[cfg(feature = "pdfium")] #[cfg(feature = "pdfium")]
mod pdf_pages; mod pdf_pages;
@@ -42,6 +43,12 @@ impl ObjectExtractor for PdfExtractor {
name: &pile_config::Label, name: &pile_config::Label,
args: Option<&str>, args: Option<&str>,
) -> Result<Option<PileValue>, std::io::Error> { ) -> Result<Option<PileValue>, std::io::Error> {
trace!(
?args,
key = self.text.item.key().as_str(),
"Getting field {name:?} from PdfExtractor",
);
match (name.as_str(), args) { match (name.as_str(), args) {
("text", args) => self.text.field(name, args).await, ("text", args) => self.text.field(name, args).await,
("meta", None) => Ok(Some(PileValue::ObjectExtractor(self.meta.clone()))), ("meta", None) => Ok(Some(PileValue::ObjectExtractor(self.meta.clone()))),
@@ -57,8 +64,6 @@ impl ObjectExtractor for PdfExtractor {
Label::new("text").unwrap(), Label::new("text").unwrap(),
Label::new("meta").unwrap(), Label::new("meta").unwrap(),
#[cfg(feature = "pdfium")] #[cfg(feature = "pdfium")]
Label::new("cover").unwrap(),
#[cfg(feature = "pdfium")]
Label::new("pages").unwrap(), Label::new("pages").unwrap(),
]) ])
} }

View File

@@ -35,6 +35,11 @@ impl PdfPagesExtractor {
#[async_trait::async_trait] #[async_trait::async_trait]
impl ListExtractor for PdfPagesExtractor { impl ListExtractor for PdfPagesExtractor {
async fn get(&self, idx: usize) -> Result<Option<PileValue>, std::io::Error> { async fn get(&self, idx: usize) -> Result<Option<PileValue>, std::io::Error> {
trace!(
key = self.item.key().as_str(),
"Getting index {idx} from PdfPagesExtractor",
);
let bytes = self.get_bytes().await?; let bytes = self.get_bytes().await?;
let png = tokio::task::spawn_blocking(move || { let png = tokio::task::spawn_blocking(move || {
let pdfium = Pdfium::default(); let pdfium = Pdfium::default();

View File

@@ -14,7 +14,7 @@ use crate::{
}; };
pub struct PdfTextExtractor { pub struct PdfTextExtractor {
item: Item, pub(super) item: Item,
output: OnceLock<HashMap<Label, PileValue>>, output: OnceLock<HashMap<Label, PileValue>>,
} }

View File

@@ -1,5 +1,6 @@
use pile_config::Label; use pile_config::Label;
use std::sync::OnceLock; use std::sync::OnceLock;
use tracing::trace;
use super::TomlExtractor; use super::TomlExtractor;
use crate::{ use crate::{
@@ -28,6 +29,12 @@ impl ObjectExtractor for SidecarExtractor {
name: &Label, name: &Label,
args: Option<&str>, args: Option<&str>,
) -> Result<Option<PileValue>, std::io::Error> { ) -> Result<Option<PileValue>, std::io::Error> {
trace!(
?args,
key = self.item.key().as_str(),
"Getting field {name:?} from SidecarExtractor",
);
match self match self
.output .output
.get_or_init(|| self.item.sidecar().map(TomlExtractor::new)) .get_or_init(|| self.item.sidecar().map(TomlExtractor::new))

View File

@@ -41,13 +41,7 @@ impl TomlExtractor {
return Ok(x); return Ok(x);
} }
let mut reader = match self.item.read().await { let mut reader = self.item.read().await?;
Ok(r) => r,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
return Ok(self.output.get_or_init(HashMap::new));
}
Err(e) => return Err(e),
};
let bytes = reader.read_to_end().await?; let bytes = reader.read_to_end().await?;
let toml: toml::Value = match toml::from_slice(&bytes) { let toml: toml::Value = match toml::from_slice(&bytes) {
Ok(x) => x, Ok(x) => x,

View File

@@ -79,6 +79,7 @@ impl ObjectExtractor for StringExtractor {
} }
#[cfg(test)] #[cfg(test)]
#[expect(clippy::expect_used)]
mod tests { mod tests {
use super::*; use super::*;

View File

@@ -7,6 +7,12 @@ pub trait ObjectExtractor: Send + Sync {
/// Get the field at `name` from `item`. /// Get the field at `name` from `item`.
/// - returns `None` if `name` is not a valid field /// - returns `None` if `name` is not a valid field
/// - returns `Some(Null)` if `name` is not available /// - returns `Some(Null)` if `name` is not available
///
/// For extractors that parse binary data, this fn should return
/// an error only if we failed to obtain the data we need (permission denied, etc.).
///
/// If the underlying data has an invalid format (e.g., running a pdf extractor on a non-pdf file),
/// this fn should return `Ok(Some(Null))`.
async fn field( async fn field(
&self, &self,
name: &pile_config::Label, name: &pile_config::Label,

View File

@@ -199,30 +199,14 @@ impl PileValue {
Value::String(format!("<Blob ({mime}, {} bytes)>", bytes.len())) Value::String(format!("<Blob ({mime}, {} bytes)>", bytes.len()))
} }
#[expect(clippy::expect_used)]
Self::Array(_) | Self::ListExtractor(_) => { Self::Array(_) | Self::ListExtractor(_) => {
let e = self.list_extractor(); let e = self.list_extractor();
let len = e.len().await?; return e.to_json().await;
let mut arr = Vec::new();
for i in 0..len {
let v = e.get(i).await?.expect("item must be present");
arr.push(Box::pin(v.to_json()).await?);
}
Value::Array(arr)
} }
Self::ObjectExtractor(_) | Self::Item(_) => { Self::ObjectExtractor(_) | Self::Item(_) => {
let e = self.object_extractor(); let e = self.object_extractor();
let keys = e.fields().await?; return e.to_json().await;
let mut map = Map::new();
for k in &keys {
let v = match e.field(k, None).await? {
Some(x) => x,
None => continue,
};
map.insert(k.to_string(), Box::pin(v.to_json()).await?);
}
Value::Object(map)
} }
}) })
} }