Files
pile/crates/pile-dataset/src/extract/mod.rs
2026-03-10 09:19:14 -07:00

127 lines
2.7 KiB
Rust

use pile_config::Label;
use std::{collections::HashMap, sync::Arc};
mod flac;
pub use flac::*;
mod id3;
pub use id3::*;
mod fs;
pub use fs::*;
mod epub;
pub use epub::*;
mod exif;
pub use exif::*;
mod pdf;
pub use pdf::*;
mod toml;
pub use toml::*;
mod map;
pub use map::*;
mod sidecar;
pub use sidecar::*;
use crate::Item;
/// An attachment that extracts metadata from an [Item].
///
/// Metadata is exposed as an immutable map of {label: value},
/// much like a json object.
///
/// Implementors must be `Send + Sync`. Both methods are async
/// (through `async_trait`) and report failures as [std::io::Error].
#[async_trait::async_trait]
pub trait Extractor: Send + Sync {
/// Get the field called `name` from this extractor.
/// - returns `Ok(None)` if `name` is not a valid field
/// - returns `Ok(Some(Null))` if `name` is a valid field
///   whose value is not available
///
/// The returned reference borrows from `self`.
// NOTE(review): `Null` is presumably a variant of `PileValue` — confirm.
async fn field<'a>(
&'a self,
name: &pile_config::Label,
) -> Result<Option<&'a crate::PileValue<'a>>, std::io::Error>;
/// Return all fields in this extractor.
///
/// `Self::field` must return [Some] for all these keys
/// and [None] for all others.
async fn fields(&self) -> Result<Vec<Label>, std::io::Error>;
}
/// An [Extractor] whose fields are themselves extractors.
///
/// [MetaExtractor::new] registers one nested extractor per label
/// (`flac`, `id3`, `fs`, `epub`, `exif`, `pdf`, `toml`, `sidecar`),
/// all built from the same [Item].
pub struct MetaExtractor<'a> {
// Label -> nested extractor map, filled once in `MetaExtractor::new`.
inner: MapExtractor<'a>,
}
//
// MARK: file
//
impl<'a> MetaExtractor<'a> {
    /// Build a [MetaExtractor] for `item`.
    ///
    /// Registers one nested extractor per label, in this order:
    /// `flac`, `id3`, `fs`, `epub`, `exif`, `pdf`, `toml`, `sidecar`.
    /// Every nested extractor is constructed from the same `item`.
    ///
    /// The `unwrap`s assume each hard-coded name is accepted by `Label::new`.
    #[expect(clippy::unwrap_used)]
    pub fn new(item: &'a Item) -> Self {
        // Every label below is a hard-coded literal.
        let label = |name: &'static str| Label::new(name).unwrap();

        let mut extractors = HashMap::with_capacity(8);

        extractors.insert(
            label("flac"),
            crate::PileValue::Extractor(Arc::new(FlacExtractor::new(item))),
        );
        extractors.insert(
            label("id3"),
            crate::PileValue::Extractor(Arc::new(Id3Extractor::new(item))),
        );
        extractors.insert(
            label("fs"),
            crate::PileValue::Extractor(Arc::new(FsExtractor::new(item))),
        );
        extractors.insert(
            label("epub"),
            crate::PileValue::Extractor(Arc::new(EpubExtractor::new(item))),
        );
        extractors.insert(
            label("exif"),
            crate::PileValue::Extractor(Arc::new(ExifExtractor::new(item))),
        );
        extractors.insert(
            label("pdf"),
            crate::PileValue::Extractor(Arc::new(PdfExtractor::new(item))),
        );
        extractors.insert(
            label("toml"),
            crate::PileValue::Extractor(Arc::new(TomlExtractor::new(item))),
        );
        extractors.insert(
            label("sidecar"),
            crate::PileValue::Extractor(Arc::new(SidecarExtractor::new(item))),
        );

        Self {
            inner: MapExtractor { inner: extractors },
        }
    }
}
#[async_trait::async_trait]
impl Extractor for MetaExtractor<'_> {
    /// Look up the nested extractor registered under `name`.
    ///
    /// Delegates directly to the inner [MapExtractor] and returns
    /// whatever it returns.
    async fn field<'a>(
        &'a self,
        name: &pile_config::Label,
    ) -> Result<Option<&'a crate::PileValue<'a>>, std::io::Error> {
        self.inner.field(name).await
    }

    /// List every label registered by [MetaExtractor::new].
    ///
    /// This list must stay in sync with the keys inserted in `new`:
    /// the [Extractor] contract requires `field` to return [Some] for
    /// exactly these labels. `toml` is included because `new` registers
    /// a `TomlExtractor` under that label.
    ///
    /// The `unwrap`s assume each hard-coded name is accepted by `Label::new`.
    #[expect(clippy::unwrap_used)]
    async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
        Ok(vec![
            Label::new("flac").unwrap(),
            Label::new("id3").unwrap(),
            Label::new("fs").unwrap(),
            Label::new("epub").unwrap(),
            Label::new("exif").unwrap(),
            Label::new("pdf").unwrap(),
            Label::new("toml").unwrap(),
            Label::new("sidecar").unwrap(),
        ])
    }
}