Add epub extractor

This commit is contained in:
2026-03-09 22:34:39 -07:00
parent aecc84233b
commit ad41a8abbd
7 changed files with 269 additions and 0 deletions

View File

@@ -0,0 +1,63 @@
use pile_config::Label;
use std::{collections::HashMap, sync::Arc};
mod epub_meta;
pub use epub_meta::*;
mod epub_text;
pub use epub_text::*;
use crate::{
Item, PileValue,
extract::{Extractor, MapExtractor},
};
/// Extractor that groups the epub sub-extractors of a single [`Item`].
///
/// It exposes two fields, `text` and `meta`, backed by an
/// `EpubTextExtractor` and an `EpubMetaExtractor` respectively.
pub struct EpubExtractor<'a> {
/// Label-to-value map holding the `text` and `meta` sub-extractors.
inner: MapExtractor<'a>,
}
impl<'a> EpubExtractor<'a> {
    /// Creates an extractor for `item`.
    ///
    /// Two sub-extractors are registered, both borrowing `item`:
    /// an `EpubTextExtractor` under the label `text` and an
    /// `EpubMetaExtractor` under the label `meta`.
    // The `unwrap`s below only see constant label strings.
    #[expect(clippy::unwrap_used)]
    pub fn new(item: &'a Item) -> Self {
        let mut fields = HashMap::new();

        fields.insert(
            Label::new("text").unwrap(),
            PileValue::Extractor(Arc::new(EpubTextExtractor::new(item))),
        );
        fields.insert(
            Label::new("meta").unwrap(),
            PileValue::Extractor(Arc::new(EpubMetaExtractor::new(item))),
        );

        Self {
            inner: MapExtractor { inner: fields },
        }
    }
}
#[async_trait::async_trait]
impl Extractor for EpubExtractor<'_> {
    /// Looks up the field called `name`.
    ///
    /// A request for `text` is forwarded to the nested text extractor, which is
    /// asked for its own `text` field, instead of being answered from the inner
    /// map. Every other name is resolved by the inner map.
    async fn field<'a>(
        &'a self,
        name: &pile_config::Label,
    ) -> Result<Option<&'a PileValue<'a>>, std::io::Error> {
        // Guard clause: anything other than `text` is a plain map lookup.
        if name.as_str() != "text" {
            return self.inner.field(name).await;
        }

        // The map built by `new` contains a `text` entry, so this lookup is
        // expected to succeed.
        #[expect(clippy::unwrap_used)]
        let text_entry = self.inner.inner.get(name).unwrap();

        match text_entry {
            PileValue::Extractor(text_extractor) => text_extractor.field(name).await,
            // `new` stores an extractor under `text`; nothing else is expected here.
            _ => unreachable!(),
        }
    }

    /// Lists the labels this extractor exposes: `text` followed by `meta`.
    // The `unwrap`s below only see constant label strings.
    #[expect(clippy::unwrap_used)]
    async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
        let text = Label::new("text").unwrap();
        let meta = Label::new("meta").unwrap();
        Ok(vec![text, meta])
    }
}