use epub::doc::EpubDoc;
use pile_config::Label;
use std::{
    collections::HashMap,
    sync::{Arc, OnceLock},
};
use tracing::trace;

use crate::{
    extract::traits::ObjectExtractor,
    value::{Item, PileValue, SyncReadBridge},
};

/// Extracts a fixed set of metadata fields from an EPUB item.
///
/// The metadata is computed on first access and cached in `output`, so
/// subsequent calls to [`ObjectExtractor::field`] / [`ObjectExtractor::fields`]
/// reuse the same map.
pub struct EpubMetaExtractor {
    /// The item whose contents are read and parsed as an EPUB.
    item: Item,
    /// Cached extraction result; unset until the first successful
    /// (or gracefully failed) call to `get_inner`.
    output: OnceLock<HashMap<Label, PileValue>>,
}

impl EpubMetaExtractor {
    /// Creates an extractor for a clone of `item`. No I/O happens here;
    /// the item is only read when a field is first requested.
    pub fn new(item: &Item) -> Self {
        Self {
            item: item.clone(),
            output: OnceLock::new(),
        }
    }

    /// Returns the cached metadata map, computing it on first use.
    ///
    /// * If the text after the last `.` in the item's key is not exactly
    ///   `epub` (case-sensitive), an empty map is cached and returned.
    /// * If the EPUB cannot be parsed, the failure is logged at `trace`
    ///   level and an empty map is cached and returned.
    /// * Otherwise the map has one entry per queried field; fields the
    ///   document does not provide map to [`PileValue::Null`].
    ///
    /// # Errors
    ///
    /// Returns an error if reading the item fails or if the blocking
    /// parse task cannot be joined. Nothing is cached in those cases.
    async fn get_inner(&self) -> Result<&HashMap<Label, PileValue>, std::io::Error> {
        if let Some(x) = self.output.get() {
            return Ok(x);
        }

        let key = self.item.key();
        let ext = key.as_str().rsplit('.').next();
        if !matches!(ext, Some("epub")) {
            return Ok(self.output.get_or_init(HashMap::new));
        }

        // NOTE(review): `SyncReadBridge` presumably adapts the async reader
        // into a blocking one for use inside `spawn_blocking` — confirm.
        let reader = SyncReadBridge::new_current(self.item.read().await?);

        // Parsing is synchronous, so it runs on the blocking thread pool.
        let raw_meta = tokio::task::spawn_blocking(move || {
            let doc = EpubDoc::from_reader(reader)
                .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;

            let fields: &[&'static str] = &[
                "title",
                "creator",
                "description",
                "language",
                "publisher",
                "date",
                "subject",
                "identifier",
            ];

            // NOTE(review): assumes `EpubDoc::mdata` returns `Option<String>`
            // — confirm against the `epub` crate version in use.
            let meta: Vec<(&'static str, Option<String>)> =
                fields.iter().map(|&key| (key, doc.mdata(key))).collect();

            Ok::<_, std::io::Error>(meta)
        })
        .await
        .map_err(std::io::Error::other)?;

        // A parse failure is not an error for the caller: the item simply
        // exposes no epub metadata.
        let raw_meta = match raw_meta {
            Ok(x) => x,
            Err(error) => {
                trace!(message = "Could not process epub", ?error, key = ?self.item.key());
                return Ok(self.output.get_or_init(HashMap::new));
            }
        };

        let mut output: HashMap<Label, PileValue> = HashMap::with_capacity(raw_meta.len());

        // The keys come from the hard-coded `fields` list above, which is why
        // the `unwrap` on `Label::new` is accepted here.
        #[expect(clippy::unwrap_used)]
        for (key, val) in raw_meta {
            let label = Label::new(key).unwrap();
            let value = match val {
                Some(s) => PileValue::String(Arc::new(s.into())),
                None => PileValue::Null,
            };
            output.insert(label, value);
        }

        // If another caller initialized the cell first, its map is returned
        // and this one is dropped.
        Ok(self.output.get_or_init(|| output))
    }
}

#[async_trait::async_trait]
impl ObjectExtractor for EpubMetaExtractor {
    /// Returns the value stored under `name`, or `None` if this extractor
    /// has no such field.
    async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
        Ok(self.get_inner().await?.get(name).cloned())
    }

    /// Returns the labels of all fields this extractor produced
    /// (in unspecified order, since they come from a `HashMap`).
    async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
        Ok(self.get_inner().await?.keys().cloned().collect())
    }
}