diff --git a/crates/pile-value/src/extract/item/epub/epub_cover.rs b/crates/pile-value/src/extract/item/epub/epub_cover.rs new file mode 100644 index 0000000..2623309 --- /dev/null +++ b/crates/pile-value/src/extract/item/epub/epub_cover.rs @@ -0,0 +1,76 @@ +use epub::doc::EpubDoc; +use mime::Mime; +use pile_io::SyncReadBridge; +use std::sync::{Arc, OnceLock}; +use tracing::trace; + +use crate::{ + extract::traits::ExtractState, + value::{Item, PileValue}, +}; + +/** Extracts the cover image from an epub item. The result of a finished extraction is kept in output (a OnceLock) and reused by later calls. */pub struct EpubCoverExtractor { + item: Item, + output: OnceLock)>>, +} + +impl EpubCoverExtractor { + /** Creates an extractor that holds a clone of item and an empty output cache. */pub fn new(item: &Item) -> Self { + Self { + item: item.clone(), + output: OnceLock::new(), + } + } + + /** Returns the cached cover when output is already set. Otherwise reads the item, parses it with EpubDoc inside spawn_blocking, and caches the outcome. A failed cover-id lookup yields None. An error that downcasts to the type returned through Err(x) (std::io::Error per the signature) is returned to the caller without being cached; any other error is traced and cached as None. A failure to join the blocking task is propagated by the ? after await. */async fn get_inner(&self) -> Result)>, std::io::Error> { + if let Some(x) = self.output.get() { + return Ok(x.as_ref()); + } + + let reader = SyncReadBridge::new_current(self.item.read().await?); + let result = tokio::task::spawn_blocking(move || { + let mut doc = EpubDoc::from_reader(reader)?; + let cover_id = match doc.get_cover_id() { + Ok(id) => id, + Err(_) => return Ok::<_, anyhow::Error>(None), + }; + + /* use mime::IMAGE_JPEG when the cover has no resource entry or its mime string does not parse */let mime: Mime = doc + .resources + .get(&cover_id) + .and_then(|(_, mime_str)| mime_str.parse().ok()) + .unwrap_or(mime::IMAGE_JPEG); + + let bytes = doc.get_cover()?; + Ok(Some((mime, bytes))) + }) + .await?; + + let result = match result { + Ok(x) => x, + Err(error) => match error.downcast::() { + Ok(x) => return Err(x), + Err(error) => { + trace!(message = "Could not extract epub cover", ?error, key = ?self.item.key()); + None + } + }, + }; + + /* get_or_init keeps an already-stored value, so if output was set while this call was extracting, that earlier value is what gets returned */Ok(self.output.get_or_init(|| result).as_ref()) + } + + /** Returns the cover as PileValue::Blob, or None when no cover is available. Unless state.ignore_mime is set, items whose mime essence is not application/epub+zip return None without being read. The cached mime and bytes are cloned on each call. */pub async fn get(&self, state: &ExtractState) -> Result, std::io::Error> { + if !state.ignore_mime && self.item.mime().essence_str() != "application/epub+zip" { + return Ok(None); + } + + Ok(self + .get_inner() + .await? 
+ .map(|(mime, bytes)| PileValue::Blob { + mime: mime.clone(), + bytes: Arc::new(bytes.clone()), + })) + } +} diff --git a/crates/pile-value/src/extract/item/epub/mod.rs b/crates/pile-value/src/extract/item/epub/mod.rs index a67e4f4..d81e129 100644 --- a/crates/pile-value/src/extract/item/epub/mod.rs +++ b/crates/pile-value/src/extract/item/epub/mod.rs @@ -1,6 +1,9 @@ use pile_config::Label; use std::sync::Arc; +mod epub_cover; +pub use epub_cover::*; + mod epub_meta; pub use epub_meta::*; @@ -15,6 +18,7 @@ use crate::{ pub struct EpubExtractor { text: Arc, meta: Arc, + cover: Arc, } impl EpubExtractor { @@ -22,6 +26,7 @@ impl EpubExtractor { Self { text: Arc::new(EpubTextExtractor::new(item)), meta: Arc::new(EpubMetaExtractor::new(item)), + cover: Arc::new(EpubCoverExtractor::new(item)), } } } @@ -43,6 +48,7 @@ impl ObjectExtractor for EpubExtractor { )), ("meta", None) => Ok(Some(PileValue::ObjectExtractor(self.meta.clone()))), + ("cover", None) => self.cover.get(state).await, _ => Ok(None), } } @@ -52,6 +58,7 @@ impl ObjectExtractor for EpubExtractor { Ok(vec![ Label::new("text").unwrap(), Label::new("meta").unwrap(), + Label::new("cover").unwrap(), ]) } @@ -78,6 +85,18 @@ impl ObjectExtractor for EpubExtractor { continue; } + /* cover is emitted as a summary string; the continue skips the to_json call below for this key */if k.as_str() == "cover" { + let summary = match &v { + PileValue::Blob { mime, bytes } => { + format!("", mime, bytes.len()) + } + PileValue::Null => "".to_owned(), + _ => "".to_owned(), + }; + map.insert(k.to_string(), serde_json::Value::String(summary)); + continue; + } + map.insert(k.to_string(), Box::pin(v.to_json(state)).await?); }