Extract epub covers

This commit is contained in:
2026-03-23 22:40:32 -07:00
parent 0792b2f2c6
commit d95ebeaba0
2 changed files with 95 additions and 0 deletions

View File

@@ -0,0 +1,76 @@
use epub::doc::EpubDoc;
use mime::Mime;
use pile_io::SyncReadBridge;
use std::sync::{Arc, OnceLock};
use tracing::trace;
use crate::{
extract::traits::ExtractState,
value::{Item, PileValue},
};
pub struct EpubCoverExtractor {
item: Item,
output: OnceLock<Option<(Mime, Vec<u8>)>>,
}
impl EpubCoverExtractor {
pub fn new(item: &Item) -> Self {
Self {
item: item.clone(),
output: OnceLock::new(),
}
}
async fn get_inner(&self) -> Result<Option<&(Mime, Vec<u8>)>, std::io::Error> {
if let Some(x) = self.output.get() {
return Ok(x.as_ref());
}
let reader = SyncReadBridge::new_current(self.item.read().await?);
let result = tokio::task::spawn_blocking(move || {
let mut doc = EpubDoc::from_reader(reader)?;
let cover_id = match doc.get_cover_id() {
Ok(id) => id,
Err(_) => return Ok::<_, anyhow::Error>(None),
};
let mime: Mime = doc
.resources
.get(&cover_id)
.and_then(|(_, mime_str)| mime_str.parse().ok())
.unwrap_or(mime::IMAGE_JPEG);
let bytes = doc.get_cover()?;
Ok(Some((mime, bytes)))
})
.await?;
let result = match result {
Ok(x) => x,
Err(error) => match error.downcast::<std::io::Error>() {
Ok(x) => return Err(x),
Err(error) => {
trace!(message = "Could not extract epub cover", ?error, key = ?self.item.key());
None
}
},
};
Ok(self.output.get_or_init(|| result).as_ref())
}
pub async fn get(&self, state: &ExtractState) -> Result<Option<PileValue>, std::io::Error> {
if !state.ignore_mime && self.item.mime().essence_str() != "application/epub+zip" {
return Ok(None);
}
Ok(self
.get_inner()
.await?
.map(|(mime, bytes)| PileValue::Blob {
mime: mime.clone(),
bytes: Arc::new(bytes.clone()),
}))
}
}

View File

@@ -1,6 +1,9 @@
use pile_config::Label; use pile_config::Label;
use std::sync::Arc; use std::sync::Arc;
mod epub_cover;
pub use epub_cover::*;
mod epub_meta; mod epub_meta;
pub use epub_meta::*; pub use epub_meta::*;
@@ -15,6 +18,7 @@ use crate::{
pub struct EpubExtractor { pub struct EpubExtractor {
text: Arc<EpubTextExtractor>, text: Arc<EpubTextExtractor>,
meta: Arc<EpubMetaExtractor>, meta: Arc<EpubMetaExtractor>,
cover: Arc<EpubCoverExtractor>,
} }
impl EpubExtractor { impl EpubExtractor {
@@ -22,6 +26,7 @@ impl EpubExtractor {
Self { Self {
text: Arc::new(EpubTextExtractor::new(item)), text: Arc::new(EpubTextExtractor::new(item)),
meta: Arc::new(EpubMetaExtractor::new(item)), meta: Arc::new(EpubMetaExtractor::new(item)),
cover: Arc::new(EpubCoverExtractor::new(item)),
} }
} }
} }
@@ -43,6 +48,7 @@ impl ObjectExtractor for EpubExtractor {
)), )),
("meta", None) => Ok(Some(PileValue::ObjectExtractor(self.meta.clone()))), ("meta", None) => Ok(Some(PileValue::ObjectExtractor(self.meta.clone()))),
("cover", None) => self.cover.get(state).await,
_ => Ok(None), _ => Ok(None),
} }
} }
@@ -52,6 +58,7 @@ impl ObjectExtractor for EpubExtractor {
Ok(vec![ Ok(vec![
Label::new("text").unwrap(), Label::new("text").unwrap(),
Label::new("meta").unwrap(), Label::new("meta").unwrap(),
Label::new("cover").unwrap(),
]) ])
} }
@@ -78,6 +85,18 @@ impl ObjectExtractor for EpubExtractor {
continue; continue;
} }
if k.as_str() == "cover" {
let summary = match &v {
PileValue::Blob { mime, bytes } => {
format!("<Blob ({}, {} bytes)>", mime, bytes.len())
}
PileValue::Null => "<null>".to_owned(),
_ => "<cover>".to_owned(),
};
map.insert(k.to_string(), serde_json::Value::String(summary));
continue;
}
map.insert(k.to_string(), Box::pin(v.to_json(state)).await?); map.insert(k.to_string(), Box::pin(v.to_json(state)).await?);
} }