87 lines
1.8 KiB
Rust
87 lines
1.8 KiB
Rust
use pile_config::Label;
|
|
use std::sync::Arc;
|
|
|
|
mod epub_meta;
|
|
pub use epub_meta::*;
|
|
|
|
mod epub_text;
|
|
pub use epub_text::*;
|
|
|
|
use crate::{
|
|
extract::traits::{ExtractState, ObjectExtractor},
|
|
value::{Item, PileValue},
|
|
};
|
|
|
|
pub struct EpubExtractor {
|
|
text: Arc<EpubTextExtractor>,
|
|
meta: Arc<EpubMetaExtractor>,
|
|
}
|
|
|
|
impl EpubExtractor {
|
|
pub fn new(item: &Item) -> Self {
|
|
Self {
|
|
text: Arc::new(EpubTextExtractor::new(item)),
|
|
meta: Arc::new(EpubMetaExtractor::new(item)),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[async_trait::async_trait]
|
|
impl ObjectExtractor for EpubExtractor {
|
|
async fn field(
|
|
&self,
|
|
state: &ExtractState,
|
|
name: &pile_config::Label,
|
|
args: Option<&str>,
|
|
) -> Result<Option<PileValue>, std::io::Error> {
|
|
match (name.as_str(), args) {
|
|
("text", args) => Ok(Some(
|
|
self.text
|
|
.field(state, name, args)
|
|
.await
|
|
.map(|x| x.unwrap_or(PileValue::Null))?,
|
|
)),
|
|
|
|
("meta", None) => Ok(Some(PileValue::ObjectExtractor(self.meta.clone()))),
|
|
_ => Ok(None),
|
|
}
|
|
}
|
|
|
|
#[expect(clippy::unwrap_used)]
|
|
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
|
Ok(vec![
|
|
Label::new("text").unwrap(),
|
|
Label::new("meta").unwrap(),
|
|
])
|
|
}
|
|
|
|
async fn to_json(&self, state: &ExtractState) -> Result<serde_json::Value, std::io::Error> {
|
|
let keys = self.fields().await?;
|
|
let mut map = serde_json::Map::new();
|
|
for k in &keys {
|
|
let v = match self.field(state, k, None).await? {
|
|
Some(x) => x,
|
|
None => continue,
|
|
};
|
|
|
|
if k.as_str() == "text" {
|
|
map.insert(
|
|
k.to_string(),
|
|
serde_json::Value::String(format!(
|
|
"<String ({} bytes)",
|
|
match v {
|
|
PileValue::String(x) => x.len(),
|
|
_ => 0,
|
|
}
|
|
)),
|
|
);
|
|
continue;
|
|
}
|
|
|
|
map.insert(k.to_string(), Box::pin(v.to_json(state)).await?);
|
|
}
|
|
|
|
Ok(serde_json::Value::Object(map))
|
|
}
|
|
}
|