Compare commits

...

2 Commits

Author SHA1 Message Date
6f267880c8 Many field paths
Some checks failed
CI / Typos (push) Successful in 22s
CI / Build and test (push) Failing after 2m40s
CI / Clippy (push) Successful in 3m28s
CI / Build and test (all features) (push) Failing after 11m10s
2026-03-23 22:41:07 -07:00
d95ebeaba0 Extract epub covers 2026-03-23 22:40:32 -07:00
3 changed files with 146 additions and 21 deletions

View File

@@ -1,6 +1,6 @@
use axum::{ use axum::{
Json, Json,
extract::{Query, State}, extract::{Query, RawQuery, State},
http::{StatusCode, header}, http::{StatusCode, header},
response::{IntoResponse, Response}, response::{IntoResponse, Response},
}; };
@@ -17,19 +17,19 @@ use crate::Datasets;
pub struct FieldQuery { pub struct FieldQuery {
source: String, source: String,
key: String, key: String,
path: String,
#[serde(default)] #[serde(default)]
download: bool, download: bool,
} }
/// Extract a specific field from an item's metadata /// Extract a specific field from an item's metadata.
/// Multiple `path` parameters may be provided; the first non-null result is returned.
#[utoipa::path( #[utoipa::path(
get, get,
path = "/field", path = "/field",
params( params(
("source" = String, Query, description = "Source label"), ("source" = String, Query, description = "Source label"),
("key" = String, Query, description = "Item key"), ("key" = String, Query, description = "Item key"),
("path" = String, Query, description = "Object path (e.g. $.flac.title)"), ("path" = String, Query, description = "Object path (e.g. $.flac.title); repeat for fallbacks"),
), ),
responses( responses(
(status = 200, description = "Field value as JSON"), (status = 200, description = "Field value as JSON"),
@@ -41,43 +41,73 @@ pub struct FieldQuery {
pub async fn get_field( pub async fn get_field(
State(state): State<Arc<Datasets>>, State(state): State<Arc<Datasets>>,
Query(params): Query<FieldQuery>, Query(params): Query<FieldQuery>,
RawQuery(raw_query): RawQuery,
) -> Response { ) -> Response {
let start = Instant::now(); let start = Instant::now();
debug!(
message = "Serving /field",
source = params.source,
key = params.key,
path = params.path,
);
let label = match Label::try_from(params.source.clone()) { let label = match Label::try_from(params.source.clone()) {
Ok(l) => l, Ok(l) => l,
Err(e) => return (StatusCode::BAD_REQUEST, format!("{e:?}")).into_response(), Err(e) => return (StatusCode::BAD_REQUEST, format!("{e:?}")).into_response(),
}; };
let path: ObjectPath = match params.path.parse() { // Collect all `path` query params in order (supports repeated ?path=...&path=...)
Ok(p) => p, let raw = raw_query.as_deref().unwrap_or("");
Err(e) => return (StatusCode::BAD_REQUEST, format!("{e:?}")).into_response(), let paths: Vec<ObjectPath> = {
let mut result = Vec::new();
for part in raw.split('&') {
if let Some((k, v)) = part.split_once('=')
&& k == "path"
{
match v.parse::<ObjectPath>() {
Ok(p) => result.push(p),
Err(e) => {
return (StatusCode::BAD_REQUEST, format!("{e:?}")).into_response();
}
}
}
}
result
}; };
if paths.is_empty() {
return (StatusCode::BAD_REQUEST, "Missing `path` query parameter").into_response();
}
debug!(
message = "Serving /field",
source = params.source,
key = params.key,
paths = paths.len(),
);
let Some(item) = state.get(&label, &params.key).await else { let Some(item) = state.get(&label, &params.key).await else {
return StatusCode::NOT_FOUND.into_response(); return StatusCode::NOT_FOUND.into_response();
}; };
let state = ExtractState { ignore_mime: false }; let extract_state = ExtractState { ignore_mime: false };
let item = PileValue::Item(item); let item = PileValue::Item(item);
let value = match item.query(&state, &path).await {
Ok(Some(v)) => v, // Try each path in order, returning the first non-null result
Ok(None) => return StatusCode::NOT_FOUND.into_response(), let mut value = None;
for path in &paths {
match item.query(&extract_state, path).await {
Ok(Some(PileValue::Null)) | Ok(None) => continue,
Ok(Some(v)) => {
value = Some(v);
break;
}
Err(e) => return (StatusCode::INTERNAL_SERVER_ERROR, format!("{e:?}")).into_response(), Err(e) => return (StatusCode::INTERNAL_SERVER_ERROR, format!("{e:?}")).into_response(),
}
}
let Some(value) = value else {
return StatusCode::NOT_FOUND.into_response();
}; };
debug!( debug!(
message = "Served /field", message = "Served /field",
source = params.source, source = params.source,
key = params.key, key = params.key,
path = params.path,
time_ms = start.elapsed().as_millis() time_ms = start.elapsed().as_millis()
); );
@@ -106,7 +136,7 @@ pub async fn get_field(
bytes.as_ref().clone(), bytes.as_ref().clone(),
) )
.into_response(), .into_response(),
_ => match value.to_json(&state).await { _ => match value.to_json(&extract_state).await {
Ok(json) => ( Ok(json) => (
StatusCode::OK, StatusCode::OK,
[(header::CONTENT_DISPOSITION, disposition.to_owned())], [(header::CONTENT_DISPOSITION, disposition.to_owned())],

View File

@@ -0,0 +1,76 @@
use epub::doc::EpubDoc;
use mime::Mime;
use pile_io::SyncReadBridge;
use std::sync::{Arc, OnceLock};
use tracing::trace;
use crate::{
extract::traits::ExtractState,
value::{Item, PileValue},
};
pub struct EpubCoverExtractor {
item: Item,
output: OnceLock<Option<(Mime, Vec<u8>)>>,
}
impl EpubCoverExtractor {
pub fn new(item: &Item) -> Self {
Self {
item: item.clone(),
output: OnceLock::new(),
}
}
async fn get_inner(&self) -> Result<Option<&(Mime, Vec<u8>)>, std::io::Error> {
if let Some(x) = self.output.get() {
return Ok(x.as_ref());
}
let reader = SyncReadBridge::new_current(self.item.read().await?);
let result = tokio::task::spawn_blocking(move || {
let mut doc = EpubDoc::from_reader(reader)?;
let cover_id = match doc.get_cover_id() {
Ok(id) => id,
Err(_) => return Ok::<_, anyhow::Error>(None),
};
let mime: Mime = doc
.resources
.get(&cover_id)
.and_then(|(_, mime_str)| mime_str.parse().ok())
.unwrap_or(mime::IMAGE_JPEG);
let bytes = doc.get_cover()?;
Ok(Some((mime, bytes)))
})
.await?;
let result = match result {
Ok(x) => x,
Err(error) => match error.downcast::<std::io::Error>() {
Ok(x) => return Err(x),
Err(error) => {
trace!(message = "Could not extract epub cover", ?error, key = ?self.item.key());
None
}
},
};
Ok(self.output.get_or_init(|| result).as_ref())
}
pub async fn get(&self, state: &ExtractState) -> Result<Option<PileValue>, std::io::Error> {
if !state.ignore_mime && self.item.mime().essence_str() != "application/epub+zip" {
return Ok(None);
}
Ok(self
.get_inner()
.await?
.map(|(mime, bytes)| PileValue::Blob {
mime: mime.clone(),
bytes: Arc::new(bytes.clone()),
}))
}
}

View File

@@ -1,6 +1,9 @@
use pile_config::Label; use pile_config::Label;
use std::sync::Arc; use std::sync::Arc;
mod epub_cover;
pub use epub_cover::*;
mod epub_meta; mod epub_meta;
pub use epub_meta::*; pub use epub_meta::*;
@@ -15,6 +18,7 @@ use crate::{
pub struct EpubExtractor { pub struct EpubExtractor {
text: Arc<EpubTextExtractor>, text: Arc<EpubTextExtractor>,
meta: Arc<EpubMetaExtractor>, meta: Arc<EpubMetaExtractor>,
cover: Arc<EpubCoverExtractor>,
} }
impl EpubExtractor { impl EpubExtractor {
@@ -22,6 +26,7 @@ impl EpubExtractor {
Self { Self {
text: Arc::new(EpubTextExtractor::new(item)), text: Arc::new(EpubTextExtractor::new(item)),
meta: Arc::new(EpubMetaExtractor::new(item)), meta: Arc::new(EpubMetaExtractor::new(item)),
cover: Arc::new(EpubCoverExtractor::new(item)),
} }
} }
} }
@@ -43,6 +48,7 @@ impl ObjectExtractor for EpubExtractor {
)), )),
("meta", None) => Ok(Some(PileValue::ObjectExtractor(self.meta.clone()))), ("meta", None) => Ok(Some(PileValue::ObjectExtractor(self.meta.clone()))),
("cover", None) => self.cover.get(state).await,
_ => Ok(None), _ => Ok(None),
} }
} }
@@ -52,6 +58,7 @@ impl ObjectExtractor for EpubExtractor {
Ok(vec![ Ok(vec![
Label::new("text").unwrap(), Label::new("text").unwrap(),
Label::new("meta").unwrap(), Label::new("meta").unwrap(),
Label::new("cover").unwrap(),
]) ])
} }
@@ -78,6 +85,18 @@ impl ObjectExtractor for EpubExtractor {
continue; continue;
} }
if k.as_str() == "cover" {
let summary = match &v {
PileValue::Blob { mime, bytes } => {
format!("<Blob ({}, {} bytes)>", mime, bytes.len())
}
PileValue::Null => "<null>".to_owned(),
_ => "<cover>".to_owned(),
};
map.insert(k.to_string(), serde_json::Value::String(summary));
continue;
}
map.insert(k.to_string(), Box::pin(v.to_json(state)).await?); map.insert(k.to_string(), Box::pin(v.to_json(state)).await?);
} }