Add ListExtractor
This commit is contained in:
@@ -4,7 +4,8 @@ use pile_config::Label;
|
||||
use std::{collections::HashMap, io::BufReader, sync::OnceLock};
|
||||
use tracing::trace;
|
||||
|
||||
use crate::{Item, PileValue, SyncReadBridge, extract::Extractor};
|
||||
use crate::extract::ObjectExtractor;
|
||||
use crate::{Item, PileValue, SyncReadBridge};
|
||||
|
||||
pub struct PdfMetaExtractor<'a> {
|
||||
item: &'a Item,
|
||||
@@ -40,6 +41,8 @@ impl<'a> PdfMetaExtractor<'a> {
|
||||
}
|
||||
};
|
||||
|
||||
let page_count = file.num_pages();
|
||||
|
||||
let mut meta: Vec<(&'static str, Option<String>)> = Vec::new();
|
||||
|
||||
if let Some(info) = &file.trailer.info_dict {
|
||||
@@ -64,12 +67,12 @@ impl<'a> PdfMetaExtractor<'a> {
|
||||
meta.push(("mod_date", info.mod_date.as_ref().map(format_date)));
|
||||
}
|
||||
|
||||
Ok::<_, std::io::Error>(meta)
|
||||
Ok::<_, std::io::Error>((page_count, meta))
|
||||
})
|
||||
.await
|
||||
.map_err(std::io::Error::other)?;
|
||||
|
||||
let raw_meta = match raw_meta {
|
||||
let (page_count, raw_meta) = match raw_meta {
|
||||
Ok(x) => x,
|
||||
Err(error) => {
|
||||
trace!(message = "Could not process pdf", ?error, key = ?self.item.key());
|
||||
@@ -79,6 +82,12 @@ impl<'a> PdfMetaExtractor<'a> {
|
||||
|
||||
let mut output: HashMap<Label, PileValue<'a>> = HashMap::new();
|
||||
|
||||
#[expect(clippy::unwrap_used)]
|
||||
output.insert(
|
||||
Label::new("pages").unwrap(),
|
||||
PileValue::U64(page_count as u64),
|
||||
);
|
||||
|
||||
#[expect(clippy::unwrap_used)]
|
||||
for (key, val) in raw_meta {
|
||||
let label = Label::new(key).unwrap();
|
||||
@@ -106,7 +115,7 @@ fn format_date(d: &Date) -> String {
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl Extractor for PdfMetaExtractor<'_> {
|
||||
impl ObjectExtractor for PdfMetaExtractor<'_> {
|
||||
async fn field<'a>(
|
||||
&'a self,
|
||||
name: &Label,
|
||||
|
||||
Reference in New Issue
Block a user