Add exif extractor
This commit is contained in:
74
crates/pile-dataset/src/extract/exif.rs
Normal file
74
crates/pile-dataset/src/extract/exif.rs
Normal file
@@ -0,0 +1,74 @@
|
||||
use pile_config::Label;
|
||||
use std::{collections::HashMap, io::Cursor, sync::OnceLock};
|
||||
use tracing::debug;
|
||||
|
||||
use crate::{Item, PileValue, extract::Extractor};
|
||||
|
||||
pub struct ExifExtractor<'a> {
|
||||
item: &'a Item,
|
||||
output: OnceLock<HashMap<Label, PileValue<'a>>>,
|
||||
}
|
||||
|
||||
impl<'a> ExifExtractor<'a> {
|
||||
pub fn new(item: &'a Item) -> Self {
|
||||
Self {
|
||||
item,
|
||||
output: OnceLock::new(),
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_inner(&self) -> Result<&HashMap<Label, PileValue<'a>>, std::io::Error> {
|
||||
if let Some(x) = self.output.get() {
|
||||
return Ok(x);
|
||||
}
|
||||
|
||||
let bytes = self.item.read().await?.read_to_end().await?;
|
||||
let mut cursor = Cursor::new(bytes);
|
||||
|
||||
let exif = match exif::Reader::new().read_from_container(&mut cursor) {
|
||||
Ok(x) => x,
|
||||
Err(error) => {
|
||||
debug!(message = "Could not process exif", ?error, key = ?self.item.key());
|
||||
return Ok(self.output.get_or_init(HashMap::new));
|
||||
}
|
||||
};
|
||||
|
||||
let mut output: HashMap<Label, PileValue<'a>> = HashMap::new();
|
||||
|
||||
for field in exif.fields() {
|
||||
let Some(label) = tag_to_label(&field.tag) else {
|
||||
continue;
|
||||
};
|
||||
// First occurrence wins (PRIMARY IFD comes before THUMBNAIL)
|
||||
output.entry(label).or_insert_with(|| {
|
||||
PileValue::String(field.display_value().with_unit(&exif).to_string().into())
|
||||
});
|
||||
}
|
||||
|
||||
return Ok(self.output.get_or_init(|| output));
|
||||
}
|
||||
}
|
||||
|
||||
fn tag_to_label(tag: &exif::Tag) -> Option<Label> {
|
||||
let sanitized: String = tag
|
||||
.to_string()
|
||||
.chars()
|
||||
.map(|c| if c == ' ' { '_' } else { c })
|
||||
.filter(|c| Label::VALID_CHARS.contains(*c))
|
||||
.collect();
|
||||
Label::new(sanitized)
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl Extractor for ExifExtractor<'_> {
|
||||
async fn field<'a>(
|
||||
&'a self,
|
||||
name: &Label,
|
||||
) -> Result<Option<&'a PileValue<'a>>, std::io::Error> {
|
||||
Ok(self.get_inner().await?.get(name))
|
||||
}
|
||||
|
||||
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
||||
Ok(self.get_inner().await?.keys().cloned().collect())
|
||||
}
|
||||
}
|
||||
@@ -13,6 +13,9 @@ pub use fs::*;
|
||||
mod epub;
|
||||
pub use epub::*;
|
||||
|
||||
mod exif;
|
||||
pub use exif::*;
|
||||
|
||||
mod pdf;
|
||||
pub use pdf::*;
|
||||
|
||||
@@ -76,6 +79,10 @@ impl<'a> MetaExtractor<'a> {
|
||||
Label::new("epub").unwrap(),
|
||||
crate::PileValue::Extractor(Arc::new(EpubExtractor::new(item))),
|
||||
),
|
||||
(
|
||||
Label::new("exif").unwrap(),
|
||||
crate::PileValue::Extractor(Arc::new(ExifExtractor::new(item))),
|
||||
),
|
||||
(
|
||||
Label::new("pdf").unwrap(),
|
||||
crate::PileValue::Extractor(Arc::new(PdfExtractor::new(item))),
|
||||
@@ -111,6 +118,7 @@ impl Extractor for MetaExtractor<'_> {
|
||||
Label::new("id3").unwrap(),
|
||||
Label::new("fs").unwrap(),
|
||||
Label::new("epub").unwrap(),
|
||||
Label::new("exif").unwrap(),
|
||||
Label::new("pdf").unwrap(),
|
||||
Label::new("sidecar").unwrap(),
|
||||
]);
|
||||
|
||||
Reference in New Issue
Block a user