Add id3 extractor

This commit is contained in:
2026-03-06 17:03:35 -08:00
parent 32c611186f
commit 77b3125af4
6 changed files with 134 additions and 1 deletions

View File

@@ -91,6 +91,7 @@ toml_edit = "0.25.4"
sha2 = "0.11.0-rc.5" sha2 = "0.11.0-rc.5"
blake3 = "1.8.3" blake3 = "1.8.3"
pdf = "0.10.0" pdf = "0.10.0"
id3 = "1.16.4"
# Misc helpers # Misc helpers
thiserror = "2.0.18" thiserror = "2.0.18"

View File

@@ -25,3 +25,4 @@ smartstring = { workspace = true }
blake3 = { workspace = true } blake3 = { workspace = true }
toml_edit = { workspace = true } toml_edit = { workspace = true }
pdf = { workspace = true } pdf = { workspace = true }
id3 = { workspace = true }

View File

@@ -0,0 +1,115 @@
use id3::Tag;
use pile_config::Label;
use std::{borrow::Cow, collections::HashMap, sync::OnceLock};
use crate::{FileItem, PileValue, extract::Extractor};
/// Extractor that exposes the text frames of a file's ID3 tag as labelled values.
///
/// The tag is not read on construction; it is read the first time a field is
/// requested and the resulting map is kept for later lookups.
pub struct Id3Extractor<'a> {
/// The file whose `path` is inspected and read for an ID3 tag.
item: &'a FileItem,
/// Write-once cache of the extracted fields, filled in by `get_inner`.
output: OnceLock<HashMap<Label, PileValue<'a, FileItem>>>,
}
impl<'a> Id3Extractor<'a> {
    /// Creates an extractor for `item`.
    ///
    /// No I/O happens here; the file is only read on the first field lookup.
    pub fn new(item: &'a FileItem) -> Self {
        Self {
            item,
            output: OnceLock::new(),
        }
    }

    /// Returns the extracted tag fields, reading and parsing the file on first use.
    ///
    /// The result maps a field name (see `frame_id_to_field`) to a
    /// `PileValue::Array` holding one `PileValue::String` per text frame with
    /// that name. Frames without text content, and frames whose name is not
    /// accepted by `Label::new`, are skipped.
    ///
    /// An empty map is cached and returned when the file extension is not one
    /// of the handled audio extensions, or when the file carries no tag.
    ///
    /// # Errors
    ///
    /// I/O errors from reading the file are returned as-is; any other tag
    /// parsing error is wrapped as `std::io::ErrorKind::InvalidData`.
    /// Errors are not cached, so a later call will try to read the file again.
    fn get_inner(&self) -> Result<&HashMap<Label, PileValue<'a, FileItem>>, std::io::Error> {
        // Extensions (lowercase) of the files we attempt to read a tag from.
        const TAGGED_EXTENSIONS: [&str; 4] = ["mp3", "aiff", "aif", "wav"];

        if let Some(cached) = self.output.get() {
            return Ok(cached);
        }

        // Compare case-insensitively so that e.g. `SONG.MP3` is not silently
        // treated as a file without tags.
        let has_tagged_extension = self
            .item
            .path
            .extension()
            .and_then(|x| x.to_str())
            .is_some_and(|ext| {
                TAGGED_EXTENSIONS
                    .iter()
                    .any(|known| ext.eq_ignore_ascii_case(known))
            });
        if !has_tagged_extension {
            return Ok(self.output.get_or_init(HashMap::new));
        }

        let tag = match Tag::read_from_path(&self.item.path) {
            Ok(tag) => tag,
            // A missing tag is not an error: the file simply has no fields.
            Err(id3::Error {
                kind: id3::ErrorKind::NoTag,
                ..
            }) => return Ok(self.output.get_or_init(HashMap::new)),
            // Hand the underlying I/O error back unchanged.
            Err(id3::Error {
                kind: id3::ErrorKind::Io(e),
                ..
            }) => return Err(e),
            Err(e) => return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, e)),
        };

        // Several frames may map to the same field name, so collect every
        // value per name before wrapping each group in an array.
        let mut grouped: HashMap<Label, Vec<PileValue<'a, FileItem>>> = HashMap::new();
        for frame in tag.frames() {
            let Some(text) = frame.content().text() else {
                continue;
            };
            let name = frame_id_to_field(frame.id());
            if let Some(key) = Label::new(name) {
                grouped
                    .entry(key)
                    .or_default()
                    .push(PileValue::String(text.into()));
            }
        }

        let output = grouped
            .into_iter()
            .map(|(k, v)| (k, PileValue::Array(v)))
            .collect();
        Ok(self.output.get_or_init(|| output))
    }
}
/// Translate an ID3 frame ID into the matching Vorbis Comment field name.
///
/// Known IDs resolve to a borrowed static name. Any other ID is returned as
/// an owned, lowercased copy of itself. Matching is exact and case-sensitive.
fn frame_id_to_field(id: &str) -> Cow<'static, str> {
    // (frame ID, field name) pairs; frame IDs are unique within the table.
    const KNOWN_FRAMES: &[(&str, &str)] = &[
        ("TIT2", "title"),
        ("TIT1", "grouping"),
        ("TIT3", "subtitle"),
        ("TPE1", "artist"),
        ("TPE2", "albumartist"),
        ("TPE3", "conductor"),
        ("TOPE", "originalartist"),
        ("TALB", "album"),
        ("TOAL", "originalalbum"),
        ("TRCK", "tracknumber"),
        ("TPOS", "discnumber"),
        ("TSST", "discsubtitle"),
        ("TDRC", "date"),
        ("TYER", "date"),
        ("TDOR", "originaldate"),
        ("TORY", "originaldate"),
        ("TCON", "genre"),
        ("TCOM", "composer"),
        ("TEXT", "lyricist"),
        ("TPUB", "label"),
        ("TSRC", "isrc"),
        ("TBPM", "bpm"),
        ("TLAN", "language"),
        ("TMED", "media"),
        ("TMOO", "mood"),
        ("TCOP", "copyright"),
        ("TENC", "encodedby"),
        ("TSSE", "encodersettings"),
        ("TSOA", "albumsort"),
        ("TSOP", "artistsort"),
        ("TSOT", "titlesort"),
        ("MVNM", "movement"),
        ("MVIN", "movementnumber"),
    ];

    KNOWN_FRAMES
        .iter()
        .find(|(frame_id, _)| *frame_id == id)
        .map_or_else(
            || Cow::Owned(id.to_lowercase()),
            |(_, field)| Cow::Borrowed(*field),
        )
}
impl Extractor<FileItem> for Id3Extractor<'_> {
    /// Looks up a single field by name.
    ///
    /// Yields `Ok(None)` when the extracted map has no entry for `name`, and
    /// forwards any error raised while obtaining the map.
    fn field<'a>(
        &'a self,
        name: &Label,
    ) -> Result<Option<&'a PileValue<'a, FileItem>>, std::io::Error> {
        let extracted = self.get_inner()?;
        Ok(extracted.get(name))
    }

    /// Lists the names of every extracted field, in no particular order.
    fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
        let extracted = self.get_inner()?;
        let names = extracted.keys().cloned().collect();
        Ok(names)
    }
}

View File

@@ -4,6 +4,9 @@ use std::{collections::HashMap, rc::Rc};
mod flac; mod flac;
pub use flac::*; pub use flac::*;
mod id3;
pub use id3::*;
mod fs; mod fs;
pub use fs::*; pub use fs::*;
@@ -48,6 +51,10 @@ impl<'a> MetaExtractor<'a, crate::FileItem> {
Label::new("flac").unwrap(), Label::new("flac").unwrap(),
crate::PileValue::Extractor(Rc::new(FlacExtractor::new(item))), crate::PileValue::Extractor(Rc::new(FlacExtractor::new(item))),
), ),
(
Label::new("id3").unwrap(),
crate::PileValue::Extractor(Rc::new(Id3Extractor::new(item))),
),
( (
Label::new("fs").unwrap(), Label::new("fs").unwrap(),
crate::PileValue::Extractor(Rc::new(FsExtractor::new(item))), crate::PileValue::Extractor(Rc::new(FsExtractor::new(item))),
@@ -79,6 +86,7 @@ impl Extractor<crate::FileItem> for MetaExtractor<'_, crate::FileItem> {
fn fields(&self) -> Result<Vec<Label>, std::io::Error> { fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
return Ok(vec![ return Ok(vec![
Label::new("flac").unwrap(), Label::new("flac").unwrap(),
Label::new("id3").unwrap(),
Label::new("fs").unwrap(), Label::new("fs").unwrap(),
Label::new("pdf").unwrap(), Label::new("pdf").unwrap(),
Label::new("sidecar").unwrap(), Label::new("sidecar").unwrap(),

View File

@@ -23,6 +23,10 @@ impl<'a> PdfMetaExtractor<'a> {
return Ok(x); return Ok(x);
} }
if self.item.path.extension().map(|x| x.to_str()).flatten() != Some("pdf") {
return Ok(self.output.get_or_init(|| HashMap::new()));
}
let file = FileOptions::cached() let file = FileOptions::cached()
.open(&self.item.path) .open(&self.item.path)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?; .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;

View File

@@ -23,6 +23,10 @@ impl<'a> PdfTextExtractor<'a> {
return Ok(x); return Ok(x);
} }
if self.item.path.extension().map(|x| x.to_str()).flatten() != Some("pdf") {
return Ok(self.output.get_or_init(|| HashMap::new()));
}
let file = FileOptions::cached() let file = FileOptions::cached()
.open(&self.item.path) .open(&self.item.path)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?; .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;