Add id3 extractor
This commit is contained in:
@@ -91,6 +91,7 @@ toml_edit = "0.25.4"
|
||||
sha2 = "0.11.0-rc.5"
|
||||
blake3 = "1.8.3"
|
||||
pdf = "0.10.0"
|
||||
id3 = "1.16.4"
|
||||
|
||||
# Misc helpers
|
||||
thiserror = "2.0.18"
|
||||
|
||||
@@ -25,3 +25,4 @@ smartstring = { workspace = true }
|
||||
blake3 = { workspace = true }
|
||||
toml_edit = { workspace = true }
|
||||
pdf = { workspace = true }
|
||||
id3 = { workspace = true }
|
||||
|
||||
115
crates/pile-dataset/src/extract/id3.rs
Normal file
115
crates/pile-dataset/src/extract/id3.rs
Normal file
@@ -0,0 +1,115 @@
|
||||
use id3::Tag;
|
||||
use pile_config::Label;
|
||||
use std::{borrow::Cow, collections::HashMap, sync::OnceLock};
|
||||
|
||||
use crate::{FileItem, PileValue, extract::Extractor};
|
||||
|
||||
pub struct Id3Extractor<'a> {
|
||||
item: &'a FileItem,
|
||||
output: OnceLock<HashMap<Label, PileValue<'a, FileItem>>>,
|
||||
}
|
||||
|
||||
impl<'a> Id3Extractor<'a> {
|
||||
pub fn new(item: &'a FileItem) -> Self {
|
||||
Self {
|
||||
item,
|
||||
output: OnceLock::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_inner(&self) -> Result<&HashMap<Label, PileValue<'a, FileItem>>, std::io::Error> {
|
||||
if let Some(x) = self.output.get() {
|
||||
return Ok(x);
|
||||
}
|
||||
|
||||
let ext = self.item.path.extension().and_then(|x| x.to_str());
|
||||
if !matches!(ext, Some("mp3") | Some("aiff") | Some("aif") | Some("wav")) {
|
||||
return Ok(self.output.get_or_init(HashMap::new));
|
||||
}
|
||||
|
||||
let tag = match Tag::read_from_path(&self.item.path) {
|
||||
Ok(tag) => tag,
|
||||
Err(id3::Error {
|
||||
kind: id3::ErrorKind::NoTag,
|
||||
..
|
||||
}) => return Ok(self.output.get_or_init(HashMap::new)),
|
||||
Err(id3::Error {
|
||||
kind: id3::ErrorKind::Io(e),
|
||||
..
|
||||
}) => return Err(e),
|
||||
Err(e) => return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, e)),
|
||||
};
|
||||
|
||||
let mut output: HashMap<Label, Vec<PileValue<'a, FileItem>>> = HashMap::new();
|
||||
for frame in tag.frames() {
|
||||
if let Some(text) = frame.content().text() {
|
||||
let name = frame_id_to_field(frame.id());
|
||||
if let Some(key) = Label::new(name) {
|
||||
output
|
||||
.entry(key)
|
||||
.or_default()
|
||||
.push(PileValue::String(text.into()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let output = output
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k, PileValue::Array(v)))
|
||||
.collect();
|
||||
|
||||
return Ok(self.output.get_or_init(|| output));
|
||||
}
|
||||
}
|
||||
|
||||
/// Translate an ID3 frame ID into the matching Vorbis Comment field name.
///
/// Known frame IDs resolve to a borrowed static name. Any other ID is
/// returned lowercased as an owned string.
fn frame_id_to_field(id: &str) -> Cow<'static, str> {
    /// `(frame ID, field name)` pairs for every frame with a dedicated name.
    const FIELD_NAMES: &[(&str, &str)] = &[
        ("TIT2", "title"),
        ("TIT1", "grouping"),
        ("TIT3", "subtitle"),
        ("TPE1", "artist"),
        ("TPE2", "albumartist"),
        ("TPE3", "conductor"),
        ("TOPE", "originalartist"),
        ("TALB", "album"),
        ("TOAL", "originalalbum"),
        ("TRCK", "tracknumber"),
        ("TPOS", "discnumber"),
        ("TSST", "discsubtitle"),
        ("TDRC", "date"),
        ("TYER", "date"),
        ("TDOR", "originaldate"),
        ("TORY", "originaldate"),
        ("TCON", "genre"),
        ("TCOM", "composer"),
        ("TEXT", "lyricist"),
        ("TPUB", "label"),
        ("TSRC", "isrc"),
        ("TBPM", "bpm"),
        ("TLAN", "language"),
        ("TMED", "media"),
        ("TMOO", "mood"),
        ("TCOP", "copyright"),
        ("TENC", "encodedby"),
        ("TSSE", "encodersettings"),
        ("TSOA", "albumsort"),
        ("TSOP", "artistsort"),
        ("TSOT", "titlesort"),
        ("MVNM", "movement"),
        ("MVIN", "movementnumber"),
    ];

    match FIELD_NAMES.iter().find(|(frame, _)| *frame == id) {
        Some(&(_, field)) => Cow::Borrowed(field),
        // No dedicated name: fall back to the lowercased frame ID.
        None => Cow::Owned(id.to_lowercase()),
    }
}
|
||||
|
||||
impl Extractor<FileItem> for Id3Extractor<'_> {
|
||||
fn field<'a>(
|
||||
&'a self,
|
||||
name: &Label,
|
||||
) -> Result<Option<&'a PileValue<'a, FileItem>>, std::io::Error> {
|
||||
Ok(self.get_inner()?.get(name))
|
||||
}
|
||||
|
||||
fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
||||
Ok(self.get_inner()?.keys().cloned().collect())
|
||||
}
|
||||
}
|
||||
@@ -4,6 +4,9 @@ use std::{collections::HashMap, rc::Rc};
|
||||
mod flac;
|
||||
pub use flac::*;
|
||||
|
||||
mod id3;
|
||||
pub use id3::*;
|
||||
|
||||
mod fs;
|
||||
pub use fs::*;
|
||||
|
||||
@@ -48,6 +51,10 @@ impl<'a> MetaExtractor<'a, crate::FileItem> {
|
||||
Label::new("flac").unwrap(),
|
||||
crate::PileValue::Extractor(Rc::new(FlacExtractor::new(item))),
|
||||
),
|
||||
(
|
||||
Label::new("id3").unwrap(),
|
||||
crate::PileValue::Extractor(Rc::new(Id3Extractor::new(item))),
|
||||
),
|
||||
(
|
||||
Label::new("fs").unwrap(),
|
||||
crate::PileValue::Extractor(Rc::new(FsExtractor::new(item))),
|
||||
@@ -79,9 +86,10 @@ impl Extractor<crate::FileItem> for MetaExtractor<'_, crate::FileItem> {
|
||||
fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
||||
return Ok(vec![
|
||||
Label::new("flac").unwrap(),
|
||||
Label::new("id3").unwrap(),
|
||||
Label::new("fs").unwrap(),
|
||||
Label::new("pdf").unwrap(),
|
||||
Label::new("sidecar").unwrap(),
|
||||
]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -23,6 +23,10 @@ impl<'a> PdfMetaExtractor<'a> {
|
||||
return Ok(x);
|
||||
}
|
||||
|
||||
if self.item.path.extension().map(|x| x.to_str()).flatten() != Some("pdf") {
|
||||
return Ok(self.output.get_or_init(|| HashMap::new()));
|
||||
}
|
||||
|
||||
let file = FileOptions::cached()
|
||||
.open(&self.item.path)
|
||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
|
||||
|
||||
@@ -23,6 +23,10 @@ impl<'a> PdfTextExtractor<'a> {
|
||||
return Ok(x);
|
||||
}
|
||||
|
||||
if self.item.path.extension().map(|x| x.to_str()).flatten() != Some("pdf") {
|
||||
return Ok(self.output.get_or_init(|| HashMap::new()));
|
||||
}
|
||||
|
||||
let file = FileOptions::cached()
|
||||
.open(&self.item.path)
|
||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
|
||||
|
||||
Reference in New Issue
Block a user