From 16f1e38087f9773bc32a0a06c6c10063145ea35d Mon Sep 17 00:00:00 2001 From: rm-dr <96270320+rm-dr@users.noreply.github.com> Date: Thu, 5 Mar 2026 22:02:38 -0800 Subject: [PATCH] Add sidecar metadata files --- crates/pile-config/src/lib.rs | 21 ++++++- crates/pile-dataset/src/dataset.rs | 12 ++-- crates/pile-dataset/src/extract/flac.rs | 4 +- crates/pile-dataset/src/extract/mod.rs | 17 +++++- crates/pile-dataset/src/extract/sidecar.rs | 69 ++++++++++++++++++++++ crates/pile-dataset/src/item.rs | 3 + crates/pile-dataset/src/source/dir.rs | 7 ++- crates/pile/src/command/probe.rs | 1 + 8 files changed, 122 insertions(+), 12 deletions(-) create mode 100644 crates/pile-dataset/src/extract/sidecar.rs diff --git a/crates/pile-config/src/lib.rs b/crates/pile-config/src/lib.rs index 819501b..461a271 100644 --- a/crates/pile-config/src/lib.rs +++ b/crates/pile-config/src/lib.rs @@ -13,6 +13,10 @@ pub mod objectpath; pub static INIT_DB_TOML: &str = include_str!("./config.toml"); +fn default_true() -> bool { + true +} + #[test] #[expect(clippy::expect_used)] fn init_db_toml_valid() { @@ -46,8 +50,21 @@ pub struct DatasetConfig { #[serde(tag = "type")] #[serde(rename_all = "lowercase")] pub enum Source { - /// A directory of FLAC files - Flac { path: OneOrMany }, + /// A directory of files + Filesystem { + /// The directories to scan. + /// Must be relative. + #[serde(alias = "paths")] + path: OneOrMany, + + /// If true, all toml files are ignored. + /// Metadata can be added to any file using a {filename}.toml. + /// + /// If false, toml files are treated as regular files + /// and sidecar metadata is disabled. 
+ #[serde(default = "default_true")] + sidecars: bool, + }, } // diff --git a/crates/pile-dataset/src/dataset.rs b/crates/pile-dataset/src/dataset.rs index ab37cf9..27f9d2c 100644 --- a/crates/pile-dataset/src/dataset.rs +++ b/crates/pile-dataset/src/dataset.rs @@ -103,7 +103,9 @@ impl Dataset { ) -> Option + 'static>> { let s = self.config.dataset.source.get(source)?; let s = match s { - Source::Flac { path } => DirDataSource::new(source, path.clone().to_vec()), + Source::Filesystem { path, sidecars } => { + DirDataSource::new(source, path.clone().to_vec(), *sidecars) + } }; s.get(key).ok().flatten() @@ -254,8 +256,8 @@ impl Dataset { for (label, source) in &self.config.dataset.source { match source { - Source::Flac { path } => { - let s = DirDataSource::new(label, path.clone().to_vec()); + Source::Filesystem { path, sidecars } => { + let s = DirDataSource::new(label, path.clone().to_vec(), *sidecars); match (ts, s.latest_change()?) { (_, None) => continue, (None, Some(new)) => ts = Some(new), @@ -315,8 +317,8 @@ fn start_read_task( info!("Loading source {name}"); match source { - Source::Flac { path: dir } => { - let source = DirDataSource::new(name, dir.clone().to_vec()); + Source::Filesystem { path, sidecars } => { + let source = DirDataSource::new(name, path.clone().to_vec(), *sidecars); for i in source.iter() { match i { Ok(x) => batch.push(x), diff --git a/crates/pile-dataset/src/extract/flac.rs b/crates/pile-dataset/src/extract/flac.rs index e36b934..65c7ee4 100644 --- a/crates/pile-dataset/src/extract/flac.rs +++ b/crates/pile-dataset/src/extract/flac.rs @@ -27,7 +27,9 @@ impl<'a> FlacExtractor<'a> { let mut output: HashMap> = HashMap::new(); for block in reader { - if let FlacBlock::VorbisComment(comment) = block.unwrap() { + if let FlacBlock::VorbisComment(comment) = + block.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))? 
+ { for (k, v) in comment.comment.comments { match Label::new(k.to_string().to_lowercase()) { Some(k) => output.entry(k).or_default().push(PileValue::String(v)), diff --git a/crates/pile-dataset/src/extract/mod.rs b/crates/pile-dataset/src/extract/mod.rs index 55aa7d9..ee3aa9d 100644 --- a/crates/pile-dataset/src/extract/mod.rs +++ b/crates/pile-dataset/src/extract/mod.rs @@ -1,14 +1,17 @@ -mod flac; +use pile_config::Label; use std::{collections::HashMap, rc::Rc}; +mod flac; pub use flac::*; mod fs; pub use fs::*; +mod sidecar; +pub use sidecar::*; + mod map; pub use map::*; -use pile_config::Label; /// An attachment that extracts metadata from an [Item]. /// @@ -46,6 +49,10 @@ impl<'a> MetaExtractor<'a, crate::FileItem> { Label::new("fs").unwrap(), crate::PileValue::Extractor(Rc::new(FsExtractor::new(item))), ), + ( + Label::new("sidecar").unwrap(), + crate::PileValue::Extractor(Rc::new(SidecarExtractor::new(item))), + ), ]), }; @@ -63,6 +70,10 @@ impl Extractor for MetaExtractor<'_, crate::FileItem> { #[expect(clippy::unwrap_used)] fn fields(&self) -> Result, std::io::Error> { - return Ok(vec![Label::new("flac").unwrap(), Label::new("fs").unwrap()]); + return Ok(vec![ + Label::new("flac").unwrap(), + Label::new("fs").unwrap(), + Label::new("sidecar").unwrap(), + ]); } } diff --git a/crates/pile-dataset/src/extract/sidecar.rs b/crates/pile-dataset/src/extract/sidecar.rs new file mode 100644 index 0000000..b282877 --- /dev/null +++ b/crates/pile-dataset/src/extract/sidecar.rs @@ -0,0 +1,69 @@ +use pile_config::Label; +use std::{collections::HashMap, sync::OnceLock}; + +use crate::{FileItem, Item, PileValue, extract::Extractor}; + +fn toml_to_pile(value: toml::Value) -> PileValue<'static, I> { + match value { + toml::Value::String(s) => PileValue::String(s.into()), + toml::Value::Integer(i) => PileValue::String(i.to_string().into()), + toml::Value::Float(f) => PileValue::String(f.to_string().into()), + toml::Value::Boolean(b) => 
PileValue::String(b.to_string().into()), + toml::Value::Datetime(d) => PileValue::String(d.to_string().into()), + toml::Value::Array(a) => PileValue::Array(a.into_iter().map(toml_to_pile).collect()), + toml::Value::Table(_) => PileValue::Null, + } +} + +pub struct SidecarExtractor<'a> { + item: &'a FileItem, + output: OnceLock>>, +} + +impl<'a> SidecarExtractor<'a> { + pub fn new(item: &'a FileItem) -> Self { + Self { + item, + output: OnceLock::new(), + } + } + + fn get_inner(&self) -> Result<&HashMap>, std::io::Error> { + if let Some(x) = self.output.get() { + return Ok(x); + } + + let sidecar_file = self.item.path.with_extension("toml"); + + if !(sidecar_file.is_file() && self.item.sidecar) { + return Ok(self.output.get_or_init(HashMap::new)); + } + + let sidecar = std::fs::read_to_string(&sidecar_file)?; + let sidecar: toml::Value = toml::from_str(&sidecar) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; + + let output: HashMap> = match sidecar { + toml::Value::Table(t) => t + .into_iter() + .filter_map(|(k, v)| Label::new(&k).map(|label| (label, toml_to_pile(v)))) + .collect(), + _ => HashMap::new(), + }; + + return Ok(self.output.get_or_init(|| output)); + } +} + +impl Extractor for SidecarExtractor<'_> { + fn field<'a>( + &'a self, + name: &Label, + ) -> Result>, std::io::Error> { + Ok(self.get_inner()?.get(name)) + } + + fn fields(&self) -> Result, std::io::Error> { + Ok(self.get_inner()?.keys().cloned().collect()) + } +} diff --git a/crates/pile-dataset/src/item.rs b/crates/pile-dataset/src/item.rs index f976156..a274547 100644 --- a/crates/pile-dataset/src/item.rs +++ b/crates/pile-dataset/src/item.rs @@ -43,6 +43,9 @@ pub struct FileItem { /// Must be relative to source root dir. 
pub path: PathBuf, pub source_name: Label, + + /// If true, look for a sidecar file + pub sidecar: bool, } impl Item for FileItem { diff --git a/crates/pile-dataset/src/source/dir.rs b/crates/pile-dataset/src/source/dir.rs index 7c2f828..15b582f 100644 --- a/crates/pile-dataset/src/source/dir.rs +++ b/crates/pile-dataset/src/source/dir.rs @@ -10,13 +10,16 @@ use crate::{DataSource, Item, item::FileItem, path_ts_latest}; pub struct DirDataSource { pub name: Label, pub dirs: Vec, + + pub sidecars: bool, } impl DirDataSource { - pub fn new(name: &Label, dirs: Vec) -> Self { + pub fn new(name: &Label, dirs: Vec, sidecars: bool) -> Self { Self { name: name.clone(), dirs, + sidecars, } } } @@ -36,6 +39,7 @@ impl DataSource for DirDataSource { return Ok(Some(Box::new(FileItem { source_name: self.name.clone(), path: key.to_owned(), + sidecar: self.sidecars, }))); } @@ -64,6 +68,7 @@ impl DataSource for DirDataSource { Some("flac") => Box::new(FileItem { source_name: self.name.clone(), path: path.clone(), + sidecar: self.sidecars, }), Some(_) => return None, }; diff --git a/crates/pile/src/command/probe.rs b/crates/pile/src/command/probe.rs index 422e833..829c8ba 100644 --- a/crates/pile/src/command/probe.rs +++ b/crates/pile/src/command/probe.rs @@ -24,6 +24,7 @@ impl CliCmd for ProbeCommand { let item = FileItem { path: self.file.clone(), source_name: Label::new("probe-source").unwrap(), + sidecar: true, }; let value = PileValue::Extractor(Rc::new(MetaExtractor::new(&item)));