Write sidecar fields

This commit is contained in:
2026-03-06 15:16:35 -08:00
parent 22724eee3f
commit d51b8b51bf
11 changed files with 311 additions and 49 deletions

View File

@@ -22,3 +22,5 @@ toml = { workspace = true }
thiserror = { workspace = true }
rayon = { workspace = true }
smartstring = { workspace = true }
blake3 = { workspace = true }
toml_edit = { workspace = true }

View File

@@ -21,7 +21,7 @@ use thiserror::Error;
use tracing::{debug, info, trace, warn};
use crate::{
DataSource, Item,
DataSource, FileItem,
index::{DbFtsIndex, FtsLookupResult},
path_ts_earliest,
source::DirDataSource,
@@ -96,11 +96,7 @@ impl Dataset {
// MARK: get
//
pub fn get(
&self,
source: &Label,
key: &PathBuf,
) -> Option<Box<dyn Item<Key = PathBuf> + 'static>> {
pub fn get(&self, source: &Label, key: &PathBuf) -> Option<FileItem> {
let s = self.config.dataset.source.get(source)?;
let s = match s {
Source::Filesystem { path, sidecars } => {
@@ -115,7 +111,7 @@ impl Dataset {
// MARK: fts
//
/// Refresh this dataset's fts index
/// Refresh this dataset's fts index.
pub fn fts_refresh(
&self,
threads: usize,
@@ -163,7 +159,7 @@ impl Dataset {
.install(|| {
batch
.into_par_iter()
.filter_map(|(key, item)| match db_index.entry_to_document(&*item) {
.filter_map(|(key, item)| match db_index.entry_to_document(&item) {
Ok(Some(doc)) => Some((key, doc)),
Ok(None) => {
warn!("Skipping {key:?}, document is empty");
@@ -306,7 +302,7 @@ fn start_read_task(
batch_size: usize,
) -> (
JoinHandle<()>,
Receiver<Result<Vec<(PathBuf, Box<dyn Item<Key = PathBuf>>)>, DatasetError>>,
Receiver<Result<Vec<(PathBuf, FileItem)>, DatasetError>>,
) {
let config = config.clone();
let (read_tx, read_rx) = std::sync::mpsc::sync_channel(2);

View File

@@ -39,9 +39,11 @@ impl<'a> SidecarExtractor<'a> {
return Ok(self.output.get_or_init(HashMap::new));
}
let sidecar = std::fs::read_to_string(&sidecar_file)?;
let sidecar: toml::Value = toml::from_str(&sidecar)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
let sidecar = std::fs::read(&sidecar_file)?;
let sidecar: toml::Value = match toml::from_slice(&sidecar) {
Ok(x) => x,
Err(_) => return Ok(self.output.get_or_init(HashMap::new)),
};
let output: HashMap<Label, PileValue<'_, FileItem>> = match sidecar {
toml::Value::Table(t) => t

View File

@@ -63,9 +63,9 @@ impl DbFtsIndex {
//
/// Turn an entry into a tantivy document
pub fn entry_to_document<K: Key>(
pub fn entry_to_document<K: Key, I: Item<Key = K>>(
&self,
item: &dyn Item<Key = K>,
item: &I,
) -> Result<Option<TantivyDocument>, TantivyError> {
let mut doc = TantivyDocument::default();

View File

@@ -1,5 +1,10 @@
use pile_config::Label;
use std::{fmt::Debug, path::PathBuf};
use std::{fmt::Debug, path::PathBuf, rc::Rc};
use crate::{
PileValue,
extract::{Extractor, SidecarExtractor},
};
//
// MARK: key
@@ -28,12 +33,27 @@ impl Key for PathBuf {
//
/// A pointer to raw data
pub trait Item: Debug + Send + Sync + 'static {
pub trait Item: Debug + Send + Sync + 'static + Sized {
/// The key type that identifies this item.
type Key: Key;
/// The name of the source this item came from.
fn source_name(&self) -> &str;
/// The key that identifies this item.
fn key(&self) -> &Self::Key;
/// Get this item's sidecar metadata.
///
/// Returns `Ok(None)` when the item has no sidecar
/// (for [FileItem], when sidecars are disabled).
fn sidecar(&self) -> Result<Option<Rc<dyn Extractor<Self> + '_>>, std::io::Error>;
/// Set a value in this item's sidecar metadata.
///
/// `path` is the sequence of nested table keys leading to the value;
/// its last element names the key that receives `value`, replacing
/// whatever was stored there.
///
/// The [FileItem] implementation loads the existing sidecar file (if any),
/// updates it, and writes it back. It does nothing when sidecars are
/// disabled or when `path` is empty.
fn write_sidecar(
&self,
path: Vec<Label>,
value: PileValue<'_, Self>,
) -> Result<(), std::io::Error>;
/// Compute a blake3 hash for this item.
///
/// The [FileItem] implementation hashes the contents of the file.
fn hash(&self) -> Result<blake3::Hash, std::io::Error>;
/// Item conversion, downcast to specific type.
/// Returns `None` if this is not a [FileItem]
fn as_file(&self) -> Option<&FileItem>;
}
@@ -62,4 +82,97 @@ impl Item for FileItem {
/// A [FileItem] is already a file item, so this always returns `Some(self)`.
fn as_file(&self) -> Option<&FileItem> {
Some(self)
}
/// Compute the blake3 digest of the file at `self.path`.
///
/// The file is copied into the hasher with `std::io::copy`, so it is
/// streamed rather than loaded into memory in one piece.
///
/// # Errors
/// Returns any I/O error raised while opening or reading the file.
fn hash(&self) -> Result<blake3::Hash, std::io::Error> {
    let mut source = std::fs::File::open(&self.path)?;
    let mut digest = blake3::Hasher::new();
    std::io::copy(&mut source, &mut digest)?;
    Ok(digest.finalize())
}
/// Build the sidecar extractor for this file.
///
/// Returns `Ok(None)` when sidecars are disabled for this item; otherwise
/// returns a new [SidecarExtractor] for `self`, wrapped in an `Rc`.
fn sidecar(&self) -> Result<Option<Rc<dyn Extractor<Self> + '_>>, std::io::Error> {
    if self.sidecar {
        // TODO: use a generic tomlextractor instead?
        // you'll need a fake _ref_ to the toml file, though.
        let extractor: Rc<dyn Extractor<Self> + '_> = Rc::new(SidecarExtractor::new(self));
        Ok(Some(extractor))
    } else {
        Ok(None)
    }
}
/// Write `value` into this file's TOML sidecar at the key path `path`.
///
/// The sidecar is the file's own path with its extension replaced by `toml`.
/// If that file already exists it is parsed first and updated, so keys other
/// than the one being written are kept. Every element of `path` except the
/// last names a nested table (created when missing); the last element is the
/// key that receives `value`, replacing any previous entry.
///
/// Nothing is written when sidecars are disabled for this item, or when
/// `path` is empty.
///
/// # Errors
/// - any I/O error from reading or writing the sidecar file,
/// - whatever `value.to_json()` returns on failure,
/// - `InvalidData` if the JSON form of `value` cannot be represented as TOML,
/// - `InvalidData` if an intermediate element of `path` exists in the sidecar
///   but is not a table.
fn write_sidecar(
    &self,
    path: Vec<Label>,
    value: PileValue<'_, Self>,
) -> Result<(), std::io::Error> {
    /// Convert a `toml::Value` into the equivalent `toml_edit::Item`.
    fn to_edit_item(v: toml::Value) -> toml_edit::Item {
        match v {
            toml::Value::String(s) => toml_edit::value(s),
            toml::Value::Integer(i) => toml_edit::value(i),
            toml::Value::Float(f) => toml_edit::value(f),
            toml::Value::Boolean(b) => toml_edit::value(b),
            toml::Value::Datetime(d) => toml_edit::value(d.to_string()),
            toml::Value::Array(arr) => {
                let mut array = toml_edit::Array::new();
                for element in arr {
                    // Array elements must be values. `into_value` turns a
                    // table into an inline table instead of discarding it,
                    // so arrays of objects keep all of their elements.
                    if let Ok(v) = to_edit_item(element).into_value() {
                        array.push_formatted(v);
                    }
                }
                toml_edit::Item::Value(toml_edit::Value::Array(array))
            }
            toml::Value::Table(t) => {
                let mut table = toml_edit::Table::new();
                for (k, v) in t {
                    table.insert(&k, to_edit_item(v));
                }
                toml_edit::Item::Table(table)
            }
        }
    }

    if !self.sidecar {
        return Ok(());
    }

    let sidecar_path = self.path.with_extension("toml");

    // Start from the existing sidecar so unrelated keys survive the write.
    // NOTE: an existing sidecar that fails to parse is treated as empty,
    // which means its previous contents are replaced when we write below.
    let mut doc: toml_edit::DocumentMut = if sidecar_path.is_file() {
        let content = std::fs::read_to_string(&sidecar_path)?;
        content.parse().unwrap_or_default()
    } else {
        toml_edit::DocumentMut::new()
    };

    // PileValue -> JSON -> toml::Value -> toml_edit::Item
    let json_value = value.to_json()?;
    let toml_value: toml::Value = serde_json::from_value(json_value)
        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
    let item = to_edit_item(toml_value);

    // An empty path names no key, so there is nothing to set.
    let Some((path_last, path_init)) = path.split_last() else {
        return Ok(());
    };

    // Walk down to the parent table, creating missing tables on the way.
    let mut table = doc.as_table_mut();
    for label in path_init {
        let key = label.as_str();
        if !table.contains_key(key) {
            table.insert(key, toml_edit::Item::Table(toml_edit::Table::new()));
        }
        table = table
            .get_mut(key)
            .and_then(|item| item.as_table_mut())
            .ok_or_else(|| {
                std::io::Error::new(
                    std::io::ErrorKind::InvalidData,
                    "path element is not a table",
                )
            })?;
    }

    table.insert(path_last.as_str(), item);
    std::fs::write(&sidecar_path, doc.to_string())?;
    Ok(())
}
}

View File

@@ -4,7 +4,7 @@ use pile_config::Label;
use std::path::PathBuf;
use walkdir::WalkDir;
use crate::{DataSource, Item, item::FileItem, path_ts_latest};
use crate::{DataSource, item::FileItem, path_ts_latest};
#[derive(Debug)]
pub struct DirDataSource {
@@ -26,26 +26,27 @@ impl DirDataSource {
impl DataSource for DirDataSource {
type Key = PathBuf;
type Item = FileItem;
type Error = std::io::Error;
fn get(
&self,
key: &Self::Key,
) -> Result<Option<Box<dyn Item<Key = Self::Key> + 'static>>, Self::Error> {
fn get(&self, key: &Self::Key) -> Result<Option<Self::Item>, Self::Error> {
if !key.is_file() {
return Ok(None);
}
return Ok(Some(Box::new(FileItem {
// Ignore toml files if sidecars are enabled
if self.sidecars && key.extension().and_then(|x| x.to_str()) == Some("toml") {
return Ok(None);
}
return Ok(Some(FileItem {
source_name: self.name.clone(),
path: key.to_owned(),
sidecar: self.sidecars,
})));
}));
}
fn iter(
&self,
) -> impl Iterator<Item = Result<(Self::Key, Box<dyn Item<Key = Self::Key>>), Self::Error>> {
fn iter(&self) -> impl Iterator<Item = Result<(Self::Key, Self::Item), Self::Error>> {
return self
.dirs
.iter()
@@ -62,16 +63,18 @@ impl DataSource for DirDataSource {
Ok((_, entry)) => {
let path = entry.into_path();
let item: Box<dyn Item<Key = Self::Key>> =
match path.extension().and_then(|x| x.to_str()) {
None => return None,
Some("flac") => Box::new(FileItem {
source_name: self.name.clone(),
path: path.clone(),
sidecar: self.sidecars,
}),
Some(_) => return None,
};
let item = match path.extension().and_then(|x| x.to_str()) {
None => return None,
// Ignore toml if sidecars are enabled
Some("toml") if self.sidecars => return None,
Some(_) => FileItem {
source_name: self.name.clone(),
path: path.clone(),
sidecar: self.sidecars,
},
};
Some(Ok((path, item)))
}

View File

@@ -8,19 +8,15 @@ pub trait DataSource {
/// The type used to retrieve items from this source
/// (e.g., a `PathBuf` or a primary key)
type Key: Key;
type Item: Item<Key = Self::Key>;
type Error: Error + Sync + Send;
/// Get an item from this datasource
fn get(
&self,
key: &Self::Key,
) -> Result<Option<Box<dyn Item<Key = Self::Key> + 'static>>, Self::Error>;
fn get(&self, key: &Self::Key) -> Result<Option<Self::Item>, Self::Error>;
/// Iterate over all items in this source in an arbitrary order
fn iter(
&self,
) -> impl Iterator<Item = Result<(Self::Key, Box<dyn Item<Key = Self::Key> + 'static>), Self::Error>>;
fn iter(&self) -> impl Iterator<Item = Result<(Self::Key, Self::Item), Self::Error>>;
/// Return the time of the latest change to the data in this source
fn latest_change(&self) -> Result<Option<DateTime<Utc>>, Self::Error>;