Write sidecar fields
This commit is contained in:
56
Cargo.lock
generated
56
Cargo.lock
generated
@@ -91,6 +91,12 @@ dependencies = [
|
|||||||
"rustversion",
|
"rustversion",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "arrayref"
|
||||||
|
version = "0.3.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "arrayvec"
|
name = "arrayvec"
|
||||||
version = "0.7.6"
|
version = "0.7.6"
|
||||||
@@ -135,6 +141,20 @@ dependencies = [
|
|||||||
"crunchy",
|
"crunchy",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "blake3"
|
||||||
|
version = "1.8.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d"
|
||||||
|
dependencies = [
|
||||||
|
"arrayref",
|
||||||
|
"arrayvec",
|
||||||
|
"cc",
|
||||||
|
"cfg-if",
|
||||||
|
"constant_time_eq",
|
||||||
|
"cpufeatures",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "block-buffer"
|
name = "block-buffer"
|
||||||
version = "0.11.0"
|
version = "0.11.0"
|
||||||
@@ -289,6 +309,12 @@ version = "0.10.2"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c"
|
checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "constant_time_eq"
|
||||||
|
version = "0.4.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "core-foundation-sys"
|
name = "core-foundation-sys"
|
||||||
version = "0.8.7"
|
version = "0.8.7"
|
||||||
@@ -448,7 +474,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
|
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
"windows-sys 0.59.0",
|
"windows-sys 0.61.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -858,7 +884,7 @@ version = "0.50.3"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
|
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"windows-sys 0.59.0",
|
"windows-sys 0.61.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -968,6 +994,7 @@ dependencies = [
|
|||||||
name = "pile-dataset"
|
name = "pile-dataset"
|
||||||
version = "0.0.1"
|
version = "0.0.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"blake3",
|
||||||
"chrono",
|
"chrono",
|
||||||
"itertools",
|
"itertools",
|
||||||
"pile-config",
|
"pile-config",
|
||||||
@@ -979,6 +1006,7 @@ dependencies = [
|
|||||||
"tantivy",
|
"tantivy",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
"toml",
|
"toml",
|
||||||
|
"toml_edit",
|
||||||
"tracing",
|
"tracing",
|
||||||
"walkdir",
|
"walkdir",
|
||||||
]
|
]
|
||||||
@@ -1196,7 +1224,7 @@ dependencies = [
|
|||||||
"errno",
|
"errno",
|
||||||
"libc",
|
"libc",
|
||||||
"linux-raw-sys",
|
"linux-raw-sys",
|
||||||
"windows-sys 0.59.0",
|
"windows-sys 0.61.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -1569,10 +1597,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1"
|
checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"fastrand",
|
"fastrand",
|
||||||
"getrandom 0.3.4",
|
"getrandom 0.4.1",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"rustix",
|
"rustix",
|
||||||
"windows-sys 0.59.0",
|
"windows-sys 0.61.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -1687,6 +1715,19 @@ dependencies = [
|
|||||||
"serde_core",
|
"serde_core",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "toml_edit"
|
||||||
|
version = "0.25.4+spec-1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7193cbd0ce53dc966037f54351dbbcf0d5a642c7f0038c382ef9e677ce8c13f2"
|
||||||
|
dependencies = [
|
||||||
|
"indexmap",
|
||||||
|
"toml_datetime",
|
||||||
|
"toml_parser",
|
||||||
|
"toml_writer",
|
||||||
|
"winnow",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "toml_parser"
|
name = "toml_parser"
|
||||||
version = "1.0.9+spec-1.1.0"
|
version = "1.0.9+spec-1.1.0"
|
||||||
@@ -2026,7 +2067,7 @@ version = "0.1.11"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
|
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"windows-sys 0.59.0",
|
"windows-sys 0.61.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2255,6 +2296,9 @@ name = "winnow"
|
|||||||
version = "0.7.14"
|
version = "0.7.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829"
|
checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wit-bindgen"
|
name = "wit-bindgen"
|
||||||
|
|||||||
@@ -87,7 +87,9 @@ serde = { version = "1.0.228", features = ["derive"] }
|
|||||||
serde_json = "1.0.149"
|
serde_json = "1.0.149"
|
||||||
base64 = "0.22.1"
|
base64 = "0.22.1"
|
||||||
toml = "1.0.3"
|
toml = "1.0.3"
|
||||||
|
toml_edit = "0.25.4"
|
||||||
sha2 = "0.11.0-rc.5"
|
sha2 = "0.11.0-rc.5"
|
||||||
|
blake3 = "1.8.3"
|
||||||
|
|
||||||
# Misc helpers
|
# Misc helpers
|
||||||
thiserror = "2.0.18"
|
thiserror = "2.0.18"
|
||||||
|
|||||||
@@ -22,3 +22,5 @@ toml = { workspace = true }
|
|||||||
thiserror = { workspace = true }
|
thiserror = { workspace = true }
|
||||||
rayon = { workspace = true }
|
rayon = { workspace = true }
|
||||||
smartstring = { workspace = true }
|
smartstring = { workspace = true }
|
||||||
|
blake3 = { workspace = true }
|
||||||
|
toml_edit = { workspace = true }
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ use thiserror::Error;
|
|||||||
use tracing::{debug, info, trace, warn};
|
use tracing::{debug, info, trace, warn};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
DataSource, Item,
|
DataSource, FileItem,
|
||||||
index::{DbFtsIndex, FtsLookupResult},
|
index::{DbFtsIndex, FtsLookupResult},
|
||||||
path_ts_earliest,
|
path_ts_earliest,
|
||||||
source::DirDataSource,
|
source::DirDataSource,
|
||||||
@@ -96,11 +96,7 @@ impl Dataset {
|
|||||||
// MARK: get
|
// MARK: get
|
||||||
//
|
//
|
||||||
|
|
||||||
pub fn get(
|
pub fn get(&self, source: &Label, key: &PathBuf) -> Option<FileItem> {
|
||||||
&self,
|
|
||||||
source: &Label,
|
|
||||||
key: &PathBuf,
|
|
||||||
) -> Option<Box<dyn Item<Key = PathBuf> + 'static>> {
|
|
||||||
let s = self.config.dataset.source.get(source)?;
|
let s = self.config.dataset.source.get(source)?;
|
||||||
let s = match s {
|
let s = match s {
|
||||||
Source::Filesystem { path, sidecars } => {
|
Source::Filesystem { path, sidecars } => {
|
||||||
@@ -115,7 +111,7 @@ impl Dataset {
|
|||||||
// MARK: fts
|
// MARK: fts
|
||||||
//
|
//
|
||||||
|
|
||||||
/// Refresh this dataset's fts index
|
/// Refresh this dataset's fts index.
|
||||||
pub fn fts_refresh(
|
pub fn fts_refresh(
|
||||||
&self,
|
&self,
|
||||||
threads: usize,
|
threads: usize,
|
||||||
@@ -163,7 +159,7 @@ impl Dataset {
|
|||||||
.install(|| {
|
.install(|| {
|
||||||
batch
|
batch
|
||||||
.into_par_iter()
|
.into_par_iter()
|
||||||
.filter_map(|(key, item)| match db_index.entry_to_document(&*item) {
|
.filter_map(|(key, item)| match db_index.entry_to_document(&item) {
|
||||||
Ok(Some(doc)) => Some((key, doc)),
|
Ok(Some(doc)) => Some((key, doc)),
|
||||||
Ok(None) => {
|
Ok(None) => {
|
||||||
warn!("Skipping {key:?}, document is empty");
|
warn!("Skipping {key:?}, document is empty");
|
||||||
@@ -306,7 +302,7 @@ fn start_read_task(
|
|||||||
batch_size: usize,
|
batch_size: usize,
|
||||||
) -> (
|
) -> (
|
||||||
JoinHandle<()>,
|
JoinHandle<()>,
|
||||||
Receiver<Result<Vec<(PathBuf, Box<dyn Item<Key = PathBuf>>)>, DatasetError>>,
|
Receiver<Result<Vec<(PathBuf, FileItem)>, DatasetError>>,
|
||||||
) {
|
) {
|
||||||
let config = config.clone();
|
let config = config.clone();
|
||||||
let (read_tx, read_rx) = std::sync::mpsc::sync_channel(2);
|
let (read_tx, read_rx) = std::sync::mpsc::sync_channel(2);
|
||||||
|
|||||||
@@ -39,9 +39,11 @@ impl<'a> SidecarExtractor<'a> {
|
|||||||
return Ok(self.output.get_or_init(HashMap::new));
|
return Ok(self.output.get_or_init(HashMap::new));
|
||||||
}
|
}
|
||||||
|
|
||||||
let sidecar = std::fs::read_to_string(&sidecar_file)?;
|
let sidecar = std::fs::read(&sidecar_file)?;
|
||||||
let sidecar: toml::Value = toml::from_str(&sidecar)
|
let sidecar: toml::Value = match toml::from_slice(&sidecar) {
|
||||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
|
Ok(x) => x,
|
||||||
|
Err(_) => return Ok(self.output.get_or_init(HashMap::new)),
|
||||||
|
};
|
||||||
|
|
||||||
let output: HashMap<Label, PileValue<'_, FileItem>> = match sidecar {
|
let output: HashMap<Label, PileValue<'_, FileItem>> = match sidecar {
|
||||||
toml::Value::Table(t) => t
|
toml::Value::Table(t) => t
|
||||||
|
|||||||
@@ -63,9 +63,9 @@ impl DbFtsIndex {
|
|||||||
//
|
//
|
||||||
|
|
||||||
/// Turn an entry into a tantivy document
|
/// Turn an entry into a tantivy document
|
||||||
pub fn entry_to_document<K: Key>(
|
pub fn entry_to_document<K: Key, I: Item<Key = K>>(
|
||||||
&self,
|
&self,
|
||||||
item: &dyn Item<Key = K>,
|
item: &I,
|
||||||
) -> Result<Option<TantivyDocument>, TantivyError> {
|
) -> Result<Option<TantivyDocument>, TantivyError> {
|
||||||
let mut doc = TantivyDocument::default();
|
let mut doc = TantivyDocument::default();
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,10 @@
|
|||||||
use pile_config::Label;
|
use pile_config::Label;
|
||||||
use std::{fmt::Debug, path::PathBuf};
|
use std::{fmt::Debug, path::PathBuf, rc::Rc};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
PileValue,
|
||||||
|
extract::{Extractor, SidecarExtractor},
|
||||||
|
};
|
||||||
|
|
||||||
//
|
//
|
||||||
// MARK: key
|
// MARK: key
|
||||||
@@ -28,12 +33,27 @@ impl Key for PathBuf {
|
|||||||
//
|
//
|
||||||
|
|
||||||
/// A pointer to raw data
|
/// A pointer to raw data
|
||||||
pub trait Item: Debug + Send + Sync + 'static {
|
pub trait Item: Debug + Send + Sync + 'static + Sized {
|
||||||
type Key: Key;
|
type Key: Key;
|
||||||
|
|
||||||
fn source_name(&self) -> &str;
|
fn source_name(&self) -> &str;
|
||||||
fn key(&self) -> &Self::Key;
|
fn key(&self) -> &Self::Key;
|
||||||
|
|
||||||
|
/// Get this item's sidecar metadata
|
||||||
|
fn sidecar(&self) -> Result<Option<Rc<dyn Extractor<Self> + '_>>, std::io::Error>;
|
||||||
|
|
||||||
|
/// Set this file's sidecar metadata,
|
||||||
|
/// overwriting any existing file.
|
||||||
|
fn write_sidecar(
|
||||||
|
&self,
|
||||||
|
path: Vec<Label>,
|
||||||
|
value: PileValue<'_, Self>,
|
||||||
|
) -> Result<(), std::io::Error>;
|
||||||
|
|
||||||
|
fn hash(&self) -> Result<blake3::Hash, std::io::Error>;
|
||||||
|
|
||||||
|
/// Item conversion, downcast to specific type.
|
||||||
|
/// Returns `None` if this is not a [FileItem]
|
||||||
fn as_file(&self) -> Option<&FileItem>;
|
fn as_file(&self) -> Option<&FileItem>;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -62,4 +82,97 @@ impl Item for FileItem {
|
|||||||
fn as_file(&self) -> Option<&FileItem> {
|
fn as_file(&self) -> Option<&FileItem> {
|
||||||
Some(self)
|
Some(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn hash(&self) -> Result<blake3::Hash, std::io::Error> {
|
||||||
|
let mut hasher = blake3::Hasher::new();
|
||||||
|
let mut file = std::fs::File::open(&self.path)?;
|
||||||
|
std::io::copy(&mut file, &mut hasher)?;
|
||||||
|
return Ok(hasher.finalize());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn sidecar(&self) -> Result<Option<Rc<dyn Extractor<Self> + '_>>, std::io::Error> {
|
||||||
|
if !self.sidecar {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: use a generic tomlextractor instead?
|
||||||
|
// you'll need a fake _ref_ to the toml file, though.
|
||||||
|
return Ok(Some(Rc::new(SidecarExtractor::new(self))));
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_sidecar(
|
||||||
|
&self,
|
||||||
|
path: Vec<Label>,
|
||||||
|
value: PileValue<'_, Self>,
|
||||||
|
) -> Result<(), std::io::Error> {
|
||||||
|
if !self.sidecar {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let sidecar_path = self.path.with_extension("toml");
|
||||||
|
|
||||||
|
let mut doc: toml_edit::DocumentMut = if sidecar_path.is_file() {
|
||||||
|
let content = std::fs::read_to_string(&sidecar_path)?;
|
||||||
|
content.parse().unwrap_or_default()
|
||||||
|
} else {
|
||||||
|
toml_edit::DocumentMut::new()
|
||||||
|
};
|
||||||
|
|
||||||
|
fn to_edit_item(v: toml::Value) -> toml_edit::Item {
|
||||||
|
match v {
|
||||||
|
toml::Value::String(s) => toml_edit::value(s),
|
||||||
|
toml::Value::Integer(i) => toml_edit::value(i),
|
||||||
|
toml::Value::Float(f) => toml_edit::value(f),
|
||||||
|
toml::Value::Boolean(b) => toml_edit::value(b),
|
||||||
|
toml::Value::Datetime(d) => toml_edit::value(d.to_string()),
|
||||||
|
toml::Value::Array(arr) => {
|
||||||
|
let mut array = toml_edit::Array::new();
|
||||||
|
for item in arr {
|
||||||
|
if let toml_edit::Item::Value(v) = to_edit_item(item) {
|
||||||
|
array.push_formatted(v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
toml_edit::Item::Value(toml_edit::Value::Array(array))
|
||||||
|
}
|
||||||
|
toml::Value::Table(t) => {
|
||||||
|
let mut table = toml_edit::Table::new();
|
||||||
|
for (k, v) in t {
|
||||||
|
table.insert(&k, to_edit_item(v));
|
||||||
|
}
|
||||||
|
toml_edit::Item::Table(table)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let json_value = value.to_json()?;
|
||||||
|
let toml_value: toml::Value = serde_json::from_value(json_value)
|
||||||
|
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
|
||||||
|
let item = to_edit_item(toml_value);
|
||||||
|
|
||||||
|
let Some((path_last, path_init)) = path.split_last() else {
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut table = doc.as_table_mut();
|
||||||
|
for label in path_init {
|
||||||
|
let key = label.as_str();
|
||||||
|
if !table.contains_key(key) {
|
||||||
|
table.insert(key, toml_edit::Item::Table(toml_edit::Table::new()));
|
||||||
|
}
|
||||||
|
table = table
|
||||||
|
.get_mut(key)
|
||||||
|
.and_then(|item| item.as_table_mut())
|
||||||
|
.ok_or_else(|| {
|
||||||
|
std::io::Error::new(
|
||||||
|
std::io::ErrorKind::InvalidData,
|
||||||
|
"path element is not a table",
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
}
|
||||||
|
table.insert(path_last.as_str(), item);
|
||||||
|
|
||||||
|
std::fs::write(&sidecar_path, doc.to_string())?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ use pile_config::Label;
|
|||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use walkdir::WalkDir;
|
use walkdir::WalkDir;
|
||||||
|
|
||||||
use crate::{DataSource, Item, item::FileItem, path_ts_latest};
|
use crate::{DataSource, item::FileItem, path_ts_latest};
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct DirDataSource {
|
pub struct DirDataSource {
|
||||||
@@ -26,26 +26,27 @@ impl DirDataSource {
|
|||||||
|
|
||||||
impl DataSource for DirDataSource {
|
impl DataSource for DirDataSource {
|
||||||
type Key = PathBuf;
|
type Key = PathBuf;
|
||||||
|
type Item = FileItem;
|
||||||
type Error = std::io::Error;
|
type Error = std::io::Error;
|
||||||
|
|
||||||
fn get(
|
fn get(&self, key: &Self::Key) -> Result<Option<Self::Item>, Self::Error> {
|
||||||
&self,
|
|
||||||
key: &Self::Key,
|
|
||||||
) -> Result<Option<Box<dyn Item<Key = Self::Key> + 'static>>, Self::Error> {
|
|
||||||
if !key.is_file() {
|
if !key.is_file() {
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
}
|
}
|
||||||
|
|
||||||
return Ok(Some(Box::new(FileItem {
|
// Ignore toml files if sidecars are enabled
|
||||||
|
if self.sidecars && key.extension().and_then(|x| x.to_str()) == Some("toml") {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
return Ok(Some(FileItem {
|
||||||
source_name: self.name.clone(),
|
source_name: self.name.clone(),
|
||||||
path: key.to_owned(),
|
path: key.to_owned(),
|
||||||
sidecar: self.sidecars,
|
sidecar: self.sidecars,
|
||||||
})));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
fn iter(
|
fn iter(&self) -> impl Iterator<Item = Result<(Self::Key, Self::Item), Self::Error>> {
|
||||||
&self,
|
|
||||||
) -> impl Iterator<Item = Result<(Self::Key, Box<dyn Item<Key = Self::Key>>), Self::Error>> {
|
|
||||||
return self
|
return self
|
||||||
.dirs
|
.dirs
|
||||||
.iter()
|
.iter()
|
||||||
@@ -62,16 +63,18 @@ impl DataSource for DirDataSource {
|
|||||||
Ok((_, entry)) => {
|
Ok((_, entry)) => {
|
||||||
let path = entry.into_path();
|
let path = entry.into_path();
|
||||||
|
|
||||||
let item: Box<dyn Item<Key = Self::Key>> =
|
let item = match path.extension().and_then(|x| x.to_str()) {
|
||||||
match path.extension().and_then(|x| x.to_str()) {
|
None => return None,
|
||||||
None => return None,
|
|
||||||
Some("flac") => Box::new(FileItem {
|
// Ignore toml if sidecars are enabled
|
||||||
source_name: self.name.clone(),
|
Some("toml") if self.sidecars => return None,
|
||||||
path: path.clone(),
|
|
||||||
sidecar: self.sidecars,
|
Some(_) => FileItem {
|
||||||
}),
|
source_name: self.name.clone(),
|
||||||
Some(_) => return None,
|
path: path.clone(),
|
||||||
};
|
sidecar: self.sidecars,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
Some(Ok((path, item)))
|
Some(Ok((path, item)))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,19 +8,15 @@ pub trait DataSource {
|
|||||||
/// The type used to retrieve items from this source
|
/// The type used to retrieve items from this source
|
||||||
/// (e.g, a PathBuf or a primary key)
|
/// (e.g, a PathBuf or a primary key)
|
||||||
type Key: Key;
|
type Key: Key;
|
||||||
|
type Item: Item<Key = Self::Key>;
|
||||||
|
|
||||||
type Error: Error + Sync + Send;
|
type Error: Error + Sync + Send;
|
||||||
|
|
||||||
/// Get an item from this datasource
|
/// Get an item from this datasource
|
||||||
fn get(
|
fn get(&self, key: &Self::Key) -> Result<Option<Self::Item>, Self::Error>;
|
||||||
&self,
|
|
||||||
key: &Self::Key,
|
|
||||||
) -> Result<Option<Box<dyn Item<Key = Self::Key> + 'static>>, Self::Error>;
|
|
||||||
|
|
||||||
/// Iterate over all items in this source in an arbitrary order
|
/// Iterate over all items in this source in an arbitrary order
|
||||||
fn iter(
|
fn iter(&self) -> impl Iterator<Item = Result<(Self::Key, Self::Item), Self::Error>>;
|
||||||
&self,
|
|
||||||
) -> impl Iterator<Item = Result<(Self::Key, Box<dyn Item<Key = Self::Key> + 'static>), Self::Error>>;
|
|
||||||
|
|
||||||
/// Return the time of the latest change to the data in this source
|
/// Return the time of the latest change to the data in this source
|
||||||
fn latest_change(&self) -> Result<Option<DateTime<Utc>>, Self::Error>;
|
fn latest_change(&self) -> Result<Option<DateTime<Utc>>, Self::Error>;
|
||||||
|
|||||||
96
crates/pile/src/command/annotate.rs
Normal file
96
crates/pile/src/command/annotate.rs
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
use anyhow::{Context, Result};
|
||||||
|
use clap::Args;
|
||||||
|
use pile_config::{Label, Source};
|
||||||
|
use pile_dataset::index::DbFtsIndex;
|
||||||
|
use pile_dataset::source::DirDataSource;
|
||||||
|
use pile_dataset::{DataSource, Dataset, FileItem, Item, PileValue, extract::MetaExtractor};
|
||||||
|
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
||||||
|
use std::{path::PathBuf, rc::Rc};
|
||||||
|
use tracing::{info, warn};
|
||||||
|
|
||||||
|
use crate::{CliCmd, GlobalContext};
|
||||||
|
|
||||||
|
#[derive(Debug, Args)]
|
||||||
|
pub struct AnnotateCommand {
|
||||||
|
/// The schema field to read (must be defined in pile.toml)
|
||||||
|
field: String,
|
||||||
|
|
||||||
|
/// Sidecar path to write to (e.g. meta.title)
|
||||||
|
dest: String,
|
||||||
|
|
||||||
|
/// Path to dataset config
|
||||||
|
#[arg(long, short = 'c', default_value = "./pile.toml")]
|
||||||
|
config: PathBuf,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AnnotateCommand {
|
||||||
|
fn parse_dest(dest: &str) -> Result<Vec<Label>> {
|
||||||
|
dest.split('.')
|
||||||
|
.map(|s| {
|
||||||
|
Label::new(s).ok_or_else(|| anyhow::anyhow!("invalid label {s:?} in dest path"))
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CliCmd for AnnotateCommand {
|
||||||
|
async fn run(
|
||||||
|
self,
|
||||||
|
_ctx: GlobalContext,
|
||||||
|
_flag: CancelFlag,
|
||||||
|
) -> Result<i32, CancelableTaskError<anyhow::Error>> {
|
||||||
|
let field = Label::new(&self.field)
|
||||||
|
.ok_or_else(|| anyhow::anyhow!("invalid field name {:?}", self.field))?;
|
||||||
|
let dest_path = Self::parse_dest(&self.dest)?;
|
||||||
|
|
||||||
|
let ds = Dataset::open(&self.config)
|
||||||
|
.with_context(|| format!("while opening dataset for {}", self.config.display()))?;
|
||||||
|
|
||||||
|
if !ds.config.schema.contains_key(&field) {
|
||||||
|
return Err(anyhow::anyhow!("field {:?} is not defined in schema", self.field).into());
|
||||||
|
}
|
||||||
|
|
||||||
|
let index = DbFtsIndex::new(&ds.path_workdir, &ds.config);
|
||||||
|
let mut count = 0u64;
|
||||||
|
|
||||||
|
for (name, source) in &ds.config.dataset.source {
|
||||||
|
match source {
|
||||||
|
Source::Filesystem { path, sidecars } => {
|
||||||
|
if !sidecars {
|
||||||
|
warn!("Source {name} does not have sidecars enabled, skipping");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let source = DirDataSource::new(name, path.clone().to_vec(), *sidecars);
|
||||||
|
|
||||||
|
for res in source.iter() {
|
||||||
|
let (_key, item) =
|
||||||
|
res.with_context(|| format!("while reading source {name}"))?;
|
||||||
|
|
||||||
|
let meta = MetaExtractor::new(&item);
|
||||||
|
let extractor = PileValue::<FileItem>::Extractor(Rc::new(meta));
|
||||||
|
|
||||||
|
let Some(value) =
|
||||||
|
index.get_field(&extractor, &field).with_context(|| {
|
||||||
|
format!("while extracting field from {}", item.path.display())
|
||||||
|
})?
|
||||||
|
else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
item.write_sidecar(dest_path.clone(), PileValue::String(value.into()))
|
||||||
|
.with_context(|| {
|
||||||
|
format!("while writing sidecar for {}", item.path.display())
|
||||||
|
})?;
|
||||||
|
|
||||||
|
count += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
info!("Annotated {count} items");
|
||||||
|
|
||||||
|
return Ok(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -2,6 +2,7 @@ use anyhow::Result;
|
|||||||
use clap::Subcommand;
|
use clap::Subcommand;
|
||||||
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTask, CancelableTaskError};
|
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTask, CancelableTaskError};
|
||||||
|
|
||||||
|
mod annotate;
|
||||||
mod check;
|
mod check;
|
||||||
mod index;
|
mod index;
|
||||||
mod init;
|
mod init;
|
||||||
@@ -12,6 +13,12 @@ use crate::GlobalContext;
|
|||||||
|
|
||||||
#[derive(Debug, Subcommand)]
|
#[derive(Debug, Subcommand)]
|
||||||
pub enum SubCommand {
|
pub enum SubCommand {
|
||||||
|
/// Annotate all items with a field, writing it to a sidecar path
|
||||||
|
Annotate {
|
||||||
|
#[command(flatten)]
|
||||||
|
cmd: annotate::AnnotateCommand,
|
||||||
|
},
|
||||||
|
|
||||||
/// Create an empty dataset
|
/// Create an empty dataset
|
||||||
Init {
|
Init {
|
||||||
#[command(flatten)]
|
#[command(flatten)]
|
||||||
@@ -46,6 +53,7 @@ pub enum SubCommand {
|
|||||||
impl CliCmdDispatch for SubCommand {
|
impl CliCmdDispatch for SubCommand {
|
||||||
fn start(self, ctx: GlobalContext) -> Result<CancelableTask<Result<i32>>> {
|
fn start(self, ctx: GlobalContext) -> Result<CancelableTask<Result<i32>>> {
|
||||||
match self {
|
match self {
|
||||||
|
Self::Annotate { cmd } => cmd.start(ctx),
|
||||||
Self::Init { cmd } => cmd.start(ctx),
|
Self::Init { cmd } => cmd.start(ctx),
|
||||||
Self::Check { cmd } => cmd.start(ctx),
|
Self::Check { cmd } => cmd.start(ctx),
|
||||||
Self::Index { cmd } => cmd.start(ctx),
|
Self::Index { cmd } => cmd.start(ctx),
|
||||||
|
|||||||
Reference in New Issue
Block a user