Validate label names
This commit is contained in:
107
Cargo.lock
generated
107
Cargo.lock
generated
@@ -17,6 +17,15 @@ version = "0.2.21"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
|
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "android_system_properties"
|
||||||
|
version = "0.1.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "anstream"
|
name = "anstream"
|
||||||
version = "0.6.21"
|
version = "0.6.21"
|
||||||
@@ -211,6 +220,19 @@ version = "1.0.4"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "chrono"
|
||||||
|
version = "0.4.43"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118"
|
||||||
|
dependencies = [
|
||||||
|
"iana-time-zone",
|
||||||
|
"js-sys",
|
||||||
|
"num-traits",
|
||||||
|
"wasm-bindgen",
|
||||||
|
"windows-link",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap"
|
name = "clap"
|
||||||
version = "4.5.53"
|
version = "4.5.53"
|
||||||
@@ -276,6 +298,12 @@ version = "0.10.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0dabb6555f92fb9ee4140454eb5dcd14c7960e1225c6d1a6cc361f032947713e"
|
checksum = "0dabb6555f92fb9ee4140454eb5dcd14c7960e1225c6d1a6cc361f032947713e"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "core-foundation-sys"
|
||||||
|
version = "0.8.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cpufeatures"
|
name = "cpufeatures"
|
||||||
version = "0.2.17"
|
version = "0.2.17"
|
||||||
@@ -621,6 +649,30 @@ dependencies = [
|
|||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "iana-time-zone"
|
||||||
|
version = "0.1.65"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470"
|
||||||
|
dependencies = [
|
||||||
|
"android_system_properties",
|
||||||
|
"core-foundation-sys",
|
||||||
|
"iana-time-zone-haiku",
|
||||||
|
"js-sys",
|
||||||
|
"log",
|
||||||
|
"wasm-bindgen",
|
||||||
|
"windows-core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "iana-time-zone-haiku"
|
||||||
|
version = "0.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "id-arena"
|
name = "id-arena"
|
||||||
version = "2.3.0"
|
version = "2.3.0"
|
||||||
@@ -1019,6 +1071,7 @@ dependencies = [
|
|||||||
"itertools",
|
"itertools",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
"smartstring",
|
||||||
"toml",
|
"toml",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -1026,6 +1079,7 @@ dependencies = [
|
|||||||
name = "pile-dataset"
|
name = "pile-dataset"
|
||||||
version = "0.0.1"
|
version = "0.0.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"chrono",
|
||||||
"itertools",
|
"itertools",
|
||||||
"jsonpath-rust",
|
"jsonpath-rust",
|
||||||
"pile-audio",
|
"pile-audio",
|
||||||
@@ -2120,12 +2174,65 @@ version = "0.4.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-core"
|
||||||
|
version = "0.62.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
|
||||||
|
dependencies = [
|
||||||
|
"windows-implement",
|
||||||
|
"windows-interface",
|
||||||
|
"windows-link",
|
||||||
|
"windows-result",
|
||||||
|
"windows-strings",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-implement"
|
||||||
|
version = "0.60.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-interface"
|
||||||
|
version = "0.59.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-link"
|
name = "windows-link"
|
||||||
version = "0.2.1"
|
version = "0.2.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-result"
|
||||||
|
version = "0.4.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
|
||||||
|
dependencies = [
|
||||||
|
"windows-link",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-strings"
|
||||||
|
version = "0.5.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
|
||||||
|
dependencies = [
|
||||||
|
"windows-link",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-sys"
|
name = "windows-sys"
|
||||||
version = "0.59.0"
|
version = "0.59.0"
|
||||||
|
|||||||
21
Cargo.toml
21
Cargo.toml
@@ -70,18 +70,10 @@ pile-audio = { path = "crates/pile-audio" }
|
|||||||
pile-dataset = { path = "crates/pile-dataset" }
|
pile-dataset = { path = "crates/pile-dataset" }
|
||||||
|
|
||||||
# Clients
|
# Clients
|
||||||
reqwest = { version = "0.12.15", features = [
|
|
||||||
"multipart",
|
|
||||||
"json",
|
|
||||||
"rustls-tls",
|
|
||||||
] }
|
|
||||||
librqbit = "8.1.1"
|
|
||||||
librqbit-core = "5.0.0"
|
|
||||||
tantivy = "0.25.0"
|
tantivy = "0.25.0"
|
||||||
|
|
||||||
# Async & Parallelism
|
# Async & Parallelism
|
||||||
tokio = { version = "1.44.1", features = ["full"] }
|
tokio = { version = "1.44.1", features = ["full"] }
|
||||||
tokio-stream = { version = "0.1.17" }
|
|
||||||
|
|
||||||
# CLI & logging
|
# CLI & logging
|
||||||
tracing = "0.1.41"
|
tracing = "0.1.41"
|
||||||
@@ -91,17 +83,10 @@ tracing-indicatif = "0.3.13"
|
|||||||
anstyle = "1.0.10"
|
anstyle = "1.0.10"
|
||||||
clap = { version = "4.5.37", features = ["derive"] }
|
clap = { version = "4.5.37", features = ["derive"] }
|
||||||
|
|
||||||
# Extra types
|
|
||||||
url = { version = "2.5.4", features = ["serde"] }
|
|
||||||
|
|
||||||
# Serialization & formats
|
# Serialization & formats
|
||||||
serde = { version = "1.0.219", features = ["derive"] }
|
serde = { version = "1.0.219", features = ["derive"] }
|
||||||
serde_json = "1.0.140"
|
serde_json = "1.0.140"
|
||||||
blake3 = "1.8.2"
|
|
||||||
flate2 = "1.1.2"
|
|
||||||
base64 = "0.22.1"
|
base64 = "0.22.1"
|
||||||
binrw = "0.15.0"
|
|
||||||
brotli = "8.0.2"
|
|
||||||
toml = "0.9.8"
|
toml = "0.9.8"
|
||||||
jsonpath-rust = "1.0.4"
|
jsonpath-rust = "1.0.4"
|
||||||
sha2 = "0.11.0-rc.3"
|
sha2 = "0.11.0-rc.3"
|
||||||
@@ -110,15 +95,11 @@ sha2 = "0.11.0-rc.3"
|
|||||||
thiserror = "2.0.12"
|
thiserror = "2.0.12"
|
||||||
anyhow = "1.0.97"
|
anyhow = "1.0.97"
|
||||||
itertools = "0.14.0"
|
itertools = "0.14.0"
|
||||||
tempfile = "3.21.0"
|
|
||||||
signal-hook = "0.3.18"
|
signal-hook = "0.3.18"
|
||||||
parking_lot = "0.12.5"
|
|
||||||
lru = "0.16.1"
|
|
||||||
rayon = "1.11.0"
|
|
||||||
rand = "0.9.2"
|
rand = "0.9.2"
|
||||||
regex = "1.12.2"
|
|
||||||
strum = { version = "0.27.2", features = ["derive"] }
|
strum = { version = "0.27.2", features = ["derive"] }
|
||||||
walkdir = "2.5.0"
|
walkdir = "2.5.0"
|
||||||
mime = "0.3.17"
|
mime = "0.3.17"
|
||||||
paste = "1.0.15"
|
paste = "1.0.15"
|
||||||
smartstring = "1.0.1"
|
smartstring = "1.0.1"
|
||||||
|
chrono = "0.4.43"
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ workspace = true
|
|||||||
serde = { workspace = true }
|
serde = { workspace = true }
|
||||||
itertools = { workspace = true }
|
itertools = { workspace = true }
|
||||||
serde_json = { workspace = true }
|
serde_json = { workspace = true }
|
||||||
|
smartstring = { workspace = true }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
toml = { workspace = true }
|
toml = { workspace = true }
|
||||||
|
|||||||
@@ -1,56 +1,36 @@
|
|||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use std::{collections::HashMap, fmt::Debug, path::PathBuf, slice};
|
use std::{collections::HashMap, fmt::Debug, path::PathBuf};
|
||||||
|
|
||||||
pub static INIT_DB_TOML: &str = include_str!("./config.toml");
|
|
||||||
|
|
||||||
mod post;
|
mod post;
|
||||||
pub use post::*;
|
pub use post::*;
|
||||||
|
|
||||||
|
mod misc;
|
||||||
|
pub use misc::*;
|
||||||
|
|
||||||
|
pub static INIT_DB_TOML: &str = include_str!("./config.toml");
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn init_db_toml_valid() {
|
fn init_db_toml_valid() {
|
||||||
toml::from_str::<ConfigToml>(INIT_DB_TOML).unwrap();
|
toml::from_str::<ConfigToml>(INIT_DB_TOML).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Deserialize)]
|
|
||||||
#[serde(untagged)]
|
|
||||||
pub enum OneOrMany<T: Debug + Clone> {
|
|
||||||
One(T),
|
|
||||||
Many(Vec<T>),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<T: Debug + Clone> OneOrMany<T> {
|
|
||||||
pub fn to_vec(self) -> Vec<T> {
|
|
||||||
match self {
|
|
||||||
Self::One(x) => vec![x],
|
|
||||||
Self::Many(x) => x,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn as_slice(&self) -> &[T] {
|
|
||||||
match self {
|
|
||||||
Self::One(x) => slice::from_ref(&x),
|
|
||||||
Self::Many(x) => &x[..],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Deserialize)]
|
#[derive(Debug, Clone, Deserialize)]
|
||||||
pub struct ConfigToml {
|
pub struct ConfigToml {
|
||||||
pub dataset: DatasetConfig,
|
pub dataset: DatasetConfig,
|
||||||
pub schema: HashMap<String, FieldSpec>,
|
pub schema: HashMap<Label, FieldSpec>,
|
||||||
pub fts: Option<DatasetFts>,
|
pub fts: Option<DatasetFts>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Deserialize)]
|
#[derive(Debug, Clone, Deserialize)]
|
||||||
pub struct DatasetConfig {
|
pub struct DatasetConfig {
|
||||||
/// Must be unique
|
/// Must be unique
|
||||||
pub name: String,
|
pub name: Label,
|
||||||
|
|
||||||
/// Root dir for indices
|
/// Root dir for indices
|
||||||
pub working_dir: Option<PathBuf>,
|
pub working_dir: Option<PathBuf>,
|
||||||
|
|
||||||
/// Where to find this field
|
/// Where to find this field
|
||||||
pub source: HashMap<String, Source>,
|
pub source: HashMap<Label, Source>,
|
||||||
|
|
||||||
/// How to post-process this field
|
/// How to post-process this field
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
@@ -95,7 +75,7 @@ pub enum FieldType {
|
|||||||
#[derive(Debug, Clone, Deserialize, Default)]
|
#[derive(Debug, Clone, Deserialize, Default)]
|
||||||
pub struct DatasetFts {
|
pub struct DatasetFts {
|
||||||
#[serde(alias = "field")]
|
#[serde(alias = "field")]
|
||||||
pub fields: HashMap<String, FtsIndexField>,
|
pub fields: HashMap<Label, FtsIndexField>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Deserialize)]
|
#[derive(Debug, Clone, Deserialize)]
|
||||||
|
|||||||
122
crates/pile-config/src/misc.rs
Normal file
122
crates/pile-config/src/misc.rs
Normal file
@@ -0,0 +1,122 @@
|
|||||||
|
use core::slice;
|
||||||
|
use std::fmt::{Debug, Display};
|
||||||
|
use std::ops::Deref;
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use smartstring::{LazyCompact, SmartString};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Deserialize)]
|
||||||
|
#[serde(untagged)]
|
||||||
|
pub enum OneOrMany<T: Debug + Clone> {
|
||||||
|
One(T),
|
||||||
|
Many(Vec<T>),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Debug + Clone> OneOrMany<T> {
|
||||||
|
pub fn to_vec(self) -> Vec<T> {
|
||||||
|
match self {
|
||||||
|
Self::One(x) => vec![x],
|
||||||
|
Self::Many(x) => x,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn as_slice(&self) -> &[T] {
|
||||||
|
match self {
|
||||||
|
Self::One(x) => slice::from_ref(&x),
|
||||||
|
Self::Many(x) => &x[..],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// MARK: Label
|
||||||
|
//
|
||||||
|
|
||||||
|
/// A sanitized [String], guaranteed to only contain
|
||||||
|
/// chars in `A-z`, `0-9`, and `-_`.
|
||||||
|
///
|
||||||
|
/// Used for names of datasets, fields, etc.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
|
||||||
|
#[serde(try_from = "String", into = "String")]
|
||||||
|
pub struct Label(SmartString<LazyCompact>);
|
||||||
|
|
||||||
|
impl Label {
|
||||||
|
pub const VALID_CHARS: &str =
|
||||||
|
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_";
|
||||||
|
|
||||||
|
pub fn new(str: impl Into<String>) -> Option<Self> {
|
||||||
|
let str: String = str.into();
|
||||||
|
for c in str.chars() {
|
||||||
|
if !Self::VALID_CHARS.contains(c) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Some(Self(str.into()));
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn as_str(&self) -> &str {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn into_string(self) -> String {
|
||||||
|
self.0.into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for Label {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "{}", self.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<Label> for String {
|
||||||
|
fn from(value: Label) -> Self {
|
||||||
|
value.0.into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<String> for Label {
|
||||||
|
type Error = InvalidLabel;
|
||||||
|
|
||||||
|
fn try_from(value: String) -> Result<Self, Self::Error> {
|
||||||
|
Self::new(value).ok_or(InvalidLabel)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<&str> for Label {
|
||||||
|
type Error = InvalidLabel;
|
||||||
|
|
||||||
|
fn try_from(value: &str) -> Result<Self, Self::Error> {
|
||||||
|
Self::new(value).ok_or(InvalidLabel)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AsRef<str> for Label {
|
||||||
|
fn as_ref(&self) -> &str {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Deref for Label {
|
||||||
|
type Target = str;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Error produced when a string cannot be turned into a [`Label`]
/// because it contains a character outside [`Label::VALID_CHARS`].
///
/// Carries no data; its `Display` output lists the permitted characters.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct InvalidLabel;
|
||||||
|
|
||||||
|
impl Display for InvalidLabel {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"Invalid label: must only contain characters in {}",
|
||||||
|
Label::VALID_CHARS
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::error::Error for InvalidLabel {}
|
||||||
@@ -11,10 +11,10 @@ workspace = true
|
|||||||
pile-config = { workspace = true }
|
pile-config = { workspace = true }
|
||||||
pile-audio = { workspace = true }
|
pile-audio = { workspace = true }
|
||||||
|
|
||||||
|
|
||||||
serde_json = { workspace = true }
|
serde_json = { workspace = true }
|
||||||
itertools = { workspace = true }
|
itertools = { workspace = true }
|
||||||
walkdir = { workspace = true }
|
walkdir = { workspace = true }
|
||||||
tantivy = { workspace = true }
|
tantivy = { workspace = true }
|
||||||
tracing = { workspace = true }
|
tracing = { workspace = true }
|
||||||
jsonpath-rust = { workspace = true }
|
jsonpath-rust = { workspace = true }
|
||||||
|
chrono = { workspace = true }
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
use jsonpath_rust::JsonPath;
|
use jsonpath_rust::JsonPath;
|
||||||
use pile_config::{ConfigToml, DatasetFts};
|
use pile_config::{ConfigToml, DatasetFts, Label};
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use std::{path::PathBuf, sync::LazyLock};
|
use std::{path::PathBuf, sync::LazyLock};
|
||||||
use tantivy::{
|
use tantivy::{
|
||||||
@@ -106,7 +106,7 @@ impl DbFtsIndex {
|
|||||||
pub fn get_field(
|
pub fn get_field(
|
||||||
&self,
|
&self,
|
||||||
json: &Value,
|
json: &Value,
|
||||||
field_name: &str,
|
field_name: &Label,
|
||||||
) -> Result<Option<String>, std::io::Error> {
|
) -> Result<Option<String>, std::io::Error> {
|
||||||
let field = match self.cfg.schema.get(field_name) {
|
let field = match self.cfg.schema.get(field_name) {
|
||||||
Some(x) => x,
|
Some(x) => x,
|
||||||
@@ -124,7 +124,7 @@ impl DbFtsIndex {
|
|||||||
warn!(
|
warn!(
|
||||||
message = "Path returned more than one value, this is not supported. Skipping.",
|
message = "Path returned more than one value, this is not supported. Skipping.",
|
||||||
?path,
|
?path,
|
||||||
field = field_name
|
field = field_name.to_string()
|
||||||
);
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -139,7 +139,7 @@ impl DbFtsIndex {
|
|||||||
warn!(
|
warn!(
|
||||||
message = "Invalid path, skipping",
|
message = "Invalid path, skipping",
|
||||||
?path,
|
?path,
|
||||||
field = field_name,
|
field = field_name.to_string(),
|
||||||
?error
|
?error
|
||||||
);
|
);
|
||||||
continue;
|
continue;
|
||||||
@@ -150,7 +150,7 @@ impl DbFtsIndex {
|
|||||||
Value::Null => {
|
Value::Null => {
|
||||||
trace!(
|
trace!(
|
||||||
message = "Skipping field, is null",
|
message = "Skipping field, is null",
|
||||||
field = field_name,
|
field = field_name.to_string(),
|
||||||
path,
|
path,
|
||||||
value = ?val
|
value = ?val
|
||||||
);
|
);
|
||||||
@@ -174,7 +174,7 @@ impl DbFtsIndex {
|
|||||||
} else if x.len() > 1 {
|
} else if x.len() > 1 {
|
||||||
debug!(
|
debug!(
|
||||||
message = "Skipping field, is array with more than one element",
|
message = "Skipping field, is array with more than one element",
|
||||||
field = field_name,
|
field = field_name.to_string(),
|
||||||
path,
|
path,
|
||||||
value = ?val
|
value = ?val
|
||||||
);
|
);
|
||||||
@@ -182,7 +182,7 @@ impl DbFtsIndex {
|
|||||||
} else {
|
} else {
|
||||||
debug!(
|
debug!(
|
||||||
message = "Skipping field, is empty array",
|
message = "Skipping field, is empty array",
|
||||||
field = field_name,
|
field = field_name.to_string(),
|
||||||
path,
|
path,
|
||||||
value = ?val
|
value = ?val
|
||||||
);
|
);
|
||||||
@@ -192,7 +192,7 @@ impl DbFtsIndex {
|
|||||||
Value::Null => {
|
Value::Null => {
|
||||||
trace!(
|
trace!(
|
||||||
message = "Skipping field, is null",
|
message = "Skipping field, is null",
|
||||||
field = field_name,
|
field = field_name.to_string(),
|
||||||
path,
|
path,
|
||||||
value = ?val
|
value = ?val
|
||||||
);
|
);
|
||||||
@@ -201,7 +201,7 @@ impl DbFtsIndex {
|
|||||||
Value::Object(_) => {
|
Value::Object(_) => {
|
||||||
trace!(
|
trace!(
|
||||||
message = "Skipping field, is object",
|
message = "Skipping field, is object",
|
||||||
field = field_name,
|
field = field_name.to_string(),
|
||||||
path,
|
path,
|
||||||
value = ?val
|
value = ?val
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -1,6 +1,9 @@
|
|||||||
mod traits;
|
mod traits;
|
||||||
pub use traits::*;
|
pub use traits::*;
|
||||||
|
|
||||||
|
mod misc;
|
||||||
|
pub use misc::*;
|
||||||
|
|
||||||
pub mod index;
|
pub mod index;
|
||||||
pub mod item;
|
pub mod item;
|
||||||
pub mod source;
|
pub mod source;
|
||||||
|
|||||||
61
crates/pile-dataset/src/misc.rs
Normal file
61
crates/pile-dataset/src/misc.rs
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
use chrono::{DateTime, Utc};
|
||||||
|
use std::fs;
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
/// Returns the age of a path as a chrono DateTime.
|
||||||
|
/// - If the path doesn't exist, returns None
|
||||||
|
/// - If it's a file, returns the modified time
|
||||||
|
/// - If it's a directory, returns the LATEST modified time of all files within
|
||||||
|
pub fn path_age(path: impl AsRef<Path>) -> Option<DateTime<Utc>> {
|
||||||
|
let path = path.as_ref();
|
||||||
|
|
||||||
|
// Check if path exists
|
||||||
|
if !path.exists() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let metadata = fs::metadata(path).ok()?;
|
||||||
|
|
||||||
|
if metadata.is_file() {
|
||||||
|
// For files, return the modified time
|
||||||
|
let modified = metadata.modified().ok()?;
|
||||||
|
Some(modified.into())
|
||||||
|
} else if metadata.is_dir() {
|
||||||
|
// For directories, find the latest modified time of all files
|
||||||
|
find_latest_modified(path)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn find_latest_modified(dir: &Path) -> Option<DateTime<Utc>> {
|
||||||
|
let mut latest: Option<DateTime<Utc>> = None;
|
||||||
|
|
||||||
|
// Read directory entries
|
||||||
|
let entries = fs::read_dir(dir).ok()?;
|
||||||
|
|
||||||
|
for entry in entries.flatten() {
|
||||||
|
let path = entry.path();
|
||||||
|
let metadata = entry.metadata().ok()?;
|
||||||
|
|
||||||
|
if metadata.is_file() {
|
||||||
|
if let Ok(modified) = metadata.modified() {
|
||||||
|
let dt: DateTime<Utc> = modified.into();
|
||||||
|
latest = Some(match latest {
|
||||||
|
Some(prev) if prev > dt => prev,
|
||||||
|
_ => dt,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} else if metadata.is_dir() {
|
||||||
|
// Recursively check subdirectories
|
||||||
|
if let Some(dir_latest) = find_latest_modified(&path) {
|
||||||
|
latest = Some(match latest {
|
||||||
|
Some(prev) if prev > dir_latest => prev,
|
||||||
|
_ => dir_latest,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
latest
|
||||||
|
}
|
||||||
@@ -54,7 +54,7 @@ impl CliCmd for IndexCommand {
|
|||||||
.working_dir
|
.working_dir
|
||||||
.clone()
|
.clone()
|
||||||
.unwrap_or(parent.join(".pile"))
|
.unwrap_or(parent.join(".pile"))
|
||||||
.join(&config.dataset.name);
|
.join(&config.dataset.name.as_str());
|
||||||
let fts_dir = working_dir.join("fts");
|
let fts_dir = working_dir.join("fts");
|
||||||
|
|
||||||
if fts_dir.is_dir() {
|
if fts_dir.is_dir() {
|
||||||
@@ -70,7 +70,7 @@ impl CliCmd for IndexCommand {
|
|||||||
for (name, source) in &config.dataset.source {
|
for (name, source) in &config.dataset.source {
|
||||||
match source {
|
match source {
|
||||||
Source::Flac { path: dir } => {
|
Source::Flac { path: dir } => {
|
||||||
let source = DirDataSource::new(name, dir.clone().to_vec());
|
let source = DirDataSource::new(name.as_str(), dir.clone().to_vec());
|
||||||
sources.push(source);
|
sources.push(source);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -64,7 +64,7 @@ impl CliCmd for LookupCommand {
|
|||||||
.working_dir
|
.working_dir
|
||||||
.clone()
|
.clone()
|
||||||
.unwrap_or(parent.join(".pile"))
|
.unwrap_or(parent.join(".pile"))
|
||||||
.join(&config.dataset.name);
|
.join(&config.dataset.name.as_str());
|
||||||
let fts_dir = working_dir.join("fts");
|
let fts_dir = working_dir.join("fts");
|
||||||
|
|
||||||
if !fts_dir.is_dir() {
|
if !fts_dir.is_dir() {
|
||||||
|
|||||||
Reference in New Issue
Block a user