Validate label names
This commit is contained in:
@@ -11,6 +11,7 @@ workspace = true
|
||||
serde = { workspace = true }
|
||||
itertools = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
smartstring = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
toml = { workspace = true }
|
||||
|
||||
@@ -1,56 +1,36 @@
|
||||
use serde::Deserialize;
|
||||
use std::{collections::HashMap, fmt::Debug, path::PathBuf, slice};
|
||||
|
||||
pub static INIT_DB_TOML: &str = include_str!("./config.toml");
|
||||
use std::{collections::HashMap, fmt::Debug, path::PathBuf};
|
||||
|
||||
mod post;
|
||||
pub use post::*;
|
||||
|
||||
mod misc;
|
||||
pub use misc::*;
|
||||
|
||||
pub static INIT_DB_TOML: &str = include_str!("./config.toml");
|
||||
|
||||
/// Checks that the bundled default config (`INIT_DB_TOML`) deserializes
/// into a [`ConfigToml`]; panics (failing the test) if it does not.
#[test]
fn init_db_toml_valid() {
    let parsed: Result<ConfigToml, _> = toml::from_str(INIT_DB_TOML);
    parsed.unwrap();
}
|
||||
|
||||
/// Either a single `T` or a list of `T`.
///
/// Marked `#[serde(untagged)]`, so the input carries no variant tag:
/// a bare value deserializes as `One`, a sequence as `Many`.
#[derive(Debug, Clone, Deserialize)]
|
||||
#[serde(untagged)]
|
||||
pub enum OneOrMany<T: Debug + Clone> {
|
||||
/// Exactly one value.
One(T),
|
||||
/// Zero or more values.
Many(Vec<T>),
|
||||
}
|
||||
|
||||
impl<T: Debug + Clone> OneOrMany<T> {
|
||||
pub fn to_vec(self) -> Vec<T> {
|
||||
match self {
|
||||
Self::One(x) => vec![x],
|
||||
Self::Many(x) => x,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_slice(&self) -> &[T] {
|
||||
match self {
|
||||
Self::One(x) => slice::from_ref(&x),
|
||||
Self::Many(x) => &x[..],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Deserialized form of a dataset config file.
///
/// The `init_db_toml_valid` test parses the bundled `INIT_DB_TOML` into this type.
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct ConfigToml {
|
||||
/// Dataset-level settings (see [`DatasetConfig`]).
pub dataset: DatasetConfig,
|
||||
// NOTE(review): the two `schema` lines below appear to be the pre- and
// post-change versions of the same field (key type `String` -> `Label`);
// only one of them can exist in the compiled struct.
/// Field specifications, keyed by field name.
pub schema: HashMap<String, FieldSpec>,
|
||||
pub schema: HashMap<Label, FieldSpec>,
|
||||
/// Optional `fts` section (presumably full-text search settings; see [`DatasetFts`]).
pub fts: Option<DatasetFts>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct DatasetConfig {
|
||||
/// Must be unique
|
||||
pub name: String,
|
||||
pub name: Label,
|
||||
|
||||
/// Root dir for indices
|
||||
pub working_dir: Option<PathBuf>,
|
||||
|
||||
/// Where to find this field
|
||||
pub source: HashMap<String, Source>,
|
||||
pub source: HashMap<Label, Source>,
|
||||
|
||||
/// How to post-process this field
|
||||
#[serde(default)]
|
||||
@@ -95,7 +75,7 @@ pub enum FieldType {
|
||||
/// Contents of the optional `fts` section of [`ConfigToml`].
///
/// Derives `Default`, which yields an empty `fields` map.
#[derive(Debug, Clone, Deserialize, Default)]
|
||||
pub struct DatasetFts {
|
||||
// The serde alias means the key may be spelled `field` as well as `fields`.
#[serde(alias = "field")]
|
||||
// NOTE(review): the two `fields` lines below appear to be the pre- and
// post-change versions of the same field (key type `String` -> `Label`).
/// Per-field index settings, keyed by field name.
pub fields: HashMap<String, FtsIndexField>,
|
||||
pub fields: HashMap<Label, FtsIndexField>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
|
||||
122
crates/pile-config/src/misc.rs
Normal file
122
crates/pile-config/src/misc.rs
Normal file
@@ -0,0 +1,122 @@
|
||||
use core::slice;
|
||||
use std::fmt::{Debug, Display};
|
||||
use std::ops::Deref;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use smartstring::{LazyCompact, SmartString};
|
||||
|
||||
/// Either a single `T` or a list of `T`.
///
/// Marked `#[serde(untagged)]`, so the input carries no variant tag:
/// a bare value deserializes as `One`, a sequence as `Many`.
#[derive(Debug, Clone, Deserialize)]
|
||||
#[serde(untagged)]
|
||||
pub enum OneOrMany<T: Debug + Clone> {
|
||||
/// Exactly one value.
One(T),
|
||||
/// Zero or more values.
Many(Vec<T>),
|
||||
}
|
||||
|
||||
impl<T: Debug + Clone> OneOrMany<T> {
|
||||
pub fn to_vec(self) -> Vec<T> {
|
||||
match self {
|
||||
Self::One(x) => vec![x],
|
||||
Self::Many(x) => x,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_slice(&self) -> &[T] {
|
||||
match self {
|
||||
Self::One(x) => slice::from_ref(&x),
|
||||
Self::Many(x) => &x[..],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// MARK: Label
|
||||
//
|
||||
|
||||
/// A sanitized [String], guaranteed to only contain
|
||||
/// chars in `a-z`, `A-Z`, `0-9`, `-` and `_` (see [`Label::VALID_CHARS`]).
|
||||
///
|
||||
/// Used for names of datasets, fields, etc.
///
/// The empty string is accepted, since it contains no invalid character.
/// Deserialization goes through `TryFrom<String>`, so the same check is
/// applied to values read from a config; serialization emits a plain string.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
|
||||
#[serde(try_from = "String", into = "String")]
|
||||
pub struct Label(SmartString<LazyCompact>);
|
||||
|
||||
impl Label {
|
||||
pub const VALID_CHARS: &str =
|
||||
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_";
|
||||
|
||||
pub fn new(str: impl Into<String>) -> Option<Self> {
|
||||
let str: String = str.into();
|
||||
for c in str.chars() {
|
||||
if !Self::VALID_CHARS.contains(c) {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
return Some(Self(str.into()));
|
||||
}
|
||||
|
||||
pub fn as_str(&self) -> &str {
|
||||
&self.0
|
||||
}
|
||||
|
||||
pub fn into_string(self) -> String {
|
||||
self.0.into()
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Label {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Label> for String {
|
||||
fn from(value: Label) -> Self {
|
||||
value.0.into()
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<String> for Label {
|
||||
type Error = InvalidLabel;
|
||||
|
||||
fn try_from(value: String) -> Result<Self, Self::Error> {
|
||||
Self::new(value).ok_or(InvalidLabel)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&str> for Label {
|
||||
type Error = InvalidLabel;
|
||||
|
||||
fn try_from(value: &str) -> Result<Self, Self::Error> {
|
||||
Self::new(value).ok_or(InvalidLabel)
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<str> for Label {
|
||||
fn as_ref(&self) -> &str {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for Label {
|
||||
type Target = str;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Error produced by the `TryFrom<String>` / `TryFrom<&str>` conversions
/// into [`Label`] when the input fails validation.
///
/// Carries no data: it does not record which character was rejected.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub struct InvalidLabel;
|
||||
|
||||
impl Display for InvalidLabel {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"Invalid label: must only contain characters in {}",
|
||||
Label::VALID_CHARS
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Empty impl: `InvalidLabel` relies on the trait's default methods and on its
// `Debug` + `Display` impls.
impl std::error::Error for InvalidLabel {}
|
||||
@@ -11,10 +11,10 @@ workspace = true
|
||||
pile-config = { workspace = true }
|
||||
pile-audio = { workspace = true }
|
||||
|
||||
|
||||
serde_json = { workspace = true }
|
||||
itertools = { workspace = true }
|
||||
walkdir = { workspace = true }
|
||||
tantivy = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
jsonpath-rust = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use jsonpath_rust::JsonPath;
|
||||
use pile_config::{ConfigToml, DatasetFts};
|
||||
use pile_config::{ConfigToml, DatasetFts, Label};
|
||||
use serde_json::Value;
|
||||
use std::{path::PathBuf, sync::LazyLock};
|
||||
use tantivy::{
|
||||
@@ -106,7 +106,7 @@ impl DbFtsIndex {
|
||||
pub fn get_field(
|
||||
&self,
|
||||
json: &Value,
|
||||
field_name: &str,
|
||||
field_name: &Label,
|
||||
) -> Result<Option<String>, std::io::Error> {
|
||||
let field = match self.cfg.schema.get(field_name) {
|
||||
Some(x) => x,
|
||||
@@ -124,7 +124,7 @@ impl DbFtsIndex {
|
||||
warn!(
|
||||
message = "Path returned more than one value, this is not supported. Skipping.",
|
||||
?path,
|
||||
field = field_name
|
||||
field = field_name.to_string()
|
||||
);
|
||||
continue;
|
||||
}
|
||||
@@ -139,7 +139,7 @@ impl DbFtsIndex {
|
||||
warn!(
|
||||
message = "Invalid path, skipping",
|
||||
?path,
|
||||
field = field_name,
|
||||
field = field_name.to_string(),
|
||||
?error
|
||||
);
|
||||
continue;
|
||||
@@ -150,7 +150,7 @@ impl DbFtsIndex {
|
||||
Value::Null => {
|
||||
trace!(
|
||||
message = "Skipping field, is null",
|
||||
field = field_name,
|
||||
field = field_name.to_string(),
|
||||
path,
|
||||
value = ?val
|
||||
);
|
||||
@@ -174,7 +174,7 @@ impl DbFtsIndex {
|
||||
} else if x.len() > 1 {
|
||||
debug!(
|
||||
message = "Skipping field, is array with more than one element",
|
||||
field = field_name,
|
||||
field = field_name.to_string(),
|
||||
path,
|
||||
value = ?val
|
||||
);
|
||||
@@ -182,7 +182,7 @@ impl DbFtsIndex {
|
||||
} else {
|
||||
debug!(
|
||||
message = "Skipping field, is empty array",
|
||||
field = field_name,
|
||||
field = field_name.to_string(),
|
||||
path,
|
||||
value = ?val
|
||||
);
|
||||
@@ -192,7 +192,7 @@ impl DbFtsIndex {
|
||||
Value::Null => {
|
||||
trace!(
|
||||
message = "Skipping field, is null",
|
||||
field = field_name,
|
||||
field = field_name.to_string(),
|
||||
path,
|
||||
value = ?val
|
||||
);
|
||||
@@ -201,7 +201,7 @@ impl DbFtsIndex {
|
||||
Value::Object(_) => {
|
||||
trace!(
|
||||
message = "Skipping field, is object",
|
||||
field = field_name,
|
||||
field = field_name.to_string(),
|
||||
path,
|
||||
value = ?val
|
||||
);
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
mod traits;
|
||||
pub use traits::*;
|
||||
|
||||
mod misc;
|
||||
pub use misc::*;
|
||||
|
||||
pub mod index;
|
||||
pub mod item;
|
||||
pub mod source;
|
||||
|
||||
61
crates/pile-dataset/src/misc.rs
Normal file
61
crates/pile-dataset/src/misc.rs
Normal file
@@ -0,0 +1,61 @@
|
||||
use chrono::{DateTime, Utc};
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
/// Returns the age of a path as a chrono DateTime.
|
||||
/// - If the path doesn't exist, returns None
|
||||
/// - If it's a file, returns the modified time
|
||||
/// - If it's a directory, returns the LATEST modified time of all files within
|
||||
pub fn path_age(path: impl AsRef<Path>) -> Option<DateTime<Utc>> {
|
||||
let path = path.as_ref();
|
||||
|
||||
// Check if path exists
|
||||
if !path.exists() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let metadata = fs::metadata(path).ok()?;
|
||||
|
||||
if metadata.is_file() {
|
||||
// For files, return the modified time
|
||||
let modified = metadata.modified().ok()?;
|
||||
Some(modified.into())
|
||||
} else if metadata.is_dir() {
|
||||
// For directories, find the latest modified time of all files
|
||||
find_latest_modified(path)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn find_latest_modified(dir: &Path) -> Option<DateTime<Utc>> {
|
||||
let mut latest: Option<DateTime<Utc>> = None;
|
||||
|
||||
// Read directory entries
|
||||
let entries = fs::read_dir(dir).ok()?;
|
||||
|
||||
for entry in entries.flatten() {
|
||||
let path = entry.path();
|
||||
let metadata = entry.metadata().ok()?;
|
||||
|
||||
if metadata.is_file() {
|
||||
if let Ok(modified) = metadata.modified() {
|
||||
let dt: DateTime<Utc> = modified.into();
|
||||
latest = Some(match latest {
|
||||
Some(prev) if prev > dt => prev,
|
||||
_ => dt,
|
||||
});
|
||||
}
|
||||
} else if metadata.is_dir() {
|
||||
// Recursively check subdirectories
|
||||
if let Some(dir_latest) = find_latest_modified(&path) {
|
||||
latest = Some(match latest {
|
||||
Some(prev) if prev > dir_latest => prev,
|
||||
_ => dir_latest,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
latest
|
||||
}
|
||||
@@ -54,7 +54,7 @@ impl CliCmd for IndexCommand {
|
||||
.working_dir
|
||||
.clone()
|
||||
.unwrap_or(parent.join(".pile"))
|
||||
.join(&config.dataset.name);
|
||||
.join(&config.dataset.name.as_str());
|
||||
let fts_dir = working_dir.join("fts");
|
||||
|
||||
if fts_dir.is_dir() {
|
||||
@@ -70,7 +70,7 @@ impl CliCmd for IndexCommand {
|
||||
for (name, source) in &config.dataset.source {
|
||||
match source {
|
||||
Source::Flac { path: dir } => {
|
||||
let source = DirDataSource::new(name, dir.clone().to_vec());
|
||||
let source = DirDataSource::new(name.as_str(), dir.clone().to_vec());
|
||||
sources.push(source);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -64,7 +64,7 @@ impl CliCmd for LookupCommand {
|
||||
.working_dir
|
||||
.clone()
|
||||
.unwrap_or(parent.join(".pile"))
|
||||
.join(&config.dataset.name);
|
||||
.join(&config.dataset.name.as_str());
|
||||
let fts_dir = working_dir.join("fts");
|
||||
|
||||
if !fts_dir.is_dir() {
|
||||
|
||||
Reference in New Issue
Block a user