From 3d9f0bd990e9a2c06b32a2f49422dfd0a80d9bdd Mon Sep 17 00:00:00 2001
From: rm-dr <96270320+rm-dr@users.noreply.github.com>
Date: Tue, 6 Jan 2026 23:05:33 -0800
Subject: [PATCH] Initial `pile-config`

---
 crates/pile-config/Cargo.toml      |  16 +++
 crates/pile-config/src/config.toml |  15 +++
 crates/pile-config/src/lib.rs      |  81 +++++++++++++++
 crates/pile-config/src/post.rs     | 155 +++++++++++++++++++++++++++++
 4 files changed, 267 insertions(+)
 create mode 100644 crates/pile-config/Cargo.toml
 create mode 100644 crates/pile-config/src/config.toml
 create mode 100644 crates/pile-config/src/lib.rs
 create mode 100644 crates/pile-config/src/post.rs

diff --git a/crates/pile-config/Cargo.toml b/crates/pile-config/Cargo.toml
new file mode 100644
index 0000000..9b637d1
--- /dev/null
+++ b/crates/pile-config/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "pile-config"
+version = { workspace = true }
+rust-version = { workspace = true }
+edition = { workspace = true }
+
+[lints]
+workspace = true
+
+[dependencies]
+serde = { workspace = true }
+itertools = { workspace = true }
+serde_json = { workspace = true }
+
+[dev-dependencies]
+toml = { workspace = true }
diff --git a/crates/pile-config/src/config.toml b/crates/pile-config/src/config.toml
new file mode 100644
index 0000000..c53ef15
--- /dev/null
+++ b/crates/pile-config/src/config.toml
@@ -0,0 +1,15 @@
+[dataset]
+name = "dataset"
+list_dir = "./lists"
+
+[schema]
+description = { type = "text", path = "$.metadata.description" }
+title = { type = "text", path = "$.metadata.title" }
+author = { type = "text", path = "$.metadata.author" }
+language = { type = "text", path = "$.metadata.language" }
+aacid = { type = "text", path = "$.aacid" }
+zlibrary_id = { type = "text", path = "$.metadata.zlibrary_id" }
+
+[fts]
+dir = "./fts"
+field.description = { tokenize = true }
diff --git a/crates/pile-config/src/lib.rs b/crates/pile-config/src/lib.rs
new file mode 100644
index 0000000..e7c33f2
--- /dev/null
+++ b/crates/pile-config/src/lib.rs
@@ -0,0 +1,81 @@
+use serde::Deserialize;
+use std::{collections::HashMap, path::PathBuf};
+
+/// Contents of the bundled `config.toml`, embedded at compile time.
+pub static INIT_DB_TOML: &str = include_str!("./config.toml");
+
+mod post;
+pub use post::*;
+
+/// The bundled `config.toml` must deserialize into [`ConfigToml`].
+#[test]
+fn init_db_toml_valid() {
+    toml::from_str::<ConfigToml>(INIT_DB_TOML).unwrap();
+}
+
+/// Top-level layout of a configuration file.
+#[derive(Debug, Clone, Deserialize)]
+pub struct ConfigToml {
+    /// The `[dataset]` table.
+    pub dataset: DatasetConfig,
+
+    /// The `[schema]` table: one [`FieldSpec`] per field name.
+    pub schema: HashMap<String, FieldSpec>,
+
+    /// The optional `[fts]` table.
+    pub fts: Option<DatasetFts>,
+}
+
+/// The `[dataset]` table.
+#[derive(Debug, Clone, Deserialize)]
+pub struct DatasetConfig {
+    /// The dataset's `name` key.
+    pub name: String,
+
+    /// Optional `list_dir` path; the bundled config sets it to `./lists`.
+    pub list_dir: Option<PathBuf>,
+}
+
+/// One entry of the `[schema]` table.
+#[derive(Debug, Clone, Deserialize)]
+pub struct FieldSpec {
+    /// The type of this field
+    pub r#type: FieldType,
+
+    /// How to find this field in a data entry
+    pub path: String,
+
+    /// How to post-process this field
+    #[serde(default)]
+    pub post: Vec<FieldSpecPost>,
+}
+
+/// The type of a schema field, written in lowercase in the config.
+#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
+#[serde(rename_all = "lowercase")]
+pub enum FieldType {
+    /// Written as `"text"`.
+    Text,
+}
+
+//
+// MARK: fts
+//
+
+/// The `[fts]` table.
+#[derive(Debug, Clone, Deserialize)]
+pub struct DatasetFts {
+    /// The `dir` path; the bundled config sets it to `./fts`.
+    pub dir: PathBuf,
+
+    /// Per-field settings keyed by name; also accepted under the key `field`.
+    #[serde(alias = "field")]
+    pub fields: HashMap<String, FtsIndexField>,
+}
+
+/// Settings for one field listed under `[fts]`.
+#[derive(Debug, Clone, Deserialize)]
+pub struct FtsIndexField {
+    /// The `tokenize` flag for this field.
+    pub tokenize: bool,
+}
diff --git a/crates/pile-config/src/post.rs b/crates/pile-config/src/post.rs
new file mode 100644
index 0000000..5b43116
--- /dev/null
+++ b/crates/pile-config/src/post.rs
@@ -0,0 +1,155 @@
+use itertools::Itertools;
+use serde::Deserialize;
+use serde_json::Value;
+
+/// One post-processing step for a field's value.
+///
+/// Deserialized untagged: the key that is present (`trim_suffix`,
+/// `trim_prefix`, `case`, `join` or `notempty`) selects the variant.
+#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
+#[serde(untagged)]
+pub enum FieldSpecPost {
+    /// Strip this suffix from strings that end with it.
+    TrimSuffix { trim_suffix: String },
+    /// Strip this prefix from strings that start with it.
+    TrimPrefix { trim_prefix: String },
+    /// Convert strings to the given case.
+    SetCase { case: Case },
+    /// Concatenate array elements into one string with this separator.
+    Join { join: String },
+    /// When `true`, drop nulls, empty strings and empty arrays.
+    NotEmpty { notempty: bool },
+}
+
+impl FieldSpecPost {
+    /// Apply this step to `val` and return the transformed value.
+    ///
+    /// Returns `None` when the value is dropped. Every step except
+    /// `notempty = false` drops `null`; `trim_prefix` and `join` also drop
+    /// objects. Apart from `notempty`, steps recurse into arrays (and `case`
+    /// and `trim_suffix` into objects, whose keys are rewritten as well);
+    /// one dropped element drops the whole value.
+    pub fn apply(&self, val: &Value) -> Option<Value> {
+        match self {
+            Self::NotEmpty { notempty: false } => Some(val.clone()),
+            Self::NotEmpty { notempty: true } => match val {
+                Value::Null => None,
+                Value::String(x) if x.is_empty() => None,
+                Value::Array(x) if x.is_empty() => None,
+                x => Some(x.clone()),
+            },
+
+            Self::SetCase { case } => match val {
+                Value::Null => None,
+                Value::Bool(_) | Value::Number(_) => Some(val.clone()),
+                Value::String(x) => Some(Value::String(case.convert(x))),
+                Value::Array(x) => self.apply_to_items(x),
+                Value::Object(x) => self.apply_to_entries(x, |key| case.convert(key)),
+            },
+
+            Self::TrimSuffix { trim_suffix } => {
+                let trim = |text: &str| -> String {
+                    text.strip_suffix(trim_suffix.as_str())
+                        .unwrap_or(text)
+                        .to_owned()
+                };
+                match val {
+                    Value::Null => None,
+                    Value::Bool(_) | Value::Number(_) => Some(Value::String(val.to_string())),
+                    Value::String(x) => Some(Value::String(trim(x))),
+                    Value::Array(x) => self.apply_to_items(x),
+                    Value::Object(x) => self.apply_to_entries(x, trim),
+                }
+            }
+
+            // NOTE(review): unlike `trim_suffix`, this drops objects instead of
+            // trimming their keys. Behaviour kept as-is; confirm it is intended.
+            Self::TrimPrefix { trim_prefix } => match val {
+                Value::Null | Value::Object(_) => None,
+                Value::Bool(_) | Value::Number(_) => Some(Value::String(val.to_string())),
+                Value::String(x) => Some(Value::String(
+                    x.strip_prefix(trim_prefix.as_str())
+                        .unwrap_or(x)
+                        .to_owned(),
+                )),
+                Value::Array(x) => self.apply_to_items(x),
+            },
+
+            Self::Join { join } => match val {
+                Value::Null | Value::Object(_) => None,
+                Value::Bool(_) | Value::Number(_) => Some(Value::String(val.to_string())),
+                Value::String(x) => Some(Value::String(x.clone())),
+                Value::Array(x) => {
+                    // Join the raw text of each element. `Value`'s `Display`
+                    // impl emits JSON, which would wrap every string in quotes.
+                    let parts = x
+                        .iter()
+                        .map(|item| self.apply(item).map(plain_text))
+                        .collect::<Option<Vec<_>>>()?;
+                    Some(Value::String(parts.into_iter().join(join)))
+                }
+            },
+        }
+    }
+
+    /// Apply this step to each element; `None` if any element is dropped.
+    fn apply_to_items(&self, items: &[Value]) -> Option<Value> {
+        items
+            .iter()
+            .map(|item| self.apply(item))
+            .collect::<Option<Vec<_>>>()
+            .map(Value::Array)
+    }
+
+    /// Rewrite each key with `rekey` and apply this step to each value;
+    /// `None` if any value is dropped.
+    fn apply_to_entries(
+        &self,
+        entries: &serde_json::Map<String, Value>,
+        rekey: impl Fn(&str) -> String,
+    ) -> Option<Value> {
+        entries
+            .iter()
+            .map(|(key, item)| self.apply(item).map(|item| (rekey(key), item)))
+            .collect::<Option<serde_json::Map<_, _>>>()
+            .map(Value::Object)
+    }
+}
+
+/// Unwrap a string value to its raw text; other values use their JSON form.
+fn plain_text(val: Value) -> String {
+    match val {
+        Value::String(text) => text,
+        other => other.to_string(),
+    }
+}
+
+/// Target letter case for `SetCase`.
+#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
+#[serde(rename_all = "lowercase")]
+pub enum Case {
+    /// Written as `"lower"`.
+    Lower,
+    /// Written as `"upper"`.
+    Upper,
+}
+
+impl Case {
+    /// Return `text` converted to this case.
+    fn convert(self, text: &str) -> String {
+        match self {
+            Self::Lower => text.to_lowercase(),
+            Self::Upper => text.to_uppercase(),
+        }
+    }
+}
+
+/// Joined strings must not be wrapped in JSON quotes.
+#[test]
+fn join_uses_raw_text() {
+    let step = FieldSpecPost::Join {
+        join: ", ".to_owned(),
+    };
+    let out = step.apply(&serde_json::json!(["a", 1, "b"]));
+    assert_eq!(out, Some(Value::String("a, 1, b".to_owned())));
+}