Initial pile-config
This commit is contained in:
16
crates/pile-config/Cargo.toml
Normal file
16
crates/pile-config/Cargo.toml
Normal file
@@ -0,0 +1,16 @@
|
||||
[package]
|
||||
name = "pile-config"
|
||||
version = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
serde = { workspace = true }
|
||||
itertools = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
toml = { workspace = true }
|
||||
46
crates/pile-config/src/config.toml
Normal file
46
crates/pile-config/src/config.toml
Normal file
@@ -0,0 +1,46 @@
|
||||
[dataset]
|
||||
# This dataset's name. Must be unique.
|
||||
name = "dataset"
|
||||
|
||||
# root directory for fts indices, relative to the parent dir of this config file.
|
||||
# Files are written to {working_dir}/{dataset.name}/*.
|
||||
# Default is ".pile"
|
||||
#
|
||||
# working_dir = ".pile"
|
||||
|
||||
# Data sources available in this dataset
|
||||
source."music" = { type = "flac", path = ["music", "music-2"] }
|
||||
|
||||
|
||||
# This dataset's schema.
|
||||
# Defines normalized fields that are extracted from source entries on-demand.
|
||||
#
|
||||
# Format is as follows:
|
||||
#
|
||||
# "field-name" = {
|
||||
# # The type of data this field contains.
|
||||
# # Only text is supported in this version.
|
||||
# type = "text",
|
||||
#
|
||||
# # An array of jsonpaths (rfc9535) used to extract this field from each source entry.
|
||||
# # These are evaluated in order, the first non-null value is used.
|
||||
# # A single string is equivalent to an array with one element.
|
||||
# path = "$.json.path"
|
||||
# }
|
||||
[schema]
|
||||
album = { type = "text", path = "$.Album" }
|
||||
isrc = { type = "text", path = "$.Isrc" }
|
||||
artist = { type = "text", path = ["$.Artist", "$.TrackArtist"] }
|
||||
lyrics = { type = "text", path = "$.Lyrics" }
|
||||
genre = { type = "text", path = "$.Genre" }
|
||||
title = { type = "text", path = ["$.Title", "$.TrackTitle"] }
|
||||
|
||||
# Fts configuration.
|
||||
# Determines which fields (defined in `schema`) are included in the fts index.
|
||||
[fts]
|
||||
field.album = { tokenize = true }
|
||||
field.isrc = { tokenize = true }
|
||||
field.artist = { tokenize = true }
|
||||
field.lyrics = { tokenize = true }
|
||||
field.genre = { tokenize = true }
|
||||
field.title = { tokenize = true }
|
||||
104
crates/pile-config/src/lib.rs
Normal file
104
crates/pile-config/src/lib.rs
Normal file
@@ -0,0 +1,104 @@
|
||||
use serde::Deserialize;
|
||||
use std::{collections::HashMap, fmt::Debug, path::PathBuf, slice};
|
||||
|
||||
/// Contents of the bundled `config.toml`, embedded at compile time with `include_str!`.
///
/// NOTE(review): the name suggests this is the template written when a new
/// dataset is initialised — confirm against callers.
pub static INIT_DB_TOML: &str = include_str!("./config.toml");
|
||||
|
||||
mod post;
|
||||
pub use post::*;
|
||||
|
||||
/// The bundled config template must deserialize into [`ConfigToml`].
#[test]
fn init_db_toml_valid() {
    // Parse first, then unwrap, so a failure panics with the parser's error.
    let parsed: Result<ConfigToml, _> = toml::from_str(INIT_DB_TOML);
    parsed.unwrap();
}
|
||||
|
||||
/// A value that may be written either as a single item or as a list of items.
///
/// Deserialized with `#[serde(untagged)]`: no tag is read from the input, the
/// data is matched against the variants declared below.
#[derive(Debug, Clone, Deserialize)]
|
||||
#[serde(untagged)]
|
||||
pub enum OneOrMany<T: Debug + Clone> {
|
||||
/// A single bare value.
One(T),
|
||||
/// A list of values.
Many(Vec<T>),
|
||||
}
|
||||
|
||||
impl<T: Debug + Clone> OneOrMany<T> {
|
||||
pub fn to_vec(self) -> Vec<T> {
|
||||
match self {
|
||||
Self::One(x) => vec![x],
|
||||
Self::Many(x) => x,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_slice(&self) -> &[T] {
|
||||
match self {
|
||||
Self::One(x) => slice::from_ref(&x),
|
||||
Self::Many(x) => &x[..],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Top-level layout of a dataset config file: the `dataset`, `schema` and
/// (optional) `fts` tables.
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct ConfigToml {
|
||||
/// The `dataset` table: name, working directory and data sources.
pub dataset: DatasetConfig,
|
||||
/// The `schema` table, mapping each field name to its extraction spec.
pub schema: HashMap<String, FieldSpec>,
|
||||
/// The `fts` table; `None` when the table is absent from the file.
pub fts: Option<DatasetFts>,
|
||||
}
|
||||
|
||||
/// The `dataset` table of the config file.
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct DatasetConfig {
|
||||
/// This dataset's name. Must be unique.
|
||||
pub name: String,
|
||||
|
||||
/// Root directory for indices; `None` when the key is omitted.
/// NOTE(review): the bundled config template says this is relative to the
/// config file's parent directory and defaults to ".pile" — that default is
/// not applied in this struct, so confirm where it is.
|
||||
pub working_dir: Option<PathBuf>,
|
||||
|
||||
/// Data sources available in this dataset, keyed by source name.
|
||||
pub source: HashMap<String, Source>,
|
||||
|
||||
/// Post-processing steps; an empty list when the key is omitted.
/// NOTE(review): the previous comment ("this field") looks copied from
/// `FieldSpec::post`; what dataset-level steps apply to is not visible here.
|
||||
#[serde(default)]
|
||||
pub post: Vec<FieldSpecPost>,
|
||||
}
|
||||
|
||||
/// A data source of a dataset.
///
/// The variant is selected by the `type` key of the table, using the
/// lowercased variant name (e.g. `type = "flac"`).
#[derive(Debug, Clone, Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum Source {
|
||||
/// FLAC files; `path` is a single path or a list of paths
/// (presumably directories — confirm against the code that reads them).
|
||||
Flac { path: OneOrMany<PathBuf> },
|
||||
}
|
||||
|
||||
//
|
||||
// MARK: schema
|
||||
//
|
||||
|
||||
/// Specification of one schema field: its type, where to find it, and how to
/// post-process it.
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct FieldSpec {
|
||||
/// The type of data this field contains.
|
||||
pub r#type: FieldType,
|
||||
|
||||
/// Where to find this field in a data entry: one path string or a list of them.
/// NOTE(review): the bundled config template describes these as jsonpaths
/// (rfc9535) tried in order, first non-null wins — the evaluation itself is
/// not in this file.
|
||||
pub path: OneOrMany<String>,
|
||||
|
||||
/// Post-processing steps for this field; an empty list when the key is omitted.
|
||||
#[serde(default)]
|
||||
pub post: Vec<FieldSpecPost>,
|
||||
}
|
||||
|
||||
/// The type of data a schema field contains.
///
/// Variant names are lowercased when (de)serialized, e.g. `type = "text"`.
#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum FieldType {
|
||||
/// Textual data (written as `"text"`); currently the only variant.
Text,
|
||||
}
|
||||
|
||||
//
|
||||
// MARK: fts
|
||||
//
|
||||
|
||||
/// The `fts` table of the config file.
#[derive(Debug, Clone, Deserialize, Default)]
|
||||
pub struct DatasetFts {
|
||||
/// Per-field index settings, keyed by field name.
/// The key may be spelled `fields` or `field` in the config file.
#[serde(alias = "field")]
|
||||
pub fields: HashMap<String, FtsIndexField>,
|
||||
}
|
||||
|
||||
/// Index settings for a single field listed in the `fts` table.
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct FtsIndexField {
|
||||
/// Required flag read from the config.
/// NOTE(review): presumably controls whether the field's text is tokenized
/// for the index — confirm against the indexer, which is not in this file.
pub tokenize: bool,
|
||||
}
|
||||
121
crates/pile-config/src/post.rs
Normal file
121
crates/pile-config/src/post.rs
Normal file
@@ -0,0 +1,121 @@
|
||||
use itertools::Itertools;
|
||||
use serde::Deserialize;
|
||||
use serde_json::Value;
|
||||
|
||||
/// One post-processing step that can be applied to an extracted JSON value.
///
/// Deserialized with `#[serde(untagged)]`: there is no tag, the step is
/// recognised by the key it carries (`trim_suffix`, `trim_prefix`, `case`,
/// `join` or `notempty`).
#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
|
||||
#[serde(untagged)]
|
||||
pub enum FieldSpecPost {
|
||||
/// Strip `trim_suffix` from the end of strings that end with it.
TrimSuffix { trim_suffix: String },
|
||||
/// Strip `trim_prefix` from the start of strings that start with it.
TrimPrefix { trim_prefix: String },
|
||||
/// Convert strings to the given letter case.
SetCase { case: Case },
|
||||
/// Combine the elements of an array into one string, separated by `join`.
Join { join: String },
|
||||
/// When `true`, reject null, empty-string and empty-array values;
/// when `false`, pass every value through unchanged.
NotEmpty { notempty: bool },
|
||||
}
|
||||
|
||||
impl FieldSpecPost {
|
||||
pub fn apply(&self, val: &Value) -> Option<Value> {
|
||||
Some(match self {
|
||||
Self::NotEmpty { notempty: false } => val.clone(),
|
||||
Self::NotEmpty { notempty: true } => match val {
|
||||
Value::Null => return None,
|
||||
Value::String(x) if x.is_empty() => return None,
|
||||
Value::Array(x) if x.is_empty() => return None,
|
||||
x => x.clone(),
|
||||
},
|
||||
|
||||
Self::SetCase { case: Case::Lower } => match val {
|
||||
Value::Null => return None,
|
||||
Value::Bool(_) | Value::Number(_) => val.clone(),
|
||||
Value::String(x) => Value::String(x.to_lowercase()),
|
||||
|
||||
Value::Array(x) => {
|
||||
Value::Array(x.iter().map(|x| self.apply(x)).collect::<Option<_>>()?)
|
||||
}
|
||||
|
||||
Value::Object(x) => Value::Object(
|
||||
x.iter()
|
||||
.map(|x| (x.0.to_lowercase(), self.apply(x.1)))
|
||||
.map(|x| x.1.map(|y| (x.0, y)))
|
||||
.collect::<Option<_>>()?,
|
||||
),
|
||||
},
|
||||
|
||||
Self::SetCase { case: Case::Upper } => match val {
|
||||
Value::Null => return None,
|
||||
Value::Bool(_) | Value::Number(_) => val.clone(),
|
||||
Value::String(x) => Value::String(x.to_uppercase()),
|
||||
|
||||
Value::Array(x) => {
|
||||
Value::Array(x.iter().map(|x| self.apply(x)).collect::<Option<_>>()?)
|
||||
}
|
||||
|
||||
Value::Object(x) => Value::Object(
|
||||
x.iter()
|
||||
.map(|x| (x.0.to_uppercase(), self.apply(x.1)))
|
||||
.map(|x| x.1.map(|y| (x.0, y)))
|
||||
.collect::<Option<_>>()?,
|
||||
),
|
||||
},
|
||||
|
||||
Self::TrimSuffix { trim_suffix } => match val {
|
||||
Value::Null => return None,
|
||||
Value::Bool(_) | Value::Number(_) => Value::String(val.to_string()),
|
||||
|
||||
Value::String(x) => {
|
||||
Value::String(x.strip_suffix(trim_suffix).unwrap_or(&x).to_owned())
|
||||
}
|
||||
|
||||
Value::Array(x) => {
|
||||
Value::Array(x.iter().map(|x| self.apply(x)).collect::<Option<_>>()?)
|
||||
}
|
||||
|
||||
Value::Object(x) => Value::Object(
|
||||
x.iter()
|
||||
.map(|x| {
|
||||
(
|
||||
x.0.strip_suffix(trim_suffix).unwrap_or(&x.0).to_owned(),
|
||||
self.apply(x.1),
|
||||
)
|
||||
})
|
||||
.map(|x| x.1.map(|y| (x.0, y)))
|
||||
.collect::<Option<_>>()?,
|
||||
),
|
||||
},
|
||||
|
||||
Self::TrimPrefix { trim_prefix } => match val {
|
||||
Value::Null => return None,
|
||||
Value::Object(_) => return None,
|
||||
Value::Bool(_) | Value::Number(_) => Value::String(val.to_string()),
|
||||
|
||||
Value::String(x) => {
|
||||
Value::String(x.strip_prefix(trim_prefix).unwrap_or(&x).to_owned())
|
||||
}
|
||||
|
||||
Value::Array(x) => {
|
||||
Value::Array(x.iter().map(|x| self.apply(x)).collect::<Option<_>>()?)
|
||||
}
|
||||
},
|
||||
|
||||
Self::Join { join } => match val {
|
||||
Value::Null => return None,
|
||||
Value::Object(_) => return None,
|
||||
Value::Bool(_) | Value::Number(_) => Value::String(val.to_string()),
|
||||
Value::String(x) => Value::String(x.clone()),
|
||||
Value::Array(x) => Value::String(
|
||||
x.iter()
|
||||
.map(|x| self.apply(x))
|
||||
.collect::<Option<Vec<_>>>()?
|
||||
.into_iter()
|
||||
.join(join),
|
||||
),
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Letter case selected by a `SetCase` post-processing step.
///
/// Variant names are lowercased when (de)serialized: `"lower"` / `"upper"`.
#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum Case {
|
||||
/// Convert to lowercase.
Lower,
|
||||
/// Convert to uppercase.
Upper,
|
||||
}
|
||||
Reference in New Issue
Block a user