Initial pile-config
This commit is contained in:
16
crates/pile-config/Cargo.toml
Normal file
16
crates/pile-config/Cargo.toml
Normal file
@@ -0,0 +1,16 @@
|
||||
[package]
|
||||
name = "pile-config"
|
||||
version = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
serde = { workspace = true }
|
||||
itertools = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
toml = { workspace = true }
|
||||
15
crates/pile-config/src/config.toml
Normal file
15
crates/pile-config/src/config.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
[dataset]
|
||||
name = "dataset"
|
||||
list_dir = "./lists"
|
||||
|
||||
[schema]
|
||||
description = { type = "text", path = "$.metadata.description" }
|
||||
title = { type = "text", path = "$.metadata.title" }
|
||||
author = { type = "text", path = "$.metadata.author" }
|
||||
language = { type = "text", path = "$.metadata.language" }
|
||||
aacid = { type = "text", path = "$.aacid" }
|
||||
zlibrary_id = { type = "text", path = "$.metadata.zlibrary_id" }
|
||||
|
||||
[fts]
|
||||
dir = "./fts"
|
||||
field.description = { tokenize = true }
|
||||
61
crates/pile-config/src/lib.rs
Normal file
61
crates/pile-config/src/lib.rs
Normal file
@@ -0,0 +1,61 @@
|
||||
use serde::Deserialize;
|
||||
use std::{collections::HashMap, path::PathBuf};
|
||||
|
||||
/// Contents of the `config.toml` file that sits next to this source file,
/// embedded into the binary at compile time via `include_str!`.
pub static INIT_DB_TOML: &str = include_str!("./config.toml");
|
||||
|
||||
mod post;
|
||||
pub use post::*;
|
||||
|
||||
/// The embedded `config.toml` must deserialize into a [`ConfigToml`].
#[test]
fn init_db_toml_valid() {
    let parsed: Result<ConfigToml, _> = toml::from_str(INIT_DB_TOML);
    // Unwrapping keeps the parse error in the panic message if this fails.
    parsed.unwrap();
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct ConfigToml {
|
||||
pub dataset: DatasetConfig,
|
||||
pub schema: HashMap<String, FieldSpec>,
|
||||
pub fts: Option<DatasetFts>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct DatasetConfig {
|
||||
pub name: String,
|
||||
pub list_dir: Option<PathBuf>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct FieldSpec {
|
||||
/// The type of this field
|
||||
pub r#type: FieldType,
|
||||
|
||||
/// How to find this field in a data entry
|
||||
pub path: String,
|
||||
|
||||
/// How to post-process this field
|
||||
#[serde(default)]
|
||||
pub post: Vec<FieldSpecPost>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum FieldType {
|
||||
Text,
|
||||
}
|
||||
|
||||
//
|
||||
// MARK: fts
|
||||
//
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct DatasetFts {
|
||||
pub dir: PathBuf,
|
||||
|
||||
#[serde(alias = "field")]
|
||||
pub fields: HashMap<String, FtsIndexField>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct FtsIndexField {
|
||||
pub tokenize: bool,
|
||||
}
|
||||
121
crates/pile-config/src/post.rs
Normal file
121
crates/pile-config/src/post.rs
Normal file
@@ -0,0 +1,121 @@
|
||||
use itertools::Itertools;
|
||||
use serde::Deserialize;
|
||||
use serde_json::Value;
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
|
||||
#[serde(untagged)]
|
||||
pub enum FieldSpecPost {
|
||||
TrimSuffix { trim_suffix: String },
|
||||
TrimPrefix { trim_prefix: String },
|
||||
SetCase { case: Case },
|
||||
Join { join: String },
|
||||
NotEmpty { notempty: bool },
|
||||
}
|
||||
|
||||
impl FieldSpecPost {
|
||||
pub fn apply(&self, val: &Value) -> Option<Value> {
|
||||
Some(match self {
|
||||
Self::NotEmpty { notempty: false } => val.clone(),
|
||||
Self::NotEmpty { notempty: true } => match val {
|
||||
Value::Null => return None,
|
||||
Value::String(x) if x.is_empty() => return None,
|
||||
Value::Array(x) if x.is_empty() => return None,
|
||||
x => x.clone(),
|
||||
},
|
||||
|
||||
Self::SetCase { case: Case::Lower } => match val {
|
||||
Value::Null => return None,
|
||||
Value::Bool(_) | Value::Number(_) => val.clone(),
|
||||
Value::String(x) => Value::String(x.to_lowercase()),
|
||||
|
||||
Value::Array(x) => {
|
||||
Value::Array(x.iter().map(|x| self.apply(x)).collect::<Option<_>>()?)
|
||||
}
|
||||
|
||||
Value::Object(x) => Value::Object(
|
||||
x.iter()
|
||||
.map(|x| (x.0.to_lowercase(), self.apply(x.1)))
|
||||
.map(|x| x.1.map(|y| (x.0, y)))
|
||||
.collect::<Option<_>>()?,
|
||||
),
|
||||
},
|
||||
|
||||
Self::SetCase { case: Case::Upper } => match val {
|
||||
Value::Null => return None,
|
||||
Value::Bool(_) | Value::Number(_) => val.clone(),
|
||||
Value::String(x) => Value::String(x.to_uppercase()),
|
||||
|
||||
Value::Array(x) => {
|
||||
Value::Array(x.iter().map(|x| self.apply(x)).collect::<Option<_>>()?)
|
||||
}
|
||||
|
||||
Value::Object(x) => Value::Object(
|
||||
x.iter()
|
||||
.map(|x| (x.0.to_uppercase(), self.apply(x.1)))
|
||||
.map(|x| x.1.map(|y| (x.0, y)))
|
||||
.collect::<Option<_>>()?,
|
||||
),
|
||||
},
|
||||
|
||||
Self::TrimSuffix { trim_suffix } => match val {
|
||||
Value::Null => return None,
|
||||
Value::Bool(_) | Value::Number(_) => Value::String(val.to_string()),
|
||||
|
||||
Value::String(x) => {
|
||||
Value::String(x.strip_suffix(trim_suffix).unwrap_or(&x).to_owned())
|
||||
}
|
||||
|
||||
Value::Array(x) => {
|
||||
Value::Array(x.iter().map(|x| self.apply(x)).collect::<Option<_>>()?)
|
||||
}
|
||||
|
||||
Value::Object(x) => Value::Object(
|
||||
x.iter()
|
||||
.map(|x| {
|
||||
(
|
||||
x.0.strip_suffix(trim_suffix).unwrap_or(&x.0).to_owned(),
|
||||
self.apply(x.1),
|
||||
)
|
||||
})
|
||||
.map(|x| x.1.map(|y| (x.0, y)))
|
||||
.collect::<Option<_>>()?,
|
||||
),
|
||||
},
|
||||
|
||||
Self::TrimPrefix { trim_prefix } => match val {
|
||||
Value::Null => return None,
|
||||
Value::Object(_) => return None,
|
||||
Value::Bool(_) | Value::Number(_) => Value::String(val.to_string()),
|
||||
|
||||
Value::String(x) => {
|
||||
Value::String(x.strip_prefix(trim_prefix).unwrap_or(&x).to_owned())
|
||||
}
|
||||
|
||||
Value::Array(x) => {
|
||||
Value::Array(x.iter().map(|x| self.apply(x)).collect::<Option<_>>()?)
|
||||
}
|
||||
},
|
||||
|
||||
Self::Join { join } => match val {
|
||||
Value::Null => return None,
|
||||
Value::Object(_) => return None,
|
||||
Value::Bool(_) | Value::Number(_) => Value::String(val.to_string()),
|
||||
Value::String(x) => Value::String(x.clone()),
|
||||
Value::Array(x) => Value::String(
|
||||
x.iter()
|
||||
.map(|x| self.apply(x))
|
||||
.collect::<Option<Vec<_>>>()?
|
||||
.into_iter()
|
||||
.join(join),
|
||||
),
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum Case {
|
||||
Lower,
|
||||
Upper,
|
||||
}
|
||||
Reference in New Issue
Block a user