Refactor sidecars

This commit is contained in:
2026-03-16 22:24:30 -07:00
parent f2f5726d7b
commit 053459f340
25 changed files with 674 additions and 530 deletions

View File

@@ -1,12 +1,13 @@
use serde::Deserialize;
use std::{collections::HashMap, fmt::Debug, path::PathBuf};
use crate::{objectpath::ObjectPath, pattern::GroupPattern};
mod misc;
pub use misc::*;
use crate::objectpath::ObjectPath;
pub mod objectpath;
pub mod pattern;
pub static INIT_DB_TOML: &str = include_str!("./config.toml");
@@ -59,13 +60,9 @@ pub enum Source {
/// Must be relative.
path: PathBuf,
/// If true, all toml files are ignored.
/// Metadata can be added to any file using a {filename}.toml.
///
/// If false, toml files are treated as regular files
/// and sidecar metadata is disabled.
#[serde(default = "default_true")]
sidecars: bool,
/// How to group files into items in this source
#[serde(default)]
pattern: GroupPattern,
},
/// An S3-compatible object store bucket
@@ -84,9 +81,9 @@ pub enum Source {
credentials: S3Credentials,
/// If true, all .toml objects are treated as sidecar metadata files.
#[serde(default = "default_true")]
sidecars: bool,
/// How to group files into items in this source
#[serde(default)]
pattern: GroupPattern,
},
}

View File

@@ -58,7 +58,7 @@ pub enum PathSegment {
/// - `$` refers to the root object
/// - `.<name>` selects a field of an object
/// - `[n]` selects an item of an array
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ObjectPath {
pub segments: Vec<PathSegment>,
}

View File

@@ -0,0 +1,49 @@
use std::collections::HashMap;
use serde::{Deserialize, Deserializer, de};
use smartstring::{LazyCompact, SmartString};
use thiserror::Error;
use crate::{Label, objectpath::PathParseError as ObjectPathError};
mod parser;
pub use parser::GroupSegment;
/// Errors produced while parsing a group pattern string.
///
/// All positions are byte indices into the pattern string being parsed.
#[derive(Debug, Error, PartialEq)]
pub enum GroupPatternParseError {
/// A `{` or `}` appeared in an invalid position, or a `{` was never closed.
///
/// For a stray `}` the position is that of the `}`; for an unclosed `{`
/// it is the position of the outermost opening brace.
#[error("syntax error at index {position}")]
Syntax { position: usize },
/// The contents of a `{...}` block could not be parsed as an object path.
#[error("invalid object path {path:?}: {source}")]
InvalidObjectPath {
/// Byte index of the opening `{` of the offending block.
start: usize,
/// Byte index one past the closing `}` of the offending block.
end: usize,
/// The text between the braces that failed to parse.
path: SmartString<LazyCompact>,
/// The underlying object path parse error.
source: ObjectPathError,
},
}
/// A collection of parsed group patterns, keyed by label.
///
/// Deserialized from a map of label strings to pattern strings; every
/// pattern string is parsed into a sequence of [`GroupSegment`]s.
/// The default value contains no patterns.
#[derive(Debug, Clone, Default)]
pub struct GroupPattern {
/// Parsed segments of each pattern, in source order, keyed by label.
pub pattern: HashMap<Label, Vec<GroupSegment>>,
}
impl<'de> Deserialize<'de> for GroupPattern {
fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
let raw = HashMap::<String, String>::deserialize(deserializer)?;
let mut parts = HashMap::with_capacity(raw.len());
for (key, value) in raw {
let label = Label::try_from(key.as_str()).map_err(de::Error::custom)?;
let segments = parser::Parser::new()
.parse(&value)
.map_err(de::Error::custom)?
.into_iter()
.map(|(_, seg)| seg)
.collect();
parts.insert(label, segments);
}
Ok(GroupPattern { pattern: parts })
}
}

View File

@@ -0,0 +1,195 @@
use smartstring::{LazyCompact, SmartString};
use crate::{objectpath::ObjectPath, pattern::GroupPatternParseError};
/// One piece of a parsed group pattern.
///
/// Equality is derived unconditionally (not only in this crate's test
/// builds) so that code outside this crate can compare parsed patterns too.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GroupSegment {
    /// An object path taken from the inside of a `{...}` block.
    Path(ObjectPath),
    /// Text that appeared outside of any `{...}` block, kept verbatim.
    Literal(SmartString<LazyCompact>),
}
/// Stateless parser for group pattern strings.
///
/// Implements `Default` alongside `new()` (a public zero-argument
/// constructor without `Default` trips clippy's `new_without_default`),
/// and `Debug` so the type can be embedded in debuggable structures.
#[derive(Debug, Default)]
pub struct Parser {}
impl Parser {
    /// Create a new parser.
    pub fn new() -> Self {
        Self {}
    }

    /// Split a pattern string such as `{path}.literal{path}...` into segments.
    ///
    /// Returns every segment paired with the byte offset at which it starts
    /// in `source` (for a path, the offset of its opening `{`).
    ///
    /// - Text between an outermost `{` and its matching `}` is parsed as an
    ///   [`ObjectPath`]. Braces may nest inside a path; nesting depth is
    ///   counted to locate the matching `}`.
    /// - All other text becomes `Literal` segments. Empty literals are
    ///   never emitted.
    ///
    /// # Errors
    ///
    /// - `Syntax` at the position of a `}` that has no matching `{`.
    /// - `Syntax` at the position of the outermost `{` if it is never closed.
    /// - `InvalidObjectPath` if the text inside `{...}` is not a valid
    ///   object path.
    pub fn parse(self, source: &str) -> Result<Vec<(usize, GroupSegment)>, GroupPatternParseError> {
        let mut segments = Vec::new();

        // Brace nesting level; zero means we are in literal text.
        let mut nesting: usize = 0;
        // Byte offset where the segment currently being scanned begins.
        let mut segment_start: usize = 0;
        // Byte offset of the outermost `{` of the current path (for errors).
        let mut path_open: usize = 0;

        for (idx, ch) in source.char_indices() {
            match (ch, nesting) {
                // Opening brace in literal text: a path begins here.
                ('{', 0) => {
                    if segment_start < idx {
                        let literal = &source[segment_start..idx];
                        segments.push((segment_start, GroupSegment::Literal(literal.into())));
                    }
                    path_open = idx;
                    segment_start = idx + 1;
                    nesting = 1;
                }

                // Opening brace nested inside a path.
                ('{', _) => nesting += 1,

                // Closing brace with nothing to close.
                ('}', 0) => return Err(GroupPatternParseError::Syntax { position: idx }),

                // Closing brace that ends the outermost path expression.
                ('}', 1) => {
                    nesting = 0;
                    let raw = &source[segment_start..idx];
                    let parsed = match raw.parse::<ObjectPath>() {
                        Ok(parsed) => parsed,
                        Err(cause) => {
                            return Err(GroupPatternParseError::InvalidObjectPath {
                                start: path_open,
                                end: idx + 1,
                                path: raw.into(),
                                source: cause,
                            });
                        }
                    };
                    segments.push((path_open, GroupSegment::Path(parsed)));
                    // Whatever follows this `}` starts a new segment.
                    segment_start = idx + 1;
                }

                // Closing brace of a nested pair inside a path.
                ('}', _) => nesting -= 1,

                _ => {}
            }
        }

        // Ran out of input while still inside a path.
        if nesting != 0 {
            return Err(GroupPatternParseError::Syntax {
                position: path_open,
            });
        }

        // Whatever is left after the last path is a trailing literal.
        let tail = &source[segment_start..];
        if !tail.is_empty() {
            segments.push((segment_start, GroupSegment::Literal(tail.into())));
        }

        Ok(segments)
    }
}
//
// MARK: tests
//
#[expect(clippy::unwrap_used)]
#[cfg(test)]
mod tests {
    use super::*;

    /// Run a fresh parser over `source`.
    fn parse(source: &str) -> Result<Vec<(usize, GroupSegment)>, GroupPatternParseError> {
        Parser::new().parse(source)
    }

    /// Build the expected `Path` segment for a known-valid object path.
    fn path(s: &str) -> GroupSegment {
        GroupSegment::Path(s.parse().unwrap())
    }

    /// Build the expected `Literal` segment.
    fn lit(s: &str) -> GroupSegment {
        GroupSegment::Literal(s.into())
    }

    #[test]
    fn single_path() {
        assert_eq!(parse("{$.foo}").unwrap(), vec![(0, path("$.foo"))]);
    }

    #[test]
    fn single_literal() {
        assert_eq!(parse("hello").unwrap(), vec![(0, lit("hello"))]);
    }

    #[test]
    fn path_then_literal() {
        assert_eq!(
            parse("{$.foo}.txt").unwrap(),
            vec![(0, path("$.foo")), (7, lit(".txt"))]
        );
    }

    #[test]
    fn literal_then_path() {
        assert_eq!(
            parse("prefix/{$.foo}").unwrap(),
            vec![(0, lit("prefix/")), (7, path("$.foo"))]
        );
    }

    #[test]
    fn interleaved() {
        assert_eq!(
            parse("{$.a}.sep.{$.b}").unwrap(),
            vec![(0, path("$.a")), (5, lit(".sep.")), (10, path("$.b")),]
        );
    }

    #[test]
    fn unmatched_open_brace_error() {
        assert_eq!(
            parse("{$.foo"),
            Err(GroupPatternParseError::Syntax { position: 0 })
        );
    }

    #[test]
    fn unmatched_close_brace_in_literal_error() {
        assert_eq!(
            parse("foo}bar"),
            Err(GroupPatternParseError::Syntax { position: 3 })
        );
    }

    #[test]
    fn invalid_path_error() {
        assert_eq!(
            parse("{not-a-path}"),
            Err(GroupPatternParseError::InvalidObjectPath {
                start: 0,
                end: 12,
                path: "not-a-path".into(),
                source: crate::objectpath::PathParseError::MustStartWithRoot { position: 0 },
            })
        );
    }

    #[test]
    fn literal_between_paths() {
        assert_eq!(
            parse("foo{$.x}bar").unwrap(),
            vec![(0, lit("foo")), (3, path("$.x")), (8, lit("bar")),]
        );
    }

    // An empty pattern yields no segments at all (no empty literal).
    #[test]
    fn empty_input() {
        assert!(parse("").unwrap().is_empty());
    }

    // Two paths with nothing between them must not produce an empty literal.
    #[test]
    fn adjacent_paths() {
        assert_eq!(
            parse("{$.a}{$.b}").unwrap(),
            vec![(0, path("$.a")), (5, path("$.b"))]
        );
    }

    // A `}` directly after a completed path has nothing left to close.
    #[test]
    fn stray_close_brace_after_path_error() {
        assert_eq!(
            parse("{$.a}}"),
            Err(GroupPatternParseError::Syntax { position: 5 })
        );
    }

    // The inner pair is balanced but the outer `{` never closes; the error
    // points at the outermost opening brace.
    #[test]
    fn unclosed_outer_brace_with_nested_pair_error() {
        assert_eq!(
            parse("{$.a{b}"),
            Err(GroupPatternParseError::Syntax { position: 0 })
        );
    }

    // Offsets are byte indices: `é` occupies two bytes in UTF-8.
    #[test]
    fn multibyte_literal_uses_byte_offsets() {
        assert_eq!(
            parse("é{$.a}").unwrap(),
            vec![(0, lit("é")), (2, path("$.a"))]
        );
    }
}