Add ObjectPath query language
This commit is contained in:
@@ -10,6 +10,7 @@ workspace = true
|
||||
[dependencies]
|
||||
serde = { workspace = true }
|
||||
smartstring = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
toml = { workspace = true }
|
||||
|
||||
@@ -7,10 +7,14 @@ pub use post::*;
|
||||
mod misc;
|
||||
pub use misc::*;
|
||||
|
||||
use crate::objectpath::ObjectPath;
|
||||
|
||||
pub mod objectpath;
|
||||
|
||||
pub static INIT_DB_TOML: &str = include_str!("./config.toml");
|
||||
|
||||
#[test]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
#[expect(clippy::expect_used)]
|
||||
fn init_db_toml_valid() {
|
||||
toml::from_str::<ConfigToml>(INIT_DB_TOML).expect("INIT_DB_TOML should be valid TOML");
|
||||
}
|
||||
@@ -56,7 +60,7 @@ pub struct FieldSpec {
|
||||
pub r#type: FieldType,
|
||||
|
||||
/// How to find this field in a data entry
|
||||
pub path: OneOrMany<String>,
|
||||
pub path: OneOrMany<ObjectPath>,
|
||||
|
||||
/// How to post-process this field
|
||||
#[serde(default)]
|
||||
|
||||
95
crates/pile-config/src/objectpath/mod.rs
Normal file
95
crates/pile-config/src/objectpath/mod.rs
Normal file
@@ -0,0 +1,95 @@
|
||||
use std::{fmt, str::FromStr};
|
||||
|
||||
use serde::{
|
||||
Deserialize, Deserializer,
|
||||
de::{self, Visitor},
|
||||
};
|
||||
use smartstring::{LazyCompact, SmartString};
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::Label;
|
||||
|
||||
mod parser;
|
||||
mod tokenizer;
|
||||
|
||||
/// Errors produced while parsing an `ObjectPath` string.
///
/// Every variant carries the byte `position` in the source string where
/// the problem was detected, so callers can point at the offending spot.
#[derive(Debug, Error, PartialEq)]
pub enum PathParseError {
    /// A token appeared somewhere the path grammar does not allow it.
    #[error("invalid syntax at index {position}")]
    Syntax { position: usize },

    /// The first token was not the `$` root selector.
    #[error("path string must start with $")]
    MustStartWithRoot { position: usize },

    /// A `.`-selected field name was rejected by `Label::new`.
    #[error("invalid field {str:?} at {position}")]
    InvalidField {
        position: usize,
        str: SmartString<LazyCompact>,
    },

    /// The text inside `[...]` did not parse as an `i64` index.
    #[error("invalid index {str:?} at {position}")]
    InvalidIndexString {
        position: usize,
        str: SmartString<LazyCompact>,
    },

    /// Paths are ASCII-only; a non-ASCII character was encountered.
    #[error("non-ascii character {char:?} at index {position}")]
    NonAsciiChar { position: usize, char: char },
}
|
||||
|
||||
/// One step of an `ObjectPath`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PathSegment {
    /// Go to root node (`$` identifier)
    Root,

    /// Go to a child of the current object
    Field(Label),

    /// Go to an element of the current list
    Index(i64),
}
|
||||
|
||||
/// A path to aPathSegment::Field inside a nested object,
|
||||
/// This is a subset of the rfc9535 jsonpath.
|
||||
///
|
||||
/// Format:
|
||||
/// - `$` refers to the root object
|
||||
/// - `.<name>` selects aPathSegment::Field of an object
|
||||
/// - `[n]` selects an item of an array
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ObjectPath {
|
||||
pub segments: Vec<PathSegment>,
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for ObjectPath {
|
||||
fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
|
||||
struct PathVisitor;
|
||||
|
||||
impl Visitor<'_> for PathVisitor {
|
||||
type Value = ObjectPath;
|
||||
|
||||
fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.write_str("an objectpath")
|
||||
}
|
||||
|
||||
fn visit_str<E: de::Error>(self, v: &str) -> Result<Self::Value, E> {
|
||||
v.parse().map_err(de::Error::custom)
|
||||
}
|
||||
}
|
||||
|
||||
deserializer.deserialize_str(PathVisitor)
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for ObjectPath {
|
||||
type Err = PathParseError;
|
||||
|
||||
fn from_str(source: &str) -> Result<Self, Self::Err> {
|
||||
let tk = tokenizer::Tokenizer::new();
|
||||
let tk = tk.tokenize(source)?;
|
||||
|
||||
let ps = parser::Parser::new();
|
||||
let segments = ps.parse(source, &tk)?;
|
||||
|
||||
return Ok(Self { segments });
|
||||
}
|
||||
}
|
||||
248
crates/pile-config/src/objectpath/parser.rs
Normal file
248
crates/pile-config/src/objectpath/parser.rs
Normal file
@@ -0,0 +1,248 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use crate::{
|
||||
Label,
|
||||
objectpath::{PathParseError, PathSegment, tokenizer::Token},
|
||||
};
|
||||
|
||||
/// Parser state machine states; one per "what token do we expect next".
enum State {
    /// Nothing consumed yet; only the `$` root token is accepted here.
    Start,

    /// We are holding a pointer to an object
    Selected,

    /// We are waiting for an identifier
    Dot,

    /// We are indexing an array, waiting for a number
    Index,

    /// We are indexing an array, waiting for a close-bracket
    IndexClose,
}
|
||||
|
||||
/// One-shot state-machine parser over an object path token stream.
pub struct Parser {
    // Current machine state; advanced once per consumed token.
    state: State,
    // Path segments accumulated so far; returned on success.
    segments: Vec<PathSegment>,
}
|
||||
|
||||
impl Parser {
|
||||
pub fn new() -> Self {
|
||||
Parser {
|
||||
state: State::Start,
|
||||
segments: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse(
|
||||
mut self,
|
||||
source: &str,
|
||||
tokens: &[(usize, Token<'_>)],
|
||||
) -> Result<Vec<PathSegment>, PathParseError> {
|
||||
for t in tokens {
|
||||
match (self.state, t) {
|
||||
(State::Start, (_, Token::Root)) => {
|
||||
self.segments.push(PathSegment::Root);
|
||||
self.state = State::Selected
|
||||
}
|
||||
|
||||
(State::Start, (p, Token::Ident(_))) => {
|
||||
return Err(PathParseError::MustStartWithRoot { position: *p });
|
||||
}
|
||||
|
||||
(State::Start, (p, Token::Dot))
|
||||
| (State::Start, (p, Token::SqbOpen))
|
||||
| (State::Start, (p, Token::SqbClose)) => {
|
||||
return Err(PathParseError::Syntax { position: *p });
|
||||
}
|
||||
|
||||
//
|
||||
// MARK: selected
|
||||
//
|
||||
(State::Selected, (_, Token::Dot)) => self.state = State::Dot,
|
||||
(State::Selected, (_, Token::SqbOpen)) => self.state = State::Index,
|
||||
|
||||
(State::Selected, (p, Token::Root))
|
||||
| (State::Selected, (p, Token::Ident(_)))
|
||||
| (State::Selected, (p, Token::SqbClose)) => {
|
||||
return Err(PathParseError::Syntax { position: *p });
|
||||
}
|
||||
|
||||
//
|
||||
// MARK: dot
|
||||
//
|
||||
(State::Dot, (p, Token::Ident(ident))) => {
|
||||
self.segments
|
||||
.push(PathSegment::Field(Label::new(*ident).ok_or_else(|| {
|
||||
PathParseError::InvalidField {
|
||||
position: *p,
|
||||
str: (*ident).into(),
|
||||
}
|
||||
})?));
|
||||
|
||||
self.state = State::Selected;
|
||||
}
|
||||
|
||||
(State::Dot, (p, Token::Root))
|
||||
| (State::Dot, (p, Token::Dot))
|
||||
| (State::Dot, (p, Token::SqbOpen))
|
||||
| (State::Dot, (p, Token::SqbClose)) => {
|
||||
return Err(PathParseError::Syntax { position: *p });
|
||||
}
|
||||
|
||||
//
|
||||
// MARK: index
|
||||
//
|
||||
(State::Index, (p, Token::Ident(ident))) => {
|
||||
let idx: i64 = i64::from_str(ident).map_err(|_err| {
|
||||
PathParseError::InvalidIndexString {
|
||||
position: *p,
|
||||
str: (*ident).into(),
|
||||
}
|
||||
})?;
|
||||
|
||||
self.segments.push(PathSegment::Index(idx));
|
||||
self.state = State::IndexClose;
|
||||
}
|
||||
|
||||
(State::Index, (p, Token::Root))
|
||||
| (State::Index, (p, Token::Dot))
|
||||
| (State::Index, (p, Token::SqbOpen))
|
||||
| (State::Index, (p, Token::SqbClose)) => {
|
||||
return Err(PathParseError::Syntax { position: *p });
|
||||
}
|
||||
|
||||
(State::IndexClose, (_, Token::SqbClose)) => self.state = State::Selected,
|
||||
(State::IndexClose, (p, _)) => {
|
||||
return Err(PathParseError::Syntax { position: *p });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let position = source.len();
|
||||
match self.state {
|
||||
State::Start => Err(PathParseError::Syntax { position: 0 }),
|
||||
State::Dot => Err(PathParseError::Syntax { position }),
|
||||
State::Index => Err(PathParseError::Syntax { position }),
|
||||
State::IndexClose => Err(PathParseError::Syntax { position }),
|
||||
State::Selected => Ok(()),
|
||||
}?;
|
||||
|
||||
return Ok(self.segments);
|
||||
}
|
||||
}
|
||||
|
||||
//
// MARK: tests
//

#[expect(clippy::unwrap_used)]
#[cfg(test)]
mod tests {
    use crate::objectpath::tokenizer::Tokenizer;

    use super::*;

    // Drives the full tokenize -> parse pipeline over `source` and checks
    // the outcome against `expected`: segment lists are compared on success,
    // exact error values on failure; an Ok/Err shape mismatch panics.
    fn parse_test(source: &str, expected: Result<&[PathSegment], PathParseError>) {
        let parsed = Tokenizer::new()
            .tokenize(source)
            .and_then(|tokens| Parser::new().parse(source, &tokens[..]));

        match (parsed, expected) {
            (Ok(segments), Ok(segs)) => assert_eq!(segments, segs),
            (Err(e), Err(expected_err)) => assert_eq!(e, expected_err),
            (Ok(segments), Err(e)) => panic!("expected error {e}, got {:?}", segments),
            (Err(e), Ok(segs)) => panic!("expected {:?}, got error {e}", segs),
        }
    }

    #[test]
    fn root_only() {
        parse_test("$", Ok(&[PathSegment::Root]));
    }

    #[test]
    fn single_field() {
        parse_test(
            "$.foo",
            Ok(&[
                PathSegment::Root,
                PathSegment::Field(Label::new("foo").unwrap()),
            ]),
        );
    }

    #[test]
    fn nested_fields() {
        parse_test(
            "$.foo.bar.baz",
            Ok(&[
                PathSegment::Root,
                PathSegment::Field(Label::new("foo").unwrap()),
                PathSegment::Field(Label::new("bar").unwrap()),
                PathSegment::Field(Label::new("baz").unwrap()),
            ]),
        );
    }

    #[test]
    fn array_index() {
        parse_test(
            "$.items[0]",
            Ok(&[
                PathSegment::Root,
                PathSegment::Field(Label::new("items").unwrap()),
                PathSegment::Index(0),
            ]),
        );
    }

    #[test]
    fn chained_indices() {
        parse_test(
            "$.a[1][2]",
            Ok(&[
                PathSegment::Root,
                PathSegment::Field(Label::new("a").unwrap()),
                PathSegment::Index(1),
                PathSegment::Index(2),
            ]),
        );
    }

    #[test]
    fn field_after_index() {
        parse_test(
            "$.a[0].b",
            Ok(&[
                PathSegment::Root,
                PathSegment::Field(Label::new("a").unwrap()),
                PathSegment::Index(0),
                PathSegment::Field(Label::new("b").unwrap()),
            ]),
        );
    }

    #[test]
    fn negative_index() {
        // "-1" is tokenized as a single ident and parsed as a negative i64.
        parse_test(
            "$.a[-1]",
            Ok(&[
                PathSegment::Root,
                PathSegment::Field(Label::new("a").unwrap()),
                PathSegment::Index(-1),
            ]),
        );
    }

    #[test]
    fn non_ascii_error() {
        // Tokenizer-level errors surface through the combined pipeline;
        // position 3 is the byte offset of 'é' in "$.fé".
        parse_test(
            "$.fé",
            Err(PathParseError::NonAsciiChar {
                position: 3,
                char: 'é',
            }),
        );
    }
}
|
||||
241
crates/pile-config/src/objectpath/tokenizer.rs
Normal file
241
crates/pile-config/src/objectpath/tokenizer.rs
Normal file
@@ -0,0 +1,241 @@
|
||||
use crate::objectpath::PathParseError;
|
||||
|
||||
/// A single lexical token of an object path string.
//
// PartialEq is only needed by the unit tests below, hence the cfg_attr.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub enum Token<'a> {
    /// The `$` root selector.
    Root,
    /// A run of non-delimiter ASCII chars (field name or index digits),
    /// borrowed from the source string.
    Ident(&'a str),
    /// The `.` field selector.
    Dot,
    /// Opening square bracket `[`.
    SqbOpen,
    /// Closing square bracket `]`.
    SqbClose,
}
|
||||
|
||||
pub struct Tokenizer {}
|
||||
|
||||
impl Tokenizer {
|
||||
pub fn new() -> Self {
|
||||
Self {}
|
||||
}
|
||||
|
||||
pub fn tokenize(self, source: &str) -> Result<Vec<(usize, Token<'_>)>, PathParseError> {
|
||||
let mut tokens = Vec::new();
|
||||
|
||||
let mut window_start = None;
|
||||
for (i, c) in source.char_indices() {
|
||||
match c {
|
||||
'$' => {
|
||||
if let Some(s) = window_start.take() {
|
||||
tokens.push((s, Token::Ident(&source[s..i])));
|
||||
}
|
||||
tokens.push((i, Token::Root));
|
||||
}
|
||||
|
||||
'.' => {
|
||||
if let Some(s) = window_start.take() {
|
||||
tokens.push((s, Token::Ident(&source[s..i])));
|
||||
}
|
||||
tokens.push((i, Token::Dot));
|
||||
}
|
||||
|
||||
'[' => {
|
||||
if let Some(s) = window_start.take() {
|
||||
tokens.push((s, Token::Ident(&source[s..i])));
|
||||
}
|
||||
tokens.push((i, Token::SqbOpen));
|
||||
}
|
||||
|
||||
']' => {
|
||||
if let Some(s) = window_start.take() {
|
||||
tokens.push((s, Token::Ident(&source[s..i])));
|
||||
}
|
||||
tokens.push((i, Token::SqbClose));
|
||||
}
|
||||
|
||||
x if x.is_ascii() => match window_start {
|
||||
None => window_start = Some(i),
|
||||
Some(_) => continue,
|
||||
},
|
||||
|
||||
char => return Err(PathParseError::NonAsciiChar { position: i, char }),
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(s) = window_start.take() {
|
||||
tokens.push((s, Token::Ident(&source[s..])));
|
||||
}
|
||||
|
||||
return Ok(tokens);
|
||||
}
|
||||
}
|
||||
|
||||
#[expect(clippy::expect_used)]
#[cfg(test)]
mod tests {
    use super::*;

    // Thin wrapper so every test reads uniformly.
    fn tokenize(source: &str) -> Result<Vec<(usize, Token<'_>)>, PathParseError> {
        Tokenizer::new().tokenize(source)
    }

    // Tokenize input that is expected to be valid, panicking otherwise.
    fn tok_ok(source: &str) -> Vec<(usize, Token<'_>)> {
        tokenize(source).expect("expected tokenization to succeed")
    }

    #[test]
    fn empty() {
        assert_eq!(tok_ok(""), vec![]);
    }

    #[test]
    fn root_only() {
        assert_eq!(tok_ok("$"), vec![(0, Token::Root)]);
    }

    #[test]
    fn dot_only() {
        assert_eq!(tok_ok("."), vec![(0, Token::Dot)]);
    }

    #[test]
    fn sqb_open_only() {
        assert_eq!(tok_ok("["), vec![(0, Token::SqbOpen)]);
    }

    #[test]
    fn sqb_close_only() {
        assert_eq!(tok_ok("]"), vec![(0, Token::SqbClose)]);
    }

    #[test]
    fn ident_only() {
        assert_eq!(tok_ok("foo"), vec![(0, Token::Ident("foo"))]);
    }

    #[test]
    fn ident_with_digits() {
        assert_eq!(tok_ok("abc123"), vec![(0, Token::Ident("abc123"))]);
    }

    #[test]
    fn root_dot_ident() {
        assert_eq!(
            tok_ok("$.foo"),
            vec![(0, Token::Root), (1, Token::Dot), (2, Token::Ident("foo"))]
        );
    }

    #[test]
    fn ident_flushed_before_delimiter() {
        assert_eq!(
            tok_ok("foo.bar"),
            vec![
                (0, Token::Ident("foo")),
                (3, Token::Dot),
                (4, Token::Ident("bar"))
            ]
        );
    }

    #[test]
    fn root_after_ident_flushes() {
        // ident window should flush before Root token
        assert_eq!(
            tok_ok("foo$"),
            vec![(0, Token::Ident("foo")), (3, Token::Root)]
        );
    }

    #[test]
    fn full_path() {
        assert_eq!(
            tok_ok("$.foo[0]"),
            vec![
                (0, Token::Root),
                (1, Token::Dot),
                (2, Token::Ident("foo")),
                (5, Token::SqbOpen),
                (6, Token::Ident("0")),
                (7, Token::SqbClose),
            ]
        );
    }

    #[test]
    fn complex_nested() {
        assert_eq!(
            tok_ok("$.a[1].b"),
            vec![
                (0, Token::Root),
                (1, Token::Dot),
                (2, Token::Ident("a")),
                (3, Token::SqbOpen),
                (4, Token::Ident("1")),
                (5, Token::SqbClose),
                (6, Token::Dot),
                (7, Token::Ident("b")),
            ]
        );
    }

    #[test]
    fn negative_number_ident() {
        // '-' is ASCII, so "-1" is a single ident
        assert_eq!(
            tok_ok("[-1]"),
            vec![
                (0, Token::SqbOpen),
                (1, Token::Ident("-1")),
                (3, Token::SqbClose)
            ]
        );
    }

    #[test]
    fn root_immediately_followed_by_ident() {
        // "$foo" with no dot — produces Root then Ident
        assert_eq!(
            tok_ok("$foo"),
            vec![(0, Token::Root), (1, Token::Ident("foo"))]
        );
    }

    #[test]
    fn consecutive_delimiters() {
        assert_eq!(tok_ok(".."), vec![(0, Token::Dot), (1, Token::Dot)]);
    }

    #[test]
    fn non_ascii_error() {
        // 'é' sits at byte offset 3 in "$.fé" (multi-byte UTF-8).
        assert_eq!(
            tokenize("$.fé"),
            Err(PathParseError::NonAsciiChar {
                position: 3,
                char: 'é'
            })
        );
    }

    #[test]
    fn non_ascii_at_start() {
        assert_eq!(
            tokenize("é"),
            Err(PathParseError::NonAsciiChar {
                position: 0,
                char: 'é'
            })
        );
    }

    #[test]
    fn non_ascii_flushes_pending_ident_not_reached() {
        // "abé" — the ident "ab" is not yet flushed when error occurs,
        // but we still get an error for the non-ascii char
        assert_eq!(
            tokenize("abé"),
            Err(PathParseError::NonAsciiChar {
                position: 2,
                char: 'é'
            })
        );
    }
}
|
||||
Reference in New Issue
Block a user