Add ObjectPath query language
Some checks failed
CI / Typos (push) Successful in 19s
CI / Build and test (push) Failing after 40s
CI / Clippy (push) Failing after 53s

This commit is contained in:
2026-03-05 21:35:07 -08:00
parent 0053ed3a69
commit a9e402bc83
11 changed files with 657 additions and 48 deletions

View File

@@ -0,0 +1,241 @@
use crate::objectpath::PathParseError;
/// A single lexical element of an ObjectPath expression.
///
/// `Ident` borrows its text from the source string handed to the tokenizer,
/// so tokens are tied to the source's lifetime.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug, Clone, Copy)]
pub enum Token<'a> {
    /// The `$` root marker.
    Root,
    /// A maximal run of non-delimiter ASCII characters (names, indices).
    Ident(&'a str),
    /// The `.` member-access separator.
    Dot,
    /// The `[` opening an index expression.
    SqbOpen,
    /// The `]` closing an index expression.
    SqbClose,
}
/// Splits an ObjectPath source string into positioned [`Token`]s.
///
/// Stateless: construct with [`Tokenizer::new`] (or [`Default`]) and call
/// [`Tokenizer::tokenize`].
#[derive(Debug, Default)]
pub struct Tokenizer {}

impl Tokenizer {
    /// Creates a new tokenizer.
    pub fn new() -> Self {
        Self {}
    }

    /// Scans `source` left to right and returns `(byte_offset, token)` pairs.
    ///
    /// `$`, `.`, `[`, and `]` each produce a dedicated token; any maximal run
    /// of other ASCII characters becomes a single [`Token::Ident`] borrowing
    /// from `source`.
    ///
    /// # Errors
    ///
    /// Returns [`PathParseError::NonAsciiChar`] at the first non-ASCII
    /// character. An identifier still being accumulated at that point is
    /// discarded (the error is reported without flushing it).
    pub fn tokenize(self, source: &str) -> Result<Vec<(usize, Token<'_>)>, PathParseError> {
        let mut tokens = Vec::new();
        // Byte offset where the identifier currently being scanned began.
        let mut window_start: Option<usize> = None;

        for (i, c) in source.char_indices() {
            // Classify the character: `Some` for a delimiter token,
            // `None` for an ordinary ASCII character that extends an ident.
            let delimiter = match c {
                '$' => Some(Token::Root),
                '.' => Some(Token::Dot),
                '[' => Some(Token::SqbOpen),
                ']' => Some(Token::SqbClose),
                x if x.is_ascii() => None,
                char => return Err(PathParseError::NonAsciiChar { position: i, char }),
            };
            match delimiter {
                Some(token) => {
                    // A delimiter terminates any pending identifier.
                    if let Some(s) = window_start.take() {
                        tokens.push((s, Token::Ident(&source[s..i])));
                    }
                    tokens.push((i, token));
                }
                // Open a new identifier window, or keep extending the current one.
                None => {
                    window_start.get_or_insert(i);
                }
            }
        }
        // Flush an identifier that runs to the end of the input.
        if let Some(s) = window_start {
            tokens.push((s, Token::Ident(&source[s..])));
        }
        Ok(tokens)
    }
}
#[expect(clippy::expect_used)]
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a fresh tokenizer over `source`.
    fn tokenize(source: &str) -> Result<Vec<(usize, Token<'_>)>, PathParseError> {
        Tokenizer::new().tokenize(source)
    }

    /// Tokenizes input that is expected to be valid, panicking otherwise.
    fn tok_ok(source: &str) -> Vec<(usize, Token<'_>)> {
        tokenize(source).expect("expected tokenization to succeed")
    }

    #[test]
    fn empty() {
        assert!(tok_ok("").is_empty());
    }

    #[test]
    fn root_only() {
        assert_eq!(tok_ok("$"), [(0, Token::Root)]);
    }

    #[test]
    fn dot_only() {
        assert_eq!(tok_ok("."), [(0, Token::Dot)]);
    }

    #[test]
    fn sqb_open_only() {
        assert_eq!(tok_ok("["), [(0, Token::SqbOpen)]);
    }

    #[test]
    fn sqb_close_only() {
        assert_eq!(tok_ok("]"), [(0, Token::SqbClose)]);
    }

    #[test]
    fn ident_only() {
        assert_eq!(tok_ok("foo"), [(0, Token::Ident("foo"))]);
    }

    #[test]
    fn ident_with_digits() {
        assert_eq!(tok_ok("abc123"), [(0, Token::Ident("abc123"))]);
    }

    #[test]
    fn root_dot_ident() {
        let expected = [(0, Token::Root), (1, Token::Dot), (2, Token::Ident("foo"))];
        assert_eq!(tok_ok("$.foo"), expected);
    }

    #[test]
    fn ident_flushed_before_delimiter() {
        let expected = [
            (0, Token::Ident("foo")),
            (3, Token::Dot),
            (4, Token::Ident("bar")),
        ];
        assert_eq!(tok_ok("foo.bar"), expected);
    }

    #[test]
    fn root_after_ident_flushes() {
        // The pending ident window must be emitted before the Root token.
        assert_eq!(tok_ok("foo$"), [(0, Token::Ident("foo")), (3, Token::Root)]);
    }

    #[test]
    fn full_path() {
        let expected = [
            (0, Token::Root),
            (1, Token::Dot),
            (2, Token::Ident("foo")),
            (5, Token::SqbOpen),
            (6, Token::Ident("0")),
            (7, Token::SqbClose),
        ];
        assert_eq!(tok_ok("$.foo[0]"), expected);
    }

    #[test]
    fn complex_nested() {
        let expected = [
            (0, Token::Root),
            (1, Token::Dot),
            (2, Token::Ident("a")),
            (3, Token::SqbOpen),
            (4, Token::Ident("1")),
            (5, Token::SqbClose),
            (6, Token::Dot),
            (7, Token::Ident("b")),
        ];
        assert_eq!(tok_ok("$.a[1].b"), expected);
    }

    #[test]
    fn negative_number_ident() {
        // '-' is ASCII, so "-1" lexes as a single ident.
        let expected = [
            (0, Token::SqbOpen),
            (1, Token::Ident("-1")),
            (3, Token::SqbClose),
        ];
        assert_eq!(tok_ok("[-1]"), expected);
    }

    #[test]
    fn root_immediately_followed_by_ident() {
        // "$foo" without a dot still yields Root then Ident.
        assert_eq!(tok_ok("$foo"), [(0, Token::Root), (1, Token::Ident("foo"))]);
    }

    #[test]
    fn consecutive_delimiters() {
        assert_eq!(tok_ok(".."), [(0, Token::Dot), (1, Token::Dot)]);
    }

    #[test]
    fn non_ascii_error() {
        let expected = PathParseError::NonAsciiChar {
            position: 3,
            char: 'é',
        };
        assert_eq!(tokenize("$.fé"), Err(expected));
    }

    #[test]
    fn non_ascii_at_start() {
        let expected = PathParseError::NonAsciiChar {
            position: 0,
            char: 'é',
        };
        assert_eq!(tokenize("é"), Err(expected));
    }

    #[test]
    fn non_ascii_flushes_pending_ident_not_reached() {
        // In "abé" the ident "ab" is never flushed: the non-ASCII char is
        // reported first, and only the error comes back.
        let expected = PathParseError::NonAsciiChar {
            position: 2,
            char: 'é',
        };
        assert_eq!(tokenize("abé"), Err(expected));
    }
}