487 lines
11 KiB
Rust
487 lines
11 KiB
Rust
use std::str::FromStr;
|
|
|
|
use smartstring::{LazyCompact, SmartString};
|
|
|
|
use crate::{
|
|
Label,
|
|
objectpath::{PathParseError, PathSegment, tokenizer::Token},
|
|
};
|
|
|
|
/// Parse an ident token into a `PathSegment::Field`, handling optional args of
|
|
/// the form `name(args)`. Parens inside args may be nested; `\(` and `\)` are
|
|
/// escaped and do not affect depth counting.
|
|
fn parse_field(ident: &str, position: usize) -> Result<PathSegment, PathParseError> {
|
|
let bytes = ident.as_bytes();
|
|
let mut i = 0;
|
|
|
|
// Find the first unescaped '(' — everything before it is the name.
|
|
let open_paren: Option<usize> = loop {
|
|
if i >= bytes.len() {
|
|
break None;
|
|
}
|
|
match bytes[i] {
|
|
b'\\' => i += 2, // skip escaped character
|
|
b'(' => break Some(i),
|
|
_ => i += 1,
|
|
}
|
|
};
|
|
|
|
let name_str = &ident[..open_paren.unwrap_or(bytes.len())];
|
|
let name = Label::new(name_str).ok_or_else(|| PathParseError::InvalidField {
|
|
position,
|
|
str: name_str.into(),
|
|
})?;
|
|
|
|
let Some(open_pos) = open_paren else {
|
|
return Ok(PathSegment::Field { name, args: None });
|
|
};
|
|
|
|
// Scan args, tracking paren depth.
|
|
let args_start = open_pos + 1;
|
|
let mut depth: usize = 1;
|
|
let mut j = args_start;
|
|
|
|
while j < bytes.len() {
|
|
match bytes[j] {
|
|
b'\\' => j += 2, // skip escaped character
|
|
b'(' => {
|
|
depth += 1;
|
|
j += 1;
|
|
}
|
|
b')' => {
|
|
depth -= 1;
|
|
if depth == 0 {
|
|
// Closing paren must be the last character.
|
|
if j + 1 != bytes.len() {
|
|
return Err(PathParseError::Syntax {
|
|
position: position + j + 1,
|
|
});
|
|
}
|
|
let args: SmartString<LazyCompact> = ident[args_start..j].into();
|
|
return Ok(PathSegment::Field {
|
|
name,
|
|
args: Some(args),
|
|
});
|
|
}
|
|
j += 1;
|
|
}
|
|
_ => j += 1,
|
|
}
|
|
}
|
|
|
|
// Reached end of ident without finding the matching ')'.
|
|
Err(PathParseError::Syntax {
|
|
position: position + ident.len(),
|
|
})
|
|
}
|
|
|
|
/// Position of the path parser's state machine between two tokens.
///
/// The enum is small and plain data (at most one `i64` payload), so it is
/// `Copy`: the parser can read its current state by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// Nothing has been consumed yet; the root token is expected.
    Start,

    /// We are holding a pointer to an object
    Selected,

    /// We are waiting for an identifier
    Dot,

    /// We are indexing an array, waiting for a number
    Index,

    /// We parsed the start index, waiting for `]` or the first `.` of `..`
    IndexAfterStart(i64),

    /// We saw one `.` after the start index, waiting for the second `.`
    IndexRangeDot1(i64),

    /// We saw `..`, waiting for the end index (optionally prefixed with `=`)
    IndexRangeDot2(i64),

    /// We are indexing an array, waiting for a close-bracket
    IndexClose,
}
|
|
|
|
pub struct Parser {
|
|
state: State,
|
|
segments: Vec<PathSegment>,
|
|
}
|
|
|
|
impl Parser {
|
|
pub fn new() -> Self {
|
|
Parser {
|
|
state: State::Start,
|
|
segments: Vec::new(),
|
|
}
|
|
}
|
|
|
|
pub fn parse(
|
|
mut self,
|
|
source: &str,
|
|
tokens: &[(usize, Token<'_>)],
|
|
) -> Result<Vec<PathSegment>, PathParseError> {
|
|
for t in tokens {
|
|
match (self.state, t) {
|
|
(State::Start, (_, Token::Root)) => {
|
|
self.segments.push(PathSegment::Root);
|
|
self.state = State::Selected
|
|
}
|
|
|
|
(State::Start, (p, Token::Ident(_))) => {
|
|
return Err(PathParseError::MustStartWithRoot { position: *p });
|
|
}
|
|
|
|
(State::Start, (p, Token::Dot))
|
|
| (State::Start, (p, Token::SqbOpen))
|
|
| (State::Start, (p, Token::SqbClose)) => {
|
|
return Err(PathParseError::Syntax { position: *p });
|
|
}
|
|
|
|
//
|
|
// MARK: selected
|
|
//
|
|
(State::Selected, (_, Token::Dot)) => self.state = State::Dot,
|
|
(State::Selected, (_, Token::SqbOpen)) => self.state = State::Index,
|
|
|
|
(State::Selected, (p, Token::Root))
|
|
| (State::Selected, (p, Token::Ident(_)))
|
|
| (State::Selected, (p, Token::SqbClose)) => {
|
|
return Err(PathParseError::Syntax { position: *p });
|
|
}
|
|
|
|
//
|
|
// MARK: dot
|
|
//
|
|
(State::Dot, (p, Token::Ident(ident))) => {
|
|
self.segments.push(parse_field(ident, *p)?);
|
|
self.state = State::Selected;
|
|
}
|
|
|
|
(State::Dot, (p, Token::Root))
|
|
| (State::Dot, (p, Token::Dot))
|
|
| (State::Dot, (p, Token::SqbOpen))
|
|
| (State::Dot, (p, Token::SqbClose)) => {
|
|
return Err(PathParseError::Syntax { position: *p });
|
|
}
|
|
|
|
//
|
|
// MARK: index
|
|
//
|
|
(State::Index, (p, Token::Ident(ident))) => {
|
|
let idx: i64 = i64::from_str(ident).map_err(|_err| {
|
|
PathParseError::InvalidIndexString {
|
|
position: *p,
|
|
str: (*ident).into(),
|
|
}
|
|
})?;
|
|
|
|
self.state = State::IndexAfterStart(idx);
|
|
}
|
|
|
|
(State::Index, (p, Token::Root))
|
|
| (State::Index, (p, Token::Dot))
|
|
| (State::Index, (p, Token::SqbOpen))
|
|
| (State::Index, (p, Token::SqbClose)) => {
|
|
return Err(PathParseError::Syntax { position: *p });
|
|
}
|
|
|
|
(State::IndexAfterStart(idx), (_, Token::SqbClose)) => {
|
|
self.segments.push(PathSegment::Index(idx));
|
|
self.state = State::Selected;
|
|
}
|
|
(State::IndexAfterStart(idx), (_, Token::Dot)) => {
|
|
self.state = State::IndexRangeDot1(idx);
|
|
}
|
|
(State::IndexAfterStart(_), (p, _)) => {
|
|
return Err(PathParseError::Syntax { position: *p });
|
|
}
|
|
|
|
(State::IndexRangeDot1(idx), (_, Token::Dot)) => {
|
|
self.state = State::IndexRangeDot2(idx);
|
|
}
|
|
(State::IndexRangeDot1(_), (p, _)) => {
|
|
return Err(PathParseError::Syntax { position: *p });
|
|
}
|
|
|
|
(State::IndexRangeDot2(start), (p, Token::Ident(ident))) => {
|
|
let (end_str, inclusive) = if let Some(stripped) = ident.strip_prefix('=') {
|
|
(stripped, true)
|
|
} else {
|
|
(*ident, false)
|
|
};
|
|
|
|
let end: i64 = i64::from_str(end_str).map_err(|_err| {
|
|
PathParseError::InvalidIndexString {
|
|
position: *p,
|
|
str: (*ident).into(),
|
|
}
|
|
})?;
|
|
|
|
self.segments.push(PathSegment::Range {
|
|
start,
|
|
end,
|
|
inclusive,
|
|
});
|
|
self.state = State::IndexClose;
|
|
}
|
|
(State::IndexRangeDot2(_), (p, _)) => {
|
|
return Err(PathParseError::Syntax { position: *p });
|
|
}
|
|
|
|
(State::IndexClose, (_, Token::SqbClose)) => self.state = State::Selected,
|
|
(State::IndexClose, (p, _)) => {
|
|
return Err(PathParseError::Syntax { position: *p });
|
|
}
|
|
}
|
|
}
|
|
|
|
let position = source.len();
|
|
match self.state {
|
|
State::Start => Err(PathParseError::Syntax { position: 0 }),
|
|
State::Dot => Err(PathParseError::Syntax { position }),
|
|
State::Index => Err(PathParseError::Syntax { position }),
|
|
State::IndexAfterStart(_) => Err(PathParseError::Syntax { position }),
|
|
State::IndexRangeDot1(_) => Err(PathParseError::Syntax { position }),
|
|
State::IndexRangeDot2(_) => Err(PathParseError::Syntax { position }),
|
|
State::IndexClose => Err(PathParseError::Syntax { position }),
|
|
State::Selected => Ok(()),
|
|
}?;
|
|
|
|
return Ok(self.segments);
|
|
}
|
|
}
|
|
|
|
//
|
|
// MARK: tests
|
|
//
|
|
|
|
#[expect(clippy::unwrap_used)]
#[cfg(test)]
mod tests {
    use crate::objectpath::tokenizer::Tokenizer;

    use super::*;

    //
    // MARK: helpers
    //

    /// Tokenizes and parses `source`, then compares the outcome with
    /// `expected`. Panics when one side succeeded and the other failed.
    fn parse_test(source: &str, expected: Result<&[PathSegment], PathParseError>) {
        let parsed = match Tokenizer::new().tokenize(source) {
            Ok(tokens) => Parser::new().parse(source, &tokens[..]),
            Err(tokenize_err) => Err(tokenize_err),
        };

        match expected {
            Ok(segs) => match parsed {
                Ok(segments) => assert_eq!(segments, segs),
                Err(e) => panic!("expected {:?}, got error {e}", segs),
            },
            Err(e) => match parsed {
                Ok(segments) => panic!("expected error {e}, got {:?}", segments),
                Err(actual) => assert_eq!(actual, e),
            },
        }
    }

    /// Asserts that `source` parses into exactly `segments`.
    fn parses_to(source: &str, segments: &[PathSegment]) {
        parse_test(source, Ok(segments));
    }

    /// Asserts that `source` is rejected with exactly `error`.
    fn fails_with(source: &str, error: PathParseError) {
        parse_test(source, Err(error));
    }

    /// A field segment without args.
    fn field(name: &str) -> PathSegment {
        let name = Label::new(name).unwrap();
        PathSegment::Field { name, args: None }
    }

    /// A field segment carrying `args`.
    fn field_args(name: &str, args: &str) -> PathSegment {
        let name = Label::new(name).unwrap();
        PathSegment::Field {
            name,
            args: Some(args.into()),
        }
    }

    /// A range segment.
    fn range(start: i64, end: i64, inclusive: bool) -> PathSegment {
        PathSegment::Range {
            start,
            end,
            inclusive,
        }
    }

    //
    // MARK: fields and indices
    //

    #[test]
    fn root_only() {
        parses_to("$", &[PathSegment::Root]);
    }

    #[test]
    fn single_field() {
        parses_to("$.foo", &[PathSegment::Root, field("foo")]);
    }

    #[test]
    fn nested_fields() {
        let expected = [PathSegment::Root, field("foo"), field("bar"), field("baz")];
        parses_to("$.foo.bar.baz", &expected);
    }

    #[test]
    fn array_index() {
        let expected = [PathSegment::Root, field("items"), PathSegment::Index(0)];
        parses_to("$.items[0]", &expected);
    }

    #[test]
    fn chained_indices() {
        let expected = [
            PathSegment::Root,
            field("a"),
            PathSegment::Index(1),
            PathSegment::Index(2),
        ];
        parses_to("$.a[1][2]", &expected);
    }

    #[test]
    fn field_after_index() {
        let expected = [
            PathSegment::Root,
            field("a"),
            PathSegment::Index(0),
            field("b"),
        ];
        parses_to("$.a[0].b", &expected);
    }

    #[test]
    fn negative_index() {
        let expected = [PathSegment::Root, field("a"), PathSegment::Index(-1)];
        parses_to("$.a[-1]", &expected);
    }

    //
    // MARK: args
    //

    #[test]
    fn field_with_simple_args() {
        parses_to("$.foo(bar)", &[PathSegment::Root, field_args("foo", "bar")]);
    }

    #[test]
    fn field_with_empty_args() {
        parses_to("$.foo()", &[PathSegment::Root, field_args("foo", "")]);
    }

    #[test]
    fn field_with_nested_parens_in_args() {
        let expected = [PathSegment::Root, field_args("foo", "a(b)c")];
        parses_to("$.foo(a(b)c)", &expected);
    }

    #[test]
    fn field_with_deeply_nested_parens_in_args() {
        let expected = [PathSegment::Root, field_args("foo", "a(b(c))d")];
        parses_to("$.foo(a(b(c))d)", &expected);
    }

    #[test]
    fn field_with_escaped_open_paren_in_args() {
        // The escaped '(' is not counted, so the single ')' ends the args.
        let expected = [PathSegment::Root, field_args("foo", r"a\(b")];
        parses_to(r"$.foo(a\(b)", &expected);
    }

    #[test]
    fn field_with_escaped_close_paren_in_args() {
        // The escaped ')' is not counted; the final ')' ends the args.
        let expected = [PathSegment::Root, field_args("foo", r"a\)b")];
        parses_to(r"$.foo(a\)b)", &expected);
    }

    #[test]
    fn field_with_both_escaped_parens_in_args() {
        let expected = [PathSegment::Root, field_args("foo", r"a\(b\)c")];
        parses_to(r"$.foo(a\(b\)c)", &expected);
    }

    #[test]
    fn field_args_with_multiple_segments() {
        let expected = [
            PathSegment::Root,
            field_args("foo", "x"),
            field_args("bar", "y"),
        ];
        parses_to("$.foo(x).bar(y)", &expected);
    }

    #[test]
    fn field_args_unclosed_paren_error() {
        // No closing ')': the error points at the end of the source.
        fails_with("$.foo(bar", PathParseError::Syntax { position: 9 });
    }

    #[test]
    fn field_args_trailing_chars_after_close_error() {
        // Text after the closing ')': the error points at the first extra char.
        fails_with("$.foo(bar)baz", PathParseError::Syntax { position: 10 });
    }

    #[test]
    fn non_ascii_error() {
        fails_with(
            "$.fé",
            PathParseError::NonAsciiChar {
                position: 3,
                char: 'é',
            },
        );
    }

    //
    // MARK: range
    //

    #[test]
    fn exclusive_range() {
        let expected = [PathSegment::Root, field("a"), range(0, 5, false)];
        parses_to("$.a[0..5]", &expected);
    }

    #[test]
    fn inclusive_range() {
        let expected = [PathSegment::Root, field("a"), range(1, 2, true)];
        parses_to("$.a[1..=2]", &expected);
    }

    #[test]
    fn range_with_negative_end() {
        let expected = [PathSegment::Root, field("a"), range(0, -1, false)];
        parses_to("$.a[0..-1]", &expected);
    }

    #[test]
    fn range_with_negative_start() {
        let expected = [PathSegment::Root, field("a"), range(-3, -1, false)];
        parses_to("$.a[-3..-1]", &expected);
    }
}
|