use std::str::FromStr; use smartstring::{LazyCompact, SmartString}; use crate::{ Label, objectpath::{PathParseError, PathSegment, tokenizer::Token}, }; /// Parse an ident token into a `PathSegment::Field`, handling optional args of /// the form `name(args)`. Parens inside args may be nested; `\(` and `\)` are /// escaped and do not affect depth counting. fn parse_field(ident: &str, position: usize) -> Result { let bytes = ident.as_bytes(); let mut i = 0; // Find the first unescaped '(' — everything before it is the name. let open_paren: Option = loop { if i >= bytes.len() { break None; } match bytes[i] { b'\\' => i += 2, // skip escaped character b'(' => break Some(i), _ => i += 1, } }; let name_str = &ident[..open_paren.unwrap_or(bytes.len())]; let name = Label::new(name_str).ok_or_else(|| PathParseError::InvalidField { position, str: name_str.into(), })?; let Some(open_pos) = open_paren else { return Ok(PathSegment::Field { name, args: None }); }; // Scan args, tracking paren depth. let args_start = open_pos + 1; let mut depth: usize = 1; let mut j = args_start; while j < bytes.len() { match bytes[j] { b'\\' => j += 2, // skip escaped character b'(' => { depth += 1; j += 1; } b')' => { depth -= 1; if depth == 0 { // Closing paren must be the last character. if j + 1 != bytes.len() { return Err(PathParseError::Syntax { position: position + j + 1, }); } let args: SmartString = ident[args_start..j].into(); return Ok(PathSegment::Field { name, args: Some(args), }); } j += 1; } _ => j += 1, } } // Reached end of ident without finding the matching ')'. Err(PathParseError::Syntax { position: position + ident.len(), }) } enum State { Start, /// We are holding a pointer to an object Selected, /// We are waiting for an identifier Dot, /// We are indexing an array, waiting for a number Index, /// We parsed the start index, waiting for `]` or the first `.` of `..` IndexAfterStart(i64), /// We saw one `.` after the start index, waiting for the second `.` IndexRangeDot1(i64), /// We saw `..`, waiting for the end index (optionally prefixed with `=`) IndexRangeDot2(i64), /// We are indexing an array, waiting for a close-bracket IndexClose, } pub struct Parser { state: State, segments: Vec, } impl Parser { pub fn new() -> Self { Parser { state: State::Start, segments: Vec::new(), } } pub fn parse( mut self, source: &str, tokens: &[(usize, Token<'_>)], ) -> Result, PathParseError> { for t in tokens { match (self.state, t) { (State::Start, (_, Token::Root)) => { self.segments.push(PathSegment::Root); self.state = State::Selected } (State::Start, (p, Token::Ident(_))) => { return Err(PathParseError::MustStartWithRoot { position: *p }); } (State::Start, (p, Token::Dot)) | (State::Start, (p, Token::SqbOpen)) | (State::Start, (p, Token::SqbClose)) => { return Err(PathParseError::Syntax { position: *p }); } // // MARK: selected // (State::Selected, (_, Token::Dot)) => self.state = State::Dot, (State::Selected, (_, Token::SqbOpen)) => self.state = State::Index, (State::Selected, (p, Token::Root)) | (State::Selected, (p, Token::Ident(_))) | (State::Selected, (p, Token::SqbClose)) => { return Err(PathParseError::Syntax { position: *p }); } // // MARK: dot // (State::Dot, (p, Token::Ident(ident))) => { self.segments.push(parse_field(ident, *p)?); self.state = State::Selected; } (State::Dot, (p, Token::Root)) | (State::Dot, (p, Token::Dot)) | (State::Dot, (p, Token::SqbOpen)) | (State::Dot, (p, Token::SqbClose)) => { return Err(PathParseError::Syntax { position: *p }); } // // MARK: index // (State::Index, (p, Token::Ident(ident))) => { let idx: i64 = i64::from_str(ident).map_err(|_err| { PathParseError::InvalidIndexString { position: *p, str: (*ident).into(), } })?; self.state = State::IndexAfterStart(idx); } (State::Index, (p, Token::Root)) | (State::Index, (p, Token::Dot)) | (State::Index, (p, Token::SqbOpen)) | (State::Index, (p, Token::SqbClose)) => { return Err(PathParseError::Syntax { position: *p }); } (State::IndexAfterStart(idx), (_, Token::SqbClose)) => { self.segments.push(PathSegment::Index(idx)); self.state = State::Selected; } (State::IndexAfterStart(idx), (_, Token::Dot)) => { self.state = State::IndexRangeDot1(idx); } (State::IndexAfterStart(_), (p, _)) => { return Err(PathParseError::Syntax { position: *p }); } (State::IndexRangeDot1(idx), (_, Token::Dot)) => { self.state = State::IndexRangeDot2(idx); } (State::IndexRangeDot1(_), (p, _)) => { return Err(PathParseError::Syntax { position: *p }); } (State::IndexRangeDot2(start), (p, Token::Ident(ident))) => { let (end_str, inclusive) = if let Some(stripped) = ident.strip_prefix('=') { (stripped, true) } else { (*ident, false) }; let end: i64 = i64::from_str(end_str).map_err(|_err| { PathParseError::InvalidIndexString { position: *p, str: (*ident).into(), } })?; self.segments.push(PathSegment::Range { start, end, inclusive, }); self.state = State::IndexClose; } (State::IndexRangeDot2(_), (p, _)) => { return Err(PathParseError::Syntax { position: *p }); } (State::IndexClose, (_, Token::SqbClose)) => self.state = State::Selected, (State::IndexClose, (p, _)) => { return Err(PathParseError::Syntax { position: *p }); } } } let position = source.len(); match self.state { State::Start => Err(PathParseError::Syntax { position: 0 }), State::Dot => Err(PathParseError::Syntax { position }), State::Index => Err(PathParseError::Syntax { position }), State::IndexAfterStart(_) => Err(PathParseError::Syntax { position }), State::IndexRangeDot1(_) => Err(PathParseError::Syntax { position }), State::IndexRangeDot2(_) => Err(PathParseError::Syntax { position }), State::IndexClose => Err(PathParseError::Syntax { position }), State::Selected => Ok(()), }?; return Ok(self.segments); } } // // MARK: tests // #[expect(clippy::unwrap_used)] #[cfg(test)] mod tests { use crate::objectpath::tokenizer::Tokenizer; use super::*; fn parse_test(source: &str, expected: Result<&[PathSegment], PathParseError>) { let parsed = Tokenizer::new() .tokenize(source) .and_then(|tokens| Parser::new().parse(source, &tokens[..])); match (parsed, expected) { (Ok(segments), Ok(segs)) => assert_eq!(segments, segs), (Err(e), Err(expected_err)) => assert_eq!(e, expected_err), (Ok(segments), Err(e)) => panic!("expected error {e}, got {:?}", segments), (Err(e), Ok(segs)) => panic!("expected {:?}, got error {e}", segs), } } #[test] fn root_only() { parse_test("$", Ok(&[PathSegment::Root])); } fn field(name: &str) -> PathSegment { PathSegment::Field { name: Label::new(name).unwrap(), args: None, } } fn field_args(name: &str, args: &str) -> PathSegment { PathSegment::Field { name: Label::new(name).unwrap(), args: Some(args.into()), } } #[test] fn single_field() { parse_test("$.foo", Ok(&[PathSegment::Root, field("foo")])); } #[test] fn nested_fields() { parse_test( "$.foo.bar.baz", Ok(&[PathSegment::Root, field("foo"), field("bar"), field("baz")]), ); } #[test] fn array_index() { parse_test( "$.items[0]", Ok(&[PathSegment::Root, field("items"), PathSegment::Index(0)]), ); } #[test] fn chained_indices() { parse_test( "$.a[1][2]", Ok(&[ PathSegment::Root, field("a"), PathSegment::Index(1), PathSegment::Index(2), ]), ); } #[test] fn field_after_index() { parse_test( "$.a[0].b", Ok(&[ PathSegment::Root, field("a"), PathSegment::Index(0), field("b"), ]), ); } #[test] fn negative_index() { parse_test( "$.a[-1]", Ok(&[PathSegment::Root, field("a"), PathSegment::Index(-1)]), ); } // MARK: args #[test] fn field_with_simple_args() { parse_test( "$.foo(bar)", Ok(&[PathSegment::Root, field_args("foo", "bar")]), ); } #[test] fn field_with_empty_args() { parse_test("$.foo()", Ok(&[PathSegment::Root, field_args("foo", "")])); } #[test] fn field_with_nested_parens_in_args() { parse_test( "$.foo(a(b)c)", Ok(&[PathSegment::Root, field_args("foo", "a(b)c")]), ); } #[test] fn field_with_deeply_nested_parens_in_args() { parse_test( "$.foo(a(b(c))d)", Ok(&[PathSegment::Root, field_args("foo", "a(b(c))d")]), ); } #[test] fn field_with_escaped_open_paren_in_args() { // "$.foo(a\(b)" — '\(' is escaped, so depth never rises above 1; ')' closes it parse_test( r"$.foo(a\(b)", Ok(&[PathSegment::Root, field_args("foo", r"a\(b")]), ); } #[test] fn field_with_escaped_close_paren_in_args() { // "$.foo(a\)b)" — '\)' is escaped, the second ')' closes at depth 0 parse_test( r"$.foo(a\)b)", Ok(&[PathSegment::Root, field_args("foo", r"a\)b")]), ); } #[test] fn field_with_both_escaped_parens_in_args() { parse_test( r"$.foo(a\(b\)c)", Ok(&[PathSegment::Root, field_args("foo", r"a\(b\)c")]), ); } #[test] fn field_args_with_multiple_segments() { parse_test( "$.foo(x).bar(y)", Ok(&[ PathSegment::Root, field_args("foo", "x"), field_args("bar", "y"), ]), ); } #[test] fn field_args_unclosed_paren_error() { // Missing closing ')' → Syntax error at end of source parse_test("$.foo(bar", Err(PathParseError::Syntax { position: 9 })); } #[test] fn field_args_trailing_chars_after_close_error() { // Closing ')' is not the last char → Syntax error at the trailing char parse_test( "$.foo(bar)baz", Err(PathParseError::Syntax { position: 10 }), ); } #[test] fn non_ascii_error() { parse_test( "$.fé", Err(PathParseError::NonAsciiChar { position: 3, char: 'é', }), ); } // MARK: range fn range(start: i64, end: i64, inclusive: bool) -> PathSegment { PathSegment::Range { start, end, inclusive, } } #[test] fn exclusive_range() { parse_test( "$.a[0..5]", Ok(&[PathSegment::Root, field("a"), range(0, 5, false)]), ); } #[test] fn inclusive_range() { parse_test( "$.a[1..=2]", Ok(&[PathSegment::Root, field("a"), range(1, 2, true)]), ); } #[test] fn range_with_negative_end() { parse_test( "$.a[0..-1]", Ok(&[PathSegment::Root, field("a"), range(0, -1, false)]), ); } #[test] fn range_with_negative_start() { parse_test( "$.a[-3..-1]", Ok(&[PathSegment::Root, field("a"), range(-3, -1, false)]), ); } }