Improve arg parsing
Some checks failed
CI / Typos (push) Successful in 20s
CI / Build and test (push) Successful in 2m28s
CI / Clippy (push) Failing after 2m50s
CI / Build and test (all features) (push) Successful in 7m27s

This commit is contained in:
2026-03-11 12:54:02 -07:00
parent 8a9388020c
commit f3bb1a265e
19 changed files with 327 additions and 98 deletions

View File

@@ -41,8 +41,11 @@ pub enum PathSegment {
/// Go to root node (`$` identifier)
Root,
/// Go to a child of the current object
Field(Label),
/// Go to a child of the current object.
Field {
name: Label,
args: Option<SmartString<LazyCompact>>,
},
/// Go to an element of the current list
Index(i64),

View File

@@ -1,10 +1,80 @@
use std::str::FromStr;
use smartstring::{LazyCompact, SmartString};
use crate::{
Label,
objectpath::{PathParseError, PathSegment, tokenizer::Token},
};
/// Turn an ident token into a `PathSegment::Field`.
///
/// The token is either a bare `name` or `name(args)`. Everything before the
/// first unescaped `(` is the name and must be accepted by `Label::new`;
/// the text between that `(` and its matching `)` is stored verbatim
/// (backslashes included) as the args.
///
/// A backslash escapes the byte that follows it, so `\(` and `\)` never
/// change the paren depth. Unescaped parens inside the args may nest.
///
/// `position` is the offset of `ident` in the source and is only used to
/// compute error positions.
///
/// # Errors
///
/// * `PathParseError::InvalidField` if the name part is rejected by
///   `Label::new`. This is checked before the args are scanned.
/// * `PathParseError::Syntax` if the opening paren is never closed (position
///   is the end of the token) or if anything follows the closing paren
///   (position is the first trailing byte).
fn parse_field(ident: &str, position: usize) -> Result<PathSegment, PathParseError> {
    let raw = ident.as_bytes();

    // Pass 1: locate the first unescaped '('. A byte preceded by a backslash
    // is skipped without being inspected.
    let mut paren_at: Option<usize> = None;
    let mut escaped = false;
    for (idx, &byte) in raw.iter().enumerate() {
        if escaped {
            escaped = false;
            continue;
        }
        match byte {
            b'\\' => escaped = true,
            b'(' => {
                paren_at = Some(idx);
                break;
            }
            _ => {}
        }
    }

    // The name is validated first, so a bad name wins over malformed args.
    let label_text = paren_at.map_or(ident, |idx| &ident[..idx]);
    let Some(name) = Label::new(label_text) else {
        return Err(PathParseError::InvalidField {
            position,
            str: label_text.into(),
        });
    };

    // No paren at all: plain field without args.
    let Some(paren_at) = paren_at else {
        return Ok(PathSegment::Field { name, args: None });
    };

    // Pass 2: find the ')' that brings the nesting level back to zero.
    let body_from = paren_at + 1;
    let mut nesting: usize = 1;
    let mut close_at: Option<usize> = None;
    let mut escaped = false;
    for (idx, &byte) in raw.iter().enumerate().skip(body_from) {
        if escaped {
            escaped = false;
            continue;
        }
        match byte {
            b'\\' => escaped = true,
            b'(' => nesting += 1,
            b')' => {
                nesting -= 1;
                if nesting == 0 {
                    close_at = Some(idx);
                    break;
                }
            }
            _ => {}
        }
    }

    // Ran off the end of the token while still inside the parens.
    let Some(close_at) = close_at else {
        return Err(PathParseError::Syntax {
            position: position + ident.len(),
        });
    };

    // The matching ')' has to be the very last byte of the token.
    if close_at + 1 != raw.len() {
        return Err(PathParseError::Syntax {
            position: position + close_at + 1,
        });
    }

    let args: SmartString<LazyCompact> = ident[body_from..close_at].into();
    Ok(PathSegment::Field {
        name,
        args: Some(args),
    })
}
enum State {
Start,
@@ -72,14 +142,7 @@ impl Parser {
// MARK: dot
//
(State::Dot, (p, Token::Ident(ident))) => {
self.segments
.push(PathSegment::Field(Label::new(*ident).ok_or_else(|| {
PathParseError::InvalidField {
position: *p,
str: (*ident).into(),
}
})?));
self.segments.push(parse_field(ident, *p)?);
self.state = State::Selected;
}
@@ -161,27 +224,30 @@ mod tests {
parse_test("$", Ok(&[PathSegment::Root]));
}
/// Test helper: build a `PathSegment::Field` called `name` that carries no args.
///
/// Panics if `Label::new` does not accept `name`.
fn field(name: &str) -> PathSegment {
    let name = Label::new(name).unwrap();
    PathSegment::Field { name, args: None }
}
/// Test helper: build a `PathSegment::Field` called `name` whose args are
/// exactly the string `args`.
///
/// Panics if `Label::new` does not accept `name`.
fn field_args(name: &str, args: &str) -> PathSegment {
    let name = Label::new(name).unwrap();
    let args = Some(args.into());
    PathSegment::Field { name, args }
}
#[test]
fn single_field() {
parse_test(
"$.foo",
Ok(&[
PathSegment::Root,
PathSegment::Field(Label::new("foo").unwrap()),
]),
);
parse_test("$.foo", Ok(&[PathSegment::Root, field("foo")]));
}
#[test]
fn nested_fields() {
parse_test(
"$.foo.bar.baz",
Ok(&[
PathSegment::Root,
PathSegment::Field(Label::new("foo").unwrap()),
PathSegment::Field(Label::new("bar").unwrap()),
PathSegment::Field(Label::new("baz").unwrap()),
]),
Ok(&[PathSegment::Root, field("foo"), field("bar"), field("baz")]),
);
}
@@ -189,11 +255,7 @@ mod tests {
fn array_index() {
parse_test(
"$.items[0]",
Ok(&[
PathSegment::Root,
PathSegment::Field(Label::new("items").unwrap()),
PathSegment::Index(0),
]),
Ok(&[PathSegment::Root, field("items"), PathSegment::Index(0)]),
);
}
@@ -203,7 +265,7 @@ mod tests {
"$.a[1][2]",
Ok(&[
PathSegment::Root,
PathSegment::Field(Label::new("a").unwrap()),
field("a"),
PathSegment::Index(1),
PathSegment::Index(2),
]),
@@ -216,9 +278,9 @@ mod tests {
"$.a[0].b",
Ok(&[
PathSegment::Root,
PathSegment::Field(Label::new("a").unwrap()),
field("a"),
PathSegment::Index(0),
PathSegment::Field(Label::new("b").unwrap()),
field("b"),
]),
);
}
@@ -227,14 +289,94 @@ mod tests {
fn negative_index() {
// A negative number inside brackets is expected to come back as
// `PathSegment::Index(-1)` following the plain field `a`.
parse_test(
"$.a[-1]",
Ok(&[PathSegment::Root, field("a"), PathSegment::Index(-1)]),
);
}
// MARK: args
#[test]
fn field_with_simple_args() {
// `name(args)`: the text between the outer parens becomes the field's args.
parse_test(
"$.foo(bar)",
Ok(&[PathSegment::Root, field_args("foo", "bar")]),
);
}
#[test]
fn field_with_empty_args() {
// Empty parens are expected to give `Some("")` args, which is distinct from
// a field written without parens (`None`, see `field`).
parse_test("$.foo()", Ok(&[PathSegment::Root, field_args("foo", "")]));
}
#[test]
fn field_with_nested_parens_in_args() {
// A balanced inner pair of parens is part of the args and is kept verbatim;
// only the outermost ')' ends the args.
parse_test(
"$.foo(a(b)c)",
Ok(&[PathSegment::Root, field_args("foo", "a(b)c")]),
);
}
#[test]
fn field_with_deeply_nested_parens_in_args() {
// Same as the nested case, but with two levels of inner parens.
parse_test(
"$.foo(a(b(c))d)",
Ok(&[PathSegment::Root, field_args("foo", "a(b(c))d")]),
);
}
#[test]
fn field_with_escaped_open_paren_in_args() {
// "$.foo(a\(b)" — '\(' is escaped, so depth never rises above 1 and the
// single ')' closes the args. The backslash is kept in the args string.
parse_test(
r"$.foo(a\(b)",
Ok(&[PathSegment::Root, field_args("foo", r"a\(b")]),
);
}
#[test]
fn field_with_escaped_close_paren_in_args() {
// "$.foo(a\)b)" — '\)' is escaped and leaves the depth at 1; the final,
// unescaped ')' takes the depth to 0 and closes the args. The backslash is
// kept in the args string.
parse_test(
r"$.foo(a\)b)",
Ok(&[PathSegment::Root, field_args("foo", r"a\)b")]),
);
}
#[test]
fn field_with_both_escaped_parens_in_args() {
// Escaped '\(' and '\)' together: neither affects depth, and both escape
// sequences are expected to appear unchanged in the args.
parse_test(
r"$.foo(a\(b\)c)",
Ok(&[PathSegment::Root, field_args("foo", r"a\(b\)c")]),
);
}
#[test]
fn field_args_with_multiple_segments() {
parse_test(
"$.foo(x).bar(y)",
Ok(&[
PathSegment::Root,
PathSegment::Field(Label::new("a").unwrap()),
PathSegment::Index(-1),
field_args("foo", "x"),
field_args("bar", "y"),
]),
);
}
#[test]
fn field_args_unclosed_paren_error() {
// Missing closing ')' → Syntax error at end of source.
// The input is 9 bytes long, so position 9 is one past its last byte.
parse_test("$.foo(bar", Err(PathParseError::Syntax { position: 9 }));
}
#[test]
fn field_args_trailing_chars_after_close_error() {
// Closing ')' is not the last char → Syntax error at the trailing char.
// Byte 9 of the input is ')', so position 10 is the 'b' of "baz".
parse_test(
"$.foo(bar)baz",
Err(PathParseError::Syntax { position: 10 }),
);
}
#[test]
fn non_ascii_error() {
parse_test(