Improve arg parsing
This commit is contained in:
@@ -41,8 +41,11 @@ pub enum PathSegment {
|
||||
/// Go to root node (`$` identifier)
|
||||
Root,
|
||||
|
||||
/// Go to a child of the current object
|
||||
Field(Label),
|
||||
/// Go to a child of the current object.
|
||||
Field {
|
||||
name: Label,
|
||||
args: Option<SmartString<LazyCompact>>,
|
||||
},
|
||||
|
||||
/// Go to an element of the current list
|
||||
Index(i64),
|
||||
|
||||
@@ -1,10 +1,80 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use smartstring::{LazyCompact, SmartString};
|
||||
|
||||
use crate::{
|
||||
Label,
|
||||
objectpath::{PathParseError, PathSegment, tokenizer::Token},
|
||||
};
|
||||
|
||||
/// Parse an ident token into a `PathSegment::Field`, handling optional args of
|
||||
/// the form `name(args)`. Parens inside args may be nested; `\(` and `\)` are
|
||||
/// escaped and do not affect depth counting.
|
||||
fn parse_field(ident: &str, position: usize) -> Result<PathSegment, PathParseError> {
|
||||
let bytes = ident.as_bytes();
|
||||
let mut i = 0;
|
||||
|
||||
// Find the first unescaped '(' — everything before it is the name.
|
||||
let open_paren: Option<usize> = loop {
|
||||
if i >= bytes.len() {
|
||||
break None;
|
||||
}
|
||||
match bytes[i] {
|
||||
b'\\' => i += 2, // skip escaped character
|
||||
b'(' => break Some(i),
|
||||
_ => i += 1,
|
||||
}
|
||||
};
|
||||
|
||||
let name_str = &ident[..open_paren.unwrap_or(bytes.len())];
|
||||
let name = Label::new(name_str).ok_or_else(|| PathParseError::InvalidField {
|
||||
position,
|
||||
str: name_str.into(),
|
||||
})?;
|
||||
|
||||
let Some(open_pos) = open_paren else {
|
||||
return Ok(PathSegment::Field { name, args: None });
|
||||
};
|
||||
|
||||
// Scan args, tracking paren depth.
|
||||
let args_start = open_pos + 1;
|
||||
let mut depth: usize = 1;
|
||||
let mut j = args_start;
|
||||
|
||||
while j < bytes.len() {
|
||||
match bytes[j] {
|
||||
b'\\' => j += 2, // skip escaped character
|
||||
b'(' => {
|
||||
depth += 1;
|
||||
j += 1;
|
||||
}
|
||||
b')' => {
|
||||
depth -= 1;
|
||||
if depth == 0 {
|
||||
// Closing paren must be the last character.
|
||||
if j + 1 != bytes.len() {
|
||||
return Err(PathParseError::Syntax {
|
||||
position: position + j + 1,
|
||||
});
|
||||
}
|
||||
let args: SmartString<LazyCompact> = ident[args_start..j].into();
|
||||
return Ok(PathSegment::Field {
|
||||
name,
|
||||
args: Some(args),
|
||||
});
|
||||
}
|
||||
j += 1;
|
||||
}
|
||||
_ => j += 1,
|
||||
}
|
||||
}
|
||||
|
||||
// Reached end of ident without finding the matching ')'.
|
||||
Err(PathParseError::Syntax {
|
||||
position: position + ident.len(),
|
||||
})
|
||||
}
|
||||
|
||||
enum State {
|
||||
Start,
|
||||
|
||||
@@ -72,14 +142,7 @@ impl Parser {
|
||||
// MARK: dot
|
||||
//
|
||||
(State::Dot, (p, Token::Ident(ident))) => {
|
||||
self.segments
|
||||
.push(PathSegment::Field(Label::new(*ident).ok_or_else(|| {
|
||||
PathParseError::InvalidField {
|
||||
position: *p,
|
||||
str: (*ident).into(),
|
||||
}
|
||||
})?));
|
||||
|
||||
self.segments.push(parse_field(ident, *p)?);
|
||||
self.state = State::Selected;
|
||||
}
|
||||
|
||||
@@ -161,27 +224,30 @@ mod tests {
|
||||
parse_test("$", Ok(&[PathSegment::Root]));
|
||||
}
|
||||
|
||||
fn field(name: &str) -> PathSegment {
|
||||
PathSegment::Field {
|
||||
name: Label::new(name).unwrap(),
|
||||
args: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn field_args(name: &str, args: &str) -> PathSegment {
|
||||
PathSegment::Field {
|
||||
name: Label::new(name).unwrap(),
|
||||
args: Some(args.into()),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn single_field() {
|
||||
parse_test(
|
||||
"$.foo",
|
||||
Ok(&[
|
||||
PathSegment::Root,
|
||||
PathSegment::Field(Label::new("foo").unwrap()),
|
||||
]),
|
||||
);
|
||||
parse_test("$.foo", Ok(&[PathSegment::Root, field("foo")]));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn nested_fields() {
|
||||
parse_test(
|
||||
"$.foo.bar.baz",
|
||||
Ok(&[
|
||||
PathSegment::Root,
|
||||
PathSegment::Field(Label::new("foo").unwrap()),
|
||||
PathSegment::Field(Label::new("bar").unwrap()),
|
||||
PathSegment::Field(Label::new("baz").unwrap()),
|
||||
]),
|
||||
Ok(&[PathSegment::Root, field("foo"), field("bar"), field("baz")]),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -189,11 +255,7 @@ mod tests {
|
||||
fn array_index() {
|
||||
parse_test(
|
||||
"$.items[0]",
|
||||
Ok(&[
|
||||
PathSegment::Root,
|
||||
PathSegment::Field(Label::new("items").unwrap()),
|
||||
PathSegment::Index(0),
|
||||
]),
|
||||
Ok(&[PathSegment::Root, field("items"), PathSegment::Index(0)]),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -203,7 +265,7 @@ mod tests {
|
||||
"$.a[1][2]",
|
||||
Ok(&[
|
||||
PathSegment::Root,
|
||||
PathSegment::Field(Label::new("a").unwrap()),
|
||||
field("a"),
|
||||
PathSegment::Index(1),
|
||||
PathSegment::Index(2),
|
||||
]),
|
||||
@@ -216,9 +278,9 @@ mod tests {
|
||||
"$.a[0].b",
|
||||
Ok(&[
|
||||
PathSegment::Root,
|
||||
PathSegment::Field(Label::new("a").unwrap()),
|
||||
field("a"),
|
||||
PathSegment::Index(0),
|
||||
PathSegment::Field(Label::new("b").unwrap()),
|
||||
field("b"),
|
||||
]),
|
||||
);
|
||||
}
|
||||
@@ -227,14 +289,94 @@ mod tests {
|
||||
fn negative_index() {
|
||||
parse_test(
|
||||
"$.a[-1]",
|
||||
Ok(&[PathSegment::Root, field("a"), PathSegment::Index(-1)]),
|
||||
);
|
||||
}
|
||||
|
||||
// MARK: args
|
||||
|
||||
#[test]
fn field_with_simple_args() {
    // The text between the parens becomes the field's args.
    let source = "$.foo(bar)";
    parse_test(source, Ok(&[PathSegment::Root, field_args("foo", "bar")]));
}
|
||||
|
||||
#[test]
fn field_with_empty_args() {
    // `()` yields `Some("")`, which is distinct from a field with no parens.
    let source = "$.foo()";
    parse_test(source, Ok(&[PathSegment::Root, field_args("foo", "")]));
}
|
||||
|
||||
#[test]
fn field_with_nested_parens_in_args() {
    // A balanced inner pair of parens stays part of the args text.
    let source = "$.foo(a(b)c)";
    parse_test(
        source,
        Ok(&[PathSegment::Root, field_args("foo", "a(b)c")]),
    );
}
|
||||
|
||||
#[test]
fn field_with_deeply_nested_parens_in_args() {
    // Two levels of nesting inside the args still balance correctly.
    let source = "$.foo(a(b(c))d)";
    parse_test(
        source,
        Ok(&[PathSegment::Root, field_args("foo", "a(b(c))d")]),
    );
}
|
||||
|
||||
#[test]
fn field_with_escaped_open_paren_in_args() {
    // `\(` is escaped and does not open a group, so the single ')' closes the
    // args. The backslash is kept in the stored args text.
    let source = r"$.foo(a\(b)";
    parse_test(
        source,
        Ok(&[PathSegment::Root, field_args("foo", r"a\(b")]),
    );
}
|
||||
|
||||
#[test]
fn field_with_escaped_close_paren_in_args() {
    // `\)` is escaped and does not close the args; the final ')' does.
    // The backslash is kept in the stored args text.
    let source = r"$.foo(a\)b)";
    parse_test(
        source,
        Ok(&[PathSegment::Root, field_args("foo", r"a\)b")]),
    );
}
|
||||
|
||||
#[test]
fn field_with_both_escaped_parens_in_args() {
    // Both escaped parens are ignored for balancing and kept verbatim.
    let source = r"$.foo(a\(b\)c)";
    parse_test(
        source,
        Ok(&[PathSegment::Root, field_args("foo", r"a\(b\)c")]),
    );
}
|
||||
|
||||
#[test]
|
||||
fn field_args_with_multiple_segments() {
|
||||
parse_test(
|
||||
"$.foo(x).bar(y)",
|
||||
Ok(&[
|
||||
PathSegment::Root,
|
||||
PathSegment::Field(Label::new("a").unwrap()),
|
||||
PathSegment::Index(-1),
|
||||
field_args("foo", "x"),
|
||||
field_args("bar", "y"),
|
||||
]),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn field_args_unclosed_paren_error() {
    // No closing ')': the Syntax error points at the end of the 9-byte source.
    let source = "$.foo(bar";
    parse_test(source, Err(PathParseError::Syntax { position: 9 }));
}
|
||||
|
||||
#[test]
fn field_args_trailing_chars_after_close_error() {
    // Input continues after the closing ')': the Syntax error points at the
    // first trailing byte (offset 10, the 'b' of "baz").
    let source = "$.foo(bar)baz";
    parse_test(source, Err(PathParseError::Syntax { position: 10 }));
}
|
||||
|
||||
#[test]
|
||||
fn non_ascii_error() {
|
||||
parse_test(
|
||||
|
||||
Reference in New Issue
Block a user