196 lines
4.4 KiB
Rust
196 lines
4.4 KiB
Rust
use smartstring::{LazyCompact, SmartString};
|
|
|
|
use crate::{objectpath::ObjectPath, pattern::GroupPatternParseError};
|
|
|
|
#[cfg_attr(test, derive(PartialEq))]
|
|
#[derive(Debug, Clone)]
|
|
pub enum GroupSegment {
|
|
Path(ObjectPath),
|
|
Literal(SmartString<LazyCompact>),
|
|
}
|
|
|
|
/// Stateless parser that splits a group pattern string into literal and
/// object-path segments.
///
/// The type carries no configuration, so `Parser::default()` and
/// `Parser::new()` are interchangeable. `Default` is derived so that the
/// zero-argument `new` constructor has the conventional counterpart
/// (see clippy's `new_without_default`), and `Debug` so the type can be
/// embedded in other `Debug` types.
#[derive(Debug, Default)]
pub struct Parser {}
|
|
|
|
impl Parser {
|
|
pub fn new() -> Self {
|
|
Self {}
|
|
}
|
|
|
|
/// Parse a pattern string of the form `{path}.literal{path}...`.
|
|
///
|
|
/// - `{...}` delimiters are parsed as [`ObjectPath`] expressions.
|
|
/// Nested `{}` inside a path are allowed; depth is tracked to find the
|
|
/// matching closing brace.
|
|
/// - Everything outside `{...}` is a `Literal` segment.
|
|
/// - A bare `}` in literal position (depth == 0) is a syntax error.
|
|
/// - An unclosed `{` is a syntax error.
|
|
pub fn parse(self, source: &str) -> Result<Vec<(usize, GroupSegment)>, GroupPatternParseError> {
|
|
let mut tokens = Vec::new();
|
|
|
|
// `depth` > 0 means we are currently inside a `{...}` path expression.
|
|
let mut depth: usize = 0;
|
|
// Start of the current segment (literal text or path content).
|
|
let mut window_start: usize = 0;
|
|
// Source position of the opening `{` for the current path (used for error reporting).
|
|
let mut open_brace: usize = 0;
|
|
|
|
for (i, c) in source.char_indices() {
|
|
match c {
|
|
'{' => {
|
|
if depth == 0 {
|
|
// Emit any accumulated literal.
|
|
if i > window_start {
|
|
tokens.push((
|
|
window_start,
|
|
GroupSegment::Literal(source[window_start..i].into()),
|
|
));
|
|
}
|
|
open_brace = i;
|
|
// Path content starts after the opening brace.
|
|
window_start = i + 1;
|
|
depth = 1;
|
|
} else {
|
|
// Nested brace inside a path — keep counting.
|
|
depth += 1;
|
|
}
|
|
}
|
|
|
|
'}' => {
|
|
if depth == 0 {
|
|
// Unmatched `}` outside any path.
|
|
return Err(GroupPatternParseError::Syntax { position: i });
|
|
}
|
|
depth -= 1;
|
|
if depth == 0 {
|
|
// Closing brace of the outermost path expression — parse as ObjectPath.
|
|
let path_str = &source[window_start..i];
|
|
let path = path_str.parse::<ObjectPath>().map_err(|e| {
|
|
GroupPatternParseError::InvalidObjectPath {
|
|
start: open_brace,
|
|
end: i + 1,
|
|
path: path_str.into(),
|
|
source: e,
|
|
}
|
|
})?;
|
|
tokens.push((open_brace, GroupSegment::Path(path)));
|
|
// Literal content (if any) starts after this `}`.
|
|
window_start = i + 1;
|
|
}
|
|
}
|
|
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
// Unclosed `{`.
|
|
if depth > 0 {
|
|
return Err(GroupPatternParseError::Syntax {
|
|
position: open_brace,
|
|
});
|
|
}
|
|
|
|
// Emit any trailing literal.
|
|
if window_start < source.len() {
|
|
tokens.push((
|
|
window_start,
|
|
GroupSegment::Literal(source[window_start..].into()),
|
|
));
|
|
}
|
|
|
|
Ok(tokens)
|
|
}
|
|
}
|
|
|
|
//
|
|
// MARK: tests
|
|
//
|
|
|
|
#[expect(clippy::unwrap_used)]
#[cfg(test)]
mod tests {
    use super::*;

    /// Run a fresh [`Parser`] over `source`.
    fn parse(source: &str) -> Result<Vec<(usize, GroupSegment)>, GroupPatternParseError> {
        let parser = Parser::new();
        parser.parse(source)
    }

    /// Build the expected `Path` segment for a path string that must be valid.
    fn path(s: &str) -> GroupSegment {
        let parsed: ObjectPath = s.parse().unwrap();
        GroupSegment::Path(parsed)
    }

    /// Build the expected `Literal` segment for `s`.
    fn lit(s: &str) -> GroupSegment {
        let text: SmartString<LazyCompact> = s.into();
        GroupSegment::Literal(text)
    }

    #[test]
    fn single_path() {
        let segments = parse("{$.foo}").unwrap();
        let expected = vec![(0, path("$.foo"))];
        assert_eq!(segments, expected);
    }

    #[test]
    fn single_literal() {
        let segments = parse("hello").unwrap();
        let expected = vec![(0, lit("hello"))];
        assert_eq!(segments, expected);
    }

    #[test]
    fn path_then_literal() {
        let segments = parse("{$.foo}.txt").unwrap();
        let expected = vec![
            (0, path("$.foo")),
            (7, lit(".txt")),
        ];
        assert_eq!(segments, expected);
    }

    #[test]
    fn literal_then_path() {
        let segments = parse("prefix/{$.foo}").unwrap();
        let expected = vec![
            (0, lit("prefix/")),
            (7, path("$.foo")),
        ];
        assert_eq!(segments, expected);
    }

    #[test]
    fn interleaved() {
        let segments = parse("{$.a}.sep.{$.b}").unwrap();
        let expected = vec![
            (0, path("$.a")),
            (5, lit(".sep.")),
            (10, path("$.b")),
        ];
        assert_eq!(segments, expected);
    }

    #[test]
    fn unmatched_open_brace_error() {
        // The error points at the `{` that was never closed.
        let error = parse("{$.foo").unwrap_err();
        assert_eq!(error, GroupPatternParseError::Syntax { position: 0 });
    }

    #[test]
    fn unmatched_close_brace_in_literal_error() {
        // The error points at the stray `}` itself.
        let error = parse("foo}bar").unwrap_err();
        assert_eq!(error, GroupPatternParseError::Syntax { position: 3 });
    }

    #[test]
    fn invalid_path_error() {
        let error = parse("{not-a-path}").unwrap_err();
        let expected = GroupPatternParseError::InvalidObjectPath {
            start: 0,
            end: 12,
            path: "not-a-path".into(),
            source: crate::objectpath::PathParseError::MustStartWithRoot { position: 0 },
        };
        assert_eq!(error, expected);
    }

    #[test]
    fn literal_between_paths() {
        let segments = parse("foo{$.x}bar").unwrap();
        let expected = vec![
            (0, lit("foo")),
            (3, path("$.x")),
            (8, lit("bar")),
        ];
        assert_eq!(segments, expected);
    }
}
|