Refactor sidecars
This commit is contained in:
195
crates/pile-config/src/pattern/parser.rs
Normal file
195
crates/pile-config/src/pattern/parser.rs
Normal file
@@ -0,0 +1,195 @@
|
||||
use smartstring::{LazyCompact, SmartString};
|
||||
|
||||
use crate::{objectpath::ObjectPath, pattern::GroupPatternParseError};
|
||||
|
||||
#[cfg_attr(test, derive(PartialEq))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum GroupSegment {
|
||||
Path(ObjectPath),
|
||||
Literal(SmartString<LazyCompact>),
|
||||
}
|
||||
|
||||
/// Parser that splits a pattern string into [`GroupSegment`]s.
///
/// The type has no fields, so it holds no state or configuration;
/// `Parser::default()` is equivalent to `Parser::new()`.
#[derive(Debug, Default)]
pub struct Parser {}
|
||||
|
||||
impl Parser {
|
||||
pub fn new() -> Self {
|
||||
Self {}
|
||||
}
|
||||
|
||||
/// Parse a pattern string of the form `{path}.literal{path}...`.
|
||||
///
|
||||
/// - `{...}` delimiters are parsed as [`ObjectPath`] expressions.
|
||||
/// Nested `{}` inside a path are allowed; depth is tracked to find the
|
||||
/// matching closing brace.
|
||||
/// - Everything outside `{...}` is a `Literal` segment.
|
||||
/// - A bare `}` in literal position (depth == 0) is a syntax error.
|
||||
/// - An unclosed `{` is a syntax error.
|
||||
pub fn parse(self, source: &str) -> Result<Vec<(usize, GroupSegment)>, GroupPatternParseError> {
|
||||
let mut tokens = Vec::new();
|
||||
|
||||
// `depth` > 0 means we are currently inside a `{...}` path expression.
|
||||
let mut depth: usize = 0;
|
||||
// Start of the current segment (literal text or path content).
|
||||
let mut window_start: usize = 0;
|
||||
// Source position of the opening `{` for the current path (used for error reporting).
|
||||
let mut open_brace: usize = 0;
|
||||
|
||||
for (i, c) in source.char_indices() {
|
||||
match c {
|
||||
'{' => {
|
||||
if depth == 0 {
|
||||
// Emit any accumulated literal.
|
||||
if i > window_start {
|
||||
tokens.push((
|
||||
window_start,
|
||||
GroupSegment::Literal(source[window_start..i].into()),
|
||||
));
|
||||
}
|
||||
open_brace = i;
|
||||
// Path content starts after the opening brace.
|
||||
window_start = i + 1;
|
||||
depth = 1;
|
||||
} else {
|
||||
// Nested brace inside a path — keep counting.
|
||||
depth += 1;
|
||||
}
|
||||
}
|
||||
|
||||
'}' => {
|
||||
if depth == 0 {
|
||||
// Unmatched `}` outside any path.
|
||||
return Err(GroupPatternParseError::Syntax { position: i });
|
||||
}
|
||||
depth -= 1;
|
||||
if depth == 0 {
|
||||
// Closing brace of the outermost path expression — parse as ObjectPath.
|
||||
let path_str = &source[window_start..i];
|
||||
let path = path_str.parse::<ObjectPath>().map_err(|e| {
|
||||
GroupPatternParseError::InvalidObjectPath {
|
||||
start: open_brace,
|
||||
end: i + 1,
|
||||
path: path_str.into(),
|
||||
source: e,
|
||||
}
|
||||
})?;
|
||||
tokens.push((open_brace, GroupSegment::Path(path)));
|
||||
// Literal content (if any) starts after this `}`.
|
||||
window_start = i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
// Unclosed `{`.
|
||||
if depth > 0 {
|
||||
return Err(GroupPatternParseError::Syntax {
|
||||
position: open_brace,
|
||||
});
|
||||
}
|
||||
|
||||
// Emit any trailing literal.
|
||||
if window_start < source.len() {
|
||||
tokens.push((
|
||||
window_start,
|
||||
GroupSegment::Literal(source[window_start..].into()),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(tokens)
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// MARK: tests
|
||||
//
|
||||
|
||||
#[expect(clippy::unwrap_used)]
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the parser's success value: (byte offset, segment) pairs.
    type Segments = Vec<(usize, GroupSegment)>;

    /// Run a fresh [`Parser`] over `source`.
    fn parse(source: &str) -> Result<Segments, GroupPatternParseError> {
        let parser = Parser::new();
        parser.parse(source)
    }

    /// Build the expected path segment for the object path `s`.
    fn path(s: &str) -> GroupSegment {
        let object_path = s.parse().unwrap();
        GroupSegment::Path(object_path)
    }

    /// Build the expected literal segment holding `s`.
    fn lit(s: &str) -> GroupSegment {
        GroupSegment::Literal(s.into())
    }

    #[test]
    fn single_path() {
        let expected = vec![(0, path("$.foo"))];
        assert_eq!(parse("{$.foo}").unwrap(), expected);
    }

    #[test]
    fn single_literal() {
        let expected = vec![(0, lit("hello"))];
        assert_eq!(parse("hello").unwrap(), expected);
    }

    #[test]
    fn path_then_literal() {
        let expected = vec![(0, path("$.foo")), (7, lit(".txt"))];
        assert_eq!(parse("{$.foo}.txt").unwrap(), expected);
    }

    #[test]
    fn literal_then_path() {
        let expected = vec![(0, lit("prefix/")), (7, path("$.foo"))];
        assert_eq!(parse("prefix/{$.foo}").unwrap(), expected);
    }

    #[test]
    fn interleaved() {
        let expected = vec![(0, path("$.a")), (5, lit(".sep.")), (10, path("$.b"))];
        assert_eq!(parse("{$.a}.sep.{$.b}").unwrap(), expected);
    }

    #[test]
    fn unmatched_open_brace_error() {
        // The reported position is that of the `{` that was never closed.
        let expected = Err(GroupPatternParseError::Syntax { position: 0 });
        assert_eq!(parse("{$.foo"), expected);
    }

    #[test]
    fn unmatched_close_brace_in_literal_error() {
        // The reported position is that of the stray `}`.
        let expected = Err(GroupPatternParseError::Syntax { position: 3 });
        assert_eq!(parse("foo}bar"), expected);
    }

    #[test]
    fn invalid_path_error() {
        // `start..end` covers the braces as well as the path text.
        let expected = Err(GroupPatternParseError::InvalidObjectPath {
            start: 0,
            end: 12,
            path: "not-a-path".into(),
            source: crate::objectpath::PathParseError::MustStartWithRoot { position: 0 },
        });
        assert_eq!(parse("{not-a-path}"), expected);
    }

    #[test]
    fn literal_between_paths() {
        let expected = vec![(0, lit("foo")), (3, path("$.x")), (8, lit("bar"))];
        assert_eq!(parse("foo{$.x}bar").unwrap(), expected);
    }
}
|
||||
Reference in New Issue
Block a user