Add regex extractor
This commit is contained in:
@@ -21,7 +21,52 @@ impl Tokenizer {
|
||||
let mut tokens = Vec::new();
|
||||
|
||||
let mut window_start = None;
|
||||
// Paren depth: while > 0, `.` / `[` / `]` / `$` are part of the ident.
|
||||
let mut paren_depth: usize = 0;
|
||||
// When true, the current char is escaped by a preceding `\` and is
|
||||
// treated as a plain ident character with no special meaning.
|
||||
let mut skip_next = false;
|
||||
|
||||
for (i, c) in source.char_indices() {
|
||||
if skip_next {
|
||||
skip_next = false;
|
||||
// Escaped char: just extend the ident window (already opened by `\`).
|
||||
continue;
|
||||
}
|
||||
|
||||
if c == '\\' {
|
||||
if window_start.is_none() {
|
||||
window_start = Some(i);
|
||||
}
|
||||
skip_next = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if paren_depth > 0 {
|
||||
// Inside parens: only track depth changes, everything else is ident.
|
||||
match c {
|
||||
'(' => {
|
||||
if window_start.is_none() {
|
||||
window_start = Some(i);
|
||||
}
|
||||
paren_depth += 1;
|
||||
}
|
||||
')' => {
|
||||
if window_start.is_none() {
|
||||
window_start = Some(i);
|
||||
}
|
||||
paren_depth -= 1;
|
||||
}
|
||||
x if x.is_ascii() => {
|
||||
if window_start.is_none() {
|
||||
window_start = Some(i);
|
||||
}
|
||||
}
|
||||
char => return Err(PathParseError::NonAsciiChar { position: i, char }),
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
match c {
|
||||
'$' => {
|
||||
if let Some(s) = window_start.take() {
|
||||
@@ -51,10 +96,26 @@ impl Tokenizer {
|
||||
tokens.push((i, Token::SqbClose));
|
||||
}
|
||||
|
||||
x if x.is_ascii() => match window_start {
|
||||
None => window_start = Some(i),
|
||||
Some(_) => continue,
|
||||
},
|
||||
'(' => {
|
||||
if window_start.is_none() {
|
||||
window_start = Some(i);
|
||||
}
|
||||
paren_depth += 1;
|
||||
}
|
||||
|
||||
')' => {
|
||||
if window_start.is_none() {
|
||||
window_start = Some(i);
|
||||
}
|
||||
// paren_depth is 0 here — stray `)` is an ident char and
|
||||
// parse_field will surface the error later.
|
||||
}
|
||||
|
||||
x if x.is_ascii() => {
|
||||
if window_start.is_none() {
|
||||
window_start = Some(i);
|
||||
}
|
||||
}
|
||||
|
||||
char => return Err(PathParseError::NonAsciiChar { position: i, char }),
|
||||
}
|
||||
|
||||
@@ -122,6 +122,14 @@ mod tests {
|
||||
GroupSegment::Literal(s.into())
|
||||
}
|
||||
|
||||
// Checks that a template whose path contains a `.regex(...)` call is parsed
// as one path. The regex argument contains `.`, `*`, nested `(...)` groups and
// backslash-escaped parens, and the expected result is a single
// `(0, path(..))` entry whose text is the input without the outer `{` / `}`.
// NOTE(review): `parse` and `path` are defined outside this view — confirm
// what the leading `0` in the tuple denotes.
#[test]
|
||||
fn regex() {
|
||||
assert_eq!(
|
||||
// In these string literals `\\(` and `\\)` are each one backslash followed
// by a paren at runtime, i.e. escaped parens inside the regex argument.
parse("{$.split(/)[-1].regex((.*).pub \\((.*)\\).pdf)[0]}").unwrap(),
|
||||
vec![(0, path("$.split(/)[-1].regex((.*).pub \\((.*)\\).pdf)[0]"))]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn single_path() {
|
||||
assert_eq!(parse("{$.foo}").unwrap(), vec![(0, path("$.foo"))]);
|
||||
|
||||
Reference in New Issue
Block a user