Upgrade parser

This commit is contained in:
2025-05-03 16:40:17 -07:00
parent 00c88ccf51
commit b8302d3381
4 changed files with 442 additions and 31 deletions

View File

@ -13,20 +13,14 @@ pub mod tool;
pub mod util;
// enumerate files with a spinner (count size)
// warn if links and follow
// trim everything
// parallelism
// input from stdin?
// * ** greed
// fix and document "**.flac", "**/*.flac"
// tests
// show progress
// bash before and after
// capture/print stdout/stderr
// workdir vs root?
//
// Tools:
// - *** bash
// - * list
// - *** rename
// - ** typst
@ -116,11 +110,13 @@ fn main_inner() -> Result<ExitCode> {
// MARK: rules
//
let rules = manifest
.rules
.iter()
.map(|rule| (rule.regex(), rule.tasks))
.collect::<Vec<_>>();
let rules = {
let mut rules = Vec::new();
for rule in &manifest.rules {
rules.push((rule.regex()?, rule.tasks));
}
rules
};
let source_path = std::path::absolute(&work_dir)?;
let walker = WalkDir::new(&source_path).follow_links(manifest.config.follow_links);

View File

@ -1,8 +1,9 @@
use anyhow::Result;
use anyhow::{Result, bail};
use indexmap::IndexMap;
use regex::Regex;
use serde::Deserialize;
use std::path::{Path, PathBuf};
use tracing::warn;
use crate::tool::ToolConfig;
@ -146,24 +147,155 @@ pub struct FlatPickRule {
pub tasks: Vec<String>,
}
/// One piece of a path pattern, as consumed by the regex builder.
#[derive(Debug)]
enum RegexSegment {
    /// Regex text for a single path segment. It is emitted as-is.
    Single(String),

    /// A `**` wildcard. Its regex is an optional group, so it may
    /// match nothing at all.
    DoubleStar,
}

impl RegexSegment {
    /// Renders this segment as regex source.
    ///
    /// `prev` and `next` are the neighboring segments (`None` at either
    /// end of the pattern). They decide who emits the `/` separators:
    /// - a single segment emits a trailing `[/]` only when it is
    ///   followed by another single segment;
    /// - a doublestar emits the slash on every side where it touches
    ///   a single segment.
    ///
    /// # Panics
    /// Panics if a doublestar has a doublestar neighbor. The caller is
    /// expected to collapse those before calling this method.
    fn to_regex_part(&self, prev: Option<&Self>, next: Option<&Self>) -> String {
        match self {
            Self::Single(body) => match next {
                // Two single segments in a row: we own the separator.
                Some(Self::Single(_)) => format!("{body}[/]"),

                // End of pattern, or a doublestar follows and
                // takes care of the separator itself.
                Some(Self::DoubleStar) | None => body.clone(),
            },

            Self::DoubleStar => {
                // `[^/]+` is one path segment. Every variant below is
                // "a segment followed by any number of `/segment`",
                // differing only in where the extra slashes go.
                let wildcard: &str = match (prev, next) {
                    (Some(Self::DoubleStar), _) | (_, Some(Self::DoubleStar)) => {
                        unreachable!("consecutive doublestars must be reduced")
                    }

                    // Whole pattern is `**`: no extra slashes
                    (None, None) => "((?:[^/]+(?:[/][^/]+)*)?)",

                    // `x/**`: slash in front
                    (Some(Self::Single(_)), None) => "((?:[/][^/]+(?:[/][^/]+)*)?)",

                    // `**/x`: slash behind
                    (None, Some(Self::Single(_))) => "((?:[^/]+(?:[/][^/]+)*[/])?)",

                    // `x/**/y`: slash on both sides.
                    // When nothing is matched, a lone `[/]` still
                    // has to separate the two neighbors.
                    (Some(Self::Single(_)), Some(Self::Single(_))) => {
                        "((?:[/][^/]+(?:[/][^/]+)*[/])|[/])"
                    }
                };

                wildcard.to_owned()
            }
        }
    }
}
impl FlatPickRule {
/// Build an anchored (`^...$`) regex from this rule's `patterns`.
///
/// The patterns are joined with `/`, then split into segments on `/`
/// and on runs of two or more `*`. Each segment is translated:
/// - `**` becomes a `RegexSegment::DoubleStar`
///   (adjacent doublestars collapse into one);
/// - any other segment is regex-escaped, with every `*` inside it
///   replaced by the capture group `([^/]*)`.
///
/// # Errors
/// Fails with an "Invalid wildcard" error when a segment is a run of
/// three or more `*`.
//
// NOTE(review): two signatures and two tail expressions are visible in
// this block. This looks like the old and new lines of a change shown
// together; confirm which ones are live before relying on these docs.
pub fn regex(&self) -> Regex {
pub fn regex(&self) -> Result<Regex> {
// Flatten pattern
// Double slashes are handled later
let pattern = self.patterns.join("/");
if pattern.ends_with("/") {
warn!("Pattern `{pattern}` has a trailing slash which will be ignored")
}
if pattern.starts_with("/") {
warn!("Pattern `{pattern}` has a leading slash which will be ignored")
}
// Split on slashes or stars
// This is a lot like .split("/"), but handles
// the edge case where ** is not delimited by slashes
// (`root**test` is equivalent to `root/**/test`)
let segments = {
#[expect(clippy::unwrap_used)]
let re = Regex::new("[*]{2,}|[/]").unwrap();
let split = re.find_iter(&pattern);
// Start and end offset of every delimiter match,
// followed by the end of the pattern.
let bounds = split
.into_iter()
.flat_map(|x| {
let r = x.range();
let a = r.start;
let b = r.end;
[a, b]
})
.chain([pattern.len()])
.collect::<Vec<_>>();
let mut parts = Vec::new();
let mut last = 0;
// Slicing between consecutive offsets yields the text between
// delimiters AND the delimiters themselves. Star runs are kept
// as segments; slashes and empty slices are dropped.
for next in bounds {
let seg = &pattern[last..next];
// Consecutive slashes are identical to a single slash
if seg != "/" && !seg.is_empty() {
parts.push(seg);
}
last = next;
}
parts
};
let mut rebuilt_segments = Vec::new();
let mut last_was_doublestar = false;
for segment in segments {
// This is a wildcard segment
// (**, ***, etc)
if segment.len() > 1 && segment.chars().all(|x| x == '*') {
match segment {
"**" => {
// Consecutive doublestars are meaningless
if !last_was_doublestar {
rebuilt_segments.push(RegexSegment::DoubleStar);
}
last_was_doublestar = true;
}
// Three or more stars have no defined meaning
_ => bail!("Invalid wildcard `{segment}`"),
}
continue;
}
last_was_doublestar = false;
// Escape the literal text around each `*`, and put a
// "any run of non-slash characters" group where each `*` was.
let parts = segment.split("*").collect::<Vec<_>>();
let mut rebuilt = String::new();
for (i, part) in parts.into_iter().enumerate() {
if i != 0 {
rebuilt.push_str("([^/]*)")
}
rebuilt.push_str(&regex::escape(part));
}
rebuilt_segments.push(RegexSegment::Single(rebuilt));
}
// Concatenate the segments. Each one is shown its neighbors
// so it can decide which slashes to emit.
let mut re_built = String::new();
let mut prev = None;
for (i, seg) in rebuilt_segments.iter().enumerate() {
let next = rebuilt_segments.get(i + 1);
re_built.push_str(&seg.to_regex_part(prev, next));
prev = Some(seg);
}
// Anchor both ends so the whole path has to match
let re_built = format!("^{re_built}$");
// This regex should always be valid
#[expect(clippy::unwrap_used)]
Regex::new(
&self
.patterns
.join("/")
.split("/")
.map(|x| match x {
"**" => "((:?[^/]+)*)".to_owned(),
"*" => "([^/]+)".to_owned(),
x => regex::escape(x),
})
.collect::<Vec<_>>()
.join("/"),
)
.unwrap()
Ok(Regex::new(&re_built).unwrap())
}
}
@ -363,4 +495,287 @@ mod tests {
let result = toml::from_str::<TestManifest>(toml_str);
assert!(result.is_err());
}
#[test]
fn pattern_simple() {
    // A bare file name matches that exact path and nothing else.
    let regex = FlatPickRule {
        patterns: vec!["file.txt".to_owned()],
        tasks: vec!["copy".to_owned()],
    }
    .regex()
    .unwrap();

    assert!(regex.is_match("file.txt"));
    for path in ["other.txt", "path/file.txt"] {
        assert!(!regex.is_match(path), "`{path}` should not match");
    }
}
#[test]
fn pattern_with_path() {
    // Separate pattern entries are joined into one slash-separated path.
    let regex = FlatPickRule {
        patterns: ["dir", "file.txt"].map(String::from).to_vec(),
        tasks: vec!["copy".to_owned()],
    }
    .regex()
    .unwrap();

    assert!(regex.is_match("dir/file.txt"));
    for path in ["file.txt", "other/file.txt"] {
        assert!(!regex.is_match(path), "`{path}` should not match");
    }
}
#[test]
fn pattern_wildcard_simple() {
    // A single `*` stays inside one path segment.
    let regex = FlatPickRule {
        patterns: vec!["*.txt".to_owned()],
        tasks: vec!["copy".to_owned()],
    }
    .regex()
    .unwrap();

    for path in ["file.txt", "other.txt"] {
        assert!(regex.is_match(path), "`{path}` should match");
    }
    for path in ["file.jpg", "nested/file.txt"] {
        assert!(!regex.is_match(path), "`{path}` should not match");
    }
}
#[test]
fn pattern_doublestar() {
    // A leading `**` entry matches zero or more directories.
    let regex = FlatPickRule {
        patterns: ["**", "*.txt"].map(String::from).to_vec(),
        tasks: vec!["copy".to_owned()],
    }
    .regex()
    .unwrap();

    for path in ["file.txt", "dir/file.txt", "dir/subdir/file.txt"] {
        assert!(regex.is_match(path), "`{path}` should match");
    }
    for path in ["file.jpg", "dir/file.jpg"] {
        assert!(!regex.is_match(path), "`{path}` should not match");
    }
}
#[test]
fn pattern_doublestar_consecutive() {
    // Repeated `**` entries must behave exactly like a single one.
    let regex = FlatPickRule {
        patterns: ["**", "**", "**", "*.txt"].map(String::from).to_vec(),
        tasks: vec!["copy".to_owned()],
    }
    .regex()
    .unwrap();

    for path in ["file.txt", "dir/file.txt", "dir/subdir/file.txt"] {
        assert!(regex.is_match(path), "`{path}` should match");
    }
    for path in ["file.jpg", "dir/file.jpg"] {
        assert!(!regex.is_match(path), "`{path}` should not match");
    }
}
#[test]
fn pattern_wildcard_double_slash() {
    // `**/` written inside a single pattern string.
    let regex = FlatPickRule {
        patterns: vec!["**/*.txt".to_owned()],
        tasks: vec!["copy".to_owned()],
    }
    .regex()
    .unwrap();

    for path in ["file.txt", "dir/file.txt", "dir/subdir/file.txt"] {
        assert!(regex.is_match(path), "`{path}` should match");
    }
    for path in ["file.jpg", "dir/file.jpg"] {
        assert!(!regex.is_match(path), "`{path}` should not match");
    }
}
#[test]
fn pattern_single_dual() {
    // Two `*` in one segment: the name only has to contain an `a`.
    let regex = FlatPickRule {
        patterns: vec!["**/*a*".to_owned()],
        tasks: vec!["copy".to_owned()],
    }
    .regex()
    .unwrap();

    for path in [
        "fileafile",
        "dir/fileafile",
        "filea",
        "dir/filea",
        "afile",
        "dir/afile",
    ] {
        assert!(regex.is_match(path), "`{path}` should match");
    }
    for path in ["noletter", "dir/noletter"] {
        assert!(!regex.is_match(path), "`{path}` should not match");
    }
}
#[test]
fn pattern_single_end() {
    // `**/*` accepts a file at the root as well as a nested one.
    let regex = FlatPickRule {
        patterns: vec!["**/*".to_owned()],
        tasks: vec!["copy".to_owned()],
    }
    .regex()
    .unwrap();

    for path in ["file", "dir/file"] {
        assert!(regex.is_match(path), "`{path}` should match");
    }
}
#[test]
fn pattern_double_end() {
    // A trailing `**` matches below the given root only.
    let regex = FlatPickRule {
        patterns: vec!["root/**".to_owned()],
        tasks: vec!["copy".to_owned()],
    }
    .regex()
    .unwrap();

    assert!(regex.is_match("root/file"));
    assert!(!regex.is_match("dir/file"));
}
#[test]
fn pattern_double_start() {
    // A leading `**` may be empty, but the final segment must be `dir`.
    let regex = FlatPickRule {
        patterns: vec!["**/dir".to_owned()],
        tasks: vec!["copy".to_owned()],
    }
    .regex()
    .unwrap();

    for path in ["dir", "a/b/dir"] {
        assert!(regex.is_match(path), "`{path}` should match");
    }
    assert!(!regex.is_match("dir/file"));
}
#[test]
fn pattern_double_adjacent_before() {
    // `**` glued to the front of a name acts as its own segment:
    // `root/**test` behaves like `root/**/test`.
    let regex = FlatPickRule {
        patterns: vec!["root/**test".to_owned()],
        tasks: vec!["copy".to_owned()],
    }
    .regex()
    .unwrap();

    for path in ["root/test", "root/a/test", "root/a/b/c/test"] {
        assert!(regex.is_match(path), "`{path}` should match");
    }
    for path in ["root/file", "root/xxtest"] {
        assert!(!regex.is_match(path), "`{path}` should not match");
    }
}
#[test]
fn pattern_double_adjacent_after() {
    // `**` glued to the end of a name acts as its own segment:
    // `root/test**` behaves like `root/test/**`.
    let regex = FlatPickRule {
        patterns: vec!["root/test**".to_owned()],
        tasks: vec!["copy".to_owned()],
    }
    .regex()
    .unwrap();

    for path in ["root/test", "root/test/a", "root/test/a/b/c"] {
        assert!(regex.is_match(path), "`{path}` should match");
    }
    for path in ["root/testxx", "root/file"] {
        assert!(!regex.is_match(path), "`{path}` should not match");
    }
}
#[test]
fn pattern_bad_any_extension() {
    // `**.flac` is NOT "any .flac file": the `**` is split off as its
    // own segment, so this behaves like `**/.flac` and only matches
    // files whose whole name is `.flac`.
    let regex = FlatPickRule {
        patterns: vec!["**.flac".to_owned()],
        tasks: vec!["copy".to_owned()],
    }
    .regex()
    .unwrap();

    for path in ["root/.flac", "root/a/.flac"] {
        assert!(regex.is_match(path), "`{path}` should match");
    }
    for path in [
        "root/test.flac",
        "test.flac",
        "root/test/a/b/c.flac",
        "root/testflac",
        "test.mp3",
    ] {
        assert!(!regex.is_match(path), "`{path}` should not match");
    }
}
#[test]
fn pattern_good_any_extension() {
    // `**/*.flac` is the correct way to say
    // "any file ending in .flac, at any depth".
    let regex = FlatPickRule {
        patterns: vec!["**/*.flac".to_owned()],
        tasks: vec!["copy".to_owned()],
    }
    .regex()
    .unwrap();

    for path in [
        "root/.flac",
        "root/a/.flac",
        "root/test.flac",
        "test.flac",
        "root/test/a/b/c.flac",
    ] {
        assert!(regex.is_match(path), "`{path}` should match");
    }
    for path in ["root/testflac", "test.mp3"] {
        assert!(!regex.is_match(path), "`{path}` should not match");
    }
}
#[test]
fn pattern_double_adjacent_between() {
    // `**` glued between two names acts as its own segment:
    // `root/test**file` behaves like `root/test/**/file`.
    let regex = FlatPickRule {
        patterns: vec!["root/test**file".to_owned()],
        tasks: vec!["copy".to_owned()],
    }
    .regex()
    .unwrap();

    for path in ["root/test/file", "root/test/a/b/c/file"] {
        assert!(regex.is_match(path), "`{path}` should match");
    }
    for path in [
        "root/test",
        "root/file",
        "root/testfile",
        "root/testxxfile",
    ] {
        assert!(!regex.is_match(path), "`{path}` should not match");
    }
}
#[test]
fn pattern_double_slashes() {
    // A doubled slash in the pattern counts as one separator.
    let regex = FlatPickRule {
        patterns: vec!["dir//file.txt".to_owned()],
        tasks: vec!["copy".to_owned()],
    }
    .regex()
    .unwrap();

    assert!(regex.is_match("dir/file.txt"));
    for path in ["dirfile.txt", "dir/other.txt"] {
        assert!(!regex.is_match(path), "`{path}` should not match");
    }
}
#[test]
fn pattern_double_slash() {
    // Longer runs of slashes also count as one separator.
    let regex = FlatPickRule {
        patterns: vec!["a///b////c.txt".to_owned()],
        tasks: vec!["copy".to_owned()],
    }
    .regex()
    .unwrap();

    assert!(regex.is_match("a/b/c.txt"));
    for path in ["abc.txt", "a/b/d.txt"] {
        assert!(!regex.is_match(path), "`{path}` should not match");
    }
}
#[test]
fn pattern_double_slash_wildcards() {
    // Repeated slashes between `**` and the next segment are harmless.
    let regex = FlatPickRule {
        patterns: vec!["**///*.txt".to_owned()],
        tasks: vec!["copy".to_owned()],
    }
    .regex()
    .unwrap();

    for path in ["dir/file.txt", "dir/subdir/file.txt"] {
        assert!(regex.is_match(path), "`{path}` should match");
    }
    assert!(!regex.is_match("file.jpg"));
}
#[test]
fn pattern_slashes_around_wildcards() {
    // Repeated slashes on both sides of `**` are harmless.
    let regex = FlatPickRule {
        patterns: vec!["dir//**//*.txt".to_owned()],
        tasks: vec!["copy".to_owned()],
    }
    .regex()
    .unwrap();

    for path in ["dir/subdir/file.txt", "dir/sub1/sub2/file.txt"] {
        assert!(regex.is_match(path), "`{path}` should match");
    }
    for path in ["other/sub/file.txt", "dir/file.jpg"] {
        assert!(!regex.is_match(path), "`{path}` should not match");
    }
}
}

View File

@ -2,7 +2,7 @@ use anyhow::{Context, Result};
use serde::Deserialize;
use std::io::Write;
use std::{collections::HashMap, path::Path};
use tracing::{error, trace, warn};
use tracing::{debug, error, trace, warn};
use crate::manifest::PickConfig;
@ -31,7 +31,7 @@ impl PickTool for ToolBash {
}
Some(script) => {
trace!("Running `before` script");
debug!("Running `before` script");
let mut temp_file =
tempfile::NamedTempFile::new().context("while creating temporary script")?;
writeln!(temp_file, "{}", script).context("while creating temporary script")?;
@ -79,7 +79,7 @@ impl PickTool for ToolBash {
}
Some(script) => {
trace!("Running `after` script");
debug!("Running `after` script");
let mut temp_file =
tempfile::NamedTempFile::new().context("while creating temporary script")?;
writeln!(temp_file, "{}", script).context("while creating temporary script")?;

View File

@ -53,7 +53,7 @@ ffmpeg \
# All rules are matched against the FULL PATH of files.
# Directories are ignored.
[[rules]]
"**" = "test"
"**.flac" = "test"
[[rules]]
"**" = ""