diff --git a/src/main.rs b/src/main.rs
index 7343cb7..ae32146 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -13,20 +13,14 @@ pub mod tool;
 pub mod util;
 
 // enumerate files with a spinner (count size)
-// warn if links and follow
 // trim everything
 // parallelism
 // input from stdin?
-// * ** greed
-// fix and document "**.flac", "**/*.flac"
-// tests
 // show progress
-// bash before and after
 // capture/print stdout/stderr
 // workdir vs root?
 //
 // Tools:
-// - *** bash
 // - * list
 // - *** rename
 // - ** typst
@@ -116,11 +110,13 @@ fn main_inner() -> Result {
 
     // MARK: rules
     //
-    let rules = manifest
-        .rules
-        .iter()
-        .map(|rule| (rule.regex(), rule.tasks))
-        .collect::<Vec<_>>();
+    let rules = {
+        let mut rules = Vec::new();
+        for rule in &manifest.rules {
+            rules.push((rule.regex()?, rule.tasks));
+        }
+        rules
+    };
 
     let source_path = std::path::absolute(&work_dir)?;
     let walker = WalkDir::new(&source_path).follow_links(manifest.config.follow_links);
diff --git a/src/manifest.rs b/src/manifest.rs
index 7202c00..a49d16f 100644
--- a/src/manifest.rs
+++ b/src/manifest.rs
@@ -1,8 +1,9 @@
-use anyhow::Result;
+use anyhow::{Result, bail};
 use indexmap::IndexMap;
 use regex::Regex;
 use serde::Deserialize;
 use std::path::{Path, PathBuf};
+use tracing::warn;
 
 use crate::tool::ToolConfig;
 
@@ -146,24 +147,163 @@ pub struct FlatPickRule {
     pub tasks: Vec<String>,
 }
 
+/// One piece of a flattened pattern, as split on `/` and `**`.
+#[derive(Debug)]
+enum RegexSegment {
+    /// A single segment
+    Single(String),
+
+    /// An optional doublestar segment
+    DoubleStar,
+}
+
+impl RegexSegment {
+    /// Returns the regex pattern of this part, including any
+    /// slash separators it owns (decided by its neighbors).
+    fn to_regex_part(&self, prev: Option<&Self>, next: Option<&Self>) -> String {
+        match (prev, self, next) {
+            // Consecutive single segments need a trailing slash
+            (_, Self::Single(x), Some(Self::Single(_))) => format!("{x}[/]"),
+
+            // Terminal single segments don't need a trailing slash
+            (_, Self::Single(x), None) => x.to_owned(),
+
+            // Neighboring doublestar is always responsible for slashes
+            (_, Self::Single(x), Some(Self::DoubleStar)) => x.to_owned(),
+
+            // [^/]+ is a "segment" (a block of non-slash chars)
+            // The "base" doublestar pattern is a segment
+            // followed by zero or more segments prefixed by a slash.
+            //
+            // No additional slashes
+            (None, Self::DoubleStar, None) => "((?:[^/]+(?:[/][^/]+)*)?)".into(),
+
+            // Doublestars cannot be neighbors
+            (_, Self::DoubleStar, Some(Self::DoubleStar))
+            | (Some(Self::DoubleStar), Self::DoubleStar, _) => {
+                unreachable!("consecutive doublestars must be reduced")
+            }
+
+            // Leading slash
+            (Some(Self::Single(_)), Self::DoubleStar, None) => {
+                "((?:[/][^/]+(?:[/][^/]+)*)?)".into()
+            }
+
+            // Trailing slash
+            (None, Self::DoubleStar, Some(Self::Single(_))) => {
+                "((?:[^/]+(?:[/][^/]+)*[/])?)".into()
+            }
+
+            // Leading and trailing slash.
+            // Also, replace self with a [/] when empty.
+            (Some(Self::Single(_)), Self::DoubleStar, Some(Self::Single(_))) => {
+                "((?:[/][^/]+(?:[/][^/]+)*[/])|[/])".into()
+            }
+        }
+    }
+}
+
 impl FlatPickRule {
-    pub fn regex(&self) -> Regex {
+    /// Compiles this rule's patterns into a single anchored [`Regex`].
+    ///
+    /// Patterns are joined with `/`. Within a segment, `*` matches any run
+    /// of non-slash characters; `**` matches zero or more whole segments.
+    ///
+    /// # Errors
+    /// Fails when a pattern contains a run of three or more stars.
+    pub fn regex(&self) -> Result<Regex> {
+        // Flatten pattern
+        // Double slashes are handled later
+        let pattern = self.patterns.join("/");
+
+        if pattern.ends_with("/") {
+            warn!("Pattern `{pattern}` has a trailing slash which will be ignored")
+        }
+
+        if pattern.starts_with("/") {
+            warn!("Pattern `{pattern}` has a leading slash which will be ignored")
+        }
+
+        // Split on slashes or stars
+        // This is a lot like .split("/"), but handles
+        // the edge case where ** is not delimited by slashes
+        // (`root**test` is equivalent to `root/**/test`)
+        let segments = {
+            #[expect(clippy::unwrap_used)]
+            let re = Regex::new("[*]{2,}|[/]").unwrap();
+            let split = re.find_iter(&pattern);
+
+            let bounds = split
+                .into_iter()
+                .flat_map(|x| {
+                    let r = x.range();
+                    let a = r.start;
+                    let b = r.end;
+                    [a, b]
+                })
+                .chain([pattern.len()])
+                .collect::<Vec<_>>();
+
+            let mut parts = Vec::new();
+            let mut last = 0;
+            for next in bounds {
+                let seg = &pattern[last..next];
+                // Consecutive slashes are identical to a single slash
+                if seg != "/" && !seg.is_empty() {
+                    parts.push(seg);
+                }
+                last = next;
+            }
+
+            parts
+        };
+
+        let mut rebuilt_segments = Vec::new();
+        let mut last_was_doublestar = false;
+        for segment in segments {
+            // This is a wildcard regex
+            // (**, ***, etc)
+            if segment.len() > 1 && segment.chars().all(|x| x == '*') {
+                match segment {
+                    "**" => {
+                        // Consecutive doublestars are meaningless
+                        if !last_was_doublestar {
+                            rebuilt_segments.push(RegexSegment::DoubleStar);
+                        }
+                        last_was_doublestar = true;
+                    }
+                    _ => bail!("Invalid wildcard `{segment}`"),
+                }
+                continue;
+            }
+            last_was_doublestar = false;
+
+            let parts = segment.split("*").collect::<Vec<_>>();
+
+            let mut rebuilt = String::new();
+            for (i, part) in parts.into_iter().enumerate() {
+                if i != 0 {
+                    rebuilt.push_str("([^/]*)")
+                }
+
+                rebuilt.push_str(&regex::escape(part));
+            }
+
+            rebuilt_segments.push(RegexSegment::Single(rebuilt));
+        }
+
+        let mut re_built = String::new();
+        let mut prev = None;
+        for (i, seg) in rebuilt_segments.iter().enumerate() {
+            let next = rebuilt_segments.get(i + 1);
+            re_built.push_str(&seg.to_regex_part(prev, next));
+            prev = Some(seg);
+        }
+
+        let re_built = format!("^{re_built}$");
         // This regex should always be valid
         #[expect(clippy::unwrap_used)]
-        Regex::new(
-            &self
-                .patterns
-                .join("/")
-                .split("/")
-                .map(|x| match x {
-                    "**" => "((:?[^/]+)*)".to_owned(),
-                    "*" => "([^/]+)".to_owned(),
-                    x => regex::escape(x),
-                })
-                .collect::<Vec<_>>()
-                .join("/"),
-        )
-        .unwrap()
+        Ok(Regex::new(&re_built).unwrap())
     }
 }
 
@@ -363,4 +503,287 @@ mod tests {
         let result = toml::from_str::(toml_str);
         assert!(result.is_err());
     }
+
+    #[test]
+    fn pattern_simple() {
+        let rule = FlatPickRule {
+            patterns: vec!["file.txt".to_string()],
+            tasks: vec!["copy".to_string()],
+        };
+
+        let regex = rule.regex().unwrap();
+        assert!(regex.is_match("file.txt"));
+        assert!(!regex.is_match("other.txt"));
+        assert!(!regex.is_match("path/file.txt"));
+    }
+
+    #[test]
+    fn pattern_with_path() {
+        let rule = FlatPickRule {
+            patterns: vec!["dir".to_string(), "file.txt".to_string()],
+            tasks: vec!["copy".to_string()],
+        };
+
+        let regex = rule.regex().unwrap();
+        assert!(regex.is_match("dir/file.txt"));
+        assert!(!regex.is_match("file.txt"));
+        assert!(!regex.is_match("other/file.txt"));
+    }
+
+    #[test]
+    fn pattern_wildcard_simple() {
+        let rule = FlatPickRule {
+            patterns: vec!["*.txt".to_string()],
+            tasks: vec!["copy".to_string()],
+        };
+
+        let regex = rule.regex().unwrap();
+        assert!(regex.is_match("file.txt"));
+        assert!(regex.is_match("other.txt"));
+        assert!(!regex.is_match("file.jpg"));
+        assert!(!regex.is_match("nested/file.txt"));
+    }
+
+    #[test]
+    fn pattern_doublestar() {
+        let rule = FlatPickRule {
+            patterns: vec!["**".to_string(), "*.txt".to_string()],
+            tasks: vec!["copy".to_string()],
+        };
+
+        let regex = rule.regex().unwrap();
+        assert!(regex.is_match("file.txt"));
+        assert!(regex.is_match("dir/file.txt"));
+        assert!(regex.is_match("dir/subdir/file.txt"));
+        assert!(!regex.is_match("file.jpg"));
+        assert!(!regex.is_match("dir/file.jpg"));
+    }
+
+    #[test]
+    fn pattern_doublestar_consecutive() {
+        let rule = FlatPickRule {
+            patterns: vec![
+                "**".to_string(),
+                "**".to_string(),
+                "**".to_string(),
+                "*.txt".to_string(),
+            ],
+            tasks: vec!["copy".to_string()],
+        };
+
+        let regex = rule.regex().unwrap();
+        assert!(regex.is_match("file.txt"));
+        assert!(regex.is_match("dir/file.txt"));
+        assert!(regex.is_match("dir/subdir/file.txt"));
+        assert!(!regex.is_match("file.jpg"));
+        assert!(!regex.is_match("dir/file.jpg"));
+    }
+
+    #[test]
+    fn pattern_wildcard_double_slash() {
+        let rule = FlatPickRule {
+            patterns: vec!["**/*.txt".to_string()],
+            tasks: vec!["copy".to_string()],
+        };
+
+        let regex = rule.regex().unwrap();
+        assert!(regex.is_match("file.txt"));
+        assert!(regex.is_match("dir/file.txt"));
+        assert!(regex.is_match("dir/subdir/file.txt"));
+        assert!(!regex.is_match("file.jpg"));
+        assert!(!regex.is_match("dir/file.jpg"));
+    }
+
+    #[test]
+    fn pattern_single_dual() {
+        let rule = FlatPickRule {
+            patterns: vec!["**/*a*".to_string()],
+            tasks: vec!["copy".to_string()],
+        };
+
+        let regex = rule.regex().unwrap();
+        assert!(regex.is_match("fileafile"));
+        assert!(regex.is_match("dir/fileafile"));
+        assert!(regex.is_match("filea"));
+        assert!(regex.is_match("dir/filea"));
+        assert!(regex.is_match("afile"));
+        assert!(regex.is_match("dir/afile"));
+        assert!(!regex.is_match("noletter"));
+        assert!(!regex.is_match("dir/noletter"));
+    }
+
+    #[test]
+    fn pattern_single_end() {
+        let rule = FlatPickRule {
+            patterns: vec!["**/*".to_string()],
+            tasks: vec!["copy".to_string()],
+        };
+
+        let regex = rule.regex().unwrap();
+        assert!(regex.is_match("file"));
+        assert!(regex.is_match("dir/file"));
+    }
+
+    #[test]
+    fn pattern_double_end() {
+        let rule = FlatPickRule {
+            patterns: vec!["root/**".to_string()],
+            tasks: vec!["copy".to_string()],
+        };
+
+        let regex = rule.regex().unwrap();
+        assert!(regex.is_match("root/file"));
+        assert!(!regex.is_match("dir/file"));
+    }
+
+    #[test]
+    fn pattern_double_start() {
+        let rule = FlatPickRule {
+            patterns: vec!["**/dir".to_string()],
+            tasks: vec!["copy".to_string()],
+        };
+
+        let regex = rule.regex().unwrap();
+        assert!(regex.is_match("dir"));
+        assert!(regex.is_match("a/b/dir"));
+        assert!(!regex.is_match("dir/file"));
+    }
+
+    #[test]
+    fn pattern_double_adjacent_before() {
+        let rule = FlatPickRule {
+            // equivalent to root/**/test
+            patterns: vec!["root/**test".to_string()],
+            tasks: vec!["copy".to_string()],
+        };
+
+        let regex = rule.regex().unwrap();
+        assert!(regex.is_match("root/test"));
+        assert!(regex.is_match("root/a/test"));
+        assert!(regex.is_match("root/a/b/c/test"));
+        assert!(!regex.is_match("root/file"));
+        assert!(!regex.is_match("root/xxtest"));
+    }
+
+    #[test]
+    fn pattern_double_adjacent_after() {
+        let rule = FlatPickRule {
+            // equivalent to root/test/**
+            patterns: vec!["root/test**".to_string()],
+            tasks: vec!["copy".to_string()],
+        };
+
+        let regex = rule.regex().unwrap();
+        assert!(regex.is_match("root/test"));
+        assert!(regex.is_match("root/test/a"));
+        assert!(regex.is_match("root/test/a/b/c"));
+        assert!(!regex.is_match("root/testxx"));
+        assert!(!regex.is_match("root/file"));
+    }
+
+    #[test]
+    fn pattern_bad_any_extension() {
+        let rule = FlatPickRule {
+            // equivalent to **/.flac (only files named exactly `.flac`)
+            patterns: vec!["**.flac".to_string()],
+            tasks: vec!["copy".to_string()],
+        };
+
+        let regex = rule.regex().unwrap();
+        assert!(regex.is_match("root/.flac"));
+        assert!(regex.is_match("root/a/.flac"));
+        assert!(!regex.is_match("root/test.flac"));
+        assert!(!regex.is_match("test.flac"));
+        assert!(!regex.is_match("root/test/a/b/c.flac"));
+        assert!(!regex.is_match("root/testflac"));
+        assert!(!regex.is_match("test.mp3"));
+    }
+
+    #[test]
+    fn pattern_good_any_extension() {
+        let rule = FlatPickRule {
+            // any file ending in .flac, at any depth
+            patterns: vec!["**/*.flac".to_string()],
+            tasks: vec!["copy".to_string()],
+        };
+
+        let regex = rule.regex().unwrap();
+        assert!(regex.is_match("root/.flac"));
+        assert!(regex.is_match("root/a/.flac"));
+        assert!(regex.is_match("root/test.flac"));
+        assert!(regex.is_match("test.flac"));
+        assert!(regex.is_match("root/test/a/b/c.flac"));
+        assert!(!regex.is_match("root/testflac"));
+        assert!(!regex.is_match("test.mp3"));
+    }
+
+    #[test]
+    fn pattern_double_adjacent_between() {
+        let rule = FlatPickRule {
+            // equivalent to root/test/**/file
+            patterns: vec!["root/test**file".to_string()],
+            tasks: vec!["copy".to_string()],
+        };
+
+        let regex = rule.regex().unwrap();
+        assert!(regex.is_match("root/test/file"));
+        assert!(regex.is_match("root/test/a/b/c/file"));
+        assert!(!regex.is_match("root/test"));
+        assert!(!regex.is_match("root/file"));
+        assert!(!regex.is_match("root/testfile"));
+        assert!(!regex.is_match("root/testxxfile"));
+    }
+
+    #[test]
+    fn pattern_double_slashes() {
+        let rule = FlatPickRule {
+            patterns: vec!["dir//file.txt".to_string()],
+            tasks: vec!["copy".to_string()],
+        };
+
+        let regex = rule.regex().unwrap();
+        assert!(regex.is_match("dir/file.txt"));
+        assert!(!regex.is_match("dirfile.txt"));
+        assert!(!regex.is_match("dir/other.txt"));
+    }
+
+    #[test]
+    fn pattern_double_slash() {
+        let rule = FlatPickRule {
+            patterns: vec!["a///b////c.txt".to_string()],
+            tasks: vec!["copy".to_string()],
+        };
+
+        let regex = rule.regex().unwrap();
+        assert!(regex.is_match("a/b/c.txt"));
+        assert!(!regex.is_match("abc.txt"));
+        assert!(!regex.is_match("a/b/d.txt"));
+    }
+
+    #[test]
+    fn pattern_double_slash_wildcards() {
+        let rule = FlatPickRule {
+            patterns: vec!["**///*.txt".to_string()],
+            tasks: vec!["copy".to_string()],
+        };
+
+        let regex = rule.regex().unwrap();
+        assert!(regex.is_match("dir/file.txt"));
+        assert!(regex.is_match("dir/subdir/file.txt"));
+        assert!(!regex.is_match("file.jpg"));
+    }
+
+    #[test]
+    fn pattern_slashes_around_wildcards() {
+        let rule = FlatPickRule {
+            patterns: vec!["dir//**//*.txt".to_string()],
+            tasks: vec!["copy".to_string()],
+        };
+
+        let regex = rule.regex().unwrap();
+        assert!(regex.is_match("dir/subdir/file.txt"));
+        assert!(regex.is_match("dir/sub1/sub2/file.txt"));
+        assert!(!regex.is_match("other/sub/file.txt"));
+        assert!(!regex.is_match("dir/file.jpg"));
+    }
 }
diff --git a/src/tool/bash.rs b/src/tool/bash.rs
index a1f76ca..9858392 100644
--- a/src/tool/bash.rs
+++ b/src/tool/bash.rs
@@ -2,7 +2,7 @@ use anyhow::{Context, Result};
 use serde::Deserialize;
 use std::io::Write;
 use std::{collections::HashMap, path::Path};
-use tracing::{error, trace, warn};
+use tracing::{debug, error, trace, warn};
 
 use crate::manifest::PickConfig;
 
@@ -31,7 +31,7 @@ impl PickTool for ToolBash {
             }
 
             Some(script) => {
-                trace!("Running `before` script");
+                debug!("Running `before` script");
                 let mut temp_file =
                     tempfile::NamedTempFile::new().context("while creating temporary script")?;
                 writeln!(temp_file, "{}", script).context("while creating temporary script")?;
@@ -79,7 +79,7 @@ impl PickTool for ToolBash {
             }
 
             Some(script) => {
-                trace!("Running `after` script");
+                debug!("Running `after` script");
                 let mut temp_file =
                     tempfile::NamedTempFile::new().context("while creating temporary script")?;
                 writeln!(temp_file, "{}", script).context("while creating temporary script")?;
diff --git a/test.toml b/test.toml
index 3c41598..abd4179 100644
--- a/test.toml
+++ b/test.toml
@@ -53,7 +53,7 @@ ffmpeg \
 # All rules are matched against the FULL PATH of files.
 # Directories are ignored.
 [[rules]]
-"**" = "test"
+"**.flac" = "test"
 
 [[rules]]
 "**" = ""