Refactor
All checks were successful
CI / Typos (push) Successful in 8s
CI / Clippy (push) Successful in 54s
CI / Build and test (push) Successful in 52s

This commit is contained in:
2025-05-03 16:42:33 -07:00
parent b8302d3381
commit 64f45a443c
17 changed files with 1229 additions and 996 deletions

2
src/manifest/mod.rs Normal file
View File

@ -0,0 +1,2 @@
/// Compilation of flattened pick-rule patterns into regular expressions.
pub mod rule;
/// Deserializable manifest data model and the iterator that flattens nested rules.
pub mod types;

357
src/manifest/rule.rs Normal file
View File

@ -0,0 +1,357 @@
use anyhow::{Result, bail};
use regex::Regex;
use tracing::warn;
/// One component of a parsed pattern, ready to be rendered as regex source.
#[derive(Debug)]
enum RegexSegment {
    /// A single path segment, stored as regex source that is emitted verbatim.
    Single(String),
    /// An optional doublestar (`**`) segment.
    DoubleStar,
}

impl RegexSegment {
    /// Renders this segment as regex source.
    ///
    /// The fragment depends on the neighbouring segments because they decide
    /// who emits the separating slash:
    ///
    /// - a `Single` followed by another `Single` appends `[/]` itself;
    /// - a `Single` that is last, or followed by a doublestar, emits nothing extra;
    /// - a doublestar emits the slashes on whichever side has a `Single`.
    ///
    /// # Panics
    ///
    /// Panics if a doublestar has a doublestar neighbour; callers must
    /// collapse consecutive doublestars first.
    fn to_regex_part(&self, prev: Option<&Self>, next: Option<&Self>) -> String {
        match self {
            Self::Single(body) => match next {
                // The following single segment needs a separator before it.
                Some(Self::Single(_)) => format!("{body}[/]"),
                // Terminal segment, or the doublestar that follows owns the slash.
                Some(Self::DoubleStar) | None => body.clone(),
            },

            // `[^/]+` is one segment (a run of non-slash characters). The base
            // doublestar shape is a segment followed by zero or more
            // slash-prefixed segments, all of it optional.
            Self::DoubleStar => match (prev, next) {
                (Some(Self::DoubleStar), _) | (_, Some(Self::DoubleStar)) => {
                    unreachable!("consecutive doublestars must be reduced")
                }

                // Whole pattern is a doublestar: no extra slashes.
                (None, None) => String::from("((?:[^/]+(?:[/][^/]+)*)?)"),

                // Last in the pattern: every matched segment carries a leading slash.
                (Some(Self::Single(_)), None) => String::from("((?:[/][^/]+(?:[/][^/]+)*)?)"),

                // First in the pattern: the matched run carries a trailing slash.
                (None, Some(Self::Single(_))) => String::from("((?:[^/]+(?:[/][^/]+)*[/])?)"),

                // Between two singles: slashes on both sides, and a lone `[/]`
                // stands in when the doublestar matches nothing.
                (Some(Self::Single(_)), Some(Self::Single(_))) => {
                    String::from("((?:[/][^/]+(?:[/][^/]+)*[/])|[/])")
                }
            },
        }
    }
}
/// A pick rule in flattened form: a list of pattern parts plus the
/// tasks attached to them.
#[derive(Debug, Clone)]
pub struct FlatPickRule {
    /// Pattern parts; `regex()` joins them with `/` before compiling.
    pub patterns: Vec<String>,
    /// Task names attached to this rule.
    /// NOTE(review): how tasks are resolved and run is not visible in this file.
    pub tasks: Vec<String>,
}
impl FlatPickRule {
pub fn regex(&self) -> Result<Regex> {
// Flatten pattern
// Double slashes are handled later
let pattern = self.patterns.join("/");
if pattern.ends_with("/") {
warn!("Pattern `{pattern}` has a trailing slash which will be ignored")
}
if pattern.starts_with("/") {
warn!("Pattern `{pattern}` has a leading slash which will be ignored")
}
// Split on slashes or stars
// This is a lot like .split("/"), but handles
// the edge case where ** is not delimited by slashes
// (`root**test` is equivalent to `root/**/test`)
let segments = {
#[expect(clippy::unwrap_used)]
let re = Regex::new("[*]{2,}|[/]").unwrap();
let split = re.find_iter(&pattern);
let bounds = split
.into_iter()
.flat_map(|x| {
let r = x.range();
let a = r.start;
let b = r.end;
[a, b]
})
.chain([pattern.len()])
.collect::<Vec<_>>();
let mut parts = Vec::new();
let mut last = 0;
for next in bounds {
let seg = &pattern[last..next];
// Consecutive slashes are identical to a single slash
if seg != "/" && !seg.is_empty() {
parts.push(seg);
}
last = next;
}
parts
};
let mut rebuilt_segments = Vec::new();
let mut last_was_doublestar = false;
for segment in segments {
// This is a wildcard regex
// (**, ***, etc)
if segment.len() > 1 && segment.chars().all(|x| x == '*') {
match segment {
"**" => {
// Consecutive doublestars are meaningless
if !last_was_doublestar {
rebuilt_segments.push(RegexSegment::DoubleStar);
}
last_was_doublestar = true;
}
_ => bail!("Invalid wildcard `{segment}`"),
}
continue;
}
last_was_doublestar = false;
let parts = segment.split("*").collect::<Vec<_>>();
let mut rebuilt = String::new();
for (i, part) in parts.into_iter().enumerate() {
if i != 0 {
rebuilt.push_str("([^/]*)")
}
rebuilt.push_str(&regex::escape(part));
}
rebuilt_segments.push(RegexSegment::Single(rebuilt));
}
let mut re_built = String::new();
let mut prev = None;
for (i, seg) in rebuilt_segments.iter().enumerate() {
let next = rebuilt_segments.get(i + 1);
re_built.push_str(&seg.to_regex_part(prev, next));
prev = Some(seg);
}
let re_built = format!("^{re_built}$");
// This regex should always be valid
#[expect(clippy::unwrap_used)]
Ok(Regex::new(&re_built).unwrap())
}
}
//
// MARK: tests
//
#[cfg(test)]
#[expect(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Compiles `patterns` as one rule, then checks that every path in
    /// `accepted` matches and every path in `rejected` does not.
    fn check(patterns: &[&str], accepted: &[&str], rejected: &[&str]) {
        let rule = FlatPickRule {
            patterns: patterns.iter().map(|x| (*x).to_owned()).collect(),
            tasks: vec!["task".to_owned()],
        };
        let regex = rule.regex().unwrap();

        for path in accepted {
            assert!(regex.is_match(path), "`{path}` should match");
        }
        for path in rejected {
            assert!(!regex.is_match(path), "`{path}` should not match");
        }
    }

    #[test]
    fn simple() {
        check(
            &["file.txt"],
            &["file.txt"],
            &["other.txt", "path/file.txt"],
        );
    }

    #[test]
    fn simple_dir() {
        check(
            &["dir", "file.txt"],
            &["dir/file.txt"],
            &["file.txt", "other/file.txt"],
        );
    }

    #[test]
    fn simple_star() {
        check(
            &["*.txt"],
            &["file.txt", "other.txt"],
            &["file.jpg", "nested/file.txt"],
        );
    }

    #[test]
    fn simple_doublestar() {
        check(
            &["**/*.txt"],
            &["file.txt", "dir/file.txt", "dir/subdir/file.txt"],
            &["file.jpg", "dir/file.jpg"],
        );
    }

    #[test]
    fn consecutive_doublestar() {
        check(
            &["**", "**", "**", "*.txt"],
            &["file.txt", "dir/file.txt", "dir/subdir/file.txt"],
            &["file.jpg", "dir/file.jpg"],
        );
    }

    #[test]
    fn dual_star() {
        check(
            &["**/*a*"],
            &[
                "fileafile",
                "dir/fileafile",
                "filea",
                "dir/filea",
                "afile",
                "dir/afile",
            ],
            &["noletter", "dir/noletter"],
        );
    }

    #[test]
    fn single_end() {
        check(&["**/*"], &["file", "dir/file", "a/b/c/dir/file"], &[]);
    }

    #[test]
    fn doublestar_end() {
        check(&["root/**"], &["root/file"], &["dir/file"]);
    }

    #[test]
    fn doublestar_start() {
        check(&["**/dir"], &["dir", "a/b/dir"], &["dir/file"]);
    }

    #[test]
    fn doublestar_adjacent_before() {
        check(
            &["root/**test"],
            &["root/test", "root/a/test", "root/a/b/c/test"],
            &["root/file", "root/xxtest"],
        );
    }

    #[test]
    fn doublestar_adjacent_after() {
        check(
            &["root/test**"],
            &["root/test", "root/test/a", "root/test/a/b/c"],
            &["root/testxx", "root/file"],
        );
    }

    #[test]
    fn doublestar_adjacent_middle() {
        check(
            &["root/test**file"],
            &["root/test/file", "root/test/a/b/c/file"],
            &[
                "root/test",
                "root/file",
                "root/testfile",
                "root/testxxfile",
            ],
        );
    }

    #[test]
    fn doublestar_bad_extension() {
        check(
            &["**.flac"],
            &["root/.flac", "root/a/.flac"],
            &[
                "root/test.flac",
                "test.flac",
                "root/test/a/b/c.flac",
                "root/testflac",
                "test.mp3",
            ],
        );
    }

    #[test]
    fn doublestar_good_extension() {
        check(
            &["**/*.flac"],
            &[
                "root/.flac",
                "root/a/.flac",
                "root/test.flac",
                "test.flac",
                "root/test/a/b/c.flac",
            ],
            &["root/testflac", "test.mp3"],
        );
    }

    #[test]
    fn multi_slash_a() {
        check(
            &["dir//file.txt"],
            &["dir/file.txt"],
            &["dirfile.txt", "dir/other.txt"],
        );
    }

    #[test]
    fn multi_slash_b() {
        check(
            &["**///*.txt"],
            &["dir/file.txt", "dir/subdir/file.txt"],
            &["file.jpg"],
        );
    }

    #[test]
    fn multi_slash_c() {
        check(
            &["///dir//**//*.txt//"],
            &["dir/subdir/file.txt", "dir/sub1/sub2/file.txt"],
            &["other/sub/file.txt", "dir/file.jpg"],
        );
    }
}

341
src/manifest/types.rs Normal file
View File

@ -0,0 +1,341 @@
use anyhow::Result;
use indexmap::IndexMap;
use serde::Deserialize;
use std::path::{Path, PathBuf};
use crate::tool::ToolConfig;
use super::rule::FlatPickRule;
/// Top-level structure deserialized from a manifest.
///
/// Unknown keys are rejected during deserialization.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Manifest {
    /// General settings (working directory, which entry kinds to process).
    pub config: PickConfig,
    /// Tool configuration; defined in `crate::tool`.
    pub tool: ToolConfig,
    /// The (possibly nested) pick rules.
    pub rules: PickRules,
}
/// The `config` section of a manifest.
///
/// Every field is optional when deserializing; unknown keys are rejected.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct PickConfig {
    /// Working directory override. Resolve it with [`PickConfig::work_dir`],
    /// which falls back to the manifest's parent directory when unset.
    #[serde(default)]
    pub work_dir: Option<PathBuf>,
    /// Defaults to `false`.
    /// NOTE(review): presumably controls whether symlinks are followed while
    /// walking; the consumer is not visible in this file.
    #[serde(default = "default_false")]
    pub follow_links: bool,
    /// Defaults to `true`.
    /// NOTE(review): presumably enables processing of regular files; confirm
    /// against the consumer.
    #[serde(default = "default_true")]
    pub process_files: bool,
    /// Defaults to `false`.
    /// NOTE(review): presumably enables processing of directories; confirm
    /// against the consumer.
    #[serde(default = "default_false")]
    pub process_dirs: bool,
    /// Defaults to `false`.
    /// NOTE(review): presumably enables processing of symlinks; confirm
    /// against the consumer.
    #[serde(default = "default_false")]
    pub process_links: bool,
}
impl PickConfig {
pub fn work_dir(&self, manifest_path: &Path) -> Result<PathBuf> {
// Parent directory should always exist since manifest is a file.
#[expect(clippy::unwrap_used)]
let p = manifest_path.parent().unwrap().to_path_buf();
match &self.work_dir {
None => Ok(p),
Some(path) => {
if path.is_absolute() {
Ok(path.to_owned())
} else {
Ok(std::path::absolute(p.join(path))?)
}
}
}
}
}
/// Serde default provider for boolean fields that default to `true`.
fn default_true() -> bool {
    true
}
/// Serde default provider for boolean fields that default to `false`.
fn default_false() -> bool {
    false
}
//
// MARK: rules
//
/// Either a single value or a list of values.
///
/// Deserialized untagged, so input may be written as a bare value or as a
/// sequence of values.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum OptVec<T: Clone> {
    /// Exactly one value.
    Single(T),
    /// Zero or more values.
    Vec(Vec<T>),
}
impl<T: Clone> OptVec<T> {
    /// Views the contents as a slice; a `Single` is a one-element slice.
    fn as_slice(&self) -> &[T] {
        match self {
            Self::Single(item) => std::slice::from_ref(item),
            Self::Vec(items) => items.as_slice(),
        }
    }

    /// Number of values held: always 1 for `Single`.
    pub fn len(&self) -> usize {
        self.as_slice().len()
    }

    /// Returns `true` only for an empty `Vec` variant.
    pub fn is_empty(&self) -> bool {
        self.as_slice().is_empty()
    }

    /// Returns the value at `idx`, or `None` when `idx` is out of range.
    /// A `Single` only has index 0.
    pub fn get(&self, idx: usize) -> Option<&T> {
        self.as_slice().get(idx)
    }
}
impl<T: Clone> From<OptVec<T>> for Vec<T> {
    /// Unwraps into a plain vector; a `Single` becomes a one-element vector.
    fn from(val: OptVec<T>) -> Self {
        match val {
            OptVec::Vec(items) => items,
            OptVec::Single(item) => Vec::from([item]),
        }
    }
}
/// The value stored under one pattern key of a rules map.
///
/// Deserialized untagged: serde tries the variants in declaration order.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
#[serde(deny_unknown_fields)]
pub enum PickRule {
    /// A leaf: one task name or a list of task names for this pattern.
    Plain(OptVec<String>),
    /// A nested set of rules; this entry's key becomes a pattern prefix for
    /// everything inside when the rules are flattened.
    Nested(PickRules),
}
/// A set of pick rules: one map or a list of maps from pattern key to
/// [`PickRule`].
///
/// Maps are `IndexMap`s; the rule iterator walks their entries by index.
#[derive(Debug, Clone, Deserialize)]
#[serde(transparent)]
pub struct PickRules(OptVec<IndexMap<String, PickRule>>);
impl PickRules {
    /// Returns an iterator that flattens these rules, nested ones included,
    /// into [`FlatPickRule`]s.
    pub fn iter(&self) -> PickRuleIterator<'_> {
        // Traversal starts at the first entry of the first map, with no
        // pattern prefix.
        let root = PickRuleIterState {
            rules: self,
            map_index: 0,
            entry_index: 0,
            prefix: Vec::new(),
        };

        PickRuleIterator { stack: vec![root] }
    }
}
impl<'a> IntoIterator for &'a PickRules {
type Item = FlatPickRule;
type IntoIter = PickRuleIterator<'a>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
//
// MARK: rule iterator
//
/// Traversal position inside one level of (possibly nested) rules.
struct PickRuleIterState<'a> {
    /// The rules being walked at this level.
    rules: &'a PickRules,
    /// Index of the current map within `rules`.
    map_index: usize,
    /// Index of the next entry to visit within the current map.
    entry_index: usize,
    /// Keys of the enclosing nested rules, outermost first; prepended to the
    /// patterns of every rule produced at this level.
    prefix: Vec<String>,
}
/// Iterator over [`PickRules`] that yields one [`FlatPickRule`] per leaf
/// rule, descending into nested rules as it goes.
pub struct PickRuleIterator<'a> {
    /// One state per nesting level currently being walked; the innermost
    /// level is last.
    stack: Vec<PickRuleIterState<'a>>,
}
impl Iterator for PickRuleIterator<'_> {
    type Item = FlatPickRule;

    /// Yields the next leaf rule in depth-first order.
    ///
    /// Maps are visited in list order and entries in map order. A nested
    /// entry is fully traversed, with its key appended to the pattern prefix,
    /// before the entry after it is visited.
    ///
    /// Implemented as a loop rather than by recursion so that long runs of
    /// empty maps or deep nesting cannot grow the call stack.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // Nothing left on the stack: iteration is finished.
            let current = self.stack.last_mut()?;

            // Copy the shared reference out so that borrows of the rules do
            // not keep `self.stack` borrowed.
            let rules = current.rules;

            // All maps at this level are exhausted: resume the parent level.
            let Some(current_map) = rules.0.get(current.map_index) else {
                self.stack.pop();
                continue;
            };

            // This map is exhausted: move on to the next map at this level.
            let Some((key, value)) = current_map.get_index(current.entry_index) else {
                current.map_index += 1;
                current.entry_index = 0;
                continue;
            };
            current.entry_index += 1;

            // The full pattern path of this entry: inherited prefix + own key.
            let mut patterns = current.prefix.clone();
            patterns.push(key.clone());

            match value {
                PickRule::Plain(tasks) => {
                    return Some(FlatPickRule {
                        patterns,
                        tasks: tasks.clone().into(),
                    });
                }

                // Descend; the entry's pattern path becomes the prefix of
                // everything inside it.
                PickRule::Nested(nested_rules) => {
                    self.stack.push(PickRuleIterState {
                        rules: nested_rules,
                        map_index: 0,
                        entry_index: 0,
                        prefix: patterns,
                    });
                }
            }
        }
    }
}
//
// MARK: tests
//
#[cfg(test)]
#[expect(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Minimal manifest containing only the `rules` table.
    #[derive(Debug, Clone, Deserialize)]
    struct TestManifest {
        rules: PickRules,
    }

    /// Parses `toml_str`, flattens its rules, and checks that they equal
    /// `expected`, given as `(patterns, tasks)` pairs in iteration order.
    fn assert_flattened(toml_str: &str, expected: &[(Vec<&str>, Vec<&str>)]) {
        let test_manifest: TestManifest = toml::from_str(toml_str).unwrap();
        let rules: Vec<FlatPickRule> = test_manifest.rules.iter().collect();

        assert_eq!(rules.len(), expected.len());
        for (rule, (patterns, tasks)) in rules.iter().zip(expected) {
            assert_eq!(rule.patterns, *patterns);
            assert_eq!(rule.tasks, *tasks);
        }
    }

    #[test]
    fn rule_ordering_preserved() {
        let toml_str = r#"
[[rules]]
"third" = "c"
"first" = "a"
"second" = "b"
"#;
        assert_flattened(
            toml_str,
            &[
                (vec!["third"], vec!["c"]),
                (vec!["first"], vec!["a"]),
                (vec!["second"], vec!["b"]),
            ],
        );
    }

    #[test]
    fn nested_rules_order() {
        let toml_str = r#"
[[rules]]
"a" = "task_a"
"b" = "task_b"
[[rules."nested"]]
"c" = "task_c"
"d" = "task_d"
[[rules]]
"e" = "task_e"
"#;
        assert_flattened(
            toml_str,
            &[
                (vec!["a"], vec!["task_a"]),
                (vec!["b"], vec!["task_b"]),
                (vec!["nested", "c"], vec!["task_c"]),
                (vec!["nested", "d"], vec!["task_d"]),
                (vec!["e"], vec!["task_e"]),
            ],
        );
    }

    #[test]
    fn deeply_nested_rules() {
        let toml_str = r#"
[[rules."a"."b"."c"]]
"d" = "task_d"
"#;
        assert_flattened(toml_str, &[(vec!["a", "b", "c", "d"], vec!["task_d"])]);
    }

    #[test]
    fn multiple_maps_same_level() {
        let toml_str = r#"
[[rules]]
"a1" = "copy"
"a2" = "ignore"
[[rules]]
"b1" = "copy"
"b2" = "ignore"
"#;
        assert_flattened(
            toml_str,
            &[
                (vec!["a1"], vec!["copy"]),
                (vec!["a2"], vec!["ignore"]),
                (vec!["b1"], vec!["copy"]),
                (vec!["b2"], vec!["ignore"]),
            ],
        );
    }

    #[test]
    fn empty_rules_list() {
        let toml_str = "
[[rules]]
";
        assert_flattened(toml_str, &[]);
    }

    #[test]
    fn mixed_rule_types() {
        let toml_str = r#"
[[rules]]
"plain" = "copy"
"nested" = { invalid_as_string = true }
"#;
        // A nested value that is neither a task name nor a rules map must be
        // rejected at parse time.
        let result = toml::from_str::<TestManifest>(toml_str);
        assert!(result.is_err());
    }
}