From 20bf33fa059d0612aff71ebe498fd3ce2f9c1ab1 Mon Sep 17 00:00:00 2001 From: Mark Date: Sat, 3 May 2025 02:48:28 -0700 Subject: [PATCH] Initial path matching --- .editorconfig | 9 ++ .gitignore | 2 + Cargo.lock | 297 ++++++++++++++++++++++++++++++++++++ Cargo.toml | 65 ++++++++ rustfmt.toml | 1 + src/main.rs | 33 ++++ src/manifest.rs | 389 ++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 796 insertions(+) create mode 100644 .editorconfig create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 rustfmt.toml create mode 100644 src/main.rs create mode 100644 src/manifest.rs diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..941e639 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,9 @@ +root = true + +[*] +indent_style = tab +indent_size = 4 +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = false +insert_final_newline = false \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..212de44 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +.DS_Store \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..14a3995 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,297 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "hashbrown" +version = "0.15.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" + +[[package]] +name = "indexmap" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" +dependencies = [ + "equivalent", + "hashbrown", + "serde", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "pick" +version = "0.1.0" +dependencies = [ + "indexmap", + "regex", + "serde", + "toml", + "walkdir", +] + +[[package]] +name = "proc-macro2" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_spanned" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" +dependencies = [ + "serde", +] + +[[package]] +name = "syn" +version = "2.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "toml" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05ae329d1f08c4d17a59bed7ff5b5a769d062e64a62d34a3261b219e62cd5aae" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "310068873db2c5b3e7659d2cc35d21855dbafa50d1ce336397c666e3cb08137e" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "toml_write", + "winnow", +] + +[[package]] +name = "toml_write" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfb942dfe1d8e29a7ee7fcbde5bd2b9a25fb89aa70caea2eba3bee836ff41076" + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9fb597c990f03753e08d3c29efbfcf2019a003b4bf4ba19225c158e1549f0f3" +dependencies = [ + "memchr", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..02b9bb9 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,65 @@ +[package] +name = "pick" +version = "0.1.0" +edition = "2024" + +# +# MARK: lints +# + +[lints.rust] +unused_import_braces = "deny" +unit_bindings = "deny" +single_use_lifetimes = "deny" +non_ascii_idents = "deny" +macro_use_extern_crate = "deny" +elided_lifetimes_in_paths = "deny" +absolute_paths_not_starting_with_crate = "deny" +explicit_outlives_requirements = "warn" +unused_crate_dependencies = "warn" +redundant_lifetimes = "warn" +missing_docs = "allow" + +[lints.clippy] +needless_return = "allow" +new_without_default = "allow" +tabs_in_doc_comments = "allow" +dbg_macro = "deny" +allow_attributes = "deny" +create_dir = "deny" +filetype_is_file = "deny" +integer_division = "deny" +lossy_float_literal = "deny" +map_err_ignore = "deny" +mutex_atomic = "deny" +needless_raw_strings = "deny" +print_stderr = "warn" +print_stdout = "warn" +str_to_string = "deny" +string_add = "deny" +string_to_string = "deny" +unimplemented = "deny" +use_debug = "allow" +verbose_file_reads = "deny" +large_types_passed_by_value = "deny" +large_enum_variant = "allow" +match_on_vec_items = "deny" +wildcard_dependencies = "deny" +negative_feature_names = "deny" +redundant_feature_names = "deny" +multiple_crate_versions = "allow" +missing_safety_doc = "warn" +identity_op = "allow" +comparison_chain = "allow" +unwrap_used = "deny" + +# +# MARK: deps +# + +[dependencies] +indexmap = { version = "2.9.0", features = ["serde"] } +regex = "1.11.1" +serde = { version = "1.0.219", features = ["derive"] } +toml = { version = "0.8.22", features = ["preserve_order"] } +walkdir = "2.5.0" diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..218e203 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1 @@ +hard_tabs = true diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..c028473 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,33 @@ +use walkdir::WalkDir; + +pub mod manifest; + +fn main() { + let file = std::fs::read_to_string("./test.toml").unwrap(); + let x: manifest::Manifest = toml::from_str(&file).unwrap(); + + let rules = x + .rules + .iter() + .map(|rule| (rule.regex(), rule.action)) + .collect::>(); + + let walker = WalkDir::new("./target").into_iter(); + + for entry in walker { + let e = entry.unwrap(); + let p = e.path(); + let s = p.to_str().unwrap(); + + if !p.is_file() { + continue; + } + + let m = rules + .iter() + .find(|(r, _)| r.is_match(s)) + .map(|x| x.1.clone()); + + println!(" {m:?} {s}") + } +} diff --git a/src/manifest.rs b/src/manifest.rs new file mode 100644 index 0000000..3df4c33 --- /dev/null +++ b/src/manifest.rs @@ -0,0 +1,389 @@ +use indexmap::IndexMap; +use regex::Regex; +use serde::Deserialize; +use std::path::PathBuf; + +#[derive(Debug, Clone, Deserialize)] +pub struct Manifest { + pub config: PickConfig, + pub rules: PickRules, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct PickConfig { + pub source: PathBuf, + pub target: PathBuf, +} + +// +// MARK: rules +// + +#[derive(Debug, Clone, Deserialize)] +#[serde(untagged)] +pub enum OptVec { + Single(T), + Vec(Vec), +} + +impl OptVec { + pub fn len(&self) -> usize { + match self { + Self::Single(_) => 1, + Self::Vec(v) => v.len(), + } + } + + pub fn get(&self, idx: usize) -> Option<&T> { + match self { + Self::Single(t) => (idx == 0).then_some(t), + Self::Vec(v) => v.get(idx), + } + } +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(untagged)] +pub enum PickRule { + Plain(String), + Nested(PickRules), +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(transparent)] +pub struct PickRules(OptVec>); + +impl PickRules { + pub fn iter<'a>(&'a self) -> PickRuleIterator<'a> { + PickRuleIterator { + stack: vec![PickRuleIterState { + rules: self, + map_index: 0, + entry_index: 0, + prefix: Vec::new(), + }], + } + } +} + +impl<'a> IntoIterator for &'a PickRules { + type Item = FlatPickRule; + type IntoIter = PickRuleIterator<'a>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +// +// MARK: rule iterator +// + +#[derive(Debug, Clone)] +pub struct FlatPickRule { + pub patterns: Vec, + pub action: String, +} + +impl FlatPickRule { + pub fn regex(&self) -> Regex { + Regex::new( + &self + .patterns + .join("/") + .split("/") + .map(|x| match x { + "**" => "((:?[^/]+)*)".to_owned(), + "*" => "([^/]+)".to_owned(), + x => regex::escape(x), + }) + .collect::>() + .join("/"), + ) + .unwrap() + } +} + +struct PickRuleIterState<'a> { + rules: &'a PickRules, + map_index: usize, + entry_index: usize, + prefix: Vec, +} +pub struct PickRuleIterator<'a> { + stack: Vec>, +} + +impl Iterator for PickRuleIterator<'_> { + type Item = FlatPickRule; + + fn next(&mut self) -> Option { + if self.stack.is_empty() { + return None; + } + + let current = self.stack.last_mut().unwrap(); + + if current.map_index >= current.rules.0.len() { + self.stack.pop(); + return self.next(); + } + + let current_map = ¤t.rules.0.get(current.map_index).unwrap(); + + if current.entry_index >= current_map.len() { + current.map_index += 1; + current.entry_index = 0; + return self.next(); + } + + let (key, value) = current_map.get_index(current.entry_index).unwrap(); + + current.entry_index += 1; + + match value { + PickRule::Plain(action) => { + let mut patterns = current.prefix.clone(); + patterns.push(key.to_string()); + + Some(FlatPickRule { + patterns, + action: action.clone(), + }) + } + PickRule::Nested(nested_rules) => { + let mut prefix = current.prefix.clone(); + prefix.push(key.to_string()); + + self.stack.push(PickRuleIterState { + rules: nested_rules, + map_index: 0, + entry_index: 0, + prefix, + }); + + self.next() + } + } + } +} + +// +// MARK: tests +// + +#[cfg(test)] +mod tests { + use super::*; + use std::path::Path; + + #[test] + fn parse_simple_manifest() { + let toml_str = r#" + [config] + source = "./src" + target = "./tgt" + + [[rules]] + "*.rs" = "copy" + "*.md" = "ignore" + "#; + + let manifest: Manifest = toml::from_str(toml_str).unwrap(); + + assert_eq!(manifest.config.source, Path::new("./src")); + assert_eq!(manifest.config.target, Path::new("./tgt")); + + let rules: Vec = manifest.rules.iter().collect(); + assert_eq!(rules.len(), 2); + + assert_eq!(rules[0].patterns, vec!["*.rs"]); + assert_eq!(rules[0].action, "copy"); + + assert_eq!(rules[1].patterns, vec!["*.md"]); + assert_eq!(rules[1].action, "ignore"); + } + + #[test] + fn rule_ordering_preserved() { + let toml_str = r#" + [config] + source = "./src" + target = "./tgt" + + [[rules]] + "third" = "c" + "first" = "a" + "second" = "b" + "#; + + let manifest: Manifest = toml::from_str(toml_str).unwrap(); + let rules: Vec = manifest.rules.iter().collect(); + + assert_eq!(rules.len(), 3); + assert_eq!(rules[0].patterns, vec!["third"]); + assert_eq!(rules[0].action, "c"); + assert_eq!(rules[1].patterns, vec!["first"]); + assert_eq!(rules[1].action, "a"); + assert_eq!(rules[2].patterns, vec!["second"]); + assert_eq!(rules[2].action, "b"); + } + + #[test] + fn nested_rules_order() { + let toml_str = r#" + [config] + source = "./src" + target = "./tgt" + + [[rules]] + "a" = "action_a" + "b" = "action_b" + + [[rules."nested"]] + "c" = "action_c" + "d" = "action_d" + + [[rules]] + "e" = "action_e" + "#; + + let manifest: Manifest = toml::from_str(toml_str).unwrap(); + let rules: Vec = manifest.rules.iter().collect(); + + assert_eq!(rules.len(), 5); + assert_eq!(rules[0].patterns, vec!["a"]); + assert_eq!(rules[0].action, "action_a"); + assert_eq!(rules[1].patterns, vec!["b"]); + assert_eq!(rules[1].action, "action_b"); + assert_eq!(rules[2].patterns, vec!["nested", "c"]); + assert_eq!(rules[2].action, "action_c"); + assert_eq!(rules[3].patterns, vec!["nested", "d"]); + assert_eq!(rules[3].action, "action_d"); + assert_eq!(rules[4].patterns, vec!["e"]); + assert_eq!(rules[4].action, "action_e"); + } + + #[test] + fn deeply_nested_rules() { + let toml_str = r#" + [config] + source = "./src" + target = "./tgt" + + [[rules."a"."b"."c"]] + "d" = "action_d" + "#; + + let manifest: Manifest = toml::from_str(toml_str).unwrap(); + let rules: Vec = manifest.rules.iter().collect(); + + assert_eq!(rules.len(), 1); + assert_eq!(rules[0].patterns, vec!["a", "b", "c", "d"]); + assert_eq!(rules[0].action, "action_d"); + } + + #[test] + fn multiple_maps_same_level() { + let toml_str = r#" + [config] + source = "./src" + target = "./tgt" + + [[rules]] + "a1" = "copy" + "a2" = "ignore" + + [[rules]] + "b1" = "copy" + "b2" = "ignore" + "#; + + let manifest: Manifest = toml::from_str(toml_str).unwrap(); + let rules: Vec = manifest.rules.iter().collect(); + + // Test that all rules exist and are in the correct order + assert_eq!(rules.len(), 4); + assert_eq!(rules[0].patterns, vec!["a1"]); + assert_eq!(rules[0].action, "copy"); + assert_eq!(rules[1].patterns, vec!["a2"]); + assert_eq!(rules[1].action, "ignore"); + assert_eq!(rules[2].patterns, vec!["b1"]); + assert_eq!(rules[2].action, "copy"); + assert_eq!(rules[3].patterns, vec!["b2"]); + assert_eq!(rules[3].action, "ignore"); + } + + #[test] + fn empty_rules_list() { + let toml_str = r#" + [config] + source = "./src" + target = "./tgt" + + [[rules]] + "#; + + let manifest: Manifest = toml::from_str(toml_str).unwrap(); + let rules: Vec = manifest.rules.iter().collect(); + + assert_eq!(rules.len(), 0); + } + + #[test] + #[should_panic(expected = "missing field `config`")] + fn missing_config() { + let toml_str = r#" + [[rules]] + "a" = "copy" + "#; + + let _: Manifest = toml::from_str(toml_str).unwrap(); + } + + #[test] + #[should_panic(expected = "missing field `source`")] + fn incomplete_config() { + let toml_str = r#" + [config] + target = "./tgt" + + [[rules]] + "a" = "copy" + "#; + + let _: Manifest = toml::from_str(toml_str).unwrap(); + } + + #[test] + #[should_panic] + fn invalid_toml_syntax() { + let toml_str = r#" + [config] + source = "./src" + target = "./tgt" + + [[rules]] + "invalid" = { this is not valid TOML } + "#; + + let _: Manifest = toml::from_str(toml_str).unwrap(); + } + + #[test] + fn mixed_rule_types() { + let toml_str = r#" + [config] + source = "./src" + target = "./tgt" + + [[rules]] + "plain" = "copy" + "nested" = { invalid_as_string = true } + "#; + + // This should fail because a table is not a valid PickRule + let result = toml::from_str::(toml_str); + assert!(result.is_err()); + } +}