From 64f45a443cacb98e622ea3038e9a10036ba099e6 Mon Sep 17 00:00:00 2001 From: Mark Date: Sat, 3 May 2025 16:42:33 -0700 Subject: [PATCH] Refactor --- .gitea/workflows/ci.yml | 5 +- Cargo.lock | 143 ++++++- Cargo.toml | 3 +- README.md | 100 +++++ default.nix | 16 + sample.toml | 48 +++ src/logging.rs | 2 - src/main.rs | 183 ++------- src/manifest.rs | 781 -------------------------------------- src/manifest/mod.rs | 2 + src/manifest/rule.rs | 357 +++++++++++++++++ src/manifest/types.rs | 341 +++++++++++++++++ src/prepare.rs | 161 ++++++++ src/{util.rs => style.rs} | 20 + src/tool/bash.rs | 2 +- src/tool/mod.rs | 2 +- test.toml | 59 --- 17 files changed, 1229 insertions(+), 996 deletions(-) create mode 100644 README.md create mode 100644 default.nix create mode 100644 sample.toml delete mode 100644 src/manifest.rs create mode 100644 src/manifest/mod.rs create mode 100644 src/manifest/rule.rs create mode 100644 src/manifest/types.rs create mode 100644 src/prepare.rs rename src/{util.rs => style.rs} (55%) delete mode 100644 test.toml diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index 3143e95..37ba63b 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -29,9 +29,9 @@ jobs: sudo apt update DEBIAN_FRONTEND=noninteractive \ sudo apt install --yes rustup + rustup default stable - name: Run clippy - working-directory: ./index run: cargo clippy --all-targets --all-features buildandtest: @@ -45,11 +45,10 @@ jobs: sudo apt update DEBIAN_FRONTEND=noninteractive \ sudo apt install --yes rustup + rustup default stable - name: Build - working-directory: ./index run: cargo build --release - name: Test - working-directory: ./index run: cargo test --release diff --git a/Cargo.lock b/Cargo.lock index 2cf178d..c8dfd0d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -73,6 +73,12 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" +[[package]] +name 
= "bumpalo" +version = "3.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" + [[package]] name = "cfg-if" version = "1.0.0" @@ -125,6 +131,25 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +[[package]] +name = "console" +version = "0.15.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "unicode-width", + "windows-sys", +] + +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "equivalent" version = "1.0.2" @@ -182,12 +207,36 @@ dependencies = [ "serde", ] +[[package]] +name = "indicatif" +version = "0.17.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" +dependencies = [ + "console", + "number_prefix", + "portable-atomic", + "unicode-segmentation", + "unicode-width", + "web-time", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -237,6 +286,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "number_prefix" +version = "0.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + [[package]] name = "once_cell" version = "1.21.3" @@ -251,12 +306,13 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "pick" -version = "0.1.0" +version = "0.0.1" dependencies = [ "anstyle", "anyhow", "clap", "indexmap", + "indicatif", "regex", "serde", "tempfile", @@ -272,6 +328,12 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +[[package]] +name = "portable-atomic" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" + [[package]] name = "proc-macro2" version = "1.0.95" @@ -555,6 +617,18 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" + [[package]] name = "utf8parse" version = "0.2.2" @@ -586,6 +660,73 @@ dependencies = [ "wit-bindgen-rt", ] +[[package]] +name = "wasm-bindgen" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index 4c8e664..ade33b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pick" -version = "0.1.0" +version = "0.0.1" edition = "2024" # @@ -69,3 +69,4 @@ tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } walkdir = "2.5.0" tempfile = "3.10.1" anyhow = "1.0.98" +indicatif = { version = "0.17.11", features = ["improved_unicode"] } diff --git a/README.md b/README.md new file mode 100644 index 0000000..09df8c1 --- /dev/null +++ b/README.md @@ -0,0 +1,100 @@ +# ⛏️ Pick + +Pick is a utility that processes files based on pattern 
matching rules. + + +## Usage +- `pick manifest.toml` to run a task +- `pick --help` for documentation +- `pick gen manifest.toml` to generate a sample manifest + +A detailed manifest specification is below. + + +# Writing manifests + +A pick manifest is a TOML file with three main sections: + +- `config`: Global configuration settings +- `tool`: Tool configuration +- `rules`: Patterns that tell us which files to process + +See [`sample.toml`](./sample.toml) for an example configuration. + + +## Selector Pattern Syntax + +Pick uses patterns to select files to process. + +- Patterns match against the full file path relative to the source directory +- The first matching rule is applied to each file. Once a rule matches, all others are ignored. +- Patterns are matched in the order they are defined. +- Leading and trailing slashes are ignored +- Multiple consecutive slashes are treated as a single slash + + +### Wildcards + +- `*`: Matches exactly one path segment (one directory or filename component) +- `**`: Matches zero or more path segments (can span across multiple directories) + +### Pattern Rules + + +### Syntax Examples + +| Pattern | Description | Matches | Doesn't Match | +|---------|-------------|---------|---------------| +| `file.txt` | Exact file match | `file.txt` | `other.txt`, `dir/file.txt` | +| `dir/file.txt` | Exact path match | `dir/file.txt` | `file.txt`, `other/file.txt` | +| `*.txt` | Any file with .txt extension in root | `file.txt`, `other.txt` | `file.jpg`, `dir/file.txt` | +| `**/*.txt` | Any .txt file anywhere | `file.txt`, `dir/file.txt`, `a/b/c.txt` | `file.jpg` | +| `dir/**` | Any file under dir | `dir/file.txt`, `dir/sub/file.jpg` | `root/file.txt` | +| `**/dir` | Any dir named "dir" | `dir`, `a/b/dir` | `dir/file`, `dirname` | +| `root/**test` (same as `root/**/test`) | Files named "test" in any subdir of root | `root/test`, `root/a/b/test` | `root/testfile`, `root/file` | + + +## TOML Rule Structure + +### Simple rules: + +```toml
+[[rules]] +"a/**" = "task" +"b/**" = "task" +``` + +### Nested rules: + +```toml +[[rules."a"]] +"1/**" = "task" +"2/**" = "task" + +# Equivalent to: +[[rules]] +"a/1/**" = "task" +"a/2/**" = "task" +``` + +Nested rules may use wildcards: + +```toml +[[rules."a/**/"]] +"1/**" = "task" +"2/**" = "task" + +# Equivalent to: +[[rules]] +"a/**/1/**" = "task" +"a/**/2/**" = "task" +``` + + +# Tools + +## Bash +Executes bash scripts. The following environment variables are available: + +- `PICK_FILE`: Absolute path to the current file +- `PICK_RELATIVE`: Relative path (from the source directory) \ No newline at end of file diff --git a/default.nix b/default.nix new file mode 100644 index 0000000..0deb489 --- /dev/null +++ b/default.nix @@ -0,0 +1,16 @@ +{ lib, fetchgit, rustPlatform }: +rustPlatform.buildRustPackage rec { + pname = "pick"; + version = "0.0.1"; + cargoLock.lockFile = src + /Cargo.lock; + + src = builtins.fetchGit { + url = "ssh://git@git.betalupi.com:33/Mark/pick.git"; + ref = "refs/tags/v${version}"; + }; + + meta = with lib; { + description = "A utility that processes files based on pattern matching rules"; + homepage = "https://git.betalupi.com/Mark/pick"; + }; +} \ No newline at end of file diff --git a/sample.toml b/sample.toml new file mode 100644 index 0000000..4f87dbc --- /dev/null +++ b/sample.toml @@ -0,0 +1,48 @@ +# All paths are relative to workdir. +# Workdir is this file's parent by default. +# If workdir is relative, it is relative to this file's parent. +[config] +work_dir = "/mnt/hdd/media/Media/Music/Library" + +# If true, follow symlinks +# Conflicts with `process_links`. +# follow_links = false + +# If true, act on directories +# process_dirs = false + +# If true, act on regular files +# process_files = true + +# If true, act on symlinks. +# Conflicts with `follow_links`. +# process_links = false + + +# The first rule to match a path is run. Files that match no rules are ignored.
+# Paths are checked relative to the source directory: +# e.g, "/source/path/to/file.gz" is matched as "path/to/file.gz" +# +# Each pattern is matched _exactly_ against the full path of each file. +# e.g, `file` will _only_ match `/source/file`. This is not a .gitignore. +# To match batches of files, use wildcards. +# +# Wildcards: +# - `*` matches exactly one path segment. It will _not_ match across directories. +# - `*.flac` matches all flac files +# - `/a/*/file.txt` matches all files named `file.txt` in subdirectories of `a`. +# this will _not_ match `/a/b/c/file.txt`. +# +# - `**` matches zero or more path segments. +# - `**` will match all files. +# - `**/*.flac` will match all files ending in ".flac" +# - The ** wildcard should always be surrounded by slashes. +# if you omit them, they will be implied. Avoid doing this: +# - `**.flac` is equivalent to `**/.flac`. +# - `a**b` is equivalent to `a/**/b` +# +# Tasks: +# - See the docs for a list of tasks we support. +# - An empty string (as below) always means "ignore this file". +[[rules]] +"**" = "" diff --git a/src/logging.rs b/src/logging.rs index d18e526..a0990fb 100644 --- a/src/logging.rs +++ b/src/logging.rs @@ -132,8 +132,6 @@ impl LoggingPreset { } } -/// A pre-baked set of loglevel cli arguments. 
-/// /// # Usage /// ```ignore /// #[derive(Parser, Debug)] diff --git a/src/main.rs b/src/main.rs index ae32146..0768248 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,39 +1,44 @@ -use anyhow::{Context, Result}; +use anyhow::Result; use clap::Parser; +use indicatif::ProgressIterator; use logging::LogCli; -use manifest::Manifest; use std::{path::PathBuf, process::ExitCode}; -use tool::{PickTool, TaskContext}; +use style::progress_style; +use tool::PickTool; use tracing::{debug, error, trace}; -use walkdir::WalkDir; pub mod logging; pub mod manifest; +pub mod style; pub mod tool; -pub mod util; -// enumerate files with a spinner (count size) -// trim everything +mod prepare; + +// count size of files to process // parallelism -// input from stdin? -// show progress // capture/print stdout/stderr -// workdir vs root? +// workdir vs root +// package & auto-build +// Generate demo toml with contents // // Tools: -// - * list -// - *** rename +// - * list (m3u) +// - *** rename and retag // - ** typst -// - *** retag // - gitea pkg (POST) // - s3 // - rsync // +// Later: // chain tools -// print output? +// pattern capture groups +// input from stdin +// improve docs (inline!) +// warn when no matches +/// Pick is a utility that processes files based on pattern matching rules. 
#[derive(Parser, Debug)] -#[command(version, about, long_about = None, styles=util::get_styles())] +#[command(version, about, long_about = None, styles=style::get_styles())] struct Cli { #[command(flatten)] log: LogCli, @@ -58,7 +63,9 @@ fn main_inner() -> Result { // MARK: setup // - let cli = Cli::parse(); + let mut cli = Cli::parse(); + cli.manifest = std::path::absolute(&cli.manifest)?; + let cli = cli; tracing_subscriber::fmt() .with_env_filter(cli.log.to_preset().get_config()) @@ -67,145 +74,27 @@ fn main_inner() -> Result { .with_writer(std::io::stderr) .init(); - let manifest_path_str = cli - .manifest - .to_str() - .context("while converting path to string")?; + let manifest = prepare::load_manifest(&cli)?; - if !cli.manifest.is_file() { - error!("Manifest {manifest_path_str} isn't a file"); - return Ok(ExitCode::FAILURE); - } - - let manifest_string = match std::fs::read_to_string(&cli.manifest) { - Ok(x) => x, - Err(error) => { - error!("Error while reading {manifest_path_str}: {error}"); - return Ok(ExitCode::FAILURE); - } - }; - - let manifest = match toml::from_str::(&manifest_string) { - Ok(manifest) => { - // Validate manifest - if manifest.config.follow_links && manifest.config.links { - error!("Error: `follow_links` and `links` are mutually exclusive"); - return Ok(ExitCode::FAILURE); - } - - manifest - } - Err(error) => { - error!("Error while parsing {manifest_path_str}"); - error!("{}", error.to_string()); - return Ok(ExitCode::FAILURE); - } - }; - - let manifest_path = std::path::absolute(cli.manifest)?; - let work_dir = manifest.config.work_dir(&manifest_path)?; + let work_dir = manifest.config.work_dir(&cli.manifest)?; debug!("Working directory is {work_dir:?}"); - // - // MARK: rules - // - - let rules = { - let mut rules = Vec::new(); - for rule in &manifest.rules { - rules.push((rule.regex()?, rule.tasks)); - } - rules - }; - - let source_path = std::path::absolute(&work_dir)?; - let walker = 
WalkDir::new(&source_path).follow_links(manifest.config.follow_links); + let queue = prepare::list_queue(&manifest, &work_dir)?; + #[expect(clippy::unwrap_used)] // Fix later let bash = manifest.tool.bash.as_ref().unwrap(); - bash.before(&manifest_path, &manifest.config)?; + bash.before(&cli.manifest, &manifest.config)?; - for entry in walker { - let entry = entry?; - let path_abs = std::path::absolute(entry.path())?; - - // This path is a child of source_path, so this cannot fail - #[expect(clippy::unwrap_used)] - let path_rel = entry.path().strip_prefix(&source_path).unwrap(); - let path_rel = if path_rel.parent().is_none() { - // Make sure we never have empty string paths - // (makes logs clearer) - PathBuf::from(".").join(path_rel) - } else { - path_rel.to_path_buf() - }; - - let path_abs_str = path_abs - .to_str() - .context("could not convert path to string")? - .to_owned(); - let path_rel_str = path_rel - .to_str() - .context("could not convert path to string")? - .to_owned(); - - if path_abs.is_symlink() && !manifest.config.links { - trace!("Skipping {}, is a symlink", path_rel_str); - continue; - } - - if path_abs.is_dir() && !manifest.config.dirs { - trace!("Skipping {}, is a directory", path_rel_str); - continue; - } - - if path_abs.is_file() && !manifest.config.files { - trace!("Skipping {}, is a file", path_rel_str); - continue; - } - - let task = rules.iter().find(|(r, _)| r.is_match(&path_rel_str)); - - let tasks = match task { - None => { - trace!("Skipping {}, no match", path_rel_str); - continue; - } - Some(x) => { - let tasks: Vec = - x.1.iter() - .map(|x| x.trim()) - .filter(|x| !x.is_empty()) - .map(|x| x.to_owned()) - .collect(); - - if tasks.is_empty() { - trace!("Skipping {}", path_rel_str); - continue; - } - - tasks - } - }; - - let base_ctx = TaskContext { - task: "".into(), - path_abs, - path_abs_str, - path_rel, - path_rel_str, - }; - - for task in tasks { - trace!("Running `{task}` on {}", base_ctx.path_rel_str); - - let mut ctx = 
base_ctx.clone(); - ctx.task = task; - - bash.run(&manifest_path, &manifest.config, ctx)?; - } + for ctx in queue + .into_iter() + .progress_with_style(progress_style()) + .with_message("Processing") + { + trace!("Running `{}` on {}", ctx.task, ctx.path_rel_str); + bash.run(&cli.manifest, &manifest.config, ctx)?; } - bash.after(&manifest_path, &manifest.config)?; + bash.after(&cli.manifest, &manifest.config)?; return Ok(ExitCode::SUCCESS); } diff --git a/src/manifest.rs b/src/manifest.rs deleted file mode 100644 index a49d16f..0000000 --- a/src/manifest.rs +++ /dev/null @@ -1,781 +0,0 @@ -use anyhow::{Result, bail}; -use indexmap::IndexMap; -use regex::Regex; -use serde::Deserialize; -use std::path::{Path, PathBuf}; -use tracing::warn; - -use crate::tool::ToolConfig; - -#[derive(Debug, Deserialize)] -#[serde(deny_unknown_fields)] -pub struct Manifest { - pub config: PickConfig, - pub tool: ToolConfig, - pub rules: PickRules, -} - -#[derive(Debug, Clone, Deserialize)] -#[serde(deny_unknown_fields)] -pub struct PickConfig { - #[serde(default)] - pub work_dir: Option, - - #[serde(default = "default_false")] - pub follow_links: bool, - - #[serde(default = "default_true")] - pub files: bool, - - #[serde(default = "default_false")] - pub dirs: bool, - - #[serde(default = "default_false")] - pub links: bool, -} - -impl PickConfig { - pub fn work_dir(&self, manifest_path: &Path) -> Result { - // Parent directory should always exist since manifest is a file. - #[expect(clippy::unwrap_used)] - let p = manifest_path.parent().unwrap().to_path_buf(); - - match &self.work_dir { - None => Ok(p), - Some(path) => { - if path.is_absolute() { - Ok(path.to_owned()) - } else { - Ok(std::path::absolute(p.join(path))?) 
- } - } - } - } -} - -fn default_true() -> bool { - true -} - -fn default_false() -> bool { - false -} - -// -// MARK: rules -// - -#[derive(Debug, Clone, Deserialize)] -#[serde(untagged)] -pub enum OptVec { - Single(T), - Vec(Vec), -} - -impl OptVec { - pub fn len(&self) -> usize { - match self { - Self::Single(_) => 1, - Self::Vec(v) => v.len(), - } - } - - pub fn is_empty(&self) -> bool { - match self { - Self::Single(_) => false, - Self::Vec(v) => v.is_empty(), - } - } - - pub fn get(&self, idx: usize) -> Option<&T> { - match self { - Self::Single(t) => (idx == 0).then_some(t), - Self::Vec(v) => v.get(idx), - } - } -} -impl From> for Vec { - fn from(val: OptVec) -> Self { - match val { - OptVec::Single(t) => vec![t], - OptVec::Vec(v) => v, - } - } -} - -#[derive(Debug, Clone, Deserialize)] -#[serde(untagged)] -#[serde(deny_unknown_fields)] -pub enum PickRule { - Plain(OptVec), - Nested(PickRules), -} - -#[derive(Debug, Clone, Deserialize)] -#[serde(transparent)] -pub struct PickRules(OptVec>); - -impl PickRules { - pub fn iter(&self) -> PickRuleIterator<'_> { - PickRuleIterator { - stack: vec![PickRuleIterState { - rules: self, - map_index: 0, - entry_index: 0, - prefix: Vec::new(), - }], - } - } -} - -impl<'a> IntoIterator for &'a PickRules { - type Item = FlatPickRule; - type IntoIter = PickRuleIterator<'a>; - - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} - -// -// MARK: rule iterator -// - -#[derive(Debug, Clone)] -pub struct FlatPickRule { - pub patterns: Vec, - pub tasks: Vec, -} - -#[derive(Debug)] -enum RegexSegment { - /// A single segment - Single(String), - - /// An optional doublestar segment - DoubleStar, -} - -impl RegexSegment { - /// Returns the regex pattern of this part, - /// prefixed with a /. 
- fn to_regex_part(&self, prev: Option<&Self>, next: Option<&Self>) -> String { - match (prev, self, next) { - // Consecutive single segments need a trailing slash - (_, Self::Single(x), Some(Self::Single(_))) => format!("{x}[/]"), - - // Terminal single segments don't need a trailing slash - (_, Self::Single(x), None) => x.to_owned(), - - // Neighboring doublestar is always responsible for slashes - (_, Self::Single(x), Some(Self::DoubleStar)) => x.to_owned(), - - // [^/]+ is a "segment" (a block of non-slash chars) - // The "base" doublestar pattern is a segment - // followed by zero or more segments prefixed by a slash. - // - // No additional slashes - (None, Self::DoubleStar, None) => "((?:[^/]+(?:[/][^/]+)*)?)".into(), - - // Doublestars cannot be neighbors - (_, Self::DoubleStar, Some(Self::DoubleStar)) - | (Some(Self::DoubleStar), Self::DoubleStar, _) => { - unreachable!("consecutive doublestars must be reduced") - } - - // Leading slash - (Some(Self::Single(_)), Self::DoubleStar, None) => { - "((?:[/][^/]+(?:[/][^/]+)*)?)".into() - } - - // Trailing slash - (None, Self::DoubleStar, Some(Self::Single(_))) => { - "((?:[^/]+(?:[/][^/]+)*[/])?)".into() - } - - // Leading and trailing slash. - // Also, replace self with a [/] when empty. 
- (Some(Self::Single(_)), Self::DoubleStar, Some(Self::Single(_))) => { - "((?:[/][^/]+(?:[/][^/]+)*[/])|[/])".into() - } - } - } -} - -impl FlatPickRule { - pub fn regex(&self) -> Result { - // Flatten pattern - // Double slashes are handled later - let pattern = self.patterns.join("/"); - - if pattern.ends_with("/") { - warn!("Pattern `{pattern}` has a trailing slash which will be ignored") - } - - if pattern.starts_with("/") { - warn!("Pattern `{pattern}` has a leading slash which will be ignored") - } - - // Split on slashes or stars - // This is a lot like .split("/"), but handles - // the edge case where ** is not delimited by slashes - // (`root**test` is equivalent to `root/**/test`) - let segments = { - #[expect(clippy::unwrap_used)] - let re = Regex::new("[*]{2,}|[/]").unwrap(); - let split = re.find_iter(&pattern); - - let bounds = split - .into_iter() - .flat_map(|x| { - let r = x.range(); - let a = r.start; - let b = r.end; - [a, b] - }) - .chain([pattern.len()]) - .collect::>(); - - let mut parts = Vec::new(); - let mut last = 0; - for next in bounds { - let seg = &pattern[last..next]; - // Consecutive slashes are identical to a single slash - if seg != "/" && !seg.is_empty() { - parts.push(seg); - } - last = next; - } - - parts - }; - - let mut rebuilt_segments = Vec::new(); - let mut last_was_doublestar = false; - for segment in segments { - // This is a wilcard regex - // (**, ***, etc) - if segment.len() > 1 && segment.chars().all(|x| x == '*') { - match segment { - "**" => { - // Consecutive doublestars are meaningless - if !last_was_doublestar { - rebuilt_segments.push(RegexSegment::DoubleStar); - } - last_was_doublestar = true; - } - _ => bail!("Invalid wildcard `{segment}`"), - } - continue; - } - last_was_doublestar = false; - - let parts = segment.split("*").collect::>(); - - let mut rebuilt = String::new(); - for (i, part) in parts.into_iter().enumerate() { - if i != 0 { - rebuilt.push_str("([^/]*)") - } - - 
rebuilt.push_str(®ex::escape(part)); - } - - rebuilt_segments.push(RegexSegment::Single(rebuilt)); - } - - let mut re_built = String::new(); - let mut prev = None; - for (i, seg) in rebuilt_segments.iter().enumerate() { - let next = rebuilt_segments.get(i + 1); - re_built.push_str(&seg.to_regex_part(prev, next)); - prev = Some(seg); - } - - let re_built = format!("^{re_built}$"); - // This regex should always be valid - #[expect(clippy::unwrap_used)] - Ok(Regex::new(&re_built).unwrap()) - } -} - -struct PickRuleIterState<'a> { - rules: &'a PickRules, - map_index: usize, - entry_index: usize, - prefix: Vec, -} -pub struct PickRuleIterator<'a> { - stack: Vec>, -} - -impl Iterator for PickRuleIterator<'_> { - type Item = FlatPickRule; - - fn next(&mut self) -> Option { - if self.stack.is_empty() { - return None; - } - - #[expect(clippy::unwrap_used)] - let current = self.stack.last_mut().unwrap(); - - if current.map_index >= current.rules.0.len() { - self.stack.pop(); - return self.next(); - } - - #[expect(clippy::unwrap_used)] - let current_map = ¤t.rules.0.get(current.map_index).unwrap(); - - if current.entry_index >= current_map.len() { - current.map_index += 1; - current.entry_index = 0; - return self.next(); - } - - #[expect(clippy::unwrap_used)] - let (key, value) = current_map.get_index(current.entry_index).unwrap(); - - current.entry_index += 1; - - match value { - PickRule::Plain(task) => { - let mut patterns = current.prefix.clone(); - patterns.push(key.to_string()); - - Some(FlatPickRule { - patterns, - tasks: task.clone().into(), - }) - } - PickRule::Nested(nested_rules) => { - let mut prefix = current.prefix.clone(); - prefix.push(key.to_string()); - - self.stack.push(PickRuleIterState { - rules: nested_rules, - map_index: 0, - entry_index: 0, - prefix, - }); - - self.next() - } - } - } -} - -// -// MARK: tests -// - -#[cfg(test)] -mod tests { - use super::*; - - #[derive(Debug, Clone, Deserialize)] - struct TestManifest { - rules: PickRules, - } - - 
#[test] - fn rule_ordering_preserved() { - let toml_str = r#" - [[rules]] - "third" = "c" - "first" = "a" - "second" = "b" - "#; - - let test_manifest: TestManifest = toml::from_str(toml_str).unwrap(); - let rules: Vec = test_manifest.rules.iter().collect(); - - assert_eq!(rules.len(), 3); - assert_eq!(rules[0].patterns, vec!["third"]); - assert_eq!(rules[0].tasks, vec!["c"]); - assert_eq!(rules[1].patterns, vec!["first"]); - assert_eq!(rules[1].tasks, vec!["a"]); - assert_eq!(rules[2].patterns, vec!["second"]); - assert_eq!(rules[2].tasks, vec!["b"]); - } - - #[test] - fn nested_rules_order() { - let toml_str = r#" - [[rules]] - "a" = "task_a" - "b" = "task_b" - - [[rules."nested"]] - "c" = "task_c" - "d" = "task_d" - - [[rules]] - "e" = "task_e" - "#; - - let test_manifest: TestManifest = toml::from_str(toml_str).unwrap(); - let rules: Vec = test_manifest.rules.iter().collect(); - - assert_eq!(rules.len(), 5); - assert_eq!(rules[0].patterns, vec!["a"]); - assert_eq!(rules[0].tasks, vec!["task_a"]); - assert_eq!(rules[1].patterns, vec!["b"]); - assert_eq!(rules[1].tasks, vec!["task_b"]); - assert_eq!(rules[2].patterns, vec!["nested", "c"]); - assert_eq!(rules[2].tasks, vec!["task_c"]); - assert_eq!(rules[3].patterns, vec!["nested", "d"]); - assert_eq!(rules[3].tasks, vec!["task_d"]); - assert_eq!(rules[4].patterns, vec!["e"]); - assert_eq!(rules[4].tasks, vec!["task_e"]); - } - - #[test] - fn deeply_nested_rules() { - let toml_str = r#" - [[rules."a"."b"."c"]] - "d" = "task_d" - "#; - - let test_manifest: TestManifest = toml::from_str(toml_str).unwrap(); - let rules: Vec = test_manifest.rules.iter().collect(); - - assert_eq!(rules.len(), 1); - assert_eq!(rules[0].patterns, vec!["a", "b", "c", "d"]); - assert_eq!(rules[0].tasks, vec!["task_d"]); - } - - #[test] - fn multiple_maps_same_level() { - let toml_str = r#" - [[rules]] - "a1" = "copy" - "a2" = "ignore" - - [[rules]] - "b1" = "copy" - "b2" = "ignore" - "#; - - let test_manifest: TestManifest = 
toml::from_str(toml_str).unwrap(); - let rules: Vec = test_manifest.rules.iter().collect(); - - assert_eq!(rules.len(), 4); - assert_eq!(rules[0].patterns, vec!["a1"]); - assert_eq!(rules[0].tasks, vec!["copy"]); - assert_eq!(rules[1].patterns, vec!["a2"]); - assert_eq!(rules[1].tasks, vec!["ignore"]); - assert_eq!(rules[2].patterns, vec!["b1"]); - assert_eq!(rules[2].tasks, vec!["copy"]); - assert_eq!(rules[3].patterns, vec!["b2"]); - assert_eq!(rules[3].tasks, vec!["ignore"]); - } - - #[test] - fn empty_rules_list() { - let toml_str = r#" - [[rules]] - "#; - - let test_manifest: TestManifest = toml::from_str(toml_str).unwrap(); - let rules: Vec = test_manifest.rules.iter().collect(); - - assert_eq!(rules.len(), 0); - } - - #[test] - fn mixed_rule_types() { - let toml_str = r#" - [[rules]] - "plain" = "copy" - "nested" = { invalid_as_string = true } - "#; - - let result = toml::from_str::(toml_str); - assert!(result.is_err()); - } - - #[test] - fn pattern_simple() { - let rule = FlatPickRule { - patterns: vec!["file.txt".to_string()], - tasks: vec!["copy".to_string()], - }; - - let regex = rule.regex().unwrap(); - assert!(regex.is_match("file.txt")); - assert!(!regex.is_match("other.txt")); - assert!(!regex.is_match("path/file.txt")); - } - - #[test] - fn pattern_with_path() { - let rule = FlatPickRule { - patterns: vec!["dir".to_string(), "file.txt".to_string()], - tasks: vec!["copy".to_string()], - }; - - let regex = rule.regex().unwrap(); - assert!(regex.is_match("dir/file.txt")); - assert!(!regex.is_match("file.txt")); - assert!(!regex.is_match("other/file.txt")); - } - - #[test] - fn pattern_wildcard_simple() { - let rule = FlatPickRule { - patterns: vec!["*.txt".to_string()], - tasks: vec!["copy".to_string()], - }; - - let regex = rule.regex().unwrap(); - assert!(regex.is_match("file.txt")); - assert!(regex.is_match("other.txt")); - assert!(!regex.is_match("file.jpg")); - assert!(!regex.is_match("nested/file.txt")); - } - - #[test] - fn pattern_doublestar() 
{ - let rule = FlatPickRule { - patterns: vec!["**".to_string(), "*.txt".to_string()], - tasks: vec!["copy".to_string()], - }; - - let regex = rule.regex().unwrap(); - assert!(regex.is_match("file.txt")); - assert!(regex.is_match("dir/file.txt")); - assert!(regex.is_match("dir/subdir/file.txt")); - assert!(!regex.is_match("file.jpg")); - assert!(!regex.is_match("dir/file.jpg")); - } - - #[test] - fn pattern_doublestar_consecutive() { - let rule = FlatPickRule { - patterns: vec![ - "**".to_string(), - "**".to_string(), - "**".to_string(), - "*.txt".to_string(), - ], - tasks: vec!["copy".to_string()], - }; - - let regex = rule.regex().unwrap(); - assert!(regex.is_match("file.txt")); - assert!(regex.is_match("dir/file.txt")); - assert!(regex.is_match("dir/subdir/file.txt")); - assert!(!regex.is_match("file.jpg")); - assert!(!regex.is_match("dir/file.jpg")); - } - - #[test] - fn pattern_wildcard_double_slash() { - let rule = FlatPickRule { - patterns: vec!["**/*.txt".to_string()], - tasks: vec!["copy".to_string()], - }; - - let regex = rule.regex().unwrap(); - assert!(regex.is_match("file.txt")); - assert!(regex.is_match("dir/file.txt")); - assert!(regex.is_match("dir/subdir/file.txt")); - assert!(!regex.is_match("file.jpg")); - assert!(!regex.is_match("dir/file.jpg")); - } - - #[test] - fn pattern_single_dual() { - let rule = FlatPickRule { - patterns: vec!["**/*a*".to_string()], - tasks: vec!["copy".to_string()], - }; - - let regex = rule.regex().unwrap(); - assert!(regex.is_match("fileafile")); - assert!(regex.is_match("dir/fileafile")); - assert!(regex.is_match("filea")); - assert!(regex.is_match("dir/filea")); - assert!(regex.is_match("afile")); - assert!(regex.is_match("dir/afile")); - assert!(!regex.is_match("noletter")); - assert!(!regex.is_match("dir/noletter")); - } - - #[test] - fn pattern_single_end() { - let rule = FlatPickRule { - patterns: vec!["**/*".to_string()], - tasks: vec!["copy".to_string()], - }; - - let regex = rule.regex().unwrap(); - 
assert!(regex.is_match("file")); - assert!(regex.is_match("dir/file")); - } - - #[test] - fn pattern_double_end() { - let rule = FlatPickRule { - patterns: vec!["root/**".to_string()], - tasks: vec!["copy".to_string()], - }; - - let regex = rule.regex().unwrap(); - assert!(regex.is_match("root/file")); - assert!(!regex.is_match("dir/file")); - } - - #[test] - fn pattern_double_start() { - let rule = FlatPickRule { - patterns: vec!["**/dir".to_string()], - tasks: vec!["copy".to_string()], - }; - - let regex = rule.regex().unwrap(); - assert!(regex.is_match("dir")); - assert!(regex.is_match("a/b/dir")); - assert!(!regex.is_match("dir/file")); - } - - #[test] - fn pattern_double_adjacent_before() { - let rule = FlatPickRule { - // equivalent to root/**/test - patterns: vec!["root/**test".to_string()], - tasks: vec!["copy".to_string()], - }; - - let regex = rule.regex().unwrap(); - assert!(regex.is_match("root/test")); - assert!(regex.is_match("root/a/test")); - assert!(regex.is_match("root/a/b/c/test")); - assert!(!regex.is_match("root/file")); - assert!(!regex.is_match("root/xxtest")); - } - - #[test] - fn pattern_double_adjacent_after() { - let rule = FlatPickRule { - // equivalent to root/test/** - patterns: vec!["root/test**".to_string()], - tasks: vec!["copy".to_string()], - }; - - let regex = rule.regex().unwrap(); - assert!(regex.is_match("root/test")); - assert!(regex.is_match("root/test/a")); - assert!(regex.is_match("root/test/a/b/c")); - assert!(!regex.is_match("root/testxx")); - assert!(!regex.is_match("root/file")); - } - - #[test] - fn pattern_bad_any_extension() { - let rule = FlatPickRule { - // equivalent to root/test/** - patterns: vec!["**.flac".to_string()], - tasks: vec!["copy".to_string()], - }; - - let regex = rule.regex().unwrap(); - assert!(regex.is_match("root/.flac")); - assert!(regex.is_match("root/a/.flac")); - assert!(!regex.is_match("root/test.flac")); - assert!(!regex.is_match("test.flac")); - 
assert!(!regex.is_match("root/test/a/b/c.flac")); - assert!(!regex.is_match("root/testflac")); - assert!(!regex.is_match("test.mp3")); - } - - #[test] - fn pattern_good_any_extension() { - let rule = FlatPickRule { - // equivalent to root/test/** - patterns: vec!["**/*.flac".to_string()], - tasks: vec!["copy".to_string()], - }; - - let regex = rule.regex().unwrap(); - assert!(regex.is_match("root/.flac")); - assert!(regex.is_match("root/a/.flac")); - assert!(regex.is_match("root/test.flac")); - assert!(regex.is_match("test.flac")); - assert!(regex.is_match("root/test/a/b/c.flac")); - assert!(!regex.is_match("root/testflac")); - assert!(!regex.is_match("test.mp3")); - } - - #[test] - fn pattern_double_adjacent_between() { - let rule = FlatPickRule { - // equivalent to root/test/**/file - patterns: vec!["root/test**file".to_string()], - tasks: vec!["copy".to_string()], - }; - - let regex = rule.regex().unwrap(); - assert!(regex.is_match("root/test/file")); - assert!(regex.is_match("root/test/a/b/c/file")); - assert!(!regex.is_match("root/test")); - assert!(!regex.is_match("root/file")); - assert!(!regex.is_match("root/testfile")); - assert!(!regex.is_match("root/testxxfile")); - } - - #[test] - fn pattern_double_slashes() { - let rule = FlatPickRule { - patterns: vec!["dir//file.txt".to_string()], - tasks: vec!["copy".to_string()], - }; - - let regex = rule.regex().unwrap(); - assert!(regex.is_match("dir/file.txt")); - assert!(!regex.is_match("dirfile.txt")); - assert!(!regex.is_match("dir/other.txt")); - } - - #[test] - fn pattern_double_slash() { - let rule = FlatPickRule { - patterns: vec!["a///b////c.txt".to_string()], - tasks: vec!["copy".to_string()], - }; - - let regex = rule.regex().unwrap(); - assert!(regex.is_match("a/b/c.txt")); - assert!(!regex.is_match("abc.txt")); - assert!(!regex.is_match("a/b/d.txt")); - } - - #[test] - fn pattern_double_slash_wildcards() { - let rule = FlatPickRule { - patterns: vec!["**///*.txt".to_string()], - tasks: 
vec!["copy".to_string()], - }; - - let regex = rule.regex().unwrap(); - assert!(regex.is_match("dir/file.txt")); - assert!(regex.is_match("dir/subdir/file.txt")); - assert!(!regex.is_match("file.jpg")); - } - - #[test] - fn pattern_slashes_around_wildcards() { - let rule = FlatPickRule { - patterns: vec!["dir//**//*.txt".to_string()], - tasks: vec!["copy".to_string()], - }; - - let regex = rule.regex().unwrap(); - assert!(regex.is_match("dir/subdir/file.txt")); - assert!(regex.is_match("dir/sub1/sub2/file.txt")); - assert!(!regex.is_match("other/sub/file.txt")); - assert!(!regex.is_match("dir/file.jpg")); - } -} diff --git a/src/manifest/mod.rs b/src/manifest/mod.rs new file mode 100644 index 0000000..6b23fb5 --- /dev/null +++ b/src/manifest/mod.rs @@ -0,0 +1,2 @@ +pub mod rule; +pub mod types; diff --git a/src/manifest/rule.rs b/src/manifest/rule.rs new file mode 100644 index 0000000..2f73011 --- /dev/null +++ b/src/manifest/rule.rs @@ -0,0 +1,357 @@ +use anyhow::{Result, bail}; +use regex::Regex; +use tracing::warn; + +#[derive(Debug)] +enum RegexSegment { + /// A single segment + Single(String), + + /// An optional doublestar segment + DoubleStar, +} + +impl RegexSegment { + /// Returns the regex pattern of this part, + /// prefixed with a /. + fn to_regex_part(&self, prev: Option<&Self>, next: Option<&Self>) -> String { + match (prev, self, next) { + // Consecutive single segments need a trailing slash + (_, Self::Single(x), Some(Self::Single(_))) => format!("{x}[/]"), + + // Terminal single segments don't need a trailing slash + (_, Self::Single(x), None) => x.to_owned(), + + // Neighboring doublestar is always responsible for slashes + (_, Self::Single(x), Some(Self::DoubleStar)) => x.to_owned(), + + // [^/]+ is a "segment" (a block of non-slash chars) + // The "base" doublestar pattern is a segment + // followed by zero or more segments prefixed by a slash. 
+ // + // No additional slashes + (None, Self::DoubleStar, None) => "((?:[^/]+(?:[/][^/]+)*)?)".into(), + + // Doublestars cannot be neighbors + (_, Self::DoubleStar, Some(Self::DoubleStar)) + | (Some(Self::DoubleStar), Self::DoubleStar, _) => { + unreachable!("consecutive doublestars must be reduced") + } + + // Leading slash + (Some(Self::Single(_)), Self::DoubleStar, None) => { + "((?:[/][^/]+(?:[/][^/]+)*)?)".into() + } + + // Trailing slash + (None, Self::DoubleStar, Some(Self::Single(_))) => { + "((?:[^/]+(?:[/][^/]+)*[/])?)".into() + } + + // Leading and trailing slash. + // Also, replace self with a [/] when empty. + (Some(Self::Single(_)), Self::DoubleStar, Some(Self::Single(_))) => { + "((?:[/][^/]+(?:[/][^/]+)*[/])|[/])".into() + } + } + } +} + +#[derive(Debug, Clone)] +pub struct FlatPickRule { + pub patterns: Vec, + pub tasks: Vec, +} + +impl FlatPickRule { + pub fn regex(&self) -> Result { + // Flatten pattern + // Double slashes are handled later + let pattern = self.patterns.join("/"); + + if pattern.ends_with("/") { + warn!("Pattern `{pattern}` has a trailing slash which will be ignored") + } + + if pattern.starts_with("/") { + warn!("Pattern `{pattern}` has a leading slash which will be ignored") + } + + // Split on slashes or stars + // This is a lot like .split("/"), but handles + // the edge case where ** is not delimited by slashes + // (`root**test` is equivalent to `root/**/test`) + let segments = { + #[expect(clippy::unwrap_used)] + let re = Regex::new("[*]{2,}|[/]").unwrap(); + let split = re.find_iter(&pattern); + + let bounds = split + .into_iter() + .flat_map(|x| { + let r = x.range(); + let a = r.start; + let b = r.end; + [a, b] + }) + .chain([pattern.len()]) + .collect::>(); + + let mut parts = Vec::new(); + let mut last = 0; + for next in bounds { + let seg = &pattern[last..next]; + // Consecutive slashes are identical to a single slash + if seg != "/" && !seg.is_empty() { + parts.push(seg); + } + last = next; + } + + parts + }; + + 
let mut rebuilt_segments = Vec::new(); + let mut last_was_doublestar = false; + for segment in segments { + // This is a wildcard regex + // (**, ***, etc) + if segment.len() > 1 && segment.chars().all(|x| x == '*') { + match segment { + "**" => { + // Consecutive doublestars are meaningless + if !last_was_doublestar { + rebuilt_segments.push(RegexSegment::DoubleStar); + } + last_was_doublestar = true; + } + _ => bail!("Invalid wildcard `{segment}`"), + } + continue; + } + last_was_doublestar = false; + + let parts = segment.split("*").collect::>(); + + let mut rebuilt = String::new(); + for (i, part) in parts.into_iter().enumerate() { + if i != 0 { + rebuilt.push_str("([^/]*)") + } + + rebuilt.push_str(&regex::escape(part)); + } + + rebuilt_segments.push(RegexSegment::Single(rebuilt)); + } + + let mut re_built = String::new(); + let mut prev = None; + for (i, seg) in rebuilt_segments.iter().enumerate() { + let next = rebuilt_segments.get(i + 1); + re_built.push_str(&seg.to_regex_part(prev, next)); + prev = Some(seg); + } + + let re_built = format!("^{re_built}$"); + // This regex should always be valid + #[expect(clippy::unwrap_used)] + Ok(Regex::new(&re_built).unwrap()) + } +} + +// +// MARK: tests +// + +#[cfg(test)] +#[expect(clippy::unwrap_used)] +mod tests { + use super::*; + + fn rule_regex(pattern: &[&str]) -> Regex { + let rule = FlatPickRule { + patterns: pattern.iter().map(|x| x.to_string()).collect(), + tasks: vec!["task".to_owned()], + }; + + return rule.regex().unwrap(); + } + + #[test] + fn simple() { + let regex = rule_regex(&["file.txt"]); + + assert!(regex.is_match("file.txt")); + assert!(!regex.is_match("other.txt")); + assert!(!regex.is_match("path/file.txt")); + } + + #[test] + fn simple_dir() { + let regex = rule_regex(&["dir", "file.txt"]); + + assert!(regex.is_match("dir/file.txt")); + assert!(!regex.is_match("file.txt")); + assert!(!regex.is_match("other/file.txt")); + } + + #[test] + fn simple_star() { + let regex = rule_regex(&["*.txt"]); +
+ assert!(regex.is_match("file.txt")); + assert!(regex.is_match("other.txt")); + assert!(!regex.is_match("file.jpg")); + assert!(!regex.is_match("nested/file.txt")); + } + + #[test] + fn simple_doublestar() { + let regex = rule_regex(&["**/*.txt"]); + + assert!(regex.is_match("file.txt")); + assert!(regex.is_match("dir/file.txt")); + assert!(regex.is_match("dir/subdir/file.txt")); + assert!(!regex.is_match("file.jpg")); + assert!(!regex.is_match("dir/file.jpg")); + } + + #[test] + fn consecutive_doublestar() { + let regex = rule_regex(&["**", "**", "**", "*.txt"]); + + assert!(regex.is_match("file.txt")); + assert!(regex.is_match("dir/file.txt")); + assert!(regex.is_match("dir/subdir/file.txt")); + assert!(!regex.is_match("file.jpg")); + assert!(!regex.is_match("dir/file.jpg")); + } + + #[test] + fn dual_star() { + let regex = rule_regex(&["**/*a*"]); + + assert!(regex.is_match("fileafile")); + assert!(regex.is_match("dir/fileafile")); + assert!(regex.is_match("filea")); + assert!(regex.is_match("dir/filea")); + assert!(regex.is_match("afile")); + assert!(regex.is_match("dir/afile")); + assert!(!regex.is_match("noletter")); + assert!(!regex.is_match("dir/noletter")); + } + + #[test] + fn single_end() { + let regex = rule_regex(&["**/*"]); + + assert!(regex.is_match("file")); + assert!(regex.is_match("dir/file")); + assert!(regex.is_match("a/b/c/dir/file")); + } + + #[test] + fn doublestar_end() { + let regex = rule_regex(&["root/**"]); + + assert!(regex.is_match("root/file")); + assert!(!regex.is_match("dir/file")); + } + + #[test] + fn doublestar_start() { + let regex = rule_regex(&["**/dir"]); + + assert!(regex.is_match("dir")); + assert!(regex.is_match("a/b/dir")); + assert!(!regex.is_match("dir/file")); + } + + #[test] + fn doublestar_adjacent_before() { + let regex = rule_regex(&["root/**test"]); + + assert!(regex.is_match("root/test")); + assert!(regex.is_match("root/a/test")); + assert!(regex.is_match("root/a/b/c/test")); + 
assert!(!regex.is_match("root/file")); + assert!(!regex.is_match("root/xxtest")); + } + + #[test] + fn doublestar_adjacent_after() { + let regex = rule_regex(&["root/test**"]); + + assert!(regex.is_match("root/test")); + assert!(regex.is_match("root/test/a")); + assert!(regex.is_match("root/test/a/b/c")); + assert!(!regex.is_match("root/testxx")); + assert!(!regex.is_match("root/file")); + } + + #[test] + fn doublestar_adjacent_middle() { + let regex = rule_regex(&["root/test**file"]); + + assert!(regex.is_match("root/test/file")); + assert!(regex.is_match("root/test/a/b/c/file")); + assert!(!regex.is_match("root/test")); + assert!(!regex.is_match("root/file")); + assert!(!regex.is_match("root/testfile")); + assert!(!regex.is_match("root/testxxfile")); + } + #[test] + fn doublestar_bad_extension() { + let regex = rule_regex(&["**.flac"]); + + assert!(regex.is_match("root/.flac")); + assert!(regex.is_match("root/a/.flac")); + assert!(!regex.is_match("root/test.flac")); + assert!(!regex.is_match("test.flac")); + assert!(!regex.is_match("root/test/a/b/c.flac")); + assert!(!regex.is_match("root/testflac")); + assert!(!regex.is_match("test.mp3")); + } + + #[test] + fn doublestar_good_extension() { + let regex = rule_regex(&["**/*.flac"]); + + assert!(regex.is_match("root/.flac")); + assert!(regex.is_match("root/a/.flac")); + assert!(regex.is_match("root/test.flac")); + assert!(regex.is_match("test.flac")); + assert!(regex.is_match("root/test/a/b/c.flac")); + assert!(!regex.is_match("root/testflac")); + assert!(!regex.is_match("test.mp3")); + } + + #[test] + fn multi_slash_a() { + let regex = rule_regex(&["dir//file.txt"]); + + assert!(regex.is_match("dir/file.txt")); + assert!(!regex.is_match("dirfile.txt")); + assert!(!regex.is_match("dir/other.txt")); + } + + #[test] + fn multi_slash_b() { + let regex = rule_regex(&["**///*.txt"]); + + assert!(regex.is_match("dir/file.txt")); + assert!(regex.is_match("dir/subdir/file.txt")); + assert!(!regex.is_match("file.jpg")); + } 
+ + #[test] + fn multi_slash_c() { + let regex = rule_regex(&["///dir//**//*.txt//"]); + + assert!(regex.is_match("dir/subdir/file.txt")); + assert!(regex.is_match("dir/sub1/sub2/file.txt")); + assert!(!regex.is_match("other/sub/file.txt")); + assert!(!regex.is_match("dir/file.jpg")); + } +} diff --git a/src/manifest/types.rs b/src/manifest/types.rs new file mode 100644 index 0000000..6f2e3ce --- /dev/null +++ b/src/manifest/types.rs @@ -0,0 +1,341 @@ +use anyhow::Result; +use indexmap::IndexMap; +use serde::Deserialize; +use std::path::{Path, PathBuf}; + +use crate::tool::ToolConfig; + +use super::rule::FlatPickRule; + +#[derive(Debug, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Manifest { + pub config: PickConfig, + pub tool: ToolConfig, + pub rules: PickRules, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct PickConfig { + #[serde(default)] + pub work_dir: Option, + + #[serde(default = "default_false")] + pub follow_links: bool, + + #[serde(default = "default_true")] + pub process_files: bool, + + #[serde(default = "default_false")] + pub process_dirs: bool, + + #[serde(default = "default_false")] + pub process_links: bool, +} + +impl PickConfig { + pub fn work_dir(&self, manifest_path: &Path) -> Result { + // Parent directory should always exist since manifest is a file. + #[expect(clippy::unwrap_used)] + let p = manifest_path.parent().unwrap().to_path_buf(); + + match &self.work_dir { + None => Ok(p), + Some(path) => { + if path.is_absolute() { + Ok(path.to_owned()) + } else { + Ok(std::path::absolute(p.join(path))?) 
+ } + } + } + } +} + +fn default_true() -> bool { + true +} + +fn default_false() -> bool { + false +} + +// +// MARK: rules +// + +#[derive(Debug, Clone, Deserialize)] +#[serde(untagged)] +pub enum OptVec { + Single(T), + Vec(Vec), +} + +impl OptVec { + pub fn len(&self) -> usize { + match self { + Self::Single(_) => 1, + Self::Vec(v) => v.len(), + } + } + + pub fn is_empty(&self) -> bool { + match self { + Self::Single(_) => false, + Self::Vec(v) => v.is_empty(), + } + } + + pub fn get(&self, idx: usize) -> Option<&T> { + match self { + Self::Single(t) => (idx == 0).then_some(t), + Self::Vec(v) => v.get(idx), + } + } +} +impl From> for Vec { + fn from(val: OptVec) -> Self { + match val { + OptVec::Single(t) => vec![t], + OptVec::Vec(v) => v, + } + } +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(untagged)] +#[serde(deny_unknown_fields)] +pub enum PickRule { + Plain(OptVec), + Nested(PickRules), +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(transparent)] +pub struct PickRules(OptVec>); + +impl PickRules { + pub fn iter(&self) -> PickRuleIterator<'_> { + PickRuleIterator { + stack: vec![PickRuleIterState { + rules: self, + map_index: 0, + entry_index: 0, + prefix: Vec::new(), + }], + } + } +} + +impl<'a> IntoIterator for &'a PickRules { + type Item = FlatPickRule; + type IntoIter = PickRuleIterator<'a>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +// +// MARK: rule iterator +// + +struct PickRuleIterState<'a> { + rules: &'a PickRules, + map_index: usize, + entry_index: usize, + prefix: Vec, +} +pub struct PickRuleIterator<'a> { + stack: Vec>, +} + +impl Iterator for PickRuleIterator<'_> { + type Item = FlatPickRule; + + fn next(&mut self) -> Option { + if self.stack.is_empty() { + return None; + } + + #[expect(clippy::unwrap_used)] + let current = self.stack.last_mut().unwrap(); + + if current.map_index >= current.rules.0.len() { + self.stack.pop(); + return self.next(); + } + + #[expect(clippy::unwrap_used)] + let current_map = 
&current.rules.0.get(current.map_index).unwrap(); + + if current.entry_index >= current_map.len() { + current.map_index += 1; + current.entry_index = 0; + return self.next(); + } + + #[expect(clippy::unwrap_used)] + let (key, value) = current_map.get_index(current.entry_index).unwrap(); + + current.entry_index += 1; + + match value { + PickRule::Plain(task) => { + let mut patterns = current.prefix.clone(); + patterns.push(key.to_string()); + + Some(FlatPickRule { + patterns, + tasks: task.clone().into(), + }) + } + PickRule::Nested(nested_rules) => { + let mut prefix = current.prefix.clone(); + prefix.push(key.to_string()); + + self.stack.push(PickRuleIterState { + rules: nested_rules, + map_index: 0, + entry_index: 0, + prefix, + }); + + self.next() + } + } + } +} + +// +// MARK: tests +// + +#[cfg(test)] +#[expect(clippy::unwrap_used)] +mod tests { + use super::*; + + #[derive(Debug, Clone, Deserialize)] + struct TestManifest { + rules: PickRules, + } + + #[test] + fn rule_ordering_preserved() { + let toml_str = r#" + [[rules]] + "third" = "c" + "first" = "a" + "second" = "b" + "#; + + let test_manifest: TestManifest = toml::from_str(toml_str).unwrap(); + let rules: Vec = test_manifest.rules.iter().collect(); + + assert_eq!(rules.len(), 3); + assert_eq!(rules[0].patterns, vec!["third"]); + assert_eq!(rules[0].tasks, vec!["c"]); + assert_eq!(rules[1].patterns, vec!["first"]); + assert_eq!(rules[1].tasks, vec!["a"]); + assert_eq!(rules[2].patterns, vec!["second"]); + assert_eq!(rules[2].tasks, vec!["b"]); + } + + #[test] + fn nested_rules_order() { + let toml_str = r#" + [[rules]] + "a" = "task_a" + "b" = "task_b" + + [[rules."nested"]] + "c" = "task_c" + "d" = "task_d" + + [[rules]] + "e" = "task_e" + "#; + + let test_manifest: TestManifest = toml::from_str(toml_str).unwrap(); + let rules: Vec = test_manifest.rules.iter().collect(); + + assert_eq!(rules.len(), 5); + assert_eq!(rules[0].patterns, vec!["a"]); + assert_eq!(rules[0].tasks, vec!["task_a"]); +
assert_eq!(rules[1].patterns, vec!["b"]); + assert_eq!(rules[1].tasks, vec!["task_b"]); + assert_eq!(rules[2].patterns, vec!["nested", "c"]); + assert_eq!(rules[2].tasks, vec!["task_c"]); + assert_eq!(rules[3].patterns, vec!["nested", "d"]); + assert_eq!(rules[3].tasks, vec!["task_d"]); + assert_eq!(rules[4].patterns, vec!["e"]); + assert_eq!(rules[4].tasks, vec!["task_e"]); + } + + #[test] + fn deeply_nested_rules() { + let toml_str = r#" + [[rules."a"."b"."c"]] + "d" = "task_d" + "#; + + let test_manifest: TestManifest = toml::from_str(toml_str).unwrap(); + let rules: Vec = test_manifest.rules.iter().collect(); + + assert_eq!(rules.len(), 1); + assert_eq!(rules[0].patterns, vec!["a", "b", "c", "d"]); + assert_eq!(rules[0].tasks, vec!["task_d"]); + } + + #[test] + fn multiple_maps_same_level() { + let toml_str = r#" + [[rules]] + "a1" = "copy" + "a2" = "ignore" + + [[rules]] + "b1" = "copy" + "b2" = "ignore" + "#; + + let test_manifest: TestManifest = toml::from_str(toml_str).unwrap(); + let rules: Vec = test_manifest.rules.iter().collect(); + + assert_eq!(rules.len(), 4); + assert_eq!(rules[0].patterns, vec!["a1"]); + assert_eq!(rules[0].tasks, vec!["copy"]); + assert_eq!(rules[1].patterns, vec!["a2"]); + assert_eq!(rules[1].tasks, vec!["ignore"]); + assert_eq!(rules[2].patterns, vec!["b1"]); + assert_eq!(rules[2].tasks, vec!["copy"]); + assert_eq!(rules[3].patterns, vec!["b2"]); + assert_eq!(rules[3].tasks, vec!["ignore"]); + } + + #[test] + fn empty_rules_list() { + let toml_str = " + [[rules]] + "; + + let test_manifest: TestManifest = toml::from_str(toml_str).unwrap(); + let rules: Vec = test_manifest.rules.iter().collect(); + + assert_eq!(rules.len(), 0); + } + + #[test] + fn mixed_rule_types() { + let toml_str = r#" + [[rules]] + "plain" = "copy" + "nested" = { invalid_as_string = true } + "#; + + let result = toml::from_str::(toml_str); + assert!(result.is_err()); + } +} diff --git a/src/prepare.rs b/src/prepare.rs new file mode 100644 index 
0000000..a8706a2 --- /dev/null +++ b/src/prepare.rs @@ -0,0 +1,161 @@ +use anyhow::{Context, Result, bail}; +use indicatif::ProgressBar; +use std::{ + path::{Path, PathBuf}, + time::Duration, +}; +use tracing::{error, trace}; +use walkdir::WalkDir; + +use crate::{Cli, manifest::types::Manifest, style::spinner_style_list, tool::TaskContext}; + +pub fn load_manifest(cli: &Cli) -> Result { + let manifest_path_str = cli + .manifest + .to_str() + .context("while converting path to string")?; + + if !cli.manifest.is_file() { + bail!("Manifest {manifest_path_str} isn't a file"); + } + + let manifest_string = match std::fs::read_to_string(&cli.manifest) { + Ok(x) => x, + Err(error) => { + bail!("Error while reading {manifest_path_str}: {error}"); + } + }; + + let manifest = match toml::from_str::(&manifest_string) { + Ok(manifest) => { + // Validate manifest + if manifest.config.follow_links && manifest.config.process_links { + bail!("Error: `follow_links` and `links` are mutually exclusive"); + } + + manifest + } + Err(error) => { + error!("{}", error.to_string()); + bail!("Error while parsing {manifest_path_str}"); + } + }; + + return Ok(manifest); +} + +pub fn list_queue(manifest: &Manifest, work_dir: &Path) -> Result> { + let rules = { + let mut rules = Vec::new(); + for rule in &manifest.rules { + rules.push((rule.regex()?, rule.tasks)); + } + rules + }; + + let source_path = std::path::absolute(work_dir)?; + let walker = WalkDir::new(&source_path).follow_links(manifest.config.follow_links); + + let mut queue = Vec::new(); + + let mut total = 0u64; + let spin = ProgressBar::new_spinner() + .with_style(spinner_style_list()) + .with_message(format!( + "Listing files {} queued, {} skipped", + queue.len(), + total - queue.len() as u64, + )); + spin.enable_steady_tick(Duration::from_millis(100)); + + for entry in walker { + total += 1; + + spin.set_message(format!( + "Listing files {} queued, {} skipped", + queue.len(), + total - queue.len() as u64, + )); + + let entry = 
entry?; + let path_abs = std::path::absolute(entry.path())?; + + // This path is a child of source_path, so this cannot fail + #[expect(clippy::unwrap_used)] + let path_rel = entry.path().strip_prefix(&source_path).unwrap(); + let path_rel = if path_rel.parent().is_none() { + // Make sure we never have empty string paths + // (makes logs clearer) + PathBuf::from(".").join(path_rel) + } else { + path_rel.to_path_buf() + }; + + let path_abs_str = path_abs + .to_str() + .context("could not convert path to string")? + .to_owned(); + let path_rel_str = path_rel + .to_str() + .context("could not convert path to string")? + .to_owned(); + + if path_abs.is_symlink() && !manifest.config.process_links { + trace!("Skipping {}, is a symlink", path_rel_str); + continue; + } + + if path_abs.is_dir() && !manifest.config.process_dirs { + trace!("Skipping {}, is a directory", path_rel_str); + continue; + } + + if path_abs.is_file() && !manifest.config.process_files { + trace!("Skipping {}, is a file", path_rel_str); + continue; + } + + let task = rules.iter().find(|(r, _)| r.is_match(&path_rel_str)); + + let tasks = match task { + None => { + trace!("Skipping {}, no match", path_rel_str); + continue; + } + Some(x) => { + let tasks: Vec = + x.1.iter() + .map(|x| x.trim()) + .filter(|x| !x.is_empty()) + .map(|x| x.to_owned()) + .collect(); + + if tasks.is_empty() { + trace!("Skipping {}", path_rel_str); + continue; + } + + tasks + } + }; + + let base_ctx = TaskContext { + task: "".into(), + path_abs, + path_abs_str, + path_rel, + path_rel_str, + }; + + for task in tasks { + let mut ctx = base_ctx.clone(); + ctx.task = task; + + queue.push(ctx); + } + } + + spin.finish(); + + return Ok(queue); +} diff --git a/src/util.rs b/src/style.rs similarity index 55% rename from src/util.rs rename to src/style.rs index a29080b..97ca81a 100644 --- a/src/util.rs +++ b/src/style.rs @@ -1,4 +1,24 @@ use anstyle::{AnsiColor, Color, Style}; +use indicatif::ProgressStyle; + 
+#[expect(clippy::unwrap_used)] +pub fn progress_style() -> ProgressStyle { + return ProgressStyle::default_bar() + .template( + " {spinner:.green} [{elapsed_precise}] [{bar:40.green/dim}] {pos:>7}/{len:7} {msg:.dim}", + ) + .unwrap() + .progress_chars("=>-") + .tick_strings(&["←", "↖", "↑", "↗", "→", "↘", "↓", "↙"]); +} + +#[expect(clippy::unwrap_used)] +pub fn spinner_style_list() -> ProgressStyle { + return ProgressStyle::default_bar() + .template(" {spinner:.green} {elapsed_precise:.dim} {msg:.dim}") + .unwrap() + .tick_strings(&["⎽", "⎼", "―", "⎻", "⎺", "⎻", "―", "⎼"]); +} pub fn get_styles() -> clap::builder::Styles { clap::builder::Styles::styled() diff --git a/src/tool/bash.rs b/src/tool/bash.rs index 9858392..438ce14 100644 --- a/src/tool/bash.rs +++ b/src/tool/bash.rs @@ -4,7 +4,7 @@ use std::io::Write; use std::{collections::HashMap, path::Path}; use tracing::{debug, error, trace, warn}; -use crate::manifest::PickConfig; +use crate::manifest::types::PickConfig; use super::{PickTool, TaskContext}; diff --git a/src/tool/mod.rs b/src/tool/mod.rs index 690d879..9db3b2c 100644 --- a/src/tool/mod.rs +++ b/src/tool/mod.rs @@ -8,7 +8,7 @@ use std::{ mod bash; pub use bash::*; -use crate::manifest::PickConfig; +use crate::manifest::types::PickConfig; pub trait PickTool: Debug + DeserializeOwned { /// Runs once, before all tasks diff --git a/test.toml b/test.toml deleted file mode 100644 index abd4179..0000000 --- a/test.toml +++ /dev/null @@ -1,59 +0,0 @@ -# All paths are relative to workdir. -# Workdir is this file's parent by default. -# If workdir is relative, it is relative to this file's parent. -[config] -work_dir = "./music" -# follow_links: if true, follow symlinks (default false) -# dirs: if true, act on directories (default false) -# files: if true, act on regular files (default true) -# links: if true, act on symlinks (default false. 
throw an error if this is provided with follow_links) - -[tool.bash] -script.test = """ -mkdir -p "$(dirname "../out/${PICK_RELATIVE}")" - -filename="${PICK_RELATIVE%.*}" - -ffmpeg \ - -i "${PICK_FILE}" \ - -map_metadata 0 \ - -id3v2_version 3 \ - -b:a 192k \ - -loglevel error \ - -hide_banner -n \ - "../out/${filename}.mp3" -""" - -script.ogg = """ -mkdir -p "$(dirname "../out/${PICK_RELATIVE}")" - -filename="${PICK_RELATIVE%.*}" - -ffmpeg \ - -i "${PICK_FILE}" \ - -c:v libtheora \ - -q:v 10 \ - -c:a libopus \ - -b:a 192k \ - -loglevel error \ - -hide_banner -n \ - "../out/${filename}.ogg" -""" - - -# The first rule to match a path is run. -# Paths are checked relative to source. -# "/source/path/to/file.gz" becomes "path/to/file.gz" -# -# a "path segment" is a single file or directory. -# -# * matches exactly one path segment. In regex, this is [^/]+ -# ** matches zero or more path segments. In regex, this is ([^/]+)* -# -# All rules are matched against the FULL PATH of files. -# Directories are ignored. -[[rules]] -"**.flac" = "test" - -[[rules]] -"**" = ""