Refactor
All checks were successful
CI / Typos (push) Successful in 8s
CI / Clippy (push) Successful in 54s
CI / Build and test (push) Successful in 52s

This commit is contained in:
2025-05-03 16:42:33 -07:00
parent b8302d3381
commit 64f45a443c
17 changed files with 1229 additions and 996 deletions

View File

@ -29,9 +29,9 @@ jobs:
sudo apt update
DEBIAN_FRONTEND=noninteractive \
sudo apt install --yes rustup
rustup default stable
- name: Run clippy
working-directory: ./index
run: cargo clippy --all-targets --all-features
buildandtest:
@ -45,11 +45,10 @@ jobs:
sudo apt update
DEBIAN_FRONTEND=noninteractive \
sudo apt install --yes rustup
rustup default stable
- name: Build
working-directory: ./index
run: cargo build --release
- name: Test
working-directory: ./index
run: cargo test --release

143
Cargo.lock generated
View File

@ -73,6 +73,12 @@ version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
[[package]]
name = "bumpalo"
version = "3.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
[[package]]
name = "cfg-if"
version = "1.0.0"
@ -125,6 +131,25 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
[[package]]
name = "console"
version = "0.15.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8"
dependencies = [
"encode_unicode",
"libc",
"once_cell",
"unicode-width",
"windows-sys",
]
[[package]]
name = "encode_unicode"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
[[package]]
name = "equivalent"
version = "1.0.2"
@ -182,12 +207,36 @@ dependencies = [
"serde",
]
[[package]]
name = "indicatif"
version = "0.17.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235"
dependencies = [
"console",
"number_prefix",
"portable-atomic",
"unicode-segmentation",
"unicode-width",
"web-time",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "js-sys"
version = "0.3.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
dependencies = [
"once_cell",
"wasm-bindgen",
]
[[package]]
name = "lazy_static"
version = "1.5.0"
@ -237,6 +286,12 @@ dependencies = [
"winapi",
]
[[package]]
name = "number_prefix"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
[[package]]
name = "once_cell"
version = "1.21.3"
@ -251,12 +306,13 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "pick"
version = "0.1.0"
version = "0.0.1"
dependencies = [
"anstyle",
"anyhow",
"clap",
"indexmap",
"indicatif",
"regex",
"serde",
"tempfile",
@ -272,6 +328,12 @@ version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
[[package]]
name = "portable-atomic"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e"
[[package]]
name = "proc-macro2"
version = "1.0.95"
@ -555,6 +617,18 @@ version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
[[package]]
name = "unicode-segmentation"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
[[package]]
name = "unicode-width"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd"
[[package]]
name = "utf8parse"
version = "0.2.2"
@ -586,6 +660,73 @@ dependencies = [
"wit-bindgen-rt",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
dependencies = [
"cfg-if",
"once_cell",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
dependencies = [
"bumpalo",
"log",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
dependencies = [
"unicode-ident",
]
[[package]]
name = "web-time"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "winapi"
version = "0.3.9"

View File

@ -1,6 +1,6 @@
[package]
name = "pick"
version = "0.1.0"
version = "0.0.1"
edition = "2024"
#
@ -69,3 +69,4 @@ tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }
walkdir = "2.5.0"
tempfile = "3.10.1"
anyhow = "1.0.98"
indicatif = { version = "0.17.11", features = ["improved_unicode"] }

100
README.md Normal file
View File

@ -0,0 +1,100 @@
# ⛏️ Pick
Pick is a utility that processes files based on pattern matching rules.
## Usage
- `pick manifest.toml` to run a task
- `pick --help` for documentation
- `pick gen manifest.toml` to generate a sample manifest
A detailed manifest specification is below.
# Writing manifests
A pick manifest is a TOML file with three main sections:
- `config`: Global configuration settings
- `tool`: Tool configuration
- `rules`: Patterns that tell us which files to process
See [`sample.toml`](./sample.toml) for an example configuration.
## Selector Pattern Syntax
Pick uses patterns to select files to process.
- Patterns match against the full file path relative to the source directory
- The first matching rule is applied to each file. Once a rule matches, all others are ignored.
- Patterns are matched in the order they are defined.
- Leading and trailing slashes are ignored
- Multiple consecutive slashes are treated as a single slash
### Wildcards
- `*`: Matches exactly one path segment (one directory or filename component)
- `**`: Matches zero or more path segments (can span across multiple directories)
### Pattern Rules
### Syntax Examples
| Pattern | Description | Matches | Doesn't Match |
|---------|-------------|---------|---------------|
| `file.txt` | Exact file match | `file.txt` | `other.txt`, `dir/file.txt` |
| `dir/file.txt` | Exact path match | `dir/file.txt` | `file.txt`, `other/file.txt` |
| `*.txt` | Any file with .txt extension in root | `file.txt`, `other.txt` | `file.jpg`, `dir/file.txt` |
| `**/*.txt` | Any .txt file anywhere | `file.txt`, `dir/file.txt`, `a/b/c.txt` | `file.jpg` |
| `dir/**` | Any file under dir | `dir/file.txt`, `dir/sub/file.jpg` | `root/file.txt` |
| `**/dir` | Any dir named "dir" | `dir`, `a/b/dir` | `dir/file`, `dirname` |
| `root/**test` (same as `root/**/test`) | Files named "test" in any subdir of root | `root/test`, `root/a/b/test` | `root/testfile`, `root/file` |
## TOML Rule Structure
### Simple rules:
```toml
[[rules]]
"a/**" = "task"
"b/**" = "task"
```
### Nested rules:
```toml
[[rules."a"]]
"1/**" = "task"
"2/**" = "task"
# Equivalent to:
[[rules]]
"a/1/**" = "task"
"a/2/**" = "task"
```
Nested rules may use wildcards:
```toml
[[rules."a/**/"]]
"1/**" = "task"
"2/**" = "task"
# Equivalent to:
[[rules]]
"a/**/1/**" = "task"
"a/**/2/**" = "task"
```
# Tools
## Bash
Executes bash scripts. The following environment variables are available:
- `PICK_FILE`: Absolute path to the current file
- `PICK_RELATIVE`: Relative path (from the source directory)

16
default.nix Normal file
View File

@ -0,0 +1,16 @@
# Nix package definition for `pick`, built with `rustPlatform.buildRustPackage`.
{ lib, fetchgit, rustPlatform }:
rustPlatform.buildRustPackage rec {
  pname = "pick";
  version = "0.0.1";

  # Cargo dependencies are pinned by the lockfile shipped in the source tree.
  cargoLock.lockFile = src + /Cargo.lock;

  # The source is fetched from the git tag matching `version`.
  src = builtins.fetchGit {
    url = "ssh://git@git.betalupi.com:33/Mark/pick.git";
    ref = "refs/tags/v${version}";
  };

  meta = with lib; {
    description = "A utility that processes files based on pattern matching rules";
    homepage = "https://git.betalupi.com/Mark/pick";
  };
}

48
sample.toml Normal file
View File

@ -0,0 +1,48 @@
# All paths are relative to workdir.
# Workdir is this file's parent by default.
# If workdir is relative, it is relative to this file's parent.
[config]
work_dir = "/mnt/hdd/media/Media/Music/Library"
# If true, follow symlinks
# Conflicts with `process_links`.
# follow_links = false
# If true, act on directories
# process_dirs = false
# If true, act on regular files
# process_files = true
# If true, act on symlinks.
# Conflicts with `follow_links`.
# process_links = false
# The first rule to match a path is run. Files that match no rules are ignored.
# Paths are checked relative to the source directory:
# e.g, "/source/path/to/file.gz" is matched as "path/to/file.gz"
#
# Each pattern is matched _exactly_ against the full path of each file.
# e.g, `file` will _only_ match `/source/file`. This is not a .gitignore.
# To match batches of files, use wildcards.
#
# Wildcards:
# - `*` matches exactly one path segment. It will _not_ match across directories.
# - `*.flac` matches all flac files
# - `/a/*/file.txt` matches all files named `file.txt` in subdirectories of `a`.
# this will _not_ match `/a/b/c/file.txt`.
#
# - `**` matches zero or more path segments.
# - `**` will match all files.
# - `**/*.flac` will match all files ending in ".flac"
# - The ** wildcard should always be surrounded by slashes.
# if you omit them, they will be implied. Avoid doing this:
# - `**.flac` is equivalent to `**/.flac`.
# - `a**b` is equivalent to `a/**/b`
#
# Tasks:
# - See the docs for a list of tasks we support.
# - An empty string (as below) always means "ignore this file".
[[rules]]
"**" = ""

View File

@ -132,8 +132,6 @@ impl LoggingPreset {
}
}
/// A pre-baked set of loglevel cli arguments.
///
/// # Usage
/// ```ignore
/// #[derive(Parser, Debug)]

View File

@ -1,39 +1,44 @@
use anyhow::{Context, Result};
use anyhow::Result;
use clap::Parser;
use indicatif::ProgressIterator;
use logging::LogCli;
use manifest::Manifest;
use std::{path::PathBuf, process::ExitCode};
use tool::{PickTool, TaskContext};
use style::progress_style;
use tool::PickTool;
use tracing::{debug, error, trace};
use walkdir::WalkDir;
pub mod logging;
pub mod manifest;
pub mod style;
pub mod tool;
pub mod util;
// enumerate files with a spinner (count size)
// trim everything
mod prepare;
// count size of files to process
// parallelism
// input from stdin?
// show progress
// capture/print stdout/stderr
// workdir vs root?
// workdir vs root
// package & auto-build
// Generate demo toml with contents
//
// Tools:
// - * list
// - *** rename
// - * list (m3u)
// - *** rename and retag
// - ** typst
// - *** retag
// - gitea pkg (POST)
// - s3
// - rsync
//
// Later:
// chain tools
// print output?
// pattern capture groups
// input from stdin
// improve docs (inline!)
// warn when no matches
/// Pick is a utility that processes files based on pattern matching rules.
#[derive(Parser, Debug)]
#[command(version, about, long_about = None, styles=util::get_styles())]
#[command(version, about, long_about = None, styles=style::get_styles())]
struct Cli {
#[command(flatten)]
log: LogCli,
@ -58,7 +63,9 @@ fn main_inner() -> Result<ExitCode> {
// MARK: setup
//
let cli = Cli::parse();
let mut cli = Cli::parse();
cli.manifest = std::path::absolute(&cli.manifest)?;
let cli = cli;
tracing_subscriber::fmt()
.with_env_filter(cli.log.to_preset().get_config())
@ -67,145 +74,27 @@ fn main_inner() -> Result<ExitCode> {
.with_writer(std::io::stderr)
.init();
let manifest_path_str = cli
.manifest
.to_str()
.context("while converting path to string")?;
let manifest = prepare::load_manifest(&cli)?;
if !cli.manifest.is_file() {
error!("Manifest {manifest_path_str} isn't a file");
return Ok(ExitCode::FAILURE);
}
let manifest_string = match std::fs::read_to_string(&cli.manifest) {
Ok(x) => x,
Err(error) => {
error!("Error while reading {manifest_path_str}: {error}");
return Ok(ExitCode::FAILURE);
}
};
let manifest = match toml::from_str::<Manifest>(&manifest_string) {
Ok(manifest) => {
// Validate manifest
if manifest.config.follow_links && manifest.config.links {
error!("Error: `follow_links` and `links` are mutually exclusive");
return Ok(ExitCode::FAILURE);
}
manifest
}
Err(error) => {
error!("Error while parsing {manifest_path_str}");
error!("{}", error.to_string());
return Ok(ExitCode::FAILURE);
}
};
let manifest_path = std::path::absolute(cli.manifest)?;
let work_dir = manifest.config.work_dir(&manifest_path)?;
let work_dir = manifest.config.work_dir(&cli.manifest)?;
debug!("Working directory is {work_dir:?}");
//
// MARK: rules
//
let rules = {
let mut rules = Vec::new();
for rule in &manifest.rules {
rules.push((rule.regex()?, rule.tasks));
}
rules
};
let source_path = std::path::absolute(&work_dir)?;
let walker = WalkDir::new(&source_path).follow_links(manifest.config.follow_links);
let queue = prepare::list_queue(&manifest, &work_dir)?;
#[expect(clippy::unwrap_used)] // Fix later
let bash = manifest.tool.bash.as_ref().unwrap();
bash.before(&manifest_path, &manifest.config)?;
bash.before(&cli.manifest, &manifest.config)?;
for entry in walker {
let entry = entry?;
let path_abs = std::path::absolute(entry.path())?;
// This path is a child of source_path, so this cannot fail
#[expect(clippy::unwrap_used)]
let path_rel = entry.path().strip_prefix(&source_path).unwrap();
let path_rel = if path_rel.parent().is_none() {
// Make sure we never have empty string paths
// (makes logs clearer)
PathBuf::from(".").join(path_rel)
} else {
path_rel.to_path_buf()
};
let path_abs_str = path_abs
.to_str()
.context("could not convert path to string")?
.to_owned();
let path_rel_str = path_rel
.to_str()
.context("could not convert path to string")?
.to_owned();
if path_abs.is_symlink() && !manifest.config.links {
trace!("Skipping {}, is a symlink", path_rel_str);
continue;
for ctx in queue
.into_iter()
.progress_with_style(progress_style())
.with_message("Processing")
{
trace!("Running `{}` on {}", ctx.task, ctx.path_rel_str);
bash.run(&cli.manifest, &manifest.config, ctx)?;
}
if path_abs.is_dir() && !manifest.config.dirs {
trace!("Skipping {}, is a directory", path_rel_str);
continue;
}
if path_abs.is_file() && !manifest.config.files {
trace!("Skipping {}, is a file", path_rel_str);
continue;
}
let task = rules.iter().find(|(r, _)| r.is_match(&path_rel_str));
let tasks = match task {
None => {
trace!("Skipping {}, no match", path_rel_str);
continue;
}
Some(x) => {
let tasks: Vec<String> =
x.1.iter()
.map(|x| x.trim())
.filter(|x| !x.is_empty())
.map(|x| x.to_owned())
.collect();
if tasks.is_empty() {
trace!("Skipping {}", path_rel_str);
continue;
}
tasks
}
};
let base_ctx = TaskContext {
task: "".into(),
path_abs,
path_abs_str,
path_rel,
path_rel_str,
};
for task in tasks {
trace!("Running `{task}` on {}", base_ctx.path_rel_str);
let mut ctx = base_ctx.clone();
ctx.task = task;
bash.run(&manifest_path, &manifest.config, ctx)?;
}
}
bash.after(&manifest_path, &manifest.config)?;
bash.after(&cli.manifest, &manifest.config)?;
return Ok(ExitCode::SUCCESS);
}

View File

@ -1,781 +0,0 @@
use anyhow::{Result, bail};
use indexmap::IndexMap;
use regex::Regex;
use serde::Deserialize;
use std::path::{Path, PathBuf};
use tracing::warn;
use crate::tool::ToolConfig;
/// Top-level structure of a manifest file, deserialized with serde.
///
/// `deny_unknown_fields` is set, so keys other than the three
/// sections below are rejected during deserialization.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Manifest {
/// Global configuration settings.
pub config: PickConfig,
/// Tool configuration.
pub tool: ToolConfig,
/// Rules mapping path patterns to tasks.
pub rules: PickRules,
}
/// The `config` section of a manifest.
///
/// Every field has a serde default, so all keys are optional.
/// Unknown keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct PickConfig {
/// Optional working directory. When unset, `work_dir()` falls
/// back to the manifest's parent directory.
#[serde(default)]
pub work_dir: Option<PathBuf>,
/// Whether symlinks are followed. Defaults to `false`.
#[serde(default = "default_false")]
pub follow_links: bool,
/// Whether regular files are processed. Defaults to `true`.
#[serde(default = "default_true")]
pub files: bool,
/// Whether directories are processed. Defaults to `false`.
#[serde(default = "default_false")]
pub dirs: bool,
/// Whether symlinks are processed. Defaults to `false`.
#[serde(default = "default_false")]
pub links: bool,
}
impl PickConfig {
pub fn work_dir(&self, manifest_path: &Path) -> Result<PathBuf> {
// Parent directory should always exist since manifest is a file.
#[expect(clippy::unwrap_used)]
let p = manifest_path.parent().unwrap().to_path_buf();
match &self.work_dir {
None => Ok(p),
Some(path) => {
if path.is_absolute() {
Ok(path.to_owned())
} else {
Ok(std::path::absolute(p.join(path))?)
}
}
}
}
}
/// Serde default helper: always yields `true`.
fn default_true() -> bool {
    true
}

/// Serde default helper: always yields `false`.
fn default_false() -> bool {
    false
}
//
// MARK: rules
//
/// Either a single value or a list of values.
///
/// Deserialized as an untagged enum, so the input may be written
/// as one bare `T` or as a sequence of `T`.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum OptVec<T: Clone> {
/// Exactly one value.
Single(T),
/// Zero or more values.
Vec(Vec<T>),
}
impl<T: Clone> OptVec<T> {
    /// Number of contained values. A `Single` always counts as one.
    pub fn len(&self) -> usize {
        if let Self::Vec(items) = self {
            items.len()
        } else {
            1
        }
    }

    /// `true` only for a `Vec` variant holding no values.
    pub fn is_empty(&self) -> bool {
        matches!(self, Self::Vec(items) if items.is_empty())
    }

    /// Returns the value at `idx`, treating `Single` as a one-element list.
    pub fn get(&self, idx: usize) -> Option<&T> {
        match self {
            Self::Vec(items) => items.get(idx),
            Self::Single(value) if idx == 0 => Some(value),
            Self::Single(_) => None,
        }
    }
}
impl<T: Clone> From<OptVec<T>> for Vec<T> {
    /// Flattens an [`OptVec`] into a plain vector; a `Single` becomes a
    /// one-element vector.
    fn from(val: OptVec<T>) -> Self {
        match val {
            OptVec::Vec(items) => items,
            OptVec::Single(item) => Vec::from([item]),
        }
    }
}
/// The value attached to one pattern key inside a rule table.
///
/// Deserialized as an untagged enum: the value is either
/// task text or another rule table.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
#[serde(deny_unknown_fields)]
pub enum PickRule {
/// One task, or a list of tasks, to run for the pattern.
Plain(OptVec<String>),
/// A nested rule table. When flattened by the rule iterator, the
/// parent key is prepended to each nested pattern.
Nested(PickRules),
}
/// The `rules` section of a manifest: one table, or a list of tables,
/// each mapping a pattern key to a [`PickRule`].
///
/// Tables are stored as `IndexMap`s; the tests in this file rely on
/// entries iterating in the order they were written.
#[derive(Debug, Clone, Deserialize)]
#[serde(transparent)]
pub struct PickRules(OptVec<IndexMap<String, PickRule>>);
impl PickRules {
    /// Returns an iterator that yields every rule in this table as a
    /// [`FlatPickRule`], descending into nested tables.
    pub fn iter(&self) -> PickRuleIterator<'_> {
        // Traversal starts at the first entry of the first map,
        // with no pattern prefix.
        let root = PickRuleIterState {
            rules: self,
            map_index: 0,
            entry_index: 0,
            prefix: Vec::new(),
        };

        PickRuleIterator { stack: vec![root] }
    }
}
impl<'a> IntoIterator for &'a PickRules {
type Item = FlatPickRule;
type IntoIter = PickRuleIterator<'a>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
//
// MARK: rule iterator
//
/// A single rule with nesting resolved, as yielded by [`PickRuleIterator`].
#[derive(Debug, Clone)]
pub struct FlatPickRule {
/// Pattern pieces, outermost nested key first. `regex()` joins
/// these with `/` to form the full pattern.
pub patterns: Vec<String>,
/// Tasks attached to this pattern.
pub tasks: Vec<String>,
}
/// One parsed piece of a pattern, used while building the final regex.
#[derive(Debug)]
enum RegexSegment {
/// A single path segment, stored as an already-built regex fragment
/// (literal text escaped, each `*` replaced by a capture group).
Single(String),
/// An optional doublestar segment (`**`), which may match
/// zero or more path segments.
DoubleStar,
}
impl RegexSegment {
/// Returns the regex fragment for this segment, given its neighbors.
///
/// Slash placement depends on context:
/// - a `Single` followed by another `Single` appends the `[/]` separator;
/// - a `Single` next to a `DoubleStar` (or at the end) emits no slash;
/// - a `DoubleStar` emits the slashes on whichever side has a `Single`.
///
/// Two adjacent `DoubleStar`s hit `unreachable!`; callers must
/// collapse consecutive doublestars first.
fn to_regex_part(&self, prev: Option<&Self>, next: Option<&Self>) -> String {
match (prev, self, next) {
// Consecutive single segments need a trailing slash
(_, Self::Single(x), Some(Self::Single(_))) => format!("{x}[/]"),
// Terminal single segments don't need a trailing slash
(_, Self::Single(x), None) => x.to_owned(),
// Neighboring doublestar is always responsible for slashes
(_, Self::Single(x), Some(Self::DoubleStar)) => x.to_owned(),
// [^/]+ is a "segment" (a block of non-slash chars)
// The "base" doublestar pattern is a segment
// followed by zero or more segments prefixed by a slash.
//
// No additional slashes
(None, Self::DoubleStar, None) => "((?:[^/]+(?:[/][^/]+)*)?)".into(),
// Doublestars cannot be neighbors
(_, Self::DoubleStar, Some(Self::DoubleStar))
| (Some(Self::DoubleStar), Self::DoubleStar, _) => {
unreachable!("consecutive doublestars must be reduced")
}
// Leading slash
(Some(Self::Single(_)), Self::DoubleStar, None) => {
"((?:[/][^/]+(?:[/][^/]+)*)?)".into()
}
// Trailing slash
(None, Self::DoubleStar, Some(Self::Single(_))) => {
"((?:[^/]+(?:[/][^/]+)*[/])?)".into()
}
// Leading and trailing slash.
// Also, replace self with a [/] when empty.
(Some(Self::Single(_)), Self::DoubleStar, Some(Self::Single(_))) => {
"((?:[/][^/]+(?:[/][^/]+)*[/])|[/])".into()
}
}
}
}
impl FlatPickRule {
/// Compiles this rule's pattern into an anchored [`Regex`].
///
/// Steps:
/// 1. Join `patterns` with `/`; warn if the result has a leading
///    or trailing slash.
/// 2. Split on `/` and on runs of two or more `*`. Empty pieces and
///    lone slashes are dropped, so repeated slashes collapse.
/// 3. A `**` piece becomes a doublestar segment, with consecutive
///    ones merged. Every other piece becomes a single segment:
///    literal text is escaped and each `*` becomes `([^/]*)`.
/// 4. Concatenate the fragments and wrap them in `^...$`.
///
/// # Errors
/// Returns an error for a wildcard run longer than `**` (e.g. `***`).
///
/// # Panics
/// Panics if the generated regex fails to compile; the code treats
/// this as impossible.
pub fn regex(&self) -> Result<Regex> {
// Flatten pattern
// Double slashes are handled later
let pattern = self.patterns.join("/");
if pattern.ends_with("/") {
warn!("Pattern `{pattern}` has a trailing slash which will be ignored")
}
if pattern.starts_with("/") {
warn!("Pattern `{pattern}` has a leading slash which will be ignored")
}
// Split on slashes or stars
// This is a lot like .split("/"), but handles
// the edge case where ** is not delimited by slashes
// (`root**test` is equivalent to `root/**/test`)
let segments = {
#[expect(clippy::unwrap_used)]
let re = Regex::new("[*]{2,}|[/]").unwrap();
let split = re.find_iter(&pattern);
// Collect the start and end offset of every separator match,
// plus the end of the pattern. Slicing between consecutive
// offsets yields both the text between separators and the
// separators themselves.
let bounds = split
.into_iter()
.flat_map(|x| {
let r = x.range();
let a = r.start;
let b = r.end;
[a, b]
})
.chain([pattern.len()])
.collect::<Vec<_>>();
let mut parts = Vec::new();
let mut last = 0;
for next in bounds {
let seg = &pattern[last..next];
// Consecutive slashes are identical to a single slash
if seg != "/" && !seg.is_empty() {
parts.push(seg);
}
last = next;
}
parts
};
let mut rebuilt_segments = Vec::new();
let mut last_was_doublestar = false;
for segment in segments {
// This is a wildcard regex
// (**, ***, etc)
if segment.len() > 1 && segment.chars().all(|x| x == '*') {
match segment {
"**" => {
// Consecutive doublestars are meaningless
if !last_was_doublestar {
rebuilt_segments.push(RegexSegment::DoubleStar);
}
last_was_doublestar = true;
}
_ => bail!("Invalid wildcard `{segment}`"),
}
continue;
}
last_was_doublestar = false;
// Within one segment, each `*` becomes a capture group that
// cannot cross a slash; the literal text around it is escaped.
let parts = segment.split("*").collect::<Vec<_>>();
let mut rebuilt = String::new();
for (i, part) in parts.into_iter().enumerate() {
if i != 0 {
rebuilt.push_str("([^/]*)")
}
rebuilt.push_str(&regex::escape(part));
}
rebuilt_segments.push(RegexSegment::Single(rebuilt));
}
// Stitch the fragments together; each segment decides its own
// slashes based on its neighbors.
let mut re_built = String::new();
let mut prev = None;
for (i, seg) in rebuilt_segments.iter().enumerate() {
let next = rebuilt_segments.get(i + 1);
re_built.push_str(&seg.to_regex_part(prev, next));
prev = Some(seg);
}
let re_built = format!("^{re_built}$");
// This regex should always be valid
#[expect(clippy::unwrap_used)]
Ok(Regex::new(&re_built).unwrap())
}
}
/// One frame of the rule iterator's traversal stack: a position
/// inside a single (possibly nested) [`PickRules`] table.
struct PickRuleIterState<'a> {
/// The rule table this frame walks.
rules: &'a PickRules,
/// Index of the current map within `rules`.
map_index: usize,
/// Index of the next entry to visit within the current map.
entry_index: usize,
/// Keys of the enclosing nested tables, outermost first.
prefix: Vec<String>,
}
/// Iterator over a [`PickRules`] table that yields [`FlatPickRule`]s,
/// descending into nested tables.
pub struct PickRuleIterator<'a> {
/// Traversal stack; the last element is the table currently being walked.
stack: Vec<PickRuleIterState<'a>>,
}
impl Iterator for PickRuleIterator<'_> {
    type Item = FlatPickRule;

    /// Yields the next flattened rule, depth-first and in definition order.
    ///
    /// A nested table is fully walked, with its key pushed onto the
    /// pattern prefix, before its parent's remaining entries.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // An empty stack means traversal is complete.
            let frame = self.stack.last_mut()?;

            // All maps of this table are done: return to the parent table.
            let Some(map) = frame.rules.0.get(frame.map_index) else {
                self.stack.pop();
                continue;
            };

            // The current map is done: move on to the next map.
            let Some((key, value)) = map.get_index(frame.entry_index) else {
                frame.map_index += 1;
                frame.entry_index = 0;
                continue;
            };
            frame.entry_index += 1;

            // Both rule kinds extend the current prefix with this entry's key.
            let mut path = frame.prefix.clone();
            path.push(key.to_string());

            match value {
                PickRule::Plain(task) => {
                    return Some(FlatPickRule {
                        patterns: path,
                        tasks: task.clone().into(),
                    });
                }
                PickRule::Nested(nested_rules) => {
                    self.stack.push(PickRuleIterState {
                        rules: nested_rules,
                        map_index: 0,
                        entry_index: 0,
                        prefix: path,
                    });
                }
            }
        }
    }
}
//
// MARK: tests
//
#[cfg(test)]
mod tests {
use super::*;
#[derive(Debug, Clone, Deserialize)]
struct TestManifest {
rules: PickRules,
}
#[test]
fn rule_ordering_preserved() {
let toml_str = r#"
[[rules]]
"third" = "c"
"first" = "a"
"second" = "b"
"#;
let test_manifest: TestManifest = toml::from_str(toml_str).unwrap();
let rules: Vec<FlatPickRule> = test_manifest.rules.iter().collect();
assert_eq!(rules.len(), 3);
assert_eq!(rules[0].patterns, vec!["third"]);
assert_eq!(rules[0].tasks, vec!["c"]);
assert_eq!(rules[1].patterns, vec!["first"]);
assert_eq!(rules[1].tasks, vec!["a"]);
assert_eq!(rules[2].patterns, vec!["second"]);
assert_eq!(rules[2].tasks, vec!["b"]);
}
#[test]
fn nested_rules_order() {
let toml_str = r#"
[[rules]]
"a" = "task_a"
"b" = "task_b"
[[rules."nested"]]
"c" = "task_c"
"d" = "task_d"
[[rules]]
"e" = "task_e"
"#;
let test_manifest: TestManifest = toml::from_str(toml_str).unwrap();
let rules: Vec<FlatPickRule> = test_manifest.rules.iter().collect();
assert_eq!(rules.len(), 5);
assert_eq!(rules[0].patterns, vec!["a"]);
assert_eq!(rules[0].tasks, vec!["task_a"]);
assert_eq!(rules[1].patterns, vec!["b"]);
assert_eq!(rules[1].tasks, vec!["task_b"]);
assert_eq!(rules[2].patterns, vec!["nested", "c"]);
assert_eq!(rules[2].tasks, vec!["task_c"]);
assert_eq!(rules[3].patterns, vec!["nested", "d"]);
assert_eq!(rules[3].tasks, vec!["task_d"]);
assert_eq!(rules[4].patterns, vec!["e"]);
assert_eq!(rules[4].tasks, vec!["task_e"]);
}
#[test]
fn deeply_nested_rules() {
let toml_str = r#"
[[rules."a"."b"."c"]]
"d" = "task_d"
"#;
let test_manifest: TestManifest = toml::from_str(toml_str).unwrap();
let rules: Vec<FlatPickRule> = test_manifest.rules.iter().collect();
assert_eq!(rules.len(), 1);
assert_eq!(rules[0].patterns, vec!["a", "b", "c", "d"]);
assert_eq!(rules[0].tasks, vec!["task_d"]);
}
#[test]
fn multiple_maps_same_level() {
let toml_str = r#"
[[rules]]
"a1" = "copy"
"a2" = "ignore"
[[rules]]
"b1" = "copy"
"b2" = "ignore"
"#;
let test_manifest: TestManifest = toml::from_str(toml_str).unwrap();
let rules: Vec<FlatPickRule> = test_manifest.rules.iter().collect();
assert_eq!(rules.len(), 4);
assert_eq!(rules[0].patterns, vec!["a1"]);
assert_eq!(rules[0].tasks, vec!["copy"]);
assert_eq!(rules[1].patterns, vec!["a2"]);
assert_eq!(rules[1].tasks, vec!["ignore"]);
assert_eq!(rules[2].patterns, vec!["b1"]);
assert_eq!(rules[2].tasks, vec!["copy"]);
assert_eq!(rules[3].patterns, vec!["b2"]);
assert_eq!(rules[3].tasks, vec!["ignore"]);
}
#[test]
fn empty_rules_list() {
let toml_str = r#"
[[rules]]
"#;
let test_manifest: TestManifest = toml::from_str(toml_str).unwrap();
let rules: Vec<FlatPickRule> = test_manifest.rules.iter().collect();
assert_eq!(rules.len(), 0);
}
#[test]
fn mixed_rule_types() {
let toml_str = r#"
[[rules]]
"plain" = "copy"
"nested" = { invalid_as_string = true }
"#;
let result = toml::from_str::<TestManifest>(toml_str);
assert!(result.is_err());
}
#[test]
fn pattern_simple() {
let rule = FlatPickRule {
patterns: vec!["file.txt".to_string()],
tasks: vec!["copy".to_string()],
};
let regex = rule.regex().unwrap();
assert!(regex.is_match("file.txt"));
assert!(!regex.is_match("other.txt"));
assert!(!regex.is_match("path/file.txt"));
}
#[test]
fn pattern_with_path() {
let rule = FlatPickRule {
patterns: vec!["dir".to_string(), "file.txt".to_string()],
tasks: vec!["copy".to_string()],
};
let regex = rule.regex().unwrap();
assert!(regex.is_match("dir/file.txt"));
assert!(!regex.is_match("file.txt"));
assert!(!regex.is_match("other/file.txt"));
}
#[test]
fn pattern_wildcard_simple() {
let rule = FlatPickRule {
patterns: vec!["*.txt".to_string()],
tasks: vec!["copy".to_string()],
};
let regex = rule.regex().unwrap();
assert!(regex.is_match("file.txt"));
assert!(regex.is_match("other.txt"));
assert!(!regex.is_match("file.jpg"));
assert!(!regex.is_match("nested/file.txt"));
}
#[test]
fn pattern_doublestar() {
let rule = FlatPickRule {
patterns: vec!["**".to_string(), "*.txt".to_string()],
tasks: vec!["copy".to_string()],
};
let regex = rule.regex().unwrap();
assert!(regex.is_match("file.txt"));
assert!(regex.is_match("dir/file.txt"));
assert!(regex.is_match("dir/subdir/file.txt"));
assert!(!regex.is_match("file.jpg"));
assert!(!regex.is_match("dir/file.jpg"));
}
#[test]
fn pattern_doublestar_consecutive() {
let rule = FlatPickRule {
patterns: vec![
"**".to_string(),
"**".to_string(),
"**".to_string(),
"*.txt".to_string(),
],
tasks: vec!["copy".to_string()],
};
let regex = rule.regex().unwrap();
assert!(regex.is_match("file.txt"));
assert!(regex.is_match("dir/file.txt"));
assert!(regex.is_match("dir/subdir/file.txt"));
assert!(!regex.is_match("file.jpg"));
assert!(!regex.is_match("dir/file.jpg"));
}
#[test]
fn pattern_wildcard_double_slash() {
let rule = FlatPickRule {
patterns: vec!["**/*.txt".to_string()],
tasks: vec!["copy".to_string()],
};
let regex = rule.regex().unwrap();
assert!(regex.is_match("file.txt"));
assert!(regex.is_match("dir/file.txt"));
assert!(regex.is_match("dir/subdir/file.txt"));
assert!(!regex.is_match("file.jpg"));
assert!(!regex.is_match("dir/file.jpg"));
}
#[test]
fn pattern_single_dual() {
let rule = FlatPickRule {
patterns: vec!["**/*a*".to_string()],
tasks: vec!["copy".to_string()],
};
let regex = rule.regex().unwrap();
assert!(regex.is_match("fileafile"));
assert!(regex.is_match("dir/fileafile"));
assert!(regex.is_match("filea"));
assert!(regex.is_match("dir/filea"));
assert!(regex.is_match("afile"));
assert!(regex.is_match("dir/afile"));
assert!(!regex.is_match("noletter"));
assert!(!regex.is_match("dir/noletter"));
}
#[test]
fn pattern_single_end() {
let rule = FlatPickRule {
patterns: vec!["**/*".to_string()],
tasks: vec!["copy".to_string()],
};
let regex = rule.regex().unwrap();
assert!(regex.is_match("file"));
assert!(regex.is_match("dir/file"));
}
#[test]
fn pattern_double_end() {
let rule = FlatPickRule {
patterns: vec!["root/**".to_string()],
tasks: vec!["copy".to_string()],
};
let regex = rule.regex().unwrap();
assert!(regex.is_match("root/file"));
assert!(!regex.is_match("dir/file"));
}
#[test]
fn pattern_double_start() {
let rule = FlatPickRule {
patterns: vec!["**/dir".to_string()],
tasks: vec!["copy".to_string()],
};
let regex = rule.regex().unwrap();
assert!(regex.is_match("dir"));
assert!(regex.is_match("a/b/dir"));
assert!(!regex.is_match("dir/file"));
}
#[test]
fn pattern_double_adjacent_before() {
let rule = FlatPickRule {
// equivalent to root/**/test
patterns: vec!["root/**test".to_string()],
tasks: vec!["copy".to_string()],
};
let regex = rule.regex().unwrap();
assert!(regex.is_match("root/test"));
assert!(regex.is_match("root/a/test"));
assert!(regex.is_match("root/a/b/c/test"));
assert!(!regex.is_match("root/file"));
assert!(!regex.is_match("root/xxtest"));
}
#[test]
fn pattern_double_adjacent_after() {
let rule = FlatPickRule {
// equivalent to root/test/**
patterns: vec!["root/test**".to_string()],
tasks: vec!["copy".to_string()],
};
let regex = rule.regex().unwrap();
assert!(regex.is_match("root/test"));
assert!(regex.is_match("root/test/a"));
assert!(regex.is_match("root/test/a/b/c"));
assert!(!regex.is_match("root/testxx"));
assert!(!regex.is_match("root/file"));
}
#[test]
fn pattern_bad_any_extension() {
let rule = FlatPickRule {
// equivalent to root/test/**
patterns: vec!["**.flac".to_string()],
tasks: vec!["copy".to_string()],
};
let regex = rule.regex().unwrap();
assert!(regex.is_match("root/.flac"));
assert!(regex.is_match("root/a/.flac"));
assert!(!regex.is_match("root/test.flac"));
assert!(!regex.is_match("test.flac"));
assert!(!regex.is_match("root/test/a/b/c.flac"));
assert!(!regex.is_match("root/testflac"));
assert!(!regex.is_match("test.mp3"));
}
#[test]
fn pattern_good_any_extension() {
    // `**/*.flac` is the working way to say "any .flac file at any depth".
    let regex = FlatPickRule {
        patterns: vec![String::from("**/*.flac")],
        tasks: vec![String::from("copy")],
    }
    .regex()
    .unwrap();

    for path in [
        "root/.flac",
        "root/a/.flac",
        "root/test.flac",
        "test.flac",
        "root/test/a/b/c.flac",
    ] {
        assert!(regex.is_match(path), "`{path}` should match");
    }
    for path in ["root/testflac", "test.mp3"] {
        assert!(!regex.is_match(path), "`{path}` should not match");
    }
}
#[test]
fn pattern_double_adjacent_between() {
    // `root/test**file` behaves like `root/test/**/file`.
    let regex = FlatPickRule {
        patterns: vec![String::from("root/test**file")],
        tasks: vec![String::from("copy")],
    }
    .regex()
    .unwrap();

    for path in ["root/test/file", "root/test/a/b/c/file"] {
        assert!(regex.is_match(path), "`{path}` should match");
    }
    for path in [
        "root/test",
        "root/file",
        "root/testfile",
        "root/testxxfile",
    ] {
        assert!(!regex.is_match(path), "`{path}` should not match");
    }
}
#[test]
fn pattern_double_slashes() {
    // A doubled slash in the pattern is treated like a single one.
    let regex = FlatPickRule {
        patterns: vec![String::from("dir//file.txt")],
        tasks: vec![String::from("copy")],
    }
    .regex()
    .unwrap();

    for path in ["dir/file.txt"] {
        assert!(regex.is_match(path), "`{path}` should match");
    }
    for path in ["dirfile.txt", "dir/other.txt"] {
        assert!(!regex.is_match(path), "`{path}` should not match");
    }
}
#[test]
fn pattern_double_slash() {
    // Longer runs of slashes collapse to a single separator as well.
    let regex = FlatPickRule {
        patterns: vec![String::from("a///b////c.txt")],
        tasks: vec![String::from("copy")],
    }
    .regex()
    .unwrap();

    for path in ["a/b/c.txt"] {
        assert!(regex.is_match(path), "`{path}` should match");
    }
    for path in ["abc.txt", "a/b/d.txt"] {
        assert!(!regex.is_match(path), "`{path}` should not match");
    }
}
#[test]
fn pattern_double_slash_wildcards() {
    // Repeated slashes between a doublestar and a star pattern are harmless.
    let regex = FlatPickRule {
        patterns: vec![String::from("**///*.txt")],
        tasks: vec![String::from("copy")],
    }
    .regex()
    .unwrap();

    for path in ["dir/file.txt", "dir/subdir/file.txt"] {
        assert!(regex.is_match(path), "`{path}` should match");
    }
    for path in ["file.jpg"] {
        assert!(!regex.is_match(path), "`{path}` should not match");
    }
}
#[test]
fn pattern_slashes_around_wildcards() {
    // Repeated slashes on both sides of a doublestar are harmless.
    let regex = FlatPickRule {
        patterns: vec![String::from("dir//**//*.txt")],
        tasks: vec![String::from("copy")],
    }
    .regex()
    .unwrap();

    for path in ["dir/subdir/file.txt", "dir/sub1/sub2/file.txt"] {
        assert!(regex.is_match(path), "`{path}` should match");
    }
    for path in ["other/sub/file.txt", "dir/file.jpg"] {
        assert!(!regex.is_match(path), "`{path}` should not match");
    }
}
}

2
src/manifest/mod.rs Normal file
View File

@ -0,0 +1,2 @@
pub mod rule;
pub mod types;

357
src/manifest/rule.rs Normal file
View File

@ -0,0 +1,357 @@
use anyhow::{Result, bail};
use regex::Regex;
use tracing::warn;
/// One piece of a path pattern, as seen by the regex builder.
#[derive(Debug)]
enum RegexSegment {
    /// An ordinary path segment. The string is already a regex fragment.
    Single(String),

    /// A `**` wildcard, which may also match nothing at all.
    DoubleStar,
}

impl RegexSegment {
    /// Renders this segment as a regex fragment.
    ///
    /// Slash placement depends on the neighbours:
    /// - a `Single` followed by another `Single` appends the separating `[/]`;
    /// - a `Single` next to a `DoubleStar` (or at the end) adds no slash;
    /// - a `DoubleStar` emits the slashes towards each `Single` neighbour.
    ///
    /// # Panics
    /// Panics if a `DoubleStar` has a `DoubleStar` neighbour; such runs
    /// must be collapsed before calling this.
    fn to_regex_part(&self, prev: Option<&Self>, next: Option<&Self>) -> String {
        match self {
            Self::Single(body) => match next {
                // Two plain segments in a row: this one owns the slash between them.
                Some(Self::Single(_)) => format!("{body}[/]"),
                // Last segment, or followed by a doublestar that handles the slash.
                None | Some(Self::DoubleStar) => body.to_owned(),
            },

            // `[^/]+` is one segment (a block of non-slash characters).
            // The base doublestar pattern is one segment followed by zero
            // or more slash-prefixed segments, all of it optional.
            Self::DoubleStar => match (prev, next) {
                // Nothing on either side: no extra slashes.
                (None, None) => "((?:[^/]+(?:[/][^/]+)*)?)".into(),

                (Some(Self::DoubleStar), _) | (_, Some(Self::DoubleStar)) => {
                    unreachable!("consecutive doublestars must be reduced")
                }

                // Segment before us only: every matched segment carries a leading slash.
                (Some(Self::Single(_)), None) => "((?:[/][^/]+(?:[/][^/]+)*)?)".into(),

                // Segment after us only: the match ends with a trailing slash.
                (None, Some(Self::Single(_))) => "((?:[^/]+(?:[/][^/]+)*[/])?)".into(),

                // Segments on both sides: slashes on both ends, and a bare
                // `[/]` when the doublestar itself matches nothing.
                (Some(Self::Single(_)), Some(Self::Single(_))) => {
                    "((?:[/][^/]+(?:[/][^/]+)*[/])|[/])".into()
                }
            },
        }
    }
}
#[derive(Debug, Clone)]
pub struct FlatPickRule {
pub patterns: Vec<String>,
pub tasks: Vec<String>,
}
impl FlatPickRule {
pub fn regex(&self) -> Result<Regex> {
// Flatten pattern
// Double slashes are handled later
let pattern = self.patterns.join("/");
if pattern.ends_with("/") {
warn!("Pattern `{pattern}` has a trailing slash which will be ignored")
}
if pattern.starts_with("/") {
warn!("Pattern `{pattern}` has a leading slash which will be ignored")
}
// Split on slashes or stars
// This is a lot like .split("/"), but handles
// the edge case where ** is not delimited by slashes
// (`root**test` is equivalent to `root/**/test`)
let segments = {
#[expect(clippy::unwrap_used)]
let re = Regex::new("[*]{2,}|[/]").unwrap();
let split = re.find_iter(&pattern);
let bounds = split
.into_iter()
.flat_map(|x| {
let r = x.range();
let a = r.start;
let b = r.end;
[a, b]
})
.chain([pattern.len()])
.collect::<Vec<_>>();
let mut parts = Vec::new();
let mut last = 0;
for next in bounds {
let seg = &pattern[last..next];
// Consecutive slashes are identical to a single slash
if seg != "/" && !seg.is_empty() {
parts.push(seg);
}
last = next;
}
parts
};
let mut rebuilt_segments = Vec::new();
let mut last_was_doublestar = false;
for segment in segments {
// This is a wildcard regex
// (**, ***, etc)
if segment.len() > 1 && segment.chars().all(|x| x == '*') {
match segment {
"**" => {
// Consecutive doublestars are meaningless
if !last_was_doublestar {
rebuilt_segments.push(RegexSegment::DoubleStar);
}
last_was_doublestar = true;
}
_ => bail!("Invalid wildcard `{segment}`"),
}
continue;
}
last_was_doublestar = false;
let parts = segment.split("*").collect::<Vec<_>>();
let mut rebuilt = String::new();
for (i, part) in parts.into_iter().enumerate() {
if i != 0 {
rebuilt.push_str("([^/]*)")
}
rebuilt.push_str(&regex::escape(part));
}
rebuilt_segments.push(RegexSegment::Single(rebuilt));
}
let mut re_built = String::new();
let mut prev = None;
for (i, seg) in rebuilt_segments.iter().enumerate() {
let next = rebuilt_segments.get(i + 1);
re_built.push_str(&seg.to_regex_part(prev, next));
prev = Some(seg);
}
let re_built = format!("^{re_built}$");
// This regex should always be valid
#[expect(clippy::unwrap_used)]
Ok(Regex::new(&re_built).unwrap())
}
}
//
// MARK: tests
//
#[cfg(test)]
#[expect(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Builds a rule from `pattern`, compiles it, and checks that every
    /// path in `accepted` matches and every path in `rejected` does not.
    fn check(pattern: &[&str], accepted: &[&str], rejected: &[&str]) {
        let regex = FlatPickRule {
            patterns: pattern.iter().map(|piece| (*piece).to_owned()).collect(),
            tasks: vec![String::from("task")],
        }
        .regex()
        .unwrap();

        for path in accepted {
            assert!(regex.is_match(path), "{pattern:?} should match `{path}`");
        }
        for path in rejected {
            assert!(!regex.is_match(path), "{pattern:?} should not match `{path}`");
        }
    }

    #[test]
    fn simple() {
        check(&["file.txt"], &["file.txt"], &["other.txt", "path/file.txt"]);
    }

    #[test]
    fn simple_dir() {
        check(
            &["dir", "file.txt"],
            &["dir/file.txt"],
            &["file.txt", "other/file.txt"],
        );
    }

    #[test]
    fn simple_star() {
        check(
            &["*.txt"],
            &["file.txt", "other.txt"],
            &["file.jpg", "nested/file.txt"],
        );
    }

    #[test]
    fn simple_doublestar() {
        check(
            &["**/*.txt"],
            &["file.txt", "dir/file.txt", "dir/subdir/file.txt"],
            &["file.jpg", "dir/file.jpg"],
        );
    }

    #[test]
    fn consecutive_doublestar() {
        check(
            &["**", "**", "**", "*.txt"],
            &["file.txt", "dir/file.txt", "dir/subdir/file.txt"],
            &["file.jpg", "dir/file.jpg"],
        );
    }

    #[test]
    fn dual_star() {
        check(
            &["**/*a*"],
            &[
                "fileafile",
                "dir/fileafile",
                "filea",
                "dir/filea",
                "afile",
                "dir/afile",
            ],
            &["noletter", "dir/noletter"],
        );
    }

    #[test]
    fn single_end() {
        check(&["**/*"], &["file", "dir/file", "a/b/c/dir/file"], &[]);
    }

    #[test]
    fn doublestar_end() {
        check(&["root/**"], &["root/file"], &["dir/file"]);
    }

    #[test]
    fn doublestar_start() {
        check(&["**/dir"], &["dir", "a/b/dir"], &["dir/file"]);
    }

    #[test]
    fn doublestar_adjacent_before() {
        check(
            &["root/**test"],
            &["root/test", "root/a/test", "root/a/b/c/test"],
            &["root/file", "root/xxtest"],
        );
    }

    #[test]
    fn doublestar_adjacent_after() {
        check(
            &["root/test**"],
            &["root/test", "root/test/a", "root/test/a/b/c"],
            &["root/testxx", "root/file"],
        );
    }

    #[test]
    fn doublestar_adjacent_middle() {
        check(
            &["root/test**file"],
            &["root/test/file", "root/test/a/b/c/file"],
            &[
                "root/test",
                "root/file",
                "root/testfile",
                "root/testxxfile",
            ],
        );
    }

    #[test]
    fn doublestar_bad_extension() {
        // `**.flac` only matches files named exactly `.flac`.
        check(
            &["**.flac"],
            &["root/.flac", "root/a/.flac"],
            &[
                "root/test.flac",
                "test.flac",
                "root/test/a/b/c.flac",
                "root/testflac",
                "test.mp3",
            ],
        );
    }

    #[test]
    fn doublestar_good_extension() {
        check(
            &["**/*.flac"],
            &[
                "root/.flac",
                "root/a/.flac",
                "root/test.flac",
                "test.flac",
                "root/test/a/b/c.flac",
            ],
            &["root/testflac", "test.mp3"],
        );
    }

    #[test]
    fn multi_slash_a() {
        check(
            &["dir//file.txt"],
            &["dir/file.txt"],
            &["dirfile.txt", "dir/other.txt"],
        );
    }

    #[test]
    fn multi_slash_b() {
        check(
            &["**///*.txt"],
            &["dir/file.txt", "dir/subdir/file.txt"],
            &["file.jpg"],
        );
    }

    #[test]
    fn multi_slash_c() {
        check(
            &["///dir//**//*.txt//"],
            &["dir/subdir/file.txt", "dir/sub1/sub2/file.txt"],
            &["other/sub/file.txt", "dir/file.jpg"],
        );
    }
}

341
src/manifest/types.rs Normal file
View File

@ -0,0 +1,341 @@
use anyhow::Result;
use indexmap::IndexMap;
use serde::Deserialize;
use std::path::{Path, PathBuf};
use crate::tool::ToolConfig;
use super::rule::FlatPickRule;
/// The whole manifest file, as deserialized from TOML.
///
/// Keys other than the three listed here are rejected
/// (`deny_unknown_fields`).
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Manifest {
/// General options (the `config` table).
pub config: PickConfig,
/// Tool configuration (the `tool` table).
pub tool: ToolConfig,
/// Pick rules (the `rules` table or array of tables).
pub rules: PickRules,
}
/// General options from the manifest's `config` table.
///
/// Every field is optional in the manifest; unknown keys are rejected.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct PickConfig {
/// Working directory as written in the manifest, if any.
/// Use [`PickConfig::work_dir`] to resolve it against the manifest's location.
#[serde(default)]
pub work_dir: Option<PathBuf>,
/// Whether the directory walk follows symlinks. Defaults to `false`.
#[serde(default = "default_false")]
pub follow_links: bool,
/// Whether regular files are processed. Defaults to `true`.
#[serde(default = "default_true")]
pub process_files: bool,
/// Whether directories are processed. Defaults to `false`.
#[serde(default = "default_false")]
pub process_dirs: bool,
/// Whether symlinks are processed. Defaults to `false`.
#[serde(default = "default_false")]
pub process_links: bool,
}
impl PickConfig {
    /// Resolves the working directory for the manifest at `manifest_path`.
    ///
    /// - No `work_dir` configured: the manifest's parent directory.
    /// - Absolute `work_dir`: returned unchanged.
    /// - Relative `work_dir`: resolved against the manifest's parent directory.
    ///
    /// The first and third cases return an absolute path.
    ///
    /// # Errors
    /// Returns an error if the path cannot be made absolute
    /// (for example, when the current directory is unavailable).
    ///
    /// # Panics
    /// Panics if `manifest_path` has no parent, i.e. it is empty or a
    /// filesystem root. A path to a file always has one.
    pub fn work_dir(&self, manifest_path: &Path) -> Result<PathBuf> {
        // Parent directory should always exist since manifest is a file.
        #[expect(clippy::unwrap_used)]
        let parent = manifest_path.parent().unwrap();

        // A bare file name such as `manifest.toml` has the empty path as
        // its parent. `std::path::absolute` rejects empty paths, so spell
        // out the current directory instead.
        let base = if parent.as_os_str().is_empty() {
            Path::new(".")
        } else {
            parent
        };

        match &self.work_dir {
            None => Ok(std::path::absolute(base)?),
            Some(path) if path.is_absolute() => Ok(path.to_owned()),
            Some(path) => Ok(std::path::absolute(base.join(path))?),
        }
    }
}
/// Serde default helper for options that are enabled unless stated otherwise.
fn default_true() -> bool {
true
}
/// Serde default helper for options that are disabled unless stated otherwise.
fn default_false() -> bool {
false
}
//
// MARK: rules
//
/// Either one bare value or a list of values.
///
/// Deserialized untagged, so a manifest may write `key = value`
/// or `key = [value, ...]` interchangeably.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum OptVec<T: Clone> {
/// A single value, treated like a list of length one.
Single(T),
/// An explicit list of values (may be empty).
Vec(Vec<T>),
}
impl<T: Clone> OptVec<T> {
    /// Views the contents as a slice; a single value is a slice of length one.
    fn as_slice(&self) -> &[T] {
        match self {
            Self::Single(item) => std::slice::from_ref(item),
            Self::Vec(items) => items.as_slice(),
        }
    }

    /// Number of values held (always 1 for `Single`).
    pub fn len(&self) -> usize {
        self.as_slice().len()
    }

    /// `true` only for an empty `Vec` variant.
    pub fn is_empty(&self) -> bool {
        self.as_slice().is_empty()
    }

    /// Returns the value at `idx`, or `None` when out of range.
    pub fn get(&self, idx: usize) -> Option<&T> {
        self.as_slice().get(idx)
    }
}
impl<T: Clone> From<OptVec<T>> for Vec<T> {
    /// Unwraps into a plain vector; a single value becomes a one-element vector.
    fn from(val: OptVec<T>) -> Self {
        match val {
            OptVec::Vec(items) => items,
            OptVec::Single(item) => Vec::from([item]),
        }
    }
}
/// The value stored under one pattern key in a rule table.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
#[serde(deny_unknown_fields)]
pub enum PickRule {
/// One task name or a list of task names for this pattern.
Plain(OptVec<String>),
/// Further rules nested under this pattern; the key becomes a prefix
/// of every pattern inside.
Nested(PickRules),
}
/// One rule table, or a list of rule tables, mapping pattern keys to rules.
///
/// An `IndexMap` is used so that entries keep the order in which they
/// appear in the manifest.
#[derive(Debug, Clone, Deserialize)]
#[serde(transparent)]
pub struct PickRules(OptVec<IndexMap<String, PickRule>>);
impl PickRules {
    /// Returns an iterator over all rules, flattened.
    ///
    /// Nested rules are visited in place, with the keys of their
    /// enclosing tables prepended to `patterns`.
    pub fn iter(&self) -> PickRuleIterator<'_> {
        let root = PickRuleIterState {
            rules: self,
            map_index: 0,
            entry_index: 0,
            prefix: Vec::new(),
        };

        PickRuleIterator { stack: vec![root] }
    }
}
impl<'a> IntoIterator for &'a PickRules {
type Item = FlatPickRule;
type IntoIter = PickRuleIterator<'a>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
//
// MARK: rule iterator
//
/// Position of the rule iterator within one nesting level.
struct PickRuleIterState<'a> {
// The rule tables being walked at this level.
rules: &'a PickRules,
// Index of the current table within `rules`.
map_index: usize,
// Index of the next entry to visit within the current table.
entry_index: usize,
// Keys of the enclosing nested rules, outermost first.
prefix: Vec<String>,
}
/// Iterator over [`PickRules`] that yields [`FlatPickRule`]s.
///
/// Created by [`PickRules::iter`].
pub struct PickRuleIterator<'a> {
// One state per nesting level currently being walked; the innermost level is last.
stack: Vec<PickRuleIterState<'a>>,
}
impl Iterator for PickRuleIterator<'_> {
    type Item = FlatPickRule;

    /// Yields the next plain rule, descending into nested rules as they
    /// are encountered. Returns `None` once every level is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // Nothing left on the stack: iteration is over.
            let current = self.stack.last_mut()?;

            // All tables at this level are done: go back to the parent level.
            let Some(current_map) = current.rules.0.get(current.map_index) else {
                self.stack.pop();
                continue;
            };

            // This table is done: move on to the next table at this level.
            let Some((key, value)) = current_map.get_index(current.entry_index) else {
                current.map_index += 1;
                current.entry_index = 0;
                continue;
            };
            current.entry_index += 1;

            // Both kinds of entry extend the inherited prefix with their own key.
            let mut patterns = current.prefix.clone();
            patterns.push(key.to_string());

            match value {
                PickRule::Plain(task) => {
                    return Some(FlatPickRule {
                        patterns,
                        tasks: task.clone().into(),
                    });
                }
                PickRule::Nested(nested_rules) => {
                    // Walk the nested rules before resuming this level.
                    self.stack.push(PickRuleIterState {
                        rules: nested_rules,
                        map_index: 0,
                        entry_index: 0,
                        prefix: patterns,
                    });
                }
            }
        }
    }
}
//
// MARK: tests
//
#[cfg(test)]
#[expect(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Minimal manifest holding only the `rules` key.
    #[derive(Debug, Clone, Deserialize)]
    struct TestManifest {
        rules: PickRules,
    }

    /// Parses `toml_str` and returns its rules in iteration order.
    fn flatten(toml_str: &str) -> Vec<FlatPickRule> {
        let test_manifest: TestManifest = toml::from_str(toml_str).unwrap();
        test_manifest.rules.iter().collect()
    }

    /// Checks `rules` against `(patterns, task)` pairs, in order.
    fn assert_rules(rules: &[FlatPickRule], expected: &[(&[&str], &str)]) {
        assert_eq!(rules.len(), expected.len());
        for (rule, (patterns, task)) in rules.iter().zip(expected) {
            assert_eq!(rule.patterns, *patterns);
            assert_eq!(rule.tasks, [*task]);
        }
    }

    #[test]
    fn rule_ordering_preserved() {
        let rules = flatten(
            r#"
[[rules]]
"third" = "c"
"first" = "a"
"second" = "b"
"#,
        );

        assert_rules(
            &rules,
            &[(&["third"], "c"), (&["first"], "a"), (&["second"], "b")],
        );
    }

    #[test]
    fn nested_rules_order() {
        let rules = flatten(
            r#"
[[rules]]
"a" = "task_a"
"b" = "task_b"
[[rules."nested"]]
"c" = "task_c"
"d" = "task_d"
[[rules]]
"e" = "task_e"
"#,
        );

        assert_rules(
            &rules,
            &[
                (&["a"], "task_a"),
                (&["b"], "task_b"),
                (&["nested", "c"], "task_c"),
                (&["nested", "d"], "task_d"),
                (&["e"], "task_e"),
            ],
        );
    }

    #[test]
    fn deeply_nested_rules() {
        let rules = flatten(
            r#"
[[rules."a"."b"."c"]]
"d" = "task_d"
"#,
        );

        assert_rules(&rules, &[(&["a", "b", "c", "d"], "task_d")]);
    }

    #[test]
    fn multiple_maps_same_level() {
        let rules = flatten(
            r#"
[[rules]]
"a1" = "copy"
"a2" = "ignore"
[[rules]]
"b1" = "copy"
"b2" = "ignore"
"#,
        );

        assert_rules(
            &rules,
            &[
                (&["a1"], "copy"),
                (&["a2"], "ignore"),
                (&["b1"], "copy"),
                (&["b2"], "ignore"),
            ],
        );
    }

    #[test]
    fn empty_rules_list() {
        let rules = flatten(
            "
[[rules]]
",
        );

        assert_rules(&rules, &[]);
    }

    #[test]
    fn mixed_rule_types() {
        // A nested table whose value is neither task names nor further
        // rules must be rejected.
        let toml_str = r#"
[[rules]]
"plain" = "copy"
"nested" = { invalid_as_string = true }
"#;

        assert!(toml::from_str::<TestManifest>(toml_str).is_err());
    }
}

161
src/prepare.rs Normal file
View File

@ -0,0 +1,161 @@
use anyhow::{Context, Result, bail};
use indicatif::ProgressBar;
use std::{
path::{Path, PathBuf},
time::Duration,
};
use tracing::{error, trace};
use walkdir::WalkDir;
use crate::{Cli, manifest::types::Manifest, style::spinner_style_list, tool::TaskContext};
pub fn load_manifest(cli: &Cli) -> Result<Manifest> {
let manifest_path_str = cli
.manifest
.to_str()
.context("while converting path to string")?;
if !cli.manifest.is_file() {
bail!("Manifest {manifest_path_str} isn't a file");
}
let manifest_string = match std::fs::read_to_string(&cli.manifest) {
Ok(x) => x,
Err(error) => {
bail!("Error while reading {manifest_path_str}: {error}");
}
};
let manifest = match toml::from_str::<Manifest>(&manifest_string) {
Ok(manifest) => {
// Validate manifest
if manifest.config.follow_links && manifest.config.process_links {
bail!("Error: `follow_links` and `links` are mutually exclusive");
}
manifest
}
Err(error) => {
error!("{}", error.to_string());
bail!("Error while parsing {manifest_path_str}");
}
};
return Ok(manifest);
}
/// Walks `work_dir` and builds the list of tasks to run.
///
/// Every entry is matched against the manifest's rules by its path
/// relative to `work_dir`. The first matching rule decides; each of its
/// non-empty task names produces one [`TaskContext`]. Entries are skipped
/// when their kind (file, directory, symlink) is disabled in the config,
/// when no rule matches, or when the matching rule has no tasks.
///
/// A spinner shows progress while listing.
///
/// # Errors
/// Returns an error if a rule is invalid, the directory walk fails, or a
/// path cannot be made absolute or is not valid UTF-8.
pub fn list_queue(manifest: &Manifest, work_dir: &Path) -> Result<Vec<TaskContext>> {
    // Compile every rule once, in manifest order.
    let mut rules = Vec::new();
    for rule in &manifest.rules {
        rules.push((rule.regex()?, rule.tasks));
    }

    let source_path = std::path::absolute(work_dir)?;
    let walker = WalkDir::new(&source_path).follow_links(manifest.config.follow_links);

    let mut queue: Vec<TaskContext> = Vec::new();

    // `queue` holds one item per (entry, task) pair, so its length can
    // exceed the number of entries walked. Count entries separately
    // instead of deriving "skipped" from the queue length.
    let mut seen = 0u64;
    let mut accepted = 0u64;
    let status = |queued: usize, skipped: u64| {
        format!("Listing files {} queued, {} skipped", queued, skipped)
    };

    let spin = ProgressBar::new_spinner()
        .with_style(spinner_style_list())
        .with_message(status(queue.len(), seen - accepted));
    spin.enable_steady_tick(Duration::from_millis(100));

    for entry in walker {
        // Report the entries finished so far, then start on this one.
        spin.set_message(status(queue.len(), seen - accepted));
        seen += 1;

        let entry = entry?;
        let path_abs = std::path::absolute(entry.path())?;

        // This path is a child of source_path, so this cannot fail
        #[expect(clippy::unwrap_used)]
        let path_rel = entry.path().strip_prefix(&source_path).unwrap();
        let path_rel = if path_rel.parent().is_none() {
            // Make sure we never have empty string paths
            // (makes logs clearer)
            PathBuf::from(".").join(path_rel)
        } else {
            path_rel.to_path_buf()
        };

        let path_abs_str = path_abs
            .to_str()
            .context("could not convert path to string")?
            .to_owned();
        let path_rel_str = path_rel
            .to_str()
            .context("could not convert path to string")?
            .to_owned();

        if path_abs.is_symlink() && !manifest.config.process_links {
            trace!("Skipping {}, is a symlink", path_rel_str);
            continue;
        }
        if path_abs.is_dir() && !manifest.config.process_dirs {
            trace!("Skipping {}, is a directory", path_rel_str);
            continue;
        }
        if path_abs.is_file() && !manifest.config.process_files {
            trace!("Skipping {}, is a file", path_rel_str);
            continue;
        }

        // The first rule that matches decides what happens to this path.
        let Some((_, rule_tasks)) = rules.iter().find(|(r, _)| r.is_match(&path_rel_str)) else {
            trace!("Skipping {}, no match", path_rel_str);
            continue;
        };

        // Blank task names mean "do nothing".
        let tasks: Vec<String> = rule_tasks
            .iter()
            .map(|x| x.trim())
            .filter(|x| !x.is_empty())
            .map(|x| x.to_owned())
            .collect();
        if tasks.is_empty() {
            trace!("Skipping {}", path_rel_str);
            continue;
        }

        accepted += 1;

        let base_ctx = TaskContext {
            task: "".into(),
            path_abs,
            path_abs_str,
            path_rel,
            path_rel_str,
        };
        for task in tasks {
            let mut ctx = base_ctx.clone();
            ctx.task = task;
            queue.push(ctx);
        }
    }

    // Leave the final counts on screen.
    spin.set_message(status(queue.len(), seen - accepted));
    spin.finish();

    Ok(queue)
}

View File

@ -1,4 +1,24 @@
use anstyle::{AnsiColor, Color, Style};
use indicatif::ProgressStyle;
/// Style for progress bars: spinner, elapsed time, a 40-column bar,
/// position/length counters and a dimmed message.
///
/// The template is a fixed string, so the `unwrap` can only fail if that
/// literal itself is malformed.
// NOTE(review): the tick strings below read as empty strings in this view;
// confirm the intended spinner glyphs are present in the real file.
#[expect(clippy::unwrap_used)]
pub fn progress_style() -> ProgressStyle {
return ProgressStyle::default_bar()
.template(
" {spinner:.green} [{elapsed_precise}] [{bar:40.green/dim}] {pos:>7}/{len:7} {msg:.dim}",
)
.unwrap()
.progress_chars("=>-")
.tick_strings(&["", "", "", "", "", "", "", ""]);
}
/// Style for the listing spinner: spinner, dimmed elapsed time and a
/// dimmed message, with no bar.
///
/// The template is a fixed string, so the `unwrap` can only fail if that
/// literal itself is malformed.
// NOTE(review): the tick strings below read as empty strings in this view;
// confirm the intended spinner glyphs are present in the real file.
#[expect(clippy::unwrap_used)]
pub fn spinner_style_list() -> ProgressStyle {
return ProgressStyle::default_bar()
.template(" {spinner:.green} {elapsed_precise:.dim} {msg:.dim}")
.unwrap()
.tick_strings(&["", "", "", "", "", "", "", ""]);
}
pub fn get_styles() -> clap::builder::Styles {
clap::builder::Styles::styled()

View File

@ -4,7 +4,7 @@ use std::io::Write;
use std::{collections::HashMap, path::Path};
use tracing::{debug, error, trace, warn};
use crate::manifest::PickConfig;
use crate::manifest::types::PickConfig;
use super::{PickTool, TaskContext};

View File

@ -8,7 +8,7 @@ use std::{
mod bash;
pub use bash::*;
use crate::manifest::PickConfig;
use crate::manifest::types::PickConfig;
pub trait PickTool: Debug + DeserializeOwned {
/// Runs once, before all tasks

View File

@ -1,59 +0,0 @@
# All paths are relative to workdir.
# Workdir is this file's parent by default.
# If workdir is relative, it is relative to this file's parent.
[config]
work_dir = "./music"
# follow_links: if true, follow symlinks (default false)
# dirs: if true, act on directories (default false)
# files: if true, act on regular files (default true)
# links: if true, act on symlinks (default false. throw an error if this is provided with follow_links)
[tool.bash]
script.test = """
mkdir -p "$(dirname "../out/${PICK_RELATIVE}")"
filename="${PICK_RELATIVE%.*}"
ffmpeg \
-i "${PICK_FILE}" \
-map_metadata 0 \
-id3v2_version 3 \
-b:a 192k \
-loglevel error \
-hide_banner -n \
"../out/${filename}.mp3"
"""
script.ogg = """
mkdir -p "$(dirname "../out/${PICK_RELATIVE}")"
filename="${PICK_RELATIVE%.*}"
ffmpeg \
-i "${PICK_FILE}" \
-c:v libtheora \
-q:v 10 \
-c:a libopus \
-b:a 192k \
-loglevel error \
-hide_banner -n \
"../out/${filename}.ogg"
"""
# The first rule to match a path is run.
# Paths are checked relative to source.
# "/source/path/to/file.gz" becomes "path/to/file.gz"
#
# a "path segment" is a single file or directory.
#
# * matches any run of characters within a single path segment (it never crosses a slash). In regex, this is [^/]*
# ** matches zero or more whole path segments. In regex, this is roughly ([^/]+(/[^/]+)*)?
#
# All rules are matched against the FULL PATH of files.
# Directories are ignored.
[[rules]]
"**.flac" = "test"
[[rules]]
"**" = ""