mirror of https://github.com/rm-dr/datapath.git synced 2025-12-07 20:04:13 -08:00
commit 08003a3fbe (parent f51162478b)
committed by Mark on 2025-12-03 12:57:10 -08:00
8 changed files with 1000 additions and 6 deletions

Cargo.lock (generated): 186 lines changed

@@ -2,6 +2,15 @@
# It is not intended for manual editing.
version = 4
[[package]]
name = "aho-corasick"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
"memchr",
]
[[package]]
name = "bumpalo"
version = "3.19.0"
@@ -14,23 +23,68 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "datapath"
version = "0.0.2"
version = "0.0.3"
dependencies = [
"datapath-macro",
"itertools",
"regex",
"tokio",
"tracing",
"trie-rs",
"uuid",
]
[[package]]
name = "datapath-macro"
version = "0.0.2"
version = "0.0.3"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "fid-rs"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6956a1e60e2d1412b44b4169d44a03dae518f8583d3e10090c912c105e48447"
dependencies = [
"rayon",
]
[[package]]
name = "getrandom"
version = "0.3.4"
@@ -43,6 +97,15 @@ dependencies = [
"wasip2",
]
[[package]]
name = "itertools"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
dependencies = [
"either",
]
[[package]]
name = "js-sys"
version = "0.3.83"
@@ -59,12 +122,33 @@ version = "0.2.178"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091"
[[package]]
name = "louds-rs"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "936de6c22f08e7135a921f8ada907acd0d88880c4f42b5591f634b9f1dd8e07f"
dependencies = [
"fid-rs",
]
[[package]]
name = "memchr"
version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
[[package]]
name = "once_cell"
version = "1.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
[[package]]
name = "pin-project-lite"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
[[package]]
name = "proc-macro2"
version = "1.0.103"
@@ -89,6 +173,55 @@ version = "5.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
[[package]]
name = "rayon"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "regex"
version = "1.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
[[package]]
name = "rustversion"
version = "1.0.22"
@@ -106,6 +239,55 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "tokio"
version = "1.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408"
dependencies = [
"pin-project-lite",
]
[[package]]
name = "tracing"
version = "0.1.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d15d90a0b5c19378952d479dc858407149d7bb45a14de0142f6c534b16fc647"
dependencies = [
"pin-project-lite",
"tracing-attributes",
"tracing-core",
]
[[package]]
name = "tracing-attributes"
version = "0.1.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tracing-core"
version = "0.1.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c"
dependencies = [
"once_cell",
]
[[package]]
name = "trie-rs"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6f88f4b0a1ebd6c3d16be3e45eb0e8089372ccadd88849b7ca162ba64b5e6f6"
dependencies = [
"louds-rs",
]
[[package]]
name = "unicode-ident"
version = "1.0.22"


@@ -11,7 +11,7 @@ readme = "README.md"
authors = ["rm-dr"]
# Don't forget to bump datapath-macro below!
version = "0.0.2"
version = "0.0.3"
[workspace.lints.rust]
unused_import_braces = "deny"
@@ -70,12 +70,16 @@ cargo_common_metadata = "deny"
#
[workspace.dependencies]
datapath-macro = { path = "crates/datapath-macro", version = "0.0.2" }
datapath-macro = { path = "crates/datapath-macro", version = "0.0.3" }
datapath = { path = "crates/datapath" }
chrono = "0.4.42"
itertools = "0.14.0"
proc-macro2 = "1.0.103"
quote = "1.0.42"
regex = "1.12.2"
syn = "2.0.111"
tracing = "0.1"
trie-rs = "0.4.2"
uuid = "1.19.0"
tokio = { version = "1.48.0", features = ["sync"] }


@@ -549,7 +549,7 @@ fn generate_common_impls(
}
// Extract just the field names for struct construction
-let field_names = typed_fields.iter().map(|(name, _)| name);
+let field_names: Vec<_> = typed_fields.iter().map(|(name, _)| name).collect();
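// (collected into a Vec, presumably so the names can be interpolated in more
// than one quote! repetition: struct construction above and the new `field` below)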
let datapath_impl = quote! {
impl ::datapath::Datapath for #struct_name {
@@ -600,6 +600,13 @@ fn generate_common_impls(
file,
})
}
fn field(&self, name: &str) -> Option<::std::string::String> {
match name {
#(stringify!(#field_names) => Some(self.#field_names.to_string()),)*
_ => None,
}
}
}
};
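
For a hypothetical struct with fields `domain: String` and `ts: u64` (names and types invented for illustration), the generated `field` method above would expand to roughly the following; actual macro output may differ in paths and hygiene.

fn field(&self, name: &str) -> Option<::std::string::String> {
    match name {
        // stringify!(domain) expands to the string literal "domain"
        "domain" => Some(self.domain.to_string()),
        "ts" => Some(self.ts.to_string()),
        _ => None,
    }
}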


@@ -17,5 +17,16 @@ workspace = true
[dependencies]
datapath-macro = { workspace = true }
regex = { workspace = true, optional = true }
tracing = { workspace = true, optional = true }
trie-rs = { workspace = true, optional = true }
itertools = { workspace = true, optional = true }
tokio = { workspace = true, optional = true }
[dev-dependencies]
uuid = { version = "1", features = ["v4"] }
[features]
default = []
index = ["dep:regex", "dep:trie-rs", "dep:tracing", "dep:itertools"]
tokio = ["dep:tokio"]


@@ -33,4 +33,8 @@ where
/// Parse a string as this datapath with a (possibly empty-string)
/// file, returning `None` if this string is invalid.
fn parse(path: &str) -> Option<DatapathFile<Self>>;
/// Get the string value of the field with the given name,
/// if it exists.
fn field(&self, name: &str) -> Option<String>;
}
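
A minimal usage sketch for the new accessor, assuming a hypothetical type `WebCrawl` whose `Datapath` impl comes from the `datapath` macro and which has a `domain` field (both names invented for illustration):

fn print_domain(path: &WebCrawl) {
    // A known field name returns that field's string value.
    if let Some(domain) = path.field("domain") {
        println!("domain = {domain}");
    }
    // Unknown field names return None rather than panicking.
    assert!(path.field("no_such_field").is_none());
}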


@@ -0,0 +1,395 @@
use itertools::Itertools;
use std::{collections::HashMap, fmt::Display, str::FromStr};
use trie_rs::map::{Trie, TrieBuilder};
mod rule;
/// A path segment in an [`AnyDatapath`]
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
enum PathSegment {
/// A constant value, like `web`
Constant(String),
/// A key=value partition, like `domain=gouletpens.com`
Value { key: String, value: String },
}
impl Display for PathSegment {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
PathSegment::Constant(x) => write!(f, "{x}"),
PathSegment::Value { key, value } => write!(f, "{key}={value}"),
}
}
}
impl FromStr for PathSegment {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
if s.contains("\n") {
return Err(());
}
if s.is_empty() {
return Err(());
}
return Ok(if s.contains("=") {
let mut s = s.split("=");
let key = s.next().ok_or(())?.to_owned();
let value = s.join("=");
Self::Value { key, value }
} else {
Self::Constant(s.to_owned())
});
}
}
//
// MARK: index
//
/// An in-memory cache of S3 paths.
#[derive(Debug)]
pub struct DatapathIndex {
patterns: Trie<u8, Vec<String>>,
len: usize,
}
impl DatapathIndex {
/// Convert a query string to a trie search key by normalizing values to `*`.
/// Stops at the first wildcard constant since it can't be used for prefix matching.
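/// For example, the query `web/domain=example.com/ts=*/**` normalizes to
/// the search key `web/domain=*/ts=*`.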
fn query_to_key(query: &str) -> String {
let trimmed = query.trim().trim_end_matches("**").trim_matches('/');
let mut segments = Vec::new();
for seg in trimmed.split('/') {
let segment = match PathSegment::from_str(&seg) {
Ok(x) => x,
Err(_) => continue,
};
// Stop at wildcard constants - can't use for trie prefix search
if matches!(segment, PathSegment::Constant(ref s) if s == "*") {
break;
}
segments.push(segment);
}
segments.iter_mut().for_each(|x| match x {
PathSegment::Constant(_) => {}
PathSegment::Value { value, .. } => *value = "*".into(),
});
segments.iter().join("/")
}
pub fn new_empty() -> Self {
Self {
patterns: TrieBuilder::new().build(),
len: 0,
}
}
pub fn new<S: Into<String>, I: Iterator<Item = S>>(paths: I) -> Self {
let mut len = 0;
let mut patterns = HashMap::new();
for s in paths {
let s: String = s.into();
let mut segments = Vec::new();
for seg in s.split('/') {
segments.push(match PathSegment::from_str(&seg) {
Ok(x) => x,
Err(_) => continue,
});
}
segments.iter_mut().for_each(|x| match x {
PathSegment::Constant(_) => {}
PathSegment::Value { value, .. } => *value = "*".into(),
});
let pattern = segments.iter().join("/");
patterns.entry(pattern).or_insert(Vec::new()).push(s);
len += 1;
}
let mut builder = TrieBuilder::new();
for (k, v) in patterns {
builder.push(k, v);
}
Self {
len,
patterns: builder.build(),
}
}
#[cfg(feature = "tokio")]
pub async fn async_new<S: Into<String>>(mut paths: tokio::sync::mpsc::Receiver<S>) -> Self {
let mut len = 0;
let mut patterns = HashMap::new();
while let Some(s) = paths.recv().await {
let s: String = s.into();
let mut segments = Vec::new();
for seg in s.split('/') {
segments.push(match PathSegment::from_str(&seg) {
Ok(x) => x,
Err(_) => continue,
});
}
segments.iter_mut().for_each(|x| match x {
PathSegment::Constant(_) => {}
PathSegment::Value { value, .. } => *value = "*".into(),
});
let pattern = segments.iter().join("/");
patterns.entry(pattern).or_insert(Vec::new()).push(s);
len += 1;
}
let mut builder = TrieBuilder::new();
for (k, v) in patterns {
builder.push(k, v);
}
Self {
len,
patterns: builder.build(),
}
}
#[inline(always)]
pub fn len(&self) -> usize {
self.len
}
#[inline(always)]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
/// Given a datapath (that may contain wildcards) as a query,
/// return all known datapaths that match it.
///
/// Returns an empty iterator if no paths match.
/// Returns `None` if the query was invalid.
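///
/// For example, `idx.query("web/domain=*/ts=1234")` yields every stored
/// path of the form `web/domain=<any value>/ts=1234`.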
pub fn query(&self, query: impl Into<String>) -> Option<impl Iterator<Item = String> + '_> {
let query: String = query.into();
let regex = rule::Rule::new(query.clone()).regex()?;
let key = Self::query_to_key(&query);
Some(
self.patterns
.predictive_search::<String, _>(&key)
.flat_map(|(_, strings)| strings.iter())
.filter(move |s| regex.is_match(s))
.cloned(),
)
}
pub fn query_match(&self, query: impl Into<String>) -> Option<bool> {
let query: String = query.into();
let regex = rule::Rule::new(query.clone()).regex()?;
let key = Self::query_to_key(&query);
for (_, strings) in self.patterns.predictive_search::<String, _>(&key) {
for s in strings {
if regex.is_match(s) {
return Some(true);
}
}
}
return Some(false);
}
}
// MARK: index tests
#[cfg(test)]
#[expect(clippy::unwrap_used)]
mod index_tests {
use super::*;
#[test]
fn datapath_index_empty() {
let idx = DatapathIndex::new(std::iter::empty::<String>());
let query = "web/domain=example.com";
assert_eq!(idx.query(query).unwrap().count(), 0);
assert!(idx.is_empty());
assert_eq!(idx.len(), 0);
}
#[test]
fn insert_and_lookup_exact_match() {
let paths = vec!["web/domain=example.com/ts=1234"];
let idx = DatapathIndex::new(paths.into_iter());
// Exact match
let results: Vec<_> = idx
.query("web/domain=example.com/ts=1234")
.unwrap()
.collect();
assert_eq!(results.len(), 1);
assert_eq!(results[0], "web/domain=example.com/ts=1234");
// No match
let results: Vec<_> = idx.query("web/domain=other.com/ts=1234").unwrap().collect();
assert_eq!(results.len(), 0);
assert_eq!(idx.len(), 1);
}
#[test]
fn wildcard_constant_match() {
let paths = vec![
"web/domain=example.com/ts=1234",
"api/domain=example.com/ts=1234",
];
let idx = DatapathIndex::new(paths.into_iter());
// Wildcard first segment
let results: Vec<_> = idx.query("*/domain=example.com/ts=1234").unwrap().collect();
assert_eq!(results.len(), 2);
assert_eq!(idx.len(), 2);
}
#[test]
fn wildcard_value_match() {
let paths = vec![
"web/domain=example.com/ts=1234",
"web/domain=other.com/ts=1234",
];
let idx = DatapathIndex::new(paths.into_iter());
// Wildcard domain
let results: Vec<_> = idx.query("web/domain=*/ts=1234").unwrap().collect();
assert_eq!(results.len(), 2);
}
#[test]
fn multiple_datapaths() {
let paths = vec![
"web/domain=example.com/ts=1234",
"web/domain=other.com/ts=1234",
"api/domain=example.com/ts=5678",
];
let idx = DatapathIndex::new(paths.into_iter());
// Specific lookup
let results: Vec<_> = idx
.query("web/domain=example.com/ts=1234")
.unwrap()
.collect();
assert_eq!(results.len(), 1);
assert_eq!(results[0], "web/domain=example.com/ts=1234");
// Wildcard time lookup
let results: Vec<_> = idx.query("web/domain=example.com/ts=*").unwrap().collect();
assert_eq!(results.len(), 1);
assert_eq!(results[0], "web/domain=example.com/ts=1234");
// Double wildcard lookup
let results: Vec<_> = idx.query("web/domain=*/ts=*").unwrap().collect();
assert_eq!(results.len(), 2);
assert_eq!(idx.len(), 3);
}
#[test]
fn nested_wildcards() {
let paths = vec![
"web/domain=example.com/ts=1234/crawl/2.5",
"web/domain=other.com/ts=5678/crawl/2.5",
"web/domain=example.com/ts=9999/crawl/3.0",
];
let idx = DatapathIndex::new(paths.into_iter());
// Multiple wildcards in path
let results: Vec<_> = idx.query("web/domain=*/ts=*/crawl/*").unwrap().collect();
assert_eq!(results.len(), 3);
// Selective wildcards
let results: Vec<_> = idx
.query("web/domain=example.com/ts=*/crawl/*")
.unwrap()
.collect();
assert_eq!(results.len(), 2);
}
#[test]
fn partial_path_query() {
let paths = vec!["web/domain=example.com/ts=1234/crawl/2.5"];
let idx = DatapathIndex::new(paths.into_iter());
// Query with fewer segments than the stored path
let results: Vec<_> = idx.query("web/domain=example.com").unwrap().collect();
assert_eq!(results.len(), 0);
}
#[test]
fn longer_path_query() {
let paths = vec!["web/domain=example.com"];
let idx = DatapathIndex::new(paths.into_iter());
// Query with more segments than the stored path
let results: Vec<_> = idx
.query("web/domain=example.com/ts=1234/crawl/2.5")
.unwrap()
.collect();
assert_eq!(results.len(), 0);
}
#[test]
fn query_match() {
let paths = vec![
"web/domain=example.com/ts=1234",
"web/domain=other.com/ts=5678",
];
let idx = DatapathIndex::new(paths.into_iter());
// Match exists
assert_eq!(
idx.query_match("web/domain=example.com/ts=1234").unwrap(),
true
);
assert_eq!(idx.query_match("web/domain=*/ts=*").unwrap(), true);
// No match
assert_eq!(
idx.query_match("api/domain=example.com/ts=1234").unwrap(),
false
);
assert_eq!(
idx.query_match("web/domain=missing.com/ts=9999").unwrap(),
false
);
}
#[test]
fn suffix_wildcard() {
let paths = vec![
"web/domain=example.com/ts=1234/file1.json",
"web/domain=example.com/ts=1234/file2.json",
"web/domain=example.com/ts=5678/file3.json",
];
let idx = DatapathIndex::new(paths.into_iter());
// Query with suffix wildcard
let results: Vec<_> = idx.query("web/domain=example.com/**").unwrap().collect();
assert_eq!(results.len(), 3);
let results: Vec<_> = idx
.query("web/domain=example.com/ts=1234/**")
.unwrap()
.collect();
assert_eq!(results.len(), 2);
}
}
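
The tests above only exercise the synchronous constructor. A minimal sketch (not part of this commit) of driving the tokio-gated `async_new` from a bounded channel, assuming both the `index` and `tokio` features are enabled:

#[cfg(all(feature = "index", feature = "tokio"))]
async fn index_from_channel() -> DatapathIndex {
    // Bounded channel; the sends below stay within capacity, so no extra task is needed.
    let (tx, rx) = tokio::sync::mpsc::channel::<String>(64);
    for p in ["web/domain=example.com/ts=1", "api/domain=other.com/ts=2"] {
        // Cannot fail here: the receiver is alive and capacity is not exceeded.
        let _ = tx.send(p.to_string()).await;
    }
    // Drop the sender so async_new's recv loop terminates after draining the buffer.
    drop(tx);
    DatapathIndex::async_new(rx).await
}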


@@ -0,0 +1,381 @@
use regex::Regex;
use tracing::warn;
//
// MARK: rule
//
#[derive(Debug)]
enum RegexSegment {
/// A single segment
Single(String),
/// An optional doublestar segment
DoubleStar,
}
impl RegexSegment {
/// Returns the regex pattern for this segment, including
/// whichever `/` separators this segment is responsible for
/// relative to its neighbors.
fn to_regex_part(&self, prev: Option<&Self>, next: Option<&Self>) -> String {
match (prev, self, next) {
// Consecutive single segments need a trailing slash
(_, Self::Single(x), Some(Self::Single(_))) => format!("{x}[/]"),
// Terminal single segments don't need a trailing slash
(_, Self::Single(x), None) => x.to_owned(),
// Neighboring doublestar is always responsible for slashes
(_, Self::Single(x), Some(Self::DoubleStar)) => x.to_owned(),
// No additional slashes
(None, Self::DoubleStar, None) => "((?:.*)?)".into(),
// Leading slash
(Some(Self::Single(_)), Self::DoubleStar, None) => "((?:[/].*)?)".into(),
// Trailing slash
(None, Self::DoubleStar, Some(Self::Single(_))) => "((?:.*[/])?)".into(),
// Leading and trailing slash.
// Also, replace self with a [/] when empty.
(Some(Self::Single(_)), Self::DoubleStar, Some(Self::Single(_))) => {
"((?:[/].*[/])|[/])".into()
}
// Doublestars cannot be neighbors
(_, Self::DoubleStar, Some(Self::DoubleStar))
| (Some(Self::DoubleStar), Self::DoubleStar, _) => {
unreachable!("consecutive doublestars must be reduced")
}
}
}
}
#[derive(Debug, Clone)]
pub struct Rule {
pub pattern: String,
}
impl Rule {
pub fn new(pattern: impl Into<String>) -> Self {
Self {
pattern: pattern.into(),
}
}
/// Turn this rule into a regex pattern.
/// Returns `None` if this rule was invalid.
pub fn regex(&self) -> Option<Regex> {
let pattern = &self.pattern;
if pattern.ends_with("/") {
warn!("Pattern `{pattern}` has a trailing slash which will be ignored")
}
if pattern.starts_with("/") {
warn!("Pattern `{pattern}` has a leading slash which will be ignored")
}
// Split on slashes or stars
// This is a lot like .split("/"), but handles
// the edge case where ** is not delimited by slashes
// (`root**test` is equivalent to `root/**/test`)
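// e.g. `root**test` splits into ["root", "**", "test"],
// and `a//b` collapses to ["a", "b"]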
let segments = {
#[expect(clippy::unwrap_used)]
let re = Regex::new("[*]{2,}|[/]").unwrap();
let split = re.find_iter(pattern);
let bounds = split
.into_iter()
.flat_map(|x| {
let r = x.range();
let a = r.start;
let b = r.end;
[a, b]
})
.chain([pattern.len()])
.collect::<Vec<_>>();
let mut parts = Vec::new();
let mut last = 0;
for next in bounds {
let seg = &pattern[last..next];
// Consecutive slashes are identical to a single slash
if seg != "/" && !seg.is_empty() {
parts.push(seg);
}
last = next;
}
parts
};
let mut rebuilt_segments = Vec::new();
let mut last_was_doublestar = false;
for segment in segments {
// This is a wildcard regex
// (**, ***, etc)
if segment.len() > 1 && segment.chars().all(|x| x == '*') {
match segment {
"**" => {
// Consecutive doublestars are meaningless
if !last_was_doublestar {
rebuilt_segments.push(RegexSegment::DoubleStar);
}
last_was_doublestar = true;
}
_ => return None,
}
continue;
}
last_was_doublestar = false;
let parts = segment.split("*").collect::<Vec<_>>();
let mut rebuilt = String::new();
for (i, part) in parts.into_iter().enumerate() {
if i != 0 {
rebuilt.push_str("([^/]*)")
}
rebuilt.push_str(&regex::escape(part));
}
rebuilt_segments.push(RegexSegment::Single(rebuilt));
}
let mut re_built = String::new();
let mut prev = None;
for (i, seg) in rebuilt_segments.iter().enumerate() {
let next = rebuilt_segments.get(i + 1);
re_built.push_str(&seg.to_regex_part(prev, next));
prev = Some(seg);
}
let re_built = format!("^{re_built}$");
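// e.g. the rule `root/**/file` builds `^root((?:[/].*[/])|[/])file$`,
// which matches `root/file` and `root/a/b/file` but not `rootfile`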
// This regex should always be valid
#[expect(clippy::unwrap_used)]
Some(Regex::new(&re_built).unwrap())
}
}
//
// MARK: tests
//
#[cfg(test)]
#[expect(clippy::unwrap_used)]
mod rule_tests {
use super::*;
fn rule_regex(pattern: &str) -> Regex {
let rule = Rule::new(pattern);
return rule.regex().unwrap();
}
#[test]
fn simple() {
let regex = rule_regex("file.txt");
assert!(regex.is_match("file.txt"));
assert!(!regex.is_match("other.txt"));
assert!(!regex.is_match("path/file.txt"));
}
#[test]
fn simple_dir() {
let regex = rule_regex("dir/file.txt");
assert!(regex.is_match("dir/file.txt"));
assert!(!regex.is_match("file.txt"));
assert!(!regex.is_match("other/file.txt"));
}
#[test]
fn simple_star() {
let regex = rule_regex("*.txt");
assert!(regex.is_match("file.txt"));
assert!(regex.is_match("other.txt"));
assert!(!regex.is_match("file.jpg"));
assert!(!regex.is_match("nested/file.txt"));
}
#[test]
fn simple_doublestar() {
let regex = rule_regex("**/*.txt");
assert!(regex.is_match("file.txt"));
assert!(regex.is_match("dir/file.txt"));
assert!(regex.is_match("dir/subdir/file.txt"));
assert!(!regex.is_match("file.jpg"));
assert!(!regex.is_match("dir/file.jpg"));
}
#[test]
fn consecutive_doublestar() {
let regex = rule_regex("**/**/**/*.txt");
assert!(regex.is_match("file.txt"));
assert!(regex.is_match("dir/file.txt"));
assert!(regex.is_match("dir/subdir/file.txt"));
assert!(!regex.is_match("file.jpg"));
assert!(!regex.is_match("dir/file.jpg"));
}
#[test]
fn dual_star() {
let regex = rule_regex("**/*a*");
assert!(regex.is_match("fileafile"));
assert!(regex.is_match("dir/fileafile"));
assert!(regex.is_match("filea"));
assert!(regex.is_match("dir/filea"));
assert!(regex.is_match("afile"));
assert!(regex.is_match("dir/afile"));
assert!(!regex.is_match("noletter"));
assert!(!regex.is_match("dir/noletter"));
}
#[test]
fn single_end() {
let regex = rule_regex("**/*");
assert!(regex.is_match("file"));
assert!(regex.is_match("dir/file"));
assert!(regex.is_match("a/b/c/dir/file"));
}
#[test]
fn doublestar_end() {
let regex = rule_regex("root/**");
assert!(regex.is_match("root/file"));
assert!(!regex.is_match("dir/file"));
}
#[test]
fn doublestar_start() {
let regex = rule_regex("**/dir");
assert!(regex.is_match("dir"));
assert!(regex.is_match("a/b/dir"));
assert!(!regex.is_match("dir/file"));
}
#[test]
fn doublestar_adjacent_before() {
let regex = rule_regex("root/**test");
assert!(regex.is_match("root/test"));
assert!(regex.is_match("root/a/test"));
assert!(regex.is_match("root/a/b/c/test"));
assert!(!regex.is_match("root/file"));
assert!(!regex.is_match("root/xxtest"));
}
#[test]
fn doublestar_adjacent_after() {
let regex = rule_regex("root/test**");
assert!(regex.is_match("root/test"));
assert!(regex.is_match("root/test/a"));
assert!(regex.is_match("root/test/a/b/c"));
assert!(!regex.is_match("root/testxx"));
assert!(!regex.is_match("root/file"));
}
#[test]
fn doublestar_adjacent_middle() {
let regex = rule_regex("root/test**file");
assert!(regex.is_match("root/test/file"));
assert!(regex.is_match("root/test/a/b/c/file"));
assert!(!regex.is_match("root/test"));
assert!(!regex.is_match("root/file"));
assert!(!regex.is_match("root/testfile"));
assert!(!regex.is_match("root/testxxfile"));
}
#[test]
fn doublestar_nullable() {
let regex = rule_regex("root/**/file");
assert!(regex.is_match("root/test/file"));
assert!(regex.is_match("root/file"));
assert!(!regex.is_match("rootfile"));
}
#[test]
fn doublestar_nullable_post() {
let regex = rule_regex("root/**");
assert!(regex.is_match("root"));
assert!(regex.is_match("root/file"));
assert!(!regex.is_match("rootfile"));
}
#[test]
fn doublestar_nullable_pre() {
let regex = rule_regex("**/file");
assert!(regex.is_match("file"));
assert!(regex.is_match("root/file"));
assert!(!regex.is_match("rootfile"));
}
#[test]
fn doublestar_bad_extension() {
let regex = rule_regex("**.flac");
assert!(regex.is_match("root/.flac"));
assert!(regex.is_match("root/a/.flac"));
assert!(!regex.is_match("root/test.flac"));
assert!(!regex.is_match("test.flac"));
assert!(!regex.is_match("root/test/a/b/c.flac"));
assert!(!regex.is_match("root/testflac"));
assert!(!regex.is_match("test.mp3"));
}
#[test]
fn doublestar_good_extension() {
let regex = rule_regex("**/*.flac");
assert!(regex.is_match("root/.flac"));
assert!(regex.is_match("root/a/.flac"));
assert!(regex.is_match("root/test.flac"));
assert!(regex.is_match("test.flac"));
assert!(regex.is_match("root/test/a/b/c.flac"));
assert!(!regex.is_match("root/testflac"));
assert!(!regex.is_match("test.mp3"));
}
#[test]
fn multi_slash_a() {
let regex = rule_regex("dir//file.txt");
assert!(regex.is_match("dir/file.txt"));
assert!(!regex.is_match("dirfile.txt"));
assert!(!regex.is_match("dir/other.txt"));
}
#[test]
fn multi_slash_b() {
let regex = rule_regex("**///*.txt");
assert!(regex.is_match("dir/file.txt"));
assert!(regex.is_match("dir/subdir/file.txt"));
assert!(!regex.is_match("file.jpg"));
}
#[test]
fn multi_slash_c() {
let regex = rule_regex("///dir//**//*.txt//");
assert!(regex.is_match("dir/subdir/file.txt"));
assert!(regex.is_match("dir/sub1/sub2/file.txt"));
assert!(!regex.is_match("other/sub/file.txt"));
assert!(!regex.is_match("dir/file.jpg"));
}
}


@@ -7,6 +7,10 @@
#[cfg(test)]
use uuid as _;
// silence linter, used by fns in index.rs
#[cfg(feature = "tokio")]
use tokio as _;
mod datapath;
pub use datapath::*;
@@ -19,4 +23,10 @@ pub use schema::*;
mod wildcardable;
pub use wildcardable::*;
#[cfg(feature = "index")]
mod index;
#[cfg(feature = "index")]
pub use index::*;
pub use datapath_macro::datapath;