1
0
mirror of https://github.com/rm-dr/datapath.git synced 2025-12-10 05:14:13 -08:00
This commit is contained in:
2025-12-04 09:10:19 -08:00
committed by Mark
parent 08003a3fbe
commit d787e67e55
4 changed files with 95 additions and 50 deletions

4
Cargo.lock generated
View File

@@ -50,7 +50,7 @@ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]] [[package]]
name = "datapath" name = "datapath"
version = "0.0.3" version = "0.0.4"
dependencies = [ dependencies = [
"datapath-macro", "datapath-macro",
"itertools", "itertools",
@@ -63,7 +63,7 @@ dependencies = [
[[package]] [[package]]
name = "datapath-macro" name = "datapath-macro"
version = "0.0.3" version = "0.0.4"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",

View File

@@ -11,7 +11,7 @@ readme = "README.md"
authors = ["rm-dr"] authors = ["rm-dr"]
# Don't forget to bump datapath-macro below! # Don't forget to bump datapath-macro below!
version = "0.0.3" version = "0.0.4"
[workspace.lints.rust] [workspace.lints.rust]
unused_import_braces = "deny" unused_import_braces = "deny"
@@ -70,7 +70,7 @@ cargo_common_metadata = "deny"
# #
[workspace.dependencies] [workspace.dependencies]
datapath-macro = { path = "crates/datapath-macro", version = "0.0.3" } datapath-macro = { path = "crates/datapath-macro", version = "0.0.4" }
datapath = { path = "crates/datapath" } datapath = { path = "crates/datapath" }
chrono = "0.4.42" chrono = "0.4.42"

View File

@@ -1,5 +1,6 @@
use itertools::Itertools; use itertools::Itertools;
use std::{collections::HashMap, fmt::Display, str::FromStr}; use std::{collections::HashMap, fmt::Display, str::FromStr};
use tracing::trace;
use trie_rs::map::{Trie, TrieBuilder}; use trie_rs::map::{Trie, TrieBuilder};
mod rule; mod rule;
@@ -68,8 +69,8 @@ impl DatapathIndex {
Err(_) => continue, Err(_) => continue,
}; };
// Stop at wildcard constants - can't use for trie prefix search // lone stars and double-stars aren't in the trie
if matches!(segment, PathSegment::Constant(ref s) if s == "*") { if matches!(segment, PathSegment::Constant(ref s) if s == "*" || s == "**" ) {
break; break;
} }
@@ -181,8 +182,9 @@ impl DatapathIndex {
/// Returns `None` if the query was invalid. /// Returns `None` if the query was invalid.
pub fn query(&self, query: impl Into<String>) -> Option<impl Iterator<Item = String> + '_> { pub fn query(&self, query: impl Into<String>) -> Option<impl Iterator<Item = String> + '_> {
let query: String = query.into(); let query: String = query.into();
let regex = rule::Rule::new(query.clone()).regex()?; let regex = rule::Rule::new(query.clone())?;
let key = Self::query_to_key(&query); let key = Self::query_to_key(&query);
trace!("DatapathIndex key is {key}");
Some( Some(
self.patterns self.patterns
@@ -193,10 +195,24 @@ impl DatapathIndex {
) )
} }
/// Like [Self::query], but driven by an already-built [rule::Rule]
/// instead of a pattern string.
///
/// The trie key is derived from `rule.pattern()`; every string stored
/// under an entry returned by the predictive search for that key is tested
/// against the rule, and a clone of each matching string is yielded.
/// The returned iterator borrows both `self` and `rule`.
pub fn query_rule<'a>(&'a self, rule: &'a rule::Rule) -> impl Iterator<Item = String> + 'a {
    let key = Self::query_to_key(rule.pattern());
    trace!("DatapathIndex key is {key}");

    // Entries the trie returns for this key; each carries a group of stored strings.
    let candidates = self.patterns.predictive_search::<String, _>(&key);

    candidates
        .flat_map(|(_, bucket)| bucket.iter())
        .filter(move |candidate| rule.is_match(candidate))
        .cloned()
}
/// Like [Self::query], but returns `true` if any paths match
pub fn query_match(&self, query: impl Into<String>) -> Option<bool> { pub fn query_match(&self, query: impl Into<String>) -> Option<bool> {
let query: String = query.into(); let query: String = query.into();
let regex = rule::Rule::new(query.clone()).regex()?; let regex = rule::Rule::new(query.clone())?;
let key = Self::query_to_key(&query); let key = Self::query_to_key(&query);
trace!("DatapathIndex key is {key}");
for (_, strings) in self.patterns.predictive_search::<String, _>(&key) { for (_, strings) in self.patterns.predictive_search::<String, _>(&key) {
for s in strings { for s in strings {
@@ -208,6 +224,22 @@ impl DatapathIndex {
return Some(false); return Some(false);
} }
/// Like [Self::query_match], but with a precompiled rule.
///
/// The trie key is derived from `rule.pattern()`. Returns `true` as soon as
/// any string stored under an entry returned by the predictive search for
/// that key is matched by `rule`, and `false` if none match.
pub fn query_rule_match<'a>(&'a self, rule: &'a rule::Rule) -> bool {
    // `pattern()` already yields a `&str`; pass it straight through,
    // the same way `query_rule` does.
    let key = Self::query_to_key(rule.pattern());
    trace!("DatapathIndex key is {key}");

    // `any` short-circuits on the first hit, like the early `return true`
    // of a hand-written nested loop.
    return self
        .patterns
        .predictive_search::<String, _>(&key)
        .any(|(_, strings)| strings.iter().any(|s| rule.is_match(s)));
}
} }
// MARK: index tests // MARK: index tests

View File

@@ -54,29 +54,28 @@ impl RegexSegment {
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Rule { pub struct Rule {
pub pattern: String, regex: Regex,
pattern: String,
} }
impl Rule { impl Rule {
pub fn new(pattern: impl Into<String>) -> Self { pub fn pattern(&self) -> &str {
Self { &self.pattern
pattern: pattern.into(),
}
} }
/// Turn this rule into a regex pattern. pub fn regex(&self) -> &Regex {
/// Returns `None` if this rule was invalid. &self.regex
pub fn regex(&self) -> Option<Regex> { }
let pattern = &self.pattern;
if pattern.ends_with("/") { pub fn is_match(&self, s: &str) -> bool {
warn!("Pattern `{pattern}` has a trailing slash which will be ignored") self.regex.is_match(s)
} }
if pattern.starts_with("/") { pub fn raw_regex_str(&self) -> String {
warn!("Pattern `{pattern}` has a leading slash which will be ignored") Self::regex_str(self.pattern()).unwrap()
} }
fn regex_str(pattern: &str) -> Option<String> {
// Split on slashes or stars // Split on slashes or stars
// This is a lot like .split("/"), but handles // This is a lot like .split("/"), but handles
// the edge case where ** is not delimited by slashes // the edge case where ** is not delimited by slashes
@@ -84,7 +83,7 @@ impl Rule {
let segments = { let segments = {
#[expect(clippy::unwrap_used)] #[expect(clippy::unwrap_used)]
let re = Regex::new("[*]{2,}|[/]").unwrap(); let re = Regex::new("[*]{2,}|[/]").unwrap();
let split = re.find_iter(pattern); let split = re.find_iter(&pattern);
let bounds = split let bounds = split
.into_iter() .into_iter()
@@ -153,10 +152,29 @@ impl Rule {
prev = Some(seg); prev = Some(seg);
} }
return Some(re_built);
}
/// Returns `None` if this rule was invalid.
pub fn new(pattern: impl Into<String>) -> Option<Self> {
let pattern: String = pattern.into();
if pattern.ends_with("/") {
warn!("Pattern `{pattern}` has a trailing slash which will be ignored")
}
if pattern.starts_with("/") {
warn!("Pattern `{pattern}` has a leading slash which will be ignored")
}
let re_built = Self::regex_str(&pattern)?;
let re_built = format!("^{re_built}$"); let re_built = format!("^{re_built}$");
// This regex should always be valid // This regex should always be valid
#[expect(clippy::unwrap_used)] #[expect(clippy::unwrap_used)]
Some(Regex::new(&re_built).unwrap()) let regex = Regex::new(&re_built).unwrap();
Some(Self { regex, pattern })
} }
} }
@@ -169,14 +187,9 @@ impl Rule {
mod rule_tests { mod rule_tests {
use super::*; use super::*;
fn rule_regex(pattern: &str) -> Regex {
let rule = Rule::new(pattern);
return rule.regex().unwrap();
}
#[test] #[test]
fn simple() { fn simple() {
let regex = rule_regex("file.txt"); let regex = Rule::new("file.txt").unwrap();
assert!(regex.is_match("file.txt")); assert!(regex.is_match("file.txt"));
assert!(!regex.is_match("other.txt")); assert!(!regex.is_match("other.txt"));
@@ -185,7 +198,7 @@ mod rule_tests {
#[test] #[test]
fn simple_dir() { fn simple_dir() {
let regex = rule_regex("dir/file.txt"); let regex = Rule::new("dir/file.txt").unwrap();
assert!(regex.is_match("dir/file.txt")); assert!(regex.is_match("dir/file.txt"));
assert!(!regex.is_match("file.txt")); assert!(!regex.is_match("file.txt"));
@@ -194,7 +207,7 @@ mod rule_tests {
#[test] #[test]
fn simple_star() { fn simple_star() {
let regex = rule_regex("*.txt"); let regex = Rule::new("*.txt").unwrap();
assert!(regex.is_match("file.txt")); assert!(regex.is_match("file.txt"));
assert!(regex.is_match("other.txt")); assert!(regex.is_match("other.txt"));
@@ -204,7 +217,7 @@ mod rule_tests {
#[test] #[test]
fn simple_doublestar() { fn simple_doublestar() {
let regex = rule_regex("**/*.txt"); let regex = Rule::new("**/*.txt").unwrap();
assert!(regex.is_match("file.txt")); assert!(regex.is_match("file.txt"));
assert!(regex.is_match("dir/file.txt")); assert!(regex.is_match("dir/file.txt"));
@@ -215,7 +228,7 @@ mod rule_tests {
#[test] #[test]
fn consecutive_doublestar() { fn consecutive_doublestar() {
let regex = rule_regex("**/**/**/*.txt"); let regex = Rule::new("**/**/**/*.txt").unwrap();
assert!(regex.is_match("file.txt")); assert!(regex.is_match("file.txt"));
assert!(regex.is_match("dir/file.txt")); assert!(regex.is_match("dir/file.txt"));
@@ -226,7 +239,7 @@ mod rule_tests {
#[test] #[test]
fn dual_star() { fn dual_star() {
let regex = rule_regex("**/*a*"); let regex = Rule::new("**/*a*").unwrap();
assert!(regex.is_match("fileafile")); assert!(regex.is_match("fileafile"));
assert!(regex.is_match("dir/fileafile")); assert!(regex.is_match("dir/fileafile"));
@@ -240,7 +253,7 @@ mod rule_tests {
#[test] #[test]
fn single_end() { fn single_end() {
let regex = rule_regex("**/*"); let regex = Rule::new("**/*").unwrap();
assert!(regex.is_match("file")); assert!(regex.is_match("file"));
assert!(regex.is_match("dir/file")); assert!(regex.is_match("dir/file"));
@@ -249,7 +262,7 @@ mod rule_tests {
#[test] #[test]
fn doublestar_end() { fn doublestar_end() {
let regex = rule_regex("root/**"); let regex = Rule::new("root/**").unwrap();
assert!(regex.is_match("root/file")); assert!(regex.is_match("root/file"));
assert!(!regex.is_match("dir/file")); assert!(!regex.is_match("dir/file"));
@@ -257,7 +270,7 @@ mod rule_tests {
#[test] #[test]
fn doublestar_start() { fn doublestar_start() {
let regex = rule_regex("**/dir"); let regex = Rule::new("**/dir").unwrap();
assert!(regex.is_match("dir")); assert!(regex.is_match("dir"));
assert!(regex.is_match("a/b/dir")); assert!(regex.is_match("a/b/dir"));
@@ -266,7 +279,7 @@ mod rule_tests {
#[test] #[test]
fn doublestar_adjacent_before() { fn doublestar_adjacent_before() {
let regex = rule_regex("root/**test"); let regex = Rule::new("root/**test").unwrap();
assert!(regex.is_match("root/test")); assert!(regex.is_match("root/test"));
assert!(regex.is_match("root/a/test")); assert!(regex.is_match("root/a/test"));
@@ -277,7 +290,7 @@ mod rule_tests {
#[test] #[test]
fn doublestar_adjacent_after() { fn doublestar_adjacent_after() {
let regex = rule_regex("root/test**"); let regex = Rule::new("root/test**").unwrap();
assert!(regex.is_match("root/test")); assert!(regex.is_match("root/test"));
assert!(regex.is_match("root/test/a")); assert!(regex.is_match("root/test/a"));
@@ -288,7 +301,7 @@ mod rule_tests {
#[test] #[test]
fn doublestar_adjacent_middle() { fn doublestar_adjacent_middle() {
let regex = rule_regex("root/test**file"); let regex = Rule::new("root/test**file").unwrap();
assert!(regex.is_match("root/test/file")); assert!(regex.is_match("root/test/file"));
assert!(regex.is_match("root/test/a/b/c/file")); assert!(regex.is_match("root/test/a/b/c/file"));
@@ -300,7 +313,7 @@ mod rule_tests {
#[test] #[test]
fn doublestar_nullable() { fn doublestar_nullable() {
let regex = rule_regex("root/**/file"); let regex = Rule::new("root/**/file").unwrap();
assert!(regex.is_match("root/test/file")); assert!(regex.is_match("root/test/file"));
assert!(regex.is_match("root/file")); assert!(regex.is_match("root/file"));
@@ -309,7 +322,7 @@ mod rule_tests {
#[test] #[test]
fn doublestar_nullable_post() { fn doublestar_nullable_post() {
let regex = rule_regex("root/**"); let regex = Rule::new("root/**").unwrap();
assert!(regex.is_match("root")); assert!(regex.is_match("root"));
assert!(regex.is_match("root/file")); assert!(regex.is_match("root/file"));
@@ -318,7 +331,7 @@ mod rule_tests {
#[test] #[test]
fn doublestar_nullable_pre() { fn doublestar_nullable_pre() {
let regex = rule_regex("**/file"); let regex = Rule::new("**/file").unwrap();
assert!(regex.is_match("file")); assert!(regex.is_match("file"));
assert!(regex.is_match("root/file")); assert!(regex.is_match("root/file"));
@@ -327,7 +340,7 @@ mod rule_tests {
#[test] #[test]
fn doublestar_bad_extension() { fn doublestar_bad_extension() {
let regex = rule_regex("**.flac"); let regex = Rule::new("**.flac").unwrap();
assert!(regex.is_match("root/.flac")); assert!(regex.is_match("root/.flac"));
assert!(regex.is_match("root/a/.flac")); assert!(regex.is_match("root/a/.flac"));
@@ -340,7 +353,7 @@ mod rule_tests {
#[test] #[test]
fn doublestar_good_extension() { fn doublestar_good_extension() {
let regex = rule_regex("**/*.flac"); let regex = Rule::new("**/*.flac").unwrap();
assert!(regex.is_match("root/.flac")); assert!(regex.is_match("root/.flac"));
assert!(regex.is_match("root/a/.flac")); assert!(regex.is_match("root/a/.flac"));
@@ -353,7 +366,7 @@ mod rule_tests {
#[test] #[test]
fn multi_slash_a() { fn multi_slash_a() {
let regex = rule_regex("dir//file.txt"); let regex = Rule::new("dir//file.txt").unwrap();
assert!(regex.is_match("dir/file.txt")); assert!(regex.is_match("dir/file.txt"));
assert!(!regex.is_match("dirfile.txt")); assert!(!regex.is_match("dirfile.txt"));
@@ -362,7 +375,7 @@ mod rule_tests {
#[test] #[test]
fn multi_slash_b() { fn multi_slash_b() {
let regex = rule_regex("**///*.txt"); let regex = Rule::new("**///*.txt").unwrap();
assert!(regex.is_match("dir/file.txt")); assert!(regex.is_match("dir/file.txt"));
assert!(regex.is_match("dir/subdir/file.txt")); assert!(regex.is_match("dir/subdir/file.txt"));
@@ -371,7 +384,7 @@ mod rule_tests {
#[test] #[test]
fn multi_slash_c() { fn multi_slash_c() {
let regex = rule_regex("///dir//**//*.txt//"); let regex = Rule::new("///dir//**//*.txt//").unwrap();
assert!(regex.is_match("dir/subdir/file.txt")); assert!(regex.is_match("dir/subdir/file.txt"));
assert!(regex.is_match("dir/sub1/sub2/file.txt")); assert!(regex.is_match("dir/sub1/sub2/file.txt"));