1
0
mirror of https://github.com/rm-dr/datapath.git synced 2025-12-07 20:04:13 -08:00
This commit is contained in:
2025-12-04 09:10:19 -08:00
committed by Mark
parent 08003a3fbe
commit d787e67e55
4 changed files with 95 additions and 50 deletions

4
Cargo.lock generated
View File

@@ -50,7 +50,7 @@ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "datapath"
version = "0.0.3"
version = "0.0.4"
dependencies = [
"datapath-macro",
"itertools",
@@ -63,7 +63,7 @@ dependencies = [
[[package]]
name = "datapath-macro"
version = "0.0.3"
version = "0.0.4"
dependencies = [
"proc-macro2",
"quote",

View File

@@ -11,7 +11,7 @@ readme = "README.md"
authors = ["rm-dr"]
# Don't forget to bump datapath-macro below!
version = "0.0.3"
version = "0.0.4"
[workspace.lints.rust]
unused_import_braces = "deny"
@@ -70,7 +70,7 @@ cargo_common_metadata = "deny"
#
[workspace.dependencies]
datapath-macro = { path = "crates/datapath-macro", version = "0.0.3" }
datapath-macro = { path = "crates/datapath-macro", version = "0.0.4" }
datapath = { path = "crates/datapath" }
chrono = "0.4.42"

View File

@@ -1,5 +1,6 @@
use itertools::Itertools;
use std::{collections::HashMap, fmt::Display, str::FromStr};
use tracing::trace;
use trie_rs::map::{Trie, TrieBuilder};
mod rule;
@@ -68,8 +69,8 @@ impl DatapathIndex {
Err(_) => continue,
};
// Stop at wildcard constants - can't use for trie prefix search
if matches!(segment, PathSegment::Constant(ref s) if s == "*") {
// lone stars and double-stars aren't in the trie
if matches!(segment, PathSegment::Constant(ref s) if s == "*" || s == "**" ) {
break;
}
@@ -181,8 +182,9 @@ impl DatapathIndex {
/// Returns `None` if the query was invalid.
pub fn query(&self, query: impl Into<String>) -> Option<impl Iterator<Item = String> + '_> {
let query: String = query.into();
let regex = rule::Rule::new(query.clone()).regex()?;
let regex = rule::Rule::new(query.clone())?;
let key = Self::query_to_key(&query);
trace!("DatapathIndex key is {key}");
Some(
self.patterns
@@ -193,10 +195,24 @@ impl DatapathIndex {
)
}
/// Same lookup as [Self::query], but driven by an already-compiled
/// [rule::Rule] instead of a pattern string, so nothing is compiled here.
///
/// The returned iterator is lazy and yields a clone of every stored
/// string that `rule` matches.
pub fn query_rule<'a>(&'a self, rule: &'a rule::Rule) -> impl Iterator<Item = String> + 'a {
    let key = Self::query_to_key(rule.pattern());
    trace!("DatapathIndex key is {key}");

    // The trie lookup only narrows the candidate set;
    // the rule itself decides what is actually returned.
    let candidates = self.patterns.predictive_search::<String, _>(&key);

    candidates
        .flat_map(|(_, bucket)| bucket.iter())
        .filter(move |candidate| rule.is_match(candidate))
        .cloned()
}
/// Like [Self::query], but returns `true` if any paths match
pub fn query_match(&self, query: impl Into<String>) -> Option<bool> {
let query: String = query.into();
let regex = rule::Rule::new(query.clone()).regex()?;
let regex = rule::Rule::new(query.clone())?;
let key = Self::query_to_key(&query);
trace!("DatapathIndex key is {key}");
for (_, strings) in self.patterns.predictive_search::<String, _>(&key) {
for s in strings {
@@ -208,6 +224,22 @@ impl DatapathIndex {
return Some(false);
}
/// Like [Self::query_match], but with a precompiled rule.
///
/// Returns `true` as soon as one stored string matches `rule`,
/// and `false` if none of the candidates do.
pub fn query_rule_match<'a>(&'a self, rule: &'a rule::Rule) -> bool {
    // `pattern()` already returns `&str`; pass it straight through,
    // the same way `query_rule` does.
    let key = Self::query_to_key(rule.pattern());
    trace!("DatapathIndex key is {key}");

    // `any` short-circuits on the first hit, exactly like the early return
    // of a hand-written nested loop.
    self.patterns
        .predictive_search::<String, _>(&key)
        .flat_map(|(_, strings)| strings.iter())
        .any(|s| rule.is_match(s))
}
}
// MARK: index tests

View File

@@ -54,29 +54,28 @@ impl RegexSegment {
#[derive(Debug, Clone)]
pub struct Rule {
pub pattern: String,
regex: Regex,
pattern: String,
}
impl Rule {
pub fn new(pattern: impl Into<String>) -> Self {
Self {
pattern: pattern.into(),
}
/// The pattern string stored in this rule.
pub fn pattern(&self) -> &str {
&self.pattern
}
/// Turn this rule into a regex pattern.
/// Returns `None` if this rule was invalid.
pub fn regex(&self) -> Option<Regex> {
let pattern = &self.pattern;
/// The compiled regex stored in this rule.
pub fn regex(&self) -> &Regex {
&self.regex
}
if pattern.ends_with("/") {
warn!("Pattern `{pattern}` has a trailing slash which will be ignored")
}
/// Returns `true` if this rule's compiled regex matches `s`.
///
/// `new` wraps the generated regex in `^…$` before compiling it,
/// so `s` has to match as a whole, not merely contain a match.
pub fn is_match(&self, s: &str) -> bool {
self.regex.is_match(s)
}
if pattern.starts_with("/") {
warn!("Pattern `{pattern}` has a leading slash which will be ignored")
}
/// Returns the regex source generated from this rule's pattern,
/// without the `^`/`$` anchors that `new` adds before compiling.
///
/// The string is rebuilt from the pattern on every call.
// `new` only produces a `Rule` when `regex_str` returned `Some` for this
// same pattern, so the unwrap below should never fail for a constructed rule.
#[expect(clippy::unwrap_used)]
pub fn raw_regex_str(&self) -> String {
    Self::regex_str(self.pattern()).unwrap()
}
fn regex_str(pattern: &str) -> Option<String> {
// Split on slashes or stars
// This is a lot like .split("/"), but handles
// the edge case where ** is not delimited by slashes
@@ -84,7 +83,7 @@ impl Rule {
let segments = {
#[expect(clippy::unwrap_used)]
let re = Regex::new("[*]{2,}|[/]").unwrap();
let split = re.find_iter(pattern);
let split = re.find_iter(&pattern);
let bounds = split
.into_iter()
@@ -153,10 +152,29 @@ impl Rule {
prev = Some(seg);
}
return Some(re_built);
}
/// Returns `None` if this rule was invalid.
pub fn new(pattern: impl Into<String>) -> Option<Self> {
let pattern: String = pattern.into();
if pattern.ends_with("/") {
warn!("Pattern `{pattern}` has a trailing slash which will be ignored")
}
if pattern.starts_with("/") {
warn!("Pattern `{pattern}` has a leading slash which will be ignored")
}
let re_built = Self::regex_str(&pattern)?;
let re_built = format!("^{re_built}$");
// This regex should always be valid
#[expect(clippy::unwrap_used)]
Some(Regex::new(&re_built).unwrap())
let regex = Regex::new(&re_built).unwrap();
Some(Self { regex, pattern })
}
}
@@ -169,14 +187,9 @@ impl Rule {
mod rule_tests {
use super::*;
fn rule_regex(pattern: &str) -> Regex {
let rule = Rule::new(pattern);
return rule.regex().unwrap();
}
#[test]
fn simple() {
let regex = rule_regex("file.txt");
let regex = Rule::new("file.txt").unwrap();
assert!(regex.is_match("file.txt"));
assert!(!regex.is_match("other.txt"));
@@ -185,7 +198,7 @@ mod rule_tests {
#[test]
fn simple_dir() {
let regex = rule_regex("dir/file.txt");
let regex = Rule::new("dir/file.txt").unwrap();
assert!(regex.is_match("dir/file.txt"));
assert!(!regex.is_match("file.txt"));
@@ -194,7 +207,7 @@ mod rule_tests {
#[test]
fn simple_star() {
let regex = rule_regex("*.txt");
let regex = Rule::new("*.txt").unwrap();
assert!(regex.is_match("file.txt"));
assert!(regex.is_match("other.txt"));
@@ -204,7 +217,7 @@ mod rule_tests {
#[test]
fn simple_doublestar() {
let regex = rule_regex("**/*.txt");
let regex = Rule::new("**/*.txt").unwrap();
assert!(regex.is_match("file.txt"));
assert!(regex.is_match("dir/file.txt"));
@@ -215,7 +228,7 @@ mod rule_tests {
#[test]
fn consecutive_doublestar() {
let regex = rule_regex("**/**/**/*.txt");
let regex = Rule::new("**/**/**/*.txt").unwrap();
assert!(regex.is_match("file.txt"));
assert!(regex.is_match("dir/file.txt"));
@@ -226,7 +239,7 @@ mod rule_tests {
#[test]
fn dual_star() {
let regex = rule_regex("**/*a*");
let regex = Rule::new("**/*a*").unwrap();
assert!(regex.is_match("fileafile"));
assert!(regex.is_match("dir/fileafile"));
@@ -240,7 +253,7 @@ mod rule_tests {
#[test]
fn single_end() {
let regex = rule_regex("**/*");
let regex = Rule::new("**/*").unwrap();
assert!(regex.is_match("file"));
assert!(regex.is_match("dir/file"));
@@ -249,7 +262,7 @@ mod rule_tests {
#[test]
fn doublestar_end() {
let regex = rule_regex("root/**");
let regex = Rule::new("root/**").unwrap();
assert!(regex.is_match("root/file"));
assert!(!regex.is_match("dir/file"));
@@ -257,7 +270,7 @@ mod rule_tests {
#[test]
fn doublestar_start() {
let regex = rule_regex("**/dir");
let regex = Rule::new("**/dir").unwrap();
assert!(regex.is_match("dir"));
assert!(regex.is_match("a/b/dir"));
@@ -266,7 +279,7 @@ mod rule_tests {
#[test]
fn doublestar_adjacent_before() {
let regex = rule_regex("root/**test");
let regex = Rule::new("root/**test").unwrap();
assert!(regex.is_match("root/test"));
assert!(regex.is_match("root/a/test"));
@@ -277,7 +290,7 @@ mod rule_tests {
#[test]
fn doublestar_adjacent_after() {
let regex = rule_regex("root/test**");
let regex = Rule::new("root/test**").unwrap();
assert!(regex.is_match("root/test"));
assert!(regex.is_match("root/test/a"));
@@ -288,7 +301,7 @@ mod rule_tests {
#[test]
fn doublestar_adjacent_middle() {
let regex = rule_regex("root/test**file");
let regex = Rule::new("root/test**file").unwrap();
assert!(regex.is_match("root/test/file"));
assert!(regex.is_match("root/test/a/b/c/file"));
@@ -300,7 +313,7 @@ mod rule_tests {
#[test]
fn doublestar_nullable() {
let regex = rule_regex("root/**/file");
let regex = Rule::new("root/**/file").unwrap();
assert!(regex.is_match("root/test/file"));
assert!(regex.is_match("root/file"));
@@ -309,7 +322,7 @@ mod rule_tests {
#[test]
fn doublestar_nullable_post() {
let regex = rule_regex("root/**");
let regex = Rule::new("root/**").unwrap();
assert!(regex.is_match("root"));
assert!(regex.is_match("root/file"));
@@ -318,7 +331,7 @@ mod rule_tests {
#[test]
fn doublestar_nullable_pre() {
let regex = rule_regex("**/file");
let regex = Rule::new("**/file").unwrap();
assert!(regex.is_match("file"));
assert!(regex.is_match("root/file"));
@@ -327,7 +340,7 @@ mod rule_tests {
#[test]
fn doublestar_bad_extension() {
let regex = rule_regex("**.flac");
let regex = Rule::new("**.flac").unwrap();
assert!(regex.is_match("root/.flac"));
assert!(regex.is_match("root/a/.flac"));
@@ -340,7 +353,7 @@ mod rule_tests {
#[test]
fn doublestar_good_extension() {
let regex = rule_regex("**/*.flac");
let regex = Rule::new("**/*.flac").unwrap();
assert!(regex.is_match("root/.flac"));
assert!(regex.is_match("root/a/.flac"));
@@ -353,7 +366,7 @@ mod rule_tests {
#[test]
fn multi_slash_a() {
let regex = rule_regex("dir//file.txt");
let regex = Rule::new("dir//file.txt").unwrap();
assert!(regex.is_match("dir/file.txt"));
assert!(!regex.is_match("dirfile.txt"));
@@ -362,7 +375,7 @@ mod rule_tests {
#[test]
fn multi_slash_b() {
let regex = rule_regex("**///*.txt");
let regex = Rule::new("**///*.txt").unwrap();
assert!(regex.is_match("dir/file.txt"));
assert!(regex.is_match("dir/subdir/file.txt"));
@@ -371,7 +384,7 @@ mod rule_tests {
#[test]
fn multi_slash_c() {
let regex = rule_regex("///dir//**//*.txt//");
let regex = Rule::new("///dir//**//*.txt//").unwrap();
assert!(regex.is_match("dir/subdir/file.txt"));
assert!(regex.is_match("dir/sub1/sub2/file.txt"));