v0.0.3

2026-01-28 17:06:46 -08:00 · 2025-12-03 12:57:10 -08:00
parent f51162478b
commit 08003a3fbe
8 changed files with 1000 additions and 6 deletions
--- a/crates/datapath-macro/src/lib.rs
+++ b/crates/datapath-macro/src/lib.rs
@@ -549,7 +549,7 @@ fn generate_common_impls(
 	}

 	// Extract just the field names for struct construction
-	let field_names = typed_fields.iter().map(|(name, _)| name);
+	let field_names: Vec<_> = typed_fields.iter().map(|(name, _)| name).collect();

 	let datapath_impl = quote! {
 		impl ::datapath::Datapath for #struct_name {
@@ -600,6 +600,13 @@ fn generate_common_impls(
 					file,
 				})
 			}
+
+			fn field(&self, name: &str) -> Option<::std::string::String> {
+				match name {
+					#(stringify!(#field_names) => Some(self.#field_names.to_string()),)*
+					_ => None,
+				}
+			}
 		}
 	};

--- a/crates/datapath/Cargo.toml
+++ b/crates/datapath/Cargo.toml
@@ -17,5 +17,16 @@ workspace = true
 [dependencies]
 datapath-macro = { workspace = true }

+regex = { workspace = true, optional = true }
+tracing = { workspace = true, optional = true }
+trie-rs = { workspace = true, optional = true }
+itertools = { workspace = true, optional = true }
+tokio = { workspace = true, optional = true }
+
 [dev-dependencies]
 uuid = { version = "1", features = ["v4"] }
+
+[features]
+default = []
+index = ["dep:regex", "dep:trie-rs", "dep:tracing", "dep:itertools"]
+tokio = ["dep:tokio"]
--- a/crates/datapath/src/datapath.rs
+++ b/crates/datapath/src/datapath.rs
@@ -33,4 +33,8 @@ where
 	/// Parse a string as this datapath with a (possibly empty-string)
 	/// file, returning `None` if this string is invalid.
 	fn parse(path: &str) -> Option<DatapathFile<Self>>;
+
+	/// Get the string value of the field with the given name,
+	/// if it exists.
+	fn field(&self, name: &str) -> Option<String>;
 }
--- a/crates/datapath/src/index/mod.rs
+++ b/crates/datapath/src/index/mod.rs
@@ -0,0 +1,395 @@
+use itertools::Itertools;
+use std::{collections::HashMap, fmt::Display, str::FromStr};
+use trie_rs::map::{Trie, TrieBuilder};
+
+mod rule;
+
+/// A path segment in an [`AnyDatapath`]
+///
+/// This is one `/`-delimited component of a path string. It is produced
+/// by the [`FromStr`] impl and rendered back to text by the [`Display`] impl.
+#[derive(Debug, Clone, Hash, PartialEq, Eq)]
+enum PathSegment {
+	/// A constant value, like `web`
+	/// (any non-empty segment that contains no `=`).
+	Constant(String),

+	/// A key=value partition, like `domain=gouletpens.com`.
+	/// Only the first `=` separates the key from the value;
+	/// any later `=` stays inside `value`.
+	Value { key: String, value: String },
+}
+
+impl Display for PathSegment {
+	/// Write the segment in its path form: the bare text for a constant,
+	/// or `key=value` for a partition.
+	fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+		match self {
+			Self::Constant(text) => out.write_str(text),
+			Self::Value { key, value } => write!(out, "{key}={value}"),
+		}
+	}
+}
+
+impl FromStr for PathSegment {
+	type Err = ();

+	/// Parse one path segment.
+	///
+	/// Empty strings and strings containing a newline are rejected.
+	/// A string with at least one `=` becomes a [`PathSegment::Value`],
+	/// split at the first `=`; anything else is a [`PathSegment::Constant`].
+	fn from_str(s: &str) -> Result<Self, Self::Err> {
+		if s.is_empty() || s.contains('\n') {
+			return Err(());
+		}

+		let segment = match s.split_once('=') {
+			Some((key, value)) => Self::Value {
+				key: key.to_owned(),
+				value: value.to_owned(),
+			},
+			None => Self::Constant(s.to_owned()),
+		};

+		Ok(segment)
+	}
+}
+
+//
+// MARK: index
+//
+
+/// An in-memory cache of s3 paths.
+///
+/// Paths are grouped by their "pattern": the path with every `key=value`
+/// segment rewritten to `key=*`. Queries use a prefix search over these
+/// patterns to find candidates, then filter the candidates with a regex.
+#[derive(Debug)]
+pub struct DatapathIndex {
+	/// Maps a normalized pattern to every original path that produced it.
+	patterns: Trie<u8, Vec<String>>,
+	/// Number of paths fed into the index (each input is counted once).
+	len: usize,
+}
+
+impl DatapathIndex {
+	/// Convert a query string to a trie search key by normalizing values to `*`.
+	///
+	/// The key is only ever used as a *prefix* for the trie lookup; the query's
+	/// regex does the precise filtering afterwards. The key must therefore never
+	/// contain wildcard text that a stored pattern would not contain literally,
+	/// otherwise valid matches are dropped before the regex sees them.
+	///
+	/// Key construction stops (keeping the prefix built so far) at the first
+	/// segment that:
+	/// - contains `**` anywhere (a doublestar may span several path segments),
+	/// - is a constant containing `*` (e.g. `*` or `*.json`), or
+	/// - is a `key=value` segment whose *key* contains `*`.
+	///
+	/// Values of `key=value` segments are always replaced with `*`, which is
+	/// how stored paths are normalized as well.
+	fn query_to_key(query: &str) -> String {
+		let trimmed = query.trim().trim_end_matches("**").trim_matches('/');
+		let mut segments = Vec::new();
+		for seg in trimmed.split('/') {
+			// `root/**/x` and `root**x` both introduce an arbitrary number of
+			// segments here, so nothing from this point on is a usable prefix.
+			if seg.contains("**") {
+				break;
+			}

+			let segment = match PathSegment::from_str(seg) {
+				// Wildcard constants can't be used for trie prefix search
+				Ok(PathSegment::Constant(text)) if text.contains('*') => break,
+				// Stored keys are kept verbatim, so a wildcard key can't match by prefix
+				Ok(PathSegment::Value { key, .. }) if key.contains('*') => break,
+				// Normalize the value exactly like stored paths are normalized
+				Ok(PathSegment::Value { key, .. }) => PathSegment::Value {
+					key,
+					value: "*".into(),
+				},
+				Ok(constant) => constant,
+				// Unparseable segments (e.g. empty ones from `//`) are skipped
+				Err(_) => continue,
+			};

+			segments.push(segment);
+		}

+		segments.iter().join("/")
+	}
+
+	/// Create an index that holds no paths.
+	pub fn new_empty() -> Self {
+		let patterns = TrieBuilder::new().build();
+		Self { patterns, len: 0 }
+	}
+
+	/// Build an index from a collection of path strings.
+	///
+	/// Accepts anything iterable (an iterator, a `Vec`, an array, ...).
+	/// Each path is split on `/`; segments that fail to parse (such as the
+	/// empty segments produced by `//`) are ignored, and every `key=value`
+	/// segment is normalized to `key=*` to form the pattern the path is
+	/// stored under. Every input path is counted in [`Self::len`].
+	pub fn new<S: Into<String>, I: IntoIterator<Item = S>>(paths: I) -> Self {
+		let mut len = 0;
+		let mut patterns: HashMap<String, Vec<String>> = HashMap::new();

+		for s in paths {
+			let s: String = s.into();

+			let pattern = s
+				.split('/')
+				.filter_map(|seg| PathSegment::from_str(seg).ok())
+				.map(|segment| match segment {
+					PathSegment::Value { key, .. } => PathSegment::Value {
+						key,
+						value: "*".into(),
+					},
+					constant @ PathSegment::Constant(_) => constant,
+				})
+				.join("/");

+			patterns.entry(pattern).or_default().push(s);
+			len += 1;
+		}

+		let mut builder = TrieBuilder::new();
+		for (k, v) in patterns {
+			builder.push(k, v);
+		}

+		Self {
+			len,
+			patterns: builder.build(),
+		}
+	}
+
+	/// Build an index from paths received over a channel.
+	///
+	/// Paths are consumed until the channel yields `None`; each one is
+	/// normalized and stored the same way the synchronous constructor does.
+	#[cfg(feature = "tokio")]
+	pub async fn async_new<S: Into<String>>(mut paths: tokio::sync::mpsc::Receiver<S>) -> Self {
+		let mut grouped: HashMap<String, Vec<String>> = HashMap::new();
+		let mut len = 0;

+		while let Some(received) = paths.recv().await {
+			let path: String = received.into();

+			// Unparseable segments are dropped; values are normalized to `*`
+			let pattern = path
+				.split('/')
+				.filter_map(|raw| PathSegment::from_str(raw).ok())
+				.map(|segment| match segment {
+					PathSegment::Value { key, .. } => PathSegment::Value {
+						key,
+						value: "*".into(),
+					},
+					constant @ PathSegment::Constant(_) => constant,
+				})
+				.join("/");

+			grouped.entry(pattern).or_default().push(path);
+			len += 1;
+		}

+		let mut builder = TrieBuilder::new();
+		for (pattern, members) in grouped {
+			builder.push(pattern, members);
+		}

+		let patterns = builder.build();
+		Self { len, patterns }
+	}
+
+	/// Returns the number of paths that were fed into this index.
+	#[inline(always)]
+	pub fn len(&self) -> usize {
+		self.len
+	}
+
+	/// Returns `true` if [`Self::len`] is zero.
+	#[inline(always)]
+	pub fn is_empty(&self) -> bool {
+		self.len() == 0
+	}
+
+	/// Look up every known datapath matching `query`, which may contain
+	/// wildcards.
+	///
+	/// The result is `None` when the query is not a valid rule. Otherwise
+	/// it is an iterator over the matching paths, which is empty when
+	/// nothing matches.
+	pub fn query(&self, query: impl Into<String>) -> Option<impl Iterator<Item = String> + '_> {
+		let query: String = query.into();
+		let matcher = rule::Rule::new(query.as_str()).regex()?;
+		let key = Self::query_to_key(&query);

+		// The trie narrows the candidates by prefix; the regex decides
+		let candidates = self
+			.patterns
+			.predictive_search::<String, _>(&key)
+			.flat_map(|(_, paths)| paths.iter());

+		Some(candidates.filter(move |path| matcher.is_match(path)).cloned())
+	}
+
+	/// Check whether at least one known datapath matches `query`.
+	///
+	/// Returns `None` when the query is not a valid rule, otherwise
+	/// `Some(true)` or `Some(false)`. The search stops at the first match.
+	pub fn query_match(&self, query: impl Into<String>) -> Option<bool> {
+		let query: String = query.into();
+		let matcher = rule::Rule::new(query.as_str()).regex()?;
+		let key = Self::query_to_key(&query);

+		let found = self
+			.patterns
+			.predictive_search::<String, _>(&key)
+			.any(|(_, paths)| paths.iter().any(|path| matcher.is_match(path)));

+		Some(found)
+	}
+}
+
+// MARK: index tests
+
+#[cfg(test)]
+#[expect(clippy::unwrap_used)]
+mod index_tests {
+	use super::*;

+	/// Build an index over the given paths.
+	fn index_of(paths: &[&str]) -> DatapathIndex {
+		DatapathIndex::new(paths.iter().copied())
+	}

+	/// Run `query` against `idx` and collect every hit.
+	/// Panics if the query is invalid.
+	fn hits(idx: &DatapathIndex, query: &str) -> Vec<String> {
+		idx.query(query).unwrap().collect()
+	}

+	#[test]
+	fn datapath_index_empty() {
+		let idx = DatapathIndex::new(std::iter::empty::<String>());
+		let query = "web/domain=example.com";
+		assert_eq!(idx.query(query).unwrap().count(), 0);
+		assert!(idx.is_empty());
+		assert_eq!(idx.len(), 0);
+	}

+	#[test]
+	fn insert_and_lookup_exact_match() {
+		let idx = index_of(&["web/domain=example.com/ts=1234"]);

+		// Exact match
+		let results = hits(&idx, "web/domain=example.com/ts=1234");
+		assert_eq!(results.len(), 1);
+		assert_eq!(results[0], "web/domain=example.com/ts=1234");

+		// No match
+		let results = hits(&idx, "web/domain=other.com/ts=1234");
+		assert_eq!(results.len(), 0);

+		assert_eq!(idx.len(), 1);
+	}

+	#[test]
+	fn wildcard_constant_match() {
+		let idx = index_of(&[
+			"web/domain=example.com/ts=1234",
+			"api/domain=example.com/ts=1234",
+		]);

+		// Wildcard first segment
+		let results = hits(&idx, "*/domain=example.com/ts=1234");
+		assert_eq!(results.len(), 2);

+		assert_eq!(idx.len(), 2);
+	}

+	#[test]
+	fn wildcard_value_match() {
+		let idx = index_of(&[
+			"web/domain=example.com/ts=1234",
+			"web/domain=other.com/ts=1234",
+		]);

+		// Wildcard domain
+		let results = hits(&idx, "web/domain=*/ts=1234");
+		assert_eq!(results.len(), 2);
+	}

+	#[test]
+	fn multiple_datapaths() {
+		let idx = index_of(&[
+			"web/domain=example.com/ts=1234",
+			"web/domain=other.com/ts=1234",
+			"api/domain=example.com/ts=5678",
+		]);

+		// Specific lookup
+		let results = hits(&idx, "web/domain=example.com/ts=1234");
+		assert_eq!(results.len(), 1);
+		assert_eq!(results[0], "web/domain=example.com/ts=1234");

+		// Wildcard time lookup
+		let results = hits(&idx, "web/domain=example.com/ts=*");
+		assert_eq!(results.len(), 1);
+		assert_eq!(results[0], "web/domain=example.com/ts=1234");

+		// Double wildcard lookup
+		let results = hits(&idx, "web/domain=*/ts=*");
+		assert_eq!(results.len(), 2);

+		assert_eq!(idx.len(), 3);
+	}

+	#[test]
+	fn nested_wildcards() {
+		let idx = index_of(&[
+			"web/domain=example.com/ts=1234/crawl/2.5",
+			"web/domain=other.com/ts=5678/crawl/2.5",
+			"web/domain=example.com/ts=9999/crawl/3.0",
+		]);

+		// Multiple wildcards in path
+		let results = hits(&idx, "web/domain=*/ts=*/crawl/*");
+		assert_eq!(results.len(), 3);

+		// Selective wildcards
+		let results = hits(&idx, "web/domain=example.com/ts=*/crawl/*");
+		assert_eq!(results.len(), 2);
+	}

+	#[test]
+	fn partial_path_query() {
+		let idx = index_of(&["web/domain=example.com/ts=1234/crawl/2.5"]);

+		// Query with fewer segments than the stored path
+		let results = hits(&idx, "web/domain=example.com");
+		assert_eq!(results.len(), 0);
+	}

+	#[test]
+	fn longer_path_query() {
+		let idx = index_of(&["web/domain=example.com"]);

+		// Query with more segments than the stored path
+		let results = hits(&idx, "web/domain=example.com/ts=1234/crawl/2.5");
+		assert_eq!(results.len(), 0);
+	}

+	#[test]
+	fn query_match() {
+		let idx = index_of(&[
+			"web/domain=example.com/ts=1234",
+			"web/domain=other.com/ts=5678",
+		]);

+		// Match exists
+		assert!(idx.query_match("web/domain=example.com/ts=1234").unwrap());
+		assert!(idx.query_match("web/domain=*/ts=*").unwrap());

+		// No match
+		assert!(!idx.query_match("api/domain=example.com/ts=1234").unwrap());
+		assert!(!idx.query_match("web/domain=missing.com/ts=9999").unwrap());
+	}

+	#[test]
+	fn suffix_wildcard() {
+		let idx = index_of(&[
+			"web/domain=example.com/ts=1234/file1.json",
+			"web/domain=example.com/ts=1234/file2.json",
+			"web/domain=example.com/ts=5678/file3.json",
+		]);

+		// Query with suffix wildcard
+		let results = hits(&idx, "web/domain=example.com/**");
+		assert_eq!(results.len(), 3);

+		let results = hits(&idx, "web/domain=example.com/ts=1234/**");
+		assert_eq!(results.len(), 2);
+	}
+}
--- a/crates/datapath/src/index/rule.rs
+++ b/crates/datapath/src/index/rule.rs
@@ -0,0 +1,381 @@
+use regex::Regex;
+use tracing::warn;
+
+//
+// MARK: rule
+//
+
+/// One piece of a rule after it has been split into segments,
+/// used to assemble the final regex.
+#[derive(Debug)]
+enum RegexSegment {
+	/// A single segment.
+	/// Holds the regex source text for that segment.
+	Single(String),

+	/// An optional doublestar segment
+	/// (it may match nothing at all).
+	DoubleStar,
+}
+
+impl RegexSegment {
+	/// Returns the regex source for this part, including whatever
+	/// slash handling its position requires.
+	///
+	/// `prev` and `next` are the neighboring segments (if any).
+	/// A single segment only emits a trailing `[/]` when it is followed
+	/// by another single segment; a doublestar emits the slashes that
+	/// separate it from neighboring single segments.
+	///
+	/// # Panics
+	/// Panics if two doublestars are adjacent; callers must collapse
+	/// consecutive doublestars first.
+	fn to_regex_part(&self, prev: Option<&Self>, next: Option<&Self>) -> String {
+		match (prev, self, next) {
+			// Consecutive single segments need a trailing slash
+			(_, Self::Single(x), Some(Self::Single(_))) => format!("{x}[/]"),

+			// Terminal single segments don't need a trailing slash
+			(_, Self::Single(x), None) => x.to_owned(),

+			// Neighboring doublestar is always responsible for slashes
+			(_, Self::Single(x), Some(Self::DoubleStar)) => x.to_owned(),

+			// No additional slashes
+			(None, Self::DoubleStar, None) => "((?:.*)?)".into(),

+			// Leading slash
+			(Some(Self::Single(_)), Self::DoubleStar, None) => "((?:[/].*)?)".into(),

+			// Trailing slash
+			(None, Self::DoubleStar, Some(Self::Single(_))) => "((?:.*[/])?)".into(),

+			// Leading and trailing slash.
+			// Also, replace self with a [/] when empty.
+			(Some(Self::Single(_)), Self::DoubleStar, Some(Self::Single(_))) => {
+				"((?:[/].*[/])|[/])".into()
+			}

+			// Doublestars cannot be neighbors
+			(_, Self::DoubleStar, Some(Self::DoubleStar))
+			| (Some(Self::DoubleStar), Self::DoubleStar, _) => {
+				unreachable!("consecutive doublestars must be reduced")
+			}
+		}
+	}
+}
+
+/// A glob-like path rule that can be compiled into a [`Regex`].
+#[derive(Debug, Clone)]
+pub struct Rule {
+	/// The raw rule text, in which `*` and `**` act as wildcards
+	/// and `/` separates segments.
+	pub pattern: String,
+}
+
+impl Rule {
+	/// Wrap `pattern` in a rule. The pattern is stored as given;
+	/// it is only interpreted when the rule is compiled.
+	pub fn new(pattern: impl Into<String>) -> Self {
+		let pattern = pattern.into();
+		Self { pattern }
+	}
+
+	/// Turn this rule into a regex pattern.
+	///
+	/// Within a segment, `*` matches any run of characters other than `/`.
+	/// `**` matches any number of whole segments (including none) and does
+	/// not need to be delimited by slashes. Leading, trailing, and repeated
+	/// slashes in the pattern are ignored (a warning is logged for leading
+	/// and trailing ones). The resulting regex is anchored at both ends.
+	///
+	/// Returns `None` if this rule was invalid, i.e. if it contains a run
+	/// of three or more consecutive `*`.
+	pub fn regex(&self) -> Option<Regex> {
+		// Matches the segment delimiters: a run of two or more stars, or a slash.
+		// This never changes, so it is compiled once and reused by every call.
+		#[expect(clippy::unwrap_used)]
+		static SPLITTER: std::sync::LazyLock<Regex> =
+			std::sync::LazyLock::new(|| Regex::new("[*]{2,}|[/]").unwrap());

+		let pattern = &self.pattern;

+		if pattern.ends_with("/") {
+			warn!("Pattern `{pattern}` has a trailing slash which will be ignored");
+		}

+		if pattern.starts_with("/") {
+			warn!("Pattern `{pattern}` has a leading slash which will be ignored");
+		}

+		// Split on slashes or stars
+		// This is a lot like .split("/"), but handles
+		// the edge case where ** is not delimited by slashes
+		// (`root**test` is equivalent to `root/**/test`)
+		let segments = {
+			// Start and end of every delimiter, plus the end of the pattern.
+			// Slicing between consecutive bounds yields, alternately,
+			// the text between delimiters and the delimiters themselves.
+			let bounds = SPLITTER
+				.find_iter(pattern)
+				.flat_map(|m| [m.start(), m.end()])
+				.chain([pattern.len()]);

+			let mut parts = Vec::new();
+			let mut last = 0;
+			for next in bounds {
+				let seg = &pattern[last..next];
+				// Consecutive slashes are identical to a single slash
+				if seg != "/" && !seg.is_empty() {
+					parts.push(seg);
+				}
+				last = next;
+			}

+			parts
+		};

+		let mut rebuilt_segments = Vec::new();
+		let mut last_was_doublestar = false;
+		for segment in segments {
+			// This is a wildcard regex
+			// (**, ***, etc)
+			if segment.len() > 1 && segment.chars().all(|x| x == '*') {
+				// Only an exact doublestar is meaningful
+				if segment != "**" {
+					return None;
+				}

+				// Consecutive doublestars are meaningless
+				if !last_was_doublestar {
+					rebuilt_segments.push(RegexSegment::DoubleStar);
+				}
+				last_was_doublestar = true;
+				continue;
+			}
+			last_was_doublestar = false;

+			// Escape the literal text and turn each single star into
+			// "anything except a slash"
+			let rebuilt = segment
+				.split('*')
+				.map(regex::escape)
+				.collect::<Vec<_>>()
+				.join("([^/]*)");

+			rebuilt_segments.push(RegexSegment::Single(rebuilt));
+		}

+		let mut re_built = String::new();
+		let mut prev = None;
+		for (i, seg) in rebuilt_segments.iter().enumerate() {
+			let next = rebuilt_segments.get(i + 1);
+			re_built.push_str(&seg.to_regex_part(prev, next));
+			prev = Some(seg);
+		}

+		let re_built = format!("^{re_built}$");
+		// This regex should always be valid
+		#[expect(clippy::unwrap_used)]
+		Some(Regex::new(&re_built).unwrap())
+	}
+}
+
+//
+// MARK: tests
+//
+
+#[cfg(test)]
+#[expect(clippy::unwrap_used)]
+mod rule_tests {
+	use super::*;

+	/// Compile `pattern` into a regex, panicking if the rule is invalid.
+	fn rule_regex(pattern: &str) -> Regex {
+		Rule::new(pattern).regex().unwrap()
+	}

+	/// Assert that `pattern` matches every path in `accepted`
+	/// and none of the paths in `rejected`.
+	fn check(pattern: &str, accepted: &[&str], rejected: &[&str]) {
+		let regex = rule_regex(pattern);

+		for path in accepted {
+			assert!(regex.is_match(path), "`{pattern}` should match `{path}`");
+		}

+		for path in rejected {
+			assert!(
+				!regex.is_match(path),
+				"`{pattern}` should not match `{path}`"
+			);
+		}
+	}

+	#[test]
+	fn simple() {
+		check("file.txt", &["file.txt"], &["other.txt", "path/file.txt"]);
+	}

+	#[test]
+	fn simple_dir() {
+		check(
+			"dir/file.txt",
+			&["dir/file.txt"],
+			&["file.txt", "other/file.txt"],
+		);
+	}

+	#[test]
+	fn simple_star() {
+		check(
+			"*.txt",
+			&["file.txt", "other.txt"],
+			&["file.jpg", "nested/file.txt"],
+		);
+	}

+	#[test]
+	fn simple_doublestar() {
+		check(
+			"**/*.txt",
+			&["file.txt", "dir/file.txt", "dir/subdir/file.txt"],
+			&["file.jpg", "dir/file.jpg"],
+		);
+	}

+	#[test]
+	fn consecutive_doublestar() {
+		check(
+			"**/**/**/*.txt",
+			&["file.txt", "dir/file.txt", "dir/subdir/file.txt"],
+			&["file.jpg", "dir/file.jpg"],
+		);
+	}

+	#[test]
+	fn dual_star() {
+		check(
+			"**/*a*",
+			&[
+				"fileafile",
+				"dir/fileafile",
+				"filea",
+				"dir/filea",
+				"afile",
+				"dir/afile",
+			],
+			&["noletter", "dir/noletter"],
+		);
+	}

+	#[test]
+	fn single_end() {
+		check("**/*", &["file", "dir/file", "a/b/c/dir/file"], &[]);
+	}

+	#[test]
+	fn doublestar_end() {
+		check("root/**", &["root/file"], &["dir/file"]);
+	}

+	#[test]
+	fn doublestar_start() {
+		check("**/dir", &["dir", "a/b/dir"], &["dir/file"]);
+	}

+	#[test]
+	fn doublestar_adjacent_before() {
+		check(
+			"root/**test",
+			&["root/test", "root/a/test", "root/a/b/c/test"],
+			&["root/file", "root/xxtest"],
+		);
+	}

+	#[test]
+	fn doublestar_adjacent_after() {
+		check(
+			"root/test**",
+			&["root/test", "root/test/a", "root/test/a/b/c"],
+			&["root/testxx", "root/file"],
+		);
+	}

+	#[test]
+	fn doublestar_adjacent_middle() {
+		check(
+			"root/test**file",
+			&["root/test/file", "root/test/a/b/c/file"],
+			&[
+				"root/test",
+				"root/file",
+				"root/testfile",
+				"root/testxxfile",
+			],
+		);
+	}

+	#[test]
+	fn doublestar_nullable() {
+		check(
+			"root/**/file",
+			&["root/test/file", "root/file"],
+			&["rootfile"],
+		);
+	}

+	#[test]
+	fn doublestar_nullable_post() {
+		check("root/**", &["root", "root/file"], &["rootfile"]);
+	}

+	#[test]
+	fn doublestar_nullable_pre() {
+		check("**/file", &["file", "root/file"], &["rootfile"]);
+	}

+	#[test]
+	fn doublestar_bad_extension() {
+		check(
+			"**.flac",
+			&["root/.flac", "root/a/.flac"],
+			&[
+				"root/test.flac",
+				"test.flac",
+				"root/test/a/b/c.flac",
+				"root/testflac",
+				"test.mp3",
+			],
+		);
+	}

+	#[test]
+	fn doublestar_good_extension() {
+		check(
+			"**/*.flac",
+			&[
+				"root/.flac",
+				"root/a/.flac",
+				"root/test.flac",
+				"test.flac",
+				"root/test/a/b/c.flac",
+			],
+			&["root/testflac", "test.mp3"],
+		);
+	}

+	#[test]
+	fn multi_slash_a() {
+		check(
+			"dir//file.txt",
+			&["dir/file.txt"],
+			&["dirfile.txt", "dir/other.txt"],
+		);
+	}

+	#[test]
+	fn multi_slash_b() {
+		check(
+			"**///*.txt",
+			&["dir/file.txt", "dir/subdir/file.txt"],
+			&["file.jpg"],
+		);
+	}

+	#[test]
+	fn multi_slash_c() {
+		check(
+			"///dir//**//*.txt//",
+			&["dir/subdir/file.txt", "dir/sub1/sub2/file.txt"],
+			&["other/sub/file.txt", "dir/file.jpg"],
+		);
+	}
+}
--- a/crates/datapath/src/lib.rs
+++ b/crates/datapath/src/lib.rs
@@ -7,6 +7,10 @@
 #[cfg(test)]
 use uuid as _;

+// Silence the unused-dependency lint: tokio is only used by fns in index/mod.rs
+#[cfg(feature = "tokio")]
+use tokio as _;
+
 mod datapath;
 pub use datapath::*;

@@ -19,4 +23,10 @@ pub use schema::*;
 mod wildcardable;
 pub use wildcardable::*;

+#[cfg(feature = "index")]
+mod index;
+
+#[cfg(feature = "index")]
+pub use index::*;
+
 pub use datapath_macro::datapath;