mirror of
https://github.com/rm-dr/datapath.git
synced 2026-05-16 00:08:59 -07:00
Wrap strings in Arc (greatly reduces memory usage)
This commit is contained in:
@@ -0,0 +1,127 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
//
|
||||
// MARK: str
|
||||
//
|
||||
|
||||
/// A borrowed view of a substring of an [`Arc<String>`].
///
/// The view selects the byte range `start..end` of `string`. Equality,
/// ordering, hashing and formatting all operate on the selected text only
/// (see [`ArcSubstr::as_str`]), so two views over different backing strings
/// are equal whenever the text they select is the same.
///
/// The view is just a reference plus two offsets, so it is `Copy`.
#[derive(Clone, Copy)]
pub struct ArcSubstr<'a> {
    /// The backing string this view borrows from.
    pub string: &'a Arc<String>,
    /// Byte offset at which the view begins (inclusive).
    pub start: usize,
    /// Byte offset at which the view stops (exclusive).
    pub end: usize,
}

impl<'a> ArcSubstr<'a> {
    /// Returns the selected text, i.e. `string[start..end]`.
    ///
    /// The returned slice borrows from the backing string (lifetime `'a`),
    /// not from this view, so it may outlive the `ArcSubstr` value itself.
    ///
    /// # Panics
    ///
    /// Panics if `start..end` is not a valid range of `string`: out of
    /// bounds, `start > end`, or an offset that does not fall on a UTF-8
    /// character boundary.
    pub fn as_str(&self) -> &'a str {
        // Copy the `'a` reference out of `self` first so the resulting slice
        // is tied to the backing string rather than to the `&self` borrow.
        let backing: &'a Arc<String> = self.string;
        &backing[self.start..self.end]
    }

    /// Creates a view that covers all of `string` (`0..string.len()`).
    pub fn from_string(string: &'a Arc<String>) -> Self {
        Self {
            start: 0,
            end: string.len(),
            string,
        }
    }
}

impl PartialEq for ArcSubstr<'_> {
    /// Two views are equal when the text they select is equal.
    fn eq(&self, other: &Self) -> bool {
        self.as_str() == other.as_str()
    }
}

impl Eq for ArcSubstr<'_> {}

impl std::hash::Hash for ArcSubstr<'_> {
    /// Hashes the selected text, keeping `Hash` consistent with `Eq`.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        std::hash::Hash::hash(self.as_str(), state);
    }
}

impl PartialOrd for ArcSubstr<'_> {
    /// Always `Some`; defers to the total order given by [`Ord`].
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for ArcSubstr<'_> {
    /// Orders views by comparing the selected text as `str`.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.as_str().cmp(other.as_str())
    }
}

impl std::fmt::Debug for ArcSubstr<'_> {
    /// Formats exactly like the `Debug` output of the selected `str`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Fully qualified so the call cannot be confused with `Display::fmt`.
        std::fmt::Debug::fmt(self.as_str(), f)
    }
}

impl std::fmt::Display for ArcSubstr<'_> {
    /// Formats exactly like the `Display` output of the selected `str`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Fully qualified so the call cannot be confused with `Debug::fmt`.
        std::fmt::Display::fmt(self.as_str(), f)
    }
}
|
||||
|
||||
//
|
||||
// MARK: string
|
||||
//
|
||||
|
||||
/// An owned [`ArcSubstr`]: a substring of a shared [`Arc<String>`].
///
/// The value holds its own `Arc` handle to the backing string together with
/// the byte range `start..end` it selects. Equality, ordering, hashing and
/// formatting all operate on the selected text only (see
/// [`ArcSubstring::as_str`]).
///
/// Cloning clones the `Arc` handle and copies the two offsets; the string
/// data itself is not copied.
#[derive(Clone)]
pub struct ArcSubstring {
    /// Shared handle to the backing string.
    pub string: Arc<String>,
    /// Byte offset at which the substring begins (inclusive).
    pub start: usize,
    /// Byte offset at which the substring stops (exclusive).
    pub end: usize,
}

impl ArcSubstring {
    /// Returns the selected text, i.e. `string[start..end]`.
    ///
    /// # Panics
    ///
    /// Panics if `start..end` is not a valid range of `string`: out of
    /// bounds, `start > end`, or an offset that does not fall on a UTF-8
    /// character boundary.
    pub fn as_str(&self) -> &str {
        let backing: &str = self.string.as_str();
        &backing[self.start..self.end]
    }

    /// Creates a substring that covers all of `string` (`0..string.len()`).
    pub fn from_string(string: Arc<String>) -> Self {
        // Read the length before `string` is moved into the struct.
        let end = string.len();
        Self {
            string,
            start: 0,
            end,
        }
    }
}

impl PartialEq for ArcSubstring {
    /// Two substrings are equal when the text they select is equal.
    fn eq(&self, other: &Self) -> bool {
        self.as_str() == other.as_str()
    }
}

impl Eq for ArcSubstring {}

impl std::hash::Hash for ArcSubstring {
    /// Hashes the selected text, keeping `Hash` consistent with `Eq`.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        std::hash::Hash::hash(self.as_str(), state);
    }
}

impl PartialOrd for ArcSubstring {
    /// Always `Some`; defers to the total order given by [`Ord`].
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for ArcSubstring {
    /// Orders substrings by comparing the selected text as `str`.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.as_str().cmp(other.as_str())
    }
}

impl std::fmt::Debug for ArcSubstring {
    /// Formats exactly like the `Debug` output of the selected `str`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Fully qualified so the call cannot be confused with `Display::fmt`.
        std::fmt::Debug::fmt(self.as_str(), f)
    }
}

impl std::fmt::Display for ArcSubstring {
    /// Formats exactly like the `Display` output of the selected `str`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Fully qualified so the call cannot be confused with `Debug::fmt`.
        std::fmt::Display::fmt(self.as_str(), f)
    }
}
|
||||
@@ -1,5 +1,5 @@
|
||||
use itertools::Itertools;
|
||||
use std::{collections::HashMap, fmt::Display, str::FromStr};
|
||||
use std::{collections::HashMap, fmt::Display, str::FromStr, sync::Arc};
|
||||
use tracing::trace;
|
||||
use trie_rs::map::{Trie, TrieBuilder};
|
||||
|
||||
@@ -54,7 +54,7 @@ impl FromStr for PathSegment {
|
||||
/// An in-memory cache of s3 paths.
|
||||
#[derive(Debug)]
|
||||
pub struct DatapathIndex {
|
||||
patterns: Trie<u8, Vec<String>>,
|
||||
patterns: Trie<u8, Vec<Arc<String>>>,
|
||||
len: usize,
|
||||
}
|
||||
|
||||
@@ -99,6 +99,8 @@ impl DatapathIndex {
|
||||
|
||||
for s in paths {
|
||||
let s: String = s.into();
|
||||
let s = Arc::new(s);
|
||||
|
||||
let mut segments = Vec::new();
|
||||
for seg in s.split('/') {
|
||||
segments.push(match PathSegment::from_str(&seg) {
|
||||
@@ -136,6 +138,8 @@ impl DatapathIndex {
|
||||
|
||||
while let Some(s) = paths.recv().await {
|
||||
let s: String = s.into();
|
||||
let s = Arc::new(s);
|
||||
|
||||
let mut segments = Vec::new();
|
||||
for seg in s.split('/') {
|
||||
segments.push(match PathSegment::from_str(&seg) {
|
||||
@@ -181,7 +185,10 @@ impl DatapathIndex {
|
||||
///
|
||||
/// Returns an empty iterator if no paths match.
|
||||
/// Returns `None` if the query was invalid.
|
||||
pub fn query(&self, query: impl Into<String>) -> Option<impl Iterator<Item = String> + '_> {
|
||||
pub fn query(
|
||||
&self,
|
||||
query: impl Into<String>,
|
||||
) -> Option<impl Iterator<Item = &Arc<String>> + '_> {
|
||||
let query: String = query.into();
|
||||
let regex = rule::Rule::new(query.clone())?;
|
||||
let key = Self::query_to_key(&query);
|
||||
@@ -191,13 +198,15 @@ impl DatapathIndex {
|
||||
self.patterns
|
||||
.predictive_search::<String, _>(&key)
|
||||
.flat_map(|(_, strings)| strings.iter())
|
||||
.filter(move |s| regex.is_match(s))
|
||||
.cloned(),
|
||||
.filter(move |s| regex.is_match(s)),
|
||||
)
|
||||
}
|
||||
|
||||
/// Like [Self::query], but with a precompiled rule
|
||||
pub fn query_rule<'a>(&'a self, rule: &'a rule::Rule) -> impl Iterator<Item = String> + 'a {
|
||||
pub fn query_rule<'a>(
|
||||
&'a self,
|
||||
rule: &'a rule::Rule,
|
||||
) -> impl Iterator<Item = &'a Arc<String>> + 'a {
|
||||
let key = Self::query_to_key(rule.pattern());
|
||||
trace!("DatapathIndex key is {key}");
|
||||
|
||||
@@ -205,7 +214,6 @@ impl DatapathIndex {
|
||||
.predictive_search::<String, _>(&key)
|
||||
.flat_map(|(_, strings)| strings.iter())
|
||||
.filter(move |s| rule.is_match(s))
|
||||
.cloned()
|
||||
}
|
||||
|
||||
/// Like [Self::query], but returns `true` if any paths match
|
||||
@@ -270,7 +278,7 @@ mod index_tests {
|
||||
.unwrap()
|
||||
.collect();
|
||||
assert_eq!(results.len(), 1);
|
||||
assert_eq!(results[0], "web/domain=example.com/ts=1234");
|
||||
assert_eq!(results[0].as_ref(), "web/domain=example.com/ts=1234");
|
||||
|
||||
// No match
|
||||
let results: Vec<_> = idx.query("web/domain=other.com/ts=1234").unwrap().collect();
|
||||
@@ -322,12 +330,12 @@ mod index_tests {
|
||||
.unwrap()
|
||||
.collect();
|
||||
assert_eq!(results.len(), 1);
|
||||
assert_eq!(results[0], "web/domain=example.com/ts=1234");
|
||||
assert_eq!(results[0].as_ref(), "web/domain=example.com/ts=1234");
|
||||
|
||||
// Wildcard time lookup
|
||||
let results: Vec<_> = idx.query("web/domain=example.com/ts=*").unwrap().collect();
|
||||
assert_eq!(results.len(), 1);
|
||||
assert_eq!(results[0], "web/domain=example.com/ts=1234");
|
||||
assert_eq!(results[0].as_ref(), "web/domain=example.com/ts=1234");
|
||||
|
||||
// Double wildcard lookup
|
||||
let results: Vec<_> = idx.query("web/domain=*/ts=*").unwrap().collect();
|
||||
|
||||
@@ -23,6 +23,9 @@ pub use schema::*;
|
||||
mod wildcardable;
|
||||
pub use wildcardable::*;
|
||||
|
||||
mod arcsubstr;
|
||||
pub use arcsubstr::*;
|
||||
|
||||
#[cfg(feature = "index")]
|
||||
mod index;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user