mirror of
https://github.com/rm-dr/datapath.git
synced 2026-05-16 00:08:59 -07:00
Wrap strings in Arc (greatly reduces memory usage)
This commit is contained in:
Generated
+2
-2
@@ -50,7 +50,7 @@ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "datapath"
|
name = "datapath"
|
||||||
version = "0.0.5"
|
version = "0.0.6"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"datapath-macro",
|
"datapath-macro",
|
||||||
"itertools",
|
"itertools",
|
||||||
@@ -63,7 +63,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "datapath-macro"
|
name = "datapath-macro"
|
||||||
version = "0.0.5"
|
version = "0.0.6"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
|
|||||||
+2
-2
@@ -11,7 +11,7 @@ readme = "README.md"
|
|||||||
authors = ["rm-dr"]
|
authors = ["rm-dr"]
|
||||||
|
|
||||||
# Don't forget to bump datapath-macro below!
|
# Don't forget to bump datapath-macro below!
|
||||||
version = "0.0.5"
|
version = "0.0.6"
|
||||||
|
|
||||||
[workspace.lints.rust]
|
[workspace.lints.rust]
|
||||||
unused_import_braces = "deny"
|
unused_import_braces = "deny"
|
||||||
@@ -70,7 +70,7 @@ cargo_common_metadata = "deny"
|
|||||||
#
|
#
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
datapath-macro = { path = "crates/datapath-macro", version = "0.0.5" }
|
datapath-macro = { path = "crates/datapath-macro", version = "0.0.6" }
|
||||||
datapath = { path = "crates/datapath" }
|
datapath = { path = "crates/datapath" }
|
||||||
|
|
||||||
chrono = "0.4.42"
|
chrono = "0.4.42"
|
||||||
|
|||||||
@@ -0,0 +1,127 @@
|
|||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
//
|
||||||
|
// MARK: str
|
||||||
|
//
|
||||||
|
|
||||||
|
/// A borrowed view of a byte range within an [`Arc<String>`].
///
/// `start` and `end` are byte offsets into `string`, selecting `start..end`
/// (end-exclusive). Equality, ordering, hashing and formatting all operate on
/// the selected text only, so two views compare equal whenever their text
/// matches, regardless of which backing string or offsets they use.
///
/// The fields are public and are not validated; see [`ArcSubstr::as_str`]
/// for the panic conditions that follow from invalid offsets.
///
/// The view is just a shared reference plus two offsets, so it is `Copy`.
#[derive(Clone, Copy)]
pub struct ArcSubstr<'a> {
	/// The backing string this view borrows from.
	pub string: &'a Arc<String>,
	/// Byte offset of the first byte of the view (inclusive).
	pub start: usize,
	/// Byte offset one past the last byte of the view (exclusive).
	pub end: usize,
}

impl<'a> ArcSubstr<'a> {
	/// Returns the text selected by `start..end`.
	///
	/// The returned slice borrows from the backing string (lifetime `'a`),
	/// not from this view, so it may outlive the `ArcSubstr` it came from.
	///
	/// # Panics
	///
	/// Panics if `start > end`, if `end` is past the end of the backing
	/// string, or if either offset is not on a UTF-8 character boundary.
	pub fn as_str(&self) -> &'a str {
		// Copy the `&'a` reference out of `self` first, so the slice is tied
		// to the backing string rather than to the (shorter) borrow of `self`.
		let backing: &'a str = self.string;
		&backing[self.start..self.end]
	}

	/// Creates a view that covers all of `string`.
	pub fn from_string(string: &'a Arc<String>) -> Self {
		Self {
			start: 0,
			end: string.len(),
			string,
		}
	}
}

impl PartialEq for ArcSubstr<'_> {
	/// Two views are equal when their selected text is equal.
	fn eq(&self, other: &Self) -> bool {
		self.as_str() == other.as_str()
	}
}

impl Eq for ArcSubstr<'_> {}

impl std::hash::Hash for ArcSubstr<'_> {
	/// Hashes the selected text only, keeping `Hash` consistent with `Eq`.
	fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
		self.as_str().hash(state);
	}
}

impl PartialOrd for ArcSubstr<'_> {
	fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
		Some(self.cmp(other))
	}
}

impl Ord for ArcSubstr<'_> {
	/// Orders views by their selected text, using `str`'s ordering.
	fn cmp(&self, other: &Self) -> std::cmp::Ordering {
		self.as_str().cmp(other.as_str())
	}
}

impl std::fmt::Debug for ArcSubstr<'_> {
	/// Formats the selected text exactly as `str`'s `Debug` would.
	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
		// Fully qualified so the call does not depend on which `fmt` traits
		// happen to be in scope.
		std::fmt::Debug::fmt(self.as_str(), f)
	}
}

impl std::fmt::Display for ArcSubstr<'_> {
	/// Formats the selected text exactly as `str`'s `Display` would.
	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
		// Fully qualified so the call does not depend on which `fmt` traits
		// happen to be in scope.
		std::fmt::Display::fmt(self.as_str(), f)
	}
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// MARK: string
|
||||||
|
//
|
||||||
|
|
||||||
|
/// An owned byte range within a shared [`Arc<String>`].
///
/// This holds its own `Arc` handle to the backing string together with the
/// byte offsets `start..end` (end-exclusive). Equality, ordering, hashing and
/// formatting all operate on the selected text only.
///
/// The fields are public and are not validated; see
/// [`ArcSubstring::as_str`] for the panic conditions that follow from
/// invalid offsets.
///
/// Cloning copies the offsets and clones the `Arc` handle; the string data
/// itself is shared, not duplicated.
#[derive(Clone)]
pub struct ArcSubstring {
	/// Shared handle to the backing string.
	pub string: Arc<String>,
	/// Byte offset of the first byte of the substring (inclusive).
	pub start: usize,
	/// Byte offset one past the last byte of the substring (exclusive).
	pub end: usize,
}

impl ArcSubstring {
	/// Returns the text selected by `start..end`.
	///
	/// # Panics
	///
	/// Panics if `start > end`, if `end` is past the end of the backing
	/// string, or if either offset is not on a UTF-8 character boundary.
	pub fn as_str(&self) -> &str {
		let backing: &str = &self.string;
		&backing[self.start..self.end]
	}

	/// Creates a substring that covers all of `string`.
	pub fn from_string(string: Arc<String>) -> Self {
		Self {
			start: 0,
			end: string.len(),
			string,
		}
	}
}

impl PartialEq for ArcSubstring {
	/// Two substrings are equal when their selected text is equal.
	fn eq(&self, other: &Self) -> bool {
		self.as_str() == other.as_str()
	}
}

impl Eq for ArcSubstring {}

impl std::hash::Hash for ArcSubstring {
	/// Hashes the selected text only, keeping `Hash` consistent with `Eq`.
	fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
		self.as_str().hash(state);
	}
}

impl PartialOrd for ArcSubstring {
	fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
		Some(self.cmp(other))
	}
}

impl Ord for ArcSubstring {
	/// Orders substrings by their selected text, using `str`'s ordering.
	fn cmp(&self, other: &Self) -> std::cmp::Ordering {
		self.as_str().cmp(other.as_str())
	}
}

impl std::fmt::Debug for ArcSubstring {
	/// Formats the selected text exactly as `str`'s `Debug` would.
	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
		// Fully qualified so the call does not depend on which `fmt` traits
		// happen to be in scope.
		std::fmt::Debug::fmt(self.as_str(), f)
	}
}

impl std::fmt::Display for ArcSubstring {
	/// Formats the selected text exactly as `str`'s `Display` would.
	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
		// Fully qualified so the call does not depend on which `fmt` traits
		// happen to be in scope.
		std::fmt::Display::fmt(self.as_str(), f)
	}
}
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use std::{collections::HashMap, fmt::Display, str::FromStr};
|
use std::{collections::HashMap, fmt::Display, str::FromStr, sync::Arc};
|
||||||
use tracing::trace;
|
use tracing::trace;
|
||||||
use trie_rs::map::{Trie, TrieBuilder};
|
use trie_rs::map::{Trie, TrieBuilder};
|
||||||
|
|
||||||
@@ -54,7 +54,7 @@ impl FromStr for PathSegment {
|
|||||||
/// An in-memory cache of s3 paths.
|
/// An in-memory cache of s3 paths.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct DatapathIndex {
|
pub struct DatapathIndex {
|
||||||
patterns: Trie<u8, Vec<String>>,
|
patterns: Trie<u8, Vec<Arc<String>>>,
|
||||||
len: usize,
|
len: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -99,6 +99,8 @@ impl DatapathIndex {
|
|||||||
|
|
||||||
for s in paths {
|
for s in paths {
|
||||||
let s: String = s.into();
|
let s: String = s.into();
|
||||||
|
let s = Arc::new(s);
|
||||||
|
|
||||||
let mut segments = Vec::new();
|
let mut segments = Vec::new();
|
||||||
for seg in s.split('/') {
|
for seg in s.split('/') {
|
||||||
segments.push(match PathSegment::from_str(&seg) {
|
segments.push(match PathSegment::from_str(&seg) {
|
||||||
@@ -136,6 +138,8 @@ impl DatapathIndex {
|
|||||||
|
|
||||||
while let Some(s) = paths.recv().await {
|
while let Some(s) = paths.recv().await {
|
||||||
let s: String = s.into();
|
let s: String = s.into();
|
||||||
|
let s = Arc::new(s);
|
||||||
|
|
||||||
let mut segments = Vec::new();
|
let mut segments = Vec::new();
|
||||||
for seg in s.split('/') {
|
for seg in s.split('/') {
|
||||||
segments.push(match PathSegment::from_str(&seg) {
|
segments.push(match PathSegment::from_str(&seg) {
|
||||||
@@ -181,7 +185,10 @@ impl DatapathIndex {
|
|||||||
///
|
///
|
||||||
/// Returns an empty iterator if no paths match.
|
/// Returns an empty iterator if no paths match.
|
||||||
/// Returns `None` if the query was invalid.
|
/// Returns `None` if the query was invalid.
|
||||||
pub fn query(&self, query: impl Into<String>) -> Option<impl Iterator<Item = String> + '_> {
|
pub fn query(
|
||||||
|
&self,
|
||||||
|
query: impl Into<String>,
|
||||||
|
) -> Option<impl Iterator<Item = &Arc<String>> + '_> {
|
||||||
let query: String = query.into();
|
let query: String = query.into();
|
||||||
let regex = rule::Rule::new(query.clone())?;
|
let regex = rule::Rule::new(query.clone())?;
|
||||||
let key = Self::query_to_key(&query);
|
let key = Self::query_to_key(&query);
|
||||||
@@ -191,13 +198,15 @@ impl DatapathIndex {
|
|||||||
self.patterns
|
self.patterns
|
||||||
.predictive_search::<String, _>(&key)
|
.predictive_search::<String, _>(&key)
|
||||||
.flat_map(|(_, strings)| strings.iter())
|
.flat_map(|(_, strings)| strings.iter())
|
||||||
.filter(move |s| regex.is_match(s))
|
.filter(move |s| regex.is_match(s)),
|
||||||
.cloned(),
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Like [Self::query], but with a precompiled rule
|
/// Like [Self::query], but with a precompiled rule
|
||||||
pub fn query_rule<'a>(&'a self, rule: &'a rule::Rule) -> impl Iterator<Item = String> + 'a {
|
pub fn query_rule<'a>(
|
||||||
|
&'a self,
|
||||||
|
rule: &'a rule::Rule,
|
||||||
|
) -> impl Iterator<Item = &'a Arc<String>> + 'a {
|
||||||
let key = Self::query_to_key(rule.pattern());
|
let key = Self::query_to_key(rule.pattern());
|
||||||
trace!("DatapathIndex key is {key}");
|
trace!("DatapathIndex key is {key}");
|
||||||
|
|
||||||
@@ -205,7 +214,6 @@ impl DatapathIndex {
|
|||||||
.predictive_search::<String, _>(&key)
|
.predictive_search::<String, _>(&key)
|
||||||
.flat_map(|(_, strings)| strings.iter())
|
.flat_map(|(_, strings)| strings.iter())
|
||||||
.filter(move |s| rule.is_match(s))
|
.filter(move |s| rule.is_match(s))
|
||||||
.cloned()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Like [Self::query], but returns `true` if any paths match
|
/// Like [Self::query], but returns `true` if any paths match
|
||||||
@@ -270,7 +278,7 @@ mod index_tests {
|
|||||||
.unwrap()
|
.unwrap()
|
||||||
.collect();
|
.collect();
|
||||||
assert_eq!(results.len(), 1);
|
assert_eq!(results.len(), 1);
|
||||||
assert_eq!(results[0], "web/domain=example.com/ts=1234");
|
assert_eq!(results[0].as_ref(), "web/domain=example.com/ts=1234");
|
||||||
|
|
||||||
// No match
|
// No match
|
||||||
let results: Vec<_> = idx.query("web/domain=other.com/ts=1234").unwrap().collect();
|
let results: Vec<_> = idx.query("web/domain=other.com/ts=1234").unwrap().collect();
|
||||||
@@ -322,12 +330,12 @@ mod index_tests {
|
|||||||
.unwrap()
|
.unwrap()
|
||||||
.collect();
|
.collect();
|
||||||
assert_eq!(results.len(), 1);
|
assert_eq!(results.len(), 1);
|
||||||
assert_eq!(results[0], "web/domain=example.com/ts=1234");
|
assert_eq!(results[0].as_ref(), "web/domain=example.com/ts=1234");
|
||||||
|
|
||||||
// Wildcard time lookup
|
// Wildcard time lookup
|
||||||
let results: Vec<_> = idx.query("web/domain=example.com/ts=*").unwrap().collect();
|
let results: Vec<_> = idx.query("web/domain=example.com/ts=*").unwrap().collect();
|
||||||
assert_eq!(results.len(), 1);
|
assert_eq!(results.len(), 1);
|
||||||
assert_eq!(results[0], "web/domain=example.com/ts=1234");
|
assert_eq!(results[0].as_ref(), "web/domain=example.com/ts=1234");
|
||||||
|
|
||||||
// Double wildcard lookup
|
// Double wildcard lookup
|
||||||
let results: Vec<_> = idx.query("web/domain=*/ts=*").unwrap().collect();
|
let results: Vec<_> = idx.query("web/domain=*/ts=*").unwrap().collect();
|
||||||
|
|||||||
@@ -23,6 +23,9 @@ pub use schema::*;
|
|||||||
mod wildcardable;
|
mod wildcardable;
|
||||||
pub use wildcardable::*;
|
pub use wildcardable::*;
|
||||||
|
|
||||||
|
mod arcsubstr;
|
||||||
|
pub use arcsubstr::*;
|
||||||
|
|
||||||
#[cfg(feature = "index")]
|
#[cfg(feature = "index")]
|
||||||
mod index;
|
mod index;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user