Compare commits

...

2 Commits

Author SHA1 Message Date
a5837a7d35 More string extractors
Some checks failed
CI / Typos (push) Failing after 22s
CI / Clippy (push) Failing after 1m13s
CI / Build and test (push) Has been cancelled
CI / Build and test (all features) (push) Has started running
2026-03-11 10:57:36 -07:00
ae05cc6d6e Add nix files
Some checks failed
CI / Build and test (all features) (push) Waiting to run
CI / Typos (push) Failing after 16s
CI / Clippy (push) Failing after 3m36s
CI / Build and test (push) Failing after 6m37s
2026-03-11 10:43:52 -07:00
3 changed files with 282 additions and 5 deletions

View File

@@ -4,6 +4,20 @@ use std::sync::Arc;
use crate::{extract::traits::ObjectExtractor, value::PileValue};
/// Splits a field label of the form `name` or `name(args)` into its parts.
///
/// Returns `(name, Some(args))` when `s` contains a `(` and ends with `)`.
/// `args` is everything between the first `(` and the final `)`, so it may be
/// empty or contain further parentheses. Returns `(s, None)` otherwise.
///
/// A label with an opening parenthesis but no closing one at the very end is
/// malformed. It is returned whole as the name instead of being truncated at
/// the `(`, so that e.g. `trim(oops` is not mistaken for plain `trim`.
fn parse_name(s: &str) -> (&str, Option<&str>) {
    s.split_once('(')
        .and_then(|(name, rest)| rest.strip_suffix(')').map(|args| (name, Some(args))))
        .unwrap_or((s, None))
}
/// Extractor that exposes derived fields of a single string value.
pub struct StringExtractor {
/// The string this extractor operates on, shared behind an `Arc`.
item: Arc<SmartString<LazyCompact>>,
}
@@ -17,24 +31,49 @@ impl StringExtractor {
#[async_trait::async_trait]
impl ObjectExtractor for StringExtractor {
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
Ok(match name.as_str() {
"trim" => Some(PileValue::String(Arc::new(
let (name, args) = parse_name(name.as_str());
Ok(match (name, args) {
("trim", None) => Some(PileValue::String(Arc::new(
self.item.as_str().trim().into(),
))),
"upper" => Some(PileValue::String(Arc::new(
("upper", None) => Some(PileValue::String(Arc::new(
self.item.as_str().to_lowercase().into(),
))),
"lower" => Some(PileValue::String(Arc::new(
("lower", None) => Some(PileValue::String(Arc::new(
self.item.as_str().to_uppercase().into(),
))),
"nonempty" => Some(match self.item.is_empty() {
("nonempty", None) => Some(match self.item.is_empty() {
true => PileValue::Null,
false => PileValue::String(self.item.clone()),
}),
("trimprefix", Some(prefix)) => Some(PileValue::String(Arc::new(
self.item
.as_str()
.strip_prefix(prefix)
.unwrap_or(self.item.as_str())
.into(),
))),
("trimsuffix", Some(suffix)) => Some(PileValue::String(Arc::new(
self.item
.as_str()
.strip_suffix(suffix)
.unwrap_or(self.item.as_str())
.into(),
))),
("split", Some(by)) => Some(PileValue::Array(Arc::new(
self.item
.as_str()
.split(by)
.map(|s| PileValue::String(Arc::new(s.into())))
.collect(),
))),
_ => None,
})
}
@@ -49,3 +88,134 @@ impl ObjectExtractor for StringExtractor {
]);
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `StringExtractor` over `s`.
    fn extractor(s: &str) -> StringExtractor {
        StringExtractor::new(&Arc::new(s.into()))
    }

    /// Looks up the field `name` on `ext`, panicking if the label is invalid
    /// or the lookup returns an error.
    #[expect(clippy::unwrap_used)]
    async fn field(ext: &StringExtractor, name: &str) -> Option<PileValue> {
        ext.field(&Label::new(name).unwrap()).await.unwrap()
    }

    /// Unwraps a `PileValue::String` into an owned `String`; passes `None`
    /// through and panics on any other variant.
    fn string(v: Option<PileValue>) -> Option<String> {
        match v? {
            PileValue::String(s) => Some(s.as_str().to_owned()),
            _ => panic!("expected string"),
        }
    }

    /// Unwraps a `PileValue::Array` of strings into a `Vec<String>`; panics
    /// on `None`, on a non-array value, or on a non-string element.
    fn array(v: Option<PileValue>) -> Vec<String> {
        match v.expect("expected Some") {
            PileValue::Array(arr) => arr
                .iter()
                .map(|v| match v {
                    PileValue::String(s) => s.as_str().to_owned(),
                    _ => panic!("expected string element"),
                })
                .collect(),
            _ => panic!("expected array"),
        }
    }

    #[test]
    fn parse_name_without_args() {
        assert_eq!(parse_name("trim"), ("trim", None));
    }

    #[test]
    fn parse_name_with_args() {
        assert_eq!(parse_name("split(,)"), ("split", Some(",")));
        assert_eq!(parse_name("f()"), ("f", Some("")));
    }

    #[test]
    fn parse_name_nested_parens() {
        assert_eq!(
            parse_name("trimprefix(a(b))"),
            ("trimprefix", Some("a(b)"))
        );
    }

    #[tokio::test]
    async fn trim() {
        assert_eq!(
            string(field(&extractor(" hi "), "trim").await),
            Some("hi".into())
        );
    }

    #[tokio::test]
    async fn trim_no_args() {
        assert!(field(&extractor("x"), "trim(foo)").await.is_none());
    }

    #[tokio::test]
    async fn nonempty_with_content() {
        assert!(matches!(
            field(&extractor("hello"), "nonempty").await,
            Some(PileValue::String(_))
        ));
    }

    #[tokio::test]
    async fn nonempty_empty_string() {
        assert!(matches!(
            field(&extractor(""), "nonempty").await,
            Some(PileValue::Null)
        ));
    }

    #[tokio::test]
    async fn trimprefix_present() {
        assert_eq!(
            string(field(&extractor("foobar"), "trimprefix(foo)").await),
            Some("bar".into())
        );
    }

    #[tokio::test]
    async fn trimprefix_absent() {
        assert_eq!(
            string(field(&extractor("foobar"), "trimprefix(baz)").await),
            Some("foobar".into())
        );
    }

    #[tokio::test]
    async fn trimprefix_no_args() {
        assert!(field(&extractor("foobar"), "trimprefix").await.is_none());
    }

    #[tokio::test]
    async fn trimsuffix_present() {
        assert_eq!(
            string(field(&extractor("foobar"), "trimsuffix(bar)").await),
            Some("foo".into())
        );
    }

    #[tokio::test]
    async fn trimsuffix_absent() {
        assert_eq!(
            string(field(&extractor("foobar"), "trimsuffix(baz)").await),
            Some("foobar".into())
        );
    }

    #[tokio::test]
    async fn trimsuffix_no_args() {
        assert!(field(&extractor("foobar"), "trimsuffix").await.is_none());
    }

    #[tokio::test]
    async fn split_basic() {
        assert_eq!(
            array(field(&extractor("a,b,c"), "split(,)").await),
            vec!["a", "b", "c"]
        );
    }

    #[tokio::test]
    async fn split_no_match() {
        assert_eq!(
            array(field(&extractor("abc"), "split(,)").await),
            vec!["abc"]
        );
    }

    #[tokio::test]
    async fn split_no_args() {
        assert!(field(&extractor("abc"), "split").await.is_none());
    }

    #[tokio::test]
    async fn split_unclosed_paren() {
        assert!(field(&extractor("abc"), "split(,").await.is_none());
    }

    #[tokio::test]
    async fn unknown_field() {
        assert!(field(&extractor("abc"), "bogus").await.is_none());
    }
}

35
default.nix Normal file
View File

@@ -0,0 +1,35 @@
# Non-flake build of the `pile` binary.
#
# `pkgs` may be supplied by the caller; it defaults to `<nixpkgs>` from the
# Nix search path.
{
  pkgs ? import <nixpkgs> { },
}:
let
  inherit (pkgs) lib stdenv;

  # Library search path containing pdfium, injected into the wrapper below.
  pdfiumLibPath = lib.makeLibraryPath [ pkgs.pdfium-binaries ];

  # Extra frameworks linked only when building on Darwin.
  darwinInputs = lib.optionals stdenv.isDarwin [
    pkgs.darwin.apple_sdk.frameworks.Security
    pkgs.darwin.apple_sdk.frameworks.SystemConfiguration
  ];
in
pkgs.rustPlatform.buildRustPackage {
  pname = "pile";
  version = "0.0.1";

  src = ./.;
  cargoLock.lockFile = ./Cargo.lock;

  nativeBuildInputs = [
    pkgs.pkg-config
    pkgs.makeWrapper
  ];

  buildInputs = [
    pkgs.pdfium-binaries
    pkgs.openssl
  ]
  ++ darwinInputs;

  # Wrap the installed binary so the pdfium library directory is on
  # LD_LIBRARY_PATH when it runs.
  postInstall = ''
    wrapProgram $out/bin/pile \
    --prefix LD_LIBRARY_PATH : ${pdfiumLibPath}
  '';

  meta = {
    description = "pile - flexible file indexing";
    mainProgram = "pile";
  };
}

72
flake.nix Normal file
View File

@@ -0,0 +1,72 @@
{
  description = "pile - personal data indexer";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    flake-utils.url = "github:numtide/flake-utils";

    # rust-overlay is built against the same nixpkgs as the rest of the flake.
    rust-overlay.url = "github:oxalica/rust-overlay";
    rust-overlay.inputs.nixpkgs.follows = "nixpkgs";
  };

  outputs =
    { self, nixpkgs, rust-overlay, flake-utils }:
    flake-utils.lib.eachDefaultSystem (
      system:
      let
        pkgs = import nixpkgs {
          inherit system;
          overlays = [ (import rust-overlay) ];
        };
        inherit (pkgs) lib;

        # One pinned toolchain from rust-overlay serves as both cargo and rustc.
        rustToolchain = pkgs.rust-bin.stable."1.85.0".default;
        rustPlatform = pkgs.makeRustPlatform {
          cargo = rustToolchain;
          rustc = rustToolchain;
        };

        # Library search path containing pdfium, shared by the package wrapper
        # and the development shell.
        pdfiumLibPath = lib.makeLibraryPath [ pkgs.pdfium-binaries ];

        # Extra frameworks linked only when building on Darwin.
        darwinInputs = lib.optionals pkgs.stdenv.isDarwin [
          pkgs.darwin.apple_sdk.frameworks.Security
          pkgs.darwin.apple_sdk.frameworks.SystemConfiguration
        ];
      in
      {
        packages.default = rustPlatform.buildRustPackage {
          pname = "pile";
          version = "0.0.1";

          src = ./.;
          cargoLock.lockFile = ./Cargo.lock;

          nativeBuildInputs = [
            pkgs.pkg-config
            pkgs.makeWrapper
          ];

          buildInputs = [
            pkgs.pdfium-binaries
            pkgs.openssl
          ]
          ++ darwinInputs;

          # pdfium-render loads libpdfium dynamically at runtime
          postInstall = ''
            wrapProgram $out/bin/pile \
            --prefix LD_LIBRARY_PATH : ${pdfiumLibPath}
          '';

          meta = {
            description = "pile - personal data indexer and search tool";
            homepage = "https://github.com/markus/pile";
            license = lib.licenses.mit;
            mainProgram = "pile";
          };
        };

        # Development shell with the pinned toolchain and the native
        # dependencies; pdfium is put on LD_LIBRARY_PATH here as well.
        devShells.default = pkgs.mkShell {
          buildInputs = [
            rustToolchain
            pkgs.pdfium-binaries
            pkgs.openssl
            pkgs.pkg-config
          ];

          LD_LIBRARY_PATH = pdfiumLibPath;
        };
      }
    );
}