Compare commits

...

3 Commits

Author SHA1 Message Date
478eebb5bf Bump versions
Some checks failed
CI / Typos (push) Successful in 17s
CI / Build and test (push) Failing after 2m12s
CI / Clippy (push) Failing after 3m24s
CI / Build and test (all features) (push) Failing after 7m21s
2026-03-11 11:12:46 -07:00
f8211dc44a More string extractors 2026-03-11 11:12:46 -07:00
cc27056280 Add nix files 2026-03-11 11:12:44 -07:00
8 changed files with 320 additions and 19 deletions

13
.editorconfig Normal file
View File

@@ -0,0 +1,13 @@
# Top-most EditorConfig file: editors stop looking in parent directories here.
root = true
# Defaults for every file: tab indentation shown 4 columns wide, LF line
# endings, UTF-8, no trailing whitespace, and a newline at end of file.
[*]
indent_style = tab
indent_size = 4
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
# YAML override: two-space indentation (YAML does not allow tabs for indentation).
[*.yml]
indent_style = space
indent_size = 2

12
Cargo.lock generated
View File

@@ -2481,7 +2481,7 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]]
name = "pile"
version = "0.0.1"
version = "0.0.2"
dependencies = [
"anstyle",
"anyhow",
@@ -2504,7 +2504,7 @@ dependencies = [
[[package]]
name = "pile-config"
version = "0.0.1"
version = "0.0.2"
dependencies = [
"serde",
"smartstring",
@@ -2514,7 +2514,7 @@ dependencies = [
[[package]]
name = "pile-dataset"
version = "0.0.1"
version = "0.0.2"
dependencies = [
"axum",
"chrono",
@@ -2535,7 +2535,7 @@ dependencies = [
[[package]]
name = "pile-flac"
version = "0.0.1"
version = "0.0.2"
dependencies = [
"base64",
"itertools 0.14.0",
@@ -2549,7 +2549,7 @@ dependencies = [
[[package]]
name = "pile-toolbox"
version = "0.0.1"
version = "0.0.2"
dependencies = [
"thiserror",
"tokio",
@@ -2557,7 +2557,7 @@ dependencies = [
[[package]]
name = "pile-value"
version = "0.0.1"
version = "0.0.2"
dependencies = [
"async-trait",
"aws-sdk-s3",

View File

@@ -5,7 +5,7 @@ resolver = "2"
[workspace.package]
rust-version = "1.94.0"
edition = "2024"
version = "0.0.1"
version = "0.0.2"
[workspace.lints.rust]
unused_import_braces = "deny"
@@ -73,15 +73,15 @@ pile-value = { path = "crates/pile-value" }
tantivy = "0.25.0"
axum = { version = "0.8.8", features = ["macros", "multipart"] }
utoipa = { version = "5.4.0", features = [
"axum_extras",
"chrono",
"url",
"uuid",
"axum_extras",
"chrono",
"url",
"uuid",
] }
utoipa-swagger-ui = { version = "9.0.2", features = [
"axum",
"debug-embed",
"vendored",
"axum",
"debug-embed",
"vendored",
] }
# Async & Parallelism

26
bump-version.sh Executable file
View File

@@ -0,0 +1,26 @@
#!/usr/bin/env bash
#
# Interactively bump the workspace version.
#
# Reads the current version from the first `version = "..."` line of the
# workspace Cargo.toml, prompts for a new version, and rewrites the version
# in both Cargo.toml and default.nix (both expected next to this script).
#
# Exits non-zero without touching either file if the current version cannot
# be found, the new version is empty or malformed, or either file does not
# contain the expected version line.
set -euo pipefail

CARGO_TOML="$(dirname "$0")/Cargo.toml"
DEFAULT_NIX="$(dirname "$0")/default.nix"

# MAJOR.MINOR.PATCH with an optional pre-release/build suffix. Restricting
# versions to this alphabet also guarantees they contain nothing that is
# special in a sed replacement (`/`, `&`, `\`).
VERSION_RE='^[0-9]+\.[0-9]+\.[0-9]+([-+][0-9A-Za-z.+-]+)?$'

# Abort unless file $1 contains a line matching basic regex $2.
require_match() {
	if ! grep -q -- "$2" "$1"; then
		echo "No line matching '$2' in $1. Aborting." >&2
		exit 1
	fi
}

# Apply sed script $2 to file $1 in place.
# `-i.bak` (suffix attached) works with both GNU and BSD/macOS sed, whereas a
# bare `-i` is GNU-only; the backup is removed afterwards.
sed_in_place() {
	sed -i.bak "$2" "$1"
	rm -f "$1.bak"
}

# Read current version from workspace Cargo.toml
if ! current_line=$(grep -m 1 '^version = "' "$CARGO_TOML"); then
	echo "No version line found in $CARGO_TOML. Aborting." >&2
	exit 1
fi
current=${current_line#version = \"}
current=${current%%\"*}
if [[ ! "$current" =~ $VERSION_RE ]]; then
	echo "Unexpected current version '$current' in $CARGO_TOML. Aborting." >&2
	exit 1
fi
echo "Current version: $current"

read -rp "New version: " new
if [[ -z "$new" ]]; then
	echo "No version entered. Aborting." >&2
	exit 1
fi
if [[ ! "$new" =~ $VERSION_RE ]]; then
	echo "Invalid version '$new' (expected MAJOR.MINOR.PATCH[-pre][+build]). Aborting." >&2
	exit 1
fi

# Escape the dots so that e.g. 0.0.1 cannot also match 0x0y1.
current_re=${current//./\\.}
cargo_pattern="^version = \"$current_re\""
nix_pattern="version = \"$current_re\";"

# Check both files up front so a missing match cannot leave them half-bumped.
require_match "$CARGO_TOML" "$cargo_pattern"
require_match "$DEFAULT_NIX" "$nix_pattern"

# Update Cargo.toml workspace version
# (every line that starts with the current version assignment is rewritten)
sed_in_place "$CARGO_TOML" "s/$cargo_pattern/version = \"$new\"/"

# Update default.nix version field
sed_in_place "$DEFAULT_NIX" "s/$nix_pattern/version = \"$new\";/"

echo "Bumped $current -> $new in:"
echo " $CARGO_TOML"
echo " $DEFAULT_NIX"
echo "Cargo.lock is not touched by this script; refresh it with a cargo build or check."

View File

@@ -4,6 +4,20 @@ use std::sync::Arc;
use crate::{extract::traits::ObjectExtractor, value::PileValue};
/// Splits a field name of the form `name` or `name(args)` into its parts.
///
/// Returns `(name, Some(args))` when `s` contains a `(` and ends with `)`;
/// `args` is everything between the first `(` and the final `)` and may be
/// empty. Returns `(s, None)` when `s` contains no `(`.
///
/// A name with an opening parenthesis but no closing one (e.g. `trim(foo`)
/// is malformed. It is returned whole as `(s, None)` so that it cannot be
/// mistaken for the argument-less field `trim`.
fn parse_name(s: &str) -> (&str, Option<&str>) {
	let Some((name, rest)) = s.split_once('(') else {
		return (s, None);
	};
	match rest.strip_suffix(')') {
		Some(args) => (name, Some(args)),
		// Unclosed parenthesis: keep the full string so no known field matches.
		None => (s, None),
	}
}
/// Extractor that exposes derived values of a string through [`ObjectExtractor`].
pub struct StringExtractor {
/// The shared string that every field is computed from.
item: Arc<SmartString<LazyCompact>>,
}
@@ -17,24 +31,49 @@ impl StringExtractor {
#[async_trait::async_trait]
impl ObjectExtractor for StringExtractor {
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
Ok(match name.as_str() {
"trim" => Some(PileValue::String(Arc::new(
let (name, args) = parse_name(name.as_str());
Ok(match (name, args) {
("trim", None) => Some(PileValue::String(Arc::new(
self.item.as_str().trim().into(),
))),
"upper" => Some(PileValue::String(Arc::new(
("upper", None) => Some(PileValue::String(Arc::new(
self.item.as_str().to_lowercase().into(),
))),
"lower" => Some(PileValue::String(Arc::new(
("lower", None) => Some(PileValue::String(Arc::new(
self.item.as_str().to_uppercase().into(),
))),
"nonempty" => Some(match self.item.is_empty() {
("nonempty", None) => Some(match self.item.is_empty() {
true => PileValue::Null,
false => PileValue::String(self.item.clone()),
}),
("trimprefix", Some(prefix)) => Some(PileValue::String(Arc::new(
self.item
.as_str()
.strip_prefix(prefix)
.unwrap_or(self.item.as_str())
.into(),
))),
("trimsuffix", Some(suffix)) => Some(PileValue::String(Arc::new(
self.item
.as_str()
.strip_suffix(suffix)
.unwrap_or(self.item.as_str())
.into(),
))),
("split", Some(by)) => Some(PileValue::Array(Arc::new(
self.item
.as_str()
.split(by)
.map(|s| PileValue::String(Arc::new(s.into())))
.collect(),
))),
_ => None,
})
}
@@ -49,3 +88,134 @@ impl ObjectExtractor for StringExtractor {
]);
}
}
#[cfg(test)]
mod tests {
	use super::*;

	/// Builds an extractor over a fresh copy of `s`.
	fn extractor(s: &str) -> StringExtractor {
		let item = Arc::new(s.into());
		StringExtractor::new(&item)
	}

	/// Looks up `name` on `ext`, panicking on an invalid label or an I/O error.
	#[expect(clippy::unwrap_used)]
	async fn field(ext: &StringExtractor, name: &str) -> Option<PileValue> {
		let label = Label::new(name).unwrap();
		ext.field(&label).await.unwrap()
	}

	/// Unwraps an optional string value; panics if the value is not a string.
	fn string(v: Option<PileValue>) -> Option<String> {
		v.map(|value| match value {
			PileValue::String(s) => s.as_str().to_owned(),
			_ => panic!("expected string"),
		})
	}

	/// Unwraps an array of strings; panics on `None` or on any other shape.
	fn array(v: Option<PileValue>) -> Vec<String> {
		let PileValue::Array(items) = v.expect("expected Some") else {
			panic!("expected array")
		};

		items
			.iter()
			.map(|v| match v {
				PileValue::String(s) => s.as_str().to_owned(),
				_ => panic!("expected string element"),
			})
			.collect()
	}

	#[tokio::test]
	async fn trim() {
		let ext = extractor(" hi ");
		let got = string(field(&ext, "trim").await);
		assert_eq!(got.as_deref(), Some("hi"));
	}

	// `trim` takes no argument, so supplying one must not resolve.
	#[tokio::test]
	async fn trim_no_args() {
		let ext = extractor("x");
		let got = field(&ext, "trim(foo)").await;
		assert!(got.is_none());
	}

	#[tokio::test]
	async fn nonempty_with_content() {
		let ext = extractor("hello");
		let got = field(&ext, "nonempty").await;
		assert!(matches!(got, Some(PileValue::String(_))));
	}

	#[tokio::test]
	async fn nonempty_empty_string() {
		let ext = extractor("");
		let got = field(&ext, "nonempty").await;
		assert!(matches!(got, Some(PileValue::Null)));
	}

	#[tokio::test]
	async fn trimprefix_present() {
		let ext = extractor("foobar");
		let got = string(field(&ext, "trimprefix(foo)").await);
		assert_eq!(got.as_deref(), Some("bar"));
	}

	#[tokio::test]
	async fn trimprefix_absent() {
		let ext = extractor("foobar");
		let got = string(field(&ext, "trimprefix(baz)").await);
		assert_eq!(got.as_deref(), Some("foobar"));
	}

	#[tokio::test]
	async fn trimprefix_no_args() {
		let ext = extractor("foobar");
		let got = field(&ext, "trimprefix").await;
		assert!(got.is_none());
	}

	#[tokio::test]
	async fn trimsuffix_present() {
		let ext = extractor("foobar");
		let got = string(field(&ext, "trimsuffix(bar)").await);
		assert_eq!(got.as_deref(), Some("foo"));
	}

	#[tokio::test]
	async fn trimsuffix_absent() {
		let ext = extractor("foobar");
		let got = string(field(&ext, "trimsuffix(baz)").await);
		assert_eq!(got.as_deref(), Some("foobar"));
	}

	#[tokio::test]
	async fn split_basic() {
		let ext = extractor("a,b,c");
		let parts = array(field(&ext, "split(,)").await);
		assert_eq!(parts, vec!["a", "b", "c"]);
	}

	#[tokio::test]
	async fn split_no_match() {
		let ext = extractor("abc");
		let parts = array(field(&ext, "split(,)").await);
		assert_eq!(parts, vec!["abc"]);
	}

	#[tokio::test]
	async fn split_no_args() {
		let ext = extractor("abc");
		let got = field(&ext, "split").await;
		assert!(got.is_none());
	}

	#[tokio::test]
	async fn split_unclosed_paren() {
		let ext = extractor("abc");
		let got = field(&ext, "split(,").await;
		assert!(got.is_none());
	}

	#[tokio::test]
	async fn unknown_field() {
		let ext = extractor("abc");
		let got = field(&ext, "bogus").await;
		assert!(got.is_none());
	}
}

50
default.nix Normal file
View File

@@ -0,0 +1,50 @@
# Non-flake entry point that builds the `pile` package.
#
# Called without arguments it imports <nixpkgs> with rust-overlay applied and
# builds with the toolchain described by ./rust-toolchain.toml. flake.nix
# imports this file and passes its own `pkgs` and `rustPlatform` instead.
let
# rust-overlay supplies `rust-bin`, used below to build a toolchain from
# the rustup toolchain file.
rustOverlay = import (
builtins.fetchTarball {
# NOTE(review): the URL follows the moving `master` branch while the
# sha256 is fixed; presumably the fetch starts failing with a hash
# mismatch once upstream moves — consider pinning a commit archive.
url = "https://github.com/oxalica/rust-overlay/archive/master.tar.gz";
sha256 = "0qgrkgc695a7gja83dngxrcx4gdg9056gvg5325i5yyjxg0ni6c9";
}
);
pkgsDefault = import <nixpkgs> { overlays = [ rustOverlay ]; };
rustToolchain = pkgsDefault.rust-bin.fromRustupToolchainFile ./rust-toolchain.toml;
# Use the same pinned toolchain for both cargo and rustc.
rustPlatformDefault = pkgsDefault.makeRustPlatform {
cargo = rustToolchain;
rustc = rustToolchain;
};
in
{
# Both arguments are optional and default to the values computed above.
pkgs ? pkgsDefault,
rustPlatform ? rustPlatformDefault,
}:
rustPlatform.buildRustPackage {
pname = "pile";
# Rewritten by bump-version.sh together with the workspace Cargo.toml version.
version = "0.0.2";
src = ./.;
cargoLock.lockFile = ./Cargo.lock;
buildInputs = [
pkgs.pdfium-binaries
pkgs.openssl
]
# NOTE(review): newer nixpkgs releases deprecate the
# `darwin.apple_sdk.frameworks.*` attributes — confirm these still
# evaluate against the nixpkgs revision this is built with.
++ pkgs.lib.optionals pkgs.stdenv.isDarwin [
pkgs.darwin.apple_sdk.frameworks.Security
pkgs.darwin.apple_sdk.frameworks.SystemConfiguration
];
nativeBuildInputs = [
pkgs.pkg-config
pkgs.makeWrapper
];
# Wrap the installed binary so the pdfium library directory is on
# LD_LIBRARY_PATH when it runs.
# NOTE(review): LD_LIBRARY_PATH is the Linux loader variable; confirm
# whether the Darwin build needs a different one.
postInstall = ''
wrapProgram $out/bin/pile \
--prefix LD_LIBRARY_PATH : ${pkgs.lib.makeLibraryPath [ pkgs.pdfium-binaries ]}
'';
meta = {
description = "pile - flexible file indexing";
mainProgram = "pile";
};
}

40
flake.nix Normal file
View File

@@ -0,0 +1,40 @@
# Flake wrapper around default.nix: provides the `pile` package and a
# development shell for every default flake-utils system.
{
# NOTE(review): this wording differs from meta.description in default.nix
# ("pile - flexible file indexing") — confirm which one is intended.
description = "pile - personal data indexer";
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
rust-overlay = {
url = "github:oxalica/rust-overlay";
# Make rust-overlay use this flake's nixpkgs input instead of its own.
inputs.nixpkgs.follows = "nixpkgs";
};
flake-utils.url = "github:numtide/flake-utils";
};
outputs = { self, nixpkgs, rust-overlay, flake-utils }:
flake-utils.lib.eachDefaultSystem (system:
let
overlays = [ (import rust-overlay) ];
pkgs = import nixpkgs { inherit system overlays; };
# Toolchain described by rust-toolchain.toml, used for both cargo and rustc.
rustToolchain = pkgs.rust-bin.fromRustupToolchainFile ./rust-toolchain.toml;
rustPlatform = pkgs.makeRustPlatform {
cargo = rustToolchain;
rustc = rustToolchain;
};
in {
# Reuse the package definition from default.nix with the flake's
# pkgs and rustPlatform.
packages.default = import ./default.nix { inherit pkgs rustPlatform; };
# Development shell with the same toolchain and native libraries as
# the package build.
devShells.default = pkgs.mkShell {
buildInputs = [
rustToolchain
pkgs.pdfium-binaries
pkgs.openssl
pkgs.pkg-config
];
# Puts the pdfium library directory on the loader path inside the shell.
LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath [ pkgs.pdfium-binaries ];
};
});
}

2
rust-toolchain.toml Normal file
View File

@@ -0,0 +1,2 @@
# Pinned Rust toolchain. The same version is declared as `rust-version` in the
# workspace Cargo.toml, and default.nix / flake.nix read this file through
# `fromRustupToolchainFile`.
[toolchain]
channel = "1.94.0"