Compare commits
3 Commits
179effc32c
...
82dcdbaa27
| Author | SHA1 | Date | |
|---|---|---|---|
| 82dcdbaa27 | |||
| 42f186d77f | |||
| b36b62150c |
13
.editorconfig
Normal file
13
.editorconfig
Normal file
@@ -0,0 +1,13 @@
|
||||
root = true
|
||||
|
||||
[*]
|
||||
indent_style = tab
|
||||
indent_size = 4
|
||||
end_of_line = lf
|
||||
charset = utf-8
|
||||
trim_trailing_whitespace = true
|
||||
insert_final_newline = true
|
||||
|
||||
[*.yml]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
12
Cargo.lock
generated
12
Cargo.lock
generated
@@ -2481,7 +2481,7 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
|
||||
|
||||
[[package]]
|
||||
name = "pile"
|
||||
version = "0.0.1"
|
||||
version = "0.0.2"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"anyhow",
|
||||
@@ -2504,7 +2504,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pile-config"
|
||||
version = "0.0.1"
|
||||
version = "0.0.2"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"smartstring",
|
||||
@@ -2514,7 +2514,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pile-dataset"
|
||||
version = "0.0.1"
|
||||
version = "0.0.2"
|
||||
dependencies = [
|
||||
"axum",
|
||||
"chrono",
|
||||
@@ -2535,7 +2535,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pile-flac"
|
||||
version = "0.0.1"
|
||||
version = "0.0.2"
|
||||
dependencies = [
|
||||
"base64",
|
||||
"itertools 0.14.0",
|
||||
@@ -2549,7 +2549,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pile-toolbox"
|
||||
version = "0.0.1"
|
||||
version = "0.0.2"
|
||||
dependencies = [
|
||||
"thiserror",
|
||||
"tokio",
|
||||
@@ -2557,7 +2557,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pile-value"
|
||||
version = "0.0.1"
|
||||
version = "0.0.2"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"aws-sdk-s3",
|
||||
|
||||
16
Cargo.toml
16
Cargo.toml
@@ -5,7 +5,7 @@ resolver = "2"
|
||||
[workspace.package]
|
||||
rust-version = "1.94.0"
|
||||
edition = "2024"
|
||||
version = "0.0.1"
|
||||
version = "0.0.2"
|
||||
|
||||
[workspace.lints.rust]
|
||||
unused_import_braces = "deny"
|
||||
@@ -73,15 +73,15 @@ pile-value = { path = "crates/pile-value" }
|
||||
tantivy = "0.25.0"
|
||||
axum = { version = "0.8.8", features = ["macros", "multipart"] }
|
||||
utoipa = { version = "5.4.0", features = [
|
||||
"axum_extras",
|
||||
"chrono",
|
||||
"url",
|
||||
"uuid",
|
||||
"axum_extras",
|
||||
"chrono",
|
||||
"url",
|
||||
"uuid",
|
||||
] }
|
||||
utoipa-swagger-ui = { version = "9.0.2", features = [
|
||||
"axum",
|
||||
"debug-embed",
|
||||
"vendored",
|
||||
"axum",
|
||||
"debug-embed",
|
||||
"vendored",
|
||||
] }
|
||||
|
||||
# Async & Parallelism
|
||||
|
||||
26
bump-version.sh
Executable file
26
bump-version.sh
Executable file
@@ -0,0 +1,26 @@
|
||||
#!/usr/bin/env bash
# Interactively bump the workspace version.
#
# Reads the current version from the first `version = "..."` line of the
# workspace Cargo.toml, prompts for a new one, and rewrites that version in
# both Cargo.toml and default.nix (both resolved relative to this script).
set -euo pipefail

CARGO_TOML="$(dirname "$0")/Cargo.toml"
DEFAULT_NIX="$(dirname "$0")/default.nix"

# Read current version from workspace Cargo.toml.
# `|| true` keeps `set -e`/`pipefail` from killing the script silently when no
# version line exists; the explicit check below reports the problem instead.
current=$(grep '^version = ' "$CARGO_TOML" | head -1 | sed 's/version = "\(.*\)"/\1/' || true)

if [[ -z "$current" ]]; then
	echo "Could not read current version from $CARGO_TOML. Aborting." >&2
	exit 1
fi

echo "Current version: $current"
read -rp "New version: " new

if [[ -z "$new" ]]; then
	echo "No version entered. Aborting." >&2
	exit 1
fi

# Only accept characters that can appear in a version string. This also keeps
# sed replacement metacharacters (`/`, `&`, `\`) and quotes out of $new.
if [[ ! "$new" =~ ^[0-9A-Za-z.+-]+$ ]]; then
	echo "Invalid version '$new'. Aborting." >&2
	exit 1
fi

# Escape regex metacharacters so the current version is matched literally
# (otherwise the dots in "0.0.1" would match any character).
current_re=$(printf '%s' "$current" | sed 's|[][\/.^$*]|\\&|g')

# `-i.bak` (suffix attached) is accepted by both GNU and BSD sed; the backup
# file is removed right after a successful edit.

# Update Cargo.toml workspace version
sed -i.bak "s/^version = \"$current_re\"/version = \"$new\"/" "$CARGO_TOML"
rm -f "$CARGO_TOML.bak"

# Update default.nix version field
sed -i.bak "s/version = \"$current_re\";/version = \"$new\";/" "$DEFAULT_NIX"
rm -f "$DEFAULT_NIX.bak"

echo "Bumped $current -> $new in:"
echo " $CARGO_TOML"
echo " $DEFAULT_NIX"
|
||||
@@ -23,6 +23,13 @@ fn main() {
|
||||
.expect("unexpected OUT_DIR structure")
|
||||
.to_path_buf();
|
||||
|
||||
// If PDFIUM_LIB_DIR is set (e.g. by Nix), use the pre-installed library directly.
|
||||
if let Ok(lib_dir) = env::var("PDFIUM_LIB_DIR") {
|
||||
println!("cargo:rustc-link-search=native={lib_dir}");
|
||||
println!("cargo:rustc-link-lib=dylib=pdfium");
|
||||
return;
|
||||
}
|
||||
|
||||
let lib_path = profile_dir.join("libpdfium.so");
|
||||
|
||||
if !lib_path.exists() {
|
||||
|
||||
@@ -4,6 +4,20 @@ use std::sync::Arc;
|
||||
|
||||
use crate::{extract::traits::ObjectExtractor, value::PileValue};
|
||||
|
||||
/// Splits a field name of the form `name` or `name(args)` into its parts.
///
/// Returns `(name, Some(args))` when `s` contains a `(` and ends with `)`;
/// `args` is everything between the first `(` and the final `)` and may be
/// empty or contain further parentheses.
///
/// Returns `(s, None)` when `s` has no `(`. A malformed name (an opening
/// paren without a closing `)` as the last character, e.g. `trim(` or
/// `trim(x)y`) is also returned whole as `(s, None)`, so it can never be
/// mistaken for the valid argument-less field `trim`.
fn parse_name(s: &str) -> (&str, Option<&str>) {
	let Some((name, rest)) = s.split_once('(') else {
		return (s, None);
	};

	match rest.strip_suffix(')') {
		Some(args) => (name, Some(args)),
		// Malformed: keep the full input as the name so no known field matches.
		None => (s, None),
	}
}
|
||||
|
||||
/// Extractor that exposes derived fields of a single string value through
/// its `ObjectExtractor` implementation.
pub struct StringExtractor {
|
||||
/// The shared string that every field lookup reads from.
item: Arc<SmartString<LazyCompact>>,
|
||||
}
|
||||
@@ -17,24 +31,49 @@ impl StringExtractor {
|
||||
#[async_trait::async_trait]
|
||||
impl ObjectExtractor for StringExtractor {
|
||||
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
|
||||
Ok(match name.as_str() {
|
||||
"trim" => Some(PileValue::String(Arc::new(
|
||||
let (name, args) = parse_name(name.as_str());
|
||||
Ok(match (name, args) {
|
||||
("trim", None) => Some(PileValue::String(Arc::new(
|
||||
self.item.as_str().trim().into(),
|
||||
))),
|
||||
|
||||
"upper" => Some(PileValue::String(Arc::new(
|
||||
("upper", None) => Some(PileValue::String(Arc::new(
|
||||
self.item.as_str().to_lowercase().into(),
|
||||
))),
|
||||
|
||||
"lower" => Some(PileValue::String(Arc::new(
|
||||
("lower", None) => Some(PileValue::String(Arc::new(
|
||||
self.item.as_str().to_uppercase().into(),
|
||||
))),
|
||||
|
||||
"nonempty" => Some(match self.item.is_empty() {
|
||||
("nonempty", None) => Some(match self.item.is_empty() {
|
||||
true => PileValue::Null,
|
||||
false => PileValue::String(self.item.clone()),
|
||||
}),
|
||||
|
||||
("trimprefix", Some(prefix)) => Some(PileValue::String(Arc::new(
|
||||
self.item
|
||||
.as_str()
|
||||
.strip_prefix(prefix)
|
||||
.unwrap_or(self.item.as_str())
|
||||
.into(),
|
||||
))),
|
||||
|
||||
("trimsuffix", Some(suffix)) => Some(PileValue::String(Arc::new(
|
||||
self.item
|
||||
.as_str()
|
||||
.strip_suffix(suffix)
|
||||
.unwrap_or(self.item.as_str())
|
||||
.into(),
|
||||
))),
|
||||
|
||||
("split", Some(by)) => Some(PileValue::Array(Arc::new(
|
||||
self.item
|
||||
.as_str()
|
||||
.split(by)
|
||||
.map(|s| PileValue::String(Arc::new(s.into())))
|
||||
.collect(),
|
||||
))),
|
||||
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
@@ -49,3 +88,134 @@ impl ObjectExtractor for StringExtractor {
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
	use super::*;

	/// Builds a [`StringExtractor`] over a fresh copy of `s`.
	fn extractor(s: &str) -> StringExtractor {
		StringExtractor::new(&Arc::new(s.into()))
	}

	/// Looks up `name` on `ext`, panicking on an invalid label or an I/O error.
	#[expect(clippy::unwrap_used)]
	async fn field(ext: &StringExtractor, name: &str) -> Option<PileValue> {
		let label = Label::new(name).unwrap();
		ext.field(&label).await.unwrap()
	}

	/// Unwraps a string value; `None` passes through, any other variant panics.
	fn string(v: Option<PileValue>) -> Option<String> {
		v.map(|value| match value {
			PileValue::String(s) => s.as_str().to_owned(),
			_ => panic!("expected string"),
		})
	}

	/// Unwraps an array of strings; panics on `None` or on any other shape.
	fn array(v: Option<PileValue>) -> Vec<String> {
		let PileValue::Array(items) = v.expect("expected Some") else {
			panic!("expected array");
		};

		let mut out = Vec::new();
		for item in items.iter() {
			match item {
				PileValue::String(s) => out.push(s.as_str().to_owned()),
				_ => panic!("expected string element"),
			}
		}
		out
	}

	#[tokio::test]
	async fn trim() {
		let got = string(field(&extractor(" hi "), "trim").await);
		assert_eq!(got, Some("hi".to_owned()));
	}

	#[tokio::test]
	async fn trim_no_args() {
		let got = field(&extractor("x"), "trim(foo)").await;
		assert!(got.is_none());
	}

	#[tokio::test]
	async fn nonempty_with_content() {
		let got = field(&extractor("hello"), "nonempty").await;
		assert!(matches!(got, Some(PileValue::String(_))));
	}

	#[tokio::test]
	async fn nonempty_empty_string() {
		let got = field(&extractor(""), "nonempty").await;
		assert!(matches!(got, Some(PileValue::Null)));
	}

	#[tokio::test]
	async fn trimprefix_present() {
		let got = string(field(&extractor("foobar"), "trimprefix(foo)").await);
		assert_eq!(got, Some("bar".to_owned()));
	}

	#[tokio::test]
	async fn trimprefix_absent() {
		let got = string(field(&extractor("foobar"), "trimprefix(baz)").await);
		assert_eq!(got, Some("foobar".to_owned()));
	}

	#[tokio::test]
	async fn trimprefix_no_args() {
		let got = field(&extractor("foobar"), "trimprefix").await;
		assert!(got.is_none());
	}

	#[tokio::test]
	async fn trimsuffix_present() {
		let got = string(field(&extractor("foobar"), "trimsuffix(bar)").await);
		assert_eq!(got, Some("foo".to_owned()));
	}

	#[tokio::test]
	async fn trimsuffix_absent() {
		let got = string(field(&extractor("foobar"), "trimsuffix(baz)").await);
		assert_eq!(got, Some("foobar".to_owned()));
	}

	#[tokio::test]
	async fn split_basic() {
		let got = array(field(&extractor("a,b,c"), "split(,)").await);
		assert_eq!(got, vec!["a", "b", "c"]);
	}

	#[tokio::test]
	async fn split_no_match() {
		let got = array(field(&extractor("abc"), "split(,)").await);
		assert_eq!(got, vec!["abc"]);
	}

	#[tokio::test]
	async fn split_no_args() {
		let got = field(&extractor("abc"), "split").await;
		assert!(got.is_none());
	}

	#[tokio::test]
	async fn split_unclosed_paren() {
		let got = field(&extractor("abc"), "split(,").await;
		assert!(got.is_none());
	}

	#[tokio::test]
	async fn unknown_field() {
		let got = field(&extractor("abc"), "bogus").await;
		assert!(got.is_none());
	}
}
|
||||
|
||||
52
default.nix
Normal file
52
default.nix
Normal file
@@ -0,0 +1,52 @@
|
||||
# Build expression for the `pile` package.
#
# Usable standalone (falls back to <nixpkgs> plus rust-overlay) or from
# flake.nix, which passes its own `pkgs` and `pileRustPlatform`.
let
|
||||
# Defaults used only when the caller does not supply the arguments below.
rustOverlay = import (
|
||||
builtins.fetchTarball {
|
||||
# NOTE(review): this URL tracks the moving `master` branch while sha256 is
# fixed; a fresh fetch will presumably fail once master changes. Consider
# pinning a specific commit archive.
url = "https://github.com/oxalica/rust-overlay/archive/master.tar.gz";
|
||||
sha256 = "0qgrkgc695a7gja83dngxrcx4gdg9056gvg5325i5yyjxg0ni6c9";
|
||||
}
|
||||
);
|
||||
pkgsDefault = import <nixpkgs> { overlays = [ rustOverlay ]; };
|
||||
# Toolchain version comes from rust-toolchain.toml.
rustToolchain = pkgsDefault.rust-bin.fromRustupToolchainFile ./rust-toolchain.toml;
|
||||
rustPlatformDefault = pkgsDefault.makeRustPlatform {
|
||||
cargo = rustToolchain;
|
||||
rustc = rustToolchain;
|
||||
};
|
||||
in
|
||||
{
|
||||
pkgs ? pkgsDefault,
|
||||
pileRustPlatform ? rustPlatformDefault,
|
||||
}:
|
||||
|
||||
pileRustPlatform.buildRustPackage {
|
||||
pname = "pile";
|
||||
# Kept in sync with the workspace Cargo.toml by bump-version.sh.
version = "0.0.2";
|
||||
src = ./.;
|
||||
|
||||
cargoLock.lockFile = ./Cargo.lock;
|
||||
|
||||
# Read by the build script: when set, it links against this pre-installed
# pdfium instead of looking for libpdfium.so in the profile directory.
PDFIUM_LIB_DIR = "${pkgs.pdfium-binaries}/lib";
|
||||
|
||||
buildInputs = [
|
||||
pkgs.pdfium-binaries
|
||||
pkgs.openssl
|
||||
]
|
||||
# NOTE(review): `darwin.apple_sdk.frameworks.*` is a legacy attribute path in
# newer nixpkgs; confirm it still evaluates on the nixpkgs revisions in use.
++ pkgs.lib.optionals pkgs.stdenv.isDarwin [
|
||||
pkgs.darwin.apple_sdk.frameworks.Security
|
||||
pkgs.darwin.apple_sdk.frameworks.SystemConfiguration
|
||||
];
|
||||
|
||||
nativeBuildInputs = [
|
||||
pkgs.pkg-config
|
||||
pkgs.makeWrapper
|
||||
];
|
||||
|
||||
# Wrap the installed binary so the pdfium library directory is on
# LD_LIBRARY_PATH at run time.
postInstall = ''
|
||||
wrapProgram $out/bin/pile \
|
||||
--prefix LD_LIBRARY_PATH : ${pkgs.lib.makeLibraryPath [ pkgs.pdfium-binaries ]}
|
||||
'';
|
||||
|
||||
meta = {
|
||||
description = "pile - flexible file indexing";
|
||||
mainProgram = "pile";
|
||||
};
|
||||
}
|
||||
40
flake.nix
Normal file
40
flake.nix
Normal file
@@ -0,0 +1,40 @@
|
||||
# Flake entry point: exposes the `pile` package (built by default.nix) and a
# development shell for every default flake-utils system.
{
|
||||
description = "pile - personal data indexer";
|
||||
|
||||
inputs = {
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
||||
rust-overlay = {
|
||||
url = "github:oxalica/rust-overlay";
|
||||
# Make rust-overlay use this flake's nixpkgs input instead of its own.
inputs.nixpkgs.follows = "nixpkgs";
|
||||
};
|
||||
flake-utils.url = "github:numtide/flake-utils";
|
||||
};
|
||||
|
||||
outputs = { self, nixpkgs, rust-overlay, flake-utils }:
|
||||
flake-utils.lib.eachDefaultSystem (system:
|
||||
let
|
||||
overlays = [ (import rust-overlay) ];
|
||||
pkgs = import nixpkgs { inherit system overlays; };
|
||||
|
||||
# Toolchain version comes from rust-toolchain.toml.
rustToolchain = pkgs.rust-bin.fromRustupToolchainFile ./rust-toolchain.toml;
|
||||
|
||||
# Use the same toolchain for both cargo and rustc when building the package.
rustPlatform = pkgs.makeRustPlatform {
|
||||
cargo = rustToolchain;
|
||||
rustc = rustToolchain;
|
||||
};
|
||||
|
||||
in {
|
||||
# Reuse default.nix, overriding its fallback pkgs and Rust platform.
packages.default = import ./default.nix { inherit pkgs; pileRustPlatform = rustPlatform; };
|
||||
|
||||
devShells.default = pkgs.mkShell {
|
||||
buildInputs = [
|
||||
rustToolchain
|
||||
pkgs.pdfium-binaries
|
||||
pkgs.openssl
|
||||
pkgs.pkg-config
|
||||
];
|
||||
|
||||
# Puts the pdfium library directory on the loader path inside the shell.
LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath [ pkgs.pdfium-binaries ];
|
||||
};
|
||||
});
|
||||
}
|
||||
2
rust-toolchain.toml
Normal file
2
rust-toolchain.toml
Normal file
@@ -0,0 +1,2 @@
|
||||
[toolchain]
|
||||
channel = "1.94.0"
|
||||
Reference in New Issue
Block a user