Add pile serve

This commit is contained in:
2026-03-10 16:26:10 -07:00
parent 5445872ad1
commit 350f3e3dc9
8 changed files with 516 additions and 5 deletions

301
Cargo.lock generated
View File

@@ -105,6 +105,15 @@ version = "1.0.102"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
[[package]]
name = "arbitrary"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
dependencies = [
"derive_arbitrary",
]
[[package]]
name = "arc-swap"
version = "1.8.2"
@@ -478,6 +487,71 @@ dependencies = [
"tracing",
]
[[package]]
name = "axum"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8"
dependencies = [
"axum-core",
"axum-macros",
"bytes",
"form_urlencoded",
"futures-util",
"http 1.4.0",
"http-body 1.0.1",
"http-body-util",
"hyper 1.8.1",
"hyper-util",
"itoa",
"matchit",
"memchr",
"mime",
"multer",
"percent-encoding",
"pin-project-lite",
"serde_core",
"serde_json",
"serde_path_to_error",
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tower",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "axum-core"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1"
dependencies = [
"bytes",
"futures-core",
"http 1.4.0",
"http-body 1.0.1",
"http-body-util",
"mime",
"pin-project-lite",
"sync_wrapper",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "axum-macros"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "base16ct"
version = "0.1.1"
@@ -815,7 +889,7 @@ dependencies = [
"crc",
"digest 0.10.7",
"rustversion",
"spin",
"spin 0.10.0",
]
[[package]]
@@ -997,6 +1071,17 @@ dependencies = [
"serde_core",
]
[[package]]
name = "derive_arbitrary"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "digest"
version = "0.10.7"
@@ -1086,6 +1171,15 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
[[package]]
name = "encoding_rs"
version = "0.8.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
dependencies = [
"cfg-if",
]
[[package]]
name = "epub"
version = "1.2.4"
@@ -1096,7 +1190,7 @@ dependencies = [
"percent-encoding",
"regex",
"xml-rs",
"zip",
"zip 0.6.6",
]
[[package]]
@@ -1171,6 +1265,7 @@ checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
dependencies = [
"crc32fast",
"miniz_oxide",
"zlib-rs",
]
[[package]]
@@ -1537,6 +1632,7 @@ dependencies = [
"http 1.4.0",
"http-body 1.0.1",
"httparse",
"httpdate",
"itoa",
"pin-project-lite",
"pin-utils",
@@ -1989,6 +2085,12 @@ dependencies = [
"regex-automata",
]
[[package]]
name = "matchit"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"
[[package]]
name = "md-5"
version = "0.10.6"
@@ -2035,6 +2137,16 @@ version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]]
name = "mime_guess"
version = "2.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e"
dependencies = [
"mime",
"unicase",
]
[[package]]
name = "minimal-lexical"
version = "0.2.1"
@@ -2062,6 +2174,23 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "multer"
version = "3.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83e87776546dc87511aa5ee218730c92b666d7264ab6ed41f9d215af9cd5224b"
dependencies = [
"bytes",
"encoding_rs",
"futures-util",
"http 1.4.0",
"httparse",
"memchr",
"mime",
"spin 0.9.8",
"version_check",
]
[[package]]
name = "murmurhash32"
version = "0.3.1"
@@ -2248,6 +2377,7 @@ version = "0.0.1"
dependencies = [
"anstyle",
"anyhow",
"axum",
"clap",
"indicatif",
"pile-config",
@@ -2262,6 +2392,8 @@ dependencies = [
"tracing",
"tracing-indicatif",
"tracing-subscriber",
"utoipa",
"utoipa-swagger-ui",
]
[[package]]
@@ -2550,6 +2682,40 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422"
[[package]]
name = "rust-embed"
version = "8.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04113cb9355a377d83f06ef1f0a45b8ab8cd7d8b1288160717d66df5c7988d27"
dependencies = [
"rust-embed-impl",
"rust-embed-utils",
"walkdir",
]
[[package]]
name = "rust-embed-impl"
version = "8.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da0902e4c7c8e997159ab384e6d0fc91c221375f6894346ae107f47dd0f3ccaa"
dependencies = [
"proc-macro2",
"quote",
"rust-embed-utils",
"syn 2.0.117",
"walkdir",
]
[[package]]
name = "rust-embed-utils"
version = "8.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bcdef0be6fe7f6fa333b1073c949729274b05f123a0ad7efcb8efd878e5c3b1"
dependencies = [
"sha2 0.10.9",
"walkdir",
]
[[package]]
name = "rust-stemmers"
version = "1.2.0"
@@ -2789,6 +2955,17 @@ dependencies = [
"zmij",
]
[[package]]
name = "serde_path_to_error"
version = "0.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457"
dependencies = [
"itoa",
"serde",
"serde_core",
]
[[package]]
name = "serde_spanned"
version = "1.0.4"
@@ -2798,6 +2975,18 @@ dependencies = [
"serde_core",
]
[[package]]
name = "serde_urlencoded"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
dependencies = [
"form_urlencoded",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "sha1"
version = "0.10.6"
@@ -2955,6 +3144,12 @@ dependencies = [
"windows-sys 0.60.2",
]
[[package]]
name = "spin"
version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
[[package]]
name = "spin"
version = "0.10.0"
@@ -3049,6 +3244,12 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "sync_wrapper"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
[[package]]
name = "synstructure"
version = "0.13.2"
@@ -3421,8 +3622,14 @@ version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4"
dependencies = [
"futures-core",
"futures-util",
"pin-project-lite",
"sync_wrapper",
"tokio",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
@@ -3443,6 +3650,7 @@ version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
dependencies = [
"log",
"pin-project-lite",
"tracing-attributes",
"tracing-core",
@@ -3545,6 +3753,12 @@ version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
[[package]]
name = "unicase"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142"
[[package]]
name = "unicode-bidi"
version = "0.3.18"
@@ -3632,6 +3846,57 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "utoipa"
version = "5.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fcc29c80c21c31608227e0912b2d7fddba57ad76b606890627ba8ee7964e993"
dependencies = [
"indexmap",
"serde",
"serde_json",
"utoipa-gen",
]
[[package]]
name = "utoipa-gen"
version = "5.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d79d08d92ab8af4c5e8a6da20c47ae3f61a0f1dabc1997cdf2d082b757ca08b"
dependencies = [
"proc-macro2",
"quote",
"regex",
"syn 2.0.117",
"url",
"uuid",
]
[[package]]
name = "utoipa-swagger-ui"
version = "9.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d047458f1b5b65237c2f6dc6db136945667f40a7668627b3490b9513a3d43a55"
dependencies = [
"axum",
"base64",
"mime_guess",
"regex",
"rust-embed",
"serde",
"serde_json",
"url",
"utoipa",
"utoipa-swagger-ui-vendored",
"zip 3.0.0",
]
[[package]]
name = "utoipa-swagger-ui-vendored"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2eebbbfe4093922c2b6734d7c679ebfebd704a0d7e56dfcb0d05818ce28977d"
[[package]]
name = "uuid"
version = "1.21.0"
@@ -4309,12 +4574,44 @@ dependencies = [
"flate2",
]
[[package]]
name = "zip"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12598812502ed0105f607f941c386f43d441e00148fce9dec3ca5ffb0bde9308"
dependencies = [
"arbitrary",
"crc32fast",
"flate2",
"indexmap",
"memchr",
"zopfli",
]
[[package]]
name = "zlib-rs"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3be3d40e40a133f9c916ee3f9f4fa2d9d63435b5fbe1bfc6d9dae0aa0ada1513"
[[package]]
name = "zmij"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
[[package]]
name = "zopfli"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f05cd8797d63865425ff89b5c4a48804f35ba0ce8d125800027ad6017d2b5249"
dependencies = [
"bumpalo",
"crc32fast",
"log",
"simd-adler32",
]
[[package]]
name = "zstd"
version = "0.13.3"

View File

@@ -68,8 +68,20 @@ pile-config = { path = "crates/pile-config" }
pile-flac = { path = "crates/pile-flac" }
pile-dataset = { path = "crates/pile-dataset" }
# Clients
# Clients & servers
tantivy = "0.25.0"
axum = { version = "0.8.8", features = ["macros", "multipart"] }
utoipa = { version = "5.4.0", features = [
"axum_extras",
"chrono",
"url",
"uuid",
] }
utoipa-swagger-ui = { version = "9.0.2", features = [
"axum",
"debug-embed",
"vendored",
] }
# Async & Parallelism
tokio = { version = "1.49.0", features = ["full"] }

View File

@@ -17,7 +17,7 @@ use crate::{Item, PileValue, extract::MetaExtractor};
#[derive(Debug, Clone)]
pub struct FtsLookupResult {
pub score: f32,
pub source_name: Label,
pub source: Label,
pub key: String,
}
@@ -270,7 +270,7 @@ impl DbFtsIndex {
out.push(FtsLookupResult {
score,
source_name,
source: source_name,
key,
});
}

View File

@@ -26,3 +26,7 @@ signal-hook = { workspace = true }
anstyle = { workspace = true }
toml = { workspace = true }
serde_json = { workspace = true }
axum = { workspace = true }
utoipa = { workspace = true }
utoipa-swagger-ui = { workspace = true }

View File

@@ -10,6 +10,7 @@ mod index;
mod init;
mod lookup;
mod probe;
mod serve;
use crate::{Cli, GlobalContext};
@@ -54,6 +55,12 @@ pub enum SubCommand {
#[command(flatten)]
cmd: probe::ProbeCommand,
},
/// Expose a dataset via an http api
Serve {
#[command(flatten)]
cmd: serve::cli::ServeCommand,
},
}
impl CliCmdDispatch for SubCommand {
@@ -65,6 +72,7 @@ impl CliCmdDispatch for SubCommand {
Self::Index { cmd } => cmd.start(ctx),
Self::Lookup { cmd } => cmd.start(ctx),
Self::Probe { cmd } => cmd.start(ctx),
Self::Serve { cmd } => cmd.start(ctx),
Self::Docs {} => {
print_help_recursively(&mut Cli::command(), None);

View File

@@ -0,0 +1,89 @@
use axum::{
Json, Router,
extract::{DefaultBodyLimit, State},
http::StatusCode,
response::{IntoResponse, Response},
routing::post,
};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use utoipa::{OpenApi, ToSchema};
use utoipa_swagger_ui::SwaggerUi;
use crate::command::serve::cli::ServeState;
#[derive(OpenApi)]
#[openapi(
tags(),
paths(lookup),
components(schemas(LookupRequest, LookupResponse, LookupResult))
)]
pub(super) struct Api;
#[inline]
pub(super) fn router(state: Arc<ServeState>) -> Router<()> {
let docs_path = "/docs";
let docs = SwaggerUi::new(docs_path).url(format!("{}/openapi.json", docs_path), Api::openapi());
Router::new()
.route("/lookup", post(lookup))
.merge(docs)
.with_state(state)
.layer(DefaultBodyLimit::max(32 * 1024 * 1024))
}
//
// MARK: lookup
//
#[derive(Serialize, Deserialize, ToSchema, Debug)]
pub struct LookupRequest {
pub query: String,
#[serde(default)]
pub limit: Option<usize>,
}
#[derive(Debug, Serialize, Deserialize, ToSchema)]
struct LookupResponse {
pub results: Vec<LookupResult>,
}
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct LookupResult {
pub score: f32,
pub source: String,
pub key: String,
}
/// Search a user's captures
#[utoipa::path(
post,
path = "/lookup",
responses(
(status = 200, description = "Search results", body = Vec<LookupResponse>),
(status = 400, description = "Invalid request"),
(status = 401, description = "Unauthorized"),
(status = 404, description = "URL not found"),
(status = 500, description = "Internal server error"),
)
)]
async fn lookup(State(state): State<Arc<ServeState>>, Json(body): Json<LookupRequest>) -> Response {
let results: Vec<LookupResult> =
match state.ds.fts_lookup(&body.query, body.limit.unwrap_or(10)) {
Ok(x) => x
.into_iter()
.map(|x| LookupResult {
key: x.key,
score: x.score,
source: x.source.into(),
})
.collect(),
Err(error) => {
return (StatusCode::INTERNAL_SERVER_ERROR, format!("{error:?}")).into_response();
}
};
return (StatusCode::OK, Json(LookupResponse { results })).into_response();
}

View File

@@ -0,0 +1,99 @@
use anyhow::{Context, Result};
use clap::Args;
use pile_dataset::Datasets;
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
use std::{fmt::Debug, path::PathBuf, sync::Arc};
use tracing::{error, info};
use crate::{CliCmd, GlobalContext, command::serve::api};
pub(super) struct ServeState {
pub ds: Datasets,
}
#[derive(Debug, Args)]
pub struct ServeCommand {
/// Address to bind to
#[arg(default_value = "0.0.0.0:9000")]
addr: String,
/// Path to dataset config
#[arg(long, short = 'c', default_value = "./pile.toml")]
config: PathBuf,
/// If provided, refresh fts if it is out-of-date
#[arg(long)]
refresh: bool,
/// Number of threads to use for indexing
#[arg(long, short = 'j', default_value = "3")]
jobs: usize,
}
impl CliCmd for ServeCommand {
async fn run(
self,
_ctx: GlobalContext,
flag: CancelFlag,
) -> Result<i32, CancelableTaskError<anyhow::Error>> {
let ds = Datasets::open(&self.config)
.with_context(|| format!("while opening dataset for {}", self.config.display()))?;
if self.refresh && ds.needs_fts().await.context("while checking dataset fts")? {
info!("FTS index is missing or out-of-date, regenerating");
ds.fts_refresh(self.jobs, Some(flag.clone()))
.await
.map_err(|x| {
x.map_err(|x| {
anyhow::Error::from(x).context(format!(
"while refreshing fts for {}",
self.config.display()
))
})
})?;
}
let app = api::router(Arc::new(ServeState { ds }))
.into_make_service_with_connect_info::<std::net::SocketAddr>();
let listener = match tokio::net::TcpListener::bind(self.addr.clone()).await {
Ok(x) => x,
Err(error) => {
match error.kind() {
std::io::ErrorKind::AddrInUse => {
error!(
message = "Cannot bind to address, already in use",
addr = self.addr
);
}
_ => {
error!(message = "Error while starting server", ?error);
}
}
std::process::exit(1);
}
};
match listener.local_addr() {
Ok(x) => info!("listening on http://{x}/docs"),
Err(error) => {
error!(message = "Could not determine local address", ?error);
return Err(anyhow::Error::from(error).into());
}
}
match axum::serve(listener, app)
.with_graceful_shutdown(async move { flag.await_cancel().await })
.await
{
Ok(_) => {}
Err(error) => {
error!(message = "Error while serving api", ?error);
return Err(anyhow::Error::from(error).into());
}
}
return Err(CancelableTaskError::Cancelled);
}
}

View File

@@ -0,0 +1,2 @@
pub mod api;
pub mod cli;