diff --git a/Cargo.lock b/Cargo.lock index c84775d..3653aec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -105,6 +105,15 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] + [[package]] name = "arc-swap" version = "1.8.2" @@ -478,6 +487,71 @@ dependencies = [ "tracing", ] +[[package]] +name = "axum" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" +dependencies = [ + "axum-core", + "axum-macros", + "bytes", + "form_urlencoded", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "multer", + "percent-encoding", + "pin-project-lite", + "serde_core", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" +dependencies = [ + "bytes", + "futures-core", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "sync_wrapper", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-macros" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "base16ct" version = 
"0.1.1" @@ -815,7 +889,7 @@ dependencies = [ "crc", "digest 0.10.7", "rustversion", - "spin", + "spin 0.10.0", ] [[package]] @@ -997,6 +1071,17 @@ dependencies = [ "serde_core", ] +[[package]] +name = "derive_arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "digest" version = "0.10.7" @@ -1086,6 +1171,15 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + [[package]] name = "epub" version = "1.2.4" @@ -1096,7 +1190,7 @@ dependencies = [ "percent-encoding", "regex", "xml-rs", - "zip", + "zip 0.6.6", ] [[package]] @@ -1171,6 +1265,7 @@ checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ "crc32fast", "miniz_oxide", + "zlib-rs", ] [[package]] @@ -1537,6 +1632,7 @@ dependencies = [ "http 1.4.0", "http-body 1.0.1", "httparse", + "httpdate", "itoa", "pin-project-lite", "pin-utils", @@ -1989,6 +2085,12 @@ dependencies = [ "regex-automata", ] +[[package]] +name = "matchit" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" + [[package]] name = "md-5" version = "0.10.6" @@ -2035,6 +2137,16 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "mime_guess" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e" +dependencies = [ + "mime", + "unicase", +] + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -2062,6 +2174,23 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "multer" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83e87776546dc87511aa5ee218730c92b666d7264ab6ed41f9d215af9cd5224b" +dependencies = [ + "bytes", + "encoding_rs", + "futures-util", + "http 1.4.0", + "httparse", + "memchr", + "mime", + "spin 0.9.8", + "version_check", +] + [[package]] name = "murmurhash32" version = "0.3.1" @@ -2248,6 +2377,7 @@ version = "0.0.1" dependencies = [ "anstyle", "anyhow", + "axum", "clap", "indicatif", "pile-config", @@ -2262,6 +2392,8 @@ dependencies = [ "tracing", "tracing-indicatif", "tracing-subscriber", + "utoipa", + "utoipa-swagger-ui", ] [[package]] @@ -2550,6 +2682,40 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" +[[package]] +name = "rust-embed" +version = "8.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04113cb9355a377d83f06ef1f0a45b8ab8cd7d8b1288160717d66df5c7988d27" +dependencies = [ + "rust-embed-impl", + "rust-embed-utils", + "walkdir", +] + +[[package]] +name = "rust-embed-impl" +version = "8.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da0902e4c7c8e997159ab384e6d0fc91c221375f6894346ae107f47dd0f3ccaa" +dependencies = [ + "proc-macro2", + "quote", + "rust-embed-utils", + "syn 2.0.117", + "walkdir", +] + +[[package]] +name = "rust-embed-utils" +version = "8.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bcdef0be6fe7f6fa333b1073c949729274b05f123a0ad7efcb8efd878e5c3b1" +dependencies = [ + "sha2 0.10.9", + "walkdir", +] + [[package]] name = "rust-stemmers" version = "1.2.0" @@ 
-2789,6 +2955,17 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + [[package]] name = "serde_spanned" version = "1.0.4" @@ -2798,6 +2975,18 @@ dependencies = [ "serde_core", ] +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + [[package]] name = "sha1" version = "0.10.6" @@ -2955,6 +3144,12 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + [[package]] name = "spin" version = "0.10.0" @@ -3049,6 +3244,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" + [[package]] name = "synstructure" version = "0.13.2" @@ -3421,8 +3622,14 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", "tower-layer", "tower-service", + "tracing", ] [[package]] @@ -3443,6 +3650,7 @@ version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ + "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -3545,6 +3753,12 
@@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +[[package]] +name = "unicase" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" + [[package]] name = "unicode-bidi" version = "0.3.18" @@ -3632,6 +3846,57 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "utoipa" +version = "5.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fcc29c80c21c31608227e0912b2d7fddba57ad76b606890627ba8ee7964e993" +dependencies = [ + "indexmap", + "serde", + "serde_json", + "utoipa-gen", +] + +[[package]] +name = "utoipa-gen" +version = "5.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d79d08d92ab8af4c5e8a6da20c47ae3f61a0f1dabc1997cdf2d082b757ca08b" +dependencies = [ + "proc-macro2", + "quote", + "regex", + "syn 2.0.117", + "url", + "uuid", +] + +[[package]] +name = "utoipa-swagger-ui" +version = "9.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d047458f1b5b65237c2f6dc6db136945667f40a7668627b3490b9513a3d43a55" +dependencies = [ + "axum", + "base64", + "mime_guess", + "regex", + "rust-embed", + "serde", + "serde_json", + "url", + "utoipa", + "utoipa-swagger-ui-vendored", + "zip 3.0.0", +] + +[[package]] +name = "utoipa-swagger-ui-vendored" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2eebbbfe4093922c2b6734d7c679ebfebd704a0d7e56dfcb0d05818ce28977d" + [[package]] name = "uuid" version = "1.21.0" @@ -4309,12 +4574,44 @@ dependencies = [ "flate2", ] +[[package]] +name = "zip" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"12598812502ed0105f607f941c386f43d441e00148fce9dec3ca5ffb0bde9308" +dependencies = [ + "arbitrary", + "crc32fast", + "flate2", + "indexmap", + "memchr", + "zopfli", +] + +[[package]] +name = "zlib-rs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be3d40e40a133f9c916ee3f9f4fa2d9d63435b5fbe1bfc6d9dae0aa0ada1513" + [[package]] name = "zmij" version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" +[[package]] +name = "zopfli" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f05cd8797d63865425ff89b5c4a48804f35ba0ce8d125800027ad6017d2b5249" +dependencies = [ + "bumpalo", + "crc32fast", + "log", + "simd-adler32", +] + [[package]] name = "zstd" version = "0.13.3" diff --git a/Cargo.toml b/Cargo.toml index 00b2e0f..43bac9b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,8 +68,20 @@ pile-config = { path = "crates/pile-config" } pile-flac = { path = "crates/pile-flac" } pile-dataset = { path = "crates/pile-dataset" } -# Clients +# Clients & servers tantivy = "0.25.0" +axum = { version = "0.8.8", features = ["macros", "multipart"] } +utoipa = { version = "5.4.0", features = [ + "axum_extras", + "chrono", + "url", + "uuid", +] } +utoipa-swagger-ui = { version = "9.0.2", features = [ + "axum", + "debug-embed", + "vendored", +] } # Async & Parallelism tokio = { version = "1.49.0", features = ["full"] } diff --git a/crates/pile-dataset/src/index/index_fts.rs b/crates/pile-dataset/src/index/index_fts.rs index 11efe8a..2d5a03c 100644 --- a/crates/pile-dataset/src/index/index_fts.rs +++ b/crates/pile-dataset/src/index/index_fts.rs @@ -17,7 +17,7 @@ use crate::{Item, PileValue, extract::MetaExtractor}; #[derive(Debug, Clone)] pub struct FtsLookupResult { pub score: f32, - pub source_name: Label, + pub source: Label, pub key: String, } @@ -270,7 +270,7 @@ impl DbFtsIndex { 
out.push(FtsLookupResult { score, - source_name, + source: source_name, key, }); } diff --git a/crates/pile/Cargo.toml b/crates/pile/Cargo.toml index 08618ab..35c18e1 100644 --- a/crates/pile/Cargo.toml +++ b/crates/pile/Cargo.toml @@ -26,3 +26,7 @@ signal-hook = { workspace = true } anstyle = { workspace = true } toml = { workspace = true } serde_json = { workspace = true } + +axum = { workspace = true } +utoipa = { workspace = true } +utoipa-swagger-ui = { workspace = true } diff --git a/crates/pile/src/command/mod.rs b/crates/pile/src/command/mod.rs index d4bfa0b..5c4bb26 100644 --- a/crates/pile/src/command/mod.rs +++ b/crates/pile/src/command/mod.rs @@ -10,6 +10,7 @@ mod index; mod init; mod lookup; mod probe; +mod serve; use crate::{Cli, GlobalContext}; @@ -54,6 +55,12 @@ pub enum SubCommand { #[command(flatten)] cmd: probe::ProbeCommand, }, + + /// Expose a dataset via an http api + Serve { + #[command(flatten)] + cmd: serve::cli::ServeCommand, + }, } impl CliCmdDispatch for SubCommand { @@ -65,6 +72,7 @@ impl CliCmdDispatch for SubCommand { Self::Index { cmd } => cmd.start(ctx), Self::Lookup { cmd } => cmd.start(ctx), Self::Probe { cmd } => cmd.start(ctx), + Self::Serve { cmd } => cmd.start(ctx), Self::Docs {} => { print_help_recursively(&mut Cli::command(), None); diff --git a/crates/pile/src/command/serve/api.rs b/crates/pile/src/command/serve/api.rs new file mode 100644 index 0000000..9e0e59a --- /dev/null +++ b/crates/pile/src/command/serve/api.rs @@ -0,0 +1,89 @@ +use axum::{ + Json, Router, + extract::{DefaultBodyLimit, State}, + http::StatusCode, + response::{IntoResponse, Response}, + routing::post, +}; +use serde::{Deserialize, Serialize}; +use std::sync::Arc; +use utoipa::{OpenApi, ToSchema}; +use utoipa_swagger_ui::SwaggerUi; + +use crate::command::serve::cli::ServeState; + +#[derive(OpenApi)] +#[openapi( + tags(), + paths(lookup), + components(schemas(LookupRequest, LookupResponse, LookupResult)) +)] +pub(super) struct Api; + +#[inline] 
+/// Build the HTTP router for the `serve` command.
+///
+/// Routes:
+/// - `POST /lookup`: full-text search, see [`lookup`]
+/// - `/docs`: Swagger UI, with the OpenAPI document at `/docs/openapi.json`
+///
+/// Request bodies are limited to 32 MiB.
+pub(super) fn router(state: Arc<ServeState>) -> Router<()> {
+    let docs_path = "/docs";
+    let docs = SwaggerUi::new(docs_path).url(format!("{}/openapi.json", docs_path), Api::openapi());
+
+    Router::new()
+        .route("/lookup", post(lookup))
+        .merge(docs)
+        .with_state(state)
+        .layer(DefaultBodyLimit::max(32 * 1024 * 1024))
+}
+
+//
+// MARK: lookup
+//
+
+/// Number of results returned by [`lookup`] when the request gives no limit.
+const DEFAULT_LOOKUP_LIMIT: usize = 10;
+
+/// Request body for `POST /lookup`.
+#[derive(Serialize, Deserialize, ToSchema, Debug)]
+pub struct LookupRequest {
+    /// Query string handed to the full-text index.
+    pub query: String,
+
+    /// Maximum number of results to return.
+    /// Falls back to a default of 10 when omitted.
+    //
+    // NOTE(review): the generic argument was lost in extraction. `usize` is
+    // assumed here; confirm against the signature of `Datasets::fts_lookup`.
+    #[serde(default)]
+    pub limit: Option<usize>,
+}
+
+/// Response body for `POST /lookup`.
+#[derive(Debug, Serialize, Deserialize, ToSchema)]
+pub struct LookupResponse {
+    /// Matching items, in the order returned by the index.
+    pub results: Vec<LookupResult>,
+}
+
+/// A single search hit.
+#[derive(Debug, Serialize, Deserialize, ToSchema)]
+pub struct LookupResult {
+    /// Score reported by the full-text index for this hit.
+    pub score: f32,
+
+    /// Label of the source this item belongs to.
+    pub source: String,
+
+    /// Key of the item within its source.
+    pub key: String,
+}
+
+/// Run a full-text search over the served dataset
+#[utoipa::path(
+    post,
+    path = "/lookup",
+    responses(
+        (status = 200, description = "Search results", body = LookupResponse),
+        (status = 400, description = "Invalid request"),
+        (status = 415, description = "Request is not `application/json`"),
+        (status = 422, description = "JSON body does not match the request schema"),
+        (status = 500, description = "Internal server error"),
+    )
+)]
+async fn lookup(
+    State(state): State<Arc<ServeState>>,
+    Json(body): Json<LookupRequest>,
+) -> Response {
+    let limit = body.limit.unwrap_or(DEFAULT_LOOKUP_LIMIT);
+
+    // A limit of zero can only produce an empty result set, so answer it
+    // without touching the index.
+    // NOTE(review): presumably `fts_lookup` forwards the limit to tantivy's
+    // `TopDocs::with_limit`, which panics on 0. Confirm.
+    if limit == 0 {
+        let empty = LookupResponse {
+            results: Vec::new(),
+        };
+        return (StatusCode::OK, Json(empty)).into_response();
+    }
+
+    // NOTE(review): `fts_lookup` is called synchronously on the async
+    // executor. If it does blocking disk I/O, consider `spawn_blocking`.
+    let hits = match state.ds.fts_lookup(&body.query, limit) {
+        Ok(hits) => hits,
+        Err(error) => {
+            // Keep the details in the server log; do not send internal
+            // error state to the client.
+            tracing::error!(message = "Error while running fts lookup", ?error);
+            return (StatusCode::INTERNAL_SERVER_ERROR, "Internal server error").into_response();
+        }
+    };
+
+    let results: Vec<LookupResult> = hits
+        .into_iter()
+        .map(|hit| LookupResult {
+            key: hit.key,
+            score: hit.score,
+            source: hit.source.into(),
+        })
+        .collect();
+
+    return (StatusCode::OK, Json(LookupResponse { results })).into_response();
+}
diff --git a/crates/pile/src/command/serve/cli.rs b/crates/pile/src/command/serve/cli.rs
new file mode 100644
index 0000000..d1f954b
--- /dev/null
+++ b/crates/pile/src/command/serve/cli.rs
@@ -0,0 +1,99 @@
+use anyhow::{Context, Result};
+use clap::Args;
+use pile_dataset::Datasets;
+use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
+use std::{fmt::Debug, path::PathBuf, sync::Arc};
+use tracing::{error, info};
+
+use crate::{CliCmd, GlobalContext, command::serve::api};
+
+/// State shared with every request handler.
+pub(super) struct ServeState {
+    /// The dataset this server answers queries against.
+    pub ds: Datasets,
+}
+
+// Arguments of the `serve` subcommand.
+// (Plain comment on purpose: clap turns `///` comments into help text.)
+#[derive(Debug, Args)]
+pub struct ServeCommand {
+    /// Address to bind to
+    #[arg(default_value = "0.0.0.0:9000")]
+    addr: String,
+
+    /// Path to dataset config
+    #[arg(long, short = 'c', default_value = "./pile.toml")]
+    config: PathBuf,
+
+    /// If provided, refresh fts if it is out-of-date
+    #[arg(long)]
+    refresh: bool,
+
+    /// Number of threads to use for indexing
+    #[arg(long, short = 'j', default_value = "3")]
+    jobs: usize,
+}
+
+impl CliCmd for ServeCommand {
+    // Opens the dataset, optionally refreshes its fts index, then serves the
+    // http api on `addr` until `flag` is cancelled.
+    //
+    // Every failure is logged and returned as an error. A clean shutdown
+    // returns `CancelableTaskError::Cancelled`, because the server only stops
+    // once the cancel flag fires.
+    //
+    // NOTE(review): the generic arguments of the return type (and of
+    // `into_make_service_with_connect_info` below) were lost in extraction.
+    // `i32` / `anyhow::Error` / `std::net::SocketAddr` are assumptions;
+    // confirm against the `CliCmd` trait.
+    async fn run(
+        self,
+        _ctx: GlobalContext,
+        flag: CancelFlag,
+    ) -> Result<i32, CancelableTaskError<anyhow::Error>> {
+        let ds = Datasets::open(&self.config)
+            .with_context(|| format!("while opening dataset for {}", self.config.display()))?;
+
+        if self.refresh && ds.needs_fts().await.context("while checking dataset fts")? {
+            info!("FTS index is missing or out-of-date, regenerating");
+            ds.fts_refresh(self.jobs, Some(flag.clone()))
+                .await
+                .map_err(|x| {
+                    x.map_err(|x| {
+                        anyhow::Error::from(x).context(format!(
+                            "while refreshing fts for {}",
+                            self.config.display()
+                        ))
+                    })
+                })?;
+        }
+
+        let app = api::router(Arc::new(ServeState { ds }))
+            .into_make_service_with_connect_info::<std::net::SocketAddr>();
+
+        let listener = match tokio::net::TcpListener::bind(self.addr.as_str()).await {
+            Ok(x) => x,
+            Err(error) => {
+                match error.kind() {
+                    std::io::ErrorKind::AddrInUse => {
+                        error!(
+                            message = "Cannot bind to address, already in use",
+                            addr = self.addr
+                        );
+                    }
+                    _ => {
+                        error!(message = "Error while starting server", ?error);
+                    }
+                }
+
+                // Return the error instead of calling `std::process::exit`,
+                // so the caller decides the exit code and destructors run.
+                return Err(anyhow::Error::from(error)
+                    .context(format!("while binding to {}", self.addr))
+                    .into());
+            }
+        };
+
+        match listener.local_addr() {
+            Ok(x) => info!("listening on http://{x}/docs"),
+            Err(error) => {
+                error!(message = "Could not determine local address", ?error);
+                return Err(anyhow::Error::from(error).into());
+            }
+        }
+
+        let served = axum::serve(listener, app)
+            .with_graceful_shutdown(async move { flag.await_cancel().await })
+            .await;
+
+        if let Err(error) = served {
+            error!(message = "Error while serving api", ?error);
+            return Err(anyhow::Error::from(error).into());
+        }
+
+        // The server only returns cleanly after the shutdown future
+        // (the cancel flag) completes.
+        return Err(CancelableTaskError::Cancelled);
+    }
+}
diff --git a/crates/pile/src/command/serve/mod.rs b/crates/pile/src/command/serve/mod.rs
new file mode 100644
index 0000000..95f6506
--- /dev/null
+++ b/crates/pile/src/command/serve/mod.rs
@@ -0,0 +1,2 @@
+pub mod api;
+pub mod cli;