From 4f0024f75dc699915ee4afb8c3cbcb58a288c960 Mon Sep 17 00:00:00 2001 From: Mark Date: Fri, 27 Mar 2026 03:07:34 -0700 Subject: [PATCH] Tweak schema api --- .../pile-dataset/src/serve/config_schema.rs | 31 +++++ crates/pile-dataset/src/serve/mod.rs | 35 +++-- crates/pile-dataset/src/serve/schema.rs | 127 ++++++++++++++++-- .../src/serve/{field.rs => schema_field.rs} | 32 ++--- 4 files changed, 186 insertions(+), 39 deletions(-) create mode 100644 crates/pile-dataset/src/serve/config_schema.rs rename crates/pile-dataset/src/serve/{field.rs => schema_field.rs} (84%) diff --git a/crates/pile-dataset/src/serve/config_schema.rs b/crates/pile-dataset/src/serve/config_schema.rs new file mode 100644 index 0000000..483e1ed --- /dev/null +++ b/crates/pile-dataset/src/serve/config_schema.rs @@ -0,0 +1,31 @@ +use axum::{ + Json, + extract::State, + http::StatusCode, + response::{IntoResponse, Response}, +}; +use std::{collections::HashMap, sync::Arc}; + +pub use pile_config::FieldSpec; + +use crate::Datasets; + +pub type FieldsResponse = HashMap; + +/// Retrieve this dataset's schema. 
+#[utoipa::path( + get, + path = "/config/schema", + responses( + (status = 200, description = "This dataset's schema"), + ) +)] +pub async fn config_schema(State(state): State>) -> Response { + let fields: FieldsResponse = state + .config + .schema + .iter() + .map(|(k, v)| (k.as_str().to_owned(), v.clone())) + .collect(); + (StatusCode::OK, Json(fields)).into_response() +} diff --git a/crates/pile-dataset/src/serve/mod.rs b/crates/pile-dataset/src/serve/mod.rs index 66a64d1..db4b093 100644 --- a/crates/pile-dataset/src/serve/mod.rs +++ b/crates/pile-dataset/src/serve/mod.rs @@ -17,26 +17,36 @@ pub use item::*; mod extract; pub use extract::*; -mod field; -pub use field::*; - mod items; pub use items::*; +mod config_schema; +pub use config_schema::*; + +mod schema_field; +pub use schema_field::*; + mod schema; pub use schema::*; #[derive(OpenApi)] #[openapi( tags(), - paths(lookup, item_get, get_extract, items_list, get_field, get_schema), + paths( + lookup, + item_get, + get_extract, + items_list, + config_schema, + schema_field, + schema_all + ), components(schemas( LookupRequest, LookupResponse, LookupResult, ItemQuery, ExtractQuery, - FieldQuery, ItemsQuery, ItemsResponse, ItemRef @@ -56,9 +66,10 @@ impl Datasets { .route("/lookup", post(lookup)) .route("/item", get(item_get)) .route("/extract", get(get_extract)) - .route("/field", get(get_field)) .route("/items", get(items_list)) - .route("/schema", get(get_schema)) + .route("/config/schema", get(config_schema)) + .route("/schema", get(schema_all)) + .route("/schema/{field}", get(schema_field)) .with_state(self.clone()); if let Some(prefix) = prefix { @@ -71,8 +82,14 @@ impl Datasets { Some(prefix) => format!("{prefix}/docs"), }; - let docs = SwaggerUi::new(docs_path.clone()) - .url(format!("{}/openapi.json", docs_path), Api::openapi()); + let api = Api::openapi(); + let api = match prefix { + None => api, + Some(prefix) => utoipa::openapi::OpenApi::default().nest(prefix, api), + }; + + let docs = + 
SwaggerUi::new(docs_path.clone()).url(format!("{}/openapi.json", docs_path), api); router = router.merge(docs); } diff --git a/crates/pile-dataset/src/serve/schema.rs b/crates/pile-dataset/src/serve/schema.rs index 037c80a..b86a753 100644 --- a/crates/pile-dataset/src/serve/schema.rs +++ b/crates/pile-dataset/src/serve/schema.rs @@ -1,31 +1,130 @@ use axum::{ Json, - extract::State, + extract::{Query, State}, http::StatusCode, response::{IntoResponse, Response}, }; +use pile_config::Label; +use pile_value::{extract::traits::ExtractState, value::PileValue}; +use serde::{Deserialize, Serialize}; use std::{collections::HashMap, sync::Arc}; - -pub use pile_config::FieldSpec; +use utoipa::IntoParams; use crate::Datasets; -pub type FieldsResponse = HashMap; +#[derive(Deserialize, IntoParams)] +pub struct SchemaQuery { + source: String, + key: String, -/// Retrieve this dataset's schema. + #[serde(default)] + hidden: bool, +} + +#[derive(Serialize)] +#[serde(untagged)] +pub enum ApiValue { + Binary { binary: bool, mime: String }, + Object { object: bool }, + Array(Vec), + String(String), + Number(serde_json::Number), + Null, +} + +pub type SchemaResponse = HashMap; + +async fn pile_value_to_api( + state: &ExtractState, + value: PileValue, +) -> Result { + match value { + PileValue::String(s) => Ok(ApiValue::String(s.to_string())), + PileValue::U64(n) => Ok(ApiValue::Number(n.into())), + PileValue::I64(n) => Ok(ApiValue::Number(n.into())), + PileValue::Null => Ok(ApiValue::Null), + + PileValue::Blob { mime, .. 
} => Ok(ApiValue::Binary { + binary: true, + mime: mime.to_string(), + }), + + PileValue::Array(arr) => { + let mut out = Vec::with_capacity(arr.len()); + for item in arr.iter() { + out.push(Box::pin(pile_value_to_api(state, item.clone())).await?); + } + Ok(ApiValue::Array(out)) + } + + PileValue::ObjectExtractor(_) | PileValue::ListExtractor(_) | PileValue::Item(_) => { + Ok(ApiValue::Object { object: true }) + } + } +} + +/// Get all schema field values for a single item. #[utoipa::path( get, path = "/schema", + params( + ("source" = String, Query, description = "Source label"), + ("key" = String, Query, description = "Item key"), + ("hidden" = bool, Query, description = "Include hidden fields (default: false)"), + ), responses( - (status = 200, description = "This dataset's schema"), + (status = 200, description = "Schema field values as a map of label to value"), + (status = 400, description = "Invalid source label"), + (status = 404, description = "Item not found"), + (status = 500, description = "Internal server error"), ) )] -pub async fn get_schema(State(state): State>) -> Response { - let fields: FieldsResponse = state - .config - .schema - .iter() - .map(|(k, v)| (k.as_str().to_owned(), v.clone())) - .collect(); - (StatusCode::OK, Json(fields)).into_response() +pub async fn schema_all( + State(state): State>, + Query(params): Query, +) -> Response { + let label = match Label::try_from(params.source.clone()) { + Ok(l) => l, + Err(e) => return (StatusCode::BAD_REQUEST, format!("{e:?}")).into_response(), + }; + + let Some(item) = state.get(&label, &params.key).await else { + return StatusCode::NOT_FOUND.into_response(); + }; + + let extract_state = ExtractState { ignore_mime: false }; + let item = PileValue::Item(item); + + let mut result: SchemaResponse = HashMap::new(); + + for (field_label, field_spec) in &state.config.schema { + if field_spec.hidden && !params.hidden { + continue; + } + + let mut value = None; + for path in &field_spec.path { + match
item.query(&extract_state, path).await { + Ok(Some(PileValue::Null)) | Ok(None) => continue, + Ok(Some(v)) => { + value = Some(v); + break; + } + Err(e) => { + return (StatusCode::INTERNAL_SERVER_ERROR, format!("{e:?}")).into_response(); + } + } + } + + let Some(v) = value else { continue }; + + let api_value = match pile_value_to_api(&extract_state, v).await { + Ok(v) => v, + Err(e) => return (StatusCode::INTERNAL_SERVER_ERROR, format!("{e:?}")).into_response(), + }; + + result.insert(field_label.as_str().to_owned(), api_value); + } + + (StatusCode::OK, Json(result)).into_response() } diff --git a/crates/pile-dataset/src/serve/field.rs b/crates/pile-dataset/src/serve/schema_field.rs similarity index 84% rename from crates/pile-dataset/src/serve/field.rs rename to crates/pile-dataset/src/serve/schema_field.rs index 47f9ba7..cd1b5bf 100644 --- a/crates/pile-dataset/src/serve/field.rs +++ b/crates/pile-dataset/src/serve/schema_field.rs @@ -1,6 +1,6 @@ use axum::{ Json, - extract::{Query, State}, + extract::{Path, Query, State}, http::{StatusCode, header}, response::{IntoResponse, Response}, }; @@ -9,29 +9,28 @@ use pile_value::{extract::traits::ExtractState, value::PileValue}; use serde::Deserialize; use std::{sync::Arc, time::Instant}; use tracing::debug; -use utoipa::{IntoParams, ToSchema}; +use utoipa::IntoParams; use crate::Datasets; -#[derive(Deserialize, ToSchema, IntoParams)] -pub struct FieldQuery { +#[derive(Deserialize, IntoParams)] +pub struct SchemaFieldQuery { source: String, key: String, - field: String, #[serde(default)] download: bool, name: Option, } -/// Extract a specific field from an item's metadata. +/// Extract a specific schema field from an item's metadata. 
#[utoipa::path( get, - path = "/field", + path = "/schema/{field}", params( + ("field" = String, Path, description = "Schema field"), ("source" = String, Query, description = "Source label"), ("key" = String, Query, description = "Item key"), - ("field" = String, Query, description = "Schema field"), ("name" = Option, Query, description = "Downloaded filename; defaults to the last segment of the key"), ), responses( @@ -41,9 +40,10 @@ pub struct FieldQuery { (status = 500, description = "Internal server error"), ) )] -pub async fn get_field( +pub async fn schema_field( State(state): State>, - Query(params): Query, + Path(field): Path, + Query(params): Query, ) -> Response { let start = Instant::now(); @@ -53,22 +53,22 @@ pub async fn get_field( }; debug!( - message = "Serving /field", + message = "Serving /schema/{field}", source = params.source, key = params.key, - field = params.field, + field = field, ); let Some(item) = state.get(&label, &params.key).await else { return StatusCode::NOT_FOUND.into_response(); }; - let field = match Label::new(&params.field) { + let field_label = match Label::new(&field) { Some(x) => x, None => return StatusCode::NOT_FOUND.into_response(), }; - let paths = match state.config.schema.get(&field) { + let paths = match state.config.schema.get(&field_label) { Some(x) => &x.path, None => return StatusCode::NOT_FOUND.into_response(), }; @@ -93,10 +93,10 @@ pub async fn get_field( }; debug!( - message = "Served /field", + message = "Served /schema/{field}", source = params.source, key = params.key, - field = params.field, + field = field, time_ms = start.elapsed().as_millis() );