Add hash extractor

This commit is contained in:
2026-04-03 08:57:37 -07:00
parent e6e340d082
commit 4d4e9c93a2
9 changed files with 139 additions and 39 deletions

View File

@@ -11,7 +11,6 @@ workspace = true
pile-dataset = { workspace = true, features = ["axum"] }
reqwest = { version = "0.12", features = ["json", "stream"] }
futures-core = "0.3"
serde = { workspace = true }
thiserror = { workspace = true }
bytes = { workspace = true }

View File

@@ -3,10 +3,8 @@ use axum::{
routing::any,
};
use bytes::Bytes;
use futures_core::Stream;
use reqwest::{Client, StatusCode, header};
use serde::Deserialize;
use std::pin::Pin;
use thiserror::Error;
use tracing::{trace, warn};
@@ -120,26 +118,6 @@ impl DatasetClient {
check_status(resp).await?.json().await.map_err(Into::into)
}
/// `GET /item` — fetch the raw bytes of a single item as a stream.
///
/// `source` and `key` are forwarded as the `source` and `key` query
/// parameters. The body is not buffered here: the response's byte stream is
/// boxed and pinned, then handed to the caller to consume chunk by chunk.
///
/// # Errors
///
/// Returns a [`ClientError`] if sending the request fails or if
/// `check_status` rejects the response.
pub async fn get_item(
    &self,
    source: &str,
    key: &str,
) -> Result<Pin<Box<dyn Stream<Item = Result<Bytes, reqwest::Error>> + Send>>, ClientError> {
    let url = format!("{}/item", self.base_url);
    trace!(url, source, key, "GET /item");

    let params = [("source", source), ("key", key)];
    let request = self.client.get(url).query(&params);
    let response = request.send().await?;

    // Validate the status before exposing the body to the caller.
    let checked = check_status(response).await?;
    Ok(Box::pin(checked.bytes_stream()))
}
/// `GET /extract` — extract a field from an item by object path (e.g. `$.flac.title`).
pub async fn get_extract(
&self,