Add hash extractor
Some checks failed
CI / Typos (push) Failing after 18s
CI / Build and test (push) Successful in 1m43s
CI / Clippy (push) Successful in 2m31s
Docker / build-and-push (push) Successful in 4m36s
CI / Build and test (all features) (push) Successful in 6m52s

This commit is contained in:
2026-04-02 22:56:31 -07:00
parent e6e340d082
commit 9b8f825667
9 changed files with 140 additions and 39 deletions

View File

@@ -11,7 +11,6 @@ workspace = true
pile-dataset = { workspace = true, features = ["axum"] }
reqwest = { version = "0.12", features = ["json", "stream"] }
futures-core = "0.3"
serde = { workspace = true }
thiserror = { workspace = true }
bytes = { workspace = true }

View File

@@ -3,10 +3,8 @@ use axum::{
routing::any,
};
use bytes::Bytes;
use futures_core::Stream;
use reqwest::{Client, StatusCode, header};
use serde::Deserialize;
use std::pin::Pin;
use thiserror::Error;
use tracing::{trace, warn};
@@ -120,26 +118,6 @@ impl DatasetClient {
check_status(resp).await?.json().await.map_err(Into::into)
}
/// `GET /item` — stream the raw bytes of an item.
///
/// The returned stream yields chunks as they arrive from the server.
pub async fn get_item(
&self,
source: &str,
key: &str,
) -> Result<Pin<Box<dyn Stream<Item = Result<Bytes, reqwest::Error>> + Send>>, ClientError> {
let url = format!("{}/item", self.base_url);
trace!(url, source, key, "GET /item");
let resp = self
.client
.get(url)
.query(&[("source", source), ("key", key)])
.send()
.await?;
Ok(Box::pin(check_status(resp).await?.bytes_stream()))
}
/// `GET /extract` — extract a field from an item by object path (e.g. `$.flac.title`).
pub async fn get_extract(
&self,