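Filter by mime: thread `ExtractState` (carrying `ignore_mime`) through `query`, `to_json`, and FTS document extraction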

This commit is contained in:
2026-03-15 10:20:15 -07:00
parent 8041fc7531
commit 979fbb9b0d
30 changed files with 258 additions and 93 deletions

View File

@@ -2,6 +2,7 @@ use chrono::{DateTime, Utc};
use pile_config::{ConfigToml, Label, Source, objectpath::ObjectPath};
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
use pile_value::{
extract::traits::ExtractState,
source::{DataSource, DirDataSource, S3DataSource, misc::path_ts_earliest},
value::{Item, PileValue},
};
@@ -187,6 +188,7 @@ impl Datasets {
/// Returns `None` if the item or field is not found.
pub async fn get_field(
&self,
state: &ExtractState,
source: &Label,
key: &str,
path: &ObjectPath,
@@ -196,11 +198,11 @@ impl Datasets {
};
let item = PileValue::Item(item);
let Some(value) = item.query(path).await? else {
let Some(value) = item.query(state, path).await? else {
return Ok(None);
};
Ok(Some(value.to_json().await?))
Ok(Some(value.to_json(state).await?))
}
//
@@ -210,6 +212,7 @@ impl Datasets {
/// Refresh this dataset's fts index.
pub async fn fts_refresh(
&self,
state: &ExtractState,
_threads: usize,
flag: Option<CancelFlag>,
) -> Result<(), CancelableTaskError<DatasetError>> {
@@ -265,9 +268,10 @@ impl Datasets {
let item = item_result.map_err(DatasetError::from)?;
let db = Arc::clone(&db_index);
let state = state.clone();
join_set.spawn(async move {
let key = item.key();
let result = db.entry_to_document(&item).await;
let result = db.entry_to_document(&state, &item).await;
(key, result)
});

View File

@@ -1,5 +1,8 @@
use pile_config::{ConfigToml, DatasetFts, Label};
use pile_value::value::{Item, PileValue};
use pile_value::{
extract::traits::ExtractState,
value::{Item, PileValue},
};
use std::{path::PathBuf, sync::LazyLock};
use tantivy::{
DocAddress, Index, ReloadPolicy, TantivyDocument, TantivyError,
@@ -63,6 +66,7 @@ impl DbFtsIndex {
/// Turn an entry into a tantivy document
pub async fn entry_to_document(
&self,
state: &ExtractState,
item: &Item,
) -> Result<Option<TantivyDocument>, TantivyError> {
let mut doc = TantivyDocument::default();
@@ -75,7 +79,7 @@ impl DbFtsIndex {
let mut empty = true;
for name in self.fts_cfg().fields.keys() {
let x = self.get_field(&item, name).await?;
let x = self.get_field(state, &item, name).await?;
let val = match x {
Some(x) => x,
@@ -99,6 +103,7 @@ impl DbFtsIndex {
pub async fn get_field(
&self,
state: &ExtractState,
extractor: &PileValue,
field_name: &Label,
) -> Result<Option<String>, std::io::Error> {
@@ -112,7 +117,7 @@ impl DbFtsIndex {
// Try paths in order, using the first value we find
'outer: for path in field.path.as_slice() {
let val = match extractor.query(path).await? {
let val = match extractor.query(state, path).await? {
Some(x) => x,
None => return Ok(None),
};

View File

@@ -5,7 +5,7 @@ use axum::{
response::{IntoResponse, Response},
};
use pile_config::{Label, objectpath::ObjectPath};
use pile_value::value::PileValue;
use pile_value::{extract::traits::ExtractState, value::PileValue};
use serde::Deserialize;
use std::{sync::Arc, time::Instant};
use tracing::debug;
@@ -62,8 +62,10 @@ pub async fn get_field(
return StatusCode::NOT_FOUND.into_response();
};
let state = ExtractState { ignore_mime: false };
let item = PileValue::Item(item);
let value = match item.query(&path).await {
let value = match item.query(&state, &path).await {
Ok(Some(v)) => v,
Ok(None) => return StatusCode::NOT_FOUND.into_response(),
Err(e) => return (StatusCode::INTERNAL_SERVER_ERROR, format!("{e:?}")).into_response(),
@@ -90,7 +92,7 @@ pub async fn get_field(
bytes.as_ref().clone(),
)
.into_response(),
_ => match value.to_json().await {
_ => match value.to_json(&state).await {
Ok(json) => (StatusCode::OK, Json(json)).into_response(),
Err(e) => (StatusCode::INTERNAL_SERVER_ERROR, format!("{e:?}")).into_response(),
},