diff --git a/crates/pile-dataset/src/index/index_fts.rs b/crates/pile-dataset/src/index/index_fts.rs index 2b44652..f40b8f8 100644 --- a/crates/pile-dataset/src/index/index_fts.rs +++ b/crates/pile-dataset/src/index/index_fts.rs @@ -1,4 +1,4 @@ -use pile_config::{ConfigToml, DatasetFts, Label}; +use pile_config::{ConfigToml, DatasetFts, Label, objectpath::ObjectPath}; use pile_value::{ extract::traits::ExtractState, value::{Item, PileValue}, @@ -10,7 +10,7 @@ use tantivy::{ query::QueryParser, schema::{self, Schema, Value as TantivyValue}, }; -use tracing::{debug, trace, warn}; +use tracing::warn; #[derive(Debug, Clone)] pub struct FtsLookupResult { @@ -79,18 +79,12 @@ impl DbFtsIndex { let mut empty = true; for name in self.fts_cfg().fields.keys() { - let x = self.get_field(state, &item, name).await?; + let vals = self.get_field(state, &item, name).await?; + let field = self.schema.get_field(name)?; - let val = match x { - Some(x) => x, - None => continue, - }; - - empty = false; - - let field = self.schema.get_field(name); - if let Ok(field) = field { - doc.add_text(field, val); + for v in vals { + empty = false; + doc.add_text(field, v); } } @@ -106,110 +100,29 @@ impl DbFtsIndex { state: &ExtractState, extractor: &PileValue, field_name: &Label, - ) -> Result, std::io::Error> { + ) -> Result, std::io::Error> { let field = match self.cfg.schema.get(field_name) { Some(x) => x, None => { warn!("Unknown field {field_name:?}"); - return Ok(None); + return Ok(Vec::new()); } }; // Try paths in order, using the first value we find - 'outer: for path in field.path.as_slice() { + for path in field.path.as_slice() { let val = match extractor.query(state, path).await? { Some(x) => x, - None => return Ok(None), + None => continue, }; - let mut val = match val { - PileValue::Null => { - trace!( - message = "Skipping field, is null", - field = field_name.to_string(), - ?path, - // value = ?val - ); - continue; - } - x => x.clone(), - }; - - loop { - val = match val { - PileValue::String(x) => return Ok(Some(x.to_string())), - PileValue::U64(x) => return Ok(Some(x.to_string())), - PileValue::I64(x) => return Ok(Some(x.to_string())), - - PileValue::Array(x) => { - if x.len() == 1 { - x[0].clone() - } else if x.len() > 1 { - debug!( - message = "Skipping field, is array with more than one element", - field = field_name.to_string(), - ?path, - ); - continue 'outer; - } else { - debug!( - message = "Skipping field, is empty array", - field = field_name.to_string(), - ?path, - ); - continue 'outer; - } - } - - PileValue::Null => { - trace!( - message = "Skipping field, is null", - field = field_name.to_string(), - ?path, - ); - continue 'outer; - } - - PileValue::ObjectExtractor(_) => { - trace!( - message = "Skipping field, is object", - field = field_name.to_string(), - ?path, - ); - continue 'outer; - } - - PileValue::Item(_) => { - trace!( - message = "Skipping field, is item", - field = field_name.to_string(), - ?path, - ); - continue 'outer; - } - - PileValue::ListExtractor(_) => { - trace!( - message = "Skipping field, is ListExtractor", - field = field_name.to_string(), - ?path, - ); - continue 'outer; - } - - PileValue::Blob { .. } => { - trace!( - message = "Skipping field, is blob", - field = field_name.to_string(), - ?path, - ); - continue 'outer; - } - } + let val = val_to_string(state, &val, path, field_name).await?; + if !val.is_empty() { + return Ok(val); } } - return Ok(None); + return Ok(Vec::new()); } /// Run the given query on this table's ftx index. @@ -298,3 +211,42 @@ impl DbFtsIndex { return Ok(out); } } + +async fn val_to_string( + state: &ExtractState, + val: &PileValue, + path: &ObjectPath, + field_name: &str, +) -> Result, std::io::Error> { + match val { + PileValue::String(x) => return Ok(vec![x.to_string()]), + PileValue::U64(x) => return Ok(vec![x.to_string()]), + PileValue::I64(x) => return Ok(vec![x.to_string()]), + + PileValue::Array(x) => { + let mut out = Vec::new(); + for x in x.iter() { + out.extend(Box::pin(val_to_string(state, x, path, field_name)).await?); + } + return Ok(out); + } + + #[expect(clippy::unwrap_used)] + PileValue::ListExtractor(x) => { + let mut out = Vec::new(); + let len = x.len(state).await?; + for i in 0..len { + let v = x.get(state, i).await?; + out.extend(Box::pin(val_to_string(state, &v.unwrap(), path, field_name)).await?); + } + return Ok(out); + } + + PileValue::Null => {} + PileValue::ObjectExtractor(_) => {} + PileValue::Item(_) => {} + PileValue::Blob { .. } => {} + } + + return Ok(Vec::new()); +} diff --git a/crates/pile/src/command/annotate.rs b/crates/pile/src/command/annotate.rs index e2bd83f..9f95fb5 100644 --- a/crates/pile/src/command/annotate.rs +++ b/crates/pile/src/command/annotate.rs @@ -77,18 +77,16 @@ impl CliCmd for AnnotateCommand { }; let item = PileValue::Item(item.clone()); - let Some(value) = index - .get_field(&state, &item, &field) - .await - .with_context(|| { - format!("while extracting field from {}", path.display()) - })? - else { - continue; - }; + let vals = + index + .get_field(&state, &item, &field) + .await + .with_context(|| { + format!("while extracting field from {}", path.display()) + })?; // TODO: implement sidecar writing - let _ = (&dest_path, &value); + let _ = (&dest_path, &vals); todo!("write_sidecar not yet implemented"); #[expect(unreachable_code)]