Flatten arrays for FTS index
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
use pile_config::{ConfigToml, DatasetFts, Label};
|
||||
use pile_config::{ConfigToml, DatasetFts, Label, objectpath::ObjectPath};
|
||||
use pile_value::{
|
||||
extract::traits::ExtractState,
|
||||
value::{Item, PileValue},
|
||||
@@ -10,7 +10,7 @@ use tantivy::{
|
||||
query::QueryParser,
|
||||
schema::{self, Schema, Value as TantivyValue},
|
||||
};
|
||||
use tracing::{debug, trace, warn};
|
||||
use tracing::warn;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct FtsLookupResult {
|
||||
@@ -79,18 +79,12 @@ impl DbFtsIndex {
|
||||
|
||||
let mut empty = true;
|
||||
for name in self.fts_cfg().fields.keys() {
|
||||
let x = self.get_field(state, &item, name).await?;
|
||||
let vals = self.get_field(state, &item, name).await?;
|
||||
let field = self.schema.get_field(name)?;
|
||||
|
||||
let val = match x {
|
||||
Some(x) => x,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
empty = false;
|
||||
|
||||
let field = self.schema.get_field(name);
|
||||
if let Ok(field) = field {
|
||||
doc.add_text(field, val);
|
||||
for v in vals {
|
||||
empty = false;
|
||||
doc.add_text(field, v);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -106,110 +100,29 @@ impl DbFtsIndex {
|
||||
state: &ExtractState,
|
||||
extractor: &PileValue,
|
||||
field_name: &Label,
|
||||
) -> Result<Option<String>, std::io::Error> {
|
||||
) -> Result<Vec<String>, std::io::Error> {
|
||||
let field = match self.cfg.schema.get(field_name) {
|
||||
Some(x) => x,
|
||||
None => {
|
||||
warn!("Unknown field {field_name:?}");
|
||||
return Ok(None);
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
};
|
||||
|
||||
// Try paths in order, using the first value we find
|
||||
'outer: for path in field.path.as_slice() {
|
||||
for path in field.path.as_slice() {
|
||||
let val = match extractor.query(state, path).await? {
|
||||
Some(x) => x,
|
||||
None => return Ok(None),
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let mut val = match val {
|
||||
PileValue::Null => {
|
||||
trace!(
|
||||
message = "Skipping field, is null",
|
||||
field = field_name.to_string(),
|
||||
?path,
|
||||
// value = ?val
|
||||
);
|
||||
continue;
|
||||
}
|
||||
x => x.clone(),
|
||||
};
|
||||
|
||||
loop {
|
||||
val = match val {
|
||||
PileValue::String(x) => return Ok(Some(x.to_string())),
|
||||
PileValue::U64(x) => return Ok(Some(x.to_string())),
|
||||
PileValue::I64(x) => return Ok(Some(x.to_string())),
|
||||
|
||||
PileValue::Array(x) => {
|
||||
if x.len() == 1 {
|
||||
x[0].clone()
|
||||
} else if x.len() > 1 {
|
||||
debug!(
|
||||
message = "Skipping field, is array with more than one element",
|
||||
field = field_name.to_string(),
|
||||
?path,
|
||||
);
|
||||
continue 'outer;
|
||||
} else {
|
||||
debug!(
|
||||
message = "Skipping field, is empty array",
|
||||
field = field_name.to_string(),
|
||||
?path,
|
||||
);
|
||||
continue 'outer;
|
||||
}
|
||||
}
|
||||
|
||||
PileValue::Null => {
|
||||
trace!(
|
||||
message = "Skipping field, is null",
|
||||
field = field_name.to_string(),
|
||||
?path,
|
||||
);
|
||||
continue 'outer;
|
||||
}
|
||||
|
||||
PileValue::ObjectExtractor(_) => {
|
||||
trace!(
|
||||
message = "Skipping field, is object",
|
||||
field = field_name.to_string(),
|
||||
?path,
|
||||
);
|
||||
continue 'outer;
|
||||
}
|
||||
|
||||
PileValue::Item(_) => {
|
||||
trace!(
|
||||
message = "Skipping field, is item",
|
||||
field = field_name.to_string(),
|
||||
?path,
|
||||
);
|
||||
continue 'outer;
|
||||
}
|
||||
|
||||
PileValue::ListExtractor(_) => {
|
||||
trace!(
|
||||
message = "Skipping field, is ListExtractor",
|
||||
field = field_name.to_string(),
|
||||
?path,
|
||||
);
|
||||
continue 'outer;
|
||||
}
|
||||
|
||||
PileValue::Blob { .. } => {
|
||||
trace!(
|
||||
message = "Skipping field, is blob",
|
||||
field = field_name.to_string(),
|
||||
?path,
|
||||
);
|
||||
continue 'outer;
|
||||
}
|
||||
}
|
||||
let val = val_to_string(state, &val, path, field_name).await?;
|
||||
if !val.is_empty() {
|
||||
return Ok(val);
|
||||
}
|
||||
}
|
||||
|
||||
return Ok(None);
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
/// Run the given query on this table's FTS index.
|
||||
@@ -298,3 +211,42 @@ impl DbFtsIndex {
|
||||
return Ok(out);
|
||||
}
|
||||
}
|
||||
|
||||
async fn val_to_string(
|
||||
state: &ExtractState,
|
||||
val: &PileValue,
|
||||
path: &ObjectPath,
|
||||
field_name: &str,
|
||||
) -> Result<Vec<String>, std::io::Error> {
|
||||
match val {
|
||||
PileValue::String(x) => return Ok(vec![x.to_string()]),
|
||||
PileValue::U64(x) => return Ok(vec![x.to_string()]),
|
||||
PileValue::I64(x) => return Ok(vec![x.to_string()]),
|
||||
|
||||
PileValue::Array(x) => {
|
||||
let mut out = Vec::new();
|
||||
for x in x.iter() {
|
||||
out.extend(Box::pin(val_to_string(state, x, path, field_name)).await?);
|
||||
}
|
||||
return Ok(out);
|
||||
}
|
||||
|
||||
#[expect(clippy::unwrap_used)]
|
||||
PileValue::ListExtractor(x) => {
|
||||
let mut out = Vec::new();
|
||||
let len = x.len(state).await?;
|
||||
for i in 0..len {
|
||||
let v = x.get(state, i).await?;
|
||||
out.extend(Box::pin(val_to_string(state, &v.unwrap(), path, field_name)).await?);
|
||||
}
|
||||
return Ok(out);
|
||||
}
|
||||
|
||||
PileValue::Null => {}
|
||||
PileValue::ObjectExtractor(_) => {}
|
||||
PileValue::Item(_) => {}
|
||||
PileValue::Blob { .. } => {}
|
||||
}
|
||||
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user