Extractor rewrite
This commit is contained in:
@@ -1,9 +1,6 @@
|
||||
use itertools::Itertools;
|
||||
use pile_config::{Case, ConfigToml, DatasetFts, FieldSpecPost, Label};
|
||||
use std::{
|
||||
path::PathBuf,
|
||||
sync::{Arc, LazyLock},
|
||||
};
|
||||
use pile_config::{ConfigToml, DatasetFts, Label};
|
||||
use pile_value::value::{Item, PileValue};
|
||||
use std::{path::PathBuf, sync::LazyLock};
|
||||
use tantivy::{
|
||||
DocAddress, Index, ReloadPolicy, TantivyDocument, TantivyError,
|
||||
collector::Collector,
|
||||
@@ -12,8 +9,6 @@ use tantivy::{
|
||||
};
|
||||
use tracing::{debug, trace, warn};
|
||||
|
||||
use crate::{Item, PileValue, extract::MetaExtractor};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct FtsLookupResult {
|
||||
pub score: f32,
|
||||
@@ -76,11 +71,11 @@ impl DbFtsIndex {
|
||||
doc.add_text(self.schema.get_field("_meta_source")?, item.source_name());
|
||||
doc.add_text(self.schema.get_field("_meta_key")?, key);
|
||||
|
||||
let extractor = PileValue::ObjectExtractor(Arc::new(MetaExtractor::new(item)));
|
||||
let item = PileValue::Item(item.clone());
|
||||
|
||||
let mut empty = true;
|
||||
for name in self.fts_cfg().fields.keys() {
|
||||
let x = self.get_field(&extractor, name).await?;
|
||||
let x = self.get_field(&item, name).await?;
|
||||
|
||||
let val = match x {
|
||||
Some(x) => x,
|
||||
@@ -135,13 +130,6 @@ impl DbFtsIndex {
|
||||
x => x.clone(),
|
||||
};
|
||||
|
||||
for post in &field.post {
|
||||
val = match apply(post, &val) {
|
||||
Some(x) => x,
|
||||
None => return Ok(None),
|
||||
};
|
||||
}
|
||||
|
||||
loop {
|
||||
val = match val {
|
||||
PileValue::String(x) => return Ok(Some(x.to_string())),
|
||||
@@ -186,6 +174,15 @@ impl DbFtsIndex {
|
||||
continue 'outer;
|
||||
}
|
||||
|
||||
PileValue::Item(_) => {
|
||||
trace!(
|
||||
message = "Skipping field, is item",
|
||||
field = field_name.to_string(),
|
||||
?path,
|
||||
);
|
||||
continue 'outer;
|
||||
}
|
||||
|
||||
PileValue::ListExtractor(_) => {
|
||||
trace!(
|
||||
message = "Skipping field, is ListExtractor",
|
||||
@@ -296,104 +293,3 @@ impl DbFtsIndex {
|
||||
return Ok(out);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn apply(post: &FieldSpecPost, val: &PileValue) -> Option<PileValue> {
|
||||
Some(match post {
|
||||
FieldSpecPost::NotEmpty { notempty: false } => val.clone(),
|
||||
FieldSpecPost::NotEmpty { notempty: true } => match val {
|
||||
PileValue::Null => return None,
|
||||
PileValue::String(x) if x.is_empty() => return None,
|
||||
PileValue::Array(x) if x.is_empty() => return None,
|
||||
x => x.clone(),
|
||||
},
|
||||
|
||||
FieldSpecPost::SetCase { case: Case::Lower } => match val {
|
||||
PileValue::Null => return None,
|
||||
PileValue::U64(_) => return None,
|
||||
PileValue::I64(_) => return None,
|
||||
PileValue::Blob { .. } => return None,
|
||||
PileValue::ObjectExtractor(_) => return None,
|
||||
PileValue::ListExtractor(_) => return None,
|
||||
PileValue::String(x) => PileValue::String(Arc::new(x.as_str().to_lowercase().into())),
|
||||
|
||||
PileValue::Array(x) => PileValue::Array(Arc::new(
|
||||
x.iter().map(|x| apply(post, x)).collect::<Option<_>>()?,
|
||||
)),
|
||||
},
|
||||
|
||||
FieldSpecPost::SetCase { case: Case::Upper } => match val {
|
||||
PileValue::Null => return None,
|
||||
PileValue::U64(_) => return None,
|
||||
PileValue::I64(_) => return None,
|
||||
PileValue::Blob { .. } => return None,
|
||||
PileValue::ObjectExtractor(_) => return None,
|
||||
PileValue::ListExtractor(_) => return None,
|
||||
PileValue::String(x) => PileValue::String(Arc::new(x.as_str().to_uppercase().into())),
|
||||
|
||||
PileValue::Array(x) => PileValue::Array(Arc::new(
|
||||
x.iter()
|
||||
.map(|x| apply(post, x))
|
||||
.collect::<Option<Vec<_>>>()?,
|
||||
)),
|
||||
},
|
||||
|
||||
FieldSpecPost::TrimSuffix { trim_suffix } => match val {
|
||||
PileValue::Null => return None,
|
||||
PileValue::U64(_) => return None,
|
||||
PileValue::I64(_) => return None,
|
||||
PileValue::Blob { .. } => return None,
|
||||
PileValue::ObjectExtractor(_) => return None,
|
||||
PileValue::ListExtractor(_) => return None,
|
||||
|
||||
PileValue::String(x) => PileValue::String(Arc::new(
|
||||
x.strip_suffix(trim_suffix).unwrap_or(x.as_str()).into(),
|
||||
)),
|
||||
|
||||
PileValue::Array(x) => PileValue::Array(Arc::new(
|
||||
x.iter()
|
||||
.map(|x| apply(post, x))
|
||||
.collect::<Option<Vec<_>>>()?,
|
||||
)),
|
||||
},
|
||||
|
||||
FieldSpecPost::TrimPrefix { trim_prefix } => match val {
|
||||
PileValue::Null => return None,
|
||||
PileValue::U64(_) => return None,
|
||||
PileValue::I64(_) => return None,
|
||||
PileValue::Blob { .. } => return None,
|
||||
PileValue::ObjectExtractor(_) => return None,
|
||||
PileValue::ListExtractor(_) => return None,
|
||||
|
||||
PileValue::String(x) => PileValue::String(Arc::new(
|
||||
x.strip_prefix(trim_prefix).unwrap_or(x.as_str()).into(),
|
||||
)),
|
||||
|
||||
PileValue::Array(x) => PileValue::Array(Arc::new(
|
||||
x.iter()
|
||||
.map(|x| apply(post, x))
|
||||
.collect::<Option<Vec<_>>>()?,
|
||||
)),
|
||||
},
|
||||
|
||||
FieldSpecPost::Join { join } => match val {
|
||||
PileValue::Null => return None,
|
||||
PileValue::U64(_) => return None,
|
||||
PileValue::I64(_) => return None,
|
||||
PileValue::Blob { .. } => return None,
|
||||
PileValue::ObjectExtractor(_) => return None,
|
||||
PileValue::ListExtractor(_) => return None,
|
||||
|
||||
PileValue::String(x) => PileValue::String(x.clone()),
|
||||
|
||||
PileValue::Array(x) => PileValue::String(Arc::new(
|
||||
x.iter()
|
||||
.map(|x| apply(post, x))
|
||||
.map(|x| x.and_then(|x| x.as_str().map(|x| x.to_owned())))
|
||||
.collect::<Option<Vec<_>>>()?
|
||||
.into_iter()
|
||||
.join(join)
|
||||
.into(),
|
||||
)),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user