lazily-evaluated extractors

This commit is contained in:
2026-02-22 09:23:57 -08:00
parent d16d16be26
commit 751ff787e2
19 changed files with 525 additions and 391 deletions

View File

@@ -9,8 +9,6 @@ workspace = true
[dependencies]
serde = { workspace = true }
itertools = { workspace = true }
serde_json = { workspace = true }
smartstring = { workspace = true }
[dev-dependencies]

View File

@@ -1,6 +1,4 @@
use itertools::Itertools;
use serde::Deserialize;
use serde_json::Value;
#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
#[serde(untagged)]
@@ -12,107 +10,6 @@ pub enum FieldSpecPost {
NotEmpty { notempty: bool },
}
impl FieldSpecPost {
pub fn apply(&self, val: &Value) -> Option<Value> {
Some(match self {
Self::NotEmpty { notempty: false } => val.clone(),
Self::NotEmpty { notempty: true } => match val {
Value::Null => return None,
Value::String(x) if x.is_empty() => return None,
Value::Array(x) if x.is_empty() => return None,
x => x.clone(),
},
Self::SetCase { case: Case::Lower } => match val {
Value::Null => return None,
Value::Bool(_) | Value::Number(_) => val.clone(),
Value::String(x) => Value::String(x.to_lowercase()),
Value::Array(x) => {
Value::Array(x.iter().map(|x| self.apply(x)).collect::<Option<_>>()?)
}
Value::Object(x) => Value::Object(
x.iter()
.map(|x| (x.0.to_lowercase(), self.apply(x.1)))
.map(|x| x.1.map(|y| (x.0, y)))
.collect::<Option<_>>()?,
),
},
Self::SetCase { case: Case::Upper } => match val {
Value::Null => return None,
Value::Bool(_) | Value::Number(_) => val.clone(),
Value::String(x) => Value::String(x.to_uppercase()),
Value::Array(x) => {
Value::Array(x.iter().map(|x| self.apply(x)).collect::<Option<_>>()?)
}
Value::Object(x) => Value::Object(
x.iter()
.map(|x| (x.0.to_uppercase(), self.apply(x.1)))
.map(|x| x.1.map(|y| (x.0, y)))
.collect::<Option<_>>()?,
),
},
Self::TrimSuffix { trim_suffix } => match val {
Value::Null => return None,
Value::Bool(_) | Value::Number(_) => Value::String(val.to_string()),
Value::String(x) => {
Value::String(x.strip_suffix(trim_suffix).unwrap_or(x).to_owned())
}
Value::Array(x) => {
Value::Array(x.iter().map(|x| self.apply(x)).collect::<Option<_>>()?)
}
Value::Object(x) => Value::Object(
x.iter()
.map(|x| {
(
x.0.strip_suffix(trim_suffix).unwrap_or(x.0).to_owned(),
self.apply(x.1),
)
})
.map(|x| x.1.map(|y| (x.0, y)))
.collect::<Option<_>>()?,
),
},
Self::TrimPrefix { trim_prefix } => match val {
Value::Null => return None,
Value::Object(_) => return None,
Value::Bool(_) | Value::Number(_) => Value::String(val.to_string()),
Value::String(x) => {
Value::String(x.strip_prefix(trim_prefix).unwrap_or(x).to_owned())
}
Value::Array(x) => {
Value::Array(x.iter().map(|x| self.apply(x)).collect::<Option<_>>()?)
}
},
Self::Join { join } => match val {
Value::Null => return None,
Value::Object(_) => return None,
Value::Bool(_) | Value::Number(_) => Value::String(val.to_string()),
Value::String(x) => Value::String(x.clone()),
Value::Array(x) => Value::String(
x.iter()
.map(|x| self.apply(x))
.collect::<Option<Vec<_>>>()?
.into_iter()
.join(join),
),
},
})
}
}
#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum Case {

View File

@@ -12,14 +12,13 @@ pile-config = { workspace = true }
pile-toolbox = { workspace = true }
pile-flac = { workspace = true }
serde_json = { workspace = true }
itertools = { workspace = true }
walkdir = { workspace = true }
tantivy = { workspace = true }
tracing = { workspace = true }
jsonpath-rust = { workspace = true }
chrono = { workspace = true }
toml = { workspace = true }
thiserror = { workspace = true }
rayon = { workspace = true }
smartstring = { workspace = true }

View File

@@ -8,7 +8,11 @@ use rayon::{
use std::{
io::ErrorKind,
path::PathBuf,
sync::{Arc, mpsc::Receiver},
sync::{
Arc,
atomic::{AtomicU64, Ordering},
mpsc::Receiver,
},
thread::JoinHandle,
time::Instant,
};
@@ -144,15 +148,14 @@ impl Dataset {
let mut total = 0u64;
while let Ok(batch) = read_rx.recv() {
let batch = batch.map_err(DatasetError::from)?;
let len = batch.len() as u64;
let batch = batch?;
if let Some(flag) = &flag
&& flag.is_cancelled()
{
return Err(CancelableTaskError::Cancelled);
}
let this = AtomicU64::new(0);
let start = Instant::now();
write_pool
.install(|| {
@@ -170,6 +173,7 @@ impl Dataset {
}
})
.map(|(key, doc)| {
this.fetch_add(1, Ordering::Relaxed);
index_writer
.add_document(doc)
.map_err(|err| (key, err))
@@ -180,9 +184,10 @@ impl Dataset {
})
.map_err(|(_key, err)| DatasetError::from(err))?;
total += len;
let this = this.load(Ordering::Relaxed);
total += this;
let time_ms = start.elapsed().as_millis();
debug!("Added a batch of {len} in {time_ms} ms ({total} total)");
debug!("Added a batch of {this} in {time_ms} ms ({total} total)");
}
if let Some(flag) = flag.as_ref()
@@ -334,6 +339,13 @@ fn start_read_task(
}
}
}
if !batch.is_empty() {
match read_tx.send(Ok(batch)) {
Ok(()) => {}
Err(_) => return,
};
}
});
return (read_task, read_rx);

View File

@@ -0,0 +1,64 @@
use pile_config::Label;
use pile_flac::{FlacBlock, FlacReader};
use std::{collections::HashMap, fs::File, io::BufReader, sync::OnceLock};
use crate::{FileItem, PileValue, extract::Extractor};
/// Lazily extracts FLAC vorbis-comment metadata from a [FileItem].
///
/// The file is only opened and parsed the first time a field is
/// requested; the result is cached for the extractor's lifetime.
pub struct FlacExtractor<'a> {
    /// The file to read metadata from
    item: &'a FileItem,
    /// Cache of the parsed comment map; filled at most once.
    output: OnceLock<HashMap<Label, PileValue<'a, FileItem>>>,
}
impl<'a> FlacExtractor<'a> {
    /// Create a new extractor for `item`.
    ///
    /// No I/O happens here; the file is read lazily on first access.
    pub fn new(item: &'a FileItem) -> Self {
        Self {
            item,
            output: OnceLock::new(),
        }
    }

    /// Compute (on first call) and return the comment map for this file.
    ///
    /// Keys are lowercased vorbis-comment names; every value is an
    /// array, since vorbis comments may repeat a key. Comment names
    /// that are not valid [Label]s are silently skipped.
    fn get_inner(&self) -> Result<&HashMap<Label, PileValue<'a, FileItem>>, std::io::Error> {
        // Fast path: already computed.
        if let Some(x) = self.output.get() {
            return Ok(x);
        }
        let file = File::open(&self.item.path)?;
        let reader = FlacReader::new(BufReader::new(file));
        let mut output: HashMap<Label, Vec<_>> = HashMap::new();
        for block in reader {
            // NOTE(review): this unwrap panics on a malformed FLAC
            // stream — consider mapping the reader error into
            // `std::io::Error` instead. TODO confirm intended behavior.
            if let FlacBlock::VorbisComment(comment) = block.unwrap() {
                for (k, v) in comment.comment.comments {
                    // Vorbis comment keys are case-insensitive; normalize.
                    match Label::new(k.to_string().to_lowercase()) {
                        Some(k) => output.entry(k).or_default().push(PileValue::String(v)),
                        None => continue,
                    }
                }
                // We should only have one comment block,
                // stop reading when we find it
                break;
            }
        }
        // Wrap every entry's values in an array.
        let output = output
            .into_iter()
            .map(|(k, v)| (k, PileValue::Array(v)))
            .collect();
        // If a value was stored concurrently/reentrantly in the
        // meantime, `get_or_init` keeps the first one.
        return Ok(self.output.get_or_init(|| output));
    }
}
impl Extractor<FileItem> for FlacExtractor<'_> {
    /// Look up one comment field, triggering the lazy file read
    /// on first use.
    fn field<'a>(
        &'a self,
        name: &Label,
    ) -> Result<Option<&'a PileValue<'a, FileItem>>, std::io::Error> {
        let map = self.get_inner()?;
        Ok(map.get(name))
    }

    /// List every comment key found in the file.
    fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
        self.get_inner()
            .map(|map| map.keys().cloned().collect())
    }
}

View File

@@ -0,0 +1,77 @@
use pile_config::Label;
use std::{collections::HashMap, path::Component, sync::OnceLock};
use crate::{FileItem, Key, PileValue, extract::Extractor};
/// Lazily extracts filesystem/path metadata from a [FileItem].
///
/// All fields are derived from the item's path; nothing is read
/// from disk. The map is built on first access and then cached.
pub struct FsExtractor<'a> {
    /// The file whose path we describe
    item: &'a FileItem,
    /// Cache of the computed field map; filled at most once.
    output: OnceLock<HashMap<Label, PileValue<'a, FileItem>>>,
}
impl<'a> FsExtractor<'a> {
    /// Create a new extractor for `item`. No work is done until a
    /// field is first requested.
    pub fn new(item: &'a FileItem) -> Self {
        Self {
            item,
            output: OnceLock::new(),
        }
    }

    /// Compute (on first call) and return the field map.
    ///
    /// Exposed fields, all derived from `item.path`:
    /// - `extension`: the file extension, or `Null` if absent or not UTF-8
    /// - `path`: the full path as a string (via [Key::to_string]),
    ///   or `Null` if the path is not valid UTF-8
    /// - `segments`: each path component as a string, or `Null` if
    ///   any component is not valid UTF-8
    fn get_inner(&self) -> Result<&HashMap<Label, PileValue<'a, FileItem>>, std::io::Error> {
        // Fast path: already computed.
        if let Some(x) = self.output.get() {
            return Ok(x);
        }
        // unwraps below are on statically-known label names
        #[expect(clippy::unwrap_used)]
        let output = HashMap::from([
            (
                Label::new("extension").unwrap(),
                self.item
                    .path
                    .extension()
                    .and_then(|x| x.to_str())
                    .map(|x| PileValue::String(x.into()))
                    .unwrap_or(PileValue::Null),
            ),
            (
                Label::new("path").unwrap(),
                // `to_string` here is `Key::to_string` (PathBuf impl),
                // which returns None for non-UTF-8 paths.
                self.item
                    .path
                    .to_string()
                    .map(|x| PileValue::String(x.into()))
                    .unwrap_or(PileValue::Null),
            ),
            (
                Label::new("segments").unwrap(),
                // Render every component as a string; if any component
                // fails (non-UTF-8), the whole array becomes Null.
                self.item
                    .path
                    .components()
                    .map(|x| match x {
                        Component::CurDir => Some(".".to_owned()),
                        Component::Normal(x) => x.to_str().map(|x| x.to_owned()),
                        Component::ParentDir => Some("..".to_owned()),
                        Component::RootDir => Some("/".to_owned()),
                        Component::Prefix(x) => x.as_os_str().to_str().map(|x| x.to_owned()),
                    })
                    .map(|x| x.map(|x| PileValue::String(x.into())))
                    .collect::<Option<Vec<_>>>()
                    .map(PileValue::Array)
                    .unwrap_or(PileValue::Null),
            ),
        ]);
        return Ok(self.output.get_or_init(|| output));
    }
}
impl Extractor<FileItem> for FsExtractor<'_> {
    /// Look up one path-derived field, building the map on first use.
    fn field<'a>(
        &'a self,
        name: &Label,
    ) -> Result<Option<&'a PileValue<'a, FileItem>>, std::io::Error> {
        let map = self.get_inner()?;
        Ok(map.get(name))
    }

    /// List every path-derived field.
    fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
        self.get_inner()
            .map(|map| map.keys().cloned().collect())
    }
}

View File

@@ -0,0 +1,18 @@
use pile_config::Label;
use std::collections::HashMap;
use crate::{Item, PileValue, extract::Extractor};
/// A trivial [Extractor] backed by an eagerly-built in-memory map.
pub struct MapExtractor<'a, I: Item> {
    /// The field map; supplied directly by the creator of this value.
    pub(super) inner: HashMap<Label, PileValue<'a, I>>,
}
impl<I: Item> Extractor<I> for MapExtractor<'_, I> {
    /// Plain map lookup; this extractor is never lazy and never fails.
    fn field<'a>(&'a self, name: &Label) -> Result<Option<&'a PileValue<'a, I>>, std::io::Error> {
        let value = self.inner.get(name);
        Ok(value)
    }

    /// Every key currently stored in the map.
    fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
        let mut out = Vec::with_capacity(self.inner.len());
        out.extend(self.inner.keys().cloned());
        Ok(out)
    }
}

View File

@@ -0,0 +1,68 @@
mod flac;
use std::{collections::HashMap, rc::Rc};
pub use flac::*;
mod fs;
pub use fs::*;
mod map;
pub use map::*;
use pile_config::Label;
/// An attachment that extracts metadata from an [Item].
///
/// Metadata is exposed as an immutable map of {label: value},
/// much like a json object. Implementations may compute the map
/// lazily, which is why both methods can return I/O errors.
pub trait Extractor<I: crate::Item> {
    /// Get the field at `name` from `item`.
    /// - returns `None` if `name` is not a valid field
    /// - returns `Some(Null)` if `name` is not available
    ///
    /// # Errors
    /// I/O failures from lazily reading the underlying item.
    fn field<'a>(
        &'a self,
        name: &pile_config::Label,
    ) -> Result<Option<&'a crate::PileValue<'a, I>>, std::io::Error>;

    /// Return all fields in this extractor.
    /// `Self::field` must return [Some] for all these keys
    /// and [None] for all others.
    fn fields(&self) -> Result<Vec<Label>, std::io::Error>;
}
/// The root extractor for an item: routes top-level labels
/// (namespaces) to per-source sub-extractors.
pub struct MetaExtractor<'a, I: crate::Item> {
    /// Map of namespace label -> sub-extractor value.
    inner: MapExtractor<'a, I>,
}
impl<'a> MetaExtractor<'a, crate::FileItem> {
    /// Build the top-level extractor for a file, with one
    /// lazily-evaluated sub-extractor per namespace:
    /// - `flac`: vorbis-comment metadata ([FlacExtractor])
    /// - `fs`: filesystem/path metadata ([FsExtractor])
    // unwraps are on statically-known label names
    #[expect(clippy::unwrap_used)]
    pub fn new(item: &'a crate::FileItem) -> Self {
        let inner = MapExtractor {
            inner: HashMap::from([
                (
                    Label::new("flac").unwrap(),
                    crate::PileValue::Extractor(Rc::new(FlacExtractor::new(item))),
                ),
                (
                    Label::new("fs").unwrap(),
                    crate::PileValue::Extractor(Rc::new(FsExtractor::new(item))),
                ),
            ]),
        };
        Self { inner }
    }
}
impl Extractor<crate::FileItem> for MetaExtractor<'_, crate::FileItem> {
    /// Look up a top-level namespace (e.g. `flac`, `fs`) by delegating
    /// to the inner map of sub-extractors.
    fn field<'a>(
        &'a self,
        name: &pile_config::Label,
    ) -> Result<Option<&'a crate::PileValue<'a, crate::FileItem>>, std::io::Error> {
        self.inner.field(name)
    }

    /// Return all top-level namespaces.
    ///
    /// Delegating to `inner` keeps this list automatically in sync
    /// with the map built in [MetaExtractor::new]; the previous
    /// hardcoded `["flac", "fs"]` list duplicated that knowledge and
    /// would silently violate the [Extractor] contract if the two
    /// ever drifted apart. (Ordering of the returned labels is
    /// unspecified, as with any [Extractor].)
    fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
        self.inner.fields()
    }
}

View File

@@ -1,7 +1,6 @@
use jsonpath_rust::JsonPath;
use pile_config::{ConfigToml, DatasetFts, Label};
use serde_json::Value;
use std::{path::PathBuf, sync::LazyLock};
use itertools::Itertools;
use pile_config::{Case, ConfigToml, DatasetFts, FieldSpecPost, Label};
use std::{path::PathBuf, rc::Rc, sync::LazyLock};
use tantivy::{
DocAddress, Index, ReloadPolicy, TantivyDocument, TantivyError,
collector::Collector,
@@ -10,7 +9,7 @@ use tantivy::{
};
use tracing::{debug, trace, warn};
use crate::{Item, Key};
use crate::{Item, Key, PileValue, extract::MetaExtractor};
#[derive(Debug, Clone)]
pub struct FtsLookupResult {
@@ -84,10 +83,17 @@ impl DbFtsIndex {
doc.add_text(self.schema.get_field("_meta_source")?, item.source_name());
doc.add_text(self.schema.get_field("_meta_key")?, key);
let json = item.json()?;
let item = match item.as_file() {
Some(x) => x,
None => return Ok(None),
};
let extractor = MetaExtractor::new(item);
let extractor = PileValue::Extractor(Rc::new(extractor));
let mut empty = true;
for name in self.fts_cfg().fields.keys() {
let x = self.get_field(&json, name)?;
let x = self.get_field(&extractor, name)?;
let val = match x {
Some(x) => x,
@@ -109,9 +115,9 @@ impl DbFtsIndex {
// MARK: read
//
pub fn get_field(
pub fn get_field<I: Item>(
&self,
json: &Value,
extractor: &PileValue<'_, I>,
field_name: &Label,
) -> Result<Option<String>, std::io::Error> {
let field = match self.cfg.schema.get(field_name) {
@@ -124,41 +130,23 @@ impl DbFtsIndex {
// Try paths in order, using the first value we find
'outer: for path in field.path.as_slice() {
let val = match json.query(path) {
Ok(mut x) => {
if x.len() > 1 {
warn!(
message = "Path returned more than one value, this is not supported. Skipping.",
?path,
field = field_name.to_string()
);
continue;
}
let segments = path
.split('.')
.map(|x| Label::new(x).unwrap_or_else(|| panic!("wtf {x}")))
.collect::<Vec<_>>();
match x.pop() {
Some(x) => x,
None => continue,
}
}
Err(error) => {
warn!(
message = "Invalid path, skipping",
?path,
field = field_name.to_string(),
?error
);
continue;
}
let val = match extractor.query(&segments)? {
Some(x) => x,
None => return Ok(None),
};
let mut val = match val {
Value::Null => {
PileValue::Null => {
trace!(
message = "Skipping field, is null",
field = field_name.to_string(),
path,
value = ?val
// value = ?val
);
continue;
}
@@ -166,7 +154,7 @@ impl DbFtsIndex {
};
for post in &field.post {
val = match post.apply(&val) {
val = match apply(post, &val) {
Some(x) => x,
None => return Ok(None),
};
@@ -175,7 +163,7 @@ impl DbFtsIndex {
loop {
val = match val {
#[expect(clippy::unwrap_used)]
Value::Array(ref mut x) => {
PileValue::Array(ref mut x) => {
if x.len() == 1 {
x.pop().unwrap()
} else if x.len() > 1 {
@@ -183,7 +171,7 @@ impl DbFtsIndex {
message = "Skipping field, is array with more than one element",
field = field_name.to_string(),
path,
value = ?val
//value = ?val
);
continue 'outer;
} else {
@@ -191,32 +179,30 @@ impl DbFtsIndex {
message = "Skipping field, is empty array",
field = field_name.to_string(),
path,
value = ?val
//value = ?val
);
continue 'outer;
}
}
Value::Null => {
PileValue::Null => {
trace!(
message = "Skipping field, is null",
field = field_name.to_string(),
path,
value = ?val
//value = ?val
);
continue 'outer;
}
Value::Object(_) => {
PileValue::Extractor(_) => {
trace!(
message = "Skipping field, is object",
field = field_name.to_string(),
path,
value = ?val
//value = ?val
);
continue 'outer;
}
Value::Bool(x) => return Ok(Some(x.to_string())),
Value::Number(x) => return Ok(Some(x.to_string())),
Value::String(x) => return Ok(Some(x)),
PileValue::String(x) => return Ok(Some(x.to_string())),
}
}
}
@@ -310,3 +296,80 @@ impl DbFtsIndex {
return Ok(out);
}
}
pub fn apply<'a, I: Item>(
post: &FieldSpecPost,
val: &PileValue<'a, I>,
) -> Option<PileValue<'a, I>> {
Some(match post {
FieldSpecPost::NotEmpty { notempty: false } => val.clone(),
FieldSpecPost::NotEmpty { notempty: true } => match val {
PileValue::Null => return None,
PileValue::String(x) if x.is_empty() => return None,
PileValue::Array(x) if x.is_empty() => return None,
x => x.clone(),
},
FieldSpecPost::SetCase { case: Case::Lower } => match val {
PileValue::Null => return None,
PileValue::Extractor(_) => return None,
PileValue::String(x) => PileValue::String(x.to_lowercase().into()),
PileValue::Array(x) => {
PileValue::Array(x.iter().map(|x| apply(post, x)).collect::<Option<_>>()?)
}
},
FieldSpecPost::SetCase { case: Case::Upper } => match val {
PileValue::Null => return None,
PileValue::Extractor(_) => return None,
PileValue::String(x) => PileValue::String(x.to_uppercase().into()),
PileValue::Array(x) => {
PileValue::Array(x.iter().map(|x| apply(post, x)).collect::<Option<_>>()?)
}
},
FieldSpecPost::TrimSuffix { trim_suffix } => match val {
PileValue::Null => return None,
PileValue::Extractor(_) => return None,
PileValue::String(x) => {
PileValue::String(x.strip_suffix(trim_suffix).unwrap_or(x).into())
}
PileValue::Array(x) => {
PileValue::Array(x.iter().map(|x| apply(post, x)).collect::<Option<_>>()?)
}
},
FieldSpecPost::TrimPrefix { trim_prefix } => match val {
PileValue::Null => return None,
PileValue::Extractor(_) => return None,
PileValue::String(x) => {
PileValue::String(x.strip_prefix(trim_prefix).unwrap_or(x).into())
}
PileValue::Array(x) => {
PileValue::Array(x.iter().map(|x| apply(post, x)).collect::<Option<_>>()?)
}
},
FieldSpecPost::Join { join } => match val {
PileValue::Null => return None,
PileValue::Extractor(_) => return None,
PileValue::String(x) => PileValue::String(x.clone()),
PileValue::Array(x) => PileValue::String(
x.iter()
.map(|x| apply(post, x))
.map(|x| x.and_then(|x| x.as_str().map(|x| x.to_owned())))
.collect::<Option<Vec<_>>>()?
.into_iter()
.join(join)
.into(),
),
},
})
}

View File

@@ -0,0 +1,62 @@
use pile_config::Label;
use std::{fmt::Debug, path::PathBuf};
//
// MARK: key
//
/// A string-convertible key type identifying an [Item] within its
/// source (see [Item::Key]).
pub trait Key: Debug + Clone + Send + Sync + 'static {
    /// Convert this key to a string, returning `None`
    /// if we encounter any kind of error.
    fn to_string(&self) -> Option<String>;

    /// Parse a key back from a string, returning `None` on failure.
    fn from_string(str: &str) -> Option<Self>;
}
impl Key for PathBuf {
    /// Any string is a valid path, so this never returns `None`.
    fn from_string(str: &str) -> Option<Self> {
        Some(Self::from(str))
    }

    /// `None` when the path is not valid UTF-8.
    fn to_string(&self) -> Option<String> {
        self.to_str().map(str::to_owned)
    }
}
//
// MARK: item
//
/// A pointer to raw data
pub trait Item: Debug + Send + Sync + 'static {
    /// The key uniquely identifying this item within its source.
    type Key: Key;
    /// Name of the source this item belongs to.
    fn source_name(&self) -> &str;
    /// This item's key.
    fn key(&self) -> &Self::Key;
    /// Downcast to a [FileItem]; `None` for non-file items.
    fn as_file(&self) -> Option<&FileItem>;
}
/// An [Item] backed by a file on disk.
#[derive(Clone, Debug)]
pub struct FileItem {
    /// Path to this file.
    /// Must be relative to source root dir.
    pub path: PathBuf,
    /// Name of the source this file came from.
    pub source_name: Label,
}
impl Item for FileItem {
    /// Files are keyed by their (source-relative) path.
    type Key = PathBuf;

    fn key(&self) -> &Self::Key {
        &self.path
    }

    fn source_name(&self) -> &str {
        &self.source_name
    }

    /// A [FileItem] is trivially its own file view.
    fn as_file(&self) -> Option<&FileItem> {
        Some(self)
    }
}

View File

@@ -1,66 +0,0 @@
use std::{fmt::Debug, fs::File, io::BufReader, path::PathBuf};
use pile_config::Label;
use pile_flac::{FlacBlock, FlacReader};
use serde_json::{Map, Value};
use crate::Item;
/// An item backed by a FLAC file on disk.
/// (Removed in this commit in favor of [FileItem] + lazy extractors.)
pub struct FlacItem {
    /// Path to the file on disk
    pub(crate) path: PathBuf,
    /// Name of the source this file came from
    pub(crate) source_name: Label,
}
// Manual impl: only `path` is shown; `source_name` is omitted
// from debug output.
impl Debug for FlacItem {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("FlacItem")
            .field("path", &self.path)
            .finish()
    }
}
impl Item for FlacItem {
    type Key = PathBuf;

    fn source_name(&self) -> &str {
        &self.source_name
    }

    fn key(&self) -> &Self::Key {
        &self.path
    }

    /// Read the file's vorbis comments eagerly into a JSON object of
    /// {comment key: array-of-string values}. Repeated keys append to
    /// the same array.
    fn json(&self) -> Result<serde_json::Value, std::io::Error> {
        let file = File::open(&self.path)?;
        let reader = FlacReader::new(BufReader::new(file));
        let mut output = Map::new();
        for block in reader {
            // NOTE(review): unwrap panics on a malformed FLAC stream.
            if let FlacBlock::VorbisComment(comment) = block.unwrap() {
                for (k, v) in comment.comment.comments {
                    let k = k.to_string();
                    let v = Value::String(v.into());
                    let e = output.get_mut(&k);
                    match e {
                        None => {
                            output.insert(k.clone(), Value::Array(vec![v]));
                        }
                        Some(e) => {
                            // We always insert an array
                            #[expect(clippy::unwrap_used)]
                            e.as_array_mut().unwrap().push(v);
                        }
                    }
                }
                // We should only have one comment block,
                // stop reading when we find it
                break;
            }
        }
        return Ok(serde_json::Value::Object(output));
    }
}

View File

@@ -1,2 +0,0 @@
mod flac;
pub use flac::*;

View File

@@ -7,6 +7,12 @@ pub use misc::*;
mod dataset;
pub use dataset::*;
mod item;
pub use item::*;
mod value;
pub use value::*;
pub mod extract;
pub mod index;
pub mod item;
pub mod source;

View File

@@ -4,7 +4,7 @@ use pile_config::Label;
use std::path::PathBuf;
use walkdir::WalkDir;
use crate::{DataSource, Item, item::FlacItem, path_ts_latest};
use crate::{DataSource, Item, item::FileItem, path_ts_latest};
#[derive(Debug)]
pub struct DirDataSource {
@@ -33,7 +33,7 @@ impl DataSource for DirDataSource {
return Ok(None);
}
return Ok(Some(Box::new(FlacItem {
return Ok(Some(Box::new(FileItem {
source_name: self.name.clone(),
path: key.to_owned(),
})));
@@ -59,9 +59,9 @@ impl DataSource for DirDataSource {
let path = entry.into_path();
let item: Box<dyn Item<Key = Self::Key>> =
match path.extension().map(|x| x.to_str()).flatten() {
match path.extension().and_then(|x| x.to_str()) {
None => return None,
Some("flac") => Box::new(FlacItem {
Some("flac") => Box::new(FileItem {
source_name: self.name.clone(),
path: path.clone(),
}),

View File

@@ -1,5 +1,7 @@
use chrono::{DateTime, Utc};
use std::{error::Error, fmt::Debug, path::PathBuf};
use std::error::Error;
use crate::{Item, Key};
/// A read-only set of [Item]s.
pub trait DataSource {
@@ -23,37 +25,3 @@ pub trait DataSource {
/// Return the time of the latest change to the data in this source
fn latest_change(&self) -> Result<Option<DateTime<Utc>>, Self::Error>;
}
pub trait Item: Debug + Send + Sync + 'static {
type Key: Key;
/// Get this item's unstructured schema
///
/// TODO: don't use json, use a lazily-evaluated type that supports binary
fn json(&self) -> Result<serde_json::Value, std::io::Error>;
fn source_name(&self) -> &str;
fn key(&self) -> &Self::Key;
}
//
// MARK: key
//
pub trait Key: Debug + Clone + Send + Sync + 'static {
/// Convert this key to a string, returning `None`
/// if we encounter any kind of error.
fn to_string(&self) -> Option<String>;
fn from_string(str: &str) -> Option<Self>;
}
impl Key for PathBuf {
fn from_string(str: &str) -> Option<Self> {
str.parse().ok()
}
fn to_string(&self) -> Option<String> {
self.to_str().map(|x| x.to_owned())
}
}

View File

@@ -0,0 +1,86 @@
use std::rc::Rc;
use pile_config::Label;
use serde_json::{Map, Value};
use smartstring::{LazyCompact, SmartString};
use crate::{Item, extract::Extractor};
/// An immutable, lazily-computed value similar to [serde_json::Value].
pub enum PileValue<'a, I: crate::Item> {
    /// The absence of a value
    Null,
    /// A string
    String(SmartString<LazyCompact>),
    /// An array of values
    Array(Vec<PileValue<'a, I>>),
    /// A lazily-computed map of {label: value}
    Extractor(Rc<dyn Extractor<I> + 'a>),
}
// Manual impl: `#[derive(Clone)]` would add an unnecessary
// `I: Clone` bound. Cloning an `Extractor` variant is cheap — just
// an `Rc` refcount bump, not a re-read of the underlying item.
impl<I: Item> Clone for PileValue<'_, I> {
    fn clone(&self) -> Self {
        match self {
            Self::Null => Self::Null,
            Self::String(x) => Self::String(x.clone()),
            Self::Array(x) => Self::Array(x.clone()),
            Self::Extractor(x) => Self::Extractor(x.clone()),
        }
    }
}
impl<'a, I: Item> PileValue<'a, I> {
pub fn query(&'a self, query: &[Label]) -> Result<Option<&'a Self>, std::io::Error> {
let mut out = Some(self);
for q in query {
out = match &out {
None => return Ok(None),
Some(Self::Null) => None,
Some(Self::Array(_)) => None,
Some(Self::String(_)) => None,
Some(Self::Extractor(e)) => e.field(q)?,
};
}
return Ok(out);
}
pub fn as_str(&self) -> Option<&str> {
match self {
Self::String(x) => Some(x),
_ => None,
}
}
pub fn to_json(&self) -> Result<Value, std::io::Error> {
Ok(match self {
Self::Null => Value::Null,
Self::String(x) => Value::String(x.to_string()),
Self::Array(x) => Value::Array(
x.iter()
.map(|x| x.to_json())
.collect::<Result<Vec<_>, _>>()?,
),
Self::Extractor(e) => {
let keys = e.fields()?;
let map = keys
.iter()
.map(|k| {
#[expect(clippy::expect_used)]
let v = e.field(k)?.expect("key must be valid");
let v = v.to_json()?;
Ok((k.to_string(), v))
})
.collect::<Result<Map<String, Value>, std::io::Error>>()?;
Value::Object(map)
}
})
}
}

View File

@@ -42,8 +42,8 @@ impl CliCmd for LookupCommand {
let ds = Dataset::open(&self.config)
.with_context(|| format!("while opening dataset for {}", self.config.display()))?;
if self.refresh {
if ds.needs_fts().context("while checking dataset fts")? {
if self.refresh
&& ds.needs_fts().context("while checking dataset fts")? {
info!("FTS index is missing or out-of-date, regenerating");
ds.fts_refresh(self.jobs, Some(flag)).map_err(|x| {
x.map_err(|x| {
@@ -54,7 +54,6 @@ impl CliCmd for LookupCommand {
})
})?;
}
}
let results = ds
.fts_lookup(&self.query, self.topn)