299 lines
7.6 KiB
Rust
299 lines
7.6 KiB
Rust
use mime::Mime;
|
|
use pile_config::objectpath::{ObjectPath, PathSegment};
|
|
use serde_json::{Map, Value};
|
|
use smartstring::{LazyCompact, SmartString};
|
|
use std::{fmt::Debug, fs::File, io::Cursor, path::PathBuf, sync::Arc};
|
|
|
|
use crate::{
|
|
extract::{
|
|
blob::BinaryExtractor,
|
|
item::ItemExtractor,
|
|
misc::{ArrayExtractor, MapExtractor, VecExtractor},
|
|
string::StringExtractor,
|
|
traits::{ExtractState, ListExtractor, ObjectExtractor},
|
|
},
|
|
value::{Item, ItemReader},
|
|
};
|
|
|
|
/// A reference-counted, shareable byte buffer.
///
/// Cloning an `ArcBytes` clones the inner [`Arc`] handle; the bytes
/// themselves are shared between clones, not copied.
#[derive(Clone)]
pub struct ArcBytes(
    /// The shared backing buffer.
    pub Arc<Vec<u8>>,
);
|
|
impl Debug for ArcBytes {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
f.debug_struct("ArcBytes")
|
|
.field("len()", &self.0.len())
|
|
.finish()
|
|
}
|
|
}
|
|
|
|
impl AsRef<[u8]> for ArcBytes {
|
|
fn as_ref(&self) -> &[u8] {
|
|
&self.0
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub enum BinaryPileValue {
|
|
/// A binary blob
|
|
Blob { mime: Mime, bytes: ArcBytes },
|
|
|
|
/// An pointer to a file
|
|
File { mime: Mime, path: PathBuf },
|
|
}
|
|
|
|
impl BinaryPileValue {
|
|
/// Open the item for reading.
|
|
pub async fn read(&self) -> Result<ItemReader, std::io::Error> {
|
|
match self {
|
|
Self::File { path, .. } => Ok(ItemReader::File(File::open(path)?)),
|
|
Self::Blob { bytes, .. } => Ok(ItemReader::Vec(Cursor::new(bytes.clone()))),
|
|
}
|
|
}
|
|
|
|
pub fn mime(&self) -> &Mime {
|
|
match self {
|
|
Self::Blob { mime, .. } => mime,
|
|
Self::File { mime, .. } => mime,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// An immutable, cheaply-cloneable, lazily-computed value.
|
|
/// Very similar to [serde_json::Value].
|
|
pub enum PileValue {
|
|
Null,
|
|
U64(u64),
|
|
I64(i64),
|
|
|
|
/// A string
|
|
String(Arc<SmartString<LazyCompact>>),
|
|
|
|
/// An array of values
|
|
Array(Arc<Vec<PileValue>>),
|
|
|
|
/// A lazily-computed map of {label: value}
|
|
ObjectExtractor(Arc<dyn ObjectExtractor>),
|
|
|
|
/// A lazily-computed array
|
|
ListExtractor(Arc<dyn ListExtractor>),
|
|
|
|
/// An pointer to an item in this dataset
|
|
Item(Item),
|
|
|
|
/// Binary data
|
|
Binary(BinaryPileValue),
|
|
}
|
|
|
|
impl Clone for PileValue {
|
|
fn clone(&self) -> Self {
|
|
match self {
|
|
Self::Null => Self::Null,
|
|
Self::U64(x) => Self::U64(*x),
|
|
Self::I64(x) => Self::I64(*x),
|
|
Self::String(x) => Self::String(x.clone()),
|
|
Self::Array(x) => Self::Array(x.clone()),
|
|
Self::ObjectExtractor(x) => Self::ObjectExtractor(x.clone()),
|
|
Self::ListExtractor(x) => Self::ListExtractor(x.clone()),
|
|
Self::Item(i) => Self::Item(i.clone()),
|
|
Self::Binary(b) => Self::Binary(b.clone()),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl PileValue {
|
|
pub fn object_extractor(&self) -> Arc<dyn ObjectExtractor> {
|
|
match self {
|
|
Self::Null => Arc::new(MapExtractor::default()),
|
|
Self::U64(_) => Arc::new(MapExtractor::default()),
|
|
Self::I64(_) => Arc::new(MapExtractor::default()),
|
|
Self::Array(_) => Arc::new(MapExtractor::default()),
|
|
Self::String(s) => Arc::new(StringExtractor::new(s)),
|
|
Self::ListExtractor(_) => Arc::new(MapExtractor::default()),
|
|
Self::ObjectExtractor(e) => e.clone(),
|
|
Self::Item(i) => Arc::new(ItemExtractor::new(i)),
|
|
Self::Binary(b) => Arc::new(BinaryExtractor::new(b)),
|
|
}
|
|
}
|
|
|
|
pub fn list_extractor(&self) -> Arc<dyn ListExtractor> {
|
|
match self {
|
|
Self::Null => Arc::new(VecExtractor::default()),
|
|
Self::U64(_) => Arc::new(VecExtractor::default()),
|
|
Self::I64(_) => Arc::new(VecExtractor::default()),
|
|
Self::Array(a) => Arc::new(ArrayExtractor::new(a.clone())),
|
|
Self::String(_) => Arc::new(VecExtractor::default()),
|
|
Self::ListExtractor(e) => e.clone(),
|
|
Self::ObjectExtractor(e) => e
|
|
.as_list()
|
|
.unwrap_or_else(|| Arc::new(VecExtractor::default())),
|
|
Self::Item(_) => Arc::new(VecExtractor::default()),
|
|
Self::Binary(_) => Arc::new(VecExtractor::default()),
|
|
}
|
|
}
|
|
|
|
pub async fn query(
|
|
&self,
|
|
state: &ExtractState,
|
|
query: &ObjectPath,
|
|
) -> Result<Option<Self>, std::io::Error> {
|
|
let mut out: Option<PileValue> = Some(self.clone());
|
|
|
|
for s in &query.segments {
|
|
match s {
|
|
PathSegment::Root => out = Some(self.clone()),
|
|
PathSegment::Field { name, args } => {
|
|
let e = match out.map(|x| x.object_extractor()) {
|
|
Some(e) => e,
|
|
None => {
|
|
out = None;
|
|
continue;
|
|
}
|
|
};
|
|
|
|
out = e.field(state, name, args.as_deref()).await?;
|
|
}
|
|
|
|
PathSegment::Index(idx) => {
|
|
let e = match out.map(|x| x.list_extractor()) {
|
|
Some(e) => e,
|
|
None => {
|
|
out = None;
|
|
continue;
|
|
}
|
|
};
|
|
|
|
let idx = if *idx >= 0 {
|
|
usize::try_from(*idx).ok()
|
|
} else {
|
|
usize::try_from(e.len(state).await? as i64 - idx).ok()
|
|
};
|
|
|
|
let idx = match idx {
|
|
Some(idx) => idx,
|
|
None => {
|
|
out = None;
|
|
continue;
|
|
}
|
|
};
|
|
|
|
out = e.get(state, idx).await?;
|
|
}
|
|
|
|
PathSegment::Range {
|
|
start,
|
|
end,
|
|
inclusive,
|
|
} => {
|
|
let e = match out.map(|x| x.list_extractor()) {
|
|
Some(e) => e,
|
|
None => {
|
|
out = None;
|
|
continue;
|
|
}
|
|
};
|
|
|
|
let len = e.len(state).await? as i64;
|
|
|
|
let start_idx = if *start >= 0 { *start } else { len + start };
|
|
let end_idx = if *end >= 0 { *end } else { len + end };
|
|
let end_idx = if *inclusive { end_idx + 1 } else { end_idx };
|
|
|
|
let start_idx = start_idx.max(0) as usize;
|
|
let end_idx = (end_idx.max(0) as usize).min(len as usize);
|
|
|
|
let mut items = Vec::new();
|
|
for i in start_idx..end_idx {
|
|
match e.get(state, i).await? {
|
|
Some(v) => items.push(v),
|
|
None => break,
|
|
}
|
|
}
|
|
|
|
// TODO: lazy view?
|
|
out = Some(PileValue::Array(Arc::new(items)));
|
|
}
|
|
}
|
|
}
|
|
|
|
return Ok(out.clone());
|
|
}
|
|
|
|
/// Like `to_json`, but counts populated fields instead of collecting values.
|
|
///
|
|
/// - Leaf values (non-null scalars, arrays, blobs) contribute `Some(1)`.
|
|
/// - `Null` contributes `None`.
|
|
/// - `ObjectExtractor` is recursed into; returns `Some(Object(map))` with
|
|
/// only the fields that had data, or `None` if all fields were absent.
|
|
/// - `Array` / `ListExtractor` are treated as opaque leaf values (not descended into).
|
|
pub async fn count_fields(
|
|
&self,
|
|
state: &ExtractState,
|
|
) -> Result<Option<Value>, std::io::Error> {
|
|
Ok(match self {
|
|
Self::Null => None,
|
|
|
|
Self::U64(_)
|
|
| Self::I64(_)
|
|
| Self::String(_)
|
|
| Self::Binary(BinaryPileValue::Blob { .. }) => Some(Value::Number(1u64.into())),
|
|
|
|
Self::Array(x) => (!x.is_empty()).then(|| Value::Number(1u64.into())),
|
|
Self::ListExtractor(x) => (x.len(state).await? > 0).then(|| Value::Number(1u64.into())),
|
|
|
|
Self::ObjectExtractor(_)
|
|
| Self::Item(_)
|
|
| Self::Binary(BinaryPileValue::File { .. }) => {
|
|
let e = self.object_extractor();
|
|
let keys = e.fields().await?;
|
|
let mut map = Map::new();
|
|
for k in &keys {
|
|
let v = match e.field(state, k, None).await? {
|
|
Some(x) => x,
|
|
None => continue,
|
|
};
|
|
if let Some(counted) = Box::pin(v.count_fields(state)).await? {
|
|
map.insert(k.to_string(), counted);
|
|
}
|
|
}
|
|
if map.is_empty() {
|
|
None
|
|
} else {
|
|
Some(Value::Object(map))
|
|
}
|
|
}
|
|
})
|
|
}
|
|
|
|
pub fn as_str(&self) -> Option<&str> {
|
|
match self {
|
|
Self::String(x) => Some(x),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
pub async fn to_json(&self, state: &ExtractState) -> Result<Value, std::io::Error> {
|
|
Ok(match self {
|
|
Self::Null => Value::Null,
|
|
Self::U64(x) => Value::Number((*x).into()),
|
|
Self::I64(x) => Value::Number((*x).into()),
|
|
Self::String(x) => Value::String(x.to_string()),
|
|
|
|
// TODO: replace with something meaningful?
|
|
Self::Binary(BinaryPileValue::Blob { mime, bytes }) => {
|
|
Value::String(format!("<Blob ({mime}, {} bytes)>", bytes.0.len()))
|
|
}
|
|
|
|
Self::Array(_) | Self::ListExtractor(_) => {
|
|
let e = self.list_extractor();
|
|
return e.to_json(state).await;
|
|
}
|
|
|
|
Self::ObjectExtractor(_)
|
|
| Self::Item(_)
|
|
| Self::Binary(BinaryPileValue::File { .. }) => {
|
|
let e = self.object_extractor();
|
|
return e.to_json(state).await;
|
|
}
|
|
})
|
|
}
|
|
}
|