Owned items, static values
This commit is contained in:
@@ -1,24 +1,27 @@
|
|||||||
use epub::doc::EpubDoc;
|
use epub::doc::EpubDoc;
|
||||||
use pile_config::Label;
|
use pile_config::Label;
|
||||||
use std::{collections::HashMap, sync::OnceLock};
|
use std::{
|
||||||
|
collections::HashMap,
|
||||||
|
sync::{Arc, OnceLock},
|
||||||
|
};
|
||||||
use tracing::trace;
|
use tracing::trace;
|
||||||
|
|
||||||
use crate::{Item, PileValue, SyncReadBridge, extract::ObjectExtractor};
|
use crate::{Item, PileValue, SyncReadBridge, extract::ObjectExtractor};
|
||||||
|
|
||||||
pub struct EpubMetaExtractor<'a> {
|
pub struct EpubMetaExtractor {
|
||||||
item: &'a Item,
|
item: Item,
|
||||||
output: OnceLock<HashMap<Label, PileValue<'a>>>,
|
output: OnceLock<HashMap<Label, PileValue>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> EpubMetaExtractor<'a> {
|
impl EpubMetaExtractor {
|
||||||
pub fn new(item: &'a Item) -> Self {
|
pub fn new(item: &Item) -> Self {
|
||||||
Self {
|
Self {
|
||||||
item,
|
item: item.clone(),
|
||||||
output: OnceLock::new(),
|
output: OnceLock::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_inner(&self) -> Result<&HashMap<Label, PileValue<'a>>, std::io::Error> {
|
async fn get_inner(&self) -> Result<&HashMap<Label, PileValue>, std::io::Error> {
|
||||||
if let Some(x) = self.output.get() {
|
if let Some(x) = self.output.get() {
|
||||||
return Ok(x);
|
return Ok(x);
|
||||||
}
|
}
|
||||||
@@ -61,13 +64,13 @@ impl<'a> EpubMetaExtractor<'a> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut output: HashMap<Label, PileValue<'a>> = HashMap::new();
|
let mut output: HashMap<Label, PileValue> = HashMap::new();
|
||||||
|
|
||||||
#[expect(clippy::unwrap_used)]
|
#[expect(clippy::unwrap_used)]
|
||||||
for (key, val) in raw_meta {
|
for (key, val) in raw_meta {
|
||||||
let label = Label::new(key).unwrap();
|
let label = Label::new(key).unwrap();
|
||||||
let value = match val {
|
let value = match val {
|
||||||
Some(s) => PileValue::String(s.into()),
|
Some(s) => PileValue::String(Arc::new(s.into())),
|
||||||
None => PileValue::Null,
|
None => PileValue::Null,
|
||||||
};
|
};
|
||||||
output.insert(label, value);
|
output.insert(label, value);
|
||||||
@@ -78,12 +81,9 @@ impl<'a> EpubMetaExtractor<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl ObjectExtractor for EpubMetaExtractor<'_> {
|
impl ObjectExtractor for EpubMetaExtractor {
|
||||||
async fn field<'a>(
|
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
|
||||||
&'a self,
|
Ok(self.get_inner().await?.get(name).cloned())
|
||||||
name: &Label,
|
|
||||||
) -> Result<Option<&'a PileValue<'a>>, std::io::Error> {
|
|
||||||
Ok(self.get_inner().await?.get(name))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
||||||
|
|||||||
@@ -1,24 +1,27 @@
|
|||||||
use epub::doc::EpubDoc;
|
use epub::doc::EpubDoc;
|
||||||
use pile_config::Label;
|
use pile_config::Label;
|
||||||
use std::{collections::HashMap, sync::OnceLock};
|
use std::{
|
||||||
|
collections::HashMap,
|
||||||
|
sync::{Arc, OnceLock},
|
||||||
|
};
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
|
|
||||||
use crate::{Item, PileValue, SyncReadBridge, extract::ObjectExtractor};
|
use crate::{Item, PileValue, SyncReadBridge, extract::ObjectExtractor};
|
||||||
|
|
||||||
pub struct EpubTextExtractor<'a> {
|
pub struct EpubTextExtractor {
|
||||||
item: &'a Item,
|
item: Item,
|
||||||
output: OnceLock<HashMap<Label, PileValue<'a>>>,
|
output: OnceLock<HashMap<Label, PileValue>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> EpubTextExtractor<'a> {
|
impl EpubTextExtractor {
|
||||||
pub fn new(item: &'a Item) -> Self {
|
pub fn new(item: &Item) -> Self {
|
||||||
Self {
|
Self {
|
||||||
item,
|
item: item.clone(),
|
||||||
output: OnceLock::new(),
|
output: OnceLock::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_inner(&self) -> Result<&HashMap<Label, PileValue<'a>>, std::io::Error> {
|
async fn get_inner(&self) -> Result<&HashMap<Label, PileValue>, std::io::Error> {
|
||||||
if let Some(x) = self.output.get() {
|
if let Some(x) = self.output.get() {
|
||||||
return Ok(x);
|
return Ok(x);
|
||||||
}
|
}
|
||||||
@@ -61,7 +64,7 @@ impl<'a> EpubTextExtractor<'a> {
|
|||||||
#[expect(clippy::unwrap_used)]
|
#[expect(clippy::unwrap_used)]
|
||||||
let output = HashMap::from([(
|
let output = HashMap::from([(
|
||||||
Label::new("text").unwrap(),
|
Label::new("text").unwrap(),
|
||||||
PileValue::String(raw_text.into()),
|
PileValue::String(Arc::new(raw_text.into())),
|
||||||
)]);
|
)]);
|
||||||
|
|
||||||
let _ = self.output.set(output);
|
let _ = self.output.set(output);
|
||||||
@@ -88,12 +91,9 @@ fn strip_html(html: &str) -> String {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl ObjectExtractor for EpubTextExtractor<'_> {
|
impl ObjectExtractor for EpubTextExtractor {
|
||||||
async fn field<'a>(
|
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
|
||||||
&'a self,
|
Ok(self.get_inner().await?.get(name).cloned())
|
||||||
name: &Label,
|
|
||||||
) -> Result<Option<&'a PileValue<'a>>, std::io::Error> {
|
|
||||||
Ok(self.get_inner().await?.get(name))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
||||||
|
|||||||
@@ -12,13 +12,13 @@ use crate::{
|
|||||||
extract::{MapExtractor, ObjectExtractor},
|
extract::{MapExtractor, ObjectExtractor},
|
||||||
};
|
};
|
||||||
|
|
||||||
pub struct EpubExtractor<'a> {
|
pub struct EpubExtractor {
|
||||||
inner: MapExtractor<'a>,
|
inner: MapExtractor,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> EpubExtractor<'a> {
|
impl EpubExtractor {
|
||||||
#[expect(clippy::unwrap_used)]
|
#[expect(clippy::unwrap_used)]
|
||||||
pub fn new(item: &'a Item) -> Self {
|
pub fn new(item: &Item) -> Self {
|
||||||
let inner = MapExtractor {
|
let inner = MapExtractor {
|
||||||
inner: HashMap::from([
|
inner: HashMap::from([
|
||||||
(
|
(
|
||||||
@@ -37,19 +37,8 @@ impl<'a> EpubExtractor<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl ObjectExtractor for EpubExtractor<'_> {
|
impl ObjectExtractor for EpubExtractor {
|
||||||
async fn field<'a>(
|
async fn field(&self, name: &pile_config::Label) -> Result<Option<PileValue>, std::io::Error> {
|
||||||
&'a self,
|
|
||||||
name: &pile_config::Label,
|
|
||||||
) -> Result<Option<&'a PileValue<'a>>, std::io::Error> {
|
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
if name.as_str() == "text" {
|
|
||||||
match self.inner.inner.get(name).unwrap() {
|
|
||||||
PileValue::ObjectExtractor(x) => return x.field(name).await,
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
self.inner.field(name).await
|
self.inner.field(name).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,23 +1,27 @@
|
|||||||
use pile_config::Label;
|
use pile_config::Label;
|
||||||
use std::{collections::HashMap, io::BufReader, sync::OnceLock};
|
use std::{
|
||||||
|
collections::HashMap,
|
||||||
|
io::BufReader,
|
||||||
|
sync::{Arc, OnceLock},
|
||||||
|
};
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
|
|
||||||
use crate::{Item, PileValue, SyncReadBridge, extract::ObjectExtractor};
|
use crate::{Item, PileValue, SyncReadBridge, extract::ObjectExtractor};
|
||||||
|
|
||||||
pub struct ExifExtractor<'a> {
|
pub struct ExifExtractor {
|
||||||
item: &'a Item,
|
item: Item,
|
||||||
output: OnceLock<HashMap<Label, PileValue<'a>>>,
|
output: OnceLock<HashMap<Label, PileValue>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> ExifExtractor<'a> {
|
impl ExifExtractor {
|
||||||
pub fn new(item: &'a Item) -> Self {
|
pub fn new(item: &Item) -> Self {
|
||||||
Self {
|
Self {
|
||||||
item,
|
item: item.clone(),
|
||||||
output: OnceLock::new(),
|
output: OnceLock::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_inner(&self) -> Result<&HashMap<Label, PileValue<'a>>, std::io::Error> {
|
async fn get_inner(&self) -> Result<&HashMap<Label, PileValue>, std::io::Error> {
|
||||||
if let Some(x) = self.output.get() {
|
if let Some(x) = self.output.get() {
|
||||||
return Ok(x);
|
return Ok(x);
|
||||||
}
|
}
|
||||||
@@ -52,7 +56,7 @@ impl<'a> ExifExtractor<'a> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut output: HashMap<Label, PileValue<'a>> = HashMap::new();
|
let mut output: HashMap<Label, PileValue> = HashMap::new();
|
||||||
|
|
||||||
for (tag_name, value) in raw_fields {
|
for (tag_name, value) in raw_fields {
|
||||||
let Some(label) = tag_to_label(&tag_name) else {
|
let Some(label) = tag_to_label(&tag_name) else {
|
||||||
@@ -61,7 +65,7 @@ impl<'a> ExifExtractor<'a> {
|
|||||||
// First occurrence wins (PRIMARY IFD comes before THUMBNAIL)
|
// First occurrence wins (PRIMARY IFD comes before THUMBNAIL)
|
||||||
output
|
output
|
||||||
.entry(label)
|
.entry(label)
|
||||||
.or_insert_with(|| PileValue::String(value.into()));
|
.or_insert_with(|| PileValue::String(Arc::new(value.into())));
|
||||||
}
|
}
|
||||||
|
|
||||||
return Ok(self.output.get_or_init(|| output));
|
return Ok(self.output.get_or_init(|| output));
|
||||||
@@ -78,12 +82,9 @@ fn tag_to_label(tag: &str) -> Option<Label> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl ObjectExtractor for ExifExtractor<'_> {
|
impl ObjectExtractor for ExifExtractor {
|
||||||
async fn field<'a>(
|
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
|
||||||
&'a self,
|
Ok(self.get_inner().await?.get(name).cloned())
|
||||||
name: &Label,
|
|
||||||
) -> Result<Option<&'a PileValue<'a>>, std::io::Error> {
|
|
||||||
Ok(self.get_inner().await?.get(name))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
||||||
|
|||||||
@@ -12,24 +12,16 @@ use crate::{
|
|||||||
extract::{ListExtractor, ObjectExtractor},
|
extract::{ListExtractor, ObjectExtractor},
|
||||||
};
|
};
|
||||||
|
|
||||||
pub struct FlacImagesExtractor<'a> {
|
pub struct FlacImagesExtractor {
|
||||||
item: &'a Item,
|
item: Item,
|
||||||
output: OnceLock<Vec<PileValue<'a>>>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> FlacImagesExtractor<'a> {
|
impl FlacImagesExtractor {
|
||||||
pub fn new(item: &'a Item) -> Self {
|
pub fn new(item: &Item) -> Self {
|
||||||
Self {
|
Self { item: item.clone() }
|
||||||
item,
|
|
||||||
output: OnceLock::new(),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_inner(&self) -> Result<&Vec<PileValue<'a>>, std::io::Error> {
|
async fn get_images(&self) -> Result<Vec<PileValue>, std::io::Error> {
|
||||||
if let Some(x) = self.output.get() {
|
|
||||||
return Ok(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
let reader = SyncReadBridge::new_current(self.item.read().await?);
|
let reader = SyncReadBridge::new_current(self.item.read().await?);
|
||||||
let raw_images = tokio::task::spawn_blocking(move || {
|
let raw_images = tokio::task::spawn_blocking(move || {
|
||||||
let reader = FlacReader::new(BufReader::new(reader));
|
let reader = FlacReader::new(BufReader::new(reader));
|
||||||
@@ -48,39 +40,35 @@ impl<'a> FlacImagesExtractor<'a> {
|
|||||||
.await
|
.await
|
||||||
.map_err(std::io::Error::other)??;
|
.map_err(std::io::Error::other)??;
|
||||||
|
|
||||||
let images = raw_images
|
Ok(raw_images
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|(mime, data)| PileValue::Blob {
|
.map(|(mime, data)| PileValue::Blob {
|
||||||
mime,
|
mime,
|
||||||
bytes: Arc::new(data),
|
bytes: Arc::new(data),
|
||||||
})
|
})
|
||||||
.collect();
|
.collect())
|
||||||
|
|
||||||
let _ = self.output.set(images);
|
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
return Ok(self.output.get().unwrap());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl ListExtractor for FlacImagesExtractor<'_> {
|
impl ListExtractor for FlacImagesExtractor {
|
||||||
async fn get<'a>(&'a self, idx: usize) -> Result<Option<&'a PileValue<'a>>, std::io::Error> {
|
async fn get<'a>(&'a self, idx: usize) -> Result<Option<PileValue>, std::io::Error> {
|
||||||
Ok(self.get_inner().await?.get(idx))
|
Ok(self.get_images().await?.into_iter().nth(idx))
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn len(&self) -> Result<usize, std::io::Error> {
|
async fn len(&self) -> Result<usize, std::io::Error> {
|
||||||
Ok(self.get_inner().await?.len())
|
Ok(self.get_images().await?.len())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct FlacExtractor<'a> {
|
pub struct FlacExtractor {
|
||||||
item: &'a Item,
|
item: Item,
|
||||||
output: OnceLock<HashMap<Label, PileValue<'a>>>,
|
output: OnceLock<HashMap<Label, PileValue>>,
|
||||||
images: Option<PileValue<'a>>,
|
images: Option<PileValue>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> FlacExtractor<'a> {
|
impl FlacExtractor {
|
||||||
pub fn new(item: &'a Item) -> Self {
|
pub fn new(item: &Item) -> Self {
|
||||||
let is_flac = match item {
|
let is_flac = match item {
|
||||||
Item::File { path, .. } => path.to_str().unwrap_or_default().ends_with(".flac"),
|
Item::File { path, .. } => path.to_str().unwrap_or_default().ends_with(".flac"),
|
||||||
Item::S3 { key, .. } => key.ends_with(".flac"),
|
Item::S3 { key, .. } => key.ends_with(".flac"),
|
||||||
@@ -90,18 +78,18 @@ impl<'a> FlacExtractor<'a> {
|
|||||||
is_flac.then(|| PileValue::ListExtractor(Arc::new(FlacImagesExtractor::new(item))));
|
is_flac.then(|| PileValue::ListExtractor(Arc::new(FlacImagesExtractor::new(item))));
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
item,
|
item: item.clone(),
|
||||||
output: OnceLock::new(),
|
output: OnceLock::new(),
|
||||||
images,
|
images,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_inner(&self) -> Result<&HashMap<Label, PileValue<'a>>, std::io::Error> {
|
async fn get_inner(&self) -> Result<&HashMap<Label, PileValue>, std::io::Error> {
|
||||||
if let Some(x) = self.output.get() {
|
if let Some(x) = self.output.get() {
|
||||||
return Ok(x);
|
return Ok(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
let key = match self.item {
|
let key = match &self.item {
|
||||||
Item::File { path, .. } => path.to_str().unwrap_or_default().to_owned(),
|
Item::File { path, .. } => path.to_str().unwrap_or_default().to_owned(),
|
||||||
Item::S3 { key, .. } => key.to_string(),
|
Item::S3 { key, .. } => key.to_string(),
|
||||||
};
|
};
|
||||||
@@ -132,18 +120,18 @@ impl<'a> FlacExtractor<'a> {
|
|||||||
.await
|
.await
|
||||||
.map_err(std::io::Error::other)??;
|
.map_err(std::io::Error::other)??;
|
||||||
|
|
||||||
let mut output: HashMap<Label, Vec<PileValue<'a>>> = HashMap::new();
|
let mut output: HashMap<Label, Vec<PileValue>> = HashMap::new();
|
||||||
for (k, v) in raw_tags {
|
for (k, v) in raw_tags {
|
||||||
if let Some(label) = Label::new(k) {
|
if let Some(label) = Label::new(k) {
|
||||||
output
|
output
|
||||||
.entry(label)
|
.entry(label)
|
||||||
.or_default()
|
.or_default()
|
||||||
.push(PileValue::String(v.into()));
|
.push(PileValue::String(Arc::new(v.into())));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let output: HashMap<Label, PileValue<'a>> = output
|
let output: HashMap<Label, PileValue> = output
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|(k, v)| (k, PileValue::Array(v)))
|
.map(|(k, v)| (k, PileValue::Array(Arc::new(v))))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let _ = self.output.set(output);
|
let _ = self.output.set(output);
|
||||||
@@ -153,17 +141,14 @@ impl<'a> FlacExtractor<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl ObjectExtractor for FlacExtractor<'_> {
|
impl ObjectExtractor for FlacExtractor {
|
||||||
async fn field<'a>(
|
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
|
||||||
&'a self,
|
|
||||||
name: &Label,
|
|
||||||
) -> Result<Option<&'a PileValue<'a>>, std::io::Error> {
|
|
||||||
if name.as_str() == "images"
|
if name.as_str() == "images"
|
||||||
&& let Some(ref images) = self.images
|
&& let Some(ref images) = self.images
|
||||||
{
|
{
|
||||||
return Ok(Some(images));
|
return Ok(Some(images.clone()));
|
||||||
}
|
}
|
||||||
Ok(self.get_inner().await?.get(name))
|
Ok(self.get_inner().await?.get(name).cloned())
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
||||||
|
|||||||
@@ -1,27 +1,31 @@
|
|||||||
use pile_config::Label;
|
use pile_config::Label;
|
||||||
use std::{collections::HashMap, path::Component, sync::OnceLock};
|
use std::{
|
||||||
|
collections::HashMap,
|
||||||
|
path::Component,
|
||||||
|
sync::{Arc, OnceLock},
|
||||||
|
};
|
||||||
|
|
||||||
use crate::{Item, PileValue, extract::ObjectExtractor};
|
use crate::{Item, PileValue, extract::ObjectExtractor};
|
||||||
|
|
||||||
pub struct FsExtractor<'a> {
|
pub struct FsExtractor {
|
||||||
item: &'a Item,
|
item: Item,
|
||||||
output: OnceLock<HashMap<Label, PileValue<'a>>>,
|
output: OnceLock<HashMap<Label, PileValue>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> FsExtractor<'a> {
|
impl FsExtractor {
|
||||||
pub fn new(item: &'a Item) -> Self {
|
pub fn new(item: &Item) -> Self {
|
||||||
Self {
|
Self {
|
||||||
item,
|
item: item.clone(),
|
||||||
output: OnceLock::new(),
|
output: OnceLock::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_inner(&self) -> Result<&HashMap<Label, PileValue<'a>>, std::io::Error> {
|
fn get_inner(&self) -> Result<&HashMap<Label, PileValue>, std::io::Error> {
|
||||||
if let Some(x) = self.output.get() {
|
if let Some(x) = self.output.get() {
|
||||||
return Ok(x);
|
return Ok(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
let Item::File { path, .. } = self.item else {
|
let Item::File { path, .. } = &self.item else {
|
||||||
return Ok(self.output.get_or_init(HashMap::new));
|
return Ok(self.output.get_or_init(HashMap::new));
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -31,13 +35,13 @@ impl<'a> FsExtractor<'a> {
|
|||||||
Label::new("extension").unwrap(),
|
Label::new("extension").unwrap(),
|
||||||
path.extension()
|
path.extension()
|
||||||
.and_then(|x| x.to_str())
|
.and_then(|x| x.to_str())
|
||||||
.map(|x| PileValue::String(x.into()))
|
.map(|x| PileValue::String(Arc::new(x.into())))
|
||||||
.unwrap_or(PileValue::Null),
|
.unwrap_or(PileValue::Null),
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
Label::new("path").unwrap(),
|
Label::new("path").unwrap(),
|
||||||
path.to_str()
|
path.to_str()
|
||||||
.map(|x| PileValue::String(x.into()))
|
.map(|x| PileValue::String(Arc::new(x.into())))
|
||||||
.unwrap_or(PileValue::Null),
|
.unwrap_or(PileValue::Null),
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
@@ -50,9 +54,9 @@ impl<'a> FsExtractor<'a> {
|
|||||||
Component::RootDir => Some("/".to_owned()),
|
Component::RootDir => Some("/".to_owned()),
|
||||||
Component::Prefix(x) => x.as_os_str().to_str().map(|x| x.to_owned()),
|
Component::Prefix(x) => x.as_os_str().to_str().map(|x| x.to_owned()),
|
||||||
})
|
})
|
||||||
.map(|x| x.map(|x| PileValue::String(x.into())))
|
.map(|x| x.map(|x| PileValue::String(Arc::new(x.into()))))
|
||||||
.collect::<Option<Vec<_>>>()
|
.collect::<Option<Vec<_>>>()
|
||||||
.map(PileValue::Array)
|
.map(|v| PileValue::Array(Arc::new(v)))
|
||||||
.unwrap_or(PileValue::Null),
|
.unwrap_or(PileValue::Null),
|
||||||
),
|
),
|
||||||
]);
|
]);
|
||||||
@@ -62,12 +66,9 @@ impl<'a> FsExtractor<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl ObjectExtractor for FsExtractor<'_> {
|
impl ObjectExtractor for FsExtractor {
|
||||||
async fn field<'a>(
|
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
|
||||||
&'a self,
|
Ok(self.get_inner()?.get(name).cloned())
|
||||||
name: &Label,
|
|
||||||
) -> Result<Option<&'a PileValue<'a>>, std::io::Error> {
|
|
||||||
Ok(self.get_inner()?.get(name))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
||||||
|
|||||||
@@ -1,23 +1,28 @@
|
|||||||
use id3::Tag;
|
use id3::Tag;
|
||||||
use pile_config::Label;
|
use pile_config::Label;
|
||||||
use std::{borrow::Cow, collections::HashMap, io::BufReader, sync::OnceLock};
|
use std::{
|
||||||
|
borrow::Cow,
|
||||||
|
collections::HashMap,
|
||||||
|
io::BufReader,
|
||||||
|
sync::{Arc, OnceLock},
|
||||||
|
};
|
||||||
|
|
||||||
use crate::{Item, PileValue, SyncReadBridge, extract::ObjectExtractor};
|
use crate::{Item, PileValue, SyncReadBridge, extract::ObjectExtractor};
|
||||||
|
|
||||||
pub struct Id3Extractor<'a> {
|
pub struct Id3Extractor {
|
||||||
item: &'a Item,
|
item: Item,
|
||||||
output: OnceLock<HashMap<Label, PileValue<'a>>>,
|
output: OnceLock<HashMap<Label, PileValue>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Id3Extractor<'a> {
|
impl Id3Extractor {
|
||||||
pub fn new(item: &'a Item) -> Self {
|
pub fn new(item: &Item) -> Self {
|
||||||
Self {
|
Self {
|
||||||
item,
|
item: item.clone(),
|
||||||
output: OnceLock::new(),
|
output: OnceLock::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_inner(&self) -> Result<&HashMap<Label, PileValue<'a>>, std::io::Error> {
|
async fn get_inner(&self) -> Result<&HashMap<Label, PileValue>, std::io::Error> {
|
||||||
if let Some(x) = self.output.get() {
|
if let Some(x) = self.output.get() {
|
||||||
return Ok(x);
|
return Ok(x);
|
||||||
}
|
}
|
||||||
@@ -50,7 +55,7 @@ impl<'a> Id3Extractor<'a> {
|
|||||||
Err(e) => return Err(e.into()),
|
Err(e) => return Err(e.into()),
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut output: HashMap<Label, Vec<PileValue<'a>>> = HashMap::new();
|
let mut output: HashMap<Label, Vec<PileValue>> = HashMap::new();
|
||||||
for frame in tag.frames() {
|
for frame in tag.frames() {
|
||||||
if let Some(text) = frame.content().text() {
|
if let Some(text) = frame.content().text() {
|
||||||
let name = frame_id_to_field(frame.id());
|
let name = frame_id_to_field(frame.id());
|
||||||
@@ -58,14 +63,14 @@ impl<'a> Id3Extractor<'a> {
|
|||||||
output
|
output
|
||||||
.entry(key)
|
.entry(key)
|
||||||
.or_default()
|
.or_default()
|
||||||
.push(PileValue::String(text.into()));
|
.push(PileValue::String(Arc::new(text.into())));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let output = output
|
let output = output
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|(k, v)| (k, PileValue::Array(v)))
|
.map(|(k, v)| (k, PileValue::Array(Arc::new(v))))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
return Ok(self.output.get_or_init(|| output));
|
return Ok(self.output.get_or_init(|| output));
|
||||||
@@ -114,12 +119,9 @@ fn frame_id_to_field(id: &str) -> Cow<'static, str> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl ObjectExtractor for Id3Extractor<'_> {
|
impl ObjectExtractor for Id3Extractor {
|
||||||
async fn field<'a>(
|
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
|
||||||
&'a self,
|
Ok(self.get_inner().await?.get(name).cloned())
|
||||||
name: &Label,
|
|
||||||
) -> Result<Option<&'a PileValue<'a>>, std::io::Error> {
|
|
||||||
Ok(self.get_inner().await?.get(name))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
||||||
|
|||||||
@@ -3,17 +3,14 @@ use std::collections::HashMap;
|
|||||||
|
|
||||||
use crate::{PileValue, extract::ObjectExtractor};
|
use crate::{PileValue, extract::ObjectExtractor};
|
||||||
|
|
||||||
pub struct MapExtractor<'a> {
|
pub struct MapExtractor {
|
||||||
pub(crate) inner: HashMap<Label, PileValue<'a>>,
|
pub(crate) inner: HashMap<Label, PileValue>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl ObjectExtractor for MapExtractor<'_> {
|
impl ObjectExtractor for MapExtractor {
|
||||||
async fn field<'a>(
|
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
|
||||||
&'a self,
|
Ok(self.inner.get(name).cloned())
|
||||||
name: &Label,
|
|
||||||
) -> Result<Option<&'a PileValue<'a>>, std::io::Error> {
|
|
||||||
Ok(self.inner.get(name))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ pub use map::*;
|
|||||||
mod sidecar;
|
mod sidecar;
|
||||||
pub use sidecar::*;
|
pub use sidecar::*;
|
||||||
|
|
||||||
use crate::Item;
|
use crate::{Item, PileValue};
|
||||||
|
|
||||||
/// An attachment that extracts metadata from an [Item].
|
/// An attachment that extracts metadata from an [Item].
|
||||||
///
|
///
|
||||||
@@ -39,10 +39,7 @@ pub trait ObjectExtractor: Send + Sync {
|
|||||||
/// Get the field at `name` from `item`.
|
/// Get the field at `name` from `item`.
|
||||||
/// - returns `None` if `name` is not a valid field
|
/// - returns `None` if `name` is not a valid field
|
||||||
/// - returns `Some(Null)` if `name` is not available
|
/// - returns `Some(Null)` if `name` is not available
|
||||||
async fn field<'a>(
|
async fn field(&self, name: &pile_config::Label) -> Result<Option<PileValue>, std::io::Error>;
|
||||||
&'a self,
|
|
||||||
name: &pile_config::Label,
|
|
||||||
) -> Result<Option<&'a crate::PileValue<'a>>, std::io::Error>;
|
|
||||||
|
|
||||||
/// Return all fields in this extractor.
|
/// Return all fields in this extractor.
|
||||||
/// `Self::field` must return [Some] for all these keys
|
/// `Self::field` must return [Some] for all these keys
|
||||||
@@ -59,10 +56,7 @@ pub trait ListExtractor: Send + Sync {
|
|||||||
/// Indices start at zero, and must be consecutive.
|
/// Indices start at zero, and must be consecutive.
|
||||||
/// - returns `None` if `idx` is out of range
|
/// - returns `None` if `idx` is out of range
|
||||||
/// - returns `Some(Null)` if `None` is at `idx`
|
/// - returns `Some(Null)` if `None` is at `idx`
|
||||||
async fn get<'a>(
|
async fn get(&self, idx: usize) -> Result<Option<PileValue>, std::io::Error>;
|
||||||
&'a self,
|
|
||||||
idx: usize,
|
|
||||||
) -> Result<Option<&'a crate::PileValue<'a>>, std::io::Error>;
|
|
||||||
|
|
||||||
async fn len(&self) -> Result<usize, std::io::Error>;
|
async fn len(&self) -> Result<usize, std::io::Error>;
|
||||||
|
|
||||||
@@ -71,13 +65,13 @@ pub trait ListExtractor: Send + Sync {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct MetaExtractor<'a> {
|
pub struct MetaExtractor {
|
||||||
inner: MapExtractor<'a>,
|
inner: MapExtractor,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> MetaExtractor<'a> {
|
impl MetaExtractor {
|
||||||
#[expect(clippy::unwrap_used)]
|
#[expect(clippy::unwrap_used)]
|
||||||
pub fn new(item: &'a Item) -> Self {
|
pub fn new(item: &Item) -> Self {
|
||||||
let inner = MapExtractor {
|
let inner = MapExtractor {
|
||||||
inner: HashMap::from([
|
inner: HashMap::from([
|
||||||
(
|
(
|
||||||
@@ -120,11 +114,8 @@ impl<'a> MetaExtractor<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl ObjectExtractor for MetaExtractor<'_> {
|
impl ObjectExtractor for MetaExtractor {
|
||||||
async fn field<'a>(
|
async fn field(&self, name: &pile_config::Label) -> Result<Option<PileValue>, std::io::Error> {
|
||||||
&'a self,
|
|
||||||
name: &pile_config::Label,
|
|
||||||
) -> Result<Option<&'a crate::PileValue<'a>>, std::io::Error> {
|
|
||||||
self.inner.field(name).await
|
self.inner.field(name).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -22,13 +22,13 @@ use crate::{
|
|||||||
extract::{MapExtractor, ObjectExtractor},
|
extract::{MapExtractor, ObjectExtractor},
|
||||||
};
|
};
|
||||||
|
|
||||||
pub struct PdfExtractor<'a> {
|
pub struct PdfExtractor {
|
||||||
inner: MapExtractor<'a>,
|
inner: MapExtractor,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> PdfExtractor<'a> {
|
impl PdfExtractor {
|
||||||
#[expect(clippy::unwrap_used)]
|
#[expect(clippy::unwrap_used)]
|
||||||
pub fn new(item: &'a Item) -> Self {
|
pub fn new(item: &Item) -> Self {
|
||||||
let mut inner_map = HashMap::new();
|
let mut inner_map = HashMap::new();
|
||||||
inner_map.insert(
|
inner_map.insert(
|
||||||
Label::new("text").unwrap(),
|
Label::new("text").unwrap(),
|
||||||
@@ -56,28 +56,8 @@ impl<'a> PdfExtractor<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl ObjectExtractor for PdfExtractor<'_> {
|
impl ObjectExtractor for PdfExtractor {
|
||||||
async fn field<'a>(
|
async fn field(&self, name: &pile_config::Label) -> Result<Option<PileValue>, std::io::Error> {
|
||||||
&'a self,
|
|
||||||
name: &pile_config::Label,
|
|
||||||
) -> Result<Option<&'a PileValue<'a>>, std::io::Error> {
|
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
if name.as_str() == "text" {
|
|
||||||
match self.inner.inner.get(name).unwrap() {
|
|
||||||
PileValue::ObjectExtractor(x) => return x.field(name).await,
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(feature = "pdfium")]
|
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
if name.as_str() == "cover" {
|
|
||||||
match self.inner.inner.get(name).unwrap() {
|
|
||||||
PileValue::ObjectExtractor(x) => return x.field(name).await,
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
self.inner.field(name).await
|
self.inner.field(name).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -10,20 +10,20 @@ use tracing::trace;
|
|||||||
|
|
||||||
use crate::{Item, PileValue, SyncReadBridge, extract::ObjectExtractor};
|
use crate::{Item, PileValue, SyncReadBridge, extract::ObjectExtractor};
|
||||||
|
|
||||||
pub struct PdfCoverExtractor<'a> {
|
pub struct PdfCoverExtractor {
|
||||||
item: &'a Item,
|
item: Item,
|
||||||
output: OnceLock<HashMap<Label, PileValue<'a>>>,
|
output: OnceLock<HashMap<Label, PileValue>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> PdfCoverExtractor<'a> {
|
impl PdfCoverExtractor {
|
||||||
pub fn new(item: &'a Item) -> Self {
|
pub fn new(item: &Item) -> Self {
|
||||||
Self {
|
Self {
|
||||||
item,
|
item: item.clone(),
|
||||||
output: OnceLock::new(),
|
output: OnceLock::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_inner(&self) -> Result<&HashMap<Label, PileValue<'a>>, std::io::Error> {
|
async fn get_inner(&self) -> Result<&HashMap<Label, PileValue>, std::io::Error> {
|
||||||
if let Some(x) = self.output.get() {
|
if let Some(x) = self.output.get() {
|
||||||
return Ok(x);
|
return Ok(x);
|
||||||
}
|
}
|
||||||
@@ -84,12 +84,9 @@ impl<'a> PdfCoverExtractor<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl ObjectExtractor for PdfCoverExtractor<'_> {
|
impl ObjectExtractor for PdfCoverExtractor {
|
||||||
async fn field<'a>(
|
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
|
||||||
&'a self,
|
Ok(self.get_inner().await?.get(name).cloned())
|
||||||
name: &Label,
|
|
||||||
) -> Result<Option<&'a PileValue<'a>>, std::io::Error> {
|
|
||||||
Ok(self.get_inner().await?.get(name))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
||||||
|
|||||||
@@ -1,26 +1,30 @@
|
|||||||
use pdf::file::FileOptions;
|
use pdf::file::FileOptions;
|
||||||
use pdf::primitive::{Date, TimeRel};
|
use pdf::primitive::{Date, TimeRel};
|
||||||
use pile_config::Label;
|
use pile_config::Label;
|
||||||
use std::{collections::HashMap, io::BufReader, sync::OnceLock};
|
use std::{
|
||||||
|
collections::HashMap,
|
||||||
|
io::BufReader,
|
||||||
|
sync::{Arc, OnceLock},
|
||||||
|
};
|
||||||
use tracing::trace;
|
use tracing::trace;
|
||||||
|
|
||||||
use crate::extract::ObjectExtractor;
|
use crate::extract::ObjectExtractor;
|
||||||
use crate::{Item, PileValue, SyncReadBridge};
|
use crate::{Item, PileValue, SyncReadBridge};
|
||||||
|
|
||||||
pub struct PdfMetaExtractor<'a> {
|
pub struct PdfMetaExtractor {
|
||||||
item: &'a Item,
|
item: Item,
|
||||||
output: OnceLock<HashMap<Label, PileValue<'a>>>,
|
output: OnceLock<HashMap<Label, PileValue>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> PdfMetaExtractor<'a> {
|
impl PdfMetaExtractor {
|
||||||
pub fn new(item: &'a Item) -> Self {
|
pub fn new(item: &Item) -> Self {
|
||||||
Self {
|
Self {
|
||||||
item,
|
item: item.clone(),
|
||||||
output: OnceLock::new(),
|
output: OnceLock::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_inner(&self) -> Result<&HashMap<Label, PileValue<'a>>, std::io::Error> {
|
async fn get_inner(&self) -> Result<&HashMap<Label, PileValue>, std::io::Error> {
|
||||||
if let Some(x) = self.output.get() {
|
if let Some(x) = self.output.get() {
|
||||||
return Ok(x);
|
return Ok(x);
|
||||||
}
|
}
|
||||||
@@ -80,7 +84,7 @@ impl<'a> PdfMetaExtractor<'a> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut output: HashMap<Label, PileValue<'a>> = HashMap::new();
|
let mut output: HashMap<Label, PileValue> = HashMap::new();
|
||||||
|
|
||||||
#[expect(clippy::unwrap_used)]
|
#[expect(clippy::unwrap_used)]
|
||||||
output.insert(
|
output.insert(
|
||||||
@@ -92,7 +96,7 @@ impl<'a> PdfMetaExtractor<'a> {
|
|||||||
for (key, val) in raw_meta {
|
for (key, val) in raw_meta {
|
||||||
let label = Label::new(key).unwrap();
|
let label = Label::new(key).unwrap();
|
||||||
let value = match val {
|
let value = match val {
|
||||||
Some(s) => PileValue::String(s.into()),
|
Some(s) => PileValue::String(Arc::new(s.into())),
|
||||||
None => PileValue::Null,
|
None => PileValue::Null,
|
||||||
};
|
};
|
||||||
output.insert(label, value);
|
output.insert(label, value);
|
||||||
@@ -115,12 +119,9 @@ fn format_date(d: &Date) -> String {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl ObjectExtractor for PdfMetaExtractor<'_> {
|
impl ObjectExtractor for PdfMetaExtractor {
|
||||||
async fn field<'a>(
|
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
|
||||||
&'a self,
|
Ok(self.get_inner().await?.get(name).cloned())
|
||||||
name: &Label,
|
|
||||||
) -> Result<Option<&'a PileValue<'a>>, std::io::Error> {
|
|
||||||
Ok(self.get_inner().await?.get(name))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
||||||
|
|||||||
@@ -2,86 +2,45 @@ use image::ImageFormat;
|
|||||||
use pdfium_render::prelude::*;
|
use pdfium_render::prelude::*;
|
||||||
use std::{
|
use std::{
|
||||||
io::{BufReader, Cursor},
|
io::{BufReader, Cursor},
|
||||||
sync::{Arc, OnceLock},
|
sync::Arc,
|
||||||
};
|
};
|
||||||
use tracing::trace;
|
use tracing::trace;
|
||||||
|
|
||||||
use crate::{Item, PileValue, SyncReadBridge, extract::ListExtractor};
|
use crate::{Item, PileValue, SyncReadBridge, extract::ListExtractor};
|
||||||
|
|
||||||
pub struct PdfPagesExtractor<'a> {
|
pub struct PdfPagesExtractor {
|
||||||
item: &'a Item,
|
item: Item,
|
||||||
bytes: OnceLock<Arc<Vec<u8>>>,
|
|
||||||
pages: OnceLock<Vec<OnceLock<PileValue<'a>>>>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> PdfPagesExtractor<'a> {
|
impl PdfPagesExtractor {
|
||||||
pub fn new(item: &'a Item) -> Self {
|
pub fn new(item: &Item) -> Self {
|
||||||
Self {
|
Self { item: item.clone() }
|
||||||
item,
|
|
||||||
bytes: OnceLock::new(),
|
|
||||||
pages: OnceLock::new(),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_bytes(&self) -> Result<&Arc<Vec<u8>>, std::io::Error> {
|
async fn get_bytes(&self) -> Result<Vec<u8>, std::io::Error> {
|
||||||
if let Some(x) = self.bytes.get() {
|
|
||||||
return Ok(x);
|
|
||||||
}
|
|
||||||
let reader = SyncReadBridge::new_current(self.item.read().await?);
|
let reader = SyncReadBridge::new_current(self.item.read().await?);
|
||||||
let bytes = tokio::task::spawn_blocking(move || {
|
tokio::task::spawn_blocking(move || {
|
||||||
let mut b = Vec::new();
|
let mut b = Vec::new();
|
||||||
std::io::Read::read_to_end(&mut BufReader::new(reader), &mut b)?;
|
std::io::Read::read_to_end(&mut BufReader::new(reader), &mut b)?;
|
||||||
Ok::<_, std::io::Error>(b)
|
Ok::<_, std::io::Error>(b)
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
.map_err(std::io::Error::other)??;
|
.map_err(std::io::Error::other)?
|
||||||
let _ = self.bytes.set(Arc::new(bytes));
|
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
return Ok(self.bytes.get().unwrap());
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn init_pages(&self) -> Result<&Vec<OnceLock<PileValue<'a>>>, std::io::Error> {
|
|
||||||
if let Some(x) = self.pages.get() {
|
|
||||||
return Ok(x);
|
|
||||||
}
|
|
||||||
let bytes = Arc::clone(self.get_bytes().await?);
|
|
||||||
let count = tokio::task::spawn_blocking(move || {
|
|
||||||
let pdfium = Pdfium::default();
|
|
||||||
let doc = pdfium
|
|
||||||
.load_pdf_from_byte_slice(&bytes, None)
|
|
||||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
|
|
||||||
Ok::<_, std::io::Error>(doc.pages().len() as usize)
|
|
||||||
})
|
|
||||||
.await
|
|
||||||
.map_err(std::io::Error::other)?;
|
|
||||||
let slots = match count {
|
|
||||||
Ok(n) => (0..n).map(|_| OnceLock::new()).collect(),
|
|
||||||
Err(error) => {
|
|
||||||
trace!(message = "Could not read pdf page count", ?error, key = ?self.item.key());
|
|
||||||
Vec::new()
|
|
||||||
}
|
|
||||||
};
|
|
||||||
return Ok(self.pages.get_or_init(|| slots));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl ListExtractor for PdfPagesExtractor<'_> {
|
impl ListExtractor for PdfPagesExtractor {
|
||||||
async fn get<'a>(&'a self, idx: usize) -> Result<Option<&'a PileValue<'a>>, std::io::Error> {
|
async fn get(&self, idx: usize) -> Result<Option<PileValue>, std::io::Error> {
|
||||||
let pages = self.init_pages().await?;
|
let bytes = self.get_bytes().await?;
|
||||||
let Some(slot) = pages.get(idx) else {
|
|
||||||
return Ok(None);
|
|
||||||
};
|
|
||||||
if let Some(v) = slot.get() {
|
|
||||||
return Ok(Some(v));
|
|
||||||
}
|
|
||||||
|
|
||||||
let bytes = Arc::clone(self.get_bytes().await?);
|
|
||||||
let png = tokio::task::spawn_blocking(move || {
|
let png = tokio::task::spawn_blocking(move || {
|
||||||
let pdfium = Pdfium::default();
|
let pdfium = Pdfium::default();
|
||||||
let doc = pdfium
|
let doc = pdfium
|
||||||
.load_pdf_from_byte_slice(&bytes, None)
|
.load_pdf_from_byte_slice(&bytes, None)
|
||||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
|
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
|
||||||
|
if idx >= doc.pages().len() as usize {
|
||||||
|
return Ok::<_, std::io::Error>(None);
|
||||||
|
}
|
||||||
let render_config = PdfRenderConfig::new().set_target_width(1024);
|
let render_config = PdfRenderConfig::new().set_target_width(1024);
|
||||||
let page = doc
|
let page = doc
|
||||||
.pages()
|
.pages()
|
||||||
@@ -95,13 +54,14 @@ impl ListExtractor for PdfPagesExtractor<'_> {
|
|||||||
image
|
image
|
||||||
.write_to(&mut Cursor::new(&mut png_bytes), ImageFormat::Png)
|
.write_to(&mut Cursor::new(&mut png_bytes), ImageFormat::Png)
|
||||||
.map_err(|e| std::io::Error::other(e.to_string()))?;
|
.map_err(|e| std::io::Error::other(e.to_string()))?;
|
||||||
Ok::<_, std::io::Error>(png_bytes)
|
Ok(Some(png_bytes))
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
.map_err(std::io::Error::other)?;
|
.map_err(std::io::Error::other)?;
|
||||||
|
|
||||||
let value = match png {
|
let value = match png {
|
||||||
Ok(bytes) => PileValue::Blob {
|
Ok(None) => return Ok(None),
|
||||||
|
Ok(Some(bytes)) => PileValue::Blob {
|
||||||
mime: mime::IMAGE_PNG,
|
mime: mime::IMAGE_PNG,
|
||||||
bytes: Arc::new(bytes),
|
bytes: Arc::new(bytes),
|
||||||
},
|
},
|
||||||
@@ -110,10 +70,26 @@ impl ListExtractor for PdfPagesExtractor<'_> {
|
|||||||
PileValue::Null
|
PileValue::Null
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
return Ok(Some(slot.get_or_init(|| value)));
|
Ok(Some(value))
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn len(&self) -> Result<usize, std::io::Error> {
|
async fn len(&self) -> Result<usize, std::io::Error> {
|
||||||
Ok(self.init_pages().await?.len())
|
let bytes = self.get_bytes().await?;
|
||||||
|
let count = tokio::task::spawn_blocking(move || {
|
||||||
|
let pdfium = Pdfium::default();
|
||||||
|
let doc = pdfium
|
||||||
|
.load_pdf_from_byte_slice(&bytes, None)
|
||||||
|
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
|
||||||
|
Ok::<_, std::io::Error>(doc.pages().len() as usize)
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(std::io::Error::other)?;
|
||||||
|
match count {
|
||||||
|
Ok(n) => Ok(n),
|
||||||
|
Err(error) => {
|
||||||
|
trace!(message = "Could not read pdf page count", ?error, key = ?self.item.key());
|
||||||
|
Ok(0)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,26 +1,30 @@
|
|||||||
use pdf::content::{Op, TextDrawAdjusted};
|
use pdf::content::{Op, TextDrawAdjusted};
|
||||||
use pdf::file::FileOptions;
|
use pdf::file::FileOptions;
|
||||||
use pile_config::Label;
|
use pile_config::Label;
|
||||||
use std::{collections::HashMap, io::BufReader, sync::OnceLock};
|
use std::{
|
||||||
|
collections::HashMap,
|
||||||
|
io::BufReader,
|
||||||
|
sync::{Arc, OnceLock},
|
||||||
|
};
|
||||||
use tracing::trace;
|
use tracing::trace;
|
||||||
|
|
||||||
use crate::extract::ObjectExtractor;
|
use crate::extract::ObjectExtractor;
|
||||||
use crate::{Item, PileValue, SyncReadBridge};
|
use crate::{Item, PileValue, SyncReadBridge};
|
||||||
|
|
||||||
pub struct PdfTextExtractor<'a> {
|
pub struct PdfTextExtractor {
|
||||||
item: &'a Item,
|
item: Item,
|
||||||
output: OnceLock<HashMap<Label, PileValue<'a>>>,
|
output: OnceLock<HashMap<Label, PileValue>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> PdfTextExtractor<'a> {
|
impl PdfTextExtractor {
|
||||||
pub fn new(item: &'a Item) -> Self {
|
pub fn new(item: &Item) -> Self {
|
||||||
Self {
|
Self {
|
||||||
item,
|
item: item.clone(),
|
||||||
output: OnceLock::new(),
|
output: OnceLock::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_inner(&self) -> Result<&HashMap<Label, PileValue<'a>>, std::io::Error> {
|
async fn get_inner(&self) -> Result<&HashMap<Label, PileValue>, std::io::Error> {
|
||||||
if let Some(x) = self.output.get() {
|
if let Some(x) = self.output.get() {
|
||||||
return Ok(x);
|
return Ok(x);
|
||||||
}
|
}
|
||||||
@@ -87,7 +91,7 @@ impl<'a> PdfTextExtractor<'a> {
|
|||||||
#[expect(clippy::unwrap_used)]
|
#[expect(clippy::unwrap_used)]
|
||||||
let output = HashMap::from([(
|
let output = HashMap::from([(
|
||||||
Label::new("text").unwrap(),
|
Label::new("text").unwrap(),
|
||||||
PileValue::String(raw_text.into()),
|
PileValue::String(Arc::new(raw_text.into())),
|
||||||
)]);
|
)]);
|
||||||
|
|
||||||
return Ok(self.output.get_or_init(|| output));
|
return Ok(self.output.get_or_init(|| output));
|
||||||
@@ -95,12 +99,9 @@ impl<'a> PdfTextExtractor<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl ObjectExtractor for PdfTextExtractor<'_> {
|
impl ObjectExtractor for PdfTextExtractor {
|
||||||
async fn field<'a>(
|
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
|
||||||
&'a self,
|
Ok(self.get_inner().await?.get(name).cloned())
|
||||||
name: &Label,
|
|
||||||
) -> Result<Option<&'a PileValue<'a>>, std::io::Error> {
|
|
||||||
Ok(self.get_inner().await?.get(name))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
||||||
|
|||||||
@@ -6,32 +6,29 @@ use crate::{
|
|||||||
extract::{ObjectExtractor, TomlExtractor},
|
extract::{ObjectExtractor, TomlExtractor},
|
||||||
};
|
};
|
||||||
|
|
||||||
pub struct SidecarExtractor<'a> {
|
pub struct SidecarExtractor {
|
||||||
item: &'a Item,
|
item: Item,
|
||||||
output: OnceLock<Option<TomlExtractor<'a>>>,
|
output: OnceLock<Option<TomlExtractor>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> SidecarExtractor<'a> {
|
impl SidecarExtractor {
|
||||||
pub fn new(item: &'a Item) -> Self {
|
pub fn new(item: &Item) -> Self {
|
||||||
Self {
|
Self {
|
||||||
item,
|
item: item.clone(),
|
||||||
output: OnceLock::new(),
|
output: OnceLock::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl ObjectExtractor for SidecarExtractor<'_> {
|
impl ObjectExtractor for SidecarExtractor {
|
||||||
async fn field<'a>(
|
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
|
||||||
&'a self,
|
|
||||||
name: &Label,
|
|
||||||
) -> Result<Option<&'a PileValue<'a>>, std::io::Error> {
|
|
||||||
match self
|
match self
|
||||||
.output
|
.output
|
||||||
.get_or_init(|| self.item.sidecar().map(TomlExtractor::new))
|
.get_or_init(|| self.item.sidecar().map(TomlExtractor::new))
|
||||||
{
|
{
|
||||||
Some(x) => Ok(x.field(name).await?),
|
Some(x) => Ok(x.field(name).await?),
|
||||||
None => Ok(Some(&PileValue::Null)),
|
None => Ok(Some(PileValue::Null)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,34 +1,39 @@
|
|||||||
use pile_config::Label;
|
use pile_config::Label;
|
||||||
use std::{collections::HashMap, sync::OnceLock};
|
use std::{
|
||||||
|
collections::HashMap,
|
||||||
|
sync::{Arc, OnceLock},
|
||||||
|
};
|
||||||
|
|
||||||
use crate::{AsyncReader, Item, PileValue, extract::ObjectExtractor};
|
use crate::{AsyncReader, Item, PileValue, extract::ObjectExtractor};
|
||||||
|
|
||||||
fn toml_to_pile(value: toml::Value) -> PileValue<'static> {
|
fn toml_to_pile(value: toml::Value) -> PileValue {
|
||||||
match value {
|
match value {
|
||||||
toml::Value::String(s) => PileValue::String(s.into()),
|
toml::Value::String(s) => PileValue::String(Arc::new(s.into())),
|
||||||
toml::Value::Integer(i) => PileValue::String(i.to_string().into()),
|
toml::Value::Integer(i) => PileValue::String(Arc::new(i.to_string().into())),
|
||||||
toml::Value::Float(f) => PileValue::String(f.to_string().into()),
|
toml::Value::Float(f) => PileValue::String(Arc::new(f.to_string().into())),
|
||||||
toml::Value::Boolean(b) => PileValue::String(b.to_string().into()),
|
toml::Value::Boolean(b) => PileValue::String(Arc::new(b.to_string().into())),
|
||||||
toml::Value::Datetime(d) => PileValue::String(d.to_string().into()),
|
toml::Value::Datetime(d) => PileValue::String(Arc::new(d.to_string().into())),
|
||||||
toml::Value::Array(a) => PileValue::Array(a.into_iter().map(toml_to_pile).collect()),
|
toml::Value::Array(a) => {
|
||||||
|
PileValue::Array(Arc::new(a.into_iter().map(toml_to_pile).collect()))
|
||||||
|
}
|
||||||
toml::Value::Table(_) => PileValue::Null,
|
toml::Value::Table(_) => PileValue::Null,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct TomlExtractor<'a> {
|
pub struct TomlExtractor {
|
||||||
item: &'a Item,
|
item: Item,
|
||||||
output: OnceLock<HashMap<Label, PileValue<'a>>>,
|
output: OnceLock<HashMap<Label, PileValue>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> TomlExtractor<'a> {
|
impl TomlExtractor {
|
||||||
pub fn new(item: &'a Item) -> Self {
|
pub fn new(item: &Item) -> Self {
|
||||||
Self {
|
Self {
|
||||||
item,
|
item: item.clone(),
|
||||||
output: OnceLock::new(),
|
output: OnceLock::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_inner(&self) -> Result<&HashMap<Label, PileValue<'a>>, std::io::Error> {
|
async fn get_inner(&self) -> Result<&HashMap<Label, PileValue>, std::io::Error> {
|
||||||
if let Some(x) = self.output.get() {
|
if let Some(x) = self.output.get() {
|
||||||
return Ok(x);
|
return Ok(x);
|
||||||
}
|
}
|
||||||
@@ -39,7 +44,7 @@ impl<'a> TomlExtractor<'a> {
|
|||||||
Err(_) => return Ok(self.output.get_or_init(HashMap::new)),
|
Err(_) => return Ok(self.output.get_or_init(HashMap::new)),
|
||||||
};
|
};
|
||||||
|
|
||||||
let output: HashMap<Label, PileValue<'_>> = match toml {
|
let output: HashMap<Label, PileValue> = match toml {
|
||||||
toml::Value::Table(t) => t
|
toml::Value::Table(t) => t
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter_map(|(k, v)| Label::new(&k).map(|label| (label, toml_to_pile(v))))
|
.filter_map(|(k, v)| Label::new(&k).map(|label| (label, toml_to_pile(v))))
|
||||||
@@ -52,12 +57,9 @@ impl<'a> TomlExtractor<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl ObjectExtractor for TomlExtractor<'_> {
|
impl ObjectExtractor for TomlExtractor {
|
||||||
async fn field<'a>(
|
async fn field(&self, name: &Label) -> Result<Option<PileValue>, std::io::Error> {
|
||||||
&'a self,
|
Ok(self.get_inner().await?.get(name).cloned())
|
||||||
name: &Label,
|
|
||||||
) -> Result<Option<&'a PileValue<'a>>, std::io::Error> {
|
|
||||||
Ok(self.get_inner().await?.get(name))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
||||||
|
|||||||
@@ -104,7 +104,7 @@ impl DbFtsIndex {
|
|||||||
|
|
||||||
pub async fn get_field(
|
pub async fn get_field(
|
||||||
&self,
|
&self,
|
||||||
extractor: &PileValue<'_>,
|
extractor: &PileValue,
|
||||||
field_name: &Label,
|
field_name: &Label,
|
||||||
) -> Result<Option<String>, std::io::Error> {
|
) -> Result<Option<String>, std::io::Error> {
|
||||||
let field = match self.cfg.schema.get(field_name) {
|
let field = match self.cfg.schema.get(field_name) {
|
||||||
@@ -148,16 +148,14 @@ impl DbFtsIndex {
|
|||||||
PileValue::U64(x) => return Ok(Some(x.to_string())),
|
PileValue::U64(x) => return Ok(Some(x.to_string())),
|
||||||
PileValue::I64(x) => return Ok(Some(x.to_string())),
|
PileValue::I64(x) => return Ok(Some(x.to_string())),
|
||||||
|
|
||||||
#[expect(clippy::unwrap_used)]
|
PileValue::Array(x) => {
|
||||||
PileValue::Array(ref mut x) => {
|
|
||||||
if x.len() == 1 {
|
if x.len() == 1 {
|
||||||
x.pop().unwrap()
|
x[0].clone()
|
||||||
} else if x.len() > 1 {
|
} else if x.len() > 1 {
|
||||||
debug!(
|
debug!(
|
||||||
message = "Skipping field, is array with more than one element",
|
message = "Skipping field, is array with more than one element",
|
||||||
field = field_name.to_string(),
|
field = field_name.to_string(),
|
||||||
?path,
|
?path,
|
||||||
//value = ?val
|
|
||||||
);
|
);
|
||||||
continue 'outer;
|
continue 'outer;
|
||||||
} else {
|
} else {
|
||||||
@@ -299,7 +297,7 @@ impl DbFtsIndex {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn apply<'a>(post: &FieldSpecPost, val: &PileValue<'a>) -> Option<PileValue<'a>> {
|
pub fn apply(post: &FieldSpecPost, val: &PileValue) -> Option<PileValue> {
|
||||||
Some(match post {
|
Some(match post {
|
||||||
FieldSpecPost::NotEmpty { notempty: false } => val.clone(),
|
FieldSpecPost::NotEmpty { notempty: false } => val.clone(),
|
||||||
FieldSpecPost::NotEmpty { notempty: true } => match val {
|
FieldSpecPost::NotEmpty { notempty: true } => match val {
|
||||||
@@ -316,11 +314,11 @@ pub fn apply<'a>(post: &FieldSpecPost, val: &PileValue<'a>) -> Option<PileValue<
|
|||||||
PileValue::Blob { .. } => return None,
|
PileValue::Blob { .. } => return None,
|
||||||
PileValue::ObjectExtractor(_) => return None,
|
PileValue::ObjectExtractor(_) => return None,
|
||||||
PileValue::ListExtractor(_) => return None,
|
PileValue::ListExtractor(_) => return None,
|
||||||
PileValue::String(x) => PileValue::String(x.to_lowercase().into()),
|
PileValue::String(x) => PileValue::String(Arc::new(x.as_str().to_lowercase().into())),
|
||||||
|
|
||||||
PileValue::Array(x) => {
|
PileValue::Array(x) => PileValue::Array(Arc::new(
|
||||||
PileValue::Array(x.iter().map(|x| apply(post, x)).collect::<Option<_>>()?)
|
x.iter().map(|x| apply(post, x)).collect::<Option<_>>()?,
|
||||||
}
|
)),
|
||||||
},
|
},
|
||||||
|
|
||||||
FieldSpecPost::SetCase { case: Case::Upper } => match val {
|
FieldSpecPost::SetCase { case: Case::Upper } => match val {
|
||||||
@@ -330,11 +328,13 @@ pub fn apply<'a>(post: &FieldSpecPost, val: &PileValue<'a>) -> Option<PileValue<
|
|||||||
PileValue::Blob { .. } => return None,
|
PileValue::Blob { .. } => return None,
|
||||||
PileValue::ObjectExtractor(_) => return None,
|
PileValue::ObjectExtractor(_) => return None,
|
||||||
PileValue::ListExtractor(_) => return None,
|
PileValue::ListExtractor(_) => return None,
|
||||||
PileValue::String(x) => PileValue::String(x.to_uppercase().into()),
|
PileValue::String(x) => PileValue::String(Arc::new(x.as_str().to_uppercase().into())),
|
||||||
|
|
||||||
PileValue::Array(x) => {
|
PileValue::Array(x) => PileValue::Array(Arc::new(
|
||||||
PileValue::Array(x.iter().map(|x| apply(post, x)).collect::<Option<_>>()?)
|
x.iter()
|
||||||
}
|
.map(|x| apply(post, x))
|
||||||
|
.collect::<Option<Vec<_>>>()?,
|
||||||
|
)),
|
||||||
},
|
},
|
||||||
|
|
||||||
FieldSpecPost::TrimSuffix { trim_suffix } => match val {
|
FieldSpecPost::TrimSuffix { trim_suffix } => match val {
|
||||||
@@ -345,13 +345,15 @@ pub fn apply<'a>(post: &FieldSpecPost, val: &PileValue<'a>) -> Option<PileValue<
|
|||||||
PileValue::ObjectExtractor(_) => return None,
|
PileValue::ObjectExtractor(_) => return None,
|
||||||
PileValue::ListExtractor(_) => return None,
|
PileValue::ListExtractor(_) => return None,
|
||||||
|
|
||||||
PileValue::String(x) => {
|
PileValue::String(x) => PileValue::String(Arc::new(
|
||||||
PileValue::String(x.strip_suffix(trim_suffix).unwrap_or(x).into())
|
x.strip_suffix(trim_suffix).unwrap_or(x.as_str()).into(),
|
||||||
}
|
)),
|
||||||
|
|
||||||
PileValue::Array(x) => {
|
PileValue::Array(x) => PileValue::Array(Arc::new(
|
||||||
PileValue::Array(x.iter().map(|x| apply(post, x)).collect::<Option<_>>()?)
|
x.iter()
|
||||||
}
|
.map(|x| apply(post, x))
|
||||||
|
.collect::<Option<Vec<_>>>()?,
|
||||||
|
)),
|
||||||
},
|
},
|
||||||
|
|
||||||
FieldSpecPost::TrimPrefix { trim_prefix } => match val {
|
FieldSpecPost::TrimPrefix { trim_prefix } => match val {
|
||||||
@@ -362,13 +364,15 @@ pub fn apply<'a>(post: &FieldSpecPost, val: &PileValue<'a>) -> Option<PileValue<
|
|||||||
PileValue::ObjectExtractor(_) => return None,
|
PileValue::ObjectExtractor(_) => return None,
|
||||||
PileValue::ListExtractor(_) => return None,
|
PileValue::ListExtractor(_) => return None,
|
||||||
|
|
||||||
PileValue::String(x) => {
|
PileValue::String(x) => PileValue::String(Arc::new(
|
||||||
PileValue::String(x.strip_prefix(trim_prefix).unwrap_or(x).into())
|
x.strip_prefix(trim_prefix).unwrap_or(x.as_str()).into(),
|
||||||
}
|
)),
|
||||||
|
|
||||||
PileValue::Array(x) => {
|
PileValue::Array(x) => PileValue::Array(Arc::new(
|
||||||
PileValue::Array(x.iter().map(|x| apply(post, x)).collect::<Option<_>>()?)
|
x.iter()
|
||||||
}
|
.map(|x| apply(post, x))
|
||||||
|
.collect::<Option<Vec<_>>>()?,
|
||||||
|
)),
|
||||||
},
|
},
|
||||||
|
|
||||||
FieldSpecPost::Join { join } => match val {
|
FieldSpecPost::Join { join } => match val {
|
||||||
@@ -381,7 +385,7 @@ pub fn apply<'a>(post: &FieldSpecPost, val: &PileValue<'a>) -> Option<PileValue<
|
|||||||
|
|
||||||
PileValue::String(x) => PileValue::String(x.clone()),
|
PileValue::String(x) => PileValue::String(x.clone()),
|
||||||
|
|
||||||
PileValue::Array(x) => PileValue::String(
|
PileValue::Array(x) => PileValue::String(Arc::new(
|
||||||
x.iter()
|
x.iter()
|
||||||
.map(|x| apply(post, x))
|
.map(|x| apply(post, x))
|
||||||
.map(|x| x.and_then(|x| x.as_str().map(|x| x.to_owned())))
|
.map(|x| x.and_then(|x| x.as_str().map(|x| x.to_owned())))
|
||||||
@@ -389,7 +393,7 @@ pub fn apply<'a>(post: &FieldSpecPost, val: &PileValue<'a>) -> Option<PileValue<
|
|||||||
.into_iter()
|
.into_iter()
|
||||||
.join(join)
|
.join(join)
|
||||||
.into(),
|
.into(),
|
||||||
),
|
)),
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ use crate::source::{DirDataSource, S3DataSource};
|
|||||||
// MARK: item
|
// MARK: item
|
||||||
//
|
//
|
||||||
|
|
||||||
|
/// A cheaply-clonable pointer to an item in a dataset
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub enum Item {
|
pub enum Item {
|
||||||
File {
|
File {
|
||||||
|
|||||||
@@ -62,7 +62,7 @@ pub async fn get_field(
|
|||||||
};
|
};
|
||||||
|
|
||||||
let extractor = MetaExtractor::new(&item);
|
let extractor = MetaExtractor::new(&item);
|
||||||
let root: PileValue<'_> = PileValue::ObjectExtractor(Arc::new(extractor));
|
let root: PileValue = PileValue::ObjectExtractor(Arc::new(extractor));
|
||||||
|
|
||||||
let value = match root.query(&path).await {
|
let value = match root.query(&path).await {
|
||||||
Ok(Some(v)) => v,
|
Ok(Some(v)) => v,
|
||||||
|
|||||||
@@ -6,17 +6,18 @@ use std::sync::Arc;
|
|||||||
|
|
||||||
use crate::extract::{ListExtractor, ObjectExtractor};
|
use crate::extract::{ListExtractor, ObjectExtractor};
|
||||||
|
|
||||||
/// An immutable, lazily-computed value similar to [serde_json::Value].
|
/// An immutable, cheaply-clonable, lazily-computed value.
|
||||||
pub enum PileValue<'a> {
|
/// Very similar to [serde_json::Value].
|
||||||
|
pub enum PileValue {
|
||||||
Null,
|
Null,
|
||||||
U64(u64),
|
U64(u64),
|
||||||
I64(i64),
|
I64(i64),
|
||||||
|
|
||||||
/// A string
|
/// A string
|
||||||
String(SmartString<LazyCompact>),
|
String(Arc<SmartString<LazyCompact>>),
|
||||||
|
|
||||||
/// An array of values
|
/// An array of values
|
||||||
Array(Vec<PileValue<'a>>),
|
Array(Arc<Vec<PileValue>>),
|
||||||
|
|
||||||
/// A binary blob
|
/// A binary blob
|
||||||
Blob {
|
Blob {
|
||||||
@@ -25,13 +26,13 @@ pub enum PileValue<'a> {
|
|||||||
},
|
},
|
||||||
|
|
||||||
/// A lazily-computed map of {label: value}
|
/// A lazily-computed map of {label: value}
|
||||||
ObjectExtractor(Arc<dyn ObjectExtractor + 'a>),
|
ObjectExtractor(Arc<dyn ObjectExtractor>),
|
||||||
|
|
||||||
/// A lazily-computed array
|
/// A lazily-computed array
|
||||||
ListExtractor(Arc<dyn ListExtractor + 'a>),
|
ListExtractor(Arc<dyn ListExtractor>),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Clone for PileValue<'_> {
|
impl Clone for PileValue {
|
||||||
fn clone(&self) -> Self {
|
fn clone(&self) -> Self {
|
||||||
match self {
|
match self {
|
||||||
Self::Null => Self::Null,
|
Self::Null => Self::Null,
|
||||||
@@ -49,15 +50,15 @@ impl Clone for PileValue<'_> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> PileValue<'a> {
|
impl PileValue {
|
||||||
pub async fn query(&'a self, query: &ObjectPath) -> Result<Option<&'a Self>, std::io::Error> {
|
pub async fn query(&self, query: &ObjectPath) -> Result<Option<Self>, std::io::Error> {
|
||||||
let mut out = Some(self);
|
let mut out: Option<PileValue> = Some(self.clone());
|
||||||
|
|
||||||
for s in &query.segments {
|
for s in &query.segments {
|
||||||
match s {
|
match s {
|
||||||
PathSegment::Root => out = Some(self),
|
PathSegment::Root => out = Some(self.clone()),
|
||||||
PathSegment::Field(field) => {
|
PathSegment::Field(field) => {
|
||||||
out = match &out {
|
out = match out {
|
||||||
None => return Ok(None),
|
None => return Ok(None),
|
||||||
Some(Self::Null) => None,
|
Some(Self::Null) => None,
|
||||||
Some(Self::U64(_)) => None,
|
Some(Self::U64(_)) => None,
|
||||||
@@ -84,7 +85,7 @@ impl<'a> PileValue<'a> {
|
|||||||
usize::try_from(v.len() as i64 - idx).ok()
|
usize::try_from(v.len() as i64 - idx).ok()
|
||||||
};
|
};
|
||||||
|
|
||||||
idx.and_then(|idx| v.get(idx))
|
idx.and_then(|idx| v.get(idx)).cloned()
|
||||||
}
|
}
|
||||||
Some(Self::String(_)) => None,
|
Some(Self::String(_)) => None,
|
||||||
Some(Self::ObjectExtractor(_)) => None,
|
Some(Self::ObjectExtractor(_)) => None,
|
||||||
@@ -105,7 +106,7 @@ impl<'a> PileValue<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return Ok(out);
|
return Ok(out.clone());
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn as_str(&self) -> Option<&str> {
|
pub fn as_str(&self) -> Option<&str> {
|
||||||
@@ -129,7 +130,7 @@ impl<'a> PileValue<'a> {
|
|||||||
|
|
||||||
Self::Array(x) => {
|
Self::Array(x) => {
|
||||||
let mut arr = Vec::new();
|
let mut arr = Vec::new();
|
||||||
for item in x {
|
for item in &**x {
|
||||||
arr.push(Box::pin(item.to_json()).await?);
|
arr.push(Box::pin(item.to_json()).await?);
|
||||||
}
|
}
|
||||||
Value::Array(arr)
|
Value::Array(arr)
|
||||||
|
|||||||
Reference in New Issue
Block a user