Add text extractor
This commit is contained in:
@@ -28,6 +28,9 @@ pub use toml::*;
|
||||
mod group;
|
||||
pub use group::*;
|
||||
|
||||
mod text;
|
||||
pub use text::*;
|
||||
|
||||
use crate::{
|
||||
extract::{
|
||||
misc::MapExtractor,
|
||||
@@ -77,6 +80,10 @@ impl ItemExtractor {
|
||||
Label::new("toml").unwrap(),
|
||||
PileValue::ObjectExtractor(Arc::new(TomlExtractor::new(item))),
|
||||
),
|
||||
(
|
||||
Label::new("text").unwrap(),
|
||||
PileValue::ObjectExtractor(Arc::new(TextExtractor::new(item))),
|
||||
),
|
||||
(
|
||||
Label::new("groups").unwrap(),
|
||||
PileValue::ObjectExtractor(Arc::new(GroupExtractor::new(item))),
|
||||
@@ -110,6 +117,7 @@ impl ObjectExtractor for ItemExtractor {
|
||||
Label::new("pdf").unwrap(),
|
||||
Label::new("json").unwrap(),
|
||||
Label::new("toml").unwrap(),
|
||||
Label::new("text").unwrap(),
|
||||
Label::new("groups").unwrap(),
|
||||
]);
|
||||
}
|
||||
|
||||
67
crates/pile-value/src/extract/item/text.rs
Normal file
67
crates/pile-value/src/extract/item/text.rs
Normal file
@@ -0,0 +1,67 @@
|
||||
use pile_config::Label;
|
||||
use std::sync::{Arc, OnceLock};
|
||||
|
||||
use crate::{
|
||||
extract::traits::{ExtractState, ObjectExtractor},
|
||||
value::{AsyncReader, Item, PileValue},
|
||||
};
|
||||
|
||||
pub struct TextExtractor {
|
||||
item: Item,
|
||||
output: OnceLock<PileValue>,
|
||||
}
|
||||
|
||||
impl TextExtractor {
|
||||
pub fn new(item: &Item) -> Self {
|
||||
Self {
|
||||
item: item.clone(),
|
||||
output: OnceLock::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl ObjectExtractor for TextExtractor {
|
||||
async fn field(
|
||||
&self,
|
||||
state: &ExtractState,
|
||||
name: &Label,
|
||||
args: Option<&str>,
|
||||
) -> Result<Option<PileValue>, std::io::Error> {
|
||||
if args.is_some() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if !state.ignore_mime
|
||||
&& (self.item.mime().type_() != mime::TEXT
|
||||
&& self.item.mime().type_() != mime::APPLICATION)
|
||||
{
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if name.as_str() != "text" {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
{
|
||||
if let Some(x) = self.output.get() {
|
||||
return Ok(Some(x.clone()));
|
||||
}
|
||||
|
||||
let mut reader = self.item.read().await?;
|
||||
let bytes = reader.read_to_end().await?;
|
||||
let string = String::from_utf8(bytes).ok();
|
||||
let value = match string {
|
||||
Some(x) => PileValue::String(Arc::new(x.into())),
|
||||
None => PileValue::Null,
|
||||
};
|
||||
|
||||
return Ok(Some(self.output.get_or_init(|| value).clone()));
|
||||
}
|
||||
}
|
||||
|
||||
#[expect(clippy::unwrap_used)]
|
||||
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
|
||||
Ok(vec![Label::new("text").unwrap()])
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user