From 48262bab487f5a1e6ded33242ee5d37b30751baa Mon Sep 17 00:00:00 2001
From: rm-dr <96270320+rm-dr@users.noreply.github.com>
Date: Sat, 21 Mar 2026 09:32:22 -0700
Subject: [PATCH] Exclude large strings

---
# Review notes (free-form text after the three-dash line; not part of the
# commit message or of either hunk).
#
# What the patch does: each hunk adds a `to_json` method to an
# `impl ObjectExtractor` block (EpubExtractor, then PdfExtractor). The two
# added bodies are textually identical. The method:
#   1. reads the key list from `self.fields()`;
#   2. looks each key up with `self.field(state, k, None)` and skips keys
#      whose lookup yields `None`;
#   3. for the key "text", inserts a `serde_json::Value::String` built with
#      `format!` in place of the field's own JSON, then moves on to the next
#      key;
#   4. for every other key, inserts the result of `v.to_json(state)`, awaited
#      through `Box::pin`;
#   5. returns the collected map as `serde_json::Value::Object`.
#
# NOTE(review): this copy of the patch looks damaged and should not be
# applied as-is:
#   - `-> Result {` carries no type parameters.
#   - The `format!(` argument begins with an unterminated string literal
#     (`" x.len(),`) and is followed by a `_ => 0,` arm and a closing brace
#     with no visible `match` head.
#   - Each hunk header and the diffstat declare 29 added lines, but only 27
#     `+` lines are present per hunk.
#   Presumably angle-bracketed text was stripped somewhere in transit; the
#   original return type, placeholder string, and match head cannot be
#   recovered from this text. TODO: regenerate from commit 48262ba.
#
# NOTE(review): the patch arrived collapsed onto two lines. The line breaks
# and indentation below are reconstructed, so context lines may not match the
# target files' real whitespace - confirm against the repository.
 .../pile-value/src/extract/item/epub/mod.rs   | 29 +++++++++++++++++++
 crates/pile-value/src/extract/item/pdf/mod.rs | 29 +++++++++++++++++++
 2 files changed, 58 insertions(+)

diff --git a/crates/pile-value/src/extract/item/epub/mod.rs b/crates/pile-value/src/extract/item/epub/mod.rs
index ccf96a7..a67e4f4 100644
--- a/crates/pile-value/src/extract/item/epub/mod.rs
+++ b/crates/pile-value/src/extract/item/epub/mod.rs
@@ -54,4 +54,33 @@ impl ObjectExtractor for EpubExtractor {
             Label::new("meta").unwrap(),
         ])
     }
+
+    async fn to_json(&self, state: &ExtractState) -> Result {
+        let keys = self.fields().await?;
+        let mut map = serde_json::Map::new();
+        for k in &keys {
+            let v = match self.field(state, k, None).await? {
+                Some(x) => x,
+                None => continue,
+            };
+
+            if k.as_str() == "text" {
+                map.insert(
+                    k.to_string(),
+                    serde_json::Value::String(format!(
+                        " x.len(),
+                        _ => 0,
+                        }
+                    )),
+                );
+                continue;
+            }
+
+            map.insert(k.to_string(), Box::pin(v.to_json(state)).await?);
+        }
+
+        Ok(serde_json::Value::Object(map))
+    }
 }
diff --git a/crates/pile-value/src/extract/item/pdf/mod.rs b/crates/pile-value/src/extract/item/pdf/mod.rs
index f603535..cdb23a4 100644
--- a/crates/pile-value/src/extract/item/pdf/mod.rs
+++ b/crates/pile-value/src/extract/item/pdf/mod.rs
@@ -68,4 +68,33 @@ impl ObjectExtractor for PdfExtractor {
             Label::new("pages").unwrap(),
         ])
     }
+
+    async fn to_json(&self, state: &ExtractState) -> Result {
+        let keys = self.fields().await?;
+        let mut map = serde_json::Map::new();
+        for k in &keys {
+            let v = match self.field(state, k, None).await? {
+                Some(x) => x,
+                None => continue,
+            };
+
+            if k.as_str() == "text" {
+                map.insert(
+                    k.to_string(),
+                    serde_json::Value::String(format!(
+                        " x.len(),
+                        _ => 0,
+                        }
+                    )),
+                );
+                continue;
+            }
+
+            map.insert(k.to_string(), Box::pin(v.to_json(state)).await?);
+        }
+
+        Ok(serde_json::Value::Object(map))
+    }
 }