Exclude large strings

This commit is contained in:
2026-03-21 09:32:22 -07:00
parent b6cb5870b4
commit 48262bab48
2 changed files with 58 additions and 0 deletions

View File

@@ -54,4 +54,33 @@ impl ObjectExtractor for EpubExtractor {
Label::new("meta").unwrap(),
])
}
/// Serializes this extractor's fields into a JSON object.
///
/// Every label returned by `fields()` is looked up with `field()`; labels
/// whose value is `None` are left out of the object. The `text` field is
/// never serialized in full: it is replaced by a `<String (N bytes)>`
/// placeholder so that large string contents are excluded from the output.
///
/// # Errors
///
/// Propagates any `std::io::Error` returned by `fields()`, `field()`, or the
/// nested `to_json()` call made on a field value.
async fn to_json(&self, state: &ExtractState) -> Result<serde_json::Value, std::io::Error> {
    let keys = self.fields().await?;
    let mut map = serde_json::Map::new();
    for k in &keys {
        // Fields without a value are omitted rather than emitted as null.
        let Some(v) = self.field(state, k, None).await? else {
            continue;
        };
        if k.as_str() == "text" {
            // Only the size is reported; a `text` value that is not a
            // `PileValue::String` is reported as 0 bytes.
            let len = match v {
                PileValue::String(x) => x.len(),
                _ => 0,
            };
            map.insert(
                k.to_string(),
                serde_json::Value::String(format!("<String ({len} bytes)>")),
            );
            continue;
        }
        // NOTE(review): the nested future is boxed, presumably because value
        // serialization can recurse back into an extractor's `to_json` — confirm.
        map.insert(k.to_string(), Box::pin(v.to_json(state)).await?);
    }
    Ok(serde_json::Value::Object(map))
}
}

View File

@@ -68,4 +68,33 @@ impl ObjectExtractor for PdfExtractor {
Label::new("pages").unwrap(),
])
}
/// Serializes this extractor's fields into a JSON object.
///
/// Every label returned by `fields()` is looked up with `field()`; labels
/// whose value is `None` are left out of the object. The `text` field is
/// never serialized in full: it is replaced by a `<String (N bytes)>`
/// placeholder so that large string contents are excluded from the output.
///
/// # Errors
///
/// Propagates any `std::io::Error` returned by `fields()`, `field()`, or the
/// nested `to_json()` call made on a field value.
async fn to_json(&self, state: &ExtractState) -> Result<serde_json::Value, std::io::Error> {
    let keys = self.fields().await?;
    let mut map = serde_json::Map::new();
    for k in &keys {
        // Fields without a value are omitted rather than emitted as null.
        let Some(v) = self.field(state, k, None).await? else {
            continue;
        };
        if k.as_str() == "text" {
            // Only the size is reported; a `text` value that is not a
            // `PileValue::String` is reported as 0 bytes.
            let len = match v {
                PileValue::String(x) => x.len(),
                _ => 0,
            };
            map.insert(
                k.to_string(),
                serde_json::Value::String(format!("<String ({len} bytes)>")),
            );
            continue;
        }
        // NOTE(review): the nested future is boxed, presumably because value
        // serialization can recurse back into an extractor's `to_json` — confirm.
        map.insert(k.to_string(), Box::pin(v.to_json(state)).await?);
    }
    Ok(serde_json::Value::Object(map))
}
}