Filter by mime
This commit is contained in:
@@ -7,7 +7,7 @@ use std::{
|
||||
use tracing::trace;
|
||||
|
||||
use crate::{
|
||||
extract::traits::ListExtractor,
|
||||
extract::traits::{ExtractState, ListExtractor},
|
||||
value::{Item, PileValue, SyncReadBridge},
|
||||
};
|
||||
|
||||
@@ -34,12 +34,20 @@ impl PdfPagesExtractor {
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl ListExtractor for PdfPagesExtractor {
|
||||
async fn get(&self, idx: usize) -> Result<Option<PileValue>, std::io::Error> {
|
||||
async fn get(
|
||||
&self,
|
||||
state: &ExtractState,
|
||||
idx: usize,
|
||||
) -> Result<Option<PileValue>, std::io::Error> {
|
||||
trace!(
|
||||
key = self.item.key().as_str(),
|
||||
"Getting index {idx} from PdfPagesExtractor",
|
||||
);
|
||||
|
||||
if !state.ignore_mime && self.item.mime().essence_str() != "application/pdf" {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let bytes = self.get_bytes().await?;
|
||||
let png = tokio::task::spawn_blocking(move || {
|
||||
let pdfium = Pdfium::default();
|
||||
@@ -81,7 +89,11 @@ impl ListExtractor for PdfPagesExtractor {
|
||||
Ok(Some(value))
|
||||
}
|
||||
|
||||
async fn len(&self) -> Result<usize, std::io::Error> {
|
||||
async fn len(&self, state: &ExtractState) -> Result<usize, std::io::Error> {
|
||||
if !state.ignore_mime && self.item.mime().essence_str() != "application/pdf" {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let bytes = self.get_bytes().await?;
|
||||
let count = tokio::task::spawn_blocking(move || {
|
||||
let pdfium = Pdfium::default();
|
||||
@@ -103,10 +115,10 @@ impl ListExtractor for PdfPagesExtractor {
|
||||
|
||||
// Override, extracting all pages is very slow,
|
||||
// and we can't display binary in json anyway
|
||||
async fn to_json(&self) -> Result<serde_json::Value, std::io::Error> {
|
||||
async fn to_json(&self, state: &ExtractState) -> Result<serde_json::Value, std::io::Error> {
|
||||
Ok(serde_json::Value::String(format!(
|
||||
"<PdfPages ({} pages)>",
|
||||
self.len().await?
|
||||
self.len(state).await?
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user