lazily-evaluated extractors
This commit is contained in:
@@ -8,7 +8,11 @@ use rayon::{
|
||||
use std::{
|
||||
io::ErrorKind,
|
||||
path::PathBuf,
|
||||
sync::{Arc, mpsc::Receiver},
|
||||
sync::{
|
||||
Arc,
|
||||
atomic::{AtomicU64, Ordering},
|
||||
mpsc::Receiver,
|
||||
},
|
||||
thread::JoinHandle,
|
||||
time::Instant,
|
||||
};
|
||||
@@ -144,15 +148,14 @@ impl Dataset {
|
||||
|
||||
let mut total = 0u64;
|
||||
while let Ok(batch) = read_rx.recv() {
|
||||
let batch = batch.map_err(DatasetError::from)?;
|
||||
let len = batch.len() as u64;
|
||||
|
||||
let batch = batch?;
|
||||
if let Some(flag) = &flag
|
||||
&& flag.is_cancelled()
|
||||
{
|
||||
return Err(CancelableTaskError::Cancelled);
|
||||
}
|
||||
|
||||
let this = AtomicU64::new(0);
|
||||
let start = Instant::now();
|
||||
write_pool
|
||||
.install(|| {
|
||||
@@ -170,6 +173,7 @@ impl Dataset {
|
||||
}
|
||||
})
|
||||
.map(|(key, doc)| {
|
||||
this.fetch_add(1, Ordering::Relaxed);
|
||||
index_writer
|
||||
.add_document(doc)
|
||||
.map_err(|err| (key, err))
|
||||
@@ -180,9 +184,10 @@ impl Dataset {
|
||||
})
|
||||
.map_err(|(_key, err)| DatasetError::from(err))?;
|
||||
|
||||
total += len;
|
||||
let this = this.load(Ordering::Relaxed);
|
||||
total += this;
|
||||
let time_ms = start.elapsed().as_millis();
|
||||
debug!("Added a batch of {len} in {time_ms} ms ({total} total)");
|
||||
debug!("Added a batch of {this} in {time_ms} ms ({total} total)");
|
||||
}
|
||||
|
||||
if let Some(flag) = flag.as_ref()
|
||||
@@ -334,6 +339,13 @@ fn start_read_task(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !batch.is_empty() {
|
||||
match read_tx.send(Ok(batch)) {
|
||||
Ok(()) => {}
|
||||
Err(_) => return,
|
||||
};
|
||||
}
|
||||
});
|
||||
|
||||
return (read_task, read_rx);
|
||||
|
||||
Reference in New Issue
Block a user