Refactor sidecars
This commit is contained in:
@@ -11,7 +11,6 @@ use std::{collections::HashMap, io::ErrorKind, path::PathBuf, sync::Arc, time::I
|
||||
use tantivy::{Executor, Index, IndexWriter, TantivyError, collector::TopDocs};
|
||||
use thiserror::Error;
|
||||
use tokio::task::JoinSet;
|
||||
use tokio_stream::{StreamExt, wrappers::ReceiverStream};
|
||||
use tracing::{debug, info, trace, warn};
|
||||
|
||||
use crate::index::{DbFtsIndex, FtsLookupResult};
|
||||
@@ -46,10 +45,10 @@ impl Dataset {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn iter(&self) -> ReceiverStream<Result<Item, std::io::Error>> {
|
||||
pub fn iter(&self) -> Box<dyn Iterator<Item = &Item> + Send + '_> {
|
||||
match self {
|
||||
Self::Dir(ds) => ds.iter(),
|
||||
Self::S3(ds) => ds.iter(),
|
||||
Self::Dir(ds) => Box::new(ds.iter()),
|
||||
Self::S3(ds) => Box::new(ds.iter()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -76,7 +75,7 @@ pub struct Datasets {
|
||||
}
|
||||
|
||||
impl Datasets {
|
||||
pub fn open(config: impl Into<PathBuf>) -> Result<Self, std::io::Error> {
|
||||
pub async fn open(config: impl Into<PathBuf>) -> Result<Self, std::io::Error> {
|
||||
let path_config = config.into();
|
||||
let path_parent = path_config
|
||||
.parent()
|
||||
@@ -118,7 +117,7 @@ impl Datasets {
|
||||
Source::Filesystem {
|
||||
enabled,
|
||||
path,
|
||||
sidecars,
|
||||
pattern,
|
||||
} => {
|
||||
if !enabled {
|
||||
continue;
|
||||
@@ -126,11 +125,10 @@ impl Datasets {
|
||||
|
||||
sources.insert(
|
||||
label.clone(),
|
||||
Dataset::Dir(Arc::new(DirDataSource::new(
|
||||
label,
|
||||
path_parent.join(path),
|
||||
*sidecars,
|
||||
))),
|
||||
Dataset::Dir(
|
||||
DirDataSource::new(label, path_parent.join(path), pattern.clone())
|
||||
.await?,
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -141,7 +139,7 @@ impl Datasets {
|
||||
endpoint,
|
||||
region,
|
||||
credentials,
|
||||
sidecars,
|
||||
pattern,
|
||||
} => {
|
||||
if !enabled {
|
||||
continue;
|
||||
@@ -154,10 +152,12 @@ impl Datasets {
|
||||
endpoint.clone(),
|
||||
region.clone(),
|
||||
credentials,
|
||||
*sidecars,
|
||||
) {
|
||||
pattern.clone(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(ds) => {
|
||||
sources.insert(label.clone(), Dataset::S3(Arc::new(ds)));
|
||||
sources.insert(label.clone(), Dataset::S3(ds));
|
||||
}
|
||||
Err(err) => {
|
||||
warn!("Could not open S3 source {label}: {err}");
|
||||
@@ -258,17 +258,17 @@ impl Datasets {
|
||||
for (name, dataset) in &self.sources {
|
||||
info!("Loading source {name}");
|
||||
|
||||
let mut stream = dataset.iter();
|
||||
while let Some(item_result) = stream.next().await {
|
||||
let stream = dataset.iter();
|
||||
for item in stream {
|
||||
if let Some(flag) = &flag
|
||||
&& flag.is_cancelled()
|
||||
{
|
||||
return Err(CancelableTaskError::Cancelled);
|
||||
}
|
||||
|
||||
let item = item_result.map_err(DatasetError::from)?;
|
||||
let db = Arc::clone(&db_index);
|
||||
let state = state.clone();
|
||||
let item = item.clone();
|
||||
join_set.spawn(async move {
|
||||
let key = item.key();
|
||||
let result = db.entry_to_document(&state, &item).await;
|
||||
|
||||
Reference in New Issue
Block a user