Add item subcommand
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
use chrono::{DateTime, Utc};
|
||||
use pile_config::{ConfigToml, Label, Source, objectpath::ObjectPath};
|
||||
use pile_config::{
|
||||
ConfigToml, DatasetConfig, Label, Source, objectpath::ObjectPath, pattern::GroupPattern,
|
||||
};
|
||||
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
||||
use pile_value::{
|
||||
extract::traits::ExtractState,
|
||||
@@ -66,15 +68,109 @@ impl Dataset {
|
||||
|
||||
/// An opened dataset: config, working directory, and all opened sources.
|
||||
pub struct Datasets {
|
||||
pub path_config: PathBuf,
|
||||
pub path_config: Option<PathBuf>,
|
||||
pub path_parent: PathBuf,
|
||||
pub path_workdir: PathBuf,
|
||||
pub path_workdir: Option<PathBuf>,
|
||||
|
||||
pub config: ConfigToml,
|
||||
pub sources: HashMap<Label, Dataset>,
|
||||
}
|
||||
|
||||
impl Datasets {
|
||||
#[expect(clippy::unwrap_used)]
|
||||
pub fn virt_source() -> Label {
|
||||
Label::new("virtual-source").unwrap()
|
||||
}
|
||||
|
||||
#[expect(clippy::unwrap_used)]
|
||||
pub async fn virt(parent: impl Into<PathBuf>) -> Result<Self, std::io::Error> {
|
||||
let path_parent = parent.into();
|
||||
|
||||
let config = ConfigToml {
|
||||
dataset: DatasetConfig {
|
||||
name: Label::new("virtual-dataset").unwrap(),
|
||||
working_dir: None,
|
||||
|
||||
source: [(
|
||||
Self::virt_source(),
|
||||
Source::Filesystem {
|
||||
enabled: true,
|
||||
path: path_parent.clone(),
|
||||
pattern: GroupPattern::default(),
|
||||
},
|
||||
)]
|
||||
.into_iter()
|
||||
.collect(),
|
||||
},
|
||||
schema: HashMap::new(),
|
||||
fts: None,
|
||||
};
|
||||
|
||||
let mut sources = HashMap::new();
|
||||
for (label, source) in &config.dataset.source {
|
||||
match source {
|
||||
Source::Filesystem {
|
||||
enabled,
|
||||
path,
|
||||
pattern,
|
||||
} => {
|
||||
if !enabled {
|
||||
continue;
|
||||
}
|
||||
|
||||
sources.insert(
|
||||
label.clone(),
|
||||
Dataset::Dir(
|
||||
DirDataSource::new(label, path_parent.join(path), pattern.clone())
|
||||
.await?,
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
Source::S3 {
|
||||
enabled,
|
||||
bucket,
|
||||
prefix,
|
||||
endpoint,
|
||||
region,
|
||||
credentials,
|
||||
pattern,
|
||||
} => {
|
||||
if !enabled {
|
||||
continue;
|
||||
}
|
||||
|
||||
match S3DataSource::new(
|
||||
label,
|
||||
bucket.clone(),
|
||||
prefix.clone(),
|
||||
endpoint.clone(),
|
||||
region.clone(),
|
||||
credentials,
|
||||
pattern.clone(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(ds) => {
|
||||
sources.insert(label.clone(), Dataset::S3(ds));
|
||||
}
|
||||
Err(err) => {
|
||||
warn!("Could not open S3 source {label}: {err}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Ok(Self {
|
||||
path_config: None,
|
||||
path_workdir: None,
|
||||
path_parent,
|
||||
config,
|
||||
sources,
|
||||
});
|
||||
}
|
||||
|
||||
pub async fn open(config: impl Into<PathBuf>) -> Result<Self, std::io::Error> {
|
||||
let path_config = config.into();
|
||||
let path_parent = path_config
|
||||
@@ -168,9 +264,9 @@ impl Datasets {
|
||||
}
|
||||
|
||||
return Ok(Self {
|
||||
path_config,
|
||||
path_config: Some(path_config),
|
||||
path_workdir: Some(path_workdir),
|
||||
path_parent,
|
||||
path_workdir,
|
||||
config,
|
||||
sources,
|
||||
});
|
||||
@@ -216,8 +312,16 @@ impl Datasets {
|
||||
_threads: usize,
|
||||
flag: Option<CancelFlag>,
|
||||
) -> Result<(), CancelableTaskError<DatasetError>> {
|
||||
let fts_tmp_dir = self.path_workdir.join(".tmp-fts");
|
||||
let fts_dir = self.path_workdir.join("fts");
|
||||
let workdir = match self.path_workdir.as_ref() {
|
||||
Some(x) => x,
|
||||
None => {
|
||||
warn!("Skipping fts_refresh, no workdir");
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
|
||||
let fts_tmp_dir = workdir.join(".tmp-fts");
|
||||
let fts_dir = workdir.join("fts");
|
||||
|
||||
if fts_tmp_dir.is_dir() {
|
||||
warn!("Removing temporary index in {}", fts_dir.display());
|
||||
@@ -315,7 +419,15 @@ impl Datasets {
|
||||
query: &str,
|
||||
top_n: usize,
|
||||
) -> Result<Vec<FtsLookupResult>, DatasetError> {
|
||||
let fts_dir = self.path_workdir.join("fts");
|
||||
let workdir = match self.path_workdir.as_ref() {
|
||||
Some(x) => x,
|
||||
None => {
|
||||
warn!("Skipping fts_lookup, no workdir");
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
};
|
||||
|
||||
let fts_dir = workdir.join("fts");
|
||||
|
||||
if !fts_dir.exists() {
|
||||
return Err(DatasetError::NoFtsIndex);
|
||||
@@ -335,7 +447,12 @@ impl Datasets {
|
||||
|
||||
/// Time at which fts was created
|
||||
pub fn ts_fts(&self) -> Result<Option<DateTime<Utc>>, std::io::Error> {
|
||||
let fts_dir = self.path_workdir.join("fts");
|
||||
let workdir = match self.path_workdir.as_ref() {
|
||||
Some(x) => x,
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
||||
let fts_dir = workdir.join("fts");
|
||||
|
||||
if !fts_dir.exists() {
|
||||
return Ok(None);
|
||||
|
||||
Reference in New Issue
Block a user