Consistent paths, disable sources

This commit is contained in:
2026-03-15 09:47:29 -07:00
parent 26a428dedc
commit 4ce563ae80
4 changed files with 37 additions and 5 deletions

View File

@@ -51,6 +51,10 @@ pub struct S3Credentials {
pub enum Source {
/// A directory of files
Filesystem {
/// If false, ignore this dataset
#[serde(default = "default_true")]
enabled: bool,
/// The directory to scan.
/// Must be relative.
path: PathBuf,
@@ -66,6 +70,10 @@ pub enum Source {
/// An S3-compatible object store bucket
S3 {
/// If false, ignore this dataset
#[serde(default = "default_true")]
enabled: bool,
bucket: String,
prefix: Option<String>,

View File

@@ -114,7 +114,15 @@ impl Datasets {
let mut sources = HashMap::new();
for (label, source) in &config.dataset.source {
match source {
Source::Filesystem { path, sidecars } => {
Source::Filesystem {
enabled,
path,
sidecars,
} => {
if !enabled {
continue;
}
sources.insert(
label.clone(),
Dataset::Dir(Arc::new(DirDataSource::new(
@@ -126,6 +134,7 @@ impl Datasets {
}
Source::S3 {
enabled,
bucket,
prefix,
endpoint,
@@ -133,6 +142,10 @@ impl Datasets {
credentials,
sidecars,
} => {
if !enabled {
continue;
}
match S3DataSource::new(
label,
bucket.clone(),

View File

@@ -92,10 +92,15 @@ impl S3DataSource {
async fn make_item(self: &Arc<Self>, key: impl Into<SmartString<LazyCompact>>) -> Item {
let key: SmartString<LazyCompact> = key.into();
let mime = mime_guess::from_path(key.as_str()).first_or_octet_stream();
let object_path = match &self.prefix {
Some(x) => format!("{x}/{key}").into(),
None => key.clone(),
};
let mime = mime_guess::from_path(object_path.as_str()).first_or_octet_stream();
let sidecar = if self.sidecars {
self.find_sidecar_key(key.as_str())
self.find_sidecar_key(object_path.as_str())
.await
.map(|sidecar_key| {
Box::new(Item::S3 {
@@ -124,11 +129,17 @@ impl DataSource for Arc<S3DataSource> {
return Ok(None);
}
let key: SmartString<LazyCompact> = key.into();
let key = match &self.prefix {
Some(x) => format!("{x}/{key}").into(),
None => key,
};
let result = self
.client
.head_object()
.bucket(self.bucket.as_str())
.key(key)
.key(key.as_str())
.send()
.await;

View File

@@ -58,7 +58,7 @@ impl CliCmd for AnnotateCommand {
for (name, source) in &ds.config.dataset.source {
match source {
Source::Filesystem { path, sidecars } => {
Source::Filesystem { path, sidecars, .. } => {
if !sidecars {
warn!("Source {name} does not have sidecars enabled, skipping");
continue;