Consistent paths, disable sources

This commit is contained in:
2026-03-15 09:47:29 -07:00
parent 26a428dedc
commit 4ce563ae80
4 changed files with 37 additions and 5 deletions

View File

@@ -51,6 +51,10 @@ pub struct S3Credentials {
pub enum Source { pub enum Source {
/// A directory of files /// A directory of files
Filesystem { Filesystem {
/// If false, ignore this dataset
#[serde(default = "default_true")]
enabled: bool,
/// The directories to scan. /// The directories to scan.
/// Must be relative. /// Must be relative.
path: PathBuf, path: PathBuf,
@@ -66,6 +70,10 @@ pub enum Source {
/// An S3-compatible object store bucket /// An S3-compatible object store bucket
S3 { S3 {
/// If false, ignore this dataset
#[serde(default = "default_true")]
enabled: bool,
bucket: String, bucket: String,
prefix: Option<String>, prefix: Option<String>,

View File

@@ -114,7 +114,15 @@ impl Datasets {
let mut sources = HashMap::new(); let mut sources = HashMap::new();
for (label, source) in &config.dataset.source { for (label, source) in &config.dataset.source {
match source { match source {
Source::Filesystem { path, sidecars } => { Source::Filesystem {
enabled,
path,
sidecars,
} => {
if !enabled {
continue;
}
sources.insert( sources.insert(
label.clone(), label.clone(),
Dataset::Dir(Arc::new(DirDataSource::new( Dataset::Dir(Arc::new(DirDataSource::new(
@@ -126,6 +134,7 @@ impl Datasets {
} }
Source::S3 { Source::S3 {
enabled,
bucket, bucket,
prefix, prefix,
endpoint, endpoint,
@@ -133,6 +142,10 @@ impl Datasets {
credentials, credentials,
sidecars, sidecars,
} => { } => {
if !enabled {
continue;
}
match S3DataSource::new( match S3DataSource::new(
label, label,
bucket.clone(), bucket.clone(),

View File

@@ -92,10 +92,15 @@ impl S3DataSource {
async fn make_item(self: &Arc<Self>, key: impl Into<SmartString<LazyCompact>>) -> Item { async fn make_item(self: &Arc<Self>, key: impl Into<SmartString<LazyCompact>>) -> Item {
let key: SmartString<LazyCompact> = key.into(); let key: SmartString<LazyCompact> = key.into();
let mime = mime_guess::from_path(key.as_str()).first_or_octet_stream(); let object_path = match &self.prefix {
Some(x) => format!("{x}/{key}").into(),
None => key.clone(),
};
let mime = mime_guess::from_path(object_path.as_str()).first_or_octet_stream();
let sidecar = if self.sidecars { let sidecar = if self.sidecars {
self.find_sidecar_key(key.as_str()) self.find_sidecar_key(object_path.as_str())
.await .await
.map(|sidecar_key| { .map(|sidecar_key| {
Box::new(Item::S3 { Box::new(Item::S3 {
@@ -124,11 +129,17 @@ impl DataSource for Arc<S3DataSource> {
return Ok(None); return Ok(None);
} }
let key: SmartString<LazyCompact> = key.into();
let key = match &self.prefix {
Some(x) => format!("{x}/{key}").into(),
None => key,
};
let result = self let result = self
.client .client
.head_object() .head_object()
.bucket(self.bucket.as_str()) .bucket(self.bucket.as_str())
.key(key) .key(key.as_str())
.send() .send()
.await; .await;

View File

@@ -58,7 +58,7 @@ impl CliCmd for AnnotateCommand {
for (name, source) in &ds.config.dataset.source { for (name, source) in &ds.config.dataset.source {
match source { match source {
Source::Filesystem { path, sidecars } => { Source::Filesystem { path, sidecars, .. } => {
if !sidecars { if !sidecars {
warn!("Source {name} does not have sidecars enabled, skipping"); warn!("Source {name} does not have sidecars enabled, skipping");
continue; continue;