diff --git a/crates/pile-config/src/lib.rs b/crates/pile-config/src/lib.rs index 76ef44f..678de58 100644 --- a/crates/pile-config/src/lib.rs +++ b/crates/pile-config/src/lib.rs @@ -51,6 +51,10 @@ pub struct S3Credentials { pub enum Source { /// A directory of files Filesystem { + /// If false, ignore this dataset + #[serde(default = "default_true")] + enabled: bool, + /// The directories to scan. /// Must be relative. path: PathBuf, @@ -66,6 +70,10 @@ pub enum Source { /// An S3-compatible object store bucket S3 { + /// If false, ignore this dataset + #[serde(default = "default_true")] + enabled: bool, + bucket: String, prefix: Option, diff --git a/crates/pile-dataset/src/dataset.rs b/crates/pile-dataset/src/dataset.rs index c3fc64d..240232e 100644 --- a/crates/pile-dataset/src/dataset.rs +++ b/crates/pile-dataset/src/dataset.rs @@ -114,7 +114,15 @@ impl Datasets { let mut sources = HashMap::new(); for (label, source) in &config.dataset.source { match source { - Source::Filesystem { path, sidecars } => { + Source::Filesystem { + enabled, + path, + sidecars, + } => { + if !enabled { + continue; + } + sources.insert( label.clone(), Dataset::Dir(Arc::new(DirDataSource::new( @@ -126,6 +134,7 @@ impl Datasets { } Source::S3 { + enabled, bucket, prefix, endpoint, @@ -133,6 +142,10 @@ impl Datasets { credentials, sidecars, } => { + if !enabled { + continue; + } + match S3DataSource::new( label, bucket.clone(), diff --git a/crates/pile-value/src/source/s3.rs b/crates/pile-value/src/source/s3.rs index e635580..b4eaf55 100644 --- a/crates/pile-value/src/source/s3.rs +++ b/crates/pile-value/src/source/s3.rs @@ -92,10 +92,15 @@ impl S3DataSource { async fn make_item(self: &Arc, key: impl Into>) -> Item { let key: SmartString = key.into(); - let mime = mime_guess::from_path(key.as_str()).first_or_octet_stream(); + let object_path = match &self.prefix { + Some(x) => format!("{x}/{key}").into(), + None => key.clone(), + }; + + let mime = mime_guess::from_path(object_path.as_str()).first_or_octet_stream(); let sidecar = if self.sidecars { - self.find_sidecar_key(key.as_str()) + self.find_sidecar_key(object_path.as_str()) .await .map(|sidecar_key| { Box::new(Item::S3 { @@ -124,11 +129,17 @@ impl DataSource for Arc { return Ok(None); } + let key: SmartString = key.into(); + let key = match &self.prefix { + Some(x) => format!("{x}/{key}").into(), + None => key, + }; + let result = self .client .head_object() .bucket(self.bucket.as_str()) - .key(key) + .key(key.as_str()) .send() .await; diff --git a/crates/pile/src/command/annotate.rs b/crates/pile/src/command/annotate.rs index 90912b3..4a5434d 100644 --- a/crates/pile/src/command/annotate.rs +++ b/crates/pile/src/command/annotate.rs @@ -58,7 +58,7 @@ impl CliCmd for AnnotateCommand { for (name, source) in &ds.config.dataset.source { match source { - Source::Filesystem { path, sidecars } => { + Source::Filesystem { path, sidecars, .. } => { if !sidecars { warn!("Source {name} does not have sidecars enabled, skipping"); continue;