TMP sidecars
Some checks failed
CI / Typos (push) Successful in 30s
CI / Clippy (push) Failing after 1m13s
CI / Build and test (all features) (push) Successful in 4m25s
CI / Build and test (push) Successful in 5m45s

This commit is contained in:
2026-03-16 20:26:41 -07:00
parent f2f5726d7b
commit e5193a1114
12 changed files with 276 additions and 272 deletions

View File

@@ -25,9 +25,6 @@ mod toml;
use pile_config::Label;
pub use toml::*;
mod sidecar;
pub use sidecar::*;
use crate::{
extract::{
misc::MapExtractor,
@@ -77,10 +74,6 @@ impl ItemExtractor {
Label::new("toml").unwrap(),
PileValue::ObjectExtractor(Arc::new(TomlExtractor::new(item))),
),
(
Label::new("sidecar").unwrap(),
PileValue::ObjectExtractor(Arc::new(SidecarExtractor::new(item))),
),
]),
};
@@ -109,7 +102,6 @@ impl ObjectExtractor for ItemExtractor {
Label::new("exif").unwrap(),
Label::new("pdf").unwrap(),
Label::new("json").unwrap(),
Label::new("sidecar").unwrap(),
]);
}
}

View File

@@ -1,57 +0,0 @@
use pile_config::Label;
use std::sync::OnceLock;
use tracing::trace;
use super::TomlExtractor;
use crate::{
extract::traits::{ExtractState, ObjectExtractor},
value::{Item, PileValue},
};
/// Object extractor that exposes the fields of an item's sidecar by
/// delegating to a [`TomlExtractor`] built over that sidecar.
pub struct SidecarExtractor {
// The item whose sidecar (`Item::sidecar()`) is consulted.
item: Item,
// Lazily-initialized delegate: set on first access to `Some(extractor)`
// when the item has a sidecar, or to `None` when it does not.
output: OnceLock<Option<TomlExtractor>>,
}
impl SidecarExtractor {
    /// Builds an extractor for `item`, keeping its own clone of the item.
    ///
    /// The sidecar is not looked up here; the `output` cell starts out empty
    /// and is filled on first use.
    pub fn new(item: &Item) -> Self {
        let output = OnceLock::new();
        let item = item.clone();
        Self { item, output }
    }
}
#[async_trait::async_trait]
impl ObjectExtractor for SidecarExtractor {
async fn field(
&self,
state: &ExtractState,
name: &Label,
args: Option<&str>,
) -> Result<Option<PileValue>, std::io::Error> {
trace!(
?args,
key = self.item.key().as_str(),
"Getting field {name:?} from SidecarExtractor",
);
match self
.output
.get_or_init(|| self.item.sidecar().map(TomlExtractor::new))
{
Some(x) => Ok(x.field(state, name, args).await?),
None => Ok(Some(PileValue::Null)),
}
}
async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
match self
.output
.get_or_init(|| self.item.sidecar().map(TomlExtractor::new))
{
Some(x) => Ok(x.fields().await?),
None => Ok(Vec::new()),
}
}
}

View File

@@ -1,5 +1,5 @@
use chrono::{DateTime, Utc};
use pile_config::Label;
use pile_config::{Label, pattern::GroupPattern};
use std::{path::PathBuf, sync::Arc};
use tokio_stream::wrappers::ReceiverStream;
use walkdir::WalkDir;
@@ -13,16 +13,15 @@ use crate::{
pub struct DirDataSource {
pub name: Label,
pub dir: PathBuf,
pub sidecars: bool,
pub pattern: GroupPattern,
}
impl DirDataSource {
pub fn new(name: &Label, dir: PathBuf, sidecars: bool) -> Self {
pub fn new(name: &Label, dir: PathBuf, pattern: GroupPattern) -> Self {
Self {
name: name.clone(),
dir,
sidecars,
pattern,
}
}
}
@@ -38,29 +37,11 @@ impl DataSource for Arc<DirDataSource> {
return Ok(None);
}
// Ignore toml files if sidecars are enabled
if self.sidecars && key.extension().and_then(|x| x.to_str()) == Some("toml") {
return Ok(None);
}
return Ok(Some(Item::File {
source: Arc::clone(self),
mime: mime_guess::from_path(&key).first_or_octet_stream(),
path: key.clone(),
sidecar: self
.sidecars
.then(|| {
let sidecar_path = key.with_extension("toml");
sidecar_path.is_file().then(|| {
Box::new(Item::File {
source: Arc::clone(self),
mime: mime_guess::from_path(&sidecar_path).first_or_octet_stream(),
path: sidecar_path,
sidecar: None,
})
})
})
.flatten(),
group: todo!(),
}));
}
@@ -91,27 +72,12 @@ impl DataSource for Arc<DirDataSource> {
let item = match path.extension().and_then(|x| x.to_str()) {
None => continue,
Some("toml") if source.sidecars => continue,
Some(_) => Item::File {
source: Arc::clone(&source),
mime: mime_guess::from_path(&path).first_or_octet_stream(),
path: path.clone(),
sidecar: source
.sidecars
.then(|| {
let sidecar_path = path.with_extension("toml");
sidecar_path.is_file().then(|| {
Box::new(Item::File {
source: Arc::clone(&source),
mime: mime_guess::from_path(&sidecar_path)
.first_or_octet_stream(),
path: sidecar_path,
sidecar: None,
})
})
})
.flatten(),
group: todo!(),
},
};

View File

@@ -1,6 +1,6 @@
use aws_sdk_s3::config::{BehaviorVersion, Credentials, Region};
use chrono::{DateTime, Utc};
use pile_config::{Label, S3Credentials};
use pile_config::{Label, S3Credentials, pattern::GroupPattern};
use smartstring::{LazyCompact, SmartString};
use std::sync::Arc;
use tokio_stream::wrappers::ReceiverStream;
@@ -12,8 +12,8 @@ pub struct S3DataSource {
pub name: Label,
pub bucket: SmartString<LazyCompact>,
pub prefix: Option<SmartString<LazyCompact>>,
pub sidecars: bool,
pub client: Arc<aws_sdk_s3::Client>,
pub pattern: GroupPattern,
}
impl S3DataSource {
@@ -24,7 +24,7 @@ impl S3DataSource {
endpoint: Option<String>,
region: String,
credentials: &S3Credentials,
sidecars: bool,
pattern: GroupPattern,
) -> Result<Self, std::io::Error> {
let client = {
let creds = Credentials::new(
@@ -51,8 +51,8 @@ impl S3DataSource {
name: name.clone(),
bucket: bucket.into(),
prefix: prefix.map(|x| x.into()),
sidecars,
client: Arc::new(client),
pattern,
})
}
@@ -99,36 +99,17 @@ impl S3DataSource {
let mime = mime_guess::from_path(object_path.as_str()).first_or_octet_stream();
let sidecar = if self.sidecars {
self.find_sidecar_key(object_path.as_str())
.await
.map(|sidecar_key| {
Box::new(Item::S3 {
source: Arc::clone(self),
mime: mime_guess::from_path(sidecar_key.as_str()).first_or_octet_stream(),
key: sidecar_key,
sidecar: None,
})
})
} else {
None
};
Item::S3 {
source: Arc::clone(self),
mime,
key,
sidecar,
group: todo!(),
}
}
}
impl DataSource for Arc<S3DataSource> {
async fn get(&self, key: &str) -> Result<Option<Item>, std::io::Error> {
if self.sidecars && key.ends_with(".toml") {
return Ok(None);
}
let key: SmartString<LazyCompact> = key.into();
let key = match &self.prefix {
Some(x) => format!("{x}/{key}").into(),
@@ -196,10 +177,6 @@ impl DataSource for Arc<S3DataSource> {
None => continue,
};
if source.sidecars && key.ends_with(".toml") {
continue;
}
let item = source.make_item(key).await;
if tx.send(Ok(item)).await.is_err() {

View File

@@ -1,6 +1,7 @@
use mime::Mime;
use pile_config::Label;
use smartstring::{LazyCompact, SmartString};
use std::{fs::File, path::PathBuf, sync::Arc};
use std::{collections::HashMap, fs::File, path::PathBuf, sync::Arc};
use crate::{
source::{DirDataSource, S3DataSource},
@@ -19,7 +20,7 @@ pub enum Item {
mime: Mime,
path: PathBuf,
sidecar: Option<Box<Item>>,
group: Arc<HashMap<Label, Box<Item>>>,
},
S3 {
@@ -27,7 +28,7 @@ pub enum Item {
mime: Mime,
key: SmartString<LazyCompact>,
sidecar: Option<Box<Item>>,
group: Arc<HashMap<Label, Box<Item>>>,
},
}
@@ -96,10 +97,10 @@ impl Item {
}
}
pub fn sidecar(&self) -> Option<&Self> {
pub fn group(&self) -> &HashMap<Label, Box<Self>> {
match self {
Self::File { sidecar, .. } => sidecar.as_ref().map(|x| &**x),
Self::S3 { sidecar, .. } => sidecar.as_ref().map(|x| &**x),
Self::File { group, .. } => group,
Self::S3 { group, .. } => group,
}
}
}