Files
pile/crates/pile-value/src/value/item.rs
2026-03-23 21:09:22 -07:00

120 lines
2.8 KiB
Rust

use mime::Mime;
use pile_config::Label;
use pile_io::{SyncReadBridge, chacha::ChaChaReaderv1Async};
use smartstring::{LazyCompact, SmartString};
use std::{collections::HashMap, fs::File, path::PathBuf, sync::Arc};
use crate::{
source::{DirDataSource, S3DataSource, encrypt_path},
value::ItemReader,
};
//
// MARK: item
//
/// A cheaply-cloneable pointer to an item in a dataset
///
/// Both variants keep their data source and their group map behind an
/// [`Arc`], so cloning an `Item` does not deep-copy either of them.
#[derive(Debug, Clone)]
pub enum Item {
/// An item that is read by opening a file with [`File::open`].
File {
/// The directory data source this item belongs to.
source: Arc<DirDataSource>,
/// MIME type recorded for this item.
mime: Mime,
/// Path that is opened when the item is read. `Item::key` expects this
/// path to start with `source.dir` and panics otherwise.
path: PathBuf,
/// Other items associated with this one, keyed by label.
// NOTE(review): what a "group" means is decided by the code that builds
// items, which is not visible here -- confirm there.
group: Arc<HashMap<Label, Box<Item>>>,
},
/// An item that is read by fetching an object through the S3 source's client.
S3 {
/// The S3 data source this item belongs to.
source: Arc<S3DataSource>,
/// MIME type recorded for this item.
mime: Mime,
/// Logical object key. `Item::read` derives the key that is actually
/// requested from it by applying the source's optional path encryption
/// and optional prefix.
key: SmartString<LazyCompact>,
/// Other items associated with this one, keyed by label.
// NOTE(review): what a "group" means is decided by the code that builds
// items, which is not visible here -- confirm there.
group: Arc<HashMap<Label, Box<Item>>>,
},
}
impl Item {
/// Open the item for reading. For S3, performs a HEAD request to determine
/// the object size.
pub async fn read(&self) -> Result<ItemReader, std::io::Error> {
Ok(match self {
Self::File { path, .. } => ItemReader::File(File::open(path)?),
Self::S3 { source, key, .. } => {
let logical_key = key.as_str();
let s3_key_part: SmartString<LazyCompact> = match &source.encryption_key {
None => logical_key.into(),
Some(enc_key) => encrypt_path(enc_key, logical_key).into(),
};
let full_key: SmartString<LazyCompact> = match &source.prefix {
None => s3_key_part,
Some(p) => {
if p.ends_with('/') {
format!("{p}{s3_key_part}").into()
} else {
format!("{p}/{s3_key_part}").into()
}
}
};
let reader = source.client.get(&full_key).await?;
match source.encryption_key {
None => ItemReader::S3(reader),
Some(enc_key) => {
ItemReader::EncryptedS3(ChaChaReaderv1Async::new(reader, enc_key).await?)
}
}
}
})
}
pub fn source_name(&self) -> &pile_config::Label {
match self {
Self::File { source, .. } => &source.name,
Self::S3 { source, .. } => &source.name,
}
}
#[expect(clippy::expect_used)]
pub fn key(&self) -> SmartString<LazyCompact> {
match self {
Self::File { source, path, .. } => path
.strip_prefix(&source.dir)
.expect("item must be inside source")
.to_str()
.expect("path is not utf-8")
.into(),
Self::S3 { key, .. } => key.clone(),
}
}
pub async fn hash(&self) -> Result<blake3::Hash, std::io::Error> {
let read = self.read().await?;
let mut read = SyncReadBridge::new_current(read);
let out = tokio::task::spawn_blocking(move || {
let mut hasher = blake3::Hasher::new();
std::io::copy(&mut read, &mut hasher)?;
return Ok::<_, std::io::Error>(hasher.finalize());
})
.await??;
return Ok(out);
}
pub fn mime(&self) -> &Mime {
match self {
Self::File { mime, .. } => mime,
Self::S3 { mime, .. } => mime,
}
}
pub fn group(&self) -> &HashMap<Label, Box<Self>> {
match self {
Self::File { group, .. } => group,
Self::S3 { group, .. } => group,
}
}
}