use mime::Mime; use pile_config::Label; use pile_io::{SyncReadBridge, chacha::ChaChaReaderv1Async}; use smartstring::{LazyCompact, SmartString}; use std::{collections::HashMap, fs::File, path::PathBuf, sync::Arc}; use crate::{ source::{DirDataSource, S3DataSource, encrypt_path}, value::ItemReader, }; // // MARK: item // /// A cheaply-cloneable pointer to an item in a dataset #[derive(Debug, Clone)] pub enum Item { File { source: Arc, mime: Mime, path: PathBuf, group: Arc>>, }, S3 { source: Arc, mime: Mime, key: SmartString, group: Arc>>, }, } impl Item { /// Open the item for reading. For S3, performs a HEAD request to determine /// the object size. pub async fn read(&self) -> Result { Ok(match self { Self::File { path, .. } => ItemReader::File(File::open(path)?), Self::S3 { source, key, .. } => { let logical_key = key.as_str(); let s3_key_part: SmartString = match &source.encryption_key { None => logical_key.into(), Some(enc_key) => encrypt_path(enc_key, logical_key).into(), }; let full_key: SmartString = match &source.prefix { None => s3_key_part, Some(p) => { if p.ends_with('/') { format!("{p}{s3_key_part}").into() } else { format!("{p}/{s3_key_part}").into() } } }; let reader = source.client.get(&full_key).await?; match source.encryption_key { None => ItemReader::S3(reader), Some(enc_key) => { ItemReader::EncryptedS3(ChaChaReaderv1Async::new(reader, enc_key).await?) } } } }) } pub fn source_name(&self) -> &pile_config::Label { match self { Self::File { source, .. } => &source.name, Self::S3 { source, .. } => &source.name, } } #[expect(clippy::expect_used)] pub fn key(&self) -> SmartString { match self { Self::File { source, path, .. } => path .strip_prefix(&source.dir) .expect("item must be inside source") .to_str() .expect("path is not utf-8") .into(), Self::S3 { key, .. } => key.clone(), } } pub async fn hash(&self) -> Result { let read = self.read().await?; let mut read = SyncReadBridge::new_current(read); let out = tokio::task::spawn_blocking(move || { let mut hasher = blake3::Hasher::new(); std::io::copy(&mut read, &mut hasher)?; return Ok::<_, std::io::Error>(hasher.finalize()); }) .await??; return Ok(out); } pub fn mime(&self) -> &Mime { match self { Self::File { mime, .. } => mime, Self::S3 { mime, .. } => mime, } } pub fn group(&self) -> &HashMap> { match self { Self::File { group, .. } => group, Self::S3 { group, .. } => group, } } }