use mime::Mime; use smartstring::{LazyCompact, SmartString}; use std::{fs::File, path::PathBuf, sync::Arc}; use crate::{ source::{DirDataSource, S3DataSource}, value::{ItemReader, S3Reader}, }; // // MARK: item // /// A cheaply-cloneable pointer to an item in a dataset #[derive(Debug, Clone)] pub enum Item { File { source: Arc, mime: Mime, path: PathBuf, sidecar: Option>, }, S3 { source: Arc, mime: Mime, key: SmartString, sidecar: Option>, }, } impl Item { /// Open the item for reading. For S3, performs a HEAD request to determine /// the object size. pub async fn read(&self) -> Result { Ok(match self { Self::File { path, .. } => ItemReader::File(File::open(path)?), Self::S3 { source, key, .. } => { let head = source .client .head_object() .bucket(source.bucket.as_str()) .key(key.as_str()) .send() .await .map_err(std::io::Error::other)?; let size = head.content_length().unwrap_or(0) as u64; ItemReader::S3(S3Reader { client: source.client.clone(), bucket: source.bucket.clone(), key: key.to_owned(), cursor: 0, size, }) } }) } pub fn source_name(&self) -> &pile_config::Label { match self { Self::File { source, .. } => &source.name, Self::S3 { source, .. } => &source.name, } } #[expect(clippy::expect_used)] pub fn key(&self) -> SmartString { match self { Self::File { path, .. } => path.to_str().expect("path is not utf-8").into(), Self::S3 { key, .. } => key.clone(), } } pub fn hash(&self) -> Result { match self { Self::File { path, .. } => { let mut hasher = blake3::Hasher::new(); let mut file = std::fs::File::open(path)?; std::io::copy(&mut file, &mut hasher)?; return Ok(hasher.finalize()); } Self::S3 { .. } => todo!(), } } pub fn mime(&self) -> &Mime { match self { Self::File { mime, .. } => mime, Self::S3 { mime, .. } => mime, } } pub fn sidecar(&self) -> Option<&Self> { match self { Self::File { sidecar, .. } => sidecar.as_ref().map(|x| &**x), Self::S3 { sidecar, .. } => sidecar.as_ref().map(|x| &**x), } } }