use aws_sdk_s3::{error::SdkError, operation::get_object::GetObjectError}; use mime::Mime; use std::io::{Error as IoError, Seek, SeekFrom, Write}; use thiserror::Error; use super::S3Client; use crate::retry; #[derive(Debug, Error)] #[expect(clippy::large_enum_variant)] pub enum S3ReaderError { #[error("sdk error")] SdkError(#[from] SdkError), #[error("byte stream error")] ByteStreamError(#[from] aws_sdk_s3::primitives::ByteStreamError), #[error("i/o error")] IoError(#[from] IoError), } /// Provides a [`std::io::Read`]-like interface to an S3 object. \ /// This doesn't actually implement [`std::io::Read`] because Read isn't async. /// /// Also implements [`std::io::Seek`] pub struct S3Reader { pub(super) client: S3Client, pub(super) bucket: String, pub(super) key: String, pub(super) cursor: u64, pub(super) size: u64, pub(super) mime: Mime, } impl S3Reader { pub async fn read(&mut self, mut buf: &mut [u8]) -> Result { let len_left = self.size - self.cursor; if len_left == 0 || buf.is_empty() { return Ok(0); } #[expect(clippy::unwrap_used)] // TODO: probably fits? let start_byte = usize::try_from(self.cursor).unwrap(); #[expect(clippy::unwrap_used)] // usize fits in u64 let len_to_read = u64::try_from(buf.len()).unwrap().min(len_left); #[expect(clippy::unwrap_used)] // must fit, we called min() let len_to_read = usize::try_from(len_to_read).unwrap(); let end_byte = start_byte + len_to_read - 1; let b = retry!( self.client.retries, self.client .client .get_object() .bucket(self.bucket.as_str()) .key(self.key.as_str()) .range(format!("bytes={start_byte}-{end_byte}")) .send() .await )?; // Looks like `bytes 31000000-31999999/33921176`` // println!("{:?}", b.content_range); let mut bytes = b.body.collect().await?.into_bytes(); bytes.truncate(len_to_read); let l = bytes.len(); // Memory to memory writes are infallible #[expect(clippy::unwrap_used)] buf.write_all(&bytes).unwrap(); // Cannot fail, usize should always fit into u64 #[expect(clippy::unwrap_used)] { self.cursor += u64::try_from(l).unwrap(); } return Ok(len_to_read); } pub fn is_done(&self) -> bool { return self.cursor == self.size; } pub fn mime(&self) -> &Mime { &self.mime } /// Write the entire contents of this reader to `r`. /// /// This method always downloads the whole object, /// and always preserves `self.cursor`. pub async fn download(&mut self, r: &mut W) -> Result<(), S3ReaderError> { let pos = self.stream_position()?; const BUF_LEN: usize = 10_000_000; #[expect(clippy::unwrap_used)] // Cannot fail let mut buf: Box<[u8; BUF_LEN]> = vec![0u8; BUF_LEN].try_into().unwrap(); while !self.is_done() { let b = self.read(&mut buf[..]).await?; r.write_all(&buf[0..b])?; } self.seek(SeekFrom::Start(pos))?; Ok(()) } } impl Seek for S3Reader { fn seek(&mut self, pos: SeekFrom) -> std::io::Result { match pos { SeekFrom::Start(x) => self.cursor = x.min(self.size - 1), // Cannot panic, we handle all cases #[expect(clippy::unwrap_used)] SeekFrom::Current(x) => { if x < 0 { if u64::try_from(x.abs()).unwrap() > self.cursor { return Err(std::io::Error::new( std::io::ErrorKind::InvalidInput, "cannot seek past start", )); } self.cursor -= u64::try_from(x.abs()).unwrap(); } else { self.cursor += u64::try_from(x).unwrap(); } } // Cannot panic, we handle all cases #[expect(clippy::unwrap_used)] SeekFrom::End(x) => { if x < 0 { if u64::try_from(x.abs()).unwrap() > self.size { return Err(std::io::Error::new( std::io::ErrorKind::InvalidInput, "cannot seek past start", )); } // Cannot fail, is abs self.cursor = self.size - u64::try_from(x.abs()).unwrap(); } else { // Cannot fail, is positive self.cursor = self.size + u64::try_from(x).unwrap(); } } } self.cursor = self.cursor.min(self.size - 1); return Ok(self.cursor); } }