Remove S3 + encryption
All checks were successful
CI / Typos (push) Successful in 20s
CI / Clippy (push) Successful in 2m44s
CI / Build and test (push) Successful in 3m10s
Docker / build-and-push (push) Successful in 5m6s
CI / Build and test (all features) (push) Successful in 6m51s

This commit is contained in:
2026-03-26 14:37:18 -07:00
parent ec7326a55e
commit 80f4ebdbe6
24 changed files with 42 additions and 2915 deletions

View File

@@ -9,7 +9,3 @@ workspace = true
[dependencies]
tokio = { workspace = true }
smartstring = { workspace = true }
aws-sdk-s3 = { workspace = true }
chacha20poly1305 = { workspace = true }
binrw = { workspace = true }

View File

@@ -1,50 +0,0 @@
use binrw::{binrw, meta::ReadMagic};
/// On-disk header for the v1 ChaCha container format.
///
/// Serialized (little-endian) as: the 12-byte magic `PileChaChav1`, then
/// [`ChaChaConfigv1`], then the total plaintext size in bytes.
#[binrw]
#[brw(little, magic = b"PileChaChav1")]
#[derive(Debug, Clone, Copy)]
pub struct ChaChaHeaderv1 {
    /// Chunking/encryption parameters the stream was written with.
    pub config: ChaChaConfigv1,
    /// Total size of the decrypted payload; the writer patches this in
    /// during `finish`.
    pub plaintext_size: u64,
}
impl ChaChaHeaderv1 {
    /// Serialized header size in bytes: magic + config fields + `plaintext_size`.
    pub const SIZE: usize = ChaChaHeaderv1::MAGIC.len() + std::mem::size_of::<ChaChaConfigv1>() + 8;
}
/// Sanity check: the serialized header size constant must equal the in-memory
/// struct size plus the magic bytes (binrw writes the magic but it is not a
/// struct field, so it is not part of `size_of`).
#[test]
fn chachaheader_size() {
    assert_eq!(
        ChaChaHeaderv1::SIZE,
        // Fixed: the magic length must be ADDED to the struct size, not
        // subtracted (SIZE = 12-byte magic + 24-byte config + 8 = 44, while
        // size_of::<ChaChaHeaderv1>() = 32; the original `-` asserted 44 == 20).
        std::mem::size_of::<ChaChaHeaderv1>() + ChaChaHeaderv1::MAGIC.len()
    )
}
//
// MARK: config
//
/// Tunable parameters of the v1 format, stored verbatim in the header so a
/// reader can decode streams written with non-default settings.
#[binrw]
#[brw(little)]
#[derive(Debug, Clone, Copy)]
pub struct ChaChaConfigv1 {
    /// Plaintext bytes per encrypted chunk.
    pub chunk_size: u64,
    /// Bytes of nonce prepended to each encrypted chunk (24 for XChaCha20).
    pub nonce_size: u64,
    /// Bytes of AEAD authentication tag per chunk (16 for Poly1305).
    pub tag_size: u64,
}
impl Default for ChaChaConfigv1 {
fn default() -> Self {
Self {
chunk_size: 64 * 1024,
nonce_size: 24,
tag_size: 16,
}
}
}
impl ChaChaConfigv1 {
    /// Size in bytes of one full encrypted chunk on disk: the nonce, the
    /// ciphertext (same length as the plaintext chunk), and the tag.
    pub(crate) fn enc_chunk_size(&self) -> u64 {
        let Self {
            chunk_size,
            nonce_size,
            tag_size,
        } = *self;
        nonce_size + chunk_size + tag_size
    }
}

View File

@@ -1,9 +0,0 @@
mod reader;
mod reader_async;
mod writer;
mod writer_async;
pub use {reader::*, reader_async::*, writer::*, writer_async::*};
mod format;
pub use format::*;

View File

@@ -1,151 +0,0 @@
use std::io::{Read, Seek, SeekFrom};
use crate::{AsyncReader, AsyncSeekReader, chacha::ChaChaHeaderv1};
//
// MARK: reader
//
/// Streaming decryptor for the v1 ChaCha container over any `Read + Seek`
/// source. Supports random access by decrypting one chunk at a time and
/// caching the most recently decrypted chunk.
pub struct ChaChaReaderv1<R: Read + Seek> {
    /// Underlying encrypted source.
    inner: R,
    /// Header parsed from the start of `inner`.
    header: ChaChaHeaderv1,
    /// Byte offset in `inner` where the first encrypted chunk begins
    /// (the stream position right after the header).
    data_offset: u64,
    /// XChaCha20-Poly1305 key used to decrypt chunks.
    encryption_key: [u8; 32],
    /// Current position in the *plaintext* stream.
    cursor: u64,
    /// Total plaintext size, taken from the header.
    plaintext_size: u64,
    /// Most recently decrypted chunk: (chunk index, plaintext bytes).
    cached_chunk: Option<(u64, Vec<u8>)>,
}
impl<R: Read + Seek> ChaChaReaderv1<R> {
    /// Read and validate the header from the start of `inner`, leaving the
    /// stream positioned at the first encrypted chunk.
    ///
    /// # Errors
    /// Fails if seeking/reading fails or the header (including its magic
    /// bytes) cannot be parsed.
    pub fn new(mut inner: R, encryption_key: [u8; 32]) -> Result<Self, std::io::Error> {
        use binrw::BinReaderExt;
        inner.seek(SeekFrom::Start(0))?;
        let header: ChaChaHeaderv1 = inner.read_le().map_err(std::io::Error::other)?;
        // Position right after the header == start of chunk 0.
        let data_offset = inner.stream_position()?;
        Ok(Self {
            inner,
            header,
            data_offset,
            encryption_key,
            cursor: 0,
            plaintext_size: header.plaintext_size,
            cached_chunk: None,
        })
    }
    /// Read, authenticate, and decrypt chunk `chunk_index`, storing the
    /// plaintext in `cached_chunk`. The final chunk may be shorter than
    /// `enc_chunk_size`; anything smaller than nonce + tag is rejected.
    fn fetch_chunk(&mut self, chunk_index: u64) -> Result<(), std::io::Error> {
        use chacha20poly1305::{KeyInit, XChaCha20Poly1305, XNonce, aead::Aead};
        let enc_start = self.data_offset + chunk_index * self.header.config.enc_chunk_size();
        self.inner.seek(SeekFrom::Start(enc_start))?;
        let mut encrypted = vec![0u8; self.header.config.enc_chunk_size() as usize];
        let n = self.read_exact_or_eof(&mut encrypted)?;
        encrypted.truncate(n);
        if encrypted.len() < (self.header.config.nonce_size + self.header.config.tag_size) as usize
        {
            return Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                "encrypted chunk too short",
            ));
        }
        // Chunk layout: nonce || ciphertext (ciphertext includes the tag).
        let (nonce_bytes, ciphertext) = encrypted.split_at(self.header.config.nonce_size as usize);
        let nonce = XNonce::from_slice(nonce_bytes);
        let key = chacha20poly1305::Key::from_slice(&self.encryption_key);
        let cipher = XChaCha20Poly1305::new(key);
        let plaintext = cipher.decrypt(nonce, ciphertext).map_err(|_| {
            std::io::Error::new(std::io::ErrorKind::InvalidData, "decryption failed")
        })?;
        self.cached_chunk = Some((chunk_index, plaintext));
        Ok(())
    }
    /// Fill as much of `buf` as possible, stopping early at EOF; returns the
    /// number of bytes actually read.
    fn read_exact_or_eof(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
        let mut total = 0;
        while total < buf.len() {
            match self.inner.read(&mut buf[total..])? {
                0 => break,
                n => total += n,
            }
        }
        Ok(total)
    }
}
impl<R: Read + Seek + Send> AsyncReader for ChaChaReaderv1<R> {
    /// Copy decrypted bytes at the cursor into `buf`, decrypting the chunk
    /// containing the cursor if it is not already cached. Returns the number
    /// of bytes copied (0 at end of stream or for an empty buffer).
    async fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
        if buf.is_empty() || self.cursor >= self.plaintext_size {
            return Ok(0);
        }
        let wanted = self.cursor / self.header.config.chunk_size;
        let cache_hit = matches!(self.cached_chunk, Some((idx, _)) if idx == wanted);
        if !cache_hit {
            self.fetch_chunk(wanted)?;
        }
        // fetch_chunk always populates the cache on success.
        #[expect(clippy::unwrap_used)]
        let chunk = &self.cached_chunk.as_ref().unwrap().1;
        let start = (self.cursor % self.header.config.chunk_size) as usize;
        let count = buf.len().min(chunk.len() - start);
        buf[..count].copy_from_slice(&chunk[start..start + count]);
        self.cursor += count as u64;
        Ok(count)
    }
}
impl<R: Read + Seek + Send> AsyncSeekReader for ChaChaReaderv1<R> {
    /// Move the plaintext cursor. Seeking past the end clamps to
    /// `plaintext_size`; seeking before the start is an `InvalidInput` error.
    async fn seek(&mut self, pos: SeekFrom) -> Result<u64, std::io::Error> {
        match pos {
            SeekFrom::Start(x) => self.cursor = x.min(self.plaintext_size),
            SeekFrom::Current(x) => {
                if x < 0 {
                    let abs = x.unsigned_abs();
                    if abs > self.cursor {
                        return Err(std::io::Error::new(
                            std::io::ErrorKind::InvalidInput,
                            "cannot seek past start",
                        ));
                    }
                    self.cursor -= abs;
                } else {
                    // saturating_add: a huge forward offset must clamp to the
                    // end (below) instead of overflowing in debug builds.
                    self.cursor = self.cursor.saturating_add(x as u64);
                }
            }
            SeekFrom::End(x) => {
                if x < 0 {
                    let abs = x.unsigned_abs();
                    if abs > self.plaintext_size {
                        return Err(std::io::Error::new(
                            std::io::ErrorKind::InvalidInput,
                            "cannot seek past start",
                        ));
                    }
                    self.cursor = self.plaintext_size - abs;
                } else {
                    self.cursor = self.plaintext_size.saturating_add(x as u64);
                }
            }
        }
        // Clamp any forward seek to the logical end of the plaintext.
        self.cursor = self.cursor.min(self.plaintext_size);
        Ok(self.cursor)
    }
}

View File

@@ -1,165 +0,0 @@
use std::io::SeekFrom;
use crate::{AsyncReader, AsyncSeekReader, chacha::ChaChaHeaderv1};
/// Async counterpart of `ChaChaReaderv1`: streaming decryptor for the v1
/// ChaCha container over any `AsyncSeekReader`, with single-chunk caching.
pub struct ChaChaReaderv1Async<R: AsyncSeekReader> {
    /// Underlying encrypted source.
    inner: R,
    /// Header parsed from the start of `inner`.
    header: ChaChaHeaderv1,
    /// Byte offset in `inner` where the first encrypted chunk begins.
    data_offset: u64,
    /// XChaCha20-Poly1305 key used to decrypt chunks.
    encryption_key: [u8; 32],
    /// Current position in the *plaintext* stream.
    cursor: u64,
    /// Total plaintext size, taken from the header.
    plaintext_size: u64,
    /// Most recently decrypted chunk: (chunk index, plaintext bytes).
    cached_chunk: Option<(u64, Vec<u8>)>,
}
impl<R: AsyncSeekReader> ChaChaReaderv1Async<R> {
    /// Read and validate the header from the start of `inner`.
    ///
    /// # Errors
    /// Fails on I/O errors, a short read, or an unparseable header
    /// (including bad magic bytes).
    pub async fn new(mut inner: R, encryption_key: [u8; 32]) -> Result<Self, std::io::Error> {
        use binrw::BinReaderExt;
        use std::io::Cursor;
        inner.seek(SeekFrom::Start(0)).await?;
        // Read exactly the serialized header, then parse it from an
        // in-memory cursor.
        let mut buf = [0u8; ChaChaHeaderv1::SIZE];
        read_exact(&mut inner, &mut buf).await?;
        let header: ChaChaHeaderv1 = Cursor::new(&buf[..])
            .read_le()
            .map_err(std::io::Error::other)?;
        Ok(Self {
            inner,
            header,
            // Chunk 0 starts right after the fixed-size header.
            data_offset: buf.len() as u64,
            encryption_key,
            cursor: 0,
            plaintext_size: header.plaintext_size,
            cached_chunk: None,
        })
    }
    /// Read, authenticate, and decrypt chunk `chunk_index` into
    /// `cached_chunk`. The final chunk may be shorter than `enc_chunk_size`;
    /// anything smaller than nonce + tag is rejected.
    async fn fetch_chunk(&mut self, chunk_index: u64) -> Result<(), std::io::Error> {
        use chacha20poly1305::{KeyInit, XChaCha20Poly1305, XNonce, aead::Aead};
        let enc_start = self.data_offset + chunk_index * self.header.config.enc_chunk_size();
        self.inner.seek(SeekFrom::Start(enc_start)).await?;
        let mut encrypted = vec![0u8; self.header.config.enc_chunk_size() as usize];
        let n = read_exact_or_eof(&mut self.inner, &mut encrypted).await?;
        encrypted.truncate(n);
        if encrypted.len() < (self.header.config.nonce_size + self.header.config.tag_size) as usize
        {
            return Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                "encrypted chunk too short",
            ));
        }
        // Chunk layout: nonce || ciphertext (ciphertext includes the tag).
        let (nonce_bytes, ciphertext) = encrypted.split_at(self.header.config.nonce_size as usize);
        let nonce = XNonce::from_slice(nonce_bytes);
        let key = chacha20poly1305::Key::from_slice(&self.encryption_key);
        let cipher = XChaCha20Poly1305::new(key);
        let plaintext = cipher.decrypt(nonce, ciphertext).map_err(|_| {
            std::io::Error::new(std::io::ErrorKind::InvalidData, "decryption failed")
        })?;
        self.cached_chunk = Some((chunk_index, plaintext));
        Ok(())
    }
}
/// Fill `buf` completely, treating a short read (EOF) as an error.
async fn read_exact<R: AsyncReader>(inner: &mut R, buf: &mut [u8]) -> Result<(), std::io::Error> {
    if read_exact_or_eof(inner, buf).await? == buf.len() {
        Ok(())
    } else {
        Err(std::io::Error::new(
            std::io::ErrorKind::UnexpectedEof,
            "unexpected EOF reading header",
        ))
    }
}
/// Fill as much of `buf` as possible, stopping early at EOF.
/// Returns the number of bytes actually read.
async fn read_exact_or_eof<R: AsyncReader>(
    inner: &mut R,
    buf: &mut [u8],
) -> Result<usize, std::io::Error> {
    let mut filled = 0;
    loop {
        if filled == buf.len() {
            return Ok(filled);
        }
        let n = inner.read(&mut buf[filled..]).await?;
        if n == 0 {
            return Ok(filled);
        }
        filled += n;
    }
}
impl<R: AsyncSeekReader> AsyncReader for ChaChaReaderv1Async<R> {
    /// Copy decrypted bytes at the cursor into `buf`, decrypting the chunk
    /// containing the cursor if it is not already cached. Returns the number
    /// of bytes copied (0 at end of stream or for an empty buffer).
    async fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
        if buf.is_empty() || self.cursor >= self.plaintext_size {
            return Ok(0);
        }
        let wanted = self.cursor / self.header.config.chunk_size;
        let cache_hit = matches!(self.cached_chunk, Some((idx, _)) if idx == wanted);
        if !cache_hit {
            self.fetch_chunk(wanted).await?;
        }
        // fetch_chunk always populates the cache on success.
        #[expect(clippy::unwrap_used)]
        let chunk = &self.cached_chunk.as_ref().unwrap().1;
        let start = (self.cursor % self.header.config.chunk_size) as usize;
        let count = buf.len().min(chunk.len() - start);
        buf[..count].copy_from_slice(&chunk[start..start + count]);
        self.cursor += count as u64;
        Ok(count)
    }
}
impl<R: AsyncSeekReader> AsyncSeekReader for ChaChaReaderv1Async<R> {
    /// Move the plaintext cursor. Seeking past the end clamps to
    /// `plaintext_size`; seeking before the start is an `InvalidInput` error.
    async fn seek(&mut self, pos: SeekFrom) -> Result<u64, std::io::Error> {
        match pos {
            SeekFrom::Start(x) => self.cursor = x.min(self.plaintext_size),
            SeekFrom::Current(x) => {
                if x < 0 {
                    let abs = x.unsigned_abs();
                    if abs > self.cursor {
                        return Err(std::io::Error::new(
                            std::io::ErrorKind::InvalidInput,
                            "cannot seek past start",
                        ));
                    }
                    self.cursor -= abs;
                } else {
                    // saturating_add: a huge forward offset must clamp to the
                    // end (below) instead of overflowing in debug builds.
                    self.cursor = self.cursor.saturating_add(x as u64);
                }
            }
            SeekFrom::End(x) => {
                if x < 0 {
                    let abs = x.unsigned_abs();
                    if abs > self.plaintext_size {
                        return Err(std::io::Error::new(
                            std::io::ErrorKind::InvalidInput,
                            "cannot seek past start",
                        ));
                    }
                    self.cursor = self.plaintext_size - abs;
                } else {
                    self.cursor = self.plaintext_size.saturating_add(x as u64);
                }
            }
        }
        // Clamp any forward seek to the logical end of the plaintext.
        self.cursor = self.cursor.min(self.plaintext_size);
        Ok(self.cursor)
    }
}

View File

@@ -1,91 +0,0 @@
use std::io::SeekFrom;
use tokio::io::{AsyncSeek, AsyncSeekExt, AsyncWrite, AsyncWriteExt};
use crate::chacha::{ChaChaConfigv1, ChaChaHeaderv1};
/// Async streaming encryptor for the v1 ChaCha container: buffers plaintext
/// and encrypts/writes one chunk at a time to `inner`.
pub struct ChaChaWriterAsync<W: AsyncWrite + AsyncSeek + Unpin + Send> {
    /// Destination for the header and encrypted chunks.
    inner: W,
    /// Header written at offset 0; `plaintext_size` is patched by `finish`.
    header: ChaChaHeaderv1,
    /// XChaCha20-Poly1305 key used to encrypt every chunk.
    encryption_key: [u8; 32],
    /// Plaintext not yet encrypted (shorter than one chunk after `write` returns).
    buffer: Vec<u8>,
    /// Running total of plaintext bytes accepted, for the final header.
    plaintext_bytes_written: u64,
}
impl<W: AsyncWrite + AsyncSeek + Unpin + Send> ChaChaWriterAsync<W> {
    /// Wrap `inner`, writing a placeholder header (with `plaintext_size = 0`)
    /// at its current start; [`finish`](Self::finish) patches the real size in.
    pub async fn new(mut inner: W, encryption_key: [u8; 32]) -> Result<Self, std::io::Error> {
        let header = ChaChaHeaderv1 {
            config: ChaChaConfigv1::default(),
            plaintext_size: 0,
        };
        inner.write_all(&serialize_header(header)?).await?;
        Ok(Self {
            inner,
            header,
            encryption_key,
            buffer: Vec::new(),
            plaintext_bytes_written: 0,
        })
    }
    /// Buffer `buf`, encrypting and writing every complete chunk.
    pub async fn write(&mut self, buf: &[u8]) -> Result<(), std::io::Error> {
        self.buffer.extend_from_slice(buf);
        self.plaintext_bytes_written += buf.len() as u64;
        let chunk_size = self.header.config.chunk_size as usize;
        // Encrypt all complete chunks, then drain them in one pass instead of
        // shifting the buffer once per chunk (avoids O(n^2) on large writes).
        let mut consumed = 0;
        while self.buffer.len() - consumed >= chunk_size {
            let encrypted =
                encrypt_chunk(&self.encryption_key, &self.buffer[consumed..consumed + chunk_size])?;
            self.inner.write_all(&encrypted).await?;
            consumed += chunk_size;
        }
        self.buffer.drain(..consumed);
        Ok(())
    }
    /// Encrypt and write any buffered plaintext, patch the header with the
    /// final `plaintext_size`, flush, then return the inner writer.
    pub async fn finish(mut self) -> Result<W, std::io::Error> {
        if !self.buffer.is_empty() {
            let encrypted = encrypt_chunk(&self.encryption_key, &self.buffer)?;
            self.inner.write_all(&encrypted).await?;
        }
        self.inner.seek(SeekFrom::Start(0)).await?;
        let header_bytes = serialize_header(ChaChaHeaderv1 {
            config: self.header.config,
            plaintext_size: self.plaintext_bytes_written,
        })?;
        self.inner.write_all(&header_bytes).await?;
        // Flush so buffered writers (e.g. tokio::io::BufWriter) actually
        // persist the patched header before the writer is handed back.
        self.inner.flush().await?;
        Ok(self.inner)
    }
}
/// Encrypt one plaintext chunk with a fresh random 24-byte nonce, returning
/// `nonce || ciphertext` (the ciphertext includes the Poly1305 tag).
fn encrypt_chunk(key: &[u8; 32], plaintext: &[u8]) -> Result<Vec<u8>, std::io::Error> {
    use chacha20poly1305::{
        XChaCha20Poly1305,
        aead::{Aead, AeadCore, KeyInit, OsRng},
    };
    let cipher = XChaCha20Poly1305::new(chacha20poly1305::Key::from_slice(key));
    let nonce = XChaCha20Poly1305::generate_nonce(&mut OsRng);
    let ciphertext = cipher
        .encrypt(&nonce, plaintext)
        .map_err(|_| std::io::Error::other("encryption failed"))?;
    let mut framed = Vec::with_capacity(nonce.len() + ciphertext.len());
    framed.extend_from_slice(&nonce);
    framed.extend_from_slice(&ciphertext);
    Ok(framed)
}
fn serialize_header(header: ChaChaHeaderv1) -> Result<Vec<u8>, std::io::Error> {
use binrw::BinWriterExt;
use std::io::Cursor;
let mut buf = Cursor::new(Vec::new());
buf.write_le(&header).map_err(std::io::Error::other)?;
Ok(buf.into_inner())
}

View File

@@ -1,260 +0,0 @@
use std::io::{Seek, SeekFrom, Write};
use crate::chacha::{ChaChaConfigv1, ChaChaHeaderv1};
/// Generate a random 32-byte encryption key suitable for use with
/// [`ChaChaWriterv1`].
// Fixed: the intra-doc link previously pointed at `ChaChaWriter`, which is
// not a type in this crate (the writer is `ChaChaWriterv1`).
pub fn generate_key() -> [u8; 32] {
    use chacha20poly1305::{KeyInit, XChaCha20Poly1305, aead::OsRng};
    XChaCha20Poly1305::generate_key(&mut OsRng).into()
}
/// Streaming encryptor for the v1 ChaCha container over any `Write + Seek`
/// sink: buffers plaintext and encrypts/writes one chunk at a time.
pub struct ChaChaWriterv1<W: Write + Seek> {
    /// Destination for the header and encrypted chunks.
    inner: W,
    /// Header written at offset 0; `plaintext_size` is patched by `finish`.
    header: ChaChaHeaderv1,
    /// XChaCha20-Poly1305 key used to encrypt every chunk.
    encryption_key: [u8; 32],
    /// Plaintext not yet encrypted (shorter than one chunk after `write` returns).
    buffer: Vec<u8>,
    /// Running total of plaintext bytes accepted, for the final header.
    plaintext_bytes_written: u64,
}
impl<W: Write + Seek> ChaChaWriterv1<W> {
    /// Wrap `inner`, writing a placeholder header (with `plaintext_size = 0`)
    /// at its current start; [`finish`](Self::finish) patches the real size in.
    ///
    /// # Errors
    /// Fails if serializing or writing the header fails.
    pub fn new(mut inner: W, encryption_key: [u8; 32]) -> Result<Self, std::io::Error> {
        use binrw::BinWriterExt;
        let header = ChaChaHeaderv1 {
            config: ChaChaConfigv1::default(),
            plaintext_size: 0,
        };
        inner.write_le(&header).map_err(std::io::Error::other)?;
        Ok(Self {
            inner,
            header,
            encryption_key,
            buffer: Vec::new(),
            plaintext_bytes_written: 0,
        })
    }
    /// Encrypt and write any buffered plaintext, patch the header with the
    /// final `plaintext_size`, then return the inner writer.
    pub fn finish(mut self) -> Result<W, std::io::Error> {
        use binrw::BinWriterExt;
        self.flush_buffer()?;
        // Rewind and overwrite the placeholder header with the real size.
        self.inner.seek(SeekFrom::Start(0))?;
        let header = ChaChaHeaderv1 {
            config: self.header.config,
            plaintext_size: self.plaintext_bytes_written,
        };
        self.inner
            .write_le(&header)
            .map_err(std::io::Error::other)?;
        Ok(self.inner)
    }
    /// Encrypt one plaintext chunk with a fresh random nonce; the result is
    /// `nonce || ciphertext` (ciphertext includes the Poly1305 tag).
    fn encrypt_chunk(&self, plaintext: &[u8]) -> Result<Vec<u8>, std::io::Error> {
        use chacha20poly1305::{
            XChaCha20Poly1305,
            aead::{Aead, AeadCore, KeyInit, OsRng},
        };
        let nonce = XChaCha20Poly1305::generate_nonce(&mut OsRng);
        let key = chacha20poly1305::Key::from_slice(&self.encryption_key);
        let cipher = XChaCha20Poly1305::new(key);
        let ciphertext = cipher
            .encrypt(&nonce, plaintext)
            .map_err(|_| std::io::Error::other("encryption failed"))?;
        let mut output = Vec::with_capacity(nonce.len() + ciphertext.len());
        output.extend_from_slice(&nonce);
        output.extend_from_slice(&ciphertext);
        Ok(output)
    }
    /// Encrypt and write whatever is buffered as one (possibly short) chunk.
    fn flush_buffer(&mut self) -> Result<(), std::io::Error> {
        if !self.buffer.is_empty() {
            let encrypted = self.encrypt_chunk(&self.buffer)?;
            self.inner.write_all(&encrypted)?;
            self.buffer.clear();
        }
        Ok(())
    }
}
impl<W: Write + Seek> Write for ChaChaWriterv1<W> {
    /// Buffer `buf`, encrypting and writing every complete chunk.
    ///
    /// Always reports the full `buf.len()` as written: every byte is either
    /// encrypted immediately or held in the internal buffer.
    fn write(&mut self, buf: &[u8]) -> Result<usize, std::io::Error> {
        self.buffer.extend_from_slice(buf);
        self.plaintext_bytes_written += buf.len() as u64;
        let chunk_size = self.header.config.chunk_size as usize;
        // Encrypt all complete chunks, then drain them in a single pass
        // instead of shifting the buffer once per chunk (avoids O(n^2) on
        // large writes).
        let mut consumed = 0;
        while self.buffer.len() - consumed >= chunk_size {
            let encrypted = self.encrypt_chunk(&self.buffer[consumed..consumed + chunk_size])?;
            self.inner.write_all(&encrypted)?;
            consumed += chunk_size;
        }
        self.buffer.drain(..consumed);
        Ok(buf.len())
    }
    /// Encrypts and flushes any buffered plaintext as a partial chunk.
    ///
    /// Prefer [`finish`](Self::finish) to retrieve the inner writer after
    /// all data has been written. Calling `flush` multiple times will produce
    /// multiple small encrypted chunks for the same partial data.
    fn flush(&mut self) -> Result<(), std::io::Error> {
        self.flush_buffer()?;
        self.inner.flush()
    }
}
// Round-trip and random-access tests for the writer/reader pair.
#[cfg(test)]
#[expect(clippy::unwrap_used)]
mod tests {
    use std::io::{Cursor, SeekFrom, Write};
    use super::ChaChaWriterv1;
    use crate::{AsyncReader, AsyncSeekReader, chacha::ChaChaReaderv1};
    // Fixed key so tests are deterministic apart from random nonces.
    const KEY: [u8; 32] = [42u8; 32];
    // Encrypt `data` into an in-memory buffer, rewound for reading.
    fn encrypt(data: &[u8]) -> Cursor<Vec<u8>> {
        let mut writer = ChaChaWriterv1::new(Cursor::new(Vec::new()), KEY).unwrap();
        writer.write_all(data).unwrap();
        let mut buf = writer.finish().unwrap();
        buf.set_position(0);
        buf
    }
    // Decrypt the whole buffer with the shared test key.
    async fn decrypt_all(buf: Cursor<Vec<u8>>) -> Vec<u8> {
        let mut reader = ChaChaReaderv1::new(buf, KEY).unwrap();
        reader.read_to_end().await.unwrap()
    }
    #[tokio::test]
    async fn roundtrip_empty() {
        let buf = encrypt(&[]);
        // Header present but no chunks
        assert!(!buf.get_ref().is_empty());
        assert!(decrypt_all(buf).await.is_empty());
    }
    #[tokio::test]
    async fn roundtrip_small() {
        let data = b"hello, world!";
        assert_eq!(decrypt_all(encrypt(data)).await, data);
    }
    #[tokio::test]
    async fn roundtrip_exact_chunk() {
        // Exactly one default-sized (64 KiB) chunk.
        let data = vec![0xABu8; 65536];
        assert_eq!(decrypt_all(encrypt(&data)).await, data);
    }
    #[tokio::test]
    async fn roundtrip_multi_chunk() {
        // 2.5 chunks
        let data: Vec<u8> = (0u8..=255).cycle().take(65536 * 2 + 1000).collect();
        assert_eq!(decrypt_all(encrypt(&data)).await, data);
    }
    #[tokio::test]
    async fn roundtrip_incremental_writes() {
        // Write one byte at a time
        let data: Vec<u8> = (0u8..200).collect();
        let mut writer = ChaChaWriterv1::new(Cursor::new(Vec::new()), KEY).unwrap();
        for byte in &data {
            writer.write_all(&[*byte]).unwrap();
        }
        let mut buf = writer.finish().unwrap();
        buf.set_position(0);
        assert_eq!(decrypt_all(buf).await, data);
    }
    #[tokio::test]
    async fn wrong_key_fails() {
        // AEAD authentication must reject a mismatched key.
        let buf = encrypt(b"secret data");
        let mut reader = ChaChaReaderv1::new(buf, [0u8; 32]).unwrap();
        assert!(reader.read_to_end().await.is_err());
    }
    #[tokio::test]
    async fn header_magic_checked() {
        // Corrupt the magic bytes — reader should fail
        let mut buf = encrypt(b"data");
        buf.get_mut()[0] = 0xFF;
        buf.set_position(0);
        assert!(ChaChaReaderv1::new(buf, KEY).is_err());
    }
    #[tokio::test]
    async fn seek_from_start() {
        let data: Vec<u8> = (0u8..100).collect();
        let mut reader = ChaChaReaderv1::new(encrypt(&data), KEY).unwrap();
        reader.seek(SeekFrom::Start(50)).await.unwrap();
        let mut buf = [0u8; 10];
        let mut read = 0;
        while read < buf.len() {
            read += reader.read(&mut buf[read..]).await.unwrap();
        }
        assert_eq!(buf, data[50..60]);
    }
    #[tokio::test]
    async fn seek_from_end() {
        let data: Vec<u8> = (0u8..100).collect();
        let mut reader = ChaChaReaderv1::new(encrypt(&data), KEY).unwrap();
        reader.seek(SeekFrom::End(-10)).await.unwrap();
        assert_eq!(reader.read_to_end().await.unwrap(), &data[90..]);
    }
    #[tokio::test]
    async fn seek_across_chunk_boundary() {
        // Seek to 6 bytes before the end of chunk 0, read 12 bytes spanning into chunk 1
        let data: Vec<u8> = (0u8..=255).cycle().take(65536 + 500).collect();
        let mut reader = ChaChaReaderv1::new(encrypt(&data), KEY).unwrap();
        reader.seek(SeekFrom::Start(65530)).await.unwrap();
        let mut buf = vec![0u8; 12];
        let mut read = 0;
        while read < buf.len() {
            read += reader.read(&mut buf[read..]).await.unwrap();
        }
        assert_eq!(buf, data[65530..65542]);
    }
    #[tokio::test]
    async fn seek_current() {
        let data: Vec<u8> = (0u8..=255).cycle().take(200).collect();
        let mut reader = ChaChaReaderv1::new(encrypt(&data), KEY).unwrap();
        // Read 10, seek back 5, read 5 — should get bytes 5..10
        let mut first = [0u8; 10];
        let mut n = 0;
        while n < first.len() {
            n += reader.read(&mut first[n..]).await.unwrap();
        }
        reader.seek(SeekFrom::Current(-5)).await.unwrap();
        let mut second = [0u8; 5];
        n = 0;
        while n < second.len() {
            n += reader.read(&mut second[n..]).await.unwrap();
        }
        assert_eq!(second, data[5..10]);
    }
    #[tokio::test]
    async fn seek_past_end_clamps() {
        // Seeking beyond the end clamps to plaintext_size; reads then hit EOF.
        let data = b"hello";
        let mut reader = ChaChaReaderv1::new(encrypt(data), KEY).unwrap();
        let pos = reader.seek(SeekFrom::Start(9999)).await.unwrap();
        assert_eq!(pos, data.len() as u64);
        assert_eq!(reader.read_to_end().await.unwrap(), b"");
    }
}

View File

@@ -1,7 +1,2 @@
mod asyncreader;
pub use asyncreader::*;
mod s3reader;
pub use s3reader::*;
pub mod chacha;

View File

@@ -1,181 +0,0 @@
use aws_sdk_s3::config::{BehaviorVersion, Credentials, Region};
use smartstring::{LazyCompact, SmartString};
use std::{fmt::Debug, io::SeekFrom, sync::Arc};
use crate::{AsyncReader, AsyncSeekReader};
//
// MARK: client
//
/// An interface to an S3 bucket.
///
/// TODO: S3 is slow and expensive. Ideally, we'll have this struct cache data
/// so we don't have to download anything twice. This is, however, complicated,
/// and doesn't fully solve the "expensive" problem.
pub struct S3Client {
    /// Configured AWS SDK client used for all requests.
    pub client: aws_sdk_s3::Client,
    /// Bucket name every request targets.
    bucket: SmartString<LazyCompact>,
    /// maximum number of bytes to use for cached data
    // NOTE(review): only stored and shown in Debug here — no caching is
    // implemented in this file yet (see TODO above).
    cache_limit_bytes: usize,
}
impl Debug for S3Client {
    // Hand-written impl so the inner `aws_sdk_s3::Client` is omitted from
    // the output; only `bucket` and `cache_limit_bytes` are printed.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("S3Client")
            .field("bucket", &self.bucket)
            .field("cache_limit_bytes", &self.cache_limit_bytes)
            .finish()
    }
}
impl S3Client {
    /// Build a client for `bucket`.
    ///
    /// `endpoint` overrides the AWS endpoint (with path-style addressing
    /// forced) for S3-compatible services; `cache_limit_bytes` is recorded
    /// for future caching and is not used by this client yet.
    pub async fn new(
        bucket: &str,
        endpoint: Option<&str>,
        region: &str,
        access_key_id: &str,
        secret_access_key: &str,
        cache_limit_bytes: usize,
    ) -> Arc<Self> {
        let client = {
            let mut s3_config = aws_sdk_s3::config::Builder::new()
                .behavior_version(BehaviorVersion::latest())
                .region(Region::new(region.to_owned()))
                .credentials_provider(Credentials::new(
                    access_key_id,
                    secret_access_key,
                    None,
                    None,
                    "pile",
                ));
            if let Some(ep) = endpoint {
                s3_config = s3_config.endpoint_url(ep).force_path_style(true);
            }
            aws_sdk_s3::Client::from_conf(s3_config.build())
        };
        // Trailing expression instead of an explicit `return` (idiomatic Rust).
        Arc::new(Self {
            bucket: bucket.into(),
            client,
            cache_limit_bytes,
        })
    }
    /// Name of the bucket this client targets.
    pub fn bucket(&self) -> &str {
        &self.bucket
    }
    /// HEAD the object to learn its size, returning a lazy ranged reader.
    ///
    /// # Errors
    /// Any S3 error from the HEAD request is surfaced as `std::io::Error`.
    pub async fn get(self: &Arc<Self>, key: &str) -> Result<S3Reader, std::io::Error> {
        let head = self
            .client
            .head_object()
            .bucket(self.bucket.as_str())
            .key(key)
            .send()
            .await
            .map_err(std::io::Error::other)?;
        // NOTE(review): a missing content_length degrades to size 0, so
        // reads on such an object return EOF immediately — confirm intended.
        let size = head.content_length().unwrap_or(0) as u64;
        Ok(S3Reader {
            client: self.clone(),
            bucket: self.bucket.clone(),
            key: key.into(),
            cursor: 0,
            size,
        })
    }
}
//
// MARK: reader
//
/// Lazy reader over one S3 object: each `read` issues a ranged GET at the
/// current cursor, so only the requested byte ranges are downloaded.
pub struct S3Reader {
    /// Client (and bucket) used to issue GET requests.
    pub client: Arc<S3Client>,
    /// Bucket holding the object.
    pub bucket: SmartString<LazyCompact>,
    /// Object key.
    pub key: SmartString<LazyCompact>,
    /// Current byte offset into the object.
    pub cursor: u64,
    /// Object size in bytes, learned from the HEAD request in `S3Client::get`.
    pub size: u64,
}
impl AsyncReader for S3Reader {
    /// Read up to `buf.len()` bytes at the cursor via one S3 ranged GET,
    /// advancing the cursor by the number of bytes copied.
    ///
    /// Returns 0 at end of object or for an empty buffer.
    async fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
        let len_left = self.size.saturating_sub(self.cursor);
        if len_left == 0 || buf.is_empty() {
            return Ok(0);
        }
        let start_byte = self.cursor;
        let len_to_read = (buf.len() as u64).min(len_left);
        // HTTP Range is inclusive on both ends.
        let end_byte = start_byte + len_to_read - 1;
        let resp = self
            .client
            .client
            .get_object()
            .bucket(self.bucket.as_str())
            .key(self.key.as_str())
            .range(format!("bytes={start_byte}-{end_byte}"))
            .send()
            .await
            .map_err(std::io::Error::other)?;
        let bytes = resp
            .body
            .collect()
            .await
            .map(|x| x.into_bytes())
            .map_err(std::io::Error::other)?;
        // Defensively cap at buf.len() in case the server returns more
        // bytes than requested.
        let n = bytes.len().min(buf.len());
        buf[..n].copy_from_slice(&bytes[..n]);
        self.cursor += n as u64;
        Ok(n)
    }
}
impl AsyncSeekReader for S3Reader {
    /// Move the read cursor. Seeking past the end clamps to `size`; seeking
    /// before the start is an `InvalidInput` error.
    async fn seek(&mut self, pos: SeekFrom) -> Result<u64, std::io::Error> {
        match pos {
            SeekFrom::Start(x) => self.cursor = x.min(self.size),
            SeekFrom::Current(x) => {
                if x < 0 {
                    let abs = x.unsigned_abs();
                    if abs > self.cursor {
                        return Err(std::io::Error::new(
                            std::io::ErrorKind::InvalidInput,
                            "cannot seek past start",
                        ));
                    }
                    self.cursor -= abs;
                } else {
                    // saturating_add: a huge forward offset must clamp to the
                    // end (below) instead of overflowing in debug builds.
                    self.cursor = self.cursor.saturating_add(x as u64);
                }
            }
            // Consistency: use the imported `SeekFrom` like the other arms
            // (was fully qualified `std::io::SeekFrom::End`).
            SeekFrom::End(x) => {
                if x < 0 {
                    let abs = x.unsigned_abs();
                    if abs > self.size {
                        return Err(std::io::Error::new(
                            std::io::ErrorKind::InvalidInput,
                            "cannot seek past start",
                        ));
                    }
                    self.cursor = self.size - abs;
                } else {
                    self.cursor = self.size.saturating_add(x as u64);
                }
            }
        }
        // Clamp any forward seek to the end of the object.
        self.cursor = self.cursor.min(self.size);
        Ok(self.cursor)
    }
}