Add S3 encryption
This commit is contained in:
15
crates/pile-io/Cargo.toml
Normal file
15
crates/pile-io/Cargo.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
[package]
|
||||
name = "pile-io"
|
||||
version = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
tokio = { workspace = true }
|
||||
smartstring = { workspace = true }
|
||||
aws-sdk-s3 = { workspace = true }
|
||||
chacha20poly1305 = { workspace = true }
|
||||
binrw = { workspace = true }
|
||||
75
crates/pile-io/src/asyncreader.rs
Normal file
75
crates/pile-io/src/asyncreader.rs
Normal file
@@ -0,0 +1,75 @@
|
||||
use std::io::{Read, Seek, SeekFrom};
|
||||
use tokio::runtime::Handle;
|
||||
|
||||
//
|
||||
// MARK: asyncreader
|
||||
//
|
||||
|
||||
/// An `async` equivalent of [std::io::Read].
pub trait AsyncReader: Send {
    /// Read a chunk of bytes into `buf`, resolving to how many bytes were
    /// written. [`read_to_end`](Self::read_to_end) treats `Ok(0)` as the end
    /// of the stream.
    fn read(
        &mut self,
        buf: &mut [u8],
    ) -> impl Future<Output = Result<usize, std::io::Error>> + Send;

    /// Read all remaining bytes into a `Vec`.
    ///
    /// Calls [`read`](Self::read) repeatedly with a 64 KiB scratch buffer
    /// until it reports zero bytes. The first error is returned as-is and
    /// the bytes gathered so far are dropped.
    fn read_to_end(&mut self) -> impl Future<Output = Result<Vec<u8>, std::io::Error>> + Send {
        async {
            let mut scratch = vec![0u8; 65536];
            let mut collected = Vec::new();
            // `1..` matches every non-zero count; a zero-length read ends the loop.
            while let filled @ 1.. = self.read(&mut scratch).await? {
                collected.extend_from_slice(&scratch[..filled]);
            }
            Ok(collected)
        }
    }
}
|
||||
|
||||
/// An `async` equivalent of [std::io::Read] + [std::io::Seek].
|
||||
pub trait AsyncSeekReader: AsyncReader {
|
||||
fn seek(&mut self, pos: SeekFrom) -> impl Future<Output = Result<u64, std::io::Error>> + Send;
|
||||
}
|
||||
|
||||
//
|
||||
// MARK: sync bridge
|
||||
//
|
||||
|
||||
/// Turn an async [Reader] into a sync [Read] + [Seek].
|
||||
///
|
||||
/// Never use this outside of [tokio::task::spawn_blocking],
|
||||
/// the async runtime will deadlock if this struct blocks
|
||||
/// the runtime.
|
||||
pub struct SyncReadBridge<R: AsyncReader> {
|
||||
inner: R,
|
||||
handle: Handle,
|
||||
}
|
||||
|
||||
impl<R: AsyncReader> SyncReadBridge<R> {
|
||||
/// Creates a new adapter using a handle to the current runtime.
|
||||
/// Panics if called outside of a tokio context.
|
||||
pub fn new_current(inner: R) -> Self {
|
||||
Self::new(inner, Handle::current())
|
||||
}
|
||||
|
||||
/// Creates a new adapter using a handle to an existing runtime.
|
||||
pub fn new(inner: R, handle: Handle) -> Self {
|
||||
Self { inner, handle }
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: AsyncReader> Read for SyncReadBridge<R> {
    /// Blocks the calling thread on the stored runtime handle until the
    /// wrapped reader's async `read` resolves.
    fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
        let Self { inner, handle } = self;
        handle.block_on(inner.read(buf))
    }
}
|
||||
|
||||
impl<R: AsyncReader + AsyncSeekReader> Seek for SyncReadBridge<R> {
    /// Blocks the calling thread on the stored runtime handle until the
    /// wrapped reader's async `seek` resolves.
    fn seek(&mut self, pos: SeekFrom) -> Result<u64, std::io::Error> {
        let Self { inner, handle } = self;
        handle.block_on(inner.seek(pos))
    }
}
|
||||
205
crates/pile-io/src/chachareader.rs
Normal file
205
crates/pile-io/src/chachareader.rs
Normal file
@@ -0,0 +1,205 @@
|
||||
use std::io::{Read, Seek, SeekFrom};
|
||||
|
||||
use binrw::binrw;
|
||||
|
||||
use crate::{AsyncReader, AsyncSeekReader};
|
||||
|
||||
//
|
||||
// MARK: header
|
||||
//
|
||||
|
||||
/// Serialized size of [`ChaChaHeader`] in bytes: 12 magic + 3×8 config + 8 plaintext_size.
|
||||
pub const HEADER_SIZE: usize = 44;
|
||||
|
||||
#[binrw]
|
||||
#[brw(little, magic = b"PileChaChav1")]
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct ChaChaHeader {
|
||||
pub chunk_size: u64,
|
||||
pub nonce_size: u64,
|
||||
pub tag_size: u64,
|
||||
pub plaintext_size: u64,
|
||||
}
|
||||
|
||||
//
|
||||
// MARK: config
|
||||
//
|
||||
|
||||
/// Chunk layout parameters shared by the ChaCha readers and writers.
#[derive(Debug, Clone, Copy)]
pub struct ChaChaReaderConfig {
    /// Plaintext bytes held by each full chunk.
    pub chunk_size: u64,

    /// Bytes of nonce stored in front of each chunk's ciphertext.
    pub nonce_size: u64,

    /// Bytes of authentication tag counted per chunk.
    pub tag_size: u64,
}

impl Default for ChaChaReaderConfig {
    /// 1 MiB chunks with 24-byte nonces and 16-byte tags.
    fn default() -> Self {
        const MIB: u64 = 1 << 20;
        Self {
            tag_size: 16,
            nonce_size: 24,
            chunk_size: MIB,
        }
    }
}

impl ChaChaReaderConfig {
    /// Size of one full encrypted chunk: plaintext size plus nonce and tag sizes.
    pub(crate) fn enc_chunk_size(&self) -> u64 {
        let overhead = self.nonce_size + self.tag_size;
        overhead + self.chunk_size
    }
}
|
||||
|
||||
impl From<ChaChaHeader> for ChaChaReaderConfig {
|
||||
fn from(h: ChaChaHeader) -> Self {
|
||||
Self {
|
||||
chunk_size: h.chunk_size,
|
||||
nonce_size: h.nonce_size,
|
||||
tag_size: h.tag_size,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// MARK: reader
|
||||
//
|
||||
|
||||
pub struct ChaChaReader<R: Read + Seek> {
|
||||
inner: R,
|
||||
config: ChaChaReaderConfig,
|
||||
data_offset: u64,
|
||||
encryption_key: [u8; 32],
|
||||
cursor: u64,
|
||||
plaintext_size: u64,
|
||||
cached_chunk: Option<(u64, Vec<u8>)>,
|
||||
}
|
||||
|
||||
impl<R: Read + Seek> ChaChaReader<R> {
|
||||
pub fn new(mut inner: R, encryption_key: [u8; 32]) -> Result<Self, std::io::Error> {
|
||||
use binrw::BinReaderExt;
|
||||
|
||||
inner.seek(SeekFrom::Start(0))?;
|
||||
let header: ChaChaHeader = inner.read_le().map_err(std::io::Error::other)?;
|
||||
let data_offset = inner.stream_position()?;
|
||||
|
||||
Ok(Self {
|
||||
inner,
|
||||
config: header.into(),
|
||||
data_offset,
|
||||
encryption_key,
|
||||
cursor: 0,
|
||||
plaintext_size: header.plaintext_size,
|
||||
cached_chunk: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn fetch_chunk(&mut self, chunk_index: u64) -> Result<(), std::io::Error> {
|
||||
use chacha20poly1305::{KeyInit, XChaCha20Poly1305, XNonce, aead::Aead};
|
||||
|
||||
let enc_start = self.data_offset + chunk_index * self.config.enc_chunk_size();
|
||||
self.inner.seek(SeekFrom::Start(enc_start))?;
|
||||
|
||||
let mut encrypted = vec![0u8; self.config.enc_chunk_size() as usize];
|
||||
let n = self.read_exact_or_eof(&mut encrypted)?;
|
||||
encrypted.truncate(n);
|
||||
|
||||
if encrypted.len() < (self.config.nonce_size + self.config.tag_size) as usize {
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidData,
|
||||
"encrypted chunk too short",
|
||||
));
|
||||
}
|
||||
|
||||
let (nonce_bytes, ciphertext) = encrypted.split_at(self.config.nonce_size as usize);
|
||||
let nonce = XNonce::from_slice(nonce_bytes);
|
||||
let key = chacha20poly1305::Key::from_slice(&self.encryption_key);
|
||||
let cipher = XChaCha20Poly1305::new(key);
|
||||
let plaintext = cipher.decrypt(nonce, ciphertext).map_err(|_| {
|
||||
std::io::Error::new(std::io::ErrorKind::InvalidData, "decryption failed")
|
||||
})?;
|
||||
|
||||
self.cached_chunk = Some((chunk_index, plaintext));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn read_exact_or_eof(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
|
||||
let mut total = 0;
|
||||
while total < buf.len() {
|
||||
match self.inner.read(&mut buf[total..])? {
|
||||
0 => break,
|
||||
n => total += n,
|
||||
}
|
||||
}
|
||||
Ok(total)
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Read + Seek + Send> AsyncReader for ChaChaReader<R> {
    /// Copy decrypted bytes at the cursor into `buf`.
    ///
    /// A single call never crosses a chunk boundary, so it may return fewer
    /// bytes than requested. Returns `Ok(0)` only when `buf` is empty or the
    /// cursor has reached the header's `plaintext_size`.
    ///
    /// # Errors
    ///
    /// Propagates errors from fetching and decrypting the chunk. Returns
    /// `InvalidData` for a zero chunk size and `UnexpectedEof` when the
    /// decrypted chunk holds no data at the cursor even though the header
    /// claims more plaintext.
    async fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
        let remaining = self.plaintext_size.saturating_sub(self.cursor);
        if remaining == 0 || buf.is_empty() {
            return Ok(0);
        }

        let chunk_size = self.config.chunk_size;
        if chunk_size == 0 {
            // Would otherwise divide by zero below.
            return Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                "chunk size must be nonzero",
            ));
        }
        let chunk_index = self.cursor / chunk_size;
        let offset_in_chunk = self.cursor % chunk_size;

        let cached = matches!(&self.cached_chunk, Some((idx, _)) if *idx == chunk_index);
        if !cached {
            self.fetch_chunk(chunk_index)?;
        }
        let Some((_, chunk_data)) = self.cached_chunk.as_ref() else {
            return Err(std::io::Error::other("chunk cache empty after fetch"));
        };

        // The header's `plaintext_size` is not authenticated and may claim
        // more data than the chunk holds. Report that as an error instead of
        // underflowing, or returning `Ok(0)` in the middle of the stream.
        let available = usize::try_from(offset_in_chunk)
            .ok()
            .and_then(|offset| chunk_data.get(offset..))
            .filter(|rest| !rest.is_empty())
            .ok_or_else(|| {
                std::io::Error::new(
                    std::io::ErrorKind::UnexpectedEof,
                    "chunk ends before the recorded plaintext size",
                )
            })?;

        // Never hand out bytes beyond the recorded plaintext size.
        let limit = usize::try_from(remaining).unwrap_or(usize::MAX);
        let to_copy = available.len().min(buf.len()).min(limit);

        buf[..to_copy].copy_from_slice(&available[..to_copy]);
        self.cursor += to_copy as u64;
        Ok(to_copy)
    }
}
|
||||
|
||||
impl<R: Read + Seek + Send> AsyncSeekReader for ChaChaReader<R> {
|
||||
async fn seek(&mut self, pos: SeekFrom) -> Result<u64, std::io::Error> {
|
||||
match pos {
|
||||
SeekFrom::Start(x) => self.cursor = x.min(self.plaintext_size),
|
||||
|
||||
SeekFrom::Current(x) => {
|
||||
if x < 0 {
|
||||
let abs = x.unsigned_abs();
|
||||
if abs > self.cursor {
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
"cannot seek past start",
|
||||
));
|
||||
}
|
||||
self.cursor -= abs;
|
||||
} else {
|
||||
self.cursor += x as u64;
|
||||
}
|
||||
}
|
||||
|
||||
SeekFrom::End(x) => {
|
||||
if x < 0 {
|
||||
let abs = x.unsigned_abs();
|
||||
if abs > self.plaintext_size {
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
"cannot seek past start",
|
||||
));
|
||||
}
|
||||
self.cursor = self.plaintext_size - abs;
|
||||
} else {
|
||||
self.cursor = self.plaintext_size + x as u64;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.cursor = self.cursor.min(self.plaintext_size);
|
||||
Ok(self.cursor)
|
||||
}
|
||||
}
|
||||
163
crates/pile-io/src/chachareader_async.rs
Normal file
163
crates/pile-io/src/chachareader_async.rs
Normal file
@@ -0,0 +1,163 @@
|
||||
use std::io::SeekFrom;
|
||||
|
||||
use crate::{AsyncReader, AsyncSeekReader, ChaChaHeader, ChaChaReaderConfig, HEADER_SIZE};
|
||||
|
||||
pub struct ChaChaReaderAsync<R: AsyncSeekReader> {
|
||||
inner: R,
|
||||
config: ChaChaReaderConfig,
|
||||
data_offset: u64,
|
||||
encryption_key: [u8; 32],
|
||||
cursor: u64,
|
||||
plaintext_size: u64,
|
||||
cached_chunk: Option<(u64, Vec<u8>)>,
|
||||
}
|
||||
|
||||
impl<R: AsyncSeekReader> ChaChaReaderAsync<R> {
|
||||
pub async fn new(mut inner: R, encryption_key: [u8; 32]) -> Result<Self, std::io::Error> {
|
||||
use binrw::BinReaderExt;
|
||||
use std::io::Cursor;
|
||||
|
||||
inner.seek(SeekFrom::Start(0)).await?;
|
||||
let mut buf = [0u8; HEADER_SIZE];
|
||||
read_exact(&mut inner, &mut buf).await?;
|
||||
let header: ChaChaHeader = Cursor::new(&buf[..])
|
||||
.read_le()
|
||||
.map_err(std::io::Error::other)?;
|
||||
|
||||
Ok(Self {
|
||||
inner,
|
||||
config: header.into(),
|
||||
data_offset: HEADER_SIZE as u64,
|
||||
encryption_key,
|
||||
cursor: 0,
|
||||
plaintext_size: header.plaintext_size,
|
||||
cached_chunk: None,
|
||||
})
|
||||
}
|
||||
|
||||
async fn fetch_chunk(&mut self, chunk_index: u64) -> Result<(), std::io::Error> {
|
||||
use chacha20poly1305::{KeyInit, XChaCha20Poly1305, XNonce, aead::Aead};
|
||||
|
||||
let enc_start = self.data_offset + chunk_index * self.config.enc_chunk_size();
|
||||
self.inner.seek(SeekFrom::Start(enc_start)).await?;
|
||||
|
||||
let mut encrypted = vec![0u8; self.config.enc_chunk_size() as usize];
|
||||
let n = read_exact_or_eof(&mut self.inner, &mut encrypted).await?;
|
||||
encrypted.truncate(n);
|
||||
|
||||
if encrypted.len() < (self.config.nonce_size + self.config.tag_size) as usize {
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidData,
|
||||
"encrypted chunk too short",
|
||||
));
|
||||
}
|
||||
|
||||
let (nonce_bytes, ciphertext) = encrypted.split_at(self.config.nonce_size as usize);
|
||||
let nonce = XNonce::from_slice(nonce_bytes);
|
||||
let key = chacha20poly1305::Key::from_slice(&self.encryption_key);
|
||||
let cipher = XChaCha20Poly1305::new(key);
|
||||
let plaintext = cipher.decrypt(nonce, ciphertext).map_err(|_| {
|
||||
std::io::Error::new(std::io::ErrorKind::InvalidData, "decryption failed")
|
||||
})?;
|
||||
|
||||
self.cached_chunk = Some((chunk_index, plaintext));
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
async fn read_exact<R: AsyncReader>(inner: &mut R, buf: &mut [u8]) -> Result<(), std::io::Error> {
|
||||
let n = read_exact_or_eof(inner, buf).await?;
|
||||
if n < buf.len() {
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::UnexpectedEof,
|
||||
"unexpected EOF reading header",
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn read_exact_or_eof<R: AsyncReader>(
|
||||
inner: &mut R,
|
||||
buf: &mut [u8],
|
||||
) -> Result<usize, std::io::Error> {
|
||||
let mut total = 0;
|
||||
while total < buf.len() {
|
||||
match inner.read(&mut buf[total..]).await? {
|
||||
0 => break,
|
||||
n => total += n,
|
||||
}
|
||||
}
|
||||
Ok(total)
|
||||
}
|
||||
|
||||
impl<R: AsyncSeekReader> AsyncReader for ChaChaReaderAsync<R> {
    /// Copy decrypted bytes at the cursor into `buf`.
    ///
    /// A single call never crosses a chunk boundary, so it may return fewer
    /// bytes than requested. Returns `Ok(0)` only when `buf` is empty or the
    /// cursor has reached the header's `plaintext_size`.
    ///
    /// # Errors
    ///
    /// Propagates errors from fetching and decrypting the chunk. Returns
    /// `InvalidData` for a zero chunk size and `UnexpectedEof` when the
    /// decrypted chunk holds no data at the cursor even though the header
    /// claims more plaintext.
    async fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
        let remaining = self.plaintext_size.saturating_sub(self.cursor);
        if remaining == 0 || buf.is_empty() {
            return Ok(0);
        }

        let chunk_size = self.config.chunk_size;
        if chunk_size == 0 {
            // Would otherwise divide by zero below.
            return Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                "chunk size must be nonzero",
            ));
        }
        let chunk_index = self.cursor / chunk_size;
        let offset_in_chunk = self.cursor % chunk_size;

        let cached = matches!(&self.cached_chunk, Some((idx, _)) if *idx == chunk_index);
        if !cached {
            self.fetch_chunk(chunk_index).await?;
        }
        let Some((_, chunk_data)) = self.cached_chunk.as_ref() else {
            return Err(std::io::Error::other("chunk cache empty after fetch"));
        };

        // The header's `plaintext_size` is not authenticated and may claim
        // more data than the chunk holds. Report that as an error instead of
        // underflowing, or returning `Ok(0)` in the middle of the stream.
        let available = usize::try_from(offset_in_chunk)
            .ok()
            .and_then(|offset| chunk_data.get(offset..))
            .filter(|rest| !rest.is_empty())
            .ok_or_else(|| {
                std::io::Error::new(
                    std::io::ErrorKind::UnexpectedEof,
                    "chunk ends before the recorded plaintext size",
                )
            })?;

        // Never hand out bytes beyond the recorded plaintext size.
        let limit = usize::try_from(remaining).unwrap_or(usize::MAX);
        let to_copy = available.len().min(buf.len()).min(limit);

        buf[..to_copy].copy_from_slice(&available[..to_copy]);
        self.cursor += to_copy as u64;
        Ok(to_copy)
    }
}
|
||||
|
||||
impl<R: AsyncSeekReader> AsyncSeekReader for ChaChaReaderAsync<R> {
|
||||
async fn seek(&mut self, pos: SeekFrom) -> Result<u64, std::io::Error> {
|
||||
match pos {
|
||||
SeekFrom::Start(x) => self.cursor = x.min(self.plaintext_size),
|
||||
|
||||
SeekFrom::Current(x) => {
|
||||
if x < 0 {
|
||||
let abs = x.unsigned_abs();
|
||||
if abs > self.cursor {
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
"cannot seek past start",
|
||||
));
|
||||
}
|
||||
self.cursor -= abs;
|
||||
} else {
|
||||
self.cursor += x as u64;
|
||||
}
|
||||
}
|
||||
|
||||
SeekFrom::End(x) => {
|
||||
if x < 0 {
|
||||
let abs = x.unsigned_abs();
|
||||
if abs > self.plaintext_size {
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
"cannot seek past start",
|
||||
));
|
||||
}
|
||||
self.cursor = self.plaintext_size - abs;
|
||||
} else {
|
||||
self.cursor = self.plaintext_size + x as u64;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.cursor = self.cursor.min(self.plaintext_size);
|
||||
Ok(self.cursor)
|
||||
}
|
||||
}
|
||||
264
crates/pile-io/src/chachawriter.rs
Normal file
264
crates/pile-io/src/chachawriter.rs
Normal file
@@ -0,0 +1,264 @@
|
||||
use std::io::{Seek, SeekFrom, Write};
|
||||
|
||||
use crate::{ChaChaHeader, ChaChaReaderConfig};
|
||||
|
||||
/// Generate a random 32-byte encryption key suitable for use with [`ChaChaWriter`].
|
||||
pub fn generate_key() -> [u8; 32] {
|
||||
use chacha20poly1305::aead::OsRng;
|
||||
use chacha20poly1305::{KeyInit, XChaCha20Poly1305};
|
||||
XChaCha20Poly1305::generate_key(&mut OsRng).into()
|
||||
}
|
||||
|
||||
pub struct ChaChaWriter<W: Write + Seek> {
|
||||
inner: W,
|
||||
config: ChaChaReaderConfig,
|
||||
encryption_key: [u8; 32],
|
||||
buffer: Vec<u8>,
|
||||
plaintext_bytes_written: u64,
|
||||
}
|
||||
|
||||
impl<W: Write + Seek> ChaChaWriter<W> {
|
||||
pub fn new(mut inner: W, encryption_key: [u8; 32]) -> Result<Self, std::io::Error> {
|
||||
use binrw::BinWriterExt;
|
||||
|
||||
let config = ChaChaReaderConfig::default();
|
||||
let header = ChaChaHeader {
|
||||
chunk_size: config.chunk_size,
|
||||
nonce_size: config.nonce_size,
|
||||
tag_size: config.tag_size,
|
||||
plaintext_size: 0,
|
||||
};
|
||||
inner.write_le(&header).map_err(std::io::Error::other)?;
|
||||
|
||||
Ok(Self {
|
||||
inner,
|
||||
config,
|
||||
encryption_key,
|
||||
buffer: Vec::new(),
|
||||
plaintext_bytes_written: 0,
|
||||
})
|
||||
}
|
||||
|
||||
/// Encrypt and write any buffered plaintext, patch the header with the
|
||||
/// final `plaintext_size`, then return the inner writer.
|
||||
pub fn finish(mut self) -> Result<W, std::io::Error> {
|
||||
use binrw::BinWriterExt;
|
||||
|
||||
self.flush_buffer()?;
|
||||
|
||||
self.inner.seek(SeekFrom::Start(0))?;
|
||||
let header = ChaChaHeader {
|
||||
chunk_size: self.config.chunk_size,
|
||||
nonce_size: self.config.nonce_size,
|
||||
tag_size: self.config.tag_size,
|
||||
plaintext_size: self.plaintext_bytes_written,
|
||||
};
|
||||
self.inner
|
||||
.write_le(&header)
|
||||
.map_err(std::io::Error::other)?;
|
||||
|
||||
Ok(self.inner)
|
||||
}
|
||||
|
||||
fn encrypt_chunk(&self, plaintext: &[u8]) -> Result<Vec<u8>, std::io::Error> {
|
||||
use chacha20poly1305::{
|
||||
XChaCha20Poly1305,
|
||||
aead::{Aead, AeadCore, KeyInit, OsRng},
|
||||
};
|
||||
|
||||
let nonce = XChaCha20Poly1305::generate_nonce(&mut OsRng);
|
||||
let key = chacha20poly1305::Key::from_slice(&self.encryption_key);
|
||||
let cipher = XChaCha20Poly1305::new(key);
|
||||
let ciphertext = cipher
|
||||
.encrypt(&nonce, plaintext)
|
||||
.map_err(|_| std::io::Error::other("encryption failed"))?;
|
||||
|
||||
let mut output = Vec::with_capacity(nonce.len() + ciphertext.len());
|
||||
output.extend_from_slice(&nonce);
|
||||
output.extend_from_slice(&ciphertext);
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
fn flush_buffer(&mut self) -> Result<(), std::io::Error> {
|
||||
if !self.buffer.is_empty() {
|
||||
let encrypted = self.encrypt_chunk(&self.buffer)?;
|
||||
self.inner.write_all(&encrypted)?;
|
||||
self.buffer.clear();
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: Write + Seek> Write for ChaChaWriter<W> {
|
||||
fn write(&mut self, buf: &[u8]) -> Result<usize, std::io::Error> {
|
||||
self.buffer.extend_from_slice(buf);
|
||||
self.plaintext_bytes_written += buf.len() as u64;
|
||||
|
||||
let chunk_size = self.config.chunk_size as usize;
|
||||
while self.buffer.len() >= chunk_size {
|
||||
let encrypted = self.encrypt_chunk(&self.buffer[..chunk_size])?;
|
||||
self.inner.write_all(&encrypted)?;
|
||||
self.buffer.drain(..chunk_size);
|
||||
}
|
||||
|
||||
Ok(buf.len())
|
||||
}
|
||||
|
||||
/// Encrypts and flushes any buffered plaintext as a partial chunk.
|
||||
///
|
||||
/// Prefer [`finish`](Self::finish) to retrieve the inner writer after
|
||||
/// all data has been written. Calling `flush` multiple times will produce
|
||||
/// multiple small encrypted chunks for the same partial data.
|
||||
fn flush(&mut self) -> Result<(), std::io::Error> {
|
||||
self.flush_buffer()?;
|
||||
self.inner.flush()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
#[expect(clippy::unwrap_used)]
mod tests {
    use std::io::{Cursor, SeekFrom, Write};

    use super::ChaChaWriter;
    use crate::{AsyncReader, AsyncSeekReader, ChaChaReader, ChaChaReaderConfig};

    const KEY: [u8; 32] = [42u8; 32];

    /// Plaintext chunk size the writer uses. Derived from the config so the
    /// multi-chunk tests keep crossing chunk boundaries if the default changes.
    fn chunk_size() -> usize {
        ChaChaReaderConfig::default().chunk_size as usize
    }

    /// Deterministic test data. The period (251) is prime, so it does not
    /// divide the chunk size and bytes read from the wrong chunk would not
    /// match by accident.
    fn pattern(len: usize) -> Vec<u8> {
        (0u8..251).cycle().take(len).collect()
    }

    /// Encrypt `data` in one go and return the output rewound to the start.
    fn encrypt(data: &[u8]) -> Cursor<Vec<u8>> {
        let mut writer = ChaChaWriter::new(Cursor::new(Vec::new()), KEY).unwrap();
        writer.write_all(data).unwrap();
        let mut buf = writer.finish().unwrap();
        buf.set_position(0);
        buf
    }

    /// Decrypt everything in `buf` with the test key.
    async fn decrypt_all(buf: Cursor<Vec<u8>>) -> Vec<u8> {
        let mut reader = ChaChaReader::new(buf, KEY).unwrap();
        reader.read_to_end().await.unwrap()
    }

    /// Fill `buf` completely, failing the test instead of spinning forever
    /// if the reader hits EOF early.
    async fn read_full(reader: &mut ChaChaReader<Cursor<Vec<u8>>>, buf: &mut [u8]) {
        let mut filled = 0;
        while filled < buf.len() {
            let n = reader.read(&mut buf[filled..]).await.unwrap();
            assert_ne!(n, 0, "reader hit EOF before the buffer was filled");
            filled += n;
        }
    }

    #[tokio::test]
    async fn roundtrip_empty() {
        let buf = encrypt(&[]);
        // Header present but no chunks
        assert!(!buf.get_ref().is_empty());
        assert!(decrypt_all(buf).await.is_empty());
    }

    #[tokio::test]
    async fn roundtrip_small() {
        let data = b"hello, world!";
        assert_eq!(decrypt_all(encrypt(data)).await, data);
    }

    #[tokio::test]
    async fn roundtrip_exact_chunk() {
        // Exactly one full chunk, no trailing partial chunk
        let data = vec![0xABu8; chunk_size()];
        assert_eq!(decrypt_all(encrypt(&data)).await, data);
    }

    #[tokio::test]
    async fn roundtrip_multi_chunk() {
        // 2.5 chunks
        let data = pattern(chunk_size() * 2 + chunk_size() / 2);
        assert_eq!(decrypt_all(encrypt(&data)).await, data);
    }

    #[tokio::test]
    async fn roundtrip_incremental_writes() {
        // Write one byte at a time
        let data: Vec<u8> = (0u8..200).collect();
        let mut writer = ChaChaWriter::new(Cursor::new(Vec::new()), KEY).unwrap();
        for byte in &data {
            writer.write_all(&[*byte]).unwrap();
        }
        let mut buf = writer.finish().unwrap();
        buf.set_position(0);
        assert_eq!(decrypt_all(buf).await, data);
    }

    #[tokio::test]
    async fn wrong_key_fails() {
        let buf = encrypt(b"secret data");
        let mut reader = ChaChaReader::new(buf, [0u8; 32]).unwrap();
        assert!(reader.read_to_end().await.is_err());
    }

    #[tokio::test]
    async fn header_magic_checked() {
        // Corrupt the magic bytes — reader should fail
        let mut buf = encrypt(b"data");
        buf.get_mut()[0] = 0xFF;
        buf.set_position(0);
        assert!(ChaChaReader::new(buf, KEY).is_err());
    }

    #[tokio::test]
    async fn seek_from_start() {
        let data: Vec<u8> = (0u8..100).collect();
        let mut reader = ChaChaReader::new(encrypt(&data), KEY).unwrap();

        reader.seek(SeekFrom::Start(50)).await.unwrap();
        let mut buf = [0u8; 10];
        read_full(&mut reader, &mut buf).await;
        assert_eq!(buf, data[50..60]);
    }

    #[tokio::test]
    async fn seek_from_end() {
        let data: Vec<u8> = (0u8..100).collect();
        let mut reader = ChaChaReader::new(encrypt(&data), KEY).unwrap();

        reader.seek(SeekFrom::End(-10)).await.unwrap();
        assert_eq!(reader.read_to_end().await.unwrap(), &data[90..]);
    }

    #[tokio::test]
    async fn seek_across_chunk_boundary() {
        // Seek to 6 bytes before the end of chunk 0, read 12 bytes spanning into chunk 1
        let boundary = chunk_size();
        let data = pattern(boundary + 500);
        let mut reader = ChaChaReader::new(encrypt(&data), KEY).unwrap();

        let start = boundary - 6;
        reader.seek(SeekFrom::Start(start as u64)).await.unwrap();
        let mut buf = vec![0u8; 12];
        read_full(&mut reader, &mut buf).await;
        assert_eq!(buf, data[start..start + 12]);
    }

    #[tokio::test]
    async fn seek_current() {
        let data = pattern(200);
        let mut reader = ChaChaReader::new(encrypt(&data), KEY).unwrap();

        // Read 10, seek back 5, read 5 — should get bytes 5..10
        let mut first = [0u8; 10];
        read_full(&mut reader, &mut first).await;
        reader.seek(SeekFrom::Current(-5)).await.unwrap();
        let mut second = [0u8; 5];
        read_full(&mut reader, &mut second).await;
        assert_eq!(second, data[5..10]);
    }

    #[tokio::test]
    async fn seek_past_end_clamps() {
        let data = b"hello";
        let mut reader = ChaChaReader::new(encrypt(data), KEY).unwrap();

        let pos = reader.seek(SeekFrom::Start(9999)).await.unwrap();
        assert_eq!(pos, data.len() as u64);
        assert_eq!(reader.read_to_end().await.unwrap(), b"");
    }
}
|
||||
95
crates/pile-io/src/chachawriter_async.rs
Normal file
95
crates/pile-io/src/chachawriter_async.rs
Normal file
@@ -0,0 +1,95 @@
|
||||
use std::io::SeekFrom;
|
||||
|
||||
use tokio::io::{AsyncSeek, AsyncSeekExt, AsyncWrite, AsyncWriteExt};
|
||||
|
||||
use crate::{ChaChaHeader, ChaChaReaderConfig};
|
||||
|
||||
pub struct ChaChaWriterAsync<W: AsyncWrite + AsyncSeek + Unpin + Send> {
|
||||
inner: W,
|
||||
config: ChaChaReaderConfig,
|
||||
encryption_key: [u8; 32],
|
||||
buffer: Vec<u8>,
|
||||
plaintext_bytes_written: u64,
|
||||
}
|
||||
|
||||
impl<W: AsyncWrite + AsyncSeek + Unpin + Send> ChaChaWriterAsync<W> {
|
||||
pub async fn new(mut inner: W, encryption_key: [u8; 32]) -> Result<Self, std::io::Error> {
|
||||
let config = ChaChaReaderConfig::default();
|
||||
let header_bytes = serialize_header(ChaChaHeader {
|
||||
chunk_size: config.chunk_size,
|
||||
nonce_size: config.nonce_size,
|
||||
tag_size: config.tag_size,
|
||||
plaintext_size: 0,
|
||||
})?;
|
||||
inner.write_all(&header_bytes).await?;
|
||||
|
||||
Ok(Self {
|
||||
inner,
|
||||
config,
|
||||
encryption_key,
|
||||
buffer: Vec::new(),
|
||||
plaintext_bytes_written: 0,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn write(&mut self, buf: &[u8]) -> Result<(), std::io::Error> {
|
||||
self.buffer.extend_from_slice(buf);
|
||||
self.plaintext_bytes_written += buf.len() as u64;
|
||||
|
||||
let chunk_size = self.config.chunk_size as usize;
|
||||
while self.buffer.len() >= chunk_size {
|
||||
let encrypted = encrypt_chunk(&self.encryption_key, &self.buffer[..chunk_size])?;
|
||||
self.inner.write_all(&encrypted).await?;
|
||||
self.buffer.drain(..chunk_size);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Encrypt and write any buffered plaintext, patch the header with the
|
||||
/// final `plaintext_size`, then return the inner writer.
|
||||
pub async fn finish(mut self) -> Result<W, std::io::Error> {
|
||||
if !self.buffer.is_empty() {
|
||||
let encrypted = encrypt_chunk(&self.encryption_key, &self.buffer)?;
|
||||
self.inner.write_all(&encrypted).await?;
|
||||
}
|
||||
|
||||
self.inner.seek(SeekFrom::Start(0)).await?;
|
||||
let header_bytes = serialize_header(ChaChaHeader {
|
||||
chunk_size: self.config.chunk_size,
|
||||
nonce_size: self.config.nonce_size,
|
||||
tag_size: self.config.tag_size,
|
||||
plaintext_size: self.plaintext_bytes_written,
|
||||
})?;
|
||||
self.inner.write_all(&header_bytes).await?;
|
||||
|
||||
Ok(self.inner)
|
||||
}
|
||||
}
|
||||
|
||||
fn encrypt_chunk(key: &[u8; 32], plaintext: &[u8]) -> Result<Vec<u8>, std::io::Error> {
|
||||
use chacha20poly1305::{
|
||||
XChaCha20Poly1305,
|
||||
aead::{Aead, AeadCore, KeyInit, OsRng},
|
||||
};
|
||||
|
||||
let nonce = XChaCha20Poly1305::generate_nonce(&mut OsRng);
|
||||
let cipher = XChaCha20Poly1305::new(chacha20poly1305::Key::from_slice(key));
|
||||
let ciphertext = cipher
|
||||
.encrypt(&nonce, plaintext)
|
||||
.map_err(|_| std::io::Error::other("encryption failed"))?;
|
||||
|
||||
let mut output = Vec::with_capacity(nonce.len() + ciphertext.len());
|
||||
output.extend_from_slice(&nonce);
|
||||
output.extend_from_slice(&ciphertext);
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
fn serialize_header(header: ChaChaHeader) -> Result<Vec<u8>, std::io::Error> {
|
||||
use binrw::BinWriterExt;
|
||||
use std::io::Cursor;
|
||||
|
||||
let mut buf = Cursor::new(Vec::new());
|
||||
buf.write_le(&header).map_err(std::io::Error::other)?;
|
||||
Ok(buf.into_inner())
|
||||
}
|
||||
17
crates/pile-io/src/lib.rs
Normal file
17
crates/pile-io/src/lib.rs
Normal file
@@ -0,0 +1,17 @@
|
||||
mod asyncreader;
|
||||
pub use asyncreader::*;
|
||||
|
||||
mod s3reader;
|
||||
pub use s3reader::*;
|
||||
|
||||
mod chachareader;
|
||||
pub use chachareader::*;
|
||||
|
||||
mod chachawriter;
|
||||
pub use chachawriter::*;
|
||||
|
||||
mod chachareader_async;
|
||||
pub use chachareader_async::*;
|
||||
|
||||
mod chachawriter_async;
|
||||
pub use chachawriter_async::*;
|
||||
88
crates/pile-io/src/s3reader.rs
Normal file
88
crates/pile-io/src/s3reader.rs
Normal file
@@ -0,0 +1,88 @@
|
||||
use smartstring::{LazyCompact, SmartString};
|
||||
use std::{io::SeekFrom, sync::Arc};
|
||||
|
||||
use crate::{AsyncReader, AsyncSeekReader};
|
||||
|
||||
pub struct S3Reader {
|
||||
pub client: Arc<aws_sdk_s3::Client>,
|
||||
pub bucket: SmartString<LazyCompact>,
|
||||
pub key: SmartString<LazyCompact>,
|
||||
pub cursor: u64,
|
||||
pub size: u64,
|
||||
}
|
||||
|
||||
impl AsyncReader for S3Reader {
|
||||
async fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
|
||||
let len_left = self.size.saturating_sub(self.cursor);
|
||||
if len_left == 0 || buf.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let start_byte = self.cursor;
|
||||
let len_to_read = (buf.len() as u64).min(len_left);
|
||||
let end_byte = start_byte + len_to_read - 1;
|
||||
|
||||
let resp = self
|
||||
.client
|
||||
.get_object()
|
||||
.bucket(self.bucket.as_str())
|
||||
.key(self.key.as_str())
|
||||
.range(format!("bytes={start_byte}-{end_byte}"))
|
||||
.send()
|
||||
.await
|
||||
.map_err(std::io::Error::other)?;
|
||||
|
||||
let bytes = resp
|
||||
.body
|
||||
.collect()
|
||||
.await
|
||||
.map(|x| x.into_bytes())
|
||||
.map_err(std::io::Error::other)?;
|
||||
|
||||
let n = bytes.len().min(buf.len());
|
||||
buf[..n].copy_from_slice(&bytes[..n]);
|
||||
self.cursor += n as u64;
|
||||
Ok(n)
|
||||
}
|
||||
}
|
||||
|
||||
impl AsyncSeekReader for S3Reader {
    /// Move the cursor and return its new position.
    ///
    /// Targets past the end are clamped to `size`. No request is sent here;
    /// only the next read contacts S3.
    ///
    /// # Errors
    ///
    /// Returns `InvalidInput` if the target lies before the start of the
    /// object; the cursor is left unchanged in that case.
    async fn seek(&mut self, pos: SeekFrom) -> Result<u64, std::io::Error> {
        let (base, delta) = match pos {
            SeekFrom::Start(x) => (x, 0),
            SeekFrom::Current(d) => (self.cursor, d),
            SeekFrom::End(d) => (self.size, d),
        };

        let target = if delta < 0 {
            base.checked_sub(delta.unsigned_abs()).ok_or_else(|| {
                std::io::Error::new(
                    std::io::ErrorKind::InvalidInput,
                    "cannot seek past start",
                )
            })?
        } else {
            // `cursor` and `size` are public fields and may hold any value;
            // saturate instead of overflowing, the result is clamped anyway.
            base.saturating_add(delta.unsigned_abs())
        };

        self.cursor = target.min(self.size);
        Ok(self.cursor)
    }
}
|
||||
Reference in New Issue
Block a user