diff --git a/README.md b/README.md new file mode 120000 index 0000000..be3c437 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +crates/datapath/README.md \ No newline at end of file diff --git a/crates/datapath/README.md b/crates/datapath/README.md new file mode 100644 index 0000000..8bedf31 --- /dev/null +++ b/crates/datapath/README.md @@ -0,0 +1,89 @@ +# Datapath: type-safe structured paths + +Provides simple macros and utilities for type-safe structured paths. +This is intended for use with S3 and [DuckDB hive partitions](https://duckdb.org/docs/stable/data/partitioning/hive_partitioning), or simple S3 paths for [LanceDB](https://lancedb.com). + + +## Basic Usage + +```rust +use datapath::{datapath, Datapath}; +use uuid::Uuid; + +/// Define a datapath pattern +datapath! { + struct CaptureRaw(capture/user_id=Uuid/ts=i64/raw/2.0); +} + +// Create a datapath instance +let path = CaptureRaw { + user_id: Uuid::new_v4(), + ts: 1234567890, +}; + +println!("{}", path); // "capture/user_id=<uuid>/ts=1234567890/raw/2.0" + +let file = path.with_file("data.json"); +println!("{}", file); // "capture/user_id=<uuid>/ts=1234567890/raw/2.0/data.json" + +let parsed = CaptureRaw::parse("capture/user_id=550e8400-e29b-41d4-a716-446655440000/ts=1234567890/raw/2.0/data.json"); +match parsed { + Some(datapath_file) => { + println!("User ID: {}", datapath_file.path.user_id); + println!("Timestamp: {}", datapath_file.path.ts); + println!("File: {}", datapath_file.file); + } + None => println!("Invalid path format"), +} +``` + +## Schema Associations + +Associate datapaths with schema types for type-safe data handling: + +```rust +use datapath::datapath; + +pub struct UserEvent { + pub action: String, + pub timestamp: i64, +} + +datapath! 
{ + struct EventPath { + pattern: events/user_id=String/date=i64/"v1.0" + schema: UserEvent + }; +} + +// EventPath now implements SchemaDatapath +// EventPath::Schema == UserEvent +``` + +## Examples + +```rust +use datapath::datapath; + +pub struct MetricsSchema; + +datapath! { + // Constant segments (identifiers) + struct SimplePath(data/logs/events); + + // String literal constants (for segments with dashes) + struct QuotedPath("web-data"/"user-logs"/2024); + + // Typed partitions with identifier keys + struct TypedPath(domain/user_id=uuid::Uuid/timestamp=i64); + + // Typed partitions with dashes + struct QuotedKeys("service-name"=String/"request-id"=uuid::Uuid); + + // With schema association + struct MetricsData { + pattern: metrics/service=String/timestamp=i64/"v1.0" + schema: MetricsSchema + }; +} +``` \ No newline at end of file diff --git a/crates/datapath/src/lib.rs b/crates/datapath/src/lib.rs index e833fbd..58b7f2d 100644 --- a/crates/datapath/src/lib.rs +++ b/crates/datapath/src/lib.rs @@ -1,3 +1,8 @@ +// this readme is symlinked to the root of the repo, +// because cargo publish does odd things with paths. +// a relative path to the root readme will NOT work. +#![doc = include_str!("../README.md")] + // silence linter, used in README #[cfg(test)] use uuid as _;