Compare commits

..

3 Commits

Author SHA1 Message Date
e5193a1114 TMP sidecars
Some checks failed
CI / Typos (push) Successful in 30s
CI / Clippy (push) Failing after 1m13s
CI / Build and test (all features) (push) Successful in 4m25s
CI / Build and test (push) Successful in 5m45s
2026-03-16 20:26:41 -07:00
f2f5726d7b FLAC error edits 2026-03-16 19:17:51 -07:00
b1f76b0741 Cross-platform builds 2026-03-16 19:03:14 -07:00
15 changed files with 336 additions and 287 deletions

View File

@@ -1,12 +1,13 @@
use serde::Deserialize;
use std::{collections::HashMap, fmt::Debug, path::PathBuf};
use crate::{objectpath::ObjectPath, pattern::GroupPattern};
mod misc;
pub use misc::*;
use crate::objectpath::ObjectPath;
pub mod objectpath;
pub mod pattern;
pub static INIT_DB_TOML: &str = include_str!("./config.toml");
@@ -59,13 +60,9 @@ pub enum Source {
/// Must be relative.
path: PathBuf,
/// If true, all toml files are ignored.
/// Metadata can be added to any file using a matching `<filename>`.toml.
///
/// If false, toml files are treated as regular files
/// and sidecar metadata is disabled.
#[serde(default = "default_true")]
sidecars: bool,
/// How to group files into items in this source
#[serde(default)]
pattern: GroupPattern,
},
/// An S3-compatible object store bucket
@@ -84,9 +81,9 @@ pub enum Source {
credentials: S3Credentials,
/// If true, all .toml objects are treated as sidecar metadata files.
#[serde(default = "default_true")]
sidecars: bool,
/// How to group files into items in this source
#[serde(default)]
pattern: GroupPattern,
},
}

View File

@@ -58,7 +58,7 @@ pub enum PathSegment {
/// - `$` refers to the root object
/// - `.<name>` selects a PathSegment::Field of an object
/// - `[n]` selects an item of an array
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ObjectPath {
pub segments: Vec<PathSegment>,
}

View File

@@ -0,0 +1,49 @@
use std::collections::HashMap;
use serde::{Deserialize, Deserializer, de};
use smartstring::{LazyCompact, SmartString};
use thiserror::Error;
use crate::{Label, objectpath::PathParseError as ObjectPathError};
mod parser;
pub use parser::GroupSegment;
/// Errors produced while parsing a group-pattern string.
#[derive(Debug, Error, PartialEq)]
pub enum GroupPatternParseError {
    /// A `{` or `}` appeared in an invalid position, or a `{` was never closed.
    ///
    /// `position` is the byte offset of the offending (or unclosed) brace
    /// within the pattern string.
    #[error("syntax error at index {position}")]
    Syntax { position: usize },
    /// The contents of a `{...}` block could not be parsed as an object path.
    ///
    /// `start` is the byte offset of the opening `{`, `end` is one past the
    /// matching `}`, and `path` is the raw text found between the braces.
    #[error("invalid object path {path:?}: {source}")]
    InvalidObjectPath {
        start: usize,
        end: usize,
        path: SmartString<LazyCompact>,
        source: ObjectPathError,
    },
}
/// A set of named group patterns: each [`Label`] maps to the parsed
/// segments of one pattern string (see [`GroupSegment`]).
#[derive(Debug, Clone, Default)]
pub struct GroupPattern {
    // Parsed pattern segments, keyed by the group's label.
    pub parts: HashMap<Label, Vec<GroupSegment>>,
}
impl<'de> Deserialize<'de> for GroupPattern {
fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
let raw = HashMap::<String, String>::deserialize(deserializer)?;
let mut parts = HashMap::with_capacity(raw.len());
for (key, value) in raw {
let label = Label::try_from(key.as_str()).map_err(de::Error::custom)?;
let segments = parser::Parser::new()
.parse(&value)
.map_err(de::Error::custom)?
.into_iter()
.map(|(_, seg)| seg)
.collect();
parts.insert(label, segments);
}
Ok(GroupPattern { parts })
}
}

View File

@@ -0,0 +1,195 @@
use smartstring::{LazyCompact, SmartString};
use crate::{objectpath::ObjectPath, pattern::GroupPatternParseError};
/// One parsed piece of a group-pattern string.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug, Clone)]
pub enum GroupSegment {
    /// The contents of a `{...}` block, parsed as an [`ObjectPath`].
    Path(ObjectPath),
    /// Literal text appearing outside any `{...}` block.
    Literal(SmartString<LazyCompact>),
}
/// Parser for group-pattern strings; see [`Parser::parse`].
// `Default` is derived to satisfy clippy's `new_without_default` lint
// (the compare page shows the Clippy CI job failing on this changeset).
#[derive(Debug, Default)]
pub struct Parser {}

impl Parser {
    /// Create a new pattern parser. Equivalent to [`Parser::default`].
    pub fn new() -> Self {
        Self {}
    }

    /// Parse a pattern string of the form `{path}.literal{path}...`.
    ///
    /// Returns `(byte offset, segment)` pairs in source order.
    ///
    /// - `{...}` delimiters are parsed as [`ObjectPath`] expressions.
    ///   Nested `{}` inside a path are allowed; depth is tracked to find the
    ///   matching closing brace.
    /// - Everything outside `{...}` is a `Literal` segment.
    /// - A bare `}` in literal position (depth == 0) is a syntax error.
    /// - An unclosed `{` is a syntax error.
    pub fn parse(self, source: &str) -> Result<Vec<(usize, GroupSegment)>, GroupPatternParseError> {
        let mut tokens = Vec::new();
        // `depth` > 0 means we are currently inside a `{...}` path expression.
        let mut depth: usize = 0;
        // Start of the current segment (literal text or path content).
        let mut window_start: usize = 0;
        // Source position of the opening `{` for the current path (used for error reporting).
        let mut open_brace: usize = 0;

        for (i, c) in source.char_indices() {
            match c {
                '{' => {
                    if depth == 0 {
                        // Emit any accumulated literal.
                        if i > window_start {
                            tokens.push((
                                window_start,
                                GroupSegment::Literal(source[window_start..i].into()),
                            ));
                        }
                        open_brace = i;
                        // Path content starts after the opening brace.
                        window_start = i + 1;
                        depth = 1;
                    } else {
                        // Nested brace inside a path — keep counting.
                        depth += 1;
                    }
                }
                '}' => {
                    if depth == 0 {
                        // Unmatched `}` outside any path.
                        return Err(GroupPatternParseError::Syntax { position: i });
                    }
                    depth -= 1;
                    if depth == 0 {
                        // Closing brace of the outermost path expression — parse as ObjectPath.
                        let path_str = &source[window_start..i];
                        let path = path_str.parse::<ObjectPath>().map_err(|e| {
                            GroupPatternParseError::InvalidObjectPath {
                                start: open_brace,
                                end: i + 1,
                                path: path_str.into(),
                                source: e,
                            }
                        })?;
                        tokens.push((open_brace, GroupSegment::Path(path)));
                        // Literal content (if any) starts after this `}`.
                        window_start = i + 1;
                    }
                }
                _ => {}
            }
        }

        // Unclosed `{`.
        if depth > 0 {
            return Err(GroupPatternParseError::Syntax {
                position: open_brace,
            });
        }

        // Emit any trailing literal.
        if window_start < source.len() {
            tokens.push((
                window_start,
                GroupSegment::Literal(source[window_start..].into()),
            ));
        }

        Ok(tokens)
    }
}
//
// MARK: tests
//
#[expect(clippy::unwrap_used)]
#[cfg(test)]
mod tests {
    use super::*;

    // Convenience wrapper: run a fresh parser on `source`.
    fn parse(source: &str) -> Result<Vec<(usize, GroupSegment)>, GroupPatternParseError> {
        Parser::new().parse(source)
    }

    // Build a `Path` segment from an object-path string.
    fn path(s: &str) -> GroupSegment {
        GroupSegment::Path(s.parse().unwrap())
    }

    // Build a `Literal` segment from plain text.
    fn lit(s: &str) -> GroupSegment {
        GroupSegment::Literal(s.into())
    }

    #[test]
    fn single_path() {
        assert_eq!(parse("{$.foo}").unwrap(), vec![(0, path("$.foo"))]);
    }

    #[test]
    fn single_literal() {
        assert_eq!(parse("hello").unwrap(), vec![(0, lit("hello"))]);
    }

    #[test]
    fn path_then_literal() {
        assert_eq!(
            parse("{$.foo}.txt").unwrap(),
            vec![(0, path("$.foo")), (7, lit(".txt"))]
        );
    }

    #[test]
    fn literal_then_path() {
        assert_eq!(
            parse("prefix/{$.foo}").unwrap(),
            vec![(0, lit("prefix/")), (7, path("$.foo"))]
        );
    }

    #[test]
    fn interleaved() {
        assert_eq!(
            parse("{$.a}.sep.{$.b}").unwrap(),
            vec![(0, path("$.a")), (5, lit(".sep.")), (10, path("$.b")),]
        );
    }

    // An unclosed `{` reports the position of the opening brace.
    #[test]
    fn unmatched_open_brace_error() {
        assert_eq!(
            parse("{$.foo"),
            Err(GroupPatternParseError::Syntax { position: 0 })
        );
    }

    // A bare `}` in literal position reports its own position.
    #[test]
    fn unmatched_close_brace_in_literal_error() {
        assert_eq!(
            parse("foo}bar"),
            Err(GroupPatternParseError::Syntax { position: 3 })
        );
    }

    // `start`/`end` span the braces; `path` is the text between them.
    #[test]
    fn invalid_path_error() {
        assert_eq!(
            parse("{not-a-path}"),
            Err(GroupPatternParseError::InvalidObjectPath {
                start: 0,
                end: 12,
                path: "not-a-path".into(),
                source: crate::objectpath::PathParseError::MustStartWithRoot { position: 0 },
            })
        );
    }

    #[test]
    fn literal_between_paths() {
        assert_eq!(
            parse("foo{$.x}bar").unwrap(),
            vec![(0, lit("foo")), (3, path("$.x")), (8, lit("bar")),]
        );
    }
}

View File

@@ -118,7 +118,7 @@ impl Datasets {
Source::Filesystem {
enabled,
path,
sidecars,
pattern,
} => {
if !enabled {
continue;
@@ -129,7 +129,7 @@ impl Datasets {
Dataset::Dir(Arc::new(DirDataSource::new(
label,
path_parent.join(path),
*sidecars,
pattern.clone(),
))),
);
}
@@ -141,7 +141,7 @@ impl Datasets {
endpoint,
region,
credentials,
sidecars,
pattern,
} => {
if !enabled {
continue;
@@ -154,7 +154,7 @@ impl Datasets {
endpoint.clone(),
region.clone(),
credentials,
*sidecars,
pattern.clone(),
) {
Ok(ds) => {
sources.insert(label.clone(), Dataset::S3(Arc::new(ds)));

View File

@@ -1,10 +1,20 @@
use std::io::{Read, Seek, SeekFrom};
use std::io::{ErrorKind, Read, Seek, SeekFrom};
use crate::{
FlacBlock, FlacDecodeError,
blocks::{FlacAudioFrame, FlacMetablockHeader, FlacMetablockType},
};
/// Fill `buf` from `reader`, mapping a short read onto a FLAC decode error.
///
/// `UnexpectedEof` means the stream ended inside a block, which we report as
/// [`FlacDecodeError::MalformedBlock`]; all other I/O errors pass through via
/// the `From<std::io::Error>` conversion.
fn read_exact_flac<R: Read>(reader: &mut R, buf: &mut [u8]) -> Result<(), FlacDecodeError> {
    match reader.read_exact(buf) {
        Ok(()) => Ok(()),
        Err(e) if e.kind() == ErrorKind::UnexpectedEof => Err(FlacDecodeError::MalformedBlock),
        Err(e) => Err(e.into()),
    }
}
// TODO: quickly skip blocks we do not need
/// The next block we expect to read
@@ -42,9 +52,9 @@ impl<R: Read + Seek> Iterator for FlacReader<R> {
ReaderState::MagicBits => {
let mut data = [0u8; 4];
if let Err(e) = self.inner.read_exact(&mut data[..4]) {
if let Err(e) = read_exact_flac(&mut self.inner, &mut data[..4]) {
self.state = ReaderState::Done;
return Some(Err(e.into()));
return Some(Err(e));
}
if data != [0x66, 0x4C, 0x61, 0x43] {
@@ -57,9 +67,9 @@ impl<R: Read + Seek> Iterator for FlacReader<R> {
ReaderState::MetablockHeader { is_first } => {
let mut data = [0u8; 4];
if let Err(e) = self.inner.read_exact(&mut data[..]) {
if let Err(e) = read_exact_flac(&mut self.inner, &mut data[..]) {
self.state = ReaderState::Done;
return Some(Err(e.into()));
return Some(Err(e));
}
let header = match FlacMetablockHeader::decode(&data) {
@@ -80,9 +90,9 @@ impl<R: Read + Seek> Iterator for FlacReader<R> {
ReaderState::MetaBlock { header } => {
let mut data = vec![0u8; header.length as usize];
if let Err(e) = self.inner.read_exact(&mut data) {
if let Err(e) = read_exact_flac(&mut self.inner, &mut data) {
self.state = ReaderState::Done;
return Some(Err(e.into()));
return Some(Err(e));
}
let block = match FlacBlock::decode(header.block_type, &data) {

View File

@@ -1,7 +1,34 @@
use std::env;
use std::path::PathBuf;
const PDFIUM_URL: &str = "https://github.com/bblanchon/pdfium-binaries/releases/download/chromium%2F7725/pdfium-linux-x64.tgz";
const PDFIUM_VERSION: &str = "chromium%2F7725";

/// Build the download URL for the prebuilt PDFium release asset matching the
/// cargo target `os`/`arch` pair.
///
/// # Panics
/// Panics for any platform other than linux/macos on x86_64/aarch64.
fn pdfium_url(os: &str, arch: &str) -> String {
    let platform = match os {
        "linux" => match arch {
            "x86_64" => "linux-x64",
            "aarch64" => "linux-arm64",
            _ => panic!("unsupported platform: {os}-{arch}"),
        },
        "macos" => match arch {
            "x86_64" => "mac-x64",
            "aarch64" => "mac-arm64",
            _ => panic!("unsupported platform: {os}-{arch}"),
        },
        _ => panic!("unsupported platform: {os}-{arch}"),
    };
    format!(
        "https://github.com/bblanchon/pdfium-binaries/releases/download/{PDFIUM_VERSION}/pdfium-{platform}.tgz"
    )
}
/// File name of the PDFium shared library for the target OS
/// (`.dylib` on macOS, `.so` everywhere else).
fn lib_name(os: &str) -> &'static str {
    if os == "macos" {
        "libpdfium.dylib"
    } else {
        "libpdfium.so"
    }
}
/// Linker flag that embeds a run-time library search path relative to the
/// executable: `@loader_path` on macOS, `$ORIGIN` on ELF platforms.
fn rpath_flag(os: &str) -> &'static str {
    if os == "macos" {
        "-Wl,-rpath,@loader_path"
    } else {
        "-Wl,-rpath,$ORIGIN"
    }
}
#[expect(clippy::expect_used)]
#[expect(clippy::unwrap_used)]
@@ -13,6 +40,9 @@ fn main() {
return;
}
let os = env::var("CARGO_CFG_TARGET_OS").unwrap();
let arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap();
let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
// OUT_DIR is target/<profile>/build/<pkg>-<hash>/out
@@ -30,15 +60,17 @@ fn main() {
return;
}
let lib_path = profile_dir.join("libpdfium.so");
let lib_file = lib_name(&os);
let lib_path = profile_dir.join(lib_file);
if !lib_path.exists() {
let url = pdfium_url(&os, &arch);
let tgz_path = out_dir.join("pdfium.tgz");
eprintln!("cargo:warning=Downloading PDFium from {PDFIUM_URL}");
eprintln!("cargo:warning=Downloading PDFium from {url}");
let status = std::process::Command::new("curl")
.args(["-L", "--fail", "-o", tgz_path.to_str().unwrap(), PDFIUM_URL])
.args(["-L", "--fail", "-o", tgz_path.to_str().unwrap(), &url])
.status()
.expect("failed to run curl");
assert!(status.success(), "curl failed to download PDFium");
@@ -54,11 +86,11 @@ fn main() {
.expect("failed to run tar");
assert!(status.success(), "tar failed to extract PDFium");
std::fs::copy(out_dir.join("lib").join("libpdfium.so"), &lib_path)
.expect("failed to copy libpdfium.so");
std::fs::copy(out_dir.join("lib").join(lib_file), &lib_path)
.expect("failed to copy pdfium library");
}
println!("cargo:rustc-link-search=native={}", profile_dir.display());
println!("cargo:rustc-link-lib=dylib=pdfium");
println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN");
println!("cargo:rustc-link-arg={}", rpath_flag(&os));
}

View File

@@ -25,9 +25,6 @@ mod toml;
use pile_config::Label;
pub use toml::*;
mod sidecar;
pub use sidecar::*;
use crate::{
extract::{
misc::MapExtractor,
@@ -77,10 +74,6 @@ impl ItemExtractor {
Label::new("toml").unwrap(),
PileValue::ObjectExtractor(Arc::new(TomlExtractor::new(item))),
),
(
Label::new("sidecar").unwrap(),
PileValue::ObjectExtractor(Arc::new(SidecarExtractor::new(item))),
),
]),
};
@@ -109,7 +102,6 @@ impl ObjectExtractor for ItemExtractor {
Label::new("exif").unwrap(),
Label::new("pdf").unwrap(),
Label::new("json").unwrap(),
Label::new("sidecar").unwrap(),
]);
}
}

View File

@@ -1,57 +0,0 @@
use pile_config::Label;
use std::sync::OnceLock;
use tracing::trace;
use super::TomlExtractor;
use crate::{
extract::traits::{ExtractState, ObjectExtractor},
value::{Item, PileValue},
};
/// Extractor that exposes an item's sidecar metadata through the
/// [`ObjectExtractor`] interface.
pub struct SidecarExtractor {
    // The item whose sidecar is read.
    item: Item,
    // Lazily-resolved inner extractor over the sidecar's toml content.
    // Once initialized, `None` means the item has no sidecar.
    output: OnceLock<Option<TomlExtractor>>,
}
impl SidecarExtractor {
    /// Create a sidecar extractor for `item`; resolution of the sidecar is
    /// deferred until the first field access.
    pub fn new(item: &Item) -> Self {
        let item = item.clone();
        Self {
            item,
            output: OnceLock::new(),
        }
    }
}
#[async_trait::async_trait]
impl ObjectExtractor for SidecarExtractor {
    /// Read `name` from the sidecar's toml content.
    ///
    /// Returns `PileValue::Null` when the item has no sidecar, so downstream
    /// consumers see an explicit null rather than a missing field.
    async fn field(
        &self,
        state: &ExtractState,
        name: &Label,
        args: Option<&str>,
    ) -> Result<Option<PileValue>, std::io::Error> {
        trace!(
            ?args,
            key = self.item.key().as_str(),
            "Getting field {name:?} from SidecarExtractor",
        );

        // Resolve the sidecar (at most once) into a TomlExtractor.
        match self
            .output
            .get_or_init(|| self.item.sidecar().map(TomlExtractor::new))
        {
            Some(x) => Ok(x.field(state, name, args).await?),
            None => Ok(Some(PileValue::Null)),
        }
    }

    /// List the field names available in the sidecar; empty when the item
    /// has no sidecar.
    async fn fields(&self) -> Result<Vec<Label>, std::io::Error> {
        match self
            .output
            .get_or_init(|| self.item.sidecar().map(TomlExtractor::new))
        {
            Some(x) => Ok(x.fields().await?),
            None => Ok(Vec::new()),
        }
    }
}

View File

@@ -1,5 +1,5 @@
use chrono::{DateTime, Utc};
use pile_config::Label;
use pile_config::{Label, pattern::GroupPattern};
use std::{path::PathBuf, sync::Arc};
use tokio_stream::wrappers::ReceiverStream;
use walkdir::WalkDir;
@@ -13,16 +13,15 @@ use crate::{
pub struct DirDataSource {
pub name: Label,
pub dir: PathBuf,
pub sidecars: bool,
pub pattern: GroupPattern,
}
impl DirDataSource {
pub fn new(name: &Label, dir: PathBuf, sidecars: bool) -> Self {
pub fn new(name: &Label, dir: PathBuf, pattern: GroupPattern) -> Self {
Self {
name: name.clone(),
dir,
sidecars,
pattern,
}
}
}
@@ -38,29 +37,11 @@ impl DataSource for Arc<DirDataSource> {
return Ok(None);
}
// Ignore toml files if sidecars are enabled
if self.sidecars && key.extension().and_then(|x| x.to_str()) == Some("toml") {
return Ok(None);
}
return Ok(Some(Item::File {
source: Arc::clone(self),
mime: mime_guess::from_path(&key).first_or_octet_stream(),
path: key.clone(),
sidecar: self
.sidecars
.then(|| {
let sidecar_path = key.with_extension("toml");
sidecar_path.is_file().then(|| {
Box::new(Item::File {
source: Arc::clone(self),
mime: mime_guess::from_path(&sidecar_path).first_or_octet_stream(),
path: sidecar_path,
sidecar: None,
})
})
})
.flatten(),
group: todo!(),
}));
}
@@ -91,27 +72,12 @@ impl DataSource for Arc<DirDataSource> {
let item = match path.extension().and_then(|x| x.to_str()) {
None => continue,
Some("toml") if source.sidecars => continue,
Some(_) => Item::File {
source: Arc::clone(&source),
mime: mime_guess::from_path(&path).first_or_octet_stream(),
path: path.clone(),
sidecar: source
.sidecars
.then(|| {
let sidecar_path = path.with_extension("toml");
sidecar_path.is_file().then(|| {
Box::new(Item::File {
source: Arc::clone(&source),
mime: mime_guess::from_path(&sidecar_path)
.first_or_octet_stream(),
path: sidecar_path,
sidecar: None,
})
})
})
.flatten(),
group: todo!(),
},
};

View File

@@ -1,6 +1,6 @@
use aws_sdk_s3::config::{BehaviorVersion, Credentials, Region};
use chrono::{DateTime, Utc};
use pile_config::{Label, S3Credentials};
use pile_config::{Label, S3Credentials, pattern::GroupPattern};
use smartstring::{LazyCompact, SmartString};
use std::sync::Arc;
use tokio_stream::wrappers::ReceiverStream;
@@ -12,8 +12,8 @@ pub struct S3DataSource {
pub name: Label,
pub bucket: SmartString<LazyCompact>,
pub prefix: Option<SmartString<LazyCompact>>,
pub sidecars: bool,
pub client: Arc<aws_sdk_s3::Client>,
pub pattern: GroupPattern,
}
impl S3DataSource {
@@ -24,7 +24,7 @@ impl S3DataSource {
endpoint: Option<String>,
region: String,
credentials: &S3Credentials,
sidecars: bool,
pattern: GroupPattern,
) -> Result<Self, std::io::Error> {
let client = {
let creds = Credentials::new(
@@ -51,8 +51,8 @@ impl S3DataSource {
name: name.clone(),
bucket: bucket.into(),
prefix: prefix.map(|x| x.into()),
sidecars,
client: Arc::new(client),
pattern,
})
}
@@ -99,36 +99,17 @@ impl S3DataSource {
let mime = mime_guess::from_path(object_path.as_str()).first_or_octet_stream();
let sidecar = if self.sidecars {
self.find_sidecar_key(object_path.as_str())
.await
.map(|sidecar_key| {
Box::new(Item::S3 {
source: Arc::clone(self),
mime: mime_guess::from_path(sidecar_key.as_str()).first_or_octet_stream(),
key: sidecar_key,
sidecar: None,
})
})
} else {
None
};
Item::S3 {
source: Arc::clone(self),
mime,
key,
sidecar,
group: todo!(),
}
}
}
impl DataSource for Arc<S3DataSource> {
async fn get(&self, key: &str) -> Result<Option<Item>, std::io::Error> {
if self.sidecars && key.ends_with(".toml") {
return Ok(None);
}
let key: SmartString<LazyCompact> = key.into();
let key = match &self.prefix {
Some(x) => format!("{x}/{key}").into(),
@@ -196,10 +177,6 @@ impl DataSource for Arc<S3DataSource> {
None => continue,
};
if source.sidecars && key.ends_with(".toml") {
continue;
}
let item = source.make_item(key).await;
if tx.send(Ok(item)).await.is_err() {

View File

@@ -1,6 +1,7 @@
use mime::Mime;
use pile_config::Label;
use smartstring::{LazyCompact, SmartString};
use std::{fs::File, path::PathBuf, sync::Arc};
use std::{collections::HashMap, fs::File, path::PathBuf, sync::Arc};
use crate::{
source::{DirDataSource, S3DataSource},
@@ -19,7 +20,7 @@ pub enum Item {
mime: Mime,
path: PathBuf,
sidecar: Option<Box<Item>>,
group: Arc<HashMap<Label, Box<Item>>>,
},
S3 {
@@ -27,7 +28,7 @@ pub enum Item {
mime: Mime,
key: SmartString<LazyCompact>,
sidecar: Option<Box<Item>>,
group: Arc<HashMap<Label, Box<Item>>>,
},
}
@@ -96,10 +97,10 @@ impl Item {
}
}
pub fn sidecar(&self) -> Option<&Self> {
pub fn group(&self) -> &HashMap<Label, Box<Self>> {
match self {
Self::File { sidecar, .. } => sidecar.as_ref().map(|x| &**x),
Self::S3 { sidecar, .. } => sidecar.as_ref().map(|x| &**x),
Self::File { group, .. } => group,
Self::S3 { group, .. } => group,
}
}
}

View File

@@ -1,108 +0,0 @@
use anyhow::{Context, Result};
use clap::Args;
use pile_config::{Label, Source};
use pile_dataset::{Datasets, index::DbFtsIndex};
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
use pile_value::{
extract::traits::ExtractState,
source::{DataSource, DirDataSource},
value::{Item, PileValue},
};
use std::{path::PathBuf, sync::Arc};
use tokio_stream::StreamExt;
use tracing::{info, warn};
use crate::{CliCmd, GlobalContext};
/// CLI arguments for the `annotate` subcommand: extract a schema field from
/// every item and write it to a sidecar path.
#[derive(Debug, Args)]
pub struct AnnotateCommand {
    /// The schema field to read (must be defined in pile.toml)
    field: String,

    /// Sidecar path to write to (e.g. meta.title)
    dest: String,

    /// Path to dataset config
    #[arg(long, short = 'c', default_value = "./pile.toml")]
    config: PathBuf,
}
impl AnnotateCommand {
    /// Split `dest` on `.` and validate every segment as a [`Label`],
    /// failing on the first invalid segment.
    fn parse_dest(dest: &str) -> Result<Vec<Label>> {
        let mut labels = Vec::new();
        for s in dest.split('.') {
            match Label::new(s) {
                Some(label) => labels.push(label),
                None => anyhow::bail!("invalid label {s:?} in dest path"),
            }
        }
        Ok(labels)
    }
}
impl CliCmd for AnnotateCommand {
    /// Extract `self.field` from every item of each filesystem source and
    /// (eventually) write it to the configured sidecar path.
    ///
    /// Fails when the field name is not a valid label or is absent from the
    /// dataset schema. S3 sources are skipped with a warning.
    async fn run(
        self,
        _ctx: GlobalContext,
        _flag: CancelFlag,
    ) -> Result<i32, CancelableTaskError<anyhow::Error>> {
        let field = Label::new(&self.field)
            .ok_or_else(|| anyhow::anyhow!("invalid field name {:?}", self.field))?;
        let dest_path = Self::parse_dest(&self.dest)?;
        let state = ExtractState { ignore_mime: false };

        let ds = Datasets::open(&self.config)
            .with_context(|| format!("while opening dataset for {}", self.config.display()))?;
        if !ds.config.schema.contains_key(&field) {
            return Err(anyhow::anyhow!("field {:?} is not defined in schema", self.field).into());
        }
        let index = DbFtsIndex::new(&ds.path_workdir, &ds.config);

        // Fix: `count` is incremented below, so the binding must be mutable —
        // the previous `let count = 0u64;` failed to compile.
        let mut count = 0u64;
        for (name, source) in &ds.config.dataset.source {
            match source {
                Source::Filesystem { path, sidecars, .. } => {
                    // Sidecars are where the annotation is written; without
                    // them there is nothing to do for this source.
                    if !sidecars {
                        warn!("Source {name} does not have sidecars enabled, skipping");
                        continue;
                    }
                    let source = Arc::new(DirDataSource::new(name, path.clone(), *sidecars));
                    let mut stream = source.iter();
                    while let Some(res) = stream.next().await {
                        let item = res.with_context(|| format!("while reading source {name}"))?;
                        // Only filesystem items carry sidecar files.
                        let Item::File { path, .. } = &item else {
                            continue;
                        };
                        let item = PileValue::Item(item.clone());
                        let vals = index
                            .get_field(&state, &item, &field)
                            .await
                            .with_context(|| {
                                format!("while extracting field from {}", path.display())
                            })?;

                        // TODO: implement sidecar writing
                        let _ = (&dest_path, &vals);
                        todo!("write_sidecar not yet implemented");
                        #[expect(unreachable_code)]
                        {
                            count += 1;
                        }
                    }
                }
                Source::S3 { .. } => {
                    warn!("Source {name} is an S3 source; sidecar annotation is not yet supported");
                }
            }
        }

        info!("Annotated {count} items");
        Ok(0)
    }
}

View File

@@ -4,7 +4,6 @@ use pile_toolbox::cancelabletask::{
CancelFlag, CancelableTask, CancelableTaskError, CancelableTaskResult,
};
mod annotate;
mod check;
mod fields;
mod index;
@@ -23,12 +22,6 @@ pub enum SubCommand {
#[clap(alias = "doc")]
Docs {},
/// Annotate all items with a field, writing it to a sidecar path
Annotate {
#[command(flatten)]
cmd: annotate::AnnotateCommand,
},
/// Create an empty dataset
Init {
#[command(flatten)]
@@ -88,7 +81,6 @@ pub enum SubCommand {
impl CliCmdDispatch for SubCommand {
fn start(self, ctx: GlobalContext) -> Result<CancelableTask<Result<i32>>> {
match self {
Self::Annotate { cmd } => cmd.start(ctx),
Self::Init { cmd } => cmd.start(ctx),
Self::Check { cmd } => cmd.start(ctx),
Self::Index { cmd } => cmd.start(ctx),

View File

@@ -42,7 +42,10 @@ pileRustPlatform.buildRustPackage {
postInstall = ''
wrapProgram $out/bin/pile \
--prefix LD_LIBRARY_PATH : ${pkgs.lib.makeLibraryPath [ pkgs.pdfium-binaries ]}
${if pkgs.stdenv.isDarwin then
"--prefix DYLD_LIBRARY_PATH : ${pkgs.lib.makeLibraryPath [ pkgs.pdfium-binaries ]}"
else
"--prefix LD_LIBRARY_PATH : ${pkgs.lib.makeLibraryPath [ pkgs.pdfium-binaries ]}"}
'';
meta = {