Add --schema arg to fields command

This commit is contained in:
2026-03-28 11:29:19 -07:00
parent 60dc755561
commit 8b4dfb1a1a

View File

@@ -1,5 +1,6 @@
use anyhow::{Context, Result};
use clap::Args;
use pile_config::objectpath::ObjectPath;
use pile_dataset::Datasets;
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
use pile_value::{extract::traits::ExtractState, value::PileValue};
@@ -40,6 +41,10 @@ pub struct FieldsCommand {
#[arg(long)]
max_percent: Option<f64>,
/// Print counts of non-null schema fields instead of raw fields
#[arg(long)]
schema: bool,
/// Restrict to these sources (all sources if empty)
#[arg(long, short = 's')]
source: Vec<String>,
@@ -67,6 +72,17 @@ impl CliCmd for FieldsCommand {
let jobs = self.jobs.max(1);
let state = ExtractState { ignore_mime: false };
// Pre-collect schema fields for the --schema mode.
// Each entry pairs a schema field's name with a clone of its configured
// `path` list (`spec.path`), both read from `ds.config.schema`. The list is
// built once, before the per-source loop below; the --schema branch further
// down clones it into every spawned task. When --schema is not set the list
// stays empty.
let schema_fields: Vec<(String, Vec<ObjectPath>)> = if self.schema {
ds.config
.schema
.iter()
.map(|(name, spec)| (name.to_string(), spec.path.clone()))
.collect()
} else {
// Raw-field mode: nothing to pre-collect.
Vec::new()
};
for (name, dataset) in ds.sources.iter().filter(|(name, _)| {
self.source.is_empty() || self.source.iter().any(|s| s == name.as_str())
}) {
@@ -98,19 +114,50 @@ impl CliCmd for FieldsCommand {
let item = item.clone();
let name = name.clone();
let state = state.clone();
join_set.spawn(async move {
let item = PileValue::Item(item);
let result = item.count_fields(&state).await.with_context(|| {
format!("while counting fields in source {name}")
})?;
Ok(result.and_then(|v| {
if let Value::Object(m) = v {
Some(m)
} else {
None
if self.schema {
let schema_fields = schema_fields.clone();
join_set.spawn(async move {
let pv = PileValue::Item(item);
let mut counts = Map::new();
for (field_name, paths) in &schema_fields {
let mut present = false;
for path in paths {
let v =
pv.query(&state, path).await.with_context(|| {
format!(
"while extracting field {field_name} in source {name}"
)
})?;
if let Some(v) = v
&& !matches!(v, PileValue::Null)
{
present = true;
break;
}
}
counts.insert(
field_name.clone(),
Value::Number((present as u64).into()),
);
}
}))
});
Ok(Some(counts))
});
} else {
join_set.spawn(async move {
let item = PileValue::Item(item);
let result =
item.count_fields(&state).await.with_context(|| {
format!("while counting fields in source {name}")
})?;
Ok(result.and_then(|v| {
if let Value::Object(m) = v {
Some(m)
} else {
None
}
}))
});
}
}
}
}