diff --git a/crates/pile/src/command/fields.rs b/crates/pile/src/command/fields.rs index b1596a0..8b29fed 100644 --- a/crates/pile/src/command/fields.rs +++ b/crates/pile/src/command/fields.rs @@ -1,5 +1,6 @@ use anyhow::{Context, Result}; use clap::Args; +use pile_config::objectpath::ObjectPath; use pile_dataset::Datasets; use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError}; use pile_value::{extract::traits::ExtractState, value::PileValue}; @@ -40,6 +41,10 @@ pub struct FieldsCommand { #[arg(long)] max_percent: Option, + /// Print counts of non-null schema fields instead of raw fields + #[arg(long)] + schema: bool, + /// Restrict to these sources (all sources if empty) #[arg(long, short = 's')] source: Vec, @@ -67,6 +72,17 @@ impl CliCmd for FieldsCommand { let jobs = self.jobs.max(1); let state = ExtractState { ignore_mime: false }; + // Pre-collect schema fields for the --schema mode + let schema_fields: Vec<(String, Vec)> = if self.schema { + ds.config + .schema + .iter() + .map(|(name, spec)| (name.to_string(), spec.path.clone())) + .collect() + } else { + Vec::new() + }; + for (name, dataset) in ds.sources.iter().filter(|(name, _)| { self.source.is_empty() || self.source.iter().any(|s| s == name.as_str()) }) { @@ -98,19 +114,50 @@ impl CliCmd for FieldsCommand { let item = item.clone(); let name = name.clone(); let state = state.clone(); - join_set.spawn(async move { - let item = PileValue::Item(item); - let result = item.count_fields(&state).await.with_context(|| { - format!("while counting fields in source {name}") - })?; - Ok(result.and_then(|v| { - if let Value::Object(m) = v { - Some(m) - } else { - None + if self.schema { + let schema_fields = schema_fields.clone(); + join_set.spawn(async move { + let pv = PileValue::Item(item); + let mut counts = Map::new(); + for (field_name, paths) in &schema_fields { + let mut present = false; + for path in paths { + let v = + pv.query(&state, path).await.with_context(|| { + format!( + "while extracting field {field_name} in source {name}" + ) + })?; + if let Some(v) = v + && !matches!(v, PileValue::Null) + { + present = true; + break; + } + } + counts.insert( + field_name.clone(), + Value::Number((present as u64).into()), + ); } - })) - }); + Ok(Some(counts)) + }); + } else { + join_set.spawn(async move { + let item = PileValue::Item(item); + let result = + item.count_fields(&state).await.with_context(|| { + format!("while counting fields in source {name}") + })?; + Ok(result.and_then(|v| { + if let Value::Object(m) = v { + Some(m) + } else { + None + } + })) + }); + } } } }