Add `--schema` argument to the `fields` command
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
use anyhow::{Context, Result};
|
||||
use clap::Args;
|
||||
use pile_config::objectpath::ObjectPath;
|
||||
use pile_dataset::Datasets;
|
||||
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
||||
use pile_value::{extract::traits::ExtractState, value::PileValue};
|
||||
@@ -40,6 +41,10 @@ pub struct FieldsCommand {
|
||||
#[arg(long)]
|
||||
max_percent: Option<f64>,
|
||||
|
||||
/// Print counts of non-null schema fields instead of raw fields
|
||||
#[arg(long)]
|
||||
schema: bool,
|
||||
|
||||
/// Restrict to these sources (all sources if empty)
|
||||
#[arg(long, short = 's')]
|
||||
source: Vec<String>,
|
||||
@@ -67,6 +72,17 @@ impl CliCmd for FieldsCommand {
|
||||
let jobs = self.jobs.max(1);
|
||||
let state = ExtractState { ignore_mime: false };
|
||||
|
||||
// Pre-collect each schema field's name and paths once, up front; the list stays empty unless --schema is set
|
||||
let schema_fields: Vec<(String, Vec<ObjectPath>)> = if self.schema {
|
||||
ds.config
|
||||
.schema
|
||||
.iter()
|
||||
.map(|(name, spec)| (name.to_string(), spec.path.clone()))
|
||||
.collect()
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
for (name, dataset) in ds.sources.iter().filter(|(name, _)| {
|
||||
self.source.is_empty() || self.source.iter().any(|s| s == name.as_str())
|
||||
}) {
|
||||
@@ -98,9 +114,39 @@ impl CliCmd for FieldsCommand {
|
||||
let item = item.clone();
|
||||
let name = name.clone();
|
||||
let state = state.clone();
|
||||
if self.schema {
|
||||
let schema_fields = schema_fields.clone();
|
||||
join_set.spawn(async move {
|
||||
let pv = PileValue::Item(item);
|
||||
let mut counts = Map::new();
|
||||
for (field_name, paths) in &schema_fields {
|
||||
let mut present = false;
|
||||
for path in paths {
|
||||
let v =
|
||||
pv.query(&state, path).await.with_context(|| {
|
||||
format!(
|
||||
"while extracting field {field_name} in source {name}"
|
||||
)
|
||||
})?;
|
||||
if let Some(v) = v
|
||||
&& !matches!(v, PileValue::Null)
|
||||
{
|
||||
present = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
counts.insert(
|
||||
field_name.clone(),
|
||||
Value::Number((present as u64).into()),
|
||||
);
|
||||
}
|
||||
Ok(Some(counts))
|
||||
});
|
||||
} else {
|
||||
join_set.spawn(async move {
|
||||
let item = PileValue::Item(item);
|
||||
let result = item.count_fields(&state).await.with_context(|| {
|
||||
let result =
|
||||
item.count_fields(&state).await.with_context(|| {
|
||||
format!("while counting fields in source {name}")
|
||||
})?;
|
||||
Ok(result.and_then(|v| {
|
||||
@@ -114,6 +160,7 @@ impl CliCmd for FieldsCommand {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Drain remaining tasks
|
||||
while let Some(result) = join_set.join_next().await {
|
||||
|
||||
Reference in New Issue
Block a user