Refactor grouping
This commit is contained in:
@@ -1,27 +1,25 @@
|
||||
use chrono::{DateTime, Utc};
|
||||
use pile_config::{
|
||||
Label,
|
||||
pattern::{GroupPattern, GroupSegment},
|
||||
};
|
||||
use pile_config::Label;
|
||||
use regex::Regex;
|
||||
use smartstring::{LazyCompact, SmartString};
|
||||
use std::{
|
||||
collections::{BTreeMap, HashMap, HashSet},
|
||||
collections::{BTreeMap, HashMap},
|
||||
path::PathBuf,
|
||||
sync::{Arc, OnceLock},
|
||||
};
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use crate::{
|
||||
extract::traits::ExtractState,
|
||||
source::{DataSource, misc::path_ts_latest},
|
||||
value::{Item, PileValue},
|
||||
value::{BinaryPileValue, Item, PileValue},
|
||||
};
|
||||
|
||||
/// A data source backed by a directory on disk.
///
/// Instances are built by `DirDataSource::new`, which walks `dir` and stores
/// the resulting items in `index`.
///
/// NOTE(review): this view interleaves the removed and added lines of the
/// "Refactor grouping" commit, so the pre-refactor field (`pattern`) and the
/// post-refactor fields (`base_pattern`, `files`) appear side by side here.
/// Confirm against the real file which set is current.
#[derive(Debug)]
|
||||
pub struct DirDataSource {
|
||||
/// Label identifying this source; cloned from the `name` argument of `new`.
pub name: Label,
|
||||
/// Root directory that `new` walks to discover files.
pub dir: PathBuf,
|
||||
/// Group pattern handed to `resolve_groups` for each discovered path.
pub pattern: GroupPattern,
|
||||
/// Regex matched against each path relative to `dir`; capture group 1 is
/// used as the item's base name and index key.
pub base_pattern: Regex,
|
||||
/// Per-label path templates. `new` replaces `{base}` in each template with
/// the captured base name and joins the result onto `dir`.
pub files: HashMap<Label, String>,
|
||||
/// Items keyed by string, set once at the end of `new` via `get_or_init`.
pub index: OnceLock<BTreeMap<SmartString<LazyCompact>, Item>>,
|
||||
}
|
||||
|
||||
@@ -29,21 +27,18 @@ impl DirDataSource {
|
||||
pub async fn new(
|
||||
name: &Label,
|
||||
dir: PathBuf,
|
||||
pattern: GroupPattern,
|
||||
base_pattern: Regex,
|
||||
files: HashMap<Label, String>,
|
||||
) -> Result<Arc<Self>, std::io::Error> {
|
||||
let source = Arc::new(Self {
|
||||
name: name.clone(),
|
||||
dir,
|
||||
pattern,
|
||||
base_pattern,
|
||||
files,
|
||||
index: OnceLock::new(),
|
||||
});
|
||||
|
||||
//
|
||||
// MARK: list paths
|
||||
//
|
||||
|
||||
let mut paths_items = HashSet::new();
|
||||
let mut paths_grouped_items = HashSet::new();
|
||||
let mut index = BTreeMap::new();
|
||||
'entry: for entry in WalkDir::new(&source.dir) {
|
||||
let entry = match entry {
|
||||
Err(e) => {
|
||||
@@ -59,51 +54,52 @@ impl DirDataSource {
|
||||
}
|
||||
|
||||
let path = entry.into_path();
|
||||
let path_str = match path.to_str() {
|
||||
let rel_path = match path.strip_prefix(&source.dir) {
|
||||
Ok(p) => p,
|
||||
Err(_) => continue 'entry,
|
||||
};
|
||||
let path_str = match rel_path.to_str() {
|
||||
Some(x) => x,
|
||||
None => continue 'entry,
|
||||
};
|
||||
|
||||
let groups = resolve_groups(&source.pattern, path_str).await;
|
||||
paths_grouped_items.extend(groups.into_values());
|
||||
paths_items.insert(path);
|
||||
}
|
||||
|
||||
//
|
||||
// MARK: resolve groups
|
||||
//
|
||||
|
||||
let mut index = BTreeMap::new();
|
||||
'entry: for path in paths_items.difference(&paths_grouped_items) {
|
||||
let path_str = match path.to_str() {
|
||||
Some(x) => x,
|
||||
let captures = match source.base_pattern.captures(path_str) {
|
||||
Some(c) => c,
|
||||
None => continue 'entry,
|
||||
};
|
||||
let base = match captures.get(1) {
|
||||
Some(m) => m.as_str(),
|
||||
None => continue 'entry,
|
||||
};
|
||||
|
||||
let group = resolve_groups(&source.pattern, path_str).await;
|
||||
let group = group
|
||||
.into_iter()
|
||||
.map(|(k, group_path)| {
|
||||
(
|
||||
k,
|
||||
Box::new(Item::File {
|
||||
source: Arc::clone(&source),
|
||||
mime: mime_guess::from_path(&group_path).first_or_octet_stream(),
|
||||
path: group_path.clone(),
|
||||
group: Arc::new(HashMap::new()),
|
||||
let key: SmartString<LazyCompact> = base.into();
|
||||
if index.contains_key(&key) {
|
||||
continue 'entry;
|
||||
}
|
||||
|
||||
let mut item_files = HashMap::new();
|
||||
for (label, template) in &source.files {
|
||||
let file_path = source.dir.join(template.replace("{base}", base));
|
||||
if file_path.exists() {
|
||||
let mime = mime_guess::from_path(&file_path).first_or_octet_stream();
|
||||
item_files.insert(
|
||||
label.clone(),
|
||||
PileValue::Binary(BinaryPileValue::File {
|
||||
mime,
|
||||
path: file_path,
|
||||
}),
|
||||
)
|
||||
})
|
||||
.collect::<HashMap<_, _>>();
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let item = Item::File {
|
||||
source: Arc::clone(&source),
|
||||
mime: mime_guess::from_path(path).first_or_octet_stream(),
|
||||
path: path.into(),
|
||||
group: Arc::new(group),
|
||||
};
|
||||
|
||||
index.insert(item.key(), item);
|
||||
index.insert(
|
||||
key.clone(),
|
||||
Item::File {
|
||||
key,
|
||||
source: Arc::clone(&source),
|
||||
files: item_files,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
source.index.get_or_init(|| index);
|
||||
@@ -139,43 +135,3 @@ impl DataSource for Arc<DirDataSource> {
|
||||
path_ts_latest(&self.dir)
|
||||
}
|
||||
}
|
||||
|
||||
async fn resolve_groups(pattern: &GroupPattern, path_str: &str) -> HashMap<Label, PathBuf> {
|
||||
let state = ExtractState { ignore_mime: false };
|
||||
let mut group = HashMap::new();
|
||||
'pattern: for (l, pat) in &pattern.pattern {
|
||||
let item = PileValue::String(Arc::new(path_str.into()));
|
||||
let mut target = String::new();
|
||||
for p in pat {
|
||||
match p {
|
||||
GroupSegment::Literal(x) => target.push_str(x),
|
||||
GroupSegment::Path(op) => {
|
||||
let res = match item.query(&state, op).await {
|
||||
Ok(Some(x)) => x,
|
||||
_ => continue 'pattern,
|
||||
};
|
||||
|
||||
let res = match res.as_str() {
|
||||
Some(x) => x,
|
||||
None => continue 'pattern,
|
||||
};
|
||||
|
||||
target.push_str(res);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let group_path: PathBuf = match target.parse() {
|
||||
Ok(x) => x,
|
||||
Err(_) => continue 'pattern,
|
||||
};
|
||||
|
||||
if !group_path.exists() {
|
||||
continue;
|
||||
}
|
||||
|
||||
group.insert(l.clone(), group_path);
|
||||
}
|
||||
|
||||
return group;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user