use chrono::{DateTime, Utc}; use pile_config::{ Label, pattern::{GroupPattern, GroupSegment}, }; use smartstring::{LazyCompact, SmartString}; use std::{ collections::{HashMap, HashSet}, path::PathBuf, sync::{Arc, OnceLock}, }; use walkdir::WalkDir; use crate::{ extract::traits::ExtractState, source::{DataSource, misc::path_ts_latest}, value::{Item, PileValue}, }; #[derive(Debug)] pub struct DirDataSource { pub name: Label, pub dir: PathBuf, pub pattern: GroupPattern, pub index: OnceLock, Item>>, } impl DirDataSource { pub async fn new( name: &Label, dir: PathBuf, pattern: GroupPattern, ) -> Result, std::io::Error> { let source = Arc::new(Self { name: name.clone(), dir, pattern, index: OnceLock::new(), }); // // MARK: list paths // let mut paths_items = HashSet::new(); let mut paths_grouped_items = HashSet::new(); 'entry: for entry in WalkDir::new(&source.dir) { let entry = match entry { Err(e) => { let msg = format!("walkdir error: {e:?}"); let err = e.into_io_error().unwrap_or(std::io::Error::other(msg)); return Err(err); } Ok(e) => e, }; if entry.file_type().is_dir() { continue; } let path = entry.into_path(); let path_str = match path.to_str() { Some(x) => x, None => continue 'entry, }; let groups = resolve_groups(&source.pattern, path_str).await; paths_grouped_items.extend(groups.into_values()); paths_items.insert(path); } // // MARK: resolve groups // let mut index = HashMap::new(); 'entry: for path in paths_items.difference(&paths_grouped_items) { let path_str = match path.to_str() { Some(x) => x, None => continue 'entry, }; let group = resolve_groups(&source.pattern, path_str).await; let group = group .into_iter() .map(|(k, group_path)| { ( k, Box::new(Item::File { source: Arc::clone(&source), mime: mime_guess::from_path(&group_path).first_or_octet_stream(), path: group_path.clone(), group: Arc::new(HashMap::new()), }), ) }) .collect::>(); let item = Item::File { source: Arc::clone(&source), mime: mime_guess::from_path(path).first_or_octet_stream(), path: path.into(), group: Arc::new(group), }; index.insert(item.key(), item); } source.index.get_or_init(|| index); Ok(source) } } impl DataSource for Arc { fn len(&self) -> usize { self.index.get().expect("index should be initialized").len() } #[expect(clippy::expect_used)] async fn get(&self, key: &str) -> Result, std::io::Error> { return Ok(self .index .get() .expect("index should be initialized") .get(key) .cloned()); } #[expect(clippy::expect_used)] fn iter(&self) -> impl Iterator { self.index .get() .expect("index should be initialized") .values() } async fn latest_change(&self) -> Result>, std::io::Error> { path_ts_latest(&self.dir) } } async fn resolve_groups(pattern: &GroupPattern, path_str: &str) -> HashMap { let state = ExtractState { ignore_mime: false }; let mut group = HashMap::new(); 'pattern: for (l, pat) in &pattern.pattern { let item = PileValue::String(Arc::new(path_str.into())); let mut target = String::new(); for p in pat { match p { GroupSegment::Literal(x) => target.push_str(x), GroupSegment::Path(op) => { let res = match item.query(&state, op).await { Ok(Some(x)) => x, _ => continue 'pattern, }; let res = match res.as_str() { Some(x) => x, None => continue 'pattern, }; target.push_str(res); } } } let group_path: PathBuf = match target.parse() { Ok(x) => x, Err(_) => continue 'pattern, }; if !group_path.exists() { continue; } group.insert(l.clone(), group_path); } return group; }