Refactor sidecars

This commit is contained in:
2026-03-16 22:24:30 -07:00
parent f2f5726d7b
commit 053459f340
25 changed files with 674 additions and 530 deletions

View File

@@ -1,143 +1,176 @@
use chrono::{DateTime, Utc};
use pile_config::Label;
use std::{path::PathBuf, sync::Arc};
use tokio_stream::wrappers::ReceiverStream;
use pile_config::{
Label,
pattern::{GroupPattern, GroupSegment},
};
use smartstring::{LazyCompact, SmartString};
use std::{
collections::{HashMap, HashSet},
path::PathBuf,
sync::{Arc, OnceLock},
};
use walkdir::WalkDir;
use crate::{
extract::traits::ExtractState,
source::{DataSource, misc::path_ts_latest},
value::Item,
value::{Item, PileValue},
};
/// A data source whose items are the files found under a directory.
///
/// NOTE(review): this listing interleaves pre- and post-refactor lines, so
/// `sidecars` appears next to `pattern`/`index`; presumably only one of the
/// two sets survives the commit — confirm against the real file.
#[derive(Debug)]
pub struct DirDataSource {
/// Label of this source; `new` stores a clone of its `name` argument here.
pub name: Label,
/// Root directory that is walked to discover files.
pub dir: PathBuf,
/// When true, `.toml` files are treated as sidecars of the same-stem file
/// instead of being returned as items themselves.
pub sidecars: bool,
/// Group patterns resolved against each file path to find its grouped files.
pub pattern: GroupPattern,
/// Lookup table from `Item::key()` to item, filled once at the end of `new`.
pub index: OnceLock<HashMap<SmartString<LazyCompact>, Item>>,
}
impl DirDataSource {
// NOTE(review): the next two lines look like the pre-refactor signature left
// in by the diff rendering; the async constructor below appears to replace
// them — confirm against the real file.
pub fn new(name: &Label, dir: PathBuf, sidecars: bool) -> Self {
Self {
/// Builds a `DirDataSource` and eagerly indexes the files under `dir`.
///
/// Walks `dir`, resolves `pattern` for every file to find the files grouped
/// with it, and stores one `Item::File` (carrying its resolved group) for
/// every walked file that was not claimed as a group member of some file.
///
/// # Errors
/// Returns the first error reported while walking `dir`.
pub async fn new(
name: &Label,
dir: PathBuf,
pattern: GroupPattern,
) -> Result<Arc<Self>, std::io::Error> {
let source = Arc::new(Self {
name: name.clone(),
dir,
// NOTE(review): `sidecars` is not a parameter of the async constructor;
// this line is likely a removed pre-refactor line — confirm.
sidecars,
pattern,
index: OnceLock::new(),
});
//
// MARK: list paths
//
// `paths_items`: every walked file with a UTF-8 path.
// `paths_grouped_items`: every path that some file resolved as a group member.
let mut paths_items = HashSet::new();
let mut paths_grouped_items = HashSet::new();
'entry: for entry in WalkDir::new(&source.dir) {
let entry = match entry {
Err(e) => {
// Prefer the underlying io::Error; otherwise wrap the debug text.
let msg = format!("walkdir error: {e:?}");
let err = e.into_io_error().unwrap_or(std::io::Error::other(msg));
return Err(err);
}
Ok(e) => e,
};
// Directories themselves are never items.
if entry.file_type().is_dir() {
continue;
}
let path = entry.into_path();
// Paths that cannot be represented as `&str` are silently skipped.
let path_str = match path.to_str() {
Some(x) => x,
None => continue 'entry,
};
let groups = resolve_groups(&source.pattern, path_str).await;
paths_grouped_items.extend(groups.into_values());
paths_items.insert(path);
}
//
// MARK: resolve groups
//
let mut index = HashMap::new();
// Only files that no file claimed as a group member become indexed items.
'entry: for path in paths_items.difference(&paths_grouped_items) {
let path_str = match path.to_str() {
Some(x) => x,
None => continue 'entry,
};
// Groups are resolved a second time here; the first pass kept only the paths.
let group = resolve_groups(&source.pattern, path_str).await;
let group = group
.into_iter()
.map(|(k, group_path)| {
(
k,
Box::new(Item::File {
source: Arc::clone(&source),
mime: mime_guess::from_path(&group_path).first_or_octet_stream(),
path: group_path.clone(),
// Group members get an empty group of their own.
group: Arc::new(HashMap::new()),
}),
)
})
.collect::<HashMap<_, _>>();
let item = Item::File {
source: Arc::clone(&source),
mime: mime_guess::from_path(path).first_or_octet_stream(),
path: path.into(),
group: Arc::new(group),
};
index.insert(item.key(), item);
}
// The lock was created empty above, so this call stores `index`.
source.index.get_or_init(|| index);
Ok(source)
}
}
// NOTE(review): this impl is shown as a rendered diff. Filesystem/sidecar-based
// lines and index-based lines are interleaved inside each method; the notes
// below describe what the text shows and should be confirmed against the real
// file.
impl DataSource for Arc<DirDataSource> {
/// Looks up a single item by `key`.
///
/// The index-based lines at the end of the body return a clone of the entry
/// stored under `key` in `self.index`, or `None` when there is no such entry.
///
/// # Panics
/// The index-based lines panic if `self.index` has not been initialized.
#[expect(clippy::expect_used)]
async fn get(&self, key: &str) -> Result<Option<Item>, std::io::Error> {
// NOTE(review): from here to the first `return Ok(Some(..))` the body reads
// as the pre-refactor lookup, which checks the filesystem directly. As
// written it makes the index lookup below unreachable — confirm these are
// removed lines.
let key = match key.parse::<PathBuf>() {
Ok(x) => self.dir.join(x),
Err(_) => return Ok(None),
};
if !key.is_file() {
return Ok(None);
}
// Ignore toml files if sidecars are enabled
if self.sidecars && key.extension().and_then(|x| x.to_str()) == Some("toml") {
return Ok(None);
}
return Ok(Some(Item::File {
source: Arc::clone(self),
mime: mime_guess::from_path(&key).first_or_octet_stream(),
path: key.clone(),
// Attach `<stem>.toml` as the sidecar when sidecars are enabled and that file exists.
sidecar: self
.sidecars
.then(|| {
let sidecar_path = key.with_extension("toml");
sidecar_path.is_file().then(|| {
Box::new(Item::File {
source: Arc::clone(self),
mime: mime_guess::from_path(&sidecar_path).first_or_octet_stream(),
path: sidecar_path,
sidecar: None,
})
})
})
.flatten(),
}));
// Index-based lookup: clone the stored item, if any.
return Ok(self
.index
.get()
.expect("index should be initialized")
.get(key)
.cloned());
}
// NOTE(review): two `iter` signatures follow. The first (streaming) one has
// no closing brace of its own in this listing and reads as the pre-refactor
// version; the second borrows from the index — confirm.
//
// Streaming version: walks `self.dir` on a blocking task and sends each item
// (or walk error) through a channel with capacity 64.
fn iter(&self) -> ReceiverStream<Result<Item, std::io::Error>> {
let (tx, rx) = tokio::sync::mpsc::channel(64);
let source = Arc::clone(self);
let dir = self.dir.clone();
tokio::task::spawn_blocking(move || {
for entry in WalkDir::new(dir) {
let entry = match entry {
Err(e) => {
let msg = format!("walkdir error: {e:?}");
let err = e.into_io_error().unwrap_or(std::io::Error::other(msg));
// Forward the error; stop walking if the send fails
// (presumably the receiver is gone).
if tx.blocking_send(Err(err)).is_err() {
return;
}
continue;
}
Ok(e) => e,
};
if entry.file_type().is_dir() {
continue;
}
let path = entry.into_path();
let item = match path.extension().and_then(|x| x.to_str()) {
// Files with no extension (or one that is not valid UTF-8) are skipped.
None => continue,
// With sidecars enabled, `.toml` files are not items themselves.
Some("toml") if source.sidecars => continue,
Some(_) => Item::File {
source: Arc::clone(&source),
mime: mime_guess::from_path(&path).first_or_octet_stream(),
path: path.clone(),
sidecar: source
.sidecars
.then(|| {
let sidecar_path = path.with_extension("toml");
sidecar_path.is_file().then(|| {
Box::new(Item::File {
source: Arc::clone(&source),
mime: mime_guess::from_path(&sidecar_path)
.first_or_octet_stream(),
path: sidecar_path,
sidecar: None,
})
})
})
.flatten(),
},
};
if tx.blocking_send(Ok(item)).is_err() {
return;
}
}
});
ReceiverStream::new(rx)
/// Iterates over every item stored in the index.
///
/// # Panics
/// Panics if `self.index` has not been initialized.
#[expect(clippy::expect_used)]
fn iter(&self) -> impl Iterator<Item = &Item> {
self.index
.get()
.expect("index should be initialized")
.values()
}
/// Returns the latest change timestamp that `path_ts_latest` reports for `self.dir`.
async fn latest_change(&self) -> Result<Option<DateTime<Utc>>, std::io::Error> {
// NOTE(review): the lines down to `return Ok(ts);` read as the pre-refactor
// body (they return `Ok(None)` when the directory does not exist); the final
// expression alone looks like the replacement — confirm.
let mut ts: Option<DateTime<Utc>> = None;
if !self.dir.exists() {
return Ok(None);
}
let new = path_ts_latest(&self.dir)?;
// `ts` starts as `None`, so this ends up holding `new` whenever it is `Some`.
match (ts, new) {
(_, None) => {}
(None, Some(new)) => ts = Some(new),
(Some(old), Some(new)) => ts = Some(old.max(new)),
};
return Ok(ts);
path_ts_latest(&self.dir)
}
}
/// Resolves every group pattern against a single file path.
///
/// For each `(label, segments)` entry in `pattern.pattern` the segments are
/// concatenated into a target path: literal segments are appended verbatim,
/// and path segments are replaced by the string obtained from querying
/// `path_str` with the segment's expression.
///
/// An entry is skipped when a query fails, yields nothing, yields a value
/// that is not a string, or when the resulting path does not exist on disk.
///
/// Returns a map from each label that resolved to the path it resolved to.
async fn resolve_groups(pattern: &GroupPattern, path_str: &str) -> HashMap<Label, PathBuf> {
    let state = ExtractState { ignore_mime: false };
    // The queried value depends only on `path_str`, so build it once instead
    // of re-allocating it for every pattern entry.
    let item = PileValue::String(Arc::new(path_str.into()));

    let mut group = HashMap::new();
    'pattern: for (label, segments) in &pattern.pattern {
        let mut target = String::new();
        for segment in segments {
            match segment {
                GroupSegment::Literal(text) => target.push_str(text),
                GroupSegment::Path(op) => {
                    // Any failure to produce a string abandons this pattern entry.
                    let Ok(Some(value)) = item.query(&state, op).await else {
                        continue 'pattern;
                    };
                    let Some(text) = value.as_str() else {
                        continue 'pattern;
                    };
                    target.push_str(text);
                }
            }
        }

        // Converting a `String` into a `PathBuf` cannot fail.
        let group_path = PathBuf::from(target);
        // Only keep group members that are actually present on disk.
        if !group_path.exists() {
            continue;
        }
        group.insert(label.clone(), group_path);
    }

    group
}