Compare commits
1 Commits
8a8e0a2770
...
e7afca3010
| Author | SHA1 | Date | |
|---|---|---|---|
| e7afca3010 |
37
README.md
Normal file
37
README.md
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
# TODO
|
||||||
|
|
||||||
|
- redo sidecars
|
||||||
|
- sidecar manipulation commands:
|
||||||
|
- fill (defaults, defined in toml)
|
||||||
|
- delete
|
||||||
|
- writing to files would be nice, but we want to be able to revert changes
|
||||||
|
- make sure to `upload` sidecars
|
||||||
|
- json extractor
|
||||||
|
|
||||||
|
- incremental index
|
||||||
|
- better errors (s3 not found)
|
||||||
|
|
||||||
|
## later
|
||||||
|
|
||||||
|
- search ui (betalupi books + handouts)
|
||||||
|
- lists
|
||||||
|
- export
|
||||||
|
- libgen search?
|
||||||
|
- remote encryption
|
||||||
|
- publish (arch, nix, crates)
|
||||||
|
|
||||||
|
- extractors
|
||||||
|
- ogg
|
||||||
|
- regex (from any string, filename, etc)
|
||||||
|
- whisper (speech-to-text)
|
||||||
|
- ocr (pdf pages, pass through fn!)
|
||||||
|
- list, fields on schemas instead of fields
|
||||||
|
- blobs as items or items as blobs?
|
||||||
|
- default args for each label (to_json, count)?
|
||||||
|
- which fields to include in json?
|
||||||
|
- nonempty (array)
|
||||||
|
|
||||||
|
- redo docs
|
||||||
|
- source types
|
||||||
|
- sidecars (rename?)
|
||||||
|
- doc all keys
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
use std::{fmt, str::FromStr};
|
use std::{fmt, str::FromStr};
|
||||||
|
|
||||||
use serde::{
|
use serde::{
|
||||||
Deserialize, Deserializer, Serialize, Serializer,
|
Deserialize, Deserializer,
|
||||||
de::{self, Visitor},
|
de::{self, Visitor},
|
||||||
};
|
};
|
||||||
use smartstring::{LazyCompact, SmartString};
|
use smartstring::{LazyCompact, SmartString};
|
||||||
@@ -49,13 +49,6 @@ pub enum PathSegment {
|
|||||||
|
|
||||||
/// Go to an element of the current list
|
/// Go to an element of the current list
|
||||||
Index(i64),
|
Index(i64),
|
||||||
|
|
||||||
/// Go to a slice of the current list
|
|
||||||
Range {
|
|
||||||
start: i64,
|
|
||||||
end: i64,
|
|
||||||
inclusive: bool,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A path to a `PathSegment::Field` inside a nested object,
|
/// A path to a `PathSegment::Field` inside a nested object,
|
||||||
@@ -70,39 +63,6 @@ pub struct ObjectPath {
|
|||||||
pub segments: Vec<PathSegment>,
|
pub segments: Vec<PathSegment>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for ObjectPath {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
for seg in &self.segments {
|
|
||||||
match seg {
|
|
||||||
PathSegment::Root => write!(f, "$")?,
|
|
||||||
PathSegment::Field { name, args: None } => write!(f, ".{name}")?,
|
|
||||||
PathSegment::Field {
|
|
||||||
name,
|
|
||||||
args: Some(a),
|
|
||||||
} => write!(f, ".{name}({a})")?,
|
|
||||||
PathSegment::Index(i) => write!(f, "[{i}]")?,
|
|
||||||
PathSegment::Range {
|
|
||||||
start,
|
|
||||||
end,
|
|
||||||
inclusive: false,
|
|
||||||
} => write!(f, "[{start}..{end}]")?,
|
|
||||||
PathSegment::Range {
|
|
||||||
start,
|
|
||||||
end,
|
|
||||||
inclusive: true,
|
|
||||||
} => write!(f, "[{start}..={end}]")?,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Serialize for ObjectPath {
|
|
||||||
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
|
|
||||||
serializer.serialize_str(&self.to_string())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'de> Deserialize<'de> for ObjectPath {
|
impl<'de> Deserialize<'de> for ObjectPath {
|
||||||
fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
|
fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
|
||||||
struct PathVisitor;
|
struct PathVisitor;
|
||||||
|
|||||||
@@ -87,15 +87,6 @@ enum State {
|
|||||||
/// We are indexing an array, waiting for a number
|
/// We are indexing an array, waiting for a number
|
||||||
Index,
|
Index,
|
||||||
|
|
||||||
/// We parsed the start index, waiting for `]` or the first `.` of `..`
|
|
||||||
IndexAfterStart(i64),
|
|
||||||
|
|
||||||
/// We saw one `.` after the start index, waiting for the second `.`
|
|
||||||
IndexRangeDot1(i64),
|
|
||||||
|
|
||||||
/// We saw `..`, waiting for the end index (optionally prefixed with `=`)
|
|
||||||
IndexRangeDot2(i64),
|
|
||||||
|
|
||||||
/// We are indexing an array, waiting for a close-bracket
|
/// We are indexing an array, waiting for a close-bracket
|
||||||
IndexClose,
|
IndexClose,
|
||||||
}
|
}
|
||||||
@@ -173,7 +164,8 @@ impl Parser {
|
|||||||
}
|
}
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
self.state = State::IndexAfterStart(idx);
|
self.segments.push(PathSegment::Index(idx));
|
||||||
|
self.state = State::IndexClose;
|
||||||
}
|
}
|
||||||
|
|
||||||
(State::Index, (p, Token::Root))
|
(State::Index, (p, Token::Root))
|
||||||
@@ -183,49 +175,6 @@ impl Parser {
|
|||||||
return Err(PathParseError::Syntax { position: *p });
|
return Err(PathParseError::Syntax { position: *p });
|
||||||
}
|
}
|
||||||
|
|
||||||
(State::IndexAfterStart(idx), (_, Token::SqbClose)) => {
|
|
||||||
self.segments.push(PathSegment::Index(idx));
|
|
||||||
self.state = State::Selected;
|
|
||||||
}
|
|
||||||
(State::IndexAfterStart(idx), (_, Token::Dot)) => {
|
|
||||||
self.state = State::IndexRangeDot1(idx);
|
|
||||||
}
|
|
||||||
(State::IndexAfterStart(_), (p, _)) => {
|
|
||||||
return Err(PathParseError::Syntax { position: *p });
|
|
||||||
}
|
|
||||||
|
|
||||||
(State::IndexRangeDot1(idx), (_, Token::Dot)) => {
|
|
||||||
self.state = State::IndexRangeDot2(idx);
|
|
||||||
}
|
|
||||||
(State::IndexRangeDot1(_), (p, _)) => {
|
|
||||||
return Err(PathParseError::Syntax { position: *p });
|
|
||||||
}
|
|
||||||
|
|
||||||
(State::IndexRangeDot2(start), (p, Token::Ident(ident))) => {
|
|
||||||
let (end_str, inclusive) = if let Some(stripped) = ident.strip_prefix('=') {
|
|
||||||
(stripped, true)
|
|
||||||
} else {
|
|
||||||
(*ident, false)
|
|
||||||
};
|
|
||||||
|
|
||||||
let end: i64 = i64::from_str(end_str).map_err(|_err| {
|
|
||||||
PathParseError::InvalidIndexString {
|
|
||||||
position: *p,
|
|
||||||
str: (*ident).into(),
|
|
||||||
}
|
|
||||||
})?;
|
|
||||||
|
|
||||||
self.segments.push(PathSegment::Range {
|
|
||||||
start,
|
|
||||||
end,
|
|
||||||
inclusive,
|
|
||||||
});
|
|
||||||
self.state = State::IndexClose;
|
|
||||||
}
|
|
||||||
(State::IndexRangeDot2(_), (p, _)) => {
|
|
||||||
return Err(PathParseError::Syntax { position: *p });
|
|
||||||
}
|
|
||||||
|
|
||||||
(State::IndexClose, (_, Token::SqbClose)) => self.state = State::Selected,
|
(State::IndexClose, (_, Token::SqbClose)) => self.state = State::Selected,
|
||||||
(State::IndexClose, (p, _)) => {
|
(State::IndexClose, (p, _)) => {
|
||||||
return Err(PathParseError::Syntax { position: *p });
|
return Err(PathParseError::Syntax { position: *p });
|
||||||
@@ -238,9 +187,6 @@ impl Parser {
|
|||||||
State::Start => Err(PathParseError::Syntax { position: 0 }),
|
State::Start => Err(PathParseError::Syntax { position: 0 }),
|
||||||
State::Dot => Err(PathParseError::Syntax { position }),
|
State::Dot => Err(PathParseError::Syntax { position }),
|
||||||
State::Index => Err(PathParseError::Syntax { position }),
|
State::Index => Err(PathParseError::Syntax { position }),
|
||||||
State::IndexAfterStart(_) => Err(PathParseError::Syntax { position }),
|
|
||||||
State::IndexRangeDot1(_) => Err(PathParseError::Syntax { position }),
|
|
||||||
State::IndexRangeDot2(_) => Err(PathParseError::Syntax { position }),
|
|
||||||
State::IndexClose => Err(PathParseError::Syntax { position }),
|
State::IndexClose => Err(PathParseError::Syntax { position }),
|
||||||
State::Selected => Ok(()),
|
State::Selected => Ok(()),
|
||||||
}?;
|
}?;
|
||||||
@@ -441,46 +387,4 @@ mod tests {
|
|||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// MARK: range
|
|
||||||
|
|
||||||
fn range(start: i64, end: i64, inclusive: bool) -> PathSegment {
|
|
||||||
PathSegment::Range {
|
|
||||||
start,
|
|
||||||
end,
|
|
||||||
inclusive,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn exclusive_range() {
|
|
||||||
parse_test(
|
|
||||||
"$.a[0..5]",
|
|
||||||
Ok(&[PathSegment::Root, field("a"), range(0, 5, false)]),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn inclusive_range() {
|
|
||||||
parse_test(
|
|
||||||
"$.a[1..=2]",
|
|
||||||
Ok(&[PathSegment::Root, field("a"), range(1, 2, true)]),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn range_with_negative_end() {
|
|
||||||
parse_test(
|
|
||||||
"$.a[0..-1]",
|
|
||||||
Ok(&[PathSegment::Root, field("a"), range(0, -1, false)]),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn range_with_negative_start() {
|
|
||||||
parse_test(
|
|
||||||
"$.a[-3..-1]",
|
|
||||||
Ok(&[PathSegment::Root, field("a"), range(-3, -1, false)]),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -21,52 +21,7 @@ impl Tokenizer {
|
|||||||
let mut tokens = Vec::new();
|
let mut tokens = Vec::new();
|
||||||
|
|
||||||
let mut window_start = None;
|
let mut window_start = None;
|
||||||
// Paren depth: while > 0, `.` / `[` / `]` / `$` are part of the ident.
|
|
||||||
let mut paren_depth: usize = 0;
|
|
||||||
// When true, the current char is escaped by a preceding `\` and is
|
|
||||||
// treated as a plain ident character with no special meaning.
|
|
||||||
let mut skip_next = false;
|
|
||||||
|
|
||||||
for (i, c) in source.char_indices() {
|
for (i, c) in source.char_indices() {
|
||||||
if skip_next {
|
|
||||||
skip_next = false;
|
|
||||||
// Escaped char: just extend the ident window (already opened by `\`).
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if c == '\\' {
|
|
||||||
if window_start.is_none() {
|
|
||||||
window_start = Some(i);
|
|
||||||
}
|
|
||||||
skip_next = true;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if paren_depth > 0 {
|
|
||||||
// Inside parens: only track depth changes, everything else is ident.
|
|
||||||
match c {
|
|
||||||
'(' => {
|
|
||||||
if window_start.is_none() {
|
|
||||||
window_start = Some(i);
|
|
||||||
}
|
|
||||||
paren_depth += 1;
|
|
||||||
}
|
|
||||||
')' => {
|
|
||||||
if window_start.is_none() {
|
|
||||||
window_start = Some(i);
|
|
||||||
}
|
|
||||||
paren_depth -= 1;
|
|
||||||
}
|
|
||||||
x if x.is_ascii() => {
|
|
||||||
if window_start.is_none() {
|
|
||||||
window_start = Some(i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
char => return Err(PathParseError::NonAsciiChar { position: i, char }),
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
match c {
|
match c {
|
||||||
'$' => {
|
'$' => {
|
||||||
if let Some(s) = window_start.take() {
|
if let Some(s) = window_start.take() {
|
||||||
@@ -96,26 +51,10 @@ impl Tokenizer {
|
|||||||
tokens.push((i, Token::SqbClose));
|
tokens.push((i, Token::SqbClose));
|
||||||
}
|
}
|
||||||
|
|
||||||
'(' => {
|
x if x.is_ascii() => match window_start {
|
||||||
if window_start.is_none() {
|
None => window_start = Some(i),
|
||||||
window_start = Some(i);
|
Some(_) => continue,
|
||||||
}
|
},
|
||||||
paren_depth += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
')' => {
|
|
||||||
if window_start.is_none() {
|
|
||||||
window_start = Some(i);
|
|
||||||
}
|
|
||||||
// paren_depth is 0 here — stray `)` is an ident char and
|
|
||||||
// parse_field will surface the error later.
|
|
||||||
}
|
|
||||||
|
|
||||||
x if x.is_ascii() => {
|
|
||||||
if window_start.is_none() {
|
|
||||||
window_start = Some(i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
char => return Err(PathParseError::NonAsciiChar { position: i, char }),
|
char => return Err(PathParseError::NonAsciiChar { position: i, char }),
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -122,14 +122,6 @@ mod tests {
|
|||||||
GroupSegment::Literal(s.into())
|
GroupSegment::Literal(s.into())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn regex() {
|
|
||||||
assert_eq!(
|
|
||||||
parse("{$.split(/)[-1].regex((.*).pub \\((.*)\\).pdf)[0]}").unwrap(),
|
|
||||||
vec![(0, path("$.split(/)[-1].regex((.*).pub \\((.*)\\).pdf)[0]"))]
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn single_path() {
|
fn single_path() {
|
||||||
assert_eq!(parse("{$.foo}").unwrap(), vec![(0, path("$.foo"))]);
|
assert_eq!(parse("{$.foo}").unwrap(), vec![(0, path("$.foo"))]);
|
||||||
|
|||||||
@@ -1,7 +1,5 @@
|
|||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use pile_config::{
|
use pile_config::{ConfigToml, Label, Source, objectpath::ObjectPath};
|
||||||
ConfigToml, DatasetConfig, Label, Source, objectpath::ObjectPath, pattern::GroupPattern,
|
|
||||||
};
|
|
||||||
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
||||||
use pile_value::{
|
use pile_value::{
|
||||||
extract::traits::ExtractState,
|
extract::traits::ExtractState,
|
||||||
@@ -68,109 +66,15 @@ impl Dataset {
|
|||||||
|
|
||||||
/// An opened dataset: config, working directory, and all opened sources.
|
/// An opened dataset: config, working directory, and all opened sources.
|
||||||
pub struct Datasets {
|
pub struct Datasets {
|
||||||
pub path_config: Option<PathBuf>,
|
pub path_config: PathBuf,
|
||||||
pub path_parent: PathBuf,
|
pub path_parent: PathBuf,
|
||||||
pub path_workdir: Option<PathBuf>,
|
pub path_workdir: PathBuf,
|
||||||
|
|
||||||
pub config: ConfigToml,
|
pub config: ConfigToml,
|
||||||
pub sources: HashMap<Label, Dataset>,
|
pub sources: HashMap<Label, Dataset>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Datasets {
|
impl Datasets {
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
pub fn virt_source() -> Label {
|
|
||||||
Label::new("virtual-source").unwrap()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
pub async fn virt(parent: impl Into<PathBuf>) -> Result<Self, std::io::Error> {
|
|
||||||
let path_parent = parent.into();
|
|
||||||
|
|
||||||
let config = ConfigToml {
|
|
||||||
dataset: DatasetConfig {
|
|
||||||
name: Label::new("virtual-dataset").unwrap(),
|
|
||||||
working_dir: None,
|
|
||||||
|
|
||||||
source: [(
|
|
||||||
Self::virt_source(),
|
|
||||||
Source::Filesystem {
|
|
||||||
enabled: true,
|
|
||||||
path: path_parent.clone(),
|
|
||||||
pattern: GroupPattern::default(),
|
|
||||||
},
|
|
||||||
)]
|
|
||||||
.into_iter()
|
|
||||||
.collect(),
|
|
||||||
},
|
|
||||||
schema: HashMap::new(),
|
|
||||||
fts: None,
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut sources = HashMap::new();
|
|
||||||
for (label, source) in &config.dataset.source {
|
|
||||||
match source {
|
|
||||||
Source::Filesystem {
|
|
||||||
enabled,
|
|
||||||
path,
|
|
||||||
pattern,
|
|
||||||
} => {
|
|
||||||
if !enabled {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
sources.insert(
|
|
||||||
label.clone(),
|
|
||||||
Dataset::Dir(
|
|
||||||
DirDataSource::new(label, path_parent.join(path), pattern.clone())
|
|
||||||
.await?,
|
|
||||||
),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
Source::S3 {
|
|
||||||
enabled,
|
|
||||||
bucket,
|
|
||||||
prefix,
|
|
||||||
endpoint,
|
|
||||||
region,
|
|
||||||
credentials,
|
|
||||||
pattern,
|
|
||||||
} => {
|
|
||||||
if !enabled {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
match S3DataSource::new(
|
|
||||||
label,
|
|
||||||
bucket.clone(),
|
|
||||||
prefix.clone(),
|
|
||||||
endpoint.clone(),
|
|
||||||
region.clone(),
|
|
||||||
credentials,
|
|
||||||
pattern.clone(),
|
|
||||||
)
|
|
||||||
.await
|
|
||||||
{
|
|
||||||
Ok(ds) => {
|
|
||||||
sources.insert(label.clone(), Dataset::S3(ds));
|
|
||||||
}
|
|
||||||
Err(err) => {
|
|
||||||
warn!("Could not open S3 source {label}: {err}");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return Ok(Self {
|
|
||||||
path_config: None,
|
|
||||||
path_workdir: None,
|
|
||||||
path_parent,
|
|
||||||
config,
|
|
||||||
sources,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn open(config: impl Into<PathBuf>) -> Result<Self, std::io::Error> {
|
pub async fn open(config: impl Into<PathBuf>) -> Result<Self, std::io::Error> {
|
||||||
let path_config = config.into();
|
let path_config = config.into();
|
||||||
let path_parent = path_config
|
let path_parent = path_config
|
||||||
@@ -264,9 +168,9 @@ impl Datasets {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return Ok(Self {
|
return Ok(Self {
|
||||||
path_config: Some(path_config),
|
path_config,
|
||||||
path_workdir: Some(path_workdir),
|
|
||||||
path_parent,
|
path_parent,
|
||||||
|
path_workdir,
|
||||||
config,
|
config,
|
||||||
sources,
|
sources,
|
||||||
});
|
});
|
||||||
@@ -312,16 +216,8 @@ impl Datasets {
|
|||||||
_threads: usize,
|
_threads: usize,
|
||||||
flag: Option<CancelFlag>,
|
flag: Option<CancelFlag>,
|
||||||
) -> Result<(), CancelableTaskError<DatasetError>> {
|
) -> Result<(), CancelableTaskError<DatasetError>> {
|
||||||
let workdir = match self.path_workdir.as_ref() {
|
let fts_tmp_dir = self.path_workdir.join(".tmp-fts");
|
||||||
Some(x) => x,
|
let fts_dir = self.path_workdir.join("fts");
|
||||||
None => {
|
|
||||||
warn!("Skipping fts_refresh, no workdir");
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let fts_tmp_dir = workdir.join(".tmp-fts");
|
|
||||||
let fts_dir = workdir.join("fts");
|
|
||||||
|
|
||||||
if fts_tmp_dir.is_dir() {
|
if fts_tmp_dir.is_dir() {
|
||||||
warn!("Removing temporary index in {}", fts_dir.display());
|
warn!("Removing temporary index in {}", fts_dir.display());
|
||||||
@@ -419,15 +315,7 @@ impl Datasets {
|
|||||||
query: &str,
|
query: &str,
|
||||||
top_n: usize,
|
top_n: usize,
|
||||||
) -> Result<Vec<FtsLookupResult>, DatasetError> {
|
) -> Result<Vec<FtsLookupResult>, DatasetError> {
|
||||||
let workdir = match self.path_workdir.as_ref() {
|
let fts_dir = self.path_workdir.join("fts");
|
||||||
Some(x) => x,
|
|
||||||
None => {
|
|
||||||
warn!("Skipping fts_lookup, no workdir");
|
|
||||||
return Ok(Vec::new());
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let fts_dir = workdir.join("fts");
|
|
||||||
|
|
||||||
if !fts_dir.exists() {
|
if !fts_dir.exists() {
|
||||||
return Err(DatasetError::NoFtsIndex);
|
return Err(DatasetError::NoFtsIndex);
|
||||||
@@ -447,12 +335,7 @@ impl Datasets {
|
|||||||
|
|
||||||
/// Time at which fts was created
|
/// Time at which fts was created
|
||||||
pub fn ts_fts(&self) -> Result<Option<DateTime<Utc>>, std::io::Error> {
|
pub fn ts_fts(&self) -> Result<Option<DateTime<Utc>>, std::io::Error> {
|
||||||
let workdir = match self.path_workdir.as_ref() {
|
let fts_dir = self.path_workdir.join("fts");
|
||||||
Some(x) => x,
|
|
||||||
None => return Ok(None),
|
|
||||||
};
|
|
||||||
|
|
||||||
let fts_dir = workdir.join("fts");
|
|
||||||
|
|
||||||
if !fts_dir.exists() {
|
if !fts_dir.exists() {
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
|
|||||||
@@ -112,8 +112,8 @@ impl DbFtsIndex {
|
|||||||
// Try paths in order, using the first value we find
|
// Try paths in order, using the first value we find
|
||||||
for path in field.path.as_slice() {
|
for path in field.path.as_slice() {
|
||||||
let val = match extractor.query(state, path).await? {
|
let val = match extractor.query(state, path).await? {
|
||||||
Some(PileValue::Null) | None => continue,
|
|
||||||
Some(x) => x,
|
Some(x) => x,
|
||||||
|
None => continue,
|
||||||
};
|
};
|
||||||
|
|
||||||
let val = val_to_string(state, &val, path, field_name).await?;
|
let val = val_to_string(state, &val, path, field_name).await?;
|
||||||
|
|||||||
@@ -54,33 +54,4 @@ impl ObjectExtractor for EpubExtractor {
|
|||||||
Label::new("meta").unwrap(),
|
Label::new("meta").unwrap(),
|
||||||
])
|
])
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn to_json(&self, state: &ExtractState) -> Result<serde_json::Value, std::io::Error> {
|
|
||||||
let keys = self.fields().await?;
|
|
||||||
let mut map = serde_json::Map::new();
|
|
||||||
for k in &keys {
|
|
||||||
let v = match self.field(state, k, None).await? {
|
|
||||||
Some(x) => x,
|
|
||||||
None => continue,
|
|
||||||
};
|
|
||||||
|
|
||||||
if k.as_str() == "text" {
|
|
||||||
map.insert(
|
|
||||||
k.to_string(),
|
|
||||||
serde_json::Value::String(format!(
|
|
||||||
"<String ({} bytes)",
|
|
||||||
match v {
|
|
||||||
PileValue::String(x) => x.len(),
|
|
||||||
_ => 0,
|
|
||||||
}
|
|
||||||
)),
|
|
||||||
);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
map.insert(k.to_string(), Box::pin(v.to_json(state)).await?);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(serde_json::Value::Object(map))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ use crate::{
|
|||||||
use pile_config::Label;
|
use pile_config::Label;
|
||||||
use std::{
|
use std::{
|
||||||
collections::HashMap,
|
collections::HashMap,
|
||||||
path::{Component, PathBuf},
|
path::Component,
|
||||||
sync::{Arc, OnceLock},
|
sync::{Arc, OnceLock},
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -27,20 +27,22 @@ impl FsExtractor {
|
|||||||
return Ok(x);
|
return Ok(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
let path = PathBuf::from(self.item.key().as_str());
|
let Item::File { path, .. } = &self.item else {
|
||||||
|
return Ok(self.output.get_or_init(HashMap::new));
|
||||||
|
};
|
||||||
|
|
||||||
let mut root = false;
|
let mut root = false;
|
||||||
let components = path
|
let components = path
|
||||||
.components()
|
.components()
|
||||||
.filter_map(|x| match x {
|
.map(|x| match x {
|
||||||
Component::CurDir => None,
|
Component::CurDir => None,
|
||||||
Component::Normal(x) => Some(x.to_str().map(|x| x.to_owned())),
|
Component::Normal(x) => x.to_str().map(|x| x.to_owned()),
|
||||||
Component::ParentDir => Some(Some("..".to_owned())),
|
Component::ParentDir => Some("..".to_owned()),
|
||||||
Component::RootDir => {
|
Component::RootDir => {
|
||||||
root = true;
|
root = true;
|
||||||
Some(None)
|
None
|
||||||
}
|
}
|
||||||
Component::Prefix(x) => Some(x.as_os_str().to_str().map(|x| x.to_owned())),
|
Component::Prefix(x) => x.as_os_str().to_str().map(|x| x.to_owned()),
|
||||||
})
|
})
|
||||||
.collect::<Option<Vec<_>>>();
|
.collect::<Option<Vec<_>>>();
|
||||||
|
|
||||||
@@ -67,7 +69,6 @@ impl FsExtractor {
|
|||||||
(
|
(
|
||||||
Label::new("segments").unwrap(),
|
Label::new("segments").unwrap(),
|
||||||
components
|
components
|
||||||
.clone()
|
|
||||||
.map(|x| {
|
.map(|x| {
|
||||||
PileValue::Array(Arc::new(
|
PileValue::Array(Arc::new(
|
||||||
x.iter()
|
x.iter()
|
||||||
@@ -77,12 +78,6 @@ impl FsExtractor {
|
|||||||
})
|
})
|
||||||
.unwrap_or(PileValue::Null),
|
.unwrap_or(PileValue::Null),
|
||||||
),
|
),
|
||||||
(
|
|
||||||
Label::new("name").unwrap(),
|
|
||||||
components
|
|
||||||
.and_then(|x| x.last().map(|x| PileValue::String(Arc::new(x.into()))))
|
|
||||||
.unwrap_or(PileValue::Null),
|
|
||||||
),
|
|
||||||
]);
|
]);
|
||||||
|
|
||||||
return Ok(self.output.get_or_init(|| output));
|
return Ok(self.output.get_or_init(|| output));
|
||||||
|
|||||||
@@ -68,33 +68,4 @@ impl ObjectExtractor for PdfExtractor {
|
|||||||
Label::new("pages").unwrap(),
|
Label::new("pages").unwrap(),
|
||||||
])
|
])
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn to_json(&self, state: &ExtractState) -> Result<serde_json::Value, std::io::Error> {
|
|
||||||
let keys = self.fields().await?;
|
|
||||||
let mut map = serde_json::Map::new();
|
|
||||||
for k in &keys {
|
|
||||||
let v = match self.field(state, k, None).await? {
|
|
||||||
Some(x) => x,
|
|
||||||
None => continue,
|
|
||||||
};
|
|
||||||
|
|
||||||
if k.as_str() == "text" {
|
|
||||||
map.insert(
|
|
||||||
k.to_string(),
|
|
||||||
serde_json::Value::String(format!(
|
|
||||||
"<String ({} bytes)",
|
|
||||||
match v {
|
|
||||||
PileValue::String(x) => x.len(),
|
|
||||||
_ => 0,
|
|
||||||
}
|
|
||||||
)),
|
|
||||||
);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
map.insert(k.to_string(), Box::pin(v.to_json(state)).await?);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(serde_json::Value::Object(map))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -77,12 +77,15 @@ impl ObjectExtractor for RegexExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn as_list(&self) -> Option<Arc<dyn ListExtractor>> {
|
fn as_list(&self) -> Option<Arc<dyn ListExtractor>> {
|
||||||
Some(Arc::new(RegexExtractor(self.0.clone())))
|
Some(Arc::new(RegexListExtractor(self.0.clone())))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Exposes unnamed (positional) capture groups as a list (group 0 excluded).
|
||||||
|
pub struct RegexListExtractor(Arc<RegexData>);
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl ListExtractor for RegexExtractor {
|
impl ListExtractor for RegexListExtractor {
|
||||||
async fn get(
|
async fn get(
|
||||||
&self,
|
&self,
|
||||||
_state: &ExtractState,
|
_state: &ExtractState,
|
||||||
|
|||||||
@@ -75,12 +75,10 @@ impl ObjectExtractor for StringExtractor {
|
|||||||
let Ok(re) = Regex::new(pattern) else {
|
let Ok(re) = Regex::new(pattern) else {
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
};
|
};
|
||||||
Some(
|
Some(match RegexExtractor::new(Arc::new(re), self.item.as_str()) {
|
||||||
match RegexExtractor::new(Arc::new(re), self.item.as_str()) {
|
|
||||||
Some(ext) => PileValue::ObjectExtractor(Arc::new(ext)),
|
Some(ext) => PileValue::ObjectExtractor(Arc::new(ext)),
|
||||||
None => PileValue::Null,
|
None => PileValue::Null,
|
||||||
},
|
})
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
_ => None,
|
_ => None,
|
||||||
|
|||||||
@@ -40,22 +40,11 @@ impl Item {
|
|||||||
Self::File { path, .. } => ItemReader::File(File::open(path)?),
|
Self::File { path, .. } => ItemReader::File(File::open(path)?),
|
||||||
|
|
||||||
Self::S3 { source, key, .. } => {
|
Self::S3 { source, key, .. } => {
|
||||||
let full_key: SmartString<LazyCompact> = match &source.prefix {
|
|
||||||
None => key.clone(),
|
|
||||||
Some(p) => {
|
|
||||||
if p.ends_with('/') {
|
|
||||||
format!("{p}{key}").into()
|
|
||||||
} else {
|
|
||||||
format!("{p}/{key}").into()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let head = source
|
let head = source
|
||||||
.client
|
.client
|
||||||
.head_object()
|
.head_object()
|
||||||
.bucket(source.bucket.as_str())
|
.bucket(source.bucket.as_str())
|
||||||
.key(full_key.as_str())
|
.key(key.as_str())
|
||||||
.send()
|
.send()
|
||||||
.await
|
.await
|
||||||
.map_err(std::io::Error::other)?;
|
.map_err(std::io::Error::other)?;
|
||||||
@@ -65,7 +54,7 @@ impl Item {
|
|||||||
ItemReader::S3(S3Reader {
|
ItemReader::S3(S3Reader {
|
||||||
client: source.client.clone(),
|
client: source.client.clone(),
|
||||||
bucket: source.bucket.clone(),
|
bucket: source.bucket.clone(),
|
||||||
key: full_key,
|
key: key.to_owned(),
|
||||||
cursor: 0,
|
cursor: 0,
|
||||||
size,
|
size,
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -86,9 +86,9 @@ impl PileValue {
|
|||||||
Self::String(_) => Arc::new(VecExtractor::default()),
|
Self::String(_) => Arc::new(VecExtractor::default()),
|
||||||
Self::Blob { .. } => Arc::new(VecExtractor::default()),
|
Self::Blob { .. } => Arc::new(VecExtractor::default()),
|
||||||
Self::ListExtractor(e) => e.clone(),
|
Self::ListExtractor(e) => e.clone(),
|
||||||
Self::ObjectExtractor(e) => e
|
Self::ObjectExtractor(e) => {
|
||||||
.as_list()
|
e.as_list().unwrap_or_else(|| Arc::new(VecExtractor::default()))
|
||||||
.unwrap_or_else(|| Arc::new(VecExtractor::default())),
|
}
|
||||||
Self::Item(_) => Arc::new(VecExtractor::default()),
|
Self::Item(_) => Arc::new(VecExtractor::default()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -140,40 +140,6 @@ impl PileValue {
|
|||||||
|
|
||||||
out = e.get(state, idx).await?;
|
out = e.get(state, idx).await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
PathSegment::Range {
|
|
||||||
start,
|
|
||||||
end,
|
|
||||||
inclusive,
|
|
||||||
} => {
|
|
||||||
let e = match out.map(|x| x.list_extractor()) {
|
|
||||||
Some(e) => e,
|
|
||||||
None => {
|
|
||||||
out = None;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let len = e.len(state).await? as i64;
|
|
||||||
|
|
||||||
let start_idx = if *start >= 0 { *start } else { len + start };
|
|
||||||
let end_idx = if *end >= 0 { *end } else { len + end };
|
|
||||||
let end_idx = if *inclusive { end_idx + 1 } else { end_idx };
|
|
||||||
|
|
||||||
let start_idx = start_idx.max(0) as usize;
|
|
||||||
let end_idx = (end_idx.max(0) as usize).min(len as usize);
|
|
||||||
|
|
||||||
let mut items = Vec::new();
|
|
||||||
for i in start_idx..end_idx {
|
|
||||||
match e.get(state, i).await? {
|
|
||||||
Some(v) => items.push(v),
|
|
||||||
None => break,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: lazy view?
|
|
||||||
out = Some(PileValue::Array(Arc::new(items)));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,113 +0,0 @@
|
|||||||
use anyhow::{Context, Result};
|
|
||||||
use clap::Args;
|
|
||||||
use pile_config::{Label, objectpath::ObjectPath};
|
|
||||||
use pile_dataset::Datasets;
|
|
||||||
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
|
||||||
use pile_value::{extract::traits::ExtractState, value::PileValue};
|
|
||||||
use std::path::PathBuf;
|
|
||||||
|
|
||||||
use crate::{CliCmd, GlobalContext};
|
|
||||||
|
|
||||||
#[derive(Debug, Args)]
|
|
||||||
pub struct ItemCommand {
|
|
||||||
/// Source name (as defined in pile.toml)
|
|
||||||
source: String,
|
|
||||||
|
|
||||||
/// Item key within the source
|
|
||||||
key: String,
|
|
||||||
|
|
||||||
/// If present, extract a specific field
|
|
||||||
#[arg(long, short = 'p')]
|
|
||||||
path: Option<String>,
|
|
||||||
|
|
||||||
/// If present, print the schema fields instead of item data
|
|
||||||
#[arg(long)]
|
|
||||||
fields: bool,
|
|
||||||
|
|
||||||
#[arg(long, short = 'x')]
|
|
||||||
exclude: Vec<String>,
|
|
||||||
|
|
||||||
/// Path to dataset config
|
|
||||||
#[arg(long, short = 'c', default_value = "./pile.toml")]
|
|
||||||
config: PathBuf,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl CliCmd for ItemCommand {
|
|
||||||
#[expect(clippy::print_stdout)]
|
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
async fn run(
|
|
||||||
self,
|
|
||||||
_ctx: GlobalContext,
|
|
||||||
_flag: CancelFlag,
|
|
||||||
) -> Result<i32, CancelableTaskError<anyhow::Error>> {
|
|
||||||
let source = Label::new(&self.source)
|
|
||||||
.ok_or_else(|| anyhow::anyhow!("invalid source name {:?}", self.source))?;
|
|
||||||
|
|
||||||
let ds = Datasets::open(&self.config)
|
|
||||||
.await
|
|
||||||
.with_context(|| format!("while opening dataset for {}", self.config.display()))?;
|
|
||||||
|
|
||||||
let state = ExtractState { ignore_mime: false };
|
|
||||||
|
|
||||||
let item = ds.get(&source, &self.key).await.ok_or_else(|| {
|
|
||||||
anyhow::anyhow!("{:?} not found in source {:?}", self.key, self.source)
|
|
||||||
})?;
|
|
||||||
let pv = PileValue::Item(item);
|
|
||||||
|
|
||||||
if self.fields {
|
|
||||||
let mut map = serde_json::Map::new();
|
|
||||||
for (name, spec) in &ds.config.schema {
|
|
||||||
if self.exclude.contains(&name.to_string()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut value = None;
|
|
||||||
for path in &spec.path {
|
|
||||||
let v = pv
|
|
||||||
.query(&state, path)
|
|
||||||
.await
|
|
||||||
.with_context(|| format!("while extracting field {name}"))?;
|
|
||||||
if let Some(v) = v
|
|
||||||
&& !matches!(v, PileValue::Null)
|
|
||||||
{
|
|
||||||
let j = v
|
|
||||||
.to_json(&state)
|
|
||||||
.await
|
|
||||||
.with_context(|| format!("while extracting field {name}"))?;
|
|
||||||
value = Some(j);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
map.insert(name.to_string(), value.unwrap_or(serde_json::Value::Null));
|
|
||||||
}
|
|
||||||
let json = serde_json::to_string_pretty(&serde_json::Value::Object(map)).unwrap();
|
|
||||||
println!("{json}");
|
|
||||||
return Ok(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
let json = if let Some(path_str) = self.path {
|
|
||||||
let path: ObjectPath = path_str
|
|
||||||
.parse()
|
|
||||||
.with_context(|| format!("invalid path {path_str:?}"))?;
|
|
||||||
|
|
||||||
let v = pv
|
|
||||||
.query(&state, &path)
|
|
||||||
.await
|
|
||||||
.with_context(|| format!("while extracting {}", self.key))?
|
|
||||||
.ok_or_else(|| {
|
|
||||||
anyhow::anyhow!("{:?} not found in source {:?}", self.key, self.source)
|
|
||||||
})?;
|
|
||||||
v.to_json(&state)
|
|
||||||
.await
|
|
||||||
.with_context(|| format!("while extracting {}", self.key))?
|
|
||||||
} else {
|
|
||||||
pv.to_json(&state)
|
|
||||||
.await
|
|
||||||
.with_context(|| format!("while extracting {}", self.key))?
|
|
||||||
};
|
|
||||||
|
|
||||||
let json = serde_json::to_string_pretty(&json).unwrap();
|
|
||||||
println!("{json}");
|
|
||||||
return Ok(0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -8,7 +8,6 @@ mod check;
|
|||||||
mod fields;
|
mod fields;
|
||||||
mod index;
|
mod index;
|
||||||
mod init;
|
mod init;
|
||||||
mod item;
|
|
||||||
mod list;
|
mod list;
|
||||||
mod lookup;
|
mod lookup;
|
||||||
mod probe;
|
mod probe;
|
||||||
@@ -60,18 +59,12 @@ pub enum SubCommand {
|
|||||||
cmd: fields::FieldsCommand,
|
cmd: fields::FieldsCommand,
|
||||||
},
|
},
|
||||||
|
|
||||||
/// Print all metadata from a file
|
/// Print all metadata from an item
|
||||||
Probe {
|
Probe {
|
||||||
#[command(flatten)]
|
#[command(flatten)]
|
||||||
cmd: probe::ProbeCommand,
|
cmd: probe::ProbeCommand,
|
||||||
},
|
},
|
||||||
|
|
||||||
/// Print all metadata from an item
|
|
||||||
Item {
|
|
||||||
#[command(flatten)]
|
|
||||||
cmd: item::ItemCommand,
|
|
||||||
},
|
|
||||||
|
|
||||||
/// Expose a dataset via an http api
|
/// Expose a dataset via an http api
|
||||||
Serve {
|
Serve {
|
||||||
#[command(flatten)]
|
#[command(flatten)]
|
||||||
@@ -95,7 +88,6 @@ impl CliCmdDispatch for SubCommand {
|
|||||||
Self::Lookup { cmd } => cmd.start(ctx),
|
Self::Lookup { cmd } => cmd.start(ctx),
|
||||||
Self::Fields { cmd } => cmd.start(ctx),
|
Self::Fields { cmd } => cmd.start(ctx),
|
||||||
Self::Probe { cmd } => cmd.start(ctx),
|
Self::Probe { cmd } => cmd.start(ctx),
|
||||||
Self::Item { cmd } => cmd.start(ctx),
|
|
||||||
Self::Serve { cmd } => cmd.start(ctx),
|
Self::Serve { cmd } => cmd.start(ctx),
|
||||||
Self::Upload { cmd } => cmd.start(ctx),
|
Self::Upload { cmd } => cmd.start(ctx),
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
use anyhow::{Context, Result};
|
use anyhow::{Context, Result};
|
||||||
use clap::Args;
|
use clap::Args;
|
||||||
use pile_config::objectpath::ObjectPath;
|
use pile_config::{Label, objectpath::ObjectPath};
|
||||||
use pile_dataset::Datasets;
|
use pile_dataset::Datasets;
|
||||||
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
||||||
use pile_value::{extract::traits::ExtractState, value::PileValue};
|
use pile_value::{extract::traits::ExtractState, value::PileValue};
|
||||||
@@ -10,12 +10,19 @@ use crate::{CliCmd, GlobalContext};
|
|||||||
|
|
||||||
#[derive(Debug, Args)]
|
#[derive(Debug, Args)]
|
||||||
pub struct ProbeCommand {
|
pub struct ProbeCommand {
|
||||||
/// The file to probe
|
/// Source name (as defined in pile.toml)
|
||||||
file: PathBuf,
|
source: String,
|
||||||
|
|
||||||
|
/// Item key within the source
|
||||||
|
key: String,
|
||||||
|
|
||||||
/// If present, extract a specific field
|
/// If present, extract a specific field
|
||||||
#[arg(long, short = 'p')]
|
#[arg(long, short = 'p')]
|
||||||
path: Option<String>,
|
path: Option<String>,
|
||||||
|
|
||||||
|
/// Path to dataset config
|
||||||
|
#[arg(long, short = 'c', default_value = "./pile.toml")]
|
||||||
|
config: PathBuf,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl CliCmd for ProbeCommand {
|
impl CliCmd for ProbeCommand {
|
||||||
@@ -26,37 +33,35 @@ impl CliCmd for ProbeCommand {
|
|||||||
_ctx: GlobalContext,
|
_ctx: GlobalContext,
|
||||||
_flag: CancelFlag,
|
_flag: CancelFlag,
|
||||||
) -> Result<i32, CancelableTaskError<anyhow::Error>> {
|
) -> Result<i32, CancelableTaskError<anyhow::Error>> {
|
||||||
let ds = Datasets::virt(".")
|
let source = Label::new(&self.source)
|
||||||
|
.ok_or_else(|| anyhow::anyhow!("invalid source name {:?}", self.source))?;
|
||||||
|
|
||||||
|
let ds = Datasets::open(&self.config)
|
||||||
.await
|
.await
|
||||||
.with_context(|| "while opening virtual dataset".to_owned())?;
|
.with_context(|| format!("while opening dataset for {}", self.config.display()))?;
|
||||||
|
|
||||||
let state = ExtractState { ignore_mime: false };
|
let state = ExtractState { ignore_mime: false };
|
||||||
let key = self.file.to_str().context("path is not utf-8")?;
|
|
||||||
|
|
||||||
let json = if let Some(path_str) = self.path {
|
let json = if let Some(path_str) = self.path {
|
||||||
let path: ObjectPath = path_str
|
let path: ObjectPath = path_str
|
||||||
.parse()
|
.parse()
|
||||||
.with_context(|| format!("invalid path {path_str:?}"))?;
|
.with_context(|| format!("invalid path {path_str:?}"))?;
|
||||||
|
|
||||||
ds.get_field(
|
ds.get_field(&state, &source, &self.key, &path)
|
||||||
&state,
|
|
||||||
&Datasets::virt_source(),
|
|
||||||
self.file.to_str().context("path is not utf-8")?,
|
|
||||||
&path,
|
|
||||||
)
|
|
||||||
.await
|
.await
|
||||||
.with_context(|| format!("while extracting {key}"))?
|
.with_context(|| format!("while extracting {}", self.key))?
|
||||||
.ok_or_else(|| anyhow::anyhow!("{key:?} not found"))?
|
.ok_or_else(|| {
|
||||||
|
anyhow::anyhow!("{:?} not found in source {:?}", self.key, self.source)
|
||||||
|
})?
|
||||||
} else {
|
} else {
|
||||||
let item = ds
|
let item = ds.get(&source, &self.key).await.ok_or_else(|| {
|
||||||
.get(&Datasets::virt_source(), key)
|
anyhow::anyhow!("{:?} not found in source {:?}", self.key, self.source)
|
||||||
.await
|
})?;
|
||||||
.ok_or_else(|| anyhow::anyhow!("{key:?} not found"))?;
|
|
||||||
|
|
||||||
let item = PileValue::Item(item);
|
let item = PileValue::Item(item);
|
||||||
item.to_json(&state)
|
item.to_json(&state)
|
||||||
.await
|
.await
|
||||||
.with_context(|| format!("while extracting {key}"))?
|
.with_context(|| format!("while extracting {}", self.key))?
|
||||||
};
|
};
|
||||||
|
|
||||||
let json = serde_json::to_string_pretty(&json).unwrap();
|
let json = serde_json::to_string_pretty(&json).unwrap();
|
||||||
|
|||||||
Reference in New Issue
Block a user