Auto-update fts index
This commit is contained in:
4
Cargo.lock
generated
4
Cargo.lock
generated
@@ -1041,7 +1041,6 @@ dependencies = [
|
|||||||
"pile-toolbox",
|
"pile-toolbox",
|
||||||
"serde",
|
"serde",
|
||||||
"signal-hook",
|
"signal-hook",
|
||||||
"tantivy",
|
|
||||||
"tokio",
|
"tokio",
|
||||||
"toml",
|
"toml",
|
||||||
"tracing",
|
"tracing",
|
||||||
@@ -1084,8 +1083,11 @@ dependencies = [
|
|||||||
"jsonpath-rust",
|
"jsonpath-rust",
|
||||||
"pile-audio",
|
"pile-audio",
|
||||||
"pile-config",
|
"pile-config",
|
||||||
|
"pile-toolbox",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"tantivy",
|
"tantivy",
|
||||||
|
"thiserror",
|
||||||
|
"toml",
|
||||||
"tracing",
|
"tracing",
|
||||||
"walkdir",
|
"walkdir",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ str_to_string = "deny"
|
|||||||
string_add = "deny"
|
string_add = "deny"
|
||||||
implicit_clone = "deny"
|
implicit_clone = "deny"
|
||||||
use_debug = "allow"
|
use_debug = "allow"
|
||||||
verbose_file_reads = "deny"
|
verbose_file_reads = "allow"
|
||||||
large_types_passed_by_value = "deny"
|
large_types_passed_by_value = "deny"
|
||||||
wildcard_dependencies = "deny"
|
wildcard_dependencies = "deny"
|
||||||
negative_feature_names = "deny"
|
negative_feature_names = "deny"
|
||||||
@@ -58,7 +58,6 @@ unwrap_used = "warn"
|
|||||||
expect_used = "warn"
|
expect_used = "warn"
|
||||||
type_complexity = "allow"
|
type_complexity = "allow"
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# MARK: dependencies
|
# MARK: dependencies
|
||||||
#
|
#
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ impl Display for PictureTypeError {
|
|||||||
impl std::error::Error for PictureTypeError {}
|
impl std::error::Error for PictureTypeError {}
|
||||||
|
|
||||||
/// A picture type according to the ID3v2 APIC frame
|
/// A picture type according to the ID3v2 APIC frame
|
||||||
#[allow(missing_docs)]
|
#[expect(missing_docs)]
|
||||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||||
pub enum PictureType {
|
pub enum PictureType {
|
||||||
Other,
|
Other,
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ use crate::flac::blocks::{FlacMetablockDecode, FlacMetablockEncode, FlacPictureB
|
|||||||
use super::tagtype::TagType;
|
use super::tagtype::TagType;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
#[allow(missing_docs)]
|
#[expect(missing_docs)]
|
||||||
pub enum VorbisCommentDecodeError {
|
pub enum VorbisCommentDecodeError {
|
||||||
/// We encountered an IoError while processing a block
|
/// We encountered an IoError while processing a block
|
||||||
IoError(std::io::Error),
|
IoError(std::io::Error),
|
||||||
@@ -74,7 +74,7 @@ impl From<FromUtf8Error> for VorbisCommentDecodeError {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
#[allow(missing_docs)]
|
#[expect(missing_docs)]
|
||||||
pub enum VorbisCommentEncodeError {
|
pub enum VorbisCommentEncodeError {
|
||||||
/// We encountered an IoError while processing a block
|
/// We encountered an IoError while processing a block
|
||||||
IoError(std::io::Error),
|
IoError(std::io::Error),
|
||||||
@@ -132,39 +132,52 @@ impl VorbisComment {
|
|||||||
let mut block = [0u8; 4];
|
let mut block = [0u8; 4];
|
||||||
|
|
||||||
let vendor = {
|
let vendor = {
|
||||||
#[expect(clippy::map_err_ignore)]
|
|
||||||
d.read_exact(&mut block)
|
d.read_exact(&mut block)
|
||||||
.map_err(|_| VorbisCommentDecodeError::MalformedData)?;
|
.map_err(|_err| VorbisCommentDecodeError::MalformedData)?;
|
||||||
|
|
||||||
let length = u32::from_le_bytes(block);
|
let length = u32::from_le_bytes(block);
|
||||||
let mut text = vec![0u8; length.try_into().unwrap()];
|
|
||||||
|
|
||||||
#[expect(clippy::map_err_ignore)]
|
#[expect(clippy::expect_used)]
|
||||||
|
let mut text = vec![
|
||||||
|
0u8;
|
||||||
|
length
|
||||||
|
.try_into()
|
||||||
|
.expect("vendor length does not fit into usize")
|
||||||
|
];
|
||||||
|
|
||||||
d.read_exact(&mut text)
|
d.read_exact(&mut text)
|
||||||
.map_err(|_| VorbisCommentDecodeError::MalformedData)?;
|
.map_err(|_err| VorbisCommentDecodeError::MalformedData)?;
|
||||||
|
|
||||||
String::from_utf8(text)?
|
String::from_utf8(text)?
|
||||||
};
|
};
|
||||||
|
|
||||||
#[expect(clippy::map_err_ignore)]
|
|
||||||
d.read_exact(&mut block)
|
d.read_exact(&mut block)
|
||||||
.map_err(|_| VorbisCommentDecodeError::MalformedData)?;
|
.map_err(|_err| VorbisCommentDecodeError::MalformedData)?;
|
||||||
let n_comments: usize = u32::from_le_bytes(block).try_into().unwrap();
|
|
||||||
|
#[expect(clippy::expect_used)]
|
||||||
|
let n_comments: usize = u32::from_le_bytes(block)
|
||||||
|
.try_into()
|
||||||
|
.expect("comment count does not fit into usize");
|
||||||
|
|
||||||
let mut comments = Vec::new();
|
let mut comments = Vec::new();
|
||||||
let mut pictures = Vec::new();
|
let mut pictures = Vec::new();
|
||||||
for _ in 0..n_comments {
|
for _ in 0..n_comments {
|
||||||
let comment = {
|
let comment = {
|
||||||
#[expect(clippy::map_err_ignore)]
|
|
||||||
d.read_exact(&mut block)
|
d.read_exact(&mut block)
|
||||||
.map_err(|_| VorbisCommentDecodeError::MalformedData)?;
|
.map_err(|_err| VorbisCommentDecodeError::MalformedData)?;
|
||||||
|
|
||||||
let length = u32::from_le_bytes(block);
|
let length = u32::from_le_bytes(block);
|
||||||
let mut text = vec![0u8; length.try_into().unwrap()];
|
|
||||||
|
|
||||||
#[expect(clippy::map_err_ignore)]
|
#[expect(clippy::expect_used)]
|
||||||
|
let mut text = vec![
|
||||||
|
0u8;
|
||||||
|
length
|
||||||
|
.try_into()
|
||||||
|
.expect("comment length does not fit into usize")
|
||||||
|
];
|
||||||
|
|
||||||
d.read_exact(&mut text)
|
d.read_exact(&mut text)
|
||||||
.map_err(|_| VorbisCommentDecodeError::MalformedData)?;
|
.map_err(|_err| VorbisCommentDecodeError::MalformedData)?;
|
||||||
|
|
||||||
String::from_utf8(text)?
|
String::from_utf8(text)?
|
||||||
};
|
};
|
||||||
@@ -218,9 +231,10 @@ impl VorbisComment {
|
|||||||
|
|
||||||
impl VorbisComment {
|
impl VorbisComment {
|
||||||
/// Get the number of bytes that `encode()` will write.
|
/// Get the number of bytes that `encode()` will write.
|
||||||
|
#[expect(clippy::expect_used)]
|
||||||
pub fn get_len(&self) -> u32 {
|
pub fn get_len(&self) -> u32 {
|
||||||
let mut sum: u32 = 0;
|
let mut sum: u32 = 0;
|
||||||
sum += u32::try_from(self.vendor.len()).unwrap() + 4;
|
sum += u32::try_from(self.vendor.len()).expect("vendor length does not fit into u32") + 4;
|
||||||
sum += 4;
|
sum += 4;
|
||||||
|
|
||||||
for (tagtype, value) in &self.comments {
|
for (tagtype, value) in &self.comments {
|
||||||
@@ -244,7 +258,8 @@ impl VorbisComment {
|
|||||||
.to_uppercase();
|
.to_uppercase();
|
||||||
|
|
||||||
let str = format!("{tagtype_str}={value}");
|
let str = format!("{tagtype_str}={value}");
|
||||||
sum += 4 + u32::try_from(str.len()).unwrap();
|
sum +=
|
||||||
|
4 + u32::try_from(str.len()).expect("comment string length does not fit into u32");
|
||||||
}
|
}
|
||||||
|
|
||||||
for p in &self.pictures {
|
for p in &self.pictures {
|
||||||
@@ -271,13 +286,18 @@ impl VorbisComment {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Try to encode this vorbis comment
|
/// Try to encode this vorbis comment
|
||||||
|
#[expect(clippy::expect_used)]
|
||||||
pub fn encode(&self, target: &mut impl Write) -> Result<(), VorbisCommentEncodeError> {
|
pub fn encode(&self, target: &mut impl Write) -> Result<(), VorbisCommentEncodeError> {
|
||||||
target.write_all(&u32::try_from(self.vendor.len()).unwrap().to_le_bytes())?;
|
target.write_all(
|
||||||
|
&u32::try_from(self.vendor.len())
|
||||||
|
.expect("vendor length does not fit into u32")
|
||||||
|
.to_le_bytes(),
|
||||||
|
)?;
|
||||||
target.write_all(self.vendor.as_bytes())?;
|
target.write_all(self.vendor.as_bytes())?;
|
||||||
|
|
||||||
target.write_all(
|
target.write_all(
|
||||||
&u32::try_from(self.comments.len() + self.pictures.len())
|
&u32::try_from(self.comments.len() + self.pictures.len())
|
||||||
.unwrap()
|
.expect("total comment count does not fit into u32")
|
||||||
.to_le_bytes(),
|
.to_le_bytes(),
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
@@ -302,7 +322,11 @@ impl VorbisComment {
|
|||||||
.to_uppercase();
|
.to_uppercase();
|
||||||
|
|
||||||
let str = format!("{tagtype_str}={value}");
|
let str = format!("{tagtype_str}={value}");
|
||||||
target.write_all(&u32::try_from(str.len()).unwrap().to_le_bytes())?;
|
target.write_all(
|
||||||
|
&u32::try_from(str.len())
|
||||||
|
.expect("comment string length does not fit into u32")
|
||||||
|
.to_le_bytes(),
|
||||||
|
)?;
|
||||||
target.write_all(str.as_bytes())?;
|
target.write_all(str.as_bytes())?;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -318,7 +342,11 @@ impl VorbisComment {
|
|||||||
&base64::prelude::BASE64_STANDARD.encode(&pic_data)
|
&base64::prelude::BASE64_STANDARD.encode(&pic_data)
|
||||||
);
|
);
|
||||||
|
|
||||||
target.write_all(&u32::try_from(pic_string.len()).unwrap().to_le_bytes())?;
|
target.write_all(
|
||||||
|
&u32::try_from(pic_string.len())
|
||||||
|
.expect("picture string length does not fit into u32")
|
||||||
|
.to_le_bytes(),
|
||||||
|
)?;
|
||||||
target.write_all(pic_string.as_bytes())?;
|
target.write_all(pic_string.as_bytes())?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -88,7 +88,7 @@ enum FlacBlockType {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
#[allow(missing_docs)]
|
#[expect(missing_docs)]
|
||||||
pub enum FlacBlock {
|
pub enum FlacBlock {
|
||||||
Streaminfo(FlacStreaminfoBlock),
|
Streaminfo(FlacStreaminfoBlock),
|
||||||
Picture(FlacPictureBlock),
|
Picture(FlacPictureBlock),
|
||||||
@@ -212,9 +212,16 @@ impl FlacBlockReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
'outer: while last_read_size != 0 {
|
'outer: while last_read_size != 0 {
|
||||||
match self.current_block.as_mut().unwrap() {
|
#[expect(clippy::expect_used)]
|
||||||
|
match self
|
||||||
|
.current_block
|
||||||
|
.as_mut()
|
||||||
|
.expect("current_block is Some, checked above")
|
||||||
|
{
|
||||||
FlacBlockType::MagicBits { data, left_to_read } => {
|
FlacBlockType::MagicBits { data, left_to_read } => {
|
||||||
last_read_size = buf.read(&mut data[4 - *left_to_read..4]).unwrap();
|
last_read_size = buf
|
||||||
|
.read(&mut data[4 - *left_to_read..4])
|
||||||
|
.map_err(FlacDecodeError::from)?;
|
||||||
*left_to_read -= last_read_size;
|
*left_to_read -= last_read_size;
|
||||||
|
|
||||||
if *left_to_read == 0 {
|
if *left_to_read == 0 {
|
||||||
@@ -235,7 +242,9 @@ impl FlacBlockReader {
|
|||||||
data,
|
data,
|
||||||
left_to_read,
|
left_to_read,
|
||||||
} => {
|
} => {
|
||||||
last_read_size = buf.read(&mut data[4 - *left_to_read..4]).unwrap();
|
last_read_size = buf
|
||||||
|
.read(&mut data[4 - *left_to_read..4])
|
||||||
|
.map_err(FlacDecodeError::from)?;
|
||||||
*left_to_read -= last_read_size;
|
*left_to_read -= last_read_size;
|
||||||
|
|
||||||
if *left_to_read == 0 {
|
if *left_to_read == 0 {
|
||||||
@@ -253,13 +262,24 @@ impl FlacBlockReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
FlacBlockType::MetaBlock { header, data } => {
|
FlacBlockType::MetaBlock { header, data } => {
|
||||||
|
#[expect(clippy::expect_used)]
|
||||||
|
{
|
||||||
last_read_size = buf
|
last_read_size = buf
|
||||||
.by_ref()
|
.by_ref()
|
||||||
.take(u64::from(header.length) - u64::try_from(data.len()).unwrap())
|
.take(
|
||||||
|
u64::from(header.length)
|
||||||
|
- u64::try_from(data.len())
|
||||||
|
.expect("data length does not fit into u64"),
|
||||||
|
)
|
||||||
.read_to_end(data)
|
.read_to_end(data)
|
||||||
.unwrap();
|
.map_err(FlacDecodeError::from)?;
|
||||||
|
}
|
||||||
|
|
||||||
if data.len() == usize::try_from(header.length).unwrap() {
|
#[expect(clippy::expect_used)]
|
||||||
|
if data.len()
|
||||||
|
== usize::try_from(header.length)
|
||||||
|
.expect("header length does not fit into usize")
|
||||||
|
{
|
||||||
// If we picked this block type, add it to the queue
|
// If we picked this block type, add it to the queue
|
||||||
if self.selector.should_pick_meta(header.block_type) {
|
if self.selector.should_pick_meta(header.block_type) {
|
||||||
let b = FlacBlock::decode(header.block_type, data)?;
|
let b = FlacBlock::decode(header.block_type, data)?;
|
||||||
@@ -283,7 +303,11 @@ impl FlacBlockReader {
|
|||||||
// Limit the number of bytes we read at once, so we don't re-clone
|
// Limit the number of bytes we read at once, so we don't re-clone
|
||||||
// large amounts of data if `buf` contains multiple sync sequences.
|
// large amounts of data if `buf` contains multiple sync sequences.
|
||||||
// 5kb is a pretty reasonable frame size.
|
// 5kb is a pretty reasonable frame size.
|
||||||
last_read_size = buf.by_ref().take(5_000).read_to_end(data).unwrap();
|
last_read_size = buf
|
||||||
|
.by_ref()
|
||||||
|
.take(5_000)
|
||||||
|
.read_to_end(data)
|
||||||
|
.map_err(FlacDecodeError::from)?;
|
||||||
if last_read_size == 0 {
|
if last_read_size == 0 {
|
||||||
continue 'outer;
|
continue 'outer;
|
||||||
}
|
}
|
||||||
@@ -335,9 +359,10 @@ impl FlacBlockReader {
|
|||||||
|
|
||||||
// Backtrack to the first bit AFTER this new sync sequence
|
// Backtrack to the first bit AFTER this new sync sequence
|
||||||
buf.seek(std::io::SeekFrom::Current(
|
buf.seek(std::io::SeekFrom::Current(
|
||||||
-i64::try_from(data.len() - i).unwrap(),
|
-i64::try_from(data.len() - i)
|
||||||
|
.expect("seek offset does not fit into i64"),
|
||||||
))
|
))
|
||||||
.unwrap();
|
.map_err(FlacDecodeError::from)?;
|
||||||
|
|
||||||
self.current_block = Some(FlacBlockType::AudioData {
|
self.current_block = Some(FlacBlockType::AudioData {
|
||||||
data: {
|
data: {
|
||||||
@@ -406,6 +431,7 @@ mod tests {
|
|||||||
flac::tests::{FlacBlockOutput, FlacTestCase, VorbisCommentTestValue, manifest},
|
flac::tests::{FlacBlockOutput, FlacTestCase, VorbisCommentTestValue, manifest},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#[expect(clippy::unwrap_used)]
|
||||||
fn read_file(
|
fn read_file(
|
||||||
test_case: &FlacTestCase,
|
test_case: &FlacTestCase,
|
||||||
fragment_size_range: Option<Range<usize>>,
|
fragment_size_range: Option<Range<usize>>,
|
||||||
@@ -447,6 +473,7 @@ mod tests {
|
|||||||
return Ok(out_blocks);
|
return Ok(out_blocks);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[expect(clippy::unwrap_used)]
|
||||||
fn test_identical(
|
fn test_identical(
|
||||||
test_case: &FlacTestCase,
|
test_case: &FlacTestCase,
|
||||||
fragment_size_range: Option<Range<usize>>,
|
fragment_size_range: Option<Range<usize>>,
|
||||||
|
|||||||
@@ -50,8 +50,11 @@ impl FlacMetablockDecode for FlacApplicationBlock {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl FlacMetablockEncode for FlacApplicationBlock {
|
impl FlacMetablockEncode for FlacApplicationBlock {
|
||||||
|
#[expect(clippy::expect_used)]
|
||||||
fn get_len(&self) -> u32 {
|
fn get_len(&self) -> u32 {
|
||||||
(self.data.len() + 4).try_into().unwrap()
|
(self.data.len() + 4)
|
||||||
|
.try_into()
|
||||||
|
.expect("application block size does not fit into u32")
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encode(
|
fn encode(
|
||||||
|
|||||||
@@ -25,8 +25,12 @@ impl FlacMetablockDecode for FlacCuesheetBlock {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl FlacMetablockEncode for FlacCuesheetBlock {
|
impl FlacMetablockEncode for FlacCuesheetBlock {
|
||||||
|
#[expect(clippy::expect_used)]
|
||||||
fn get_len(&self) -> u32 {
|
fn get_len(&self) -> u32 {
|
||||||
self.data.len().try_into().unwrap()
|
self.data
|
||||||
|
.len()
|
||||||
|
.try_into()
|
||||||
|
.expect("cuesheet size does not fit into u32")
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encode(
|
fn encode(
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ use std::fmt::Debug;
|
|||||||
use crate::flac::errors::{FlacDecodeError, FlacEncodeError};
|
use crate::flac::errors::{FlacDecodeError, FlacEncodeError};
|
||||||
|
|
||||||
/// A type of flac metadata block
|
/// A type of flac metadata block
|
||||||
#[allow(missing_docs)]
|
#[expect(missing_docs)]
|
||||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||||
pub enum FlacMetablockType {
|
pub enum FlacMetablockType {
|
||||||
Streaminfo,
|
Streaminfo,
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
//! Read and write impelementations for all flac block types
|
//! Read and write implementations for all flac block types
|
||||||
|
|
||||||
// Not metadata blocks
|
// Not metadata blocks
|
||||||
mod header;
|
mod header;
|
||||||
|
|||||||
@@ -12,13 +12,17 @@ pub struct FlacPaddingBlock {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl FlacMetablockDecode for FlacPaddingBlock {
|
impl FlacMetablockDecode for FlacPaddingBlock {
|
||||||
|
#[expect(clippy::expect_used)]
|
||||||
fn decode(data: &[u8]) -> Result<Self, FlacDecodeError> {
|
fn decode(data: &[u8]) -> Result<Self, FlacDecodeError> {
|
||||||
if data.iter().any(|x| *x != 0u8) {
|
if data.iter().any(|x| *x != 0u8) {
|
||||||
return Err(FlacDecodeError::MalformedBlock);
|
return Err(FlacDecodeError::MalformedBlock);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
size: data.len().try_into().unwrap(),
|
size: data
|
||||||
|
.len()
|
||||||
|
.try_into()
|
||||||
|
.expect("padding size does not fit into u32"),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -63,16 +63,17 @@ impl FlacMetablockDecode for FlacPictureBlock {
|
|||||||
|
|
||||||
// Image format
|
// Image format
|
||||||
let mime = {
|
let mime = {
|
||||||
#[expect(clippy::map_err_ignore)]
|
|
||||||
d.read_exact(&mut block)
|
d.read_exact(&mut block)
|
||||||
.map_err(|_| FlacDecodeError::MalformedBlock)?;
|
.map_err(|_err| FlacDecodeError::MalformedBlock)?;
|
||||||
|
|
||||||
let mime_length = u32::from_be_bytes(block).try_into().unwrap();
|
#[expect(clippy::expect_used)]
|
||||||
|
let mime_length = u32::from_be_bytes(block)
|
||||||
|
.try_into()
|
||||||
|
.expect("mime length does not fit into usize");
|
||||||
let mut mime = vec![0u8; mime_length];
|
let mut mime = vec![0u8; mime_length];
|
||||||
|
|
||||||
#[expect(clippy::map_err_ignore)]
|
|
||||||
d.read_exact(&mut mime)
|
d.read_exact(&mut mime)
|
||||||
.map_err(|_| FlacDecodeError::MalformedBlock)?;
|
.map_err(|_err| FlacDecodeError::MalformedBlock)?;
|
||||||
|
|
||||||
String::from_utf8(mime)
|
String::from_utf8(mime)
|
||||||
.ok()
|
.ok()
|
||||||
@@ -82,16 +83,17 @@ impl FlacMetablockDecode for FlacPictureBlock {
|
|||||||
|
|
||||||
// Image description
|
// Image description
|
||||||
let description = {
|
let description = {
|
||||||
#[expect(clippy::map_err_ignore)]
|
|
||||||
d.read_exact(&mut block)
|
d.read_exact(&mut block)
|
||||||
.map_err(|_| FlacDecodeError::MalformedBlock)?;
|
.map_err(|_err| FlacDecodeError::MalformedBlock)?;
|
||||||
|
|
||||||
let desc_length = u32::from_be_bytes(block).try_into().unwrap();
|
#[expect(clippy::expect_used)]
|
||||||
|
let desc_length = u32::from_be_bytes(block)
|
||||||
|
.try_into()
|
||||||
|
.expect("description length does not fit into usize");
|
||||||
let mut desc = vec![0u8; desc_length];
|
let mut desc = vec![0u8; desc_length];
|
||||||
|
|
||||||
#[expect(clippy::map_err_ignore)]
|
|
||||||
d.read_exact(&mut desc)
|
d.read_exact(&mut desc)
|
||||||
.map_err(|_| FlacDecodeError::MalformedBlock)?;
|
.map_err(|_err| FlacDecodeError::MalformedBlock)?;
|
||||||
|
|
||||||
String::from_utf8(desc)?
|
String::from_utf8(desc)?
|
||||||
};
|
};
|
||||||
@@ -122,16 +124,17 @@ impl FlacMetablockDecode for FlacPictureBlock {
|
|||||||
|
|
||||||
// Image data length
|
// Image data length
|
||||||
let img_data = {
|
let img_data = {
|
||||||
#[expect(clippy::map_err_ignore)]
|
|
||||||
d.read_exact(&mut block)
|
d.read_exact(&mut block)
|
||||||
.map_err(|_| FlacDecodeError::MalformedBlock)?;
|
.map_err(|_err| FlacDecodeError::MalformedBlock)?;
|
||||||
|
|
||||||
let data_length = u32::from_be_bytes(block).try_into().unwrap();
|
#[expect(clippy::expect_used)]
|
||||||
|
let data_length = u32::from_be_bytes(block)
|
||||||
|
.try_into()
|
||||||
|
.expect("image data length does not fit into usize");
|
||||||
let mut img_data = vec![0u8; data_length];
|
let mut img_data = vec![0u8; data_length];
|
||||||
|
|
||||||
#[expect(clippy::map_err_ignore)]
|
|
||||||
d.read_exact(&mut img_data)
|
d.read_exact(&mut img_data)
|
||||||
.map_err(|_| FlacDecodeError::MalformedBlock)?;
|
.map_err(|_err| FlacDecodeError::MalformedBlock)?;
|
||||||
|
|
||||||
img_data
|
img_data
|
||||||
};
|
};
|
||||||
@@ -150,13 +153,14 @@ impl FlacMetablockDecode for FlacPictureBlock {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl FlacMetablockEncode for FlacPictureBlock {
|
impl FlacMetablockEncode for FlacPictureBlock {
|
||||||
|
#[expect(clippy::expect_used)]
|
||||||
fn get_len(&self) -> u32 {
|
fn get_len(&self) -> u32 {
|
||||||
(4 + (4 + self.mime.to_string().len())
|
(4 + (4 + self.mime.to_string().len())
|
||||||
+ (4 + self.description.len())
|
+ (4 + self.description.len())
|
||||||
+ 4 + 4 + 4
|
+ 4 + 4 + 4
|
||||||
+ 4 + (4 + self.img_data.len()))
|
+ 4 + (4 + self.img_data.len()))
|
||||||
.try_into()
|
.try_into()
|
||||||
.unwrap()
|
.expect("picture block size does not fit into u32")
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encode(
|
fn encode(
|
||||||
@@ -176,12 +180,22 @@ impl FlacMetablockEncode for FlacPictureBlock {
|
|||||||
|
|
||||||
target.write_all(&self.picture_type.to_idx().to_be_bytes())?;
|
target.write_all(&self.picture_type.to_idx().to_be_bytes())?;
|
||||||
|
|
||||||
|
#[expect(clippy::expect_used)]
|
||||||
|
{
|
||||||
let mime = self.mime.to_string();
|
let mime = self.mime.to_string();
|
||||||
target.write_all(&u32::try_from(mime.len()).unwrap().to_be_bytes())?;
|
target.write_all(
|
||||||
|
&u32::try_from(mime.len())
|
||||||
|
.expect("mime length does not fit into u32")
|
||||||
|
.to_be_bytes(),
|
||||||
|
)?;
|
||||||
target.write_all(self.mime.to_string().as_bytes())?;
|
target.write_all(self.mime.to_string().as_bytes())?;
|
||||||
drop(mime);
|
drop(mime);
|
||||||
|
|
||||||
target.write_all(&u32::try_from(self.description.len()).unwrap().to_be_bytes())?;
|
target.write_all(
|
||||||
|
&u32::try_from(self.description.len())
|
||||||
|
.expect("description length does not fit into u32")
|
||||||
|
.to_be_bytes(),
|
||||||
|
)?;
|
||||||
target.write_all(self.description.as_bytes())?;
|
target.write_all(self.description.as_bytes())?;
|
||||||
|
|
||||||
target.write_all(&self.width.to_be_bytes())?;
|
target.write_all(&self.width.to_be_bytes())?;
|
||||||
@@ -189,7 +203,12 @@ impl FlacMetablockEncode for FlacPictureBlock {
|
|||||||
target.write_all(&self.bit_depth.to_be_bytes())?;
|
target.write_all(&self.bit_depth.to_be_bytes())?;
|
||||||
target.write_all(&self.color_count.to_be_bytes())?;
|
target.write_all(&self.color_count.to_be_bytes())?;
|
||||||
|
|
||||||
target.write_all(&u32::try_from(self.img_data.len()).unwrap().to_be_bytes())?;
|
target.write_all(
|
||||||
|
&u32::try_from(self.img_data.len())
|
||||||
|
.expect("image data length does not fit into u32")
|
||||||
|
.to_be_bytes(),
|
||||||
|
)?;
|
||||||
|
}
|
||||||
target.write_all(&self.img_data)?;
|
target.write_all(&self.img_data)?;
|
||||||
|
|
||||||
return Ok(());
|
return Ok(());
|
||||||
|
|||||||
@@ -25,8 +25,12 @@ impl FlacMetablockDecode for FlacSeektableBlock {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl FlacMetablockEncode for FlacSeektableBlock {
|
impl FlacMetablockEncode for FlacSeektableBlock {
|
||||||
|
#[expect(clippy::expect_used)]
|
||||||
fn get_len(&self) -> u32 {
|
fn get_len(&self) -> u32 {
|
||||||
self.data.len().try_into().unwrap()
|
self.data
|
||||||
|
.len()
|
||||||
|
.try_into()
|
||||||
|
.expect("seektable size does not fit into u32")
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encode(
|
fn encode(
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ use crate::common::{
|
|||||||
use std::string::FromUtf8Error;
|
use std::string::FromUtf8Error;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
#[allow(missing_docs)]
|
#[expect(missing_docs)]
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum FlacDecodeError {
|
pub enum FlacDecodeError {
|
||||||
/// FLAC does not start with 0x66 0x4C 0x61 0x43
|
/// FLAC does not start with 0x66 0x4C 0x61 0x43
|
||||||
@@ -46,7 +46,7 @@ pub enum FlacDecodeError {
|
|||||||
PictureTypeError(#[from] PictureTypeError),
|
PictureTypeError(#[from] PictureTypeError),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(missing_docs)]
|
#[expect(missing_docs)]
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum FlacEncodeError {
|
pub enum FlacEncodeError {
|
||||||
/// We encountered an i/o error while processing
|
/// We encountered an i/o error while processing
|
||||||
|
|||||||
@@ -213,6 +213,7 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// A list of test files and their expected output
|
/// A list of test files and their expected output
|
||||||
|
#[expect(clippy::unwrap_used)]
|
||||||
pub fn manifest() -> [FlacTestCase; 23] {
|
pub fn manifest() -> [FlacTestCase; 23] {
|
||||||
[
|
[
|
||||||
FlacTestCase::Error {
|
FlacTestCase::Error {
|
||||||
|
|||||||
@@ -88,7 +88,11 @@ impl FlacMetaStrip {
|
|||||||
// We don't need to store audioframes in our last_block buffer,
|
// We don't need to store audioframes in our last_block buffer,
|
||||||
// since they do not have an `is_last` flag.
|
// since they do not have an `is_last` flag.
|
||||||
if matches!(self.last_block, Some(FlacBlock::AudioFrame(_))) {
|
if matches!(self.last_block, Some(FlacBlock::AudioFrame(_))) {
|
||||||
let x = self.last_block.take().unwrap();
|
#[expect(clippy::expect_used)]
|
||||||
|
let x = self
|
||||||
|
.last_block
|
||||||
|
.take()
|
||||||
|
.expect("last_block is Some(AudioFrame), just matched");
|
||||||
x.encode(false, true, target)?;
|
x.encode(false, true, target)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -107,6 +111,7 @@ mod tests {
|
|||||||
tests::manifest,
|
tests::manifest,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#[expect(clippy::unwrap_used)]
|
||||||
fn test_strip(
|
fn test_strip(
|
||||||
test_case: &FlacTestCase,
|
test_case: &FlacTestCase,
|
||||||
fragment_size_range: Option<std::ops::Range<usize>>,
|
fragment_size_range: Option<std::ops::Range<usize>>,
|
||||||
|
|||||||
@@ -90,6 +90,7 @@ mod tests {
|
|||||||
tests::{FlacBlockOutput, FlacTestCase, manifest},
|
tests::{FlacBlockOutput, FlacTestCase, manifest},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#[expect(clippy::unwrap_used)]
|
||||||
fn test_pictures(
|
fn test_pictures(
|
||||||
test_case: &FlacTestCase,
|
test_case: &FlacTestCase,
|
||||||
fragment_size_range: Option<std::ops::Range<usize>>,
|
fragment_size_range: Option<std::ops::Range<usize>>,
|
||||||
|
|||||||
112
crates/pile-audio/src/nodes/extractcovers.rs
Normal file
112
crates/pile-audio/src/nodes/extractcovers.rs
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
use crate::flac::proc::pictures::FlacPictureReader;
|
||||||
|
use std::{collections::BTreeMap, sync::Arc};
|
||||||
|
use tracing::{debug, trace};
|
||||||
|
|
||||||
|
pub struct ExtractCovers {}
|
||||||
|
|
||||||
|
impl NodeBuilder for ExtractCovers {
|
||||||
|
fn build<'ctx>(&self) -> Box<dyn Node<'ctx>> {
|
||||||
|
Box::new(Self {})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inputs: "data" - Bytes
|
||||||
|
#[async_trait]
|
||||||
|
impl<'ctx> Node<'ctx> for ExtractCovers {
|
||||||
|
async fn run(
|
||||||
|
&self,
|
||||||
|
ctx: &CopperContext<'ctx>,
|
||||||
|
this_node: ThisNodeInfo,
|
||||||
|
params: NodeParameters,
|
||||||
|
mut input: BTreeMap<PortName, Option<PipeData>>,
|
||||||
|
) -> Result<BTreeMap<PortName, PipeData>, RunNodeError> {
|
||||||
|
//
|
||||||
|
// Extract parameters
|
||||||
|
//
|
||||||
|
params.err_if_not_empty()?;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Extract arguments
|
||||||
|
//
|
||||||
|
let data = input.remove(&PortName::new("data"));
|
||||||
|
if data.is_none() {
|
||||||
|
return Err(RunNodeError::MissingInput {
|
||||||
|
port: PortName::new("data"),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if let Some((port, _)) = input.pop_first() {
|
||||||
|
return Err(RunNodeError::UnrecognizedInput { port });
|
||||||
|
}
|
||||||
|
|
||||||
|
trace!(
|
||||||
|
message = "Inputs ready, preparing reader",
|
||||||
|
node_id = ?this_node.id
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut reader = match data.unwrap() {
|
||||||
|
None => {
|
||||||
|
return Err(RunNodeError::RequiredInputNull {
|
||||||
|
port: PortName::new("data"),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(PipeData::Blob { source, .. }) => source.build(ctx).await?,
|
||||||
|
|
||||||
|
_ => {
|
||||||
|
return Err(RunNodeError::BadInputType {
|
||||||
|
port: PortName::new("data"),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
//
|
||||||
|
// Setup is done, extract covers
|
||||||
|
//
|
||||||
|
debug!(
|
||||||
|
message = "Extracting covers",
|
||||||
|
node_id = ?this_node.id
|
||||||
|
);
|
||||||
|
let mut picreader = FlacPictureReader::new();
|
||||||
|
|
||||||
|
while let Some(data) = reader.next_fragment().await? {
|
||||||
|
picreader
|
||||||
|
.push_data(&data)
|
||||||
|
.map_err(|e| RunNodeError::Other(Arc::new(e)))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
picreader
|
||||||
|
.finish()
|
||||||
|
.map_err(|e| RunNodeError::Other(Arc::new(e)))?;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Send the first cover we find
|
||||||
|
//
|
||||||
|
|
||||||
|
let mut output = BTreeMap::new();
|
||||||
|
|
||||||
|
if let Some(picture) = picreader.pop_picture() {
|
||||||
|
debug!(
|
||||||
|
message = "Found a cover, sending",
|
||||||
|
node_id = ?this_node.id,
|
||||||
|
picture = ?picture
|
||||||
|
);
|
||||||
|
|
||||||
|
output.insert(
|
||||||
|
PortName::new("cover_data"),
|
||||||
|
PipeData::Blob {
|
||||||
|
source: BytesProcessorBuilder::new(RawBytesSource::Array {
|
||||||
|
mime: picture.mime.clone(),
|
||||||
|
data: Arc::new(picture.img_data),
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
debug!(
|
||||||
|
message = "Did not find a cover, sending None",
|
||||||
|
node_id = ?this_node.id
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return Ok(output);
|
||||||
|
}
|
||||||
|
}
|
||||||
164
crates/pile-audio/src/nodes/extracttags.rs
Normal file
164
crates/pile-audio/src/nodes/extracttags.rs
Normal file
@@ -0,0 +1,164 @@
|
|||||||
|
use crate::{
|
||||||
|
common::tagtype::TagType,
|
||||||
|
flac::blockread::{FlacBlock, FlacBlockReader, FlacBlockSelector},
|
||||||
|
};
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use copper_piper::{
|
||||||
|
base::{Node, NodeBuilder, NodeParameterValue, PortName, RunNodeError, ThisNodeInfo},
|
||||||
|
data::PipeData,
|
||||||
|
helpers::NodeParameters,
|
||||||
|
CopperContext,
|
||||||
|
};
|
||||||
|
use std::{collections::BTreeMap, sync::Arc};
|
||||||
|
use tracing::{debug, trace};
|
||||||
|
|
||||||
|
/// Extract tags from audio metadata
|
||||||
|
pub struct ExtractTags {}
|
||||||
|
|
||||||
|
impl NodeBuilder for ExtractTags {
|
||||||
|
fn build<'ctx>(&self) -> Box<dyn Node<'ctx>> {
|
||||||
|
Box::new(Self {})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inputs: "data" - Bytes
|
||||||
|
// Outputs: variable, depends on tags
|
||||||
|
#[async_trait]
|
||||||
|
impl<'ctx> Node<'ctx> for ExtractTags {
|
||||||
|
async fn run(
|
||||||
|
&self,
|
||||||
|
ctx: &CopperContext<'ctx>,
|
||||||
|
this_node: ThisNodeInfo,
|
||||||
|
mut params: NodeParameters,
|
||||||
|
mut input: BTreeMap<PortName, Option<PipeData>>,
|
||||||
|
) -> Result<BTreeMap<PortName, PipeData>, RunNodeError> {
|
||||||
|
//
|
||||||
|
// Extract parameters
|
||||||
|
//
|
||||||
|
|
||||||
|
let tags = {
|
||||||
|
let mut tags: BTreeMap<PortName, TagType> = BTreeMap::new();
|
||||||
|
let val = params.pop_val("tags")?;
|
||||||
|
|
||||||
|
match val {
|
||||||
|
NodeParameterValue::List(list) => {
|
||||||
|
for t in list {
|
||||||
|
match t {
|
||||||
|
NodeParameterValue::String(s) => {
|
||||||
|
tags.insert(PortName::new(s.as_str()), s.as_str().into());
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
return Err(RunNodeError::BadParameterType {
|
||||||
|
parameter: "tags".into(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
return Err(RunNodeError::BadParameterType {
|
||||||
|
parameter: "tags".into(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
tags
|
||||||
|
};
|
||||||
|
|
||||||
|
params.err_if_not_empty()?;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Extract arguments
|
||||||
|
//
|
||||||
|
let data = input.remove(&PortName::new("data"));
|
||||||
|
if data.is_none() {
|
||||||
|
return Err(RunNodeError::MissingInput {
|
||||||
|
port: PortName::new("data"),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if let Some((port, _)) = input.pop_first() {
|
||||||
|
return Err(RunNodeError::UnrecognizedInput { port });
|
||||||
|
}
|
||||||
|
|
||||||
|
trace!(
|
||||||
|
message = "Inputs ready, preparing reader",
|
||||||
|
node_id = ?this_node.id
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut reader = match data.unwrap() {
|
||||||
|
None => {
|
||||||
|
return Err(RunNodeError::RequiredInputNull {
|
||||||
|
port: PortName::new("data"),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(PipeData::Blob { source, .. }) => source.build(ctx).await?,
|
||||||
|
|
||||||
|
_ => {
|
||||||
|
return Err(RunNodeError::BadInputType {
|
||||||
|
port: PortName::new("data"),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
//
|
||||||
|
// Setup is done, extract tags
|
||||||
|
//
|
||||||
|
debug!(
|
||||||
|
message = "Extracting tags",
|
||||||
|
node_id = ?this_node.id
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut block_reader = FlacBlockReader::new(FlacBlockSelector {
|
||||||
|
pick_vorbiscomment: true,
|
||||||
|
..Default::default()
|
||||||
|
});
|
||||||
|
|
||||||
|
while let Some(data) = reader.next_fragment().await? {
|
||||||
|
block_reader
|
||||||
|
.push_data(&data)
|
||||||
|
.map_err(|e| RunNodeError::Other(Arc::new(e)))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
block_reader
|
||||||
|
.finish()
|
||||||
|
.map_err(|e| RunNodeError::Other(Arc::new(e)))?;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Return tags
|
||||||
|
//
|
||||||
|
|
||||||
|
let mut output = BTreeMap::new();
|
||||||
|
|
||||||
|
while block_reader.has_block() {
|
||||||
|
let b = block_reader.pop_block().unwrap();
|
||||||
|
match b {
|
||||||
|
FlacBlock::VorbisComment(comment) => {
|
||||||
|
for (port, tag_type) in tags.iter() {
|
||||||
|
if let Some((_, tag_value)) =
|
||||||
|
comment.comment.comments.iter().find(|(t, _)| t == tag_type)
|
||||||
|
{
|
||||||
|
let x = output.insert(
|
||||||
|
port.clone(),
|
||||||
|
PipeData::Text {
|
||||||
|
value: tag_value.clone(),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
// Each insertion should be new
|
||||||
|
assert!(x.is_none());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// `reader` filters blocks for us
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
|
||||||
|
// We should only have one comment block
|
||||||
|
assert!(!block_reader.has_block());
|
||||||
|
}
|
||||||
|
|
||||||
|
return Ok(output);
|
||||||
|
}
|
||||||
|
}
|
||||||
36
crates/pile-audio/src/nodes/mod.rs
Normal file
36
crates/pile-audio/src/nodes/mod.rs
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
//! Pipeline nodes for processing audio files
|
||||||
|
use copper_piper::base::{NodeDispatcher, RegisterNodeError};
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
|
mod extractcovers;
|
||||||
|
mod extracttags;
|
||||||
|
mod striptags;
|
||||||
|
|
||||||
|
/// Register all nodes in this module into the given dispatcher
|
||||||
|
pub fn register(dispatcher: &mut NodeDispatcher) -> Result<(), RegisterNodeError> {
|
||||||
|
dispatcher
|
||||||
|
.register_node(
|
||||||
|
"StripTags",
|
||||||
|
BTreeMap::new(),
|
||||||
|
Box::new(striptags::StripTags {}),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
dispatcher
|
||||||
|
.register_node(
|
||||||
|
"ExtractCovers",
|
||||||
|
BTreeMap::new(),
|
||||||
|
Box::new(extractcovers::ExtractCovers {}),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
dispatcher
|
||||||
|
.register_node(
|
||||||
|
"ExtractTags",
|
||||||
|
BTreeMap::new(),
|
||||||
|
Box::new(extracttags::ExtractTags {}),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
181
crates/pile-audio/src/nodes/striptags.rs
Normal file
181
crates/pile-audio/src/nodes/striptags.rs
Normal file
@@ -0,0 +1,181 @@
|
|||||||
|
//! Strip all tags from an audio file
|
||||||
|
|
||||||
|
use crate::flac::proc::metastrip::FlacMetaStrip;
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use copper_piper::{
|
||||||
|
base::{Node, NodeBuilder, NodeId, PortName, RunNodeError, ThisNodeInfo},
|
||||||
|
data::PipeData,
|
||||||
|
helpers::{
|
||||||
|
processor::{StreamProcessor, StreamProcessorBuilder},
|
||||||
|
NodeParameters,
|
||||||
|
},
|
||||||
|
CopperContext,
|
||||||
|
};
|
||||||
|
use copper_util::MimeType;
|
||||||
|
use smartstring::{LazyCompact, SmartString};
|
||||||
|
use std::{collections::BTreeMap, sync::Arc};
|
||||||
|
use tokio::sync::mpsc::{Receiver, Sender};
|
||||||
|
use tracing::debug;
|
||||||
|
|
||||||
|
/// Strip all metadata from an audio file
|
||||||
|
pub struct StripTags {}
|
||||||
|
|
||||||
|
impl NodeBuilder for StripTags {
|
||||||
|
fn build<'ctx>(&self) -> Box<dyn Node<'ctx>> {
|
||||||
|
Box::new(Self {})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Input: "data" - Blob
|
||||||
|
// Output: "out" - Blob
|
||||||
|
#[async_trait]
|
||||||
|
impl<'ctx> Node<'ctx> for StripTags {
|
||||||
|
async fn run(
|
||||||
|
&self,
|
||||||
|
_ctx: &CopperContext<'ctx>,
|
||||||
|
this_node: ThisNodeInfo,
|
||||||
|
params: NodeParameters,
|
||||||
|
mut input: BTreeMap<PortName, Option<PipeData>>,
|
||||||
|
) -> Result<BTreeMap<PortName, PipeData>, RunNodeError> {
|
||||||
|
//
|
||||||
|
// Extract parameters
|
||||||
|
//
|
||||||
|
params.err_if_not_empty()?;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Extract arguments
|
||||||
|
//
|
||||||
|
let data = input.remove(&PortName::new("data"));
|
||||||
|
if data.is_none() {
|
||||||
|
return Err(RunNodeError::MissingInput {
|
||||||
|
port: PortName::new("data"),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if let Some((port, _)) = input.pop_first() {
|
||||||
|
return Err(RunNodeError::UnrecognizedInput { port });
|
||||||
|
}
|
||||||
|
|
||||||
|
let source = match data.unwrap() {
|
||||||
|
None => {
|
||||||
|
return Err(RunNodeError::RequiredInputNull {
|
||||||
|
port: PortName::new("data"),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(PipeData::Blob { source, .. }) => source,
|
||||||
|
|
||||||
|
_ => {
|
||||||
|
return Err(RunNodeError::BadInputType {
|
||||||
|
port: PortName::new("data"),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
message = "Setup done, stripping tags",
|
||||||
|
node_id = ?this_node.id
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut output = BTreeMap::new();
|
||||||
|
|
||||||
|
output.insert(
|
||||||
|
PortName::new("out"),
|
||||||
|
PipeData::Blob {
|
||||||
|
source: source.add_processor(Arc::new(TagStripProcessor {
|
||||||
|
node_id: this_node.id.clone(),
|
||||||
|
node_type: this_node.node_type.clone(),
|
||||||
|
})),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
return Ok(output);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
struct TagStripProcessor {
|
||||||
|
node_id: NodeId,
|
||||||
|
node_type: SmartString<LazyCompact>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StreamProcessorBuilder for TagStripProcessor {
|
||||||
|
fn build(&self) -> Box<dyn StreamProcessor> {
|
||||||
|
Box::new(self.clone())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl StreamProcessor for TagStripProcessor {
|
||||||
|
fn mime(&self) -> &MimeType {
|
||||||
|
return &MimeType::Flac;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn name(&self) -> &'static str {
|
||||||
|
"TagStripProcessor"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn source_node_id(&self) -> &NodeId {
|
||||||
|
&self.node_id
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the type of the node that created this processor
|
||||||
|
fn source_node_type(&self) -> &str {
|
||||||
|
&self.node_type
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn run(
|
||||||
|
&self,
|
||||||
|
mut source: Receiver<Arc<Vec<u8>>>,
|
||||||
|
sink: Sender<Arc<Vec<u8>>>,
|
||||||
|
max_buffer_size: usize,
|
||||||
|
) -> Result<(), RunNodeError> {
|
||||||
|
//
|
||||||
|
// Strip tags
|
||||||
|
//
|
||||||
|
|
||||||
|
let mut strip = FlacMetaStrip::new();
|
||||||
|
let mut out_bytes = Vec::new();
|
||||||
|
|
||||||
|
while let Some(data) = source.recv().await {
|
||||||
|
strip
|
||||||
|
.push_data(&data)
|
||||||
|
.map_err(|e| RunNodeError::Other(Arc::new(e)))?;
|
||||||
|
|
||||||
|
strip
|
||||||
|
.read_data(&mut out_bytes)
|
||||||
|
.map_err(|e| RunNodeError::Other(Arc::new(e)))?;
|
||||||
|
|
||||||
|
if out_bytes.len() >= max_buffer_size {
|
||||||
|
let x = std::mem::take(&mut out_bytes);
|
||||||
|
|
||||||
|
match sink.send(Arc::new(x)).await {
|
||||||
|
Ok(()) => {}
|
||||||
|
|
||||||
|
// Not an error, our receiver was dropped.
|
||||||
|
// Exit early if that happens!
|
||||||
|
Err(_) => return Ok(()),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
strip
|
||||||
|
.finish()
|
||||||
|
.map_err(|e| RunNodeError::Other(Arc::new(e)))?;
|
||||||
|
|
||||||
|
while strip.has_data() {
|
||||||
|
strip
|
||||||
|
.read_data(&mut out_bytes)
|
||||||
|
.map_err(|e| RunNodeError::Other(Arc::new(e)))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
match sink.send(Arc::new(out_bytes)).await {
|
||||||
|
Ok(()) => {}
|
||||||
|
|
||||||
|
// Not an error, our receiver was dropped.
|
||||||
|
// Exit early if that happens!
|
||||||
|
Err(_) => return Ok(()),
|
||||||
|
};
|
||||||
|
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -8,7 +8,7 @@ name = "dataset"
|
|||||||
#
|
#
|
||||||
# working_dir = ".pile"
|
# working_dir = ".pile"
|
||||||
|
|
||||||
# Data sources avaliable in this dataset
|
# Data sources available in this dataset
|
||||||
source."music" = { type = "flac", path = ["music", "music-2"] }
|
source."music" = { type = "flac", path = ["music", "music-2"] }
|
||||||
|
|
||||||
|
|
||||||
@@ -19,7 +19,7 @@ source."music" = { type = "flac", path = ["music", "music-2"] }
|
|||||||
#
|
#
|
||||||
# "field-name" = {
|
# "field-name" = {
|
||||||
# # The type of data this field contains.
|
# # The type of data this field contains.
|
||||||
# # only text is supportedin this verison.
|
# # only text is supported in this version.
|
||||||
# type = "text",
|
# type = "text",
|
||||||
#
|
#
|
||||||
# # An array of jsonpaths (rfc9535) used to extract this field from each source entry.
|
# # An array of jsonpaths (rfc9535) used to extract this field from each source entry.
|
||||||
|
|||||||
@@ -10,8 +10,9 @@ pub use misc::*;
|
|||||||
pub static INIT_DB_TOML: &str = include_str!("./config.toml");
|
pub static INIT_DB_TOML: &str = include_str!("./config.toml");
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
#[expect(clippy::unwrap_used)]
|
||||||
fn init_db_toml_valid() {
|
fn init_db_toml_valid() {
|
||||||
toml::from_str::<ConfigToml>(INIT_DB_TOML).unwrap();
|
toml::from_str::<ConfigToml>(INIT_DB_TOML).expect("INIT_DB_TOML should be valid TOML");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Deserialize)]
|
#[derive(Debug, Clone, Deserialize)]
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ impl<T: Debug + Clone> OneOrMany<T> {
|
|||||||
|
|
||||||
pub fn as_slice(&self) -> &[T] {
|
pub fn as_slice(&self) -> &[T] {
|
||||||
match self {
|
match self {
|
||||||
Self::One(x) => slice::from_ref(&x),
|
Self::One(x) => slice::from_ref(x),
|
||||||
Self::Many(x) => &x[..],
|
Self::Many(x) => &x[..],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -62,7 +62,7 @@ impl FieldSpecPost {
|
|||||||
Value::Bool(_) | Value::Number(_) => Value::String(val.to_string()),
|
Value::Bool(_) | Value::Number(_) => Value::String(val.to_string()),
|
||||||
|
|
||||||
Value::String(x) => {
|
Value::String(x) => {
|
||||||
Value::String(x.strip_suffix(trim_suffix).unwrap_or(&x).to_owned())
|
Value::String(x.strip_suffix(trim_suffix).unwrap_or(x).to_owned())
|
||||||
}
|
}
|
||||||
|
|
||||||
Value::Array(x) => {
|
Value::Array(x) => {
|
||||||
@@ -73,7 +73,7 @@ impl FieldSpecPost {
|
|||||||
x.iter()
|
x.iter()
|
||||||
.map(|x| {
|
.map(|x| {
|
||||||
(
|
(
|
||||||
x.0.strip_suffix(trim_suffix).unwrap_or(&x.0).to_owned(),
|
x.0.strip_suffix(trim_suffix).unwrap_or(x.0).to_owned(),
|
||||||
self.apply(x.1),
|
self.apply(x.1),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
@@ -88,7 +88,7 @@ impl FieldSpecPost {
|
|||||||
Value::Bool(_) | Value::Number(_) => Value::String(val.to_string()),
|
Value::Bool(_) | Value::Number(_) => Value::String(val.to_string()),
|
||||||
|
|
||||||
Value::String(x) => {
|
Value::String(x) => {
|
||||||
Value::String(x.strip_prefix(trim_prefix).unwrap_or(&x).to_owned())
|
Value::String(x.strip_prefix(trim_prefix).unwrap_or(x).to_owned())
|
||||||
}
|
}
|
||||||
|
|
||||||
Value::Array(x) => {
|
Value::Array(x) => {
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ workspace = true
|
|||||||
[dependencies]
|
[dependencies]
|
||||||
pile-config = { workspace = true }
|
pile-config = { workspace = true }
|
||||||
pile-audio = { workspace = true }
|
pile-audio = { workspace = true }
|
||||||
|
pile-toolbox = { workspace = true }
|
||||||
|
|
||||||
serde_json = { workspace = true }
|
serde_json = { workspace = true }
|
||||||
itertools = { workspace = true }
|
itertools = { workspace = true }
|
||||||
@@ -18,3 +19,5 @@ tantivy = { workspace = true }
|
|||||||
tracing = { workspace = true }
|
tracing = { workspace = true }
|
||||||
jsonpath-rust = { workspace = true }
|
jsonpath-rust = { workspace = true }
|
||||||
chrono = { workspace = true }
|
chrono = { workspace = true }
|
||||||
|
toml = { workspace = true }
|
||||||
|
thiserror = { workspace = true }
|
||||||
|
|||||||
229
crates/pile-dataset/src/dataset.rs
Normal file
229
crates/pile-dataset/src/dataset.rs
Normal file
@@ -0,0 +1,229 @@
|
|||||||
|
use chrono::{DateTime, Utc};
|
||||||
|
use pile_config::{ConfigToml, Source};
|
||||||
|
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
||||||
|
use std::{io::ErrorKind, path::PathBuf, sync::Arc};
|
||||||
|
use tantivy::{Executor, Index, IndexWriter, TantivyError, collector::TopDocs};
|
||||||
|
use thiserror::Error;
|
||||||
|
use tracing::{info, trace, warn};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
DataSource,
|
||||||
|
index::{DbFtsIndex, FtsLookupResult},
|
||||||
|
path_ts_earliest,
|
||||||
|
source::DirDataSource,
|
||||||
|
};
|
||||||
|
|
||||||
|
#[derive(Debug, Error)]
|
||||||
|
pub enum DatasetError {
|
||||||
|
#[error("{0}")]
|
||||||
|
IoError(#[from] std::io::Error),
|
||||||
|
|
||||||
|
#[error("{0}")]
|
||||||
|
TantivyError(#[from] TantivyError),
|
||||||
|
|
||||||
|
#[error("this dataset does not have an fts index")]
|
||||||
|
NoFtsIndex,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Dataset {
|
||||||
|
pub path_config: PathBuf,
|
||||||
|
pub path_parent: PathBuf,
|
||||||
|
pub path_workdir: PathBuf,
|
||||||
|
|
||||||
|
pub config: ConfigToml,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Dataset {
|
||||||
|
pub fn open(config: impl Into<PathBuf>) -> Result<Self, std::io::Error> {
|
||||||
|
let path_config = config.into();
|
||||||
|
let path_parent = path_config
|
||||||
|
.parent()
|
||||||
|
.ok_or(std::io::Error::new(
|
||||||
|
ErrorKind::NotADirectory,
|
||||||
|
format!("Config file {} has no parent", path_config.display()),
|
||||||
|
))?
|
||||||
|
.to_owned();
|
||||||
|
|
||||||
|
let config = {
|
||||||
|
let config = std::fs::read_to_string(&path_config)?;
|
||||||
|
let config: Result<ConfigToml, _> = toml::from_str(&config);
|
||||||
|
|
||||||
|
match config {
|
||||||
|
Ok(config) => {
|
||||||
|
trace!(message = "Loaded config", ?config);
|
||||||
|
config
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(error) => {
|
||||||
|
return Err(std::io::Error::new(
|
||||||
|
ErrorKind::InvalidData,
|
||||||
|
format!("{} is invalid:\n{error}", path_config.display()),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let path_workdir = config
|
||||||
|
.dataset
|
||||||
|
.working_dir
|
||||||
|
.clone()
|
||||||
|
.unwrap_or(path_parent.join(".pile"))
|
||||||
|
.join(config.dataset.name.as_str());
|
||||||
|
|
||||||
|
return Ok(Self {
|
||||||
|
path_config,
|
||||||
|
path_parent,
|
||||||
|
path_workdir,
|
||||||
|
config,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// MARK: fts
|
||||||
|
//
|
||||||
|
|
||||||
|
/// Refresh this dataset's fts index
|
||||||
|
pub fn fts_refresh(
|
||||||
|
&self,
|
||||||
|
flag: Option<CancelFlag>,
|
||||||
|
) -> Result<(), CancelableTaskError<DatasetError>> {
|
||||||
|
let fts_tmp_dir = self.path_workdir.join(".tmp-fts");
|
||||||
|
let fts_dir = self.path_workdir.join("fts");
|
||||||
|
|
||||||
|
if fts_tmp_dir.is_dir() {
|
||||||
|
warn!("Removing temporary index in {}", fts_dir.display());
|
||||||
|
std::fs::remove_dir_all(&fts_tmp_dir).map_err(DatasetError::from)?;
|
||||||
|
}
|
||||||
|
if fts_dir.is_dir() {
|
||||||
|
warn!("Removing existing index in {}", fts_dir.display());
|
||||||
|
std::fs::remove_dir_all(&fts_dir).map_err(DatasetError::from)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::fs::create_dir_all(&fts_tmp_dir).map_err(DatasetError::from)?;
|
||||||
|
|
||||||
|
let mut sources = Vec::new();
|
||||||
|
for (name, source) in &self.config.dataset.source {
|
||||||
|
match source {
|
||||||
|
Source::Flac { path: dir } => {
|
||||||
|
let source = DirDataSource::new(name, dir.clone().to_vec());
|
||||||
|
sources.push(source);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let db_index = DbFtsIndex::new(&fts_tmp_dir, &self.config);
|
||||||
|
let mut index = Index::create_in_dir(&fts_tmp_dir, db_index.schema.clone())
|
||||||
|
.map_err(DatasetError::from)?;
|
||||||
|
index.set_executor(Executor::multi_thread(10, "build-fts").map_err(DatasetError::from)?);
|
||||||
|
let mut index_writer: IndexWriter = index.writer(15_000_000).map_err(DatasetError::from)?;
|
||||||
|
|
||||||
|
for s in sources {
|
||||||
|
info!("Processing source {:?}", s.name);
|
||||||
|
|
||||||
|
for i in s.iter() {
|
||||||
|
let (k, v) = i.map_err(DatasetError::from)?;
|
||||||
|
|
||||||
|
let doc = match db_index
|
||||||
|
.entry_to_document(&*v)
|
||||||
|
.map_err(DatasetError::from)?
|
||||||
|
{
|
||||||
|
Some(x) => x,
|
||||||
|
None => {
|
||||||
|
warn!("Skipping {k:?}, document is empty");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
index_writer.add_document(doc).map_err(DatasetError::from)?;
|
||||||
|
|
||||||
|
if let Some(flag) = flag.as_ref()
|
||||||
|
&& flag.is_cancelled()
|
||||||
|
{
|
||||||
|
return Err(CancelableTaskError::Cancelled);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
info!("Committing index");
|
||||||
|
index_writer.commit().map_err(DatasetError::from)?;
|
||||||
|
std::fs::rename(&fts_tmp_dir, &fts_dir).map_err(DatasetError::from)?;
|
||||||
|
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn fts_lookup(
|
||||||
|
&self,
|
||||||
|
query: &str,
|
||||||
|
top_n: usize,
|
||||||
|
) -> Result<Vec<FtsLookupResult>, DatasetError> {
|
||||||
|
let fts_dir = self.path_workdir.join("fts");
|
||||||
|
|
||||||
|
if !fts_dir.exists() {
|
||||||
|
return Err(DatasetError::NoFtsIndex);
|
||||||
|
}
|
||||||
|
if !fts_dir.is_dir() {
|
||||||
|
return Err(std::io::Error::new(
|
||||||
|
ErrorKind::NotADirectory,
|
||||||
|
format!("fts index {} is not a directory", fts_dir.display()),
|
||||||
|
)
|
||||||
|
.into());
|
||||||
|
}
|
||||||
|
|
||||||
|
let db_index = DbFtsIndex::new(&fts_dir, &self.config);
|
||||||
|
let results = db_index.lookup(query, Arc::new(TopDocs::with_limit(top_n)))?;
|
||||||
|
return Ok(results);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Time at which fts was created
|
||||||
|
pub fn ts_fts(&self) -> Result<Option<DateTime<Utc>>, std::io::Error> {
|
||||||
|
let fts_dir = self.path_workdir.join("fts");
|
||||||
|
|
||||||
|
if !fts_dir.exists() {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
if !fts_dir.is_dir() {
|
||||||
|
return Err(std::io::Error::new(
|
||||||
|
ErrorKind::NotADirectory,
|
||||||
|
format!("fts index {} is not a directory", fts_dir.display()),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
return path_ts_earliest(&fts_dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Time at which data was last modified
|
||||||
|
pub fn ts_data(&self) -> Result<Option<DateTime<Utc>>, std::io::Error> {
|
||||||
|
let mut ts: Option<DateTime<Utc>> = None;
|
||||||
|
|
||||||
|
for (label, source) in &self.config.dataset.source {
|
||||||
|
match source {
|
||||||
|
Source::Flac { path } => {
|
||||||
|
let s = DirDataSource::new(label, path.clone().to_vec());
|
||||||
|
match (ts, s.latest_change()?) {
|
||||||
|
(_, None) => continue,
|
||||||
|
(None, Some(new)) => ts = Some(new),
|
||||||
|
(Some(old), Some(new)) => ts = Some(old.max(new)),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Ok(ts);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if we do not have an fts index,
|
||||||
|
/// or if our fts index is older than our data.
|
||||||
|
pub fn needs_fts(&self) -> Result<bool, std::io::Error> {
|
||||||
|
let ts_fts = self.ts_fts()?;
|
||||||
|
let ts_data = self.ts_data()?;
|
||||||
|
|
||||||
|
match (ts_fts, ts_data) {
|
||||||
|
(None, Some(_)) => return Ok(true),
|
||||||
|
(None, None) | (Some(_), None) => {
|
||||||
|
warn!("Could not determine data age");
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
(Some(ts_fts), Some(ts_data)) => return Ok(ts_data > ts_fts),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -27,8 +27,8 @@ pub struct DbFtsIndex {
|
|||||||
|
|
||||||
impl DbFtsIndex {
|
impl DbFtsIndex {
|
||||||
fn fts_cfg(&self) -> &DatasetFts {
|
fn fts_cfg(&self) -> &DatasetFts {
|
||||||
static DEFAULT: LazyLock<DatasetFts> = LazyLock::new(|| DatasetFts::default());
|
static DEFAULT: LazyLock<DatasetFts> = LazyLock::new(DatasetFts::default);
|
||||||
&self.cfg.fts.as_ref().unwrap_or(&DEFAULT)
|
self.cfg.fts.as_ref().unwrap_or(&DEFAULT)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -47,9 +47,9 @@ impl DbFtsIndex {
|
|||||||
let fields = &cfg.fts.as_ref().unwrap_or(&default).fields;
|
let fields = &cfg.fts.as_ref().unwrap_or(&default).fields;
|
||||||
for (name, field) in fields {
|
for (name, field) in fields {
|
||||||
if field.tokenize {
|
if field.tokenize {
|
||||||
schema_builder.add_text_field(&name, schema::TEXT);
|
schema_builder.add_text_field(name, schema::TEXT);
|
||||||
} else {
|
} else {
|
||||||
schema_builder.add_text_field(&name, schema::STRING);
|
schema_builder.add_text_field(name, schema::STRING);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -70,19 +70,23 @@ impl DbFtsIndex {
|
|||||||
) -> Result<Option<TantivyDocument>, TantivyError> {
|
) -> Result<Option<TantivyDocument>, TantivyError> {
|
||||||
let mut doc = TantivyDocument::default();
|
let mut doc = TantivyDocument::default();
|
||||||
|
|
||||||
{
|
let key = match item.key().to_string() {
|
||||||
let f_ptr = self.schema.get_field("_meta_source")?;
|
Some(x) => x,
|
||||||
doc.add_text(f_ptr, item.source_name());
|
None => {
|
||||||
|
warn!(
|
||||||
|
message = "Item key cannot be converted to a string, skipping",
|
||||||
|
key = ?item.key(),
|
||||||
|
);
|
||||||
|
return Ok(None);
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
{
|
doc.add_text(self.schema.get_field("_meta_source")?, item.source_name());
|
||||||
let f_ptr = self.schema.get_field("_meta_key")?;
|
doc.add_text(self.schema.get_field("_meta_key")?, key);
|
||||||
doc.add_text(f_ptr, item.key().to_string());
|
|
||||||
}
|
|
||||||
|
|
||||||
let json = item.json().unwrap();
|
let json = item.json()?;
|
||||||
let mut empty = true;
|
let mut empty = true;
|
||||||
for (name, _field) in &self.fts_cfg().fields {
|
for name in self.fts_cfg().fields.keys() {
|
||||||
let val = match self.get_field(&json, name)? {
|
let val = match self.get_field(&json, name)? {
|
||||||
Some(x) => x,
|
Some(x) => x,
|
||||||
None => continue,
|
None => continue,
|
||||||
@@ -118,7 +122,7 @@ impl DbFtsIndex {
|
|||||||
|
|
||||||
// Try paths in order, using the first value we find
|
// Try paths in order, using the first value we find
|
||||||
'outer: for path in field.path.as_slice() {
|
'outer: for path in field.path.as_slice() {
|
||||||
let val = match json.query(&path) {
|
let val = match json.query(path) {
|
||||||
Ok(mut x) => {
|
Ok(mut x) => {
|
||||||
if x.len() > 1 {
|
if x.len() > 1 {
|
||||||
warn!(
|
warn!(
|
||||||
@@ -168,6 +172,7 @@ impl DbFtsIndex {
|
|||||||
|
|
||||||
loop {
|
loop {
|
||||||
val = match val {
|
val = match val {
|
||||||
|
#[expect(clippy::unwrap_used)]
|
||||||
Value::Array(ref mut x) => {
|
Value::Array(ref mut x) => {
|
||||||
if x.len() == 1 {
|
if x.len() == 1 {
|
||||||
x.pop().unwrap()
|
x.pop().unwrap()
|
||||||
|
|||||||
@@ -6,13 +6,14 @@ use std::{
|
|||||||
};
|
};
|
||||||
|
|
||||||
use pile_audio::flac::blockread::{FlacBlock, FlacBlockReader, FlacBlockSelector};
|
use pile_audio::flac::blockread::{FlacBlock, FlacBlockReader, FlacBlockSelector};
|
||||||
|
use pile_config::Label;
|
||||||
use serde_json::{Map, Value};
|
use serde_json::{Map, Value};
|
||||||
|
|
||||||
use crate::Item;
|
use crate::Item;
|
||||||
|
|
||||||
pub struct FlacItem {
|
pub struct FlacItem {
|
||||||
pub(crate) path: PathBuf,
|
pub(crate) path: PathBuf,
|
||||||
pub(crate) source_name: String,
|
pub(crate) source_name: Label,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Debug for FlacItem {
|
impl Debug for FlacItem {
|
||||||
@@ -47,8 +48,10 @@ impl Item for FlacItem {
|
|||||||
let mut data = Vec::new();
|
let mut data = Vec::new();
|
||||||
file.read_to_end(&mut data)?;
|
file.read_to_end(&mut data)?;
|
||||||
|
|
||||||
block_reader.push_data(&data).unwrap();
|
block_reader
|
||||||
block_reader.finish().unwrap();
|
.push_data(&data)
|
||||||
|
.map_err(std::io::Error::other)?;
|
||||||
|
block_reader.finish().map_err(std::io::Error::other)?;
|
||||||
|
|
||||||
//
|
//
|
||||||
// Return tags
|
// Return tags
|
||||||
@@ -56,9 +59,8 @@ impl Item for FlacItem {
|
|||||||
|
|
||||||
let mut output = Map::new();
|
let mut output = Map::new();
|
||||||
|
|
||||||
while block_reader.has_block() {
|
while let Some(block) = block_reader.pop_block() {
|
||||||
let b = block_reader.pop_block().unwrap();
|
match block {
|
||||||
match b {
|
|
||||||
FlacBlock::VorbisComment(comment) => {
|
FlacBlock::VorbisComment(comment) => {
|
||||||
for (k, v) in comment.comment.comments {
|
for (k, v) in comment.comment.comments {
|
||||||
let k = k.to_string();
|
let k = k.to_string();
|
||||||
@@ -67,9 +69,11 @@ impl Item for FlacItem {
|
|||||||
|
|
||||||
match e {
|
match e {
|
||||||
None => {
|
None => {
|
||||||
output.insert(k.to_string(), Value::Array(vec![v]));
|
output.insert(k.clone(), Value::Array(vec![v]));
|
||||||
}
|
}
|
||||||
Some(e) => {
|
Some(e) => {
|
||||||
|
// We always insert an array
|
||||||
|
#[expect(clippy::unwrap_used)]
|
||||||
e.as_array_mut().unwrap().push(v);
|
e.as_array_mut().unwrap().push(v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,6 +4,9 @@ pub use traits::*;
|
|||||||
mod misc;
|
mod misc;
|
||||||
pub use misc::*;
|
pub use misc::*;
|
||||||
|
|
||||||
|
mod dataset;
|
||||||
|
pub use dataset::*;
|
||||||
|
|
||||||
pub mod index;
|
pub mod index;
|
||||||
pub mod item;
|
pub mod item;
|
||||||
pub mod source;
|
pub mod source;
|
||||||
|
|||||||
@@ -2,41 +2,65 @@ use chrono::{DateTime, Utc};
|
|||||||
use std::fs;
|
use std::fs;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
/// Returns the age of a path as a chrono DateTime.
|
/// Returns the age of a path as a [DateTime].
|
||||||
/// - If the path doesn't exist, returns None
|
/// - If the path doesn't exist, returns [None]
|
||||||
/// - If it's a file, returns the modified time
|
/// - If it's a file, returns the modified time
|
||||||
/// - If it's a directory, returns the LATEST modified time of all files within
|
/// - If it's a directory, returns the LATEST modified time of all files within
|
||||||
pub fn path_age(path: impl AsRef<Path>) -> Option<DateTime<Utc>> {
|
pub fn path_ts_latest(path: impl AsRef<Path>) -> Result<Option<DateTime<Utc>>, std::io::Error> {
|
||||||
let path = path.as_ref();
|
let path = path.as_ref();
|
||||||
|
|
||||||
// Check if path exists
|
|
||||||
if !path.exists() {
|
if !path.exists() {
|
||||||
return None;
|
return Ok(None);
|
||||||
}
|
}
|
||||||
|
|
||||||
let metadata = fs::metadata(path).ok()?;
|
let metadata = fs::metadata(path)?;
|
||||||
|
|
||||||
if metadata.is_file() {
|
if metadata.is_file() {
|
||||||
// For files, return the modified time
|
let modified = metadata.modified()?;
|
||||||
let modified = metadata.modified().ok()?;
|
Ok(Some(modified.into()))
|
||||||
Some(modified.into())
|
|
||||||
} else if metadata.is_dir() {
|
} else if metadata.is_dir() {
|
||||||
// For directories, find the latest modified time of all files
|
|
||||||
find_latest_modified(path)
|
find_latest_modified(path)
|
||||||
} else {
|
} else {
|
||||||
None
|
Ok(None)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn find_latest_modified(dir: &Path) -> Option<DateTime<Utc>> {
|
/// Returns the age of a path as a [DateTime].
|
||||||
|
/// - If the path doesn't exist, returns [None]
|
||||||
|
/// - If it's a file, returns the modified time
|
||||||
|
/// - If it's a directory, returns the EARLIEST modified time of all files within
|
||||||
|
pub fn path_ts_earliest(path: impl AsRef<Path>) -> Result<Option<DateTime<Utc>>, std::io::Error> {
|
||||||
|
let path = path.as_ref();
|
||||||
|
if !path.exists() {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
let metadata = fs::metadata(path)?;
|
||||||
|
|
||||||
|
if metadata.is_file() {
|
||||||
|
let modified = metadata.modified()?;
|
||||||
|
Ok(Some(modified.into()))
|
||||||
|
} else if metadata.is_dir() {
|
||||||
|
find_earliest_modified(path)
|
||||||
|
} else {
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn find_latest_modified(dir: &Path) -> Result<Option<DateTime<Utc>>, std::io::Error> {
|
||||||
let mut latest: Option<DateTime<Utc>> = None;
|
let mut latest: Option<DateTime<Utc>> = None;
|
||||||
|
|
||||||
// Read directory entries
|
// Include the directory's own modification time
|
||||||
let entries = fs::read_dir(dir).ok()?;
|
let dir_metadata = fs::metadata(dir)?;
|
||||||
|
if let Ok(modified) = dir_metadata.modified() {
|
||||||
|
let dt: DateTime<Utc> = modified.into();
|
||||||
|
latest = Some(dt);
|
||||||
|
}
|
||||||
|
|
||||||
|
let entries = fs::read_dir(dir)?;
|
||||||
|
|
||||||
for entry in entries.flatten() {
|
for entry in entries.flatten() {
|
||||||
let path = entry.path();
|
let path = entry.path();
|
||||||
let metadata = entry.metadata().ok()?;
|
let metadata = entry.metadata()?;
|
||||||
|
|
||||||
if metadata.is_file() {
|
if metadata.is_file() {
|
||||||
if let Ok(modified) = metadata.modified() {
|
if let Ok(modified) = metadata.modified() {
|
||||||
@@ -46,16 +70,50 @@ fn find_latest_modified(dir: &Path) -> Option<DateTime<Utc>> {
|
|||||||
_ => dt,
|
_ => dt,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} else if metadata.is_dir() {
|
} else if metadata.is_dir()
|
||||||
// Recursively check subdirectories
|
&& let Some(dir_latest) = find_latest_modified(&path)? {
|
||||||
if let Some(dir_latest) = find_latest_modified(&path) {
|
|
||||||
latest = Some(match latest {
|
latest = Some(match latest {
|
||||||
Some(prev) if prev > dir_latest => prev,
|
Some(prev) if prev > dir_latest => prev,
|
||||||
_ => dir_latest,
|
_ => dir_latest,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return Ok(latest);
|
||||||
}
|
}
|
||||||
|
|
||||||
latest
|
fn find_earliest_modified(dir: &Path) -> Result<Option<DateTime<Utc>>, std::io::Error> {
|
||||||
|
let mut earliest: Option<DateTime<Utc>> = None;
|
||||||
|
|
||||||
|
// Include the directory's own modification time
|
||||||
|
let dir_metadata = fs::metadata(dir)?;
|
||||||
|
if let Ok(modified) = dir_metadata.modified() {
|
||||||
|
let dt: DateTime<Utc> = modified.into();
|
||||||
|
earliest = Some(dt);
|
||||||
|
}
|
||||||
|
|
||||||
|
let entries = fs::read_dir(dir)?;
|
||||||
|
|
||||||
|
for entry in entries.flatten() {
|
||||||
|
let path = entry.path();
|
||||||
|
let metadata = entry.metadata()?;
|
||||||
|
|
||||||
|
if metadata.is_file() {
|
||||||
|
if let Ok(modified) = metadata.modified() {
|
||||||
|
let dt: DateTime<Utc> = modified.into();
|
||||||
|
earliest = Some(match earliest {
|
||||||
|
Some(prev) if prev < dt => prev,
|
||||||
|
_ => dt,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} else if metadata.is_dir()
|
||||||
|
&& let Some(dir_earliest) = find_earliest_modified(&path)? {
|
||||||
|
earliest = Some(match earliest {
|
||||||
|
Some(prev) if prev < dir_earliest => prev,
|
||||||
|
_ => dir_earliest,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Ok(earliest);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,19 +1,21 @@
|
|||||||
|
use chrono::{DateTime, Utc};
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use std::{io::ErrorKind, path::PathBuf};
|
use pile_config::Label;
|
||||||
|
use std::path::PathBuf;
|
||||||
use walkdir::WalkDir;
|
use walkdir::WalkDir;
|
||||||
|
|
||||||
use crate::{DataSource, Item, item::FlacItem};
|
use crate::{DataSource, Item, item::FlacItem, path_ts_latest};
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct DirDataSource {
|
pub struct DirDataSource {
|
||||||
pub name: String,
|
pub name: Label,
|
||||||
pub dirs: Vec<PathBuf>,
|
pub dirs: Vec<PathBuf>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DirDataSource {
|
impl DirDataSource {
|
||||||
pub fn new(name: impl Into<String>, dirs: Vec<PathBuf>) -> Self {
|
pub fn new(name: &Label, dirs: Vec<PathBuf>) -> Self {
|
||||||
Self {
|
Self {
|
||||||
name: name.into(),
|
name: name.clone(),
|
||||||
dirs,
|
dirs,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -40,16 +42,12 @@ impl DataSource for DirDataSource {
|
|||||||
return self
|
return self
|
||||||
.dirs
|
.dirs
|
||||||
.iter()
|
.iter()
|
||||||
.map(|x| WalkDir::new(x).into_iter().map_ok(move |d| (x, d)))
|
.flat_map(|x| WalkDir::new(x).into_iter().map_ok(move |d| (x, d)))
|
||||||
.flatten()
|
.filter_ok(|(_, entry)| !entry.file_type().is_dir())
|
||||||
.into_iter()
|
|
||||||
.filter_ok(|(_, entry)| entry.file_type().is_file())
|
|
||||||
.map(|x| match x {
|
.map(|x| match x {
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
let msg = format!("other walkdir error: {err:?}");
|
let msg = format!("other walkdir error: {err:?}");
|
||||||
Err(err
|
Err(err.into_io_error().unwrap_or(std::io::Error::other(msg)))
|
||||||
.into_io_error()
|
|
||||||
.unwrap_or(std::io::Error::new(ErrorKind::Other, msg)))
|
|
||||||
}
|
}
|
||||||
Ok((_, entry)) => {
|
Ok((_, entry)) => {
|
||||||
let path = entry.into_path();
|
let path = entry.into_path();
|
||||||
@@ -63,4 +61,23 @@ impl DataSource for DirDataSource {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn latest_change(&self) -> Result<Option<DateTime<Utc>>, Self::Error> {
|
||||||
|
let mut ts: Option<DateTime<Utc>> = None;
|
||||||
|
|
||||||
|
for path in &self.dirs {
|
||||||
|
if !path.exists() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let new = path_ts_latest(path)?;
|
||||||
|
match (ts, new) {
|
||||||
|
(_, None) => continue,
|
||||||
|
(None, Some(new)) => ts = Some(new),
|
||||||
|
(Some(old), Some(new)) => ts = Some(old.max(new)),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
return Ok(ts);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
use chrono::{DateTime, Utc};
|
||||||
use std::{error::Error, fmt::Debug, path::PathBuf};
|
use std::{error::Error, fmt::Debug, path::PathBuf};
|
||||||
|
|
||||||
/// A read-only set of [Item]s.
|
/// A read-only set of [Item]s.
|
||||||
@@ -15,6 +16,9 @@ pub trait DataSource {
|
|||||||
fn iter(
|
fn iter(
|
||||||
&self,
|
&self,
|
||||||
) -> impl Iterator<Item = Result<(Self::Key, Box<dyn Item<Key = Self::Key>>), Self::Error>>;
|
) -> impl Iterator<Item = Result<(Self::Key, Box<dyn Item<Key = Self::Key>>), Self::Error>>;
|
||||||
|
|
||||||
|
/// Return the time of the latest change to the data in this source
|
||||||
|
fn latest_change(&self) -> Result<Option<DateTime<Utc>>, Self::Error>;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait Item: Debug + Send + Sync + 'static {
|
pub trait Item: Debug + Send + Sync + 'static {
|
||||||
@@ -34,7 +38,10 @@ pub trait Item: Debug + Send + Sync + 'static {
|
|||||||
//
|
//
|
||||||
|
|
||||||
pub trait Key: Debug + Clone + Send + Sync + 'static {
|
pub trait Key: Debug + Clone + Send + Sync + 'static {
|
||||||
fn to_string(&self) -> String;
|
/// Convert this key to a string, returning `None`
|
||||||
|
/// if we encounter any kind of error.
|
||||||
|
fn to_string(&self) -> Option<String>;
|
||||||
|
|
||||||
fn from_string(str: &str) -> Option<Self>;
|
fn from_string(str: &str) -> Option<Self>;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -43,7 +50,7 @@ impl Key for PathBuf {
|
|||||||
str.parse().ok()
|
str.parse().ok()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn to_string(&self) -> String {
|
fn to_string(&self) -> Option<String> {
|
||||||
self.to_str().expect("path is not a string").to_owned()
|
self.to_str().map(|x| x.to_owned())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -24,4 +24,3 @@ tracing-indicatif = { workspace = true }
|
|||||||
signal-hook = { workspace = true }
|
signal-hook = { workspace = true }
|
||||||
anstyle = { workspace = true }
|
anstyle = { workspace = true }
|
||||||
toml = { workspace = true }
|
toml = { workspace = true }
|
||||||
tantivy = { workspace = true }
|
|
||||||
|
|||||||
@@ -1,9 +1,10 @@
|
|||||||
use anyhow::{Context, Result, anyhow};
|
use anyhow::{Context, Result, anyhow};
|
||||||
use clap::Args;
|
use clap::Args;
|
||||||
use pile_config::ConfigToml;
|
use pile_config::ConfigToml;
|
||||||
|
use pile_dataset::Dataset;
|
||||||
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
||||||
use std::{fmt::Debug, path::PathBuf};
|
use std::{fmt::Debug, path::PathBuf};
|
||||||
use tracing::{debug, error, info};
|
use tracing::{debug, error, info, warn};
|
||||||
|
|
||||||
use crate::{CliCmd, GlobalContext};
|
use crate::{CliCmd, GlobalContext};
|
||||||
|
|
||||||
@@ -34,7 +35,6 @@ impl CliCmd for CheckCommand {
|
|||||||
Ok(config) => {
|
Ok(config) => {
|
||||||
info!("Config in {} is valid", self.config.display());
|
info!("Config in {} is valid", self.config.display());
|
||||||
debug!("{config:#?}");
|
debug!("{config:#?}");
|
||||||
return Ok(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Err(error) => {
|
Err(error) => {
|
||||||
@@ -42,5 +42,29 @@ impl CliCmd for CheckCommand {
|
|||||||
return Ok(1);
|
return Ok(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let ds = Dataset::open(&self.config)
|
||||||
|
.with_context(|| format!("while opening dataset for {}", self.config.display()))?;
|
||||||
|
|
||||||
|
let ts_fts = ds.ts_fts().context("while determining fts age")?;
|
||||||
|
let ts_data = ds.ts_data().context("while determining data age")?;
|
||||||
|
|
||||||
|
match (ts_fts, ts_data) {
|
||||||
|
(None, Some(_)) => warn!("Could not determine fts age"),
|
||||||
|
(None, None) | (Some(_), None) => warn!("Could not determine data age"),
|
||||||
|
(Some(ts_fts), Some(ts_data)) => {
|
||||||
|
let delta_secs = ((ts_data - ts_fts).as_seconds_f64() * 100.0).round() / 100.0;
|
||||||
|
if ts_data > ts_fts {
|
||||||
|
info!(
|
||||||
|
message = "FTS index is out-of-date and should be regenerated",
|
||||||
|
?delta_secs,
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
debug!(message = "FTS index is up-to-date", ?delta_secs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Ok(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,11 +1,8 @@
|
|||||||
use anyhow::{Context, Result, anyhow};
|
use anyhow::{Context, Result};
|
||||||
use clap::Args;
|
use clap::Args;
|
||||||
use pile_config::{ConfigToml, Source};
|
use pile_dataset::Dataset;
|
||||||
use pile_dataset::{DataSource, index::DbFtsIndex, source::DirDataSource};
|
|
||||||
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
||||||
use std::{fmt::Debug, path::PathBuf};
|
use std::{fmt::Debug, path::PathBuf};
|
||||||
use tantivy::{Executor, Index, IndexWriter};
|
|
||||||
use tracing::{error, info, trace, warn};
|
|
||||||
|
|
||||||
use crate::{CliCmd, GlobalContext};
|
use crate::{CliCmd, GlobalContext};
|
||||||
|
|
||||||
@@ -22,88 +19,17 @@ impl CliCmd for IndexCommand {
|
|||||||
_ctx: GlobalContext,
|
_ctx: GlobalContext,
|
||||||
flag: CancelFlag,
|
flag: CancelFlag,
|
||||||
) -> Result<i32, CancelableTaskError<anyhow::Error>> {
|
) -> Result<i32, CancelableTaskError<anyhow::Error>> {
|
||||||
let parent = self
|
let ds = Dataset::open(&self.config)
|
||||||
.config
|
.with_context(|| format!("while opening dataset for {}", self.config.display()))?;
|
||||||
.parent()
|
|
||||||
.with_context(|| format!("Config file {} has no parent", self.config.display()))?;
|
|
||||||
|
|
||||||
if !self.config.exists() {
|
ds.fts_refresh(Some(flag)).map_err(|x| {
|
||||||
return Err(anyhow!("{} does not exist", self.config.display()).into());
|
x.map_err(|x| {
|
||||||
}
|
anyhow::Error::from(x).context(format!(
|
||||||
|
"while opening dataset for {}",
|
||||||
let config = {
|
self.config.display()
|
||||||
let config = std::fs::read_to_string(&self.config)
|
))
|
||||||
.with_context(|| format!("while reading {}", self.config.display()))?;
|
})
|
||||||
let config: Result<ConfigToml, _> = toml::from_str(&config);
|
})?;
|
||||||
|
|
||||||
match config {
|
|
||||||
Ok(config) => {
|
|
||||||
trace!(message = "Loaded config", ?config);
|
|
||||||
config
|
|
||||||
}
|
|
||||||
|
|
||||||
Err(error) => {
|
|
||||||
error!("{} is invalid:\n{error}", self.config.display());
|
|
||||||
return Ok(1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let working_dir = config
|
|
||||||
.dataset
|
|
||||||
.working_dir
|
|
||||||
.clone()
|
|
||||||
.unwrap_or(parent.join(".pile"))
|
|
||||||
.join(&config.dataset.name.as_str());
|
|
||||||
let fts_dir = working_dir.join("fts");
|
|
||||||
|
|
||||||
if fts_dir.is_dir() {
|
|
||||||
warn!("Removing existing index in {}", fts_dir.display());
|
|
||||||
std::fs::remove_dir_all(&fts_dir)
|
|
||||||
.with_context(|| format!("while removing {}", fts_dir.display()))?;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::fs::create_dir_all(&fts_dir)
|
|
||||||
.with_context(|| format!("while creating dir {}", fts_dir.display()))?;
|
|
||||||
|
|
||||||
let mut sources = Vec::new();
|
|
||||||
for (name, source) in &config.dataset.source {
|
|
||||||
match source {
|
|
||||||
Source::Flac { path: dir } => {
|
|
||||||
let source = DirDataSource::new(name.as_str(), dir.clone().to_vec());
|
|
||||||
sources.push(source);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let db_index = DbFtsIndex::new(&fts_dir, &config);
|
|
||||||
let mut index = Index::create_in_dir(&fts_dir, db_index.schema.clone()).unwrap();
|
|
||||||
index.set_executor(Executor::multi_thread(10, "build-fts").unwrap());
|
|
||||||
let mut index_writer: IndexWriter = index.writer(15_000_000).unwrap();
|
|
||||||
|
|
||||||
for s in sources {
|
|
||||||
info!("Processing source {:?}", s.name);
|
|
||||||
|
|
||||||
for i in s.iter() {
|
|
||||||
let (k, v) = i.unwrap();
|
|
||||||
|
|
||||||
let doc = match db_index.entry_to_document(&*v).unwrap() {
|
|
||||||
Some(x) => x,
|
|
||||||
None => {
|
|
||||||
warn!("Skipping {k:?}, document is empty");
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
index_writer.add_document(doc).unwrap();
|
|
||||||
|
|
||||||
if flag.is_cancelled() {
|
|
||||||
return Err(CancelableTaskError::Cancelled);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
info!("Committing index");
|
|
||||||
index_writer.commit().unwrap();
|
|
||||||
|
|
||||||
return Ok(0);
|
return Ok(0);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,11 +1,9 @@
|
|||||||
use anyhow::{Context, Result, anyhow};
|
use anyhow::{Context, Result};
|
||||||
use clap::Args;
|
use clap::Args;
|
||||||
use pile_config::ConfigToml;
|
use pile_dataset::Dataset;
|
||||||
use pile_dataset::index::DbFtsIndex;
|
|
||||||
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError};
|
||||||
use std::{fmt::Debug, path::PathBuf, sync::Arc};
|
use std::{fmt::Debug, path::PathBuf};
|
||||||
use tantivy::collector::TopDocs;
|
use tracing::info;
|
||||||
use tracing::{error, trace};
|
|
||||||
|
|
||||||
use crate::{CliCmd, GlobalContext};
|
use crate::{CliCmd, GlobalContext};
|
||||||
|
|
||||||
@@ -24,57 +22,37 @@ pub struct LookupCommand {
|
|||||||
/// Path to dataset config
|
/// Path to dataset config
|
||||||
#[arg(long, short = 'c', default_value = "./pile.toml")]
|
#[arg(long, short = 'c', default_value = "./pile.toml")]
|
||||||
config: PathBuf,
|
config: PathBuf,
|
||||||
|
|
||||||
|
/// If provided, do not refresh fts
|
||||||
|
#[arg(long)]
|
||||||
|
no_refresh: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl CliCmd for LookupCommand {
|
impl CliCmd for LookupCommand {
|
||||||
|
#[expect(clippy::print_stdout)]
|
||||||
async fn run(
|
async fn run(
|
||||||
self,
|
self,
|
||||||
_ctx: GlobalContext,
|
_ctx: GlobalContext,
|
||||||
_flag: CancelFlag,
|
flag: CancelFlag,
|
||||||
) -> Result<i32, CancelableTaskError<anyhow::Error>> {
|
) -> Result<i32, CancelableTaskError<anyhow::Error>> {
|
||||||
let parent = self
|
let ds = Dataset::open(&self.config)
|
||||||
.config
|
.with_context(|| format!("while opening dataset for {}", self.config.display()))?;
|
||||||
.parent()
|
|
||||||
.with_context(|| format!("Config file {} has no parent", self.config.display()))?;
|
|
||||||
|
|
||||||
if !self.config.exists() {
|
if ds.needs_fts().context("while checking dataset fts")? {
|
||||||
return Err(anyhow!("{} does not exist", self.config.display()).into());
|
info!("FTS index is missing or out-of-date, regenerating");
|
||||||
|
ds.fts_refresh(Some(flag)).map_err(|x| {
|
||||||
|
x.map_err(|x| {
|
||||||
|
anyhow::Error::from(x).context(format!(
|
||||||
|
"while opening dataset for {}",
|
||||||
|
self.config.display()
|
||||||
|
))
|
||||||
|
})
|
||||||
|
})?;
|
||||||
}
|
}
|
||||||
|
|
||||||
let config = {
|
let results = ds
|
||||||
let config = std::fs::read_to_string(&self.config)
|
.fts_lookup(&self.query, self.topn)
|
||||||
.with_context(|| format!("while reading {}", self.config.display()))?;
|
.context("while running fts lookup")?;
|
||||||
let config: Result<ConfigToml, _> = toml::from_str(&config);
|
|
||||||
|
|
||||||
match config {
|
|
||||||
Ok(config) => {
|
|
||||||
trace!(message = "Loaded config", ?config);
|
|
||||||
config
|
|
||||||
}
|
|
||||||
|
|
||||||
Err(error) => {
|
|
||||||
error!("{} is invalid:\n{error}", self.config.display());
|
|
||||||
return Ok(1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let working_dir = config
|
|
||||||
.dataset
|
|
||||||
.working_dir
|
|
||||||
.clone()
|
|
||||||
.unwrap_or(parent.join(".pile"))
|
|
||||||
.join(&config.dataset.name.as_str());
|
|
||||||
let fts_dir = working_dir.join("fts");
|
|
||||||
|
|
||||||
if !fts_dir.is_dir() {
|
|
||||||
return Err(anyhow!("fts index does not exist").into());
|
|
||||||
}
|
|
||||||
|
|
||||||
let db_index = DbFtsIndex::new(&fts_dir, &config);
|
|
||||||
let results = db_index
|
|
||||||
.lookup(self.query, Arc::new(TopDocs::with_limit(self.topn)))
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
if self.score {
|
if self.score {
|
||||||
for res in results {
|
for res in results {
|
||||||
|
|||||||
Reference in New Issue
Block a user