diff --git a/Cargo.lock b/Cargo.lock index 2306592..2623661 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + [[package]] name = "adler32" version = "1.2.0" @@ -131,18 +137,375 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "autocfg" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "aws-credential-types" +version = "1.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d203b0bf2626dcba8665f5cd0871d7c2c0930223d6b6be9097592fea21242d0" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "zeroize", +] + +[[package]] +name = "aws-lc-rs" +version = "1.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94bffc006df10ac2a68c83692d734a465f8ee6c5b384d8545a636f81d858f4bf" +dependencies = [ + "aws-lc-sys", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4321e568ed89bb5a7d291a7f37997c2c0df89809d7b6d12062c81ddb54aa782e" +dependencies = [ + "cc", + "cmake", + "dunce", + "fs_extra", +] + +[[package]] +name = "aws-runtime" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ede2ddc593e6c8acc6ce3358c28d6677a6dc49b65ba4b37a2befe14a11297e75" +dependencies = [ + "aws-credential-types", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-eventstream", + "aws-smithy-http", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "bytes-utils", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "percent-encoding", + "pin-project-lite", + "tracing", + "uuid", +] + +[[package]] +name = "aws-sdk-s3" +version = "1.124.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "744c09d75dfec039a05cf8e117c995ded3b0baffa6eb83f3ed7075a01d8d8947" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-checksums", + "aws-smithy-eventstream", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "bytes", + "fastrand", + "hex", + "hmac", + "http 0.2.12", + "http 1.4.0", + "http-body 1.0.1", + "lru 0.16.3", + "percent-encoding", + "regex-lite", + "sha2 0.10.9", + "tracing", + "url", +] + +[[package]] +name = "aws-sigv4" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37411f8e0f4bea0c3ca0958ce7f18f6439db24d555dbd809787262cd00926aa9" +dependencies = [ + "aws-credential-types", + "aws-smithy-eventstream", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "crypto-bigint 0.5.5", + "form_urlencoded", + "hex", + "hmac", + "http 0.2.12", + "http 1.4.0", + "p256", + "percent-encoding", + "ring", + "sha2 0.10.9", + "subtle", + "time", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-async" +version = "1.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ffcaf626bdda484571968400c326a244598634dc75fd451325a54ad1a59acfc" +dependencies = [ + "futures-util", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "aws-smithy-checksums" +version = "0.64.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "180dddf5ef0f52a2f99e2fada10e16ea610e507ef6148a42bdc4d5867596aa00" +dependencies = [ + "aws-smithy-http", + "aws-smithy-types", + "bytes", + "crc-fast", + "hex", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "md-5", + "pin-project-lite", + "sha1", + "sha2 0.10.9", + "tracing", +] + +[[package]] +name = "aws-smithy-eventstream" +version = "0.60.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c0b3e587fbaa5d7f7e870544508af8ce82ea47cd30376e69e1e37c4ac746f79" +dependencies = [ + "aws-smithy-types", + "bytes", + "crc32fast", +] + +[[package]] +name = "aws-smithy-http" +version = "0.63.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d619373d490ad70966994801bc126846afaa0d1ee920697a031f0cf63f2568e7" +dependencies = [ + "aws-smithy-eventstream", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + +[[package]] +name = "aws-smithy-http-client" +version = "1.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00ccbb08c10f6bcf912f398188e42ee2eab5f1767ce215a02a73bc5df1bbdd95" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "h2 0.3.27", + "h2 0.4.13", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper 1.8.1", + "hyper-rustls 0.24.2", + "hyper-rustls 0.27.7", + "hyper-util", + "pin-project-lite", + "rustls 0.21.12", + "rustls 0.23.37", + "rustls-native-certs", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.4", + "tower", + "tracing", +] + +[[package]] +name = "aws-smithy-json" +version = "0.62.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27b3a779093e18cad88bbae08dc4261e1d95018c4c5b9356a52bcae7c0b6e9bb" +dependencies = [ + "aws-smithy-types", +] + +[[package]] +name = "aws-smithy-observability" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d3f39d5bb871aaf461d59144557f16d5927a5248a983a40654d9cf3b9ba183b" +dependencies = [ + "aws-smithy-runtime-api", +] + +[[package]] +name = "aws-smithy-runtime" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ccf7f6eba8b2dcf8ce9b74806c6c185659c311665c4bf8d6e71ebd454db6bf" +dependencies = [ + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-http-client", + "aws-smithy-observability", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "pin-project-lite", + "pin-utils", + "tokio", + "tracing", +] + +[[package]] +name = "aws-smithy-runtime-api" +version = "1.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4af6e5def28be846479bbeac55aa4603d6f7986fc5da4601ba324dd5d377516" +dependencies = [ + "aws-smithy-async", + "aws-smithy-types", + "bytes", + "http 0.2.12", + "http 1.4.0", + "pin-project-lite", + "tokio", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-types" +version = "1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ca2734c16913a45343b37313605d84e7d8b34a4611598ce1d25b35860a2bed3" +dependencies = [ + "base64-simd", + "bytes", + "bytes-utils", + "futures-core", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "itoa", + "num-integer", + "pin-project-lite", + "pin-utils", + "ryu", + "serde", + "time", + "tokio", + "tokio-util", +] + +[[package]] +name = "aws-smithy-xml" +version = "0.60.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce02add1aa3677d022f8adf81dcbe3046a95f17a1b1e8979c145cd21d3d22b3" +dependencies = [ + "xmlparser", +] + +[[package]] +name = "aws-types" +version = "1.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0470cc047657c6e286346bdf10a8719d26efd6a91626992e0e64481e44323e96" +dependencies = [ + "aws-credential-types", + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "rustc_version", + "tracing", +] + +[[package]] +name = "base16ct" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349a06037c7bf932dd7e7d1f653678b2038b9ad46a74102f1fc7bd7872678cce" + [[package]] name = "base64" version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "base64-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] + +[[package]] +name = "base64ct" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" + [[package]] name = "bitflags" version = "2.11.0" @@ -242,6 +605,16 @@ version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +[[package]] +name = "bytes-utils" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" +dependencies = [ + "bytes", + "either", +] + [[package]] name = "cbc" version = "0.1.2" @@ -338,6 +711,15 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" +[[package]] +name = "cmake" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" +dependencies = [ + "cc", +] + [[package]] name = "colorchoice" version = "1.0.4" @@ -357,6 +739,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + [[package]] name = "const-oid" version = "0.10.2" @@ -369,6 +757,16 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -393,6 +791,33 @@ dependencies = [ "libc", ] +[[package]] +name = "crc" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + +[[package]] +name = "crc-fast" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fd92aca2c6001b1bf5ba0ff84ee74ec8501b52bbef0cac80bf25a6c1d87a83d" +dependencies = [ + "crc", + "digest 0.10.7", + "rustversion", + "spin", +] + [[package]] name = "crc32fast" version = "1.5.0" @@ -442,6 +867,28 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +[[package]] +name = "crypto-bigint" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef2b4b23cddf68b89b8f8069890e8c270d54e2d5fe1b143820234805e4cb17ef" +dependencies = [ + "generic-array", + "rand_core", + "subtle", + "zeroize", +] + +[[package]] +name = "crypto-bigint" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" +dependencies = [ + "rand_core", + "subtle", +] + [[package]] name = "crypto-common" version = "0.1.7" @@ -530,6 +977,16 @@ dependencies = [ "adler32", ] +[[package]] +name = "der" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1a467a65c5e759bce6e65eaf91cc29f466cdc57cb65777bd646872a8a1fd4de" +dependencies = [ + "const-oid 0.9.6", + "zeroize", +] + [[package]] name = "deranged" version = "0.5.7" @@ -548,6 +1005,7 @@ checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer 0.10.4", "crypto-common 0.1.7", + "subtle", ] [[package]] @@ -557,22 +1015,71 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8bf3682cdec91817be507e4aa104314898b95b84d74f3d43882210101a545b6" dependencies = [ "block-buffer 0.11.0", - "const-oid", + "const-oid 0.10.2", "crypto-common 0.2.0", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "downcast-rs" version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "117240f60069e65410b3ae1bb213295bd828f707b5bec6596a1afc8793ce0cbc" +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + +[[package]] +name = "ecdsa" +version = "0.14.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413301934810f597c1d19ca71c8710e99a3f1ba28a0d2ebc01551a2daeea3c5c" +dependencies = [ + "der", + "elliptic-curve", + "rfc6979", + "signature", +] + [[package]] name = "either" version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "elliptic-curve" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7bb888ab5300a19b8e5bceef25ac745ad065f3c9f7efc6de1b91958110891d3" +dependencies = [ + "base16ct", + "crypto-bigint 0.4.9", + "der", + "digest 0.10.7", + "ff", + "generic-array", + "group", + "pkcs8", + "rand_core", + "sec1", + "subtle", + "zeroize", +] + [[package]] name = "encode_unicode" version = "1.0.0" @@ -627,12 +1134,32 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "ff" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d013fc25338cc558c5c2cfbad646908fb23591e2404481826742b651c9af7160" +dependencies = [ + "rand_core", + "subtle", +] + [[package]] name = "find-msvc-tools" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "fnv" version = "1.0.7" @@ -651,6 +1178,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + [[package]] name = "fs4" version = "0.13.1" @@ -661,6 +1197,21 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", +] + [[package]] name = "futures-core" version = "0.3.32" @@ -678,6 +1229,12 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + [[package]] name = "futures-task" version = "0.3.32" @@ -755,6 +1312,55 @@ dependencies = [ "web-time", ] +[[package]] +name = "group" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfbfb3a6cfbd390d5c9564ab283a0349b9b9fcd46a706c1eb10e0db70bfbac7" +dependencies = [ + "ff", + "rand_core", + "subtle", +] + +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "h2" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.4.0", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "hashbrown" version = "0.15.5" @@ -783,12 +1389,94 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest 0.10.7", +] + [[package]] name = "htmlescape" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http 1.4.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http 1.4.0", + "http-body 1.0.1", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + [[package]] name = "hybrid-array" version = "0.4.7" @@ -798,6 +1486,107 @@ dependencies = [ "typenum", ] +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "h2 0.4.13", + "http 1.4.0", + "http-body 1.0.1", + "httparse", + "itoa", + "pin-project-lite", + "pin-utils", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "log", + "rustls 0.21.12", + "tokio", + "tokio-rustls 0.24.1", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +dependencies = [ + "http 1.4.0", + "hyper 1.8.1", + "hyper-util", + "rustls 0.23.37", + "rustls-native-certs", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.4", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" +dependencies = [ + "base64", + "bytes", + "futures-channel", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "hyper 1.8.1", + "ipnet", + "libc", + "percent-encoding", + "pin-project-lite", + "socket2 0.6.2", + "tokio", + "tower-service", + "tracing", +] + [[package]] name = "hyperloglogplus" version = "0.4.1" @@ -831,18 +1620,131 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" + +[[package]] +name = "icu_properties" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" + +[[package]] +name = "icu_provider" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + [[package]] name = "id-arena" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" +[[package]] +name = "id3" +version = "1.16.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "965c5e6a62a241f2f673df956ea5f52c27780bc1031855890a551ed9b869e2d1" +dependencies = [ + "bitflags", + "byteorder", + "flate2", +] + [[package]] name = "ident_case" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + [[package]] name = "indexmap" version = "2.13.0" @@ -880,6 +1782,12 @@ dependencies = [ "generic-array", ] +[[package]] +name = "ipnet" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" + [[package]] name = "is_terminal_polyfill" version = "1.70.2" @@ -1005,6 +1913,12 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +[[package]] +name = "litemap" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" + [[package]] name = "lock_api" version = "0.4.14" @@ -1029,6 +1943,15 @@ dependencies = [ "hashbrown 0.15.5", ] +[[package]] +name = "lru" +version = "0.16.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1dc47f592c06f33f8e3aea9591776ec7c9f9e4124778ff8a3c3b87159f7e593" +dependencies = [ + "hashbrown 0.16.1", +] + [[package]] name = "lz4_flex" version = "0.11.5" @@ -1044,6 +1967,16 @@ dependencies = [ "regex-automata", ] +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest 0.10.7", +] + [[package]] name = "md5" version = "0.7.0" @@ -1086,6 +2019,16 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + [[package]] name = "mio" version = "1.1.1" @@ -1128,6 +2071,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -1156,6 +2108,18 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "269bca4c2591a28585d6bf10d9ed0332b7d76900a1b02bec41bdc3a2cdcda107" +[[package]] +name = "openssl-probe" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" + +[[package]] +name = "outref" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" + [[package]] name = "ownedbytes" version = "0.9.0" @@ -1165,6 +2129,17 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "p256" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51f44edd08f51e2ade572f141051021c5af22677e42b7dd28a88155151c33594" +dependencies = [ + "ecdsa", + "elliptic-curve", + "sha2 0.10.9", +] + [[package]] name = "parking_lot" version = "0.12.5" @@ -1233,6 +2208,12 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + [[package]] name = "pile" version = "0.0.1" @@ -1248,6 +2229,7 @@ dependencies = [ "serde_json", "signal-hook", "tokio", + "tokio-stream", "toml", "tracing", "tracing-indicatif", @@ -1268,20 +2250,23 @@ dependencies = [ name = "pile-dataset" version = "0.0.1" dependencies = [ + "async-trait", + "aws-sdk-s3", "blake3", "chrono", + "id3", "itertools 0.14.0", "pdf", "pile-config", "pile-flac", "pile-toolbox", - "rayon", "serde_json", "smartstring", "tantivy", "thiserror", + "tokio", + "tokio-stream", "toml", - "toml_edit", "tracing", "walkdir", ] @@ -1314,6 +2299,22 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkcs8" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9eca2c590a5f85da82668fa685c09ce2888b9430e83299debf1f34b65fd4a4ba" +dependencies = [ + "der", + "spki", +] + [[package]] name = "pkg-config" version = "0.3.32" @@ -1326,6 +2327,15 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" +[[package]] +name = "potential_utf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +dependencies = [ + "zerovec", +] + [[package]] name = "powerfmt" version = "0.2.0" @@ -1467,12 +2477,43 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-lite" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" + [[package]] name = "regex-syntax" version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +[[package]] +name = "rfc6979" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7743f17af12fa0b03b803ba12cd6a8d9483a587e89c69445e3909655c0b9fabb" +dependencies = [ + "crypto-bigint 0.4.9", + "hmac", + "zeroize", +] + +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.17", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + [[package]] name = "rle-decode-fast" version = "1.0.3" @@ -1495,6 +2536,15 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + [[package]] name = "rustix" version = "1.1.3" @@ -1508,12 +2558,87 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", +] + +[[package]] +name = "rustls" +version = "0.23.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" +dependencies = [ + "aws-lc-rs", + "once_cell", + "rustls-pki-types", + "rustls-webpki 0.103.9", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-native-certs" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework", +] + +[[package]] +name = "rustls-pki-types" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" +dependencies = [ + "zeroize", +] + +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" +dependencies = [ + "aws-lc-rs", + "ring", + "rustls-pki-types", + "untrusted", +] + [[package]] name = "rustversion" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + [[package]] name = "same-file" version = "1.0.6" @@ -1523,12 +2648,68 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "schannel" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "scopeguard" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "sec1" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be24c1842290c45df0a7bf069e0c268a747ad05a192f2fd7dcfdbc1cba40928" +dependencies = [ + "base16ct", + "der", + "generic-array", + "pkcs8", + "subtle", + "zeroize", +] + +[[package]] +name = "security-framework" +version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" +dependencies = [ + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "semver" version = "1.0.27" @@ -1587,6 +2768,17 @@ dependencies = [ "serde_core", ] +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest 0.10.7", +] + [[package]] name = "sha2" version = "0.10.9" @@ -1644,6 +2836,22 @@ dependencies = [ "libc", ] +[[package]] +name = "signature" +version = "1.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74233d3b3b2f6d4b006dc19dee745e73e2a6bfb6f93607cd3b02bd5b00797d7c" +dependencies = [ + "digest 0.10.7", + "rand_core", +] + +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + [[package]] name = "sketches-ddsketch" version = "0.3.0" @@ -1697,6 +2905,16 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "socket2" version = "0.6.2" @@ -1707,6 +2925,22 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "spin" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591" + +[[package]] +name = "spki" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67cf02bbac7a337dc36e4f5a693db6c21e7863f45070f7064577eb4367a3212b" +dependencies = [ + "base64ct", + "der", +] + [[package]] name = "stable_deref_trait" version = "1.2.1" @@ -1757,6 +2991,12 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + [[package]] name = "syn" version = "1.0.109" @@ -1779,6 +3019,17 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "tantivy" version = "0.25.0" @@ -1803,7 +3054,7 @@ dependencies = [ "itertools 0.14.0", "levenshtein_automata", "log", - "lru", + "lru 0.12.5", "lz4_flex", "measure_time", "memmap2", @@ -1998,6 +3249,16 @@ dependencies = [ "time-core", ] +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinyvec" version = "1.10.0" @@ -2025,7 +3286,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2", + "socket2 0.6.2", "tokio-macros", "windows-sys 0.61.2", ] @@ -2041,6 +3302,50 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" +dependencies = [ + "rustls 0.23.37", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + [[package]] name = "toml" version = "1.0.3+spec-1.1.0" @@ -2065,19 +3370,6 @@ dependencies = [ "serde_core", ] -[[package]] -name = "toml_edit" -version = "0.25.4+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7193cbd0ce53dc966037f54351dbbcf0d5a642c7f0038c382ef9e677ce8c13f2" -dependencies = [ - "indexmap", - "toml_datetime", - "toml_parser", - "toml_writer", - "winnow", -] - [[package]] name = "toml_parser" version = "1.0.9+spec-1.1.0" @@ -2093,6 +3385,28 @@ version = "1.0.6+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607" +[[package]] +name = "tower" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" +dependencies = [ + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + [[package]] name = "tracing" version = "0.1.44" @@ -2179,6 +3493,12 @@ dependencies = [ "tracing-serde", ] +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + [[package]] name = "tuple" version = "0.5.2" @@ -2246,12 +3566,36 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + [[package]] name = "utf8-ranges" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba" +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" version = "0.2.2" @@ -2282,6 +3626,12 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + [[package]] name = "vt100" version = "0.16.2" @@ -2313,6 +3663,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" @@ -2522,6 +3881,15 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + [[package]] name = "windows-sys" version = "0.59.0" @@ -2683,9 +4051,6 @@ name = "winnow" version = "0.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" -dependencies = [ - "memchr", -] [[package]] name = "wit-bindgen" @@ -2781,6 +4146,41 @@ dependencies = [ "wasmparser", ] +[[package]] +name = "writeable" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" + +[[package]] +name = "xmlparser" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" + +[[package]] +name = "yoke" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.8.35" @@ -2801,6 +4201,66 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "synstructure", +] + +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + +[[package]] +name = "zerotrie" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "zmij" version = "1.0.21" diff --git a/Cargo.toml b/Cargo.toml index 7f1ef14..d8b8d74 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,6 +73,10 @@ tantivy = "0.25.0" # Async & Parallelism tokio = { version = "1.49.0", features = ["full"] } +tokio-stream = "0.1" +async-trait = "0.1" +aws-sdk-s3 = "1" +aws-config = "1" # CLI & logging tracing = "0.1.44" diff --git a/crates/pile-config/src/config.toml b/crates/pile-config/src/config.toml index 689a6c1..36a94b5 100644 --- a/crates/pile-config/src/config.toml +++ b/crates/pile-config/src/config.toml @@ -9,8 +9,7 @@ name = "dataset" # working_dir = ".pile" # Data sources available in this dataset -source."music" = { type = "flac", path = ["music", "music-2"] } - +source."music" = { type = "filesystem", path = "music" } # This dataset's schema. # Defines normalized fields that are extracted from source entries on-demand. diff --git a/crates/pile-config/src/lib.rs b/crates/pile-config/src/lib.rs index 461a271..42991ac 100644 --- a/crates/pile-config/src/lib.rs +++ b/crates/pile-config/src/lib.rs @@ -46,16 +46,21 @@ pub struct DatasetConfig { pub post: Vec, } +#[derive(Debug, Clone, Deserialize)] +pub struct S3Credentials { + pub access_key_id: String, + pub secret_access_key: String, +} + #[derive(Debug, Clone, Deserialize)] #[serde(tag = "type")] #[serde(rename_all = "lowercase")] pub enum Source { - /// A directory files + /// A directory of files Filesystem { /// The directories to scan. /// Must be relative. - #[serde(alias = "paths")] - path: OneOrMany, + path: PathBuf, /// If true, all toml files are ignored. /// Metadata can be added to any file using a {filename}.toml. @@ -65,6 +70,23 @@ pub enum Source { #[serde(default = "default_true")] sidecars: bool, }, + + /// An S3-compatible object store bucket + S3 { + bucket: String, + prefix: Option, + + /// Custom endpoint URL (for MinIO, etc.) + endpoint: Option, + + region: String, + + credentials: S3Credentials, + + /// If true, all .toml objects are treated as sidecar metadata files. + #[serde(default = "default_true")] + sidecars: bool, + }, } // diff --git a/crates/pile-dataset/Cargo.toml b/crates/pile-dataset/Cargo.toml index b5d0520..f04c52d 100644 --- a/crates/pile-dataset/Cargo.toml +++ b/crates/pile-dataset/Cargo.toml @@ -20,9 +20,11 @@ tracing = { workspace = true } chrono = { workspace = true } toml = { workspace = true } thiserror = { workspace = true } -rayon = { workspace = true } smartstring = { workspace = true } blake3 = { workspace = true } -toml_edit = { workspace = true } pdf = { workspace = true } id3 = { workspace = true } +tokio = { workspace = true } +tokio-stream = { workspace = true } +async-trait = { workspace = true } +aws-sdk-s3 = { workspace = true } diff --git a/crates/pile-dataset/src/dataset.rs b/crates/pile-dataset/src/dataset.rs index c20823a..17c9337 100644 --- a/crates/pile-dataset/src/dataset.rs +++ b/crates/pile-dataset/src/dataset.rs @@ -1,30 +1,17 @@ use chrono::{DateTime, Utc}; use pile_config::{ConfigToml, Label, Source}; use pile_toolbox::cancelabletask::{CancelFlag, CancelableTaskError}; -use rayon::{ - ThreadPoolBuilder, - iter::{IntoParallelIterator, ParallelIterator}, -}; -use std::{ - io::ErrorKind, - path::PathBuf, - sync::{ - Arc, - atomic::{AtomicU64, Ordering}, - mpsc::Receiver, - }, - thread::JoinHandle, - time::Instant, -}; +use std::{collections::HashMap, io::ErrorKind, path::PathBuf, sync::Arc, time::Instant}; use tantivy::{Executor, Index, IndexWriter, TantivyError, collector::TopDocs}; use thiserror::Error; +use tokio_stream::{StreamExt, wrappers::ReceiverStream}; use tracing::{debug, info, trace, warn}; use crate::{ - DataSource, FileItem, + DataSource, Item, index::{DbFtsIndex, FtsLookupResult}, path_ts_earliest, - source::DirDataSource, + source::{DirDataSource, S3DataSource}, }; #[derive(Debug, Error)] @@ -39,15 +26,54 @@ pub enum DatasetError { NoFtsIndex, } -pub struct Dataset { +// +// MARK: Dataset enum +// + +/// An opened data source — either a local filesystem directory or an S3 bucket. +pub enum Dataset { + Dir(Arc), + S3(Arc), +} + +impl Dataset { + pub async fn get(&self, key: &str) -> Option { + match self { + Self::Dir(ds) => ds.get(key).await.ok().flatten(), + Self::S3(ds) => ds.get(key).await.ok().flatten(), + } + } + + pub fn iter(&self) -> ReceiverStream> { + match self { + Self::Dir(ds) => ds.iter(), + Self::S3(ds) => ds.iter(), + } + } + + pub async fn latest_change(&self) -> Result>, std::io::Error> { + match self { + Self::Dir(ds) => ds.latest_change().await, + Self::S3(ds) => ds.latest_change().await, + } + } +} + +// +// MARK: Datasets collection +// + +/// An opened dataset: config, working directory, and all opened sources. +pub struct Datasets { pub path_config: PathBuf, pub path_parent: PathBuf, pub path_workdir: PathBuf, pub config: ConfigToml, + pub sources: HashMap, } -impl Dataset { +impl Datasets { pub fn open(config: impl Into) -> Result { let path_config = config.into(); let path_parent = path_config @@ -84,11 +110,54 @@ impl Dataset { .unwrap_or(path_parent.join(".pile")) .join(config.dataset.name.as_str()); + let mut sources = HashMap::new(); + for (label, source) in &config.dataset.source { + match source { + Source::Filesystem { path, sidecars } => { + sources.insert( + label.clone(), + Dataset::Dir(Arc::new(DirDataSource::new( + label, + path_parent.join(path), + *sidecars, + ))), + ); + } + + Source::S3 { + bucket, + prefix, + endpoint, + region, + credentials, + sidecars, + } => { + match S3DataSource::new( + label, + bucket.clone(), + prefix.clone(), + endpoint.clone(), + region.clone(), + credentials, + *sidecars, + ) { + Ok(ds) => { + sources.insert(label.clone(), Dataset::S3(Arc::new(ds))); + } + Err(err) => { + warn!("Could not open S3 source {label}: {err}"); + } + } + } + } + } + return Ok(Self { path_config, path_parent, path_workdir, config, + sources, }); } @@ -96,15 +165,8 @@ impl Dataset { // MARK: get // - pub fn get(&self, source: &Label, key: &PathBuf) -> Option { - let s = self.config.dataset.source.get(source)?; - let s = match s { - Source::Filesystem { path, sidecars } => { - DirDataSource::new(source, path.clone().to_vec(), *sidecars) - } - }; - - s.get(key).ok().flatten() + pub async fn get(&self, source: &Label, key: &str) -> Option { + self.sources.get(source)?.get(key).await } // @@ -112,9 +174,9 @@ impl Dataset { // /// Refresh this dataset's fts index. - pub fn fts_refresh( + pub async fn fts_refresh( &self, - threads: usize, + _threads: usize, flag: Option, ) -> Result<(), CancelableTaskError> { let fts_tmp_dir = self.path_workdir.join(".tmp-fts"); @@ -134,58 +196,40 @@ impl Dataset { let mut index_writer: IndexWriter = index.writer(50 * 1024 * 1024).map_err(DatasetError::from)?; - let batch_size = 1000; - let (_read_task, read_rx) = start_read_task(&self.config, batch_size); - - #[expect(clippy::unwrap_used)] - let write_pool = ThreadPoolBuilder::new() - .num_threads(threads.max(1)) - .thread_name(|x| format!("fts_refresh_thread_{x}")) - .build() - .unwrap(); - let mut total = 0u64; - while let Ok(batch) = read_rx.recv() { - let batch = batch?; - if let Some(flag) = &flag - && flag.is_cancelled() - { - return Err(CancelableTaskError::Cancelled); + let mut logged_at = Instant::now(); + + for (name, dataset) in &self.sources { + info!("Loading source {name}"); + + let mut stream = dataset.iter(); + while let Some(item_result) = stream.next().await { + if let Some(flag) = &flag + && flag.is_cancelled() + { + return Err(CancelableTaskError::Cancelled); + } + + let item = item_result.map_err(DatasetError::from)?; + let key = item.key(); + + match db_index.entry_to_document(&item).await { + Ok(Some(doc)) => { + index_writer.add_document(doc).map_err(DatasetError::from)?; + total += 1; + if logged_at.elapsed().as_secs() >= 5 { + debug!("Indexed {total} documents so far"); + logged_at = Instant::now(); + } + } + Ok(None) => { + warn!("Skipping {key:?}, document is empty"); + } + Err(err) => { + warn!("Could not read {key:?}, skipping. {err}"); + } + } } - - let this = AtomicU64::new(0); - let start = Instant::now(); - write_pool - .install(|| { - batch - .into_par_iter() - .filter_map(|(key, item)| match db_index.entry_to_document(&item) { - Ok(Some(doc)) => Some((key, doc)), - Ok(None) => { - warn!("Skipping {key:?}, document is empty"); - None - } - Err(err) => { - warn!("Could not read {key:?}, skipping. {err}"); - None - } - }) - .map(|(key, doc)| { - this.fetch_add(1, Ordering::Relaxed); - index_writer - .add_document(doc) - .map_err(|err| (key, err)) - .map(|_| ()) - }) - .find_first(|x| x.is_err()) - .unwrap_or(Ok(())) - }) - .map_err(|(_key, err)| DatasetError::from(err))?; - - let this = this.load(Ordering::Relaxed); - total += this; - let time_ms = start.elapsed().as_millis(); - debug!("Added a batch of {this} in {time_ms} ms ({total} total)"); } if let Some(flag) = flag.as_ref() @@ -194,7 +238,7 @@ impl Dataset { return Err(CancelableTaskError::Cancelled); } - info!("Committing index"); + info!("Committing {total} documents"); index_writer.commit().map_err(DatasetError::from)?; if fts_dir.is_dir() { @@ -247,19 +291,14 @@ impl Dataset { } /// Time at which data was last modified - pub fn ts_data(&self) -> Result>, std::io::Error> { + pub async fn ts_data(&self) -> Result>, std::io::Error> { let mut ts: Option> = None; - for (label, source) in &self.config.dataset.source { - match source { - Source::Filesystem { path, sidecars } => { - let s = DirDataSource::new(label, path.clone().to_vec(), *sidecars); - match (ts, s.latest_change()?) { - (_, None) => continue, - (None, Some(new)) => ts = Some(new), - (Some(old), Some(new)) => ts = Some(old.max(new)), - }; - } + for dataset in self.sources.values() { + match (ts, dataset.latest_change().await?) { + (_, None) => continue, + (None, Some(new)) => ts = Some(new), + (Some(old), Some(new)) => ts = Some(old.max(new)), } } @@ -268,10 +307,10 @@ impl Dataset { /// Returns true if we do not have an fts index, /// or if our fts index is older than our data. - pub fn needs_fts(&self) -> Result { + pub async fn needs_fts(&self) -> Result { let start = Instant::now(); let ts_fts = self.ts_fts()?; - let ts_data = self.ts_data()?; + let ts_data = self.ts_data().await?; let result = match (ts_fts, ts_data) { (None, Some(_)) => true, @@ -292,59 +331,3 @@ impl Dataset { return Ok(result); } } - -// -// MARK: read_task -// - -fn start_read_task( - config: &ConfigToml, - batch_size: usize, -) -> ( - JoinHandle<()>, - Receiver, DatasetError>>, -) { - let config = config.clone(); - let (read_tx, read_rx) = std::sync::mpsc::sync_channel(2); - - let read_task = std::thread::spawn(move || { - let mut batch = Vec::with_capacity(batch_size); - for (name, source) in &config.dataset.source { - info!("Loading source {name}"); - - match source { - Source::Filesystem { path, sidecars } => { - let source = DirDataSource::new(name, path.clone().to_vec(), *sidecars); - for i in source.iter() { - match i { - Ok(x) => batch.push(x), - Err(err) => { - let err = Err(DatasetError::from(err)); - let _ = read_tx.send(err); - return; - } - } - - if batch.len() >= batch_size { - let b = std::mem::replace(&mut batch, Vec::with_capacity(batch_size)); - - match read_tx.send(Ok(b)) { - Ok(()) => {} - Err(_) => return, - }; - } - } - } - } - } - - if !batch.is_empty() { - match read_tx.send(Ok(batch)) { - Ok(()) => {} - Err(_) => return, - }; - } - }); - - return (read_task, read_rx); -} diff --git a/crates/pile-dataset/src/extract/flac.rs b/crates/pile-dataset/src/extract/flac.rs index 223ea48..47b0ef1 100644 --- a/crates/pile-dataset/src/extract/flac.rs +++ b/crates/pile-dataset/src/extract/flac.rs @@ -1,34 +1,40 @@ use pile_config::Label; use pile_flac::{FlacBlock, FlacReader}; -use std::{collections::HashMap, fs::File, io::BufReader, sync::OnceLock}; +use std::{collections::HashMap, io::BufReader, sync::OnceLock}; -use crate::{FileItem, PileValue, extract::Extractor}; +use crate::{Item, PileValue, extract::Extractor}; pub struct FlacExtractor<'a> { - item: &'a FileItem, - output: OnceLock>>, + item: &'a Item, + output: OnceLock>>, } impl<'a> FlacExtractor<'a> { - pub fn new(item: &'a FileItem) -> Self { + pub fn new(item: &'a Item) -> Self { Self { item, output: OnceLock::new(), } } - fn get_inner(&self) -> Result<&HashMap>, std::io::Error> { + async fn get_inner(&self) -> Result<&HashMap>, std::io::Error> { if let Some(x) = self.output.get() { return Ok(x); } - // If this isn't a flac file, ignore it. - if self.item.path.extension().map(|x| x.to_str()).flatten() != Some("flac") { - return Ok(self.output.get_or_init(|| HashMap::new())); + let key = match self.item { + Item::File { path, .. } => path.to_str().unwrap_or_default().to_owned(), + Item::S3 { key, .. } => key.to_string(), + }; + + if !key.ends_with(".flac") { + let _ = self.output.set(HashMap::new()); + #[expect(clippy::unwrap_used)] + return Ok(self.output.get().unwrap()); } - let file = File::open(&self.item.path)?; - let reader = FlacReader::new(BufReader::new(file)); + let bytes = self.item.read().await?.read_to_end().await?; + let reader = FlacReader::new(BufReader::new(std::io::Cursor::new(bytes))); let mut output: HashMap> = HashMap::new(); for block in reader { @@ -53,19 +59,22 @@ impl<'a> FlacExtractor<'a> { .map(|(k, v)| (k, PileValue::Array(v))) .collect(); - return Ok(self.output.get_or_init(|| output)); + let _ = self.output.set(output); + #[expect(clippy::unwrap_used)] + return Ok(self.output.get().unwrap()); } } -impl Extractor for FlacExtractor<'_> { - fn field<'a>( +#[async_trait::async_trait] +impl Extractor for FlacExtractor<'_> { + async fn field<'a>( &'a self, name: &Label, - ) -> Result>, std::io::Error> { - Ok(self.get_inner()?.get(name)) + ) -> Result>, std::io::Error> { + Ok(self.get_inner().await?.get(name)) } - fn fields(&self) -> Result, std::io::Error> { - Ok(self.get_inner()?.keys().cloned().collect()) + async fn fields(&self) -> Result, std::io::Error> { + Ok(self.get_inner().await?.keys().cloned().collect()) } } diff --git a/crates/pile-dataset/src/extract/fs.rs b/crates/pile-dataset/src/extract/fs.rs index b6fbca5..1c218e2 100644 --- a/crates/pile-dataset/src/extract/fs.rs +++ b/crates/pile-dataset/src/extract/fs.rs @@ -1,50 +1,48 @@ use pile_config::Label; use std::{collections::HashMap, path::Component, sync::OnceLock}; -use crate::{FileItem, Key, PileValue, extract::Extractor}; +use crate::{Item, PileValue, extract::Extractor}; pub struct FsExtractor<'a> { - item: &'a FileItem, - output: OnceLock>>, + item: &'a Item, + output: OnceLock>>, } impl<'a> FsExtractor<'a> { - pub fn new(item: &'a FileItem) -> Self { + pub fn new(item: &'a Item) -> Self { Self { item, output: OnceLock::new(), } } - fn get_inner(&self) -> Result<&HashMap>, std::io::Error> { + fn get_inner(&self) -> Result<&HashMap>, std::io::Error> { if let Some(x) = self.output.get() { return Ok(x); } + let Item::File { path, .. } = self.item else { + return Ok(self.output.get_or_init(HashMap::new)); + }; + #[expect(clippy::unwrap_used)] let output = HashMap::from([ ( Label::new("extension").unwrap(), - self.item - .path - .extension() + path.extension() .and_then(|x| x.to_str()) .map(|x| PileValue::String(x.into())) .unwrap_or(PileValue::Null), ), ( Label::new("path").unwrap(), - self.item - .path - .to_string() + path.to_str() .map(|x| PileValue::String(x.into())) .unwrap_or(PileValue::Null), ), ( Label::new("segments").unwrap(), - self.item - .path - .components() + path.components() .map(|x| match x { Component::CurDir => Some(".".to_owned()), Component::Normal(x) => x.to_str().map(|x| x.to_owned()), @@ -63,15 +61,16 @@ impl<'a> FsExtractor<'a> { } } -impl Extractor for FsExtractor<'_> { - fn field<'a>( +#[async_trait::async_trait] +impl Extractor for FsExtractor<'_> { + async fn field<'a>( &'a self, name: &Label, - ) -> Result>, std::io::Error> { + ) -> Result>, std::io::Error> { Ok(self.get_inner()?.get(name)) } - fn fields(&self) -> Result, std::io::Error> { + async fn fields(&self) -> Result, std::io::Error> { Ok(self.get_inner()?.keys().cloned().collect()) } } diff --git a/crates/pile-dataset/src/extract/id3.rs b/crates/pile-dataset/src/extract/id3.rs index 951bf41..22a41e5 100644 --- a/crates/pile-dataset/src/extract/id3.rs +++ b/crates/pile-dataset/src/extract/id3.rs @@ -1,38 +1,50 @@ use id3::Tag; use pile_config::Label; -use std::{borrow::Cow, collections::HashMap, sync::OnceLock}; +use std::{borrow::Cow, collections::HashMap, io::BufReader, sync::OnceLock}; -use crate::{FileItem, PileValue, extract::Extractor}; +use crate::{Item, PileValue, extract::Extractor}; pub struct Id3Extractor<'a> { - item: &'a FileItem, - output: OnceLock>>, + item: &'a Item, + output: OnceLock>>, } impl<'a> Id3Extractor<'a> { - pub fn new(item: &'a FileItem) -> Self { + pub fn new(item: &'a Item) -> Self { Self { item, output: OnceLock::new(), } } - fn get_inner(&self) -> Result<&HashMap>, std::io::Error> { + async fn get_inner(&self) -> Result<&HashMap>, std::io::Error> { if let Some(x) = self.output.get() { return Ok(x); } - let ext = self.item.path.extension().and_then(|x| x.to_str()); + let key = match self.item { + Item::File { path, .. } => path.to_str().unwrap_or_default().to_owned(), + Item::S3 { key, .. } => key.to_string(), + }; + + let ext = key.rsplit('.').next(); if !matches!(ext, Some("mp3") | Some("aiff") | Some("aif") | Some("wav")) { - return Ok(self.output.get_or_init(HashMap::new)); + let _ = self.output.set(HashMap::new()); + #[expect(clippy::unwrap_used)] + return Ok(self.output.get().unwrap()); } - let tag = match Tag::read_from_path(&self.item.path) { + let bytes = self.item.read().await?.read_to_end().await?; + let tag = match Tag::read_from2(BufReader::new(std::io::Cursor::new(bytes))) { Ok(tag) => tag, Err(id3::Error { kind: id3::ErrorKind::NoTag, .. - }) => return Ok(self.output.get_or_init(HashMap::new)), + }) => { + let _ = self.output.set(HashMap::new()); + #[expect(clippy::unwrap_used)] + return Ok(self.output.get().unwrap()); + } Err(id3::Error { kind: id3::ErrorKind::Io(e), .. @@ -40,7 +52,7 @@ impl<'a> Id3Extractor<'a> { Err(e) => return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, e)), }; - let mut output: HashMap>> = HashMap::new(); + let mut output: HashMap>> = HashMap::new(); for frame in tag.frames() { if let Some(text) = frame.content().text() { let name = frame_id_to_field(frame.id()); @@ -58,7 +70,9 @@ impl<'a> Id3Extractor<'a> { .map(|(k, v)| (k, PileValue::Array(v))) .collect(); - return Ok(self.output.get_or_init(|| output)); + let _ = self.output.set(output); + #[expect(clippy::unwrap_used)] + return Ok(self.output.get().unwrap()); } } @@ -66,6 +80,7 @@ impl<'a> Id3Extractor<'a> { /// Falls back to the lowercased frame ID if no mapping exists. fn frame_id_to_field(id: &str) -> Cow<'static, str> { match id { + // spell:off "TIT2" => Cow::Borrowed("title"), "TIT1" => Cow::Borrowed("grouping"), "TIT3" => Cow::Borrowed("subtitle"), @@ -98,18 +113,20 @@ fn frame_id_to_field(id: &str) -> Cow<'static, str> { "MVNM" => Cow::Borrowed("movement"), "MVIN" => Cow::Borrowed("movementnumber"), _ => Cow::Owned(id.to_lowercase()), + // spell:on } } -impl Extractor for Id3Extractor<'_> { - fn field<'a>( +#[async_trait::async_trait] +impl Extractor for Id3Extractor<'_> { + async fn field<'a>( &'a self, name: &Label, - ) -> Result>, std::io::Error> { - Ok(self.get_inner()?.get(name)) + ) -> Result>, std::io::Error> { + Ok(self.get_inner().await?.get(name)) } - fn fields(&self) -> Result, std::io::Error> { - Ok(self.get_inner()?.keys().cloned().collect()) + async fn fields(&self) -> Result, std::io::Error> { + Ok(self.get_inner().await?.keys().cloned().collect()) } } diff --git a/crates/pile-dataset/src/extract/map.rs b/crates/pile-dataset/src/extract/map.rs index 6f68e65..c095324 100644 --- a/crates/pile-dataset/src/extract/map.rs +++ b/crates/pile-dataset/src/extract/map.rs @@ -1,18 +1,22 @@ use pile_config::Label; use std::collections::HashMap; -use crate::{Item, PileValue, extract::Extractor}; +use crate::{PileValue, extract::Extractor}; -pub struct MapExtractor<'a, I: Item> { - pub(crate) inner: HashMap>, +pub struct MapExtractor<'a> { + pub(crate) inner: HashMap>, } -impl Extractor for MapExtractor<'_, I> { - fn field<'a>(&'a self, name: &Label) -> Result>, std::io::Error> { +#[async_trait::async_trait] +impl Extractor for MapExtractor<'_> { + async fn field<'a>( + &'a self, + name: &Label, + ) -> Result>, std::io::Error> { Ok(self.inner.get(name)) } - fn fields(&self) -> Result, std::io::Error> { + async fn fields(&self) -> Result, std::io::Error> { Ok(self.inner.keys().cloned().collect()) } } diff --git a/crates/pile-dataset/src/extract/mod.rs b/crates/pile-dataset/src/extract/mod.rs index 597b121..6feeaaa 100644 --- a/crates/pile-dataset/src/extract/mod.rs +++ b/crates/pile-dataset/src/extract/mod.rs @@ -1,5 +1,5 @@ use pile_config::Label; -use std::{collections::HashMap, rc::Rc}; +use std::{collections::HashMap, sync::Arc}; mod flac; pub use flac::*; @@ -13,59 +13,73 @@ pub use fs::*; mod pdf; pub use pdf::*; -mod sidecar; -pub use sidecar::*; +mod toml; +pub use toml::*; mod map; pub use map::*; +mod sidecar; +pub use sidecar::*; + +use crate::Item; + /// An attachment that extracts metadata from an [Item]. /// /// Metadata is exposed as an immutable map of {label: value}, /// much like a json object. -pub trait Extractor { +#[async_trait::async_trait] +pub trait Extractor: Send + Sync { /// Get the field at `name` from `item`. /// - returns `None` if `name` is not a valid field /// - returns `Some(Null)` if `name` is not available - fn field<'a>( + async fn field<'a>( &'a self, name: &pile_config::Label, - ) -> Result>, std::io::Error>; + ) -> Result>, std::io::Error>; /// Return all fields in this extractor. /// `Self::field` must return [Some] for all these keys /// and [None] for all others. - fn fields(&self) -> Result, std::io::Error>; + async fn fields(&self) -> Result, std::io::Error>; } -pub struct MetaExtractor<'a, I: crate::Item> { - inner: MapExtractor<'a, I>, +pub struct MetaExtractor<'a> { + inner: MapExtractor<'a>, } -impl<'a> MetaExtractor<'a, crate::FileItem> { +// +// MARK: file +// + +impl<'a> MetaExtractor<'a> { #[expect(clippy::unwrap_used)] - pub fn new(item: &'a crate::FileItem) -> Self { + pub fn new(item: &'a Item) -> Self { let inner = MapExtractor { inner: HashMap::from([ ( Label::new("flac").unwrap(), - crate::PileValue::Extractor(Rc::new(FlacExtractor::new(item))), + crate::PileValue::Extractor(Arc::new(FlacExtractor::new(item))), ), ( Label::new("id3").unwrap(), - crate::PileValue::Extractor(Rc::new(Id3Extractor::new(item))), + crate::PileValue::Extractor(Arc::new(Id3Extractor::new(item))), ), ( Label::new("fs").unwrap(), - crate::PileValue::Extractor(Rc::new(FsExtractor::new(item))), + crate::PileValue::Extractor(Arc::new(FsExtractor::new(item))), ), ( Label::new("pdf").unwrap(), - crate::PileValue::Extractor(Rc::new(PdfExtractor::new(item))), + crate::PileValue::Extractor(Arc::new(PdfExtractor::new(item))), + ), + ( + Label::new("toml").unwrap(), + crate::PileValue::Extractor(Arc::new(TomlExtractor::new(item))), ), ( Label::new("sidecar").unwrap(), - crate::PileValue::Extractor(Rc::new(SidecarExtractor::new(item))), + crate::PileValue::Extractor(Arc::new(SidecarExtractor::new(item))), ), ]), }; @@ -74,16 +88,17 @@ impl<'a> MetaExtractor<'a, crate::FileItem> { } } -impl Extractor for MetaExtractor<'_, crate::FileItem> { - fn field<'a>( +#[async_trait::async_trait] +impl Extractor for MetaExtractor<'_> { + async fn field<'a>( &'a self, name: &pile_config::Label, - ) -> Result>, std::io::Error> { - self.inner.field(name) + ) -> Result>, std::io::Error> { + self.inner.field(name).await } #[expect(clippy::unwrap_used)] - fn fields(&self) -> Result, std::io::Error> { + async fn fields(&self) -> Result, std::io::Error> { return Ok(vec![ Label::new("flac").unwrap(), Label::new("id3").unwrap(), @@ -92,4 +107,4 @@ impl Extractor for MetaExtractor<'_, crate::FileItem> { Label::new("sidecar").unwrap(), ]); } -} \ No newline at end of file +} diff --git a/crates/pile-dataset/src/extract/pdf/mod.rs b/crates/pile-dataset/src/extract/pdf/mod.rs index 21f6d0c..d8c4d7e 100644 --- a/crates/pile-dataset/src/extract/pdf/mod.rs +++ b/crates/pile-dataset/src/extract/pdf/mod.rs @@ -1,5 +1,5 @@ use pile_config::Label; -use std::{collections::HashMap, rc::Rc}; +use std::{collections::HashMap, sync::Arc}; mod pdf_meta; pub use pdf_meta::*; @@ -8,26 +8,26 @@ mod pdf_text; pub use pdf_text::*; use crate::{ - FileItem, PileValue, + Item, PileValue, extract::{Extractor, MapExtractor}, }; pub struct PdfExtractor<'a> { - inner: MapExtractor<'a, FileItem>, + inner: MapExtractor<'a>, } impl<'a> PdfExtractor<'a> { #[expect(clippy::unwrap_used)] - pub fn new(item: &'a FileItem) -> Self { + pub fn new(item: &'a Item) -> Self { let inner = MapExtractor { inner: HashMap::from([ ( Label::new("text").unwrap(), - PileValue::Extractor(Rc::new(PdfTextExtractor::new(item))), + PileValue::Extractor(Arc::new(PdfTextExtractor::new(item))), ), ( Label::new("meta").unwrap(), - PileValue::Extractor(Rc::new(PdfMetaExtractor::new(item))), + PileValue::Extractor(Arc::new(PdfMetaExtractor::new(item))), ), ]), }; @@ -36,23 +36,25 @@ impl<'a> PdfExtractor<'a> { } } -impl Extractor for PdfExtractor<'_> { - fn field<'a>( +#[async_trait::async_trait] +impl Extractor for PdfExtractor<'_> { + async fn field<'a>( &'a self, name: &pile_config::Label, - ) -> Result>, std::io::Error> { + ) -> Result>, std::io::Error> { + #[expect(clippy::unwrap_used)] if name.as_str() == "text" { match self.inner.inner.get(name).unwrap() { - PileValue::Extractor(x) => return x.field(name), + PileValue::Extractor(x) => return x.field(name).await, _ => unreachable!(), }; } - self.inner.field(name) + self.inner.field(name).await } #[expect(clippy::unwrap_used)] - fn fields(&self) -> Result, std::io::Error> { + async fn fields(&self) -> Result, std::io::Error> { Ok(vec![ Label::new("text").unwrap(), Label::new("meta").unwrap(), diff --git a/crates/pile-dataset/src/extract/pdf/pdf_meta.rs b/crates/pile-dataset/src/extract/pdf/pdf_meta.rs index db88091..332ebff 100644 --- a/crates/pile-dataset/src/extract/pdf/pdf_meta.rs +++ b/crates/pile-dataset/src/extract/pdf/pdf_meta.rs @@ -1,40 +1,44 @@ use pdf::file::FileOptions; -use pdf::primitive::{Date, TimeRel}; +use pdf::primitive::{Date, PdfString, TimeRel}; use pile_config::Label; use std::{collections::HashMap, sync::OnceLock}; +use tracing::debug; -use crate::{FileItem, PileValue, extract::Extractor}; +use crate::{Item, PileValue, extract::Extractor}; pub struct PdfMetaExtractor<'a> { - item: &'a FileItem, - output: OnceLock>>, + item: &'a Item, + output: OnceLock>>, } impl<'a> PdfMetaExtractor<'a> { - pub fn new(item: &'a FileItem) -> Self { + pub fn new(item: &'a Item) -> Self { Self { item, output: OnceLock::new(), } } - fn get_inner(&self) -> Result<&HashMap>, std::io::Error> { + async fn get_inner(&self) -> Result<&HashMap>, std::io::Error> { if let Some(x) = self.output.get() { return Ok(x); } - if self.item.path.extension().map(|x| x.to_str()).flatten() != Some("pdf") { - return Ok(self.output.get_or_init(|| HashMap::new())); - } + let bytes = self.item.read().await?.read_to_end().await?; - let file = FileOptions::cached() - .open(&self.item.path) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?; + let file = match FileOptions::cached().load(bytes) { + Ok(x) => x, + Err(pdf::PdfError::Io { source }) => return Err(source), + Err(error) => { + debug!(message = "Could not process pdf", ?error, key = ?self.item.key()); + return Ok(self.output.get_or_init(HashMap::new)); + } + }; - let mut output: HashMap> = HashMap::new(); + let mut output: HashMap> = HashMap::new(); if let Some(info) = &file.trailer.info_dict { - let fields: &[(&str, Option<&_>)] = &[ + let fields: &[(&str, Option<&PdfString>)] = &[ ("title", info.title.as_ref()), ("author", info.author.as_ref()), ("subject", info.subject.as_ref()), @@ -88,15 +92,16 @@ fn format_date(d: &Date) -> String { ) } -impl Extractor for PdfMetaExtractor<'_> { - fn field<'a>( +#[async_trait::async_trait] +impl Extractor for PdfMetaExtractor<'_> { + async fn field<'a>( &'a self, name: &Label, - ) -> Result>, std::io::Error> { - Ok(self.get_inner()?.get(name)) + ) -> Result>, std::io::Error> { + Ok(self.get_inner().await?.get(name)) } - fn fields(&self) -> Result, std::io::Error> { - Ok(self.get_inner()?.keys().cloned().collect()) + async fn fields(&self) -> Result, std::io::Error> { + Ok(self.get_inner().await?.keys().cloned().collect()) } } diff --git a/crates/pile-dataset/src/extract/pdf/pdf_text.rs b/crates/pile-dataset/src/extract/pdf/pdf_text.rs index 830311f..1c707d6 100644 --- a/crates/pile-dataset/src/extract/pdf/pdf_text.rs +++ b/crates/pile-dataset/src/extract/pdf/pdf_text.rs @@ -2,34 +2,38 @@ use pdf::content::{Op, TextDrawAdjusted}; use pdf::file::FileOptions; use pile_config::Label; use std::{collections::HashMap, sync::OnceLock}; +use tracing::debug; -use crate::{FileItem, PileValue, extract::Extractor}; +use crate::{Item, PileValue, extract::Extractor}; pub struct PdfTextExtractor<'a> { - item: &'a FileItem, - output: OnceLock>>, + item: &'a Item, + output: OnceLock>>, } impl<'a> PdfTextExtractor<'a> { - pub fn new(item: &'a FileItem) -> Self { + pub fn new(item: &'a Item) -> Self { Self { item, output: OnceLock::new(), } } - fn get_inner(&self) -> Result<&HashMap>, std::io::Error> { + async fn get_inner(&self) -> Result<&HashMap>, std::io::Error> { if let Some(x) = self.output.get() { return Ok(x); } - if self.item.path.extension().map(|x| x.to_str()).flatten() != Some("pdf") { - return Ok(self.output.get_or_init(|| HashMap::new())); - } + let bytes = self.item.read().await?.read_to_end().await?; - let file = FileOptions::cached() - .open(&self.item.path) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?; + let file = match FileOptions::cached().load(bytes) { + Ok(x) => x, + Err(pdf::PdfError::Io { source }) => return Err(source), + Err(error) => { + debug!(message = "Could not process pdf", ?error, key = ?self.item.key()); + return Ok(self.output.get_or_init(HashMap::new)); + } + }; let mut text_parts: Vec = Vec::new(); @@ -65,19 +69,22 @@ impl<'a> PdfTextExtractor<'a> { #[expect(clippy::unwrap_used)] let output = HashMap::from([(Label::new("text").unwrap(), PileValue::String(text.into()))]); - return Ok(self.output.get_or_init(|| output)); + let _ = self.output.set(output); + #[expect(clippy::unwrap_used)] + return Ok(self.output.get().unwrap()); } } -impl Extractor for PdfTextExtractor<'_> { - fn field<'a>( +#[async_trait::async_trait] +impl Extractor for PdfTextExtractor<'_> { + async fn field<'a>( &'a self, name: &Label, - ) -> Result>, std::io::Error> { - Ok(self.get_inner()?.get(name)) + ) -> Result>, std::io::Error> { + Ok(self.get_inner().await?.get(name)) } - fn fields(&self) -> Result, std::io::Error> { - Ok(self.get_inner()?.keys().cloned().collect()) + async fn fields(&self) -> Result, std::io::Error> { + Ok(self.get_inner().await?.keys().cloned().collect()) } } diff --git a/crates/pile-dataset/src/extract/sidecar.rs b/crates/pile-dataset/src/extract/sidecar.rs index 77f09ce..3553233 100644 --- a/crates/pile-dataset/src/extract/sidecar.rs +++ b/crates/pile-dataset/src/extract/sidecar.rs @@ -1,71 +1,47 @@ use pile_config::Label; -use std::{collections::HashMap, sync::OnceLock}; +use std::sync::OnceLock; -use crate::{FileItem, Item, PileValue, extract::Extractor}; - -fn toml_to_pile(value: toml::Value) -> PileValue<'static, I> { - match value { - toml::Value::String(s) => PileValue::String(s.into()), - toml::Value::Integer(i) => PileValue::String(i.to_string().into()), - toml::Value::Float(f) => PileValue::String(f.to_string().into()), - toml::Value::Boolean(b) => PileValue::String(b.to_string().into()), - toml::Value::Datetime(d) => PileValue::String(d.to_string().into()), - toml::Value::Array(a) => PileValue::Array(a.into_iter().map(toml_to_pile).collect()), - toml::Value::Table(_) => PileValue::Null, - } -} +use crate::{ + Item, PileValue, + extract::{Extractor, TomlExtractor}, +}; pub struct SidecarExtractor<'a> { - item: &'a FileItem, - output: OnceLock>>, + item: &'a Item, + output: OnceLock>>, } impl<'a> SidecarExtractor<'a> { - pub fn new(item: &'a FileItem) -> Self { + pub fn new(item: &'a Item) -> Self { Self { item, output: OnceLock::new(), } } - - fn get_inner(&self) -> Result<&HashMap>, std::io::Error> { - if let Some(x) = self.output.get() { - return Ok(x); - } - - let sidecar_file = self.item.path.with_extension("toml"); - - if !(sidecar_file.is_file() && self.item.sidecar) { - return Ok(self.output.get_or_init(HashMap::new)); - } - - let sidecar = std::fs::read(&sidecar_file)?; - let sidecar: toml::Value = match toml::from_slice(&sidecar) { - Ok(x) => x, - Err(_) => return Ok(self.output.get_or_init(HashMap::new)), - }; - - let output: HashMap> = match sidecar { - toml::Value::Table(t) => t - .into_iter() - .filter_map(|(k, v)| Label::new(&k).map(|label| (label, toml_to_pile(v)))) - .collect(), - _ => HashMap::new(), - }; - - return Ok(self.output.get_or_init(|| output)); - } } -impl Extractor for SidecarExtractor<'_> { - fn field<'a>( +#[async_trait::async_trait] +impl Extractor for SidecarExtractor<'_> { + async fn field<'a>( &'a self, name: &Label, - ) -> Result>, std::io::Error> { - Ok(self.get_inner()?.get(name)) + ) -> Result>, std::io::Error> { + match self + .output + .get_or_init(|| self.item.sidecar().map(TomlExtractor::new)) + { + Some(x) => Ok(x.field(name).await?), + None => Ok(Some(&PileValue::Null)), + } } - fn fields(&self) -> Result, std::io::Error> { - Ok(self.get_inner()?.keys().cloned().collect()) + async fn fields(&self) -> Result, std::io::Error> { + match self + .output + .get_or_init(|| self.item.sidecar().map(TomlExtractor::new)) + { + Some(x) => Ok(x.fields().await?), + None => Ok(Vec::new()), + } } } diff --git a/crates/pile-dataset/src/extract/toml.rs b/crates/pile-dataset/src/extract/toml.rs new file mode 100644 index 0000000..40d608e --- /dev/null +++ b/crates/pile-dataset/src/extract/toml.rs @@ -0,0 +1,66 @@ +use pile_config::Label; +use std::{collections::HashMap, sync::OnceLock}; + +use crate::{Item, PileValue, extract::Extractor}; + +fn toml_to_pile(value: toml::Value) -> PileValue<'static> { + match value { + toml::Value::String(s) => PileValue::String(s.into()), + toml::Value::Integer(i) => PileValue::String(i.to_string().into()), + toml::Value::Float(f) => PileValue::String(f.to_string().into()), + toml::Value::Boolean(b) => PileValue::String(b.to_string().into()), + toml::Value::Datetime(d) => PileValue::String(d.to_string().into()), + toml::Value::Array(a) => PileValue::Array(a.into_iter().map(toml_to_pile).collect()), + toml::Value::Table(_) => PileValue::Null, + } +} + +pub struct TomlExtractor<'a> { + item: &'a Item, + output: OnceLock>>, +} + +impl<'a> TomlExtractor<'a> { + pub fn new(item: &'a Item) -> Self { + Self { + item, + output: OnceLock::new(), + } + } + + async fn get_inner(&self) -> Result<&HashMap>, std::io::Error> { + if let Some(x) = self.output.get() { + return Ok(x); + } + + let bytes = self.item.read().await?.read_to_end().await?; + let toml: toml::Value = match toml::from_slice(&bytes) { + Ok(x) => x, + Err(_) => return Ok(self.output.get_or_init(HashMap::new)), + }; + + let output: HashMap> = match toml { + toml::Value::Table(t) => t + .into_iter() + .filter_map(|(k, v)| Label::new(&k).map(|label| (label, toml_to_pile(v)))) + .collect(), + _ => HashMap::new(), + }; + + return Ok(self.output.get_or_init(|| output)); + } +} + +#[async_trait::async_trait] +impl Extractor for TomlExtractor<'_> { + async fn field<'a>( + &'a self, + name: &Label, + ) -> Result>, std::io::Error> { + Ok(self.get_inner().await?.get(name)) + } + + async fn fields(&self) -> Result, std::io::Error> { + Ok(self.get_inner().await?.keys().cloned().collect()) + } +} diff --git a/crates/pile-dataset/src/index/index_fts.rs b/crates/pile-dataset/src/index/index_fts.rs index 8937fe3..11efe8a 100644 --- a/crates/pile-dataset/src/index/index_fts.rs +++ b/crates/pile-dataset/src/index/index_fts.rs @@ -1,6 +1,9 @@ use itertools::Itertools; use pile_config::{Case, ConfigToml, DatasetFts, FieldSpecPost, Label}; -use std::{path::PathBuf, rc::Rc, sync::LazyLock}; +use std::{ + path::PathBuf, + sync::{Arc, LazyLock}, +}; use tantivy::{ DocAddress, Index, ReloadPolicy, TantivyDocument, TantivyError, collector::Collector, @@ -9,7 +12,7 @@ use tantivy::{ }; use tracing::{debug, trace, warn}; -use crate::{Item, Key, PileValue, extract::MetaExtractor}; +use crate::{Item, PileValue, extract::MetaExtractor}; #[derive(Debug, Clone)] pub struct FtsLookupResult { @@ -63,37 +66,21 @@ impl DbFtsIndex { // /// Turn an entry into a tantivy document - pub fn entry_to_document>( + pub async fn entry_to_document( &self, - item: &I, + item: &Item, ) -> Result, TantivyError> { let mut doc = TantivyDocument::default(); - - let key = match item.key().to_string() { - Some(x) => x, - None => { - warn!( - message = "Item key cannot be converted to a string, skipping", - key = ?item.key(), - ); - return Ok(None); - } - }; + let key = item.key(); doc.add_text(self.schema.get_field("_meta_source")?, item.source_name()); doc.add_text(self.schema.get_field("_meta_key")?, key); - let item = match item.as_file() { - Some(x) => x, - None => return Ok(None), - }; - - let extractor = MetaExtractor::new(item); - let extractor = PileValue::Extractor(Rc::new(extractor)); + let extractor = PileValue::Extractor(Arc::new(MetaExtractor::new(item))); let mut empty = true; for name in self.fts_cfg().fields.keys() { - let x = self.get_field(&extractor, name)?; + let x = self.get_field(&extractor, name).await?; let val = match x { Some(x) => x, @@ -115,9 +102,9 @@ impl DbFtsIndex { // MARK: read // - pub fn get_field( + pub async fn get_field( &self, - extractor: &PileValue<'_, I>, + extractor: &PileValue<'_>, field_name: &Label, ) -> Result, std::io::Error> { let field = match self.cfg.schema.get(field_name) { @@ -130,7 +117,7 @@ impl DbFtsIndex { // Try paths in order, using the first value we find 'outer: for path in field.path.as_slice() { - let val = match extractor.query(path)? { + let val = match extractor.query(path).await? { Some(x) => x, None => return Ok(None), }; @@ -292,10 +279,7 @@ impl DbFtsIndex { } } -pub fn apply<'a, I: Item>( - post: &FieldSpecPost, - val: &PileValue<'a, I>, -) -> Option> { +pub fn apply<'a>(post: &FieldSpecPost, val: &PileValue<'a>) -> Option> { Some(match post { FieldSpecPost::NotEmpty { notempty: false } => val.clone(), FieldSpecPost::NotEmpty { notempty: true } => match val { diff --git a/crates/pile-dataset/src/item.rs b/crates/pile-dataset/src/item.rs index 1513679..3fcc3c5 100644 --- a/crates/pile-dataset/src/item.rs +++ b/crates/pile-dataset/src/item.rs @@ -1,178 +1,222 @@ -use pile_config::Label; -use std::{fmt::Debug, path::PathBuf, rc::Rc}; +use smartstring::{LazyCompact, SmartString}; +use std::{fs::File, io::Seek, path::PathBuf, sync::Arc}; -use crate::{ - PileValue, - extract::{Extractor, SidecarExtractor}, -}; - -// -// MARK: key -// - -pub trait Key: Debug + Clone + Send + Sync + 'static { - /// Convert this key to a string, returning `None` - /// if we encounter any kind of error. - fn to_string(&self) -> Option; - - fn from_string(str: &str) -> Option; -} - -impl Key for PathBuf { - fn from_string(str: &str) -> Option { - str.parse().ok() - } - - fn to_string(&self) -> Option { - self.to_str().map(|x| x.to_owned()) - } -} +use crate::source::{DirDataSource, S3DataSource}; // // MARK: item // -/// A pointer to raw data -pub trait Item: Debug + Send + Sync + 'static + Sized { - type Key: Key; +#[derive(Debug, Clone)] +pub enum Item { + File { + source: Arc, - fn source_name(&self) -> &str; - fn key(&self) -> &Self::Key; + path: PathBuf, + sidecar: Option>, + }, - /// Get this item's sidecar metadata - fn sidecar(&self) -> Result + '_>>, std::io::Error>; + S3 { + source: Arc, - /// Set this file's sidecar metadata, - /// overwriting any existing file. - fn write_sidecar( - &self, - path: Vec