1
0

Tokenizer

This commit is contained in:
2025-12-11 17:37:33 -08:00
parent 62fcf781c1
commit 1805b7f430
10 changed files with 7678 additions and 0 deletions

97
Cargo.toml Normal file
View File

@@ -0,0 +1,97 @@
# Workspace root. Member crates are discovered through the `crates/*` glob.
[workspace]
resolver = "2"
members = ["crates/*"]
# NOTE(review): Cargo documents glob patterns for `members`; confirm that
# `exclude` also treats "**.ignore" as a glob rather than a literal path.
exclude = ["**.ignore"]
# Package metadata shared by the workspace. A member crate inherits a value by
# writing `<key>.workspace = true` in its own `[package]` table.
[workspace.package]
version = "0.0.1"
edition = "2024"
rust-version = "1.91.0"
# rustc lint levels for the workspace. They apply to a member crate only when
# that crate opts in with `[lints] workspace = true`.
[workspace.lints.rust]
# deny: reported as errors.
absolute_paths_not_starting_with_crate = "deny"
elided_lifetimes_in_paths = "deny"
macro_use_extern_crate = "deny"
non_ascii_idents = "deny"
single_use_lifetimes = "deny"
unit_bindings = "deny"
unused_import_braces = "deny"
# warn: reported, but do not fail the build on their own.
explicit_outlives_requirements = "warn"
redundant_lifetimes = "warn"
unused_crate_dependencies = "warn"
# allow: explicitly silenced.
missing_docs = "allow"
# Clippy lint levels for the workspace, grouped by level and sorted by name
# within each group. They apply to a member crate only when that crate opts in
# with `[lints] workspace = true`.
[workspace.lints.clippy]
# deny: reported as errors.
allow_attributes = "deny"
create_dir = "deny"
dbg_macro = "deny"
filetype_is_file = "deny"
implicit_clone = "deny"
large_types_passed_by_value = "deny"
lossy_float_literal = "deny"
map_err_ignore = "deny"
mutex_atomic = "deny"
needless_raw_strings = "deny"
negative_feature_names = "deny"
print_stderr = "deny"
print_stdout = "deny"
redundant_feature_names = "deny"
str_to_string = "deny"
string_add = "deny"
todo = "deny"
unimplemented = "deny"
upper_case_acronyms = "deny"
verbose_file_reads = "deny"
wildcard_dependencies = "deny"
# warn: reported, but do not fail the build on their own.
expect_used = "warn"
missing_safety_doc = "warn"
unwrap_used = "warn"
# allow: explicitly silenced.
comparison_chain = "allow"
identity_op = "allow"
integer_division = "allow"
multiple_crate_versions = "allow"
needless_return = "allow"
new_without_default = "allow"
obfuscated_if_else = "allow"
result_large_err = "allow"
tabs_in_doc_comments = "allow"
too_many_arguments = "allow"
type_complexity = "allow"
uninlined_format_args = "allow"
use_debug = "allow"
#
# MARK: dependencies
#
[workspace.dependencies]
# Shared dependency versions. A member crate inherits an entry by writing
# `<name>.workspace = true` (or `{ workspace = true }`) in its own manifest.
# Local workspace crate, referenced by path.
tokenizer = { path = "crates/tokenizer" }
# Registry dependencies, sorted by name.
ahash = "0.8.12"
anstyle = "1.0.13"
anyhow = "1.0.100"
clap = { version = "4.5.49", features = ["derive"] }
compact_str = "0.9.0"
dary_heap = "0.3.8"
fancy-regex = "0.16.2"
futures-util = "0.3.31"
indicatif = { version = "0.18.3", features = ["improved_unicode"] }
ndarray = { version = "0.16.1", features = ["serde"] }
parking_lot = "0.12.5"
parquet = "56.2.0"
rayon = "1.11.0"
reqwest = { version = "0.12.24", features = ["json", "stream"] }
serde = "1.0.228"
serde_json = "1.0.145"
strum = { version = "0.27.2", features = ["derive"] }
thiserror = "2.0.17"
tokio = { version = "1.48.0", features = ["full"] }
tracing = "0.1.43"
tracing-indicatif = "0.3.13"
tracing-subscriber = { version = "0.3.20", features = ["env-filter", "json"] }
url = "2.5.7"
# `burn` gets its own table because its feature list is long. Default features
# are switched off, so the features below are the ones requested explicitly.
[workspace.dependencies.burn]
version = "0.19.1"
default-features = false
features = [
    "std",
    "fusion",
    "ndarray",
    "webgpu",
    "cuda",
]