# pile/crates/pile-config/src/config.toml

[dataset]
# Name of this dataset. Must be unique.
name = "dataset"

# Root directory for fts indices, resolved relative to the directory that
# contains this config file.
# Index files are written to {working_dir}/{dataset.name}/*.
# Defaults to ".pile".
#
# working_dir = ".pile"

# Data sources available in this dataset, keyed by source name.
source.music = { type = "filesystem", path = "music" }

# Schema for this dataset.
# Each entry defines a normalized field that is extracted from source entries on-demand.
#
# Every field is described by two keys:
#
#   <field-name>.type
#     The type of data this field contains.
#     Only "text" is supported in this version.
#
#   <field-name>.path
#     A jsonpath (rfc9535), or an array of jsonpaths, used to extract this
#     field from each source entry.
#     Arrays are evaluated in order; the first non-null value is used.
#     A single string is equivalent to an array with one element.
[schema]
album.type = "text"
album.path = "$.Album"

isrc.type = "text"
isrc.path = "$.Isrc"

artist.type = "text"
artist.path = ["$.Artist", "$.TrackArtist"]

lyrics.type = "text"
lyrics.path = "$.Lyrics"

genre.type = "text"
genre.path = "$.Genre"

title.type = "text"
title.path = ["$.Title", "$.TrackTitle"]

# Fts index configuration.
# Selects which fields (defined in `schema`) are included in the fts index.
# Every field listed here sets `tokenize = true`.
[fts]
field.album.tokenize = true
field.isrc.tokenize = true
field.artist.tokenize = true
field.lyrics.tokenize = true
field.genre.tokenize = true
field.title.tokenize = true