Add md-footnote

This commit is contained in:
2025-11-06 21:16:09 -08:00
parent 4d8093c4a3
commit dc4260e147
28 changed files with 1868 additions and 7 deletions

View File

@@ -0,0 +1,107 @@
//! Plugin to add anchor(s) to footnote definitions,
//! with links back to the reference(s).
//!
//! ```rust
//! let parser = &mut markdown_it::MarkdownIt::new();
//! markdown_it::plugins::cmark::add(parser);
//! md_footnote::references::add(parser);
//! md_footnote::definitions::add(parser);
//! md_footnote::back_refs::add(parser);
//! let root = parser.parse("[^label]\n\n[^label]: This is a footnote");
//! let mut names = vec![];
//! root.walk(|node,_| { names.push(node.name()); });
//! assert_eq!(names, vec![
//! "markdown_it::parser::core::root::Root",
//! "markdown_it::plugins::cmark::block::paragraph::Paragraph",
//! "md_footnote::references::FootnoteReference",
//! "md_footnote::definitions::FootnoteDefinition",
//! "markdown_it::plugins::cmark::block::paragraph::Paragraph",
//! "markdown_it::parser::inline::builtin::skip_text::Text",
//! "md_footnote::back_refs::FootnoteRefAnchor",
//! ]);
//! ```
use markdown_it::{
MarkdownIt, Node, NodeValue,
parser::core::{CoreRule, Root},
plugins::cmark::block::paragraph::Paragraph,
};
use crate::{FootnoteMap, definitions::FootnoteDefinition};
/// Register the footnote back-reference core rule with `md`.
pub fn add(md: &mut MarkdownIt) {
    // Core rule: runs once per parsed document.
    md.add_rule::<FootnoteBackrefRule>();
}
/// AST node holding the IDs of every reference that points at the
/// footnote definition this anchor was appended to.
#[derive(Debug)]
pub struct FootnoteRefAnchor {
    /// Reference IDs; one back-link is rendered per entry, in order.
    pub ref_ids: Vec<usize>,
}

impl NodeValue for FootnoteRefAnchor {
    /// Emit, for each reference ID, a space followed by an
    /// `<a class="footnote-backref" href="#fnrefN">` link.
    fn render(&self, _: &Node, fmt: &mut dyn markdown_it::Renderer) {
        for ref_id in &self.ref_ids {
            let attrs = [
                ("href", format!("#fnref{ref_id}")),
                ("class", "footnote-backref".to_owned()),
            ];
            fmt.text(" ");
            fmt.open("a", &attrs);
            // "↩" plus the text-presentation selector, which stops iOS from
            // showing the arrow as an Apple emoji.
            fmt.text("\u{21a9}\u{FE0E}");
            fmt.close("a");
        }
    }
}
/// Core rule that appends a [`FootnoteRefAnchor`] to every footnote
/// definition that is referenced at least once.
struct FootnoteBackrefRule;

impl CoreRule for FootnoteBackrefRule {
    /// Walk the document and attach back-reference anchors.
    ///
    /// The root extension set is moved out of the root node for the duration
    /// of the walk (so the tree can be mutated while the footnote map is
    /// read) and is always moved back before returning, including when no
    /// [`FootnoteMap`] was recorded for this document.
    fn run(root: &mut Node, _: &MarkdownIt) {
        // TODO this seems very cumbersome
        // but it is also how the markdown_it::InlineParserRule works
        #[expect(clippy::unwrap_used)]
        let data = root.cast_mut::<Root>().unwrap();
        let root_ext = std::mem::take(&mut data.ext);

        if let Some(map) = root_ext.get::<FootnoteMap>() {
            root.walk_mut(|node, _| {
                // Only definitions that carry an ID can have references.
                let Some(def_id) = node
                    .cast::<FootnoteDefinition>()
                    .and_then(|def_node| def_node.def_id)
                else {
                    return;
                };
                let ref_ids = map.referenced_by(def_id);
                if ref_ids.is_empty() {
                    return;
                }
                let anchor = Node::new(FootnoteRefAnchor { ref_ids });
                // If the final child is a paragraph, the anchor goes inside it;
                // otherwise it becomes a new last child of the definition.
                match node.children.last_mut() {
                    Some(last) if last.is::<Paragraph>() => last.children.push(anchor),
                    _ => node.children.push(anchor),
                }
            });
        }

        // Hand the extension set back on every path; dropping it here would
        // discard all root extensions for the rest of the pipeline.
        #[expect(clippy::unwrap_used)]
        let data = root.cast_mut::<Root>().unwrap();
        data.ext = root_ext;
    }
}

View File

@@ -0,0 +1,140 @@
//! Plugin to collect footnote definitions,
//! removing duplicate/unreferenced ones,
//! and move them to be the last child of the root node.
//!
//! ```rust
//! let parser = &mut markdown_it::MarkdownIt::new();
//! markdown_it::plugins::cmark::add(parser);
//! md_footnote::references::add(parser);
//! md_footnote::definitions::add(parser);
//! md_footnote::collect::add(parser);
//! let root = parser.parse("[^label]\n\n[^label]: This is a footnote\n\n> quote");
//! let mut names = vec![];
//! root.walk(|node,_| { names.push(node.name()); });
//! assert_eq!(names, vec![
//! "markdown_it::parser::core::root::Root",
//! "markdown_it::plugins::cmark::block::paragraph::Paragraph",
//! "md_footnote::references::FootnoteReference",
//! "markdown_it::plugins::cmark::block::blockquote::Blockquote",
//! "markdown_it::plugins::cmark::block::paragraph::Paragraph",
//! "markdown_it::parser::inline::builtin::skip_text::Text",
//! "md_footnote::collect::FootnotesContainerNode",
//! "md_footnote::definitions::FootnoteDefinition",
//! "markdown_it::plugins::cmark::block::paragraph::Paragraph",
//! "markdown_it::parser::inline::builtin::skip_text::Text",
//! ]);
//! ```
use markdown_it::{
MarkdownIt, Node, NodeValue,
parser::core::{CoreRule, Root},
plugins::cmark::block::paragraph::Paragraph,
};
use crate::{FootnoteMap, definitions::FootnoteDefinition};
/// Register the footnote collection core rule with `md`.
pub fn add(md: &mut MarkdownIt) {
    // Core rule: runs once per parsed document.
    md.add_rule::<FootnoteCollectRule>();
}
/// Temporary stand-in left in a parent's child list at the position a
/// footnote definition was taken from; removed again once that parent's
/// children have been scanned.
#[derive(Debug)]
struct PlaceholderNode;

impl NodeValue for PlaceholderNode {}
/// AST node wrapping all collected footnote definitions.
#[derive(Debug)]
pub struct FootnotesContainerNode;

impl NodeValue for FootnotesContainerNode {
    /// Render a separator `<hr>` followed by
    /// `<section class="footnotes"><ol class="footnotes-list">…</ol></section>`
    /// around the node's children.
    fn render(&self, node: &Node, fmt: &mut dyn markdown_it::Renderer) {
        // The section keeps the node's own attributes and gains the class.
        let section_attrs = {
            let mut attrs = node.attrs.clone();
            attrs.push(("class", "footnotes".to_owned()));
            attrs
        };
        let separator_attrs = [("class", "footnotes-sep".to_owned())];
        let list_attrs = [("class", "footnotes-list".to_owned())];

        fmt.cr();
        fmt.self_close("hr", &separator_attrs);
        fmt.cr();

        fmt.open("section", &section_attrs);
        fmt.cr();
        fmt.open("ol", &list_attrs);
        fmt.cr();

        fmt.contents(&node.children);
        fmt.cr();

        fmt.close("ol");
        fmt.cr();
        fmt.close("section");
        fmt.cr();
    }
}
/// Core rule that pulls footnote definitions out of the document body and
/// re-attaches the referenced ones inside a [`FootnotesContainerNode`]
/// appended to the root.
struct FootnoteCollectRule;

impl CoreRule for FootnoteCollectRule {
    /// Invoked once per document with the AST root.
    ///
    /// Every `FootnoteDefinition` found among any node's children is removed
    /// from its parent. Definitions without an ID, or whose ID has no
    /// references, are discarded; the rest are kept in document order. Inline
    /// definitions get their children wrapped in a `Paragraph`.
    ///
    /// The root extension set is moved out of the root node while the tree is
    /// mutated and is always moved back before returning, including when
    /// there is no [`FootnoteMap`] or nothing was collected.
    fn run(root: &mut Node, _: &MarkdownIt) {
        // TODO this seems very cumbersome
        // but it is also how the markdown_it::InlineParserRule works
        #[expect(clippy::unwrap_used)]
        let data = root.cast_mut::<Root>().unwrap();
        let root_ext = std::mem::take(&mut data.ext);

        let mut defs: Vec<Node> = Vec::new();
        if let Some(map) = root_ext.get::<FootnoteMap>() {
            root.walk_mut(|node, _| {
                // TODO could use drain_filter if it becomes stable:
                // https://github.com/rust-lang/rust/issues/43244
                for child in node.children.iter_mut() {
                    if !child.is::<FootnoteDefinition>() {
                        continue;
                    }
                    // Take the definition out, leaving a marker that is swept
                    // away by the `retain` below.
                    let mut extracted = std::mem::replace(child, Node::new(PlaceholderNode));
                    let Some(def_node) = extracted.cast::<FootnoteDefinition>() else {
                        continue;
                    };
                    // Skip footnotes that are not referenced.
                    let referenced = def_node
                        .def_id
                        .is_some_and(|def_id| !map.referenced_by(def_id).is_empty());
                    if !referenced {
                        continue;
                    }
                    if def_node.inline {
                        // Inline footnotes hold bare inline content, so wrap
                        // the definition's children in a paragraph.
                        let mut para = Node::new(Paragraph);
                        std::mem::swap(&mut para.children, &mut extracted.children);
                        extracted.children = vec![para];
                    }
                    defs.push(extracted);
                }
                node.children
                    .retain(|child| !child.is::<PlaceholderNode>());
            });
        }

        if !defs.is_empty() {
            // Wrap the definitions in a container and append them to the root.
            let mut wrapper = Node::new(FootnotesContainerNode);
            wrapper.children = defs;
            root.children.push(wrapper);
        }

        // Hand the extension set back on every path; dropping it here would
        // discard all root extensions for the rules that run afterwards.
        #[expect(clippy::unwrap_used)]
        let data = root.cast_mut::<Root>().unwrap();
        data.ext = root_ext;
    }
}

View File

@@ -0,0 +1,179 @@
//! Plugin to parse footnote definitions
//!
//! ```rust
//! let parser = &mut markdown_it::MarkdownIt::new();
//! markdown_it::plugins::cmark::add(parser);
//! md_footnote::definitions::add(parser);
//! let root = parser.parse("[^label]: This is a footnote");
//! let mut names = vec![];
//! root.walk(|node,_| { names.push(node.name()); });
//! assert_eq!(names, vec![
//! "markdown_it::parser::core::root::Root",
//! "md_footnote::definitions::FootnoteDefinition",
//! "markdown_it::plugins::cmark::block::paragraph::Paragraph",
//! "markdown_it::parser::inline::builtin::skip_text::Text",
//! ]);
//! ```
use markdown_it::parser::block::{BlockRule, BlockState};
use markdown_it::plugins::cmark::block::reference::ReferenceScanner;
use markdown_it::{MarkdownIt, Node, NodeValue, Renderer};
use crate::FootnoteMap;
/// Register the footnote definition block rule with `md`.
///
/// The rule is ordered ahead of the cmark `ReferenceScanner`.
pub fn add(md: &mut MarkdownIt) {
    // NOTE(review): presumably ordered first so `[^label]: …` lines are not
    // claimed as link reference definitions — confirm.
    md.block
        .add_rule::<FootnoteDefinitionScanner>()
        .before::<ReferenceScanner>();
}
/// AST node for a footnote definition.
#[derive(Debug)]
pub struct FootnoteDefinition {
    /// Label as written in `[^label]:`; `None` for inline footnotes.
    pub label: Option<String>,
    /// Numeric ID used for the rendered `id="fnN"`; `None` when the label
    /// had already been defined earlier in the document.
    pub def_id: Option<usize>,
    /// `true` when the definition came from the inline `^[...]` syntax.
    pub inline: bool,
}

impl NodeValue for FootnoteDefinition {
    /// Render as `<li class="footnote-item">`, with `id="fnN"` placed before
    /// the class when the definition has an ID.
    fn render(&self, node: &Node, fmt: &mut dyn Renderer) {
        let mut item_attrs = node.attrs.clone();
        item_attrs.extend(self.def_id.map(|def_id| ("id", format!("fn{def_id}"))));
        item_attrs.push(("class", "footnote-item".to_owned()));

        fmt.cr();
        fmt.open("li", &item_attrs);
        fmt.contents(&node.children);
        fmt.close("li");
        fmt.cr();
    }
}
/// Block rule that recognises `[^label]: text` footnote definitions.
struct FootnoteDefinitionScanner;

impl FootnoteDefinitionScanner {
    /// Test whether the current line opens a footnote definition.
    ///
    /// Returns the label and the number of space/tab characters that follow
    /// the `]:` marker, or `None` when the line is indented by at least
    /// `max_indent`, does not start with `[^`, has an empty label, contains a
    /// space before the closing `]`, or lacks the `:` right after it.
    fn is_def(state: &mut BlockState<'_, '_>) -> Option<(String, usize)> {
        if state.line_indent(state.line) >= state.md.max_indent {
            return None;
        }

        let after_opener = state.get_line(state.line).strip_prefix("[^")?;

        // The label runs up to the first `]`. Meeting a space first means
        // this is not a definition. Backslashes are taken literally.
        let stop = after_opener.find(|c: char| c == ']' || c == ' ')?;
        let label = &after_opener[..stop];
        let after_marker = after_opener[stop..].strip_prefix("]:")?;
        if label.is_empty() {
            return None;
        }

        // Each space or tab before the content counts as one.
        let spaces = after_marker
            .chars()
            .take_while(|c| *c == ' ' || *c == '\t')
            .count();

        Some((label.to_owned(), spaces))
    }
}
impl BlockRule for FootnoteDefinitionScanner {
    /// Decide whether a definition may interrupt the block being parsed.
    ///
    /// Only allowed while the current node is itself a footnote definition
    /// and the current line starts a new definition.
    // TODO I think strictly only paragraphs should be interrupted, but this is not yet possible in markdown-it.rs
    fn check(state: &mut BlockState<'_, '_>) -> Option<()> {
        let interrupts =
            state.node.is::<FootnoteDefinition>() && Self::is_def(state).is_some();
        interrupts.then_some(())
    }

    /// Parse a footnote definition starting at the current line.
    ///
    /// Returns the new `FootnoteDefinition` node together with the number of
    /// lines it spans, or `None` if the line is not a definition.
    fn run(state: &mut BlockState<'_, '_>) -> Option<(Node, usize)> {
        let (label, spaces) = Self::is_def(state)?;

        // Record the label so references can be matched to it later.
        let def_id = state
            .root_ext
            .get_or_insert_default::<FootnoteMap>()
            .add_def(&label);

        let marker_len = "[^]:".len();
        let label_len = label.len();

        // Make the definition the current node so nested blocks attach to it.
        let definition = Node::new(FootnoteDefinition {
            label: Some(label),
            def_id,
            inline: false,
        });
        let parent = std::mem::replace(&mut state.node, definition);

        // Remember the first line and its offsets for restoring afterwards.
        let start_line = state.line;
        let saved_offsets = state.line_offsets[start_line].clone();

        // Shift the first line's offsets past the `[^label]:` prefix.
        // TODO this is not quite the same as pandoc where spaces >= 8 is code block (here >= 4)
        state.line_offsets[start_line].first_nonspace += marker_len + label_len + spaces;
        state.line_offsets[start_line].indent_nonspace += marker_len as i32 + spaces as i32;

        // Tokenize the body with a +4 space block indent.
        state.blk_indent += 4;
        state.md.block.tokenize(state);
        state.blk_indent -= 4;

        // Number of lines consumed by the definition.
        let consumed = state.line - start_line;

        // Put the first line and its offsets back as they were.
        state.line_offsets[start_line] = saved_offsets;
        state.line = start_line;

        // Swap the parent back in and return the finished definition.
        let definition = std::mem::replace(&mut state.node, parent);
        Some((definition, consumed))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A definition on the first line becomes the first child of the root.
    #[test]
    fn it_works() {
        let mut parser = markdown_it::MarkdownIt::new();
        markdown_it::plugins::cmark::add(&mut parser);
        markdown_it::plugins::sourcepos::add(&mut parser);
        add(&mut parser);

        let root = parser.parse("[^note]: a\n\nhallo\nthere\n");

        let first = root.children.first().unwrap();
        assert!(first.is::<FootnoteDefinition>());
    }
}

View File

@@ -0,0 +1,147 @@
//! Plugin to parse inline footnotes
//!
//! ```rust
//! let parser = &mut markdown_it::MarkdownIt::new();
//! markdown_it::plugins::cmark::add(parser);
//! md_footnote::inline::add(parser);
//! let root = parser.parse("Example^[This is a footnote]");
//! let mut names = vec![];
//! root.walk(|node,_| { names.push(node.name()); });
//! assert_eq!(names, vec![
//! "markdown_it::parser::core::root::Root",
//! "markdown_it::plugins::cmark::block::paragraph::Paragraph",
//! "markdown_it::parser::inline::builtin::skip_text::Text",
//! "md_footnote::inline::InlineFootnote",
//! "md_footnote::definitions::FootnoteDefinition",
//! "markdown_it::parser::inline::builtin::skip_text::Text",
//! "md_footnote::references::FootnoteReference"
//! ]);
//! ```
use markdown_it::{
MarkdownIt, Node, NodeValue,
parser::inline::{InlineRule, InlineState},
};
use crate::{FootnoteMap, definitions::FootnoteDefinition};
/// Register the inline footnote (`^[...]`) rule with `md`.
pub fn add(md: &mut MarkdownIt) {
    // Inline rule: hooked into the inline sub-parser.
    md.inline.add_rule::<InlineFootnoteScanner>();
}
/// Wrapper node produced for an inline footnote; its children are the
/// generated definition followed by the reference to it.
#[derive(Debug)]
pub struct InlineFootnote;

impl NodeValue for InlineFootnote {
    /// Emits no markup of its own; the children are rendered in place.
    fn render(&self, node: &Node, fmt: &mut dyn markdown_it::Renderer) {
        fmt.contents(&node.children);
    }
}
/// Inline rule that recognises `^[footnote text]`.
struct InlineFootnoteScanner;

impl InlineFootnoteScanner {
    /// If the input at the cursor starts with `^[`, return the position just
    /// after that opener.
    fn content_start(state: &InlineState<'_, '_>) -> Option<usize> {
        state.src[state.pos..state.pos_max]
            .starts_with("^[")
            .then_some(state.pos + 2)
    }
}

impl InlineRule for InlineFootnoteScanner {
    const MARKER: char = '^';

    /// Return the length of the inline footnote at the cursor without
    /// building any nodes, or `None` if there is none.
    fn check(state: &mut InlineState<'_, '_>) -> Option<usize> {
        let content_start = Self::content_start(state)?;
        let content_end = parse_footnote(state, content_start)?;
        Some(content_end + 1 - state.pos)
    }

    /// Parse the inline footnote at the cursor.
    ///
    /// Returns an `InlineFootnote` node whose children are the new
    /// definition and its reference, plus the number of bytes consumed
    /// (including the closing `]`).
    fn run(state: &mut InlineState<'_, '_>) -> Option<(Node, usize)> {
        let content_start = Self::content_start(state)?;
        let content_end = parse_footnote(state, content_start)?;

        let (def_id, ref_id) = state
            .root_ext
            .get_or_insert_default::<FootnoteMap>()
            .add_inline_def();

        // Make the new definition the current node for the nested parse.
        let definition = Node::new(FootnoteDefinition {
            label: None,
            def_id: Some(def_id),
            inline: true,
        });
        let parent = std::mem::replace(&mut state.node, definition);

        // Tokenize only the text between the brackets, then restore the
        // cursor and the end bound.
        let (saved_pos, saved_max) = (state.pos, state.pos_max);
        state.pos = content_start;
        state.pos_max = content_end;
        state.md.inline.tokenize(state);
        state.pos = saved_pos;
        state.pos_max = saved_max;

        // Swap the parent back in; what comes out is the filled definition.
        let def_node = std::mem::replace(&mut state.node, parent);
        let ref_node = Node::new(crate::references::FootnoteReference {
            label: None,
            ref_id,
            def_id,
        });

        // Return both nodes under a single wrapper.
        let mut wrapper = Node::new(InlineFootnote);
        wrapper.children = vec![def_node, ref_node];
        Some((wrapper, content_end + 1 - state.pos))
    }
}
/// Find the `]` that closes an inline footnote.
///
/// `start` is the byte position of the first content character, i.e. the
/// position right after the `^[` opener. Scanning begins at `start` itself:
/// callers pass the content start, so beginning one byte later would skip
/// the first content character. That misses the closing bracket of `^[]`
/// and lands inside a multi-byte first character, where slicing panics.
///
/// Tokens are stepped over with the inline parser's `skip_token`. Returns
/// the position of the closing `]`, or `None` if the input ends first.
/// `state.pos` is restored before returning.
fn parse_footnote(state: &mut InlineState<'_, '_>, start: usize) -> Option<usize> {
    let old_pos = state.pos;
    let mut label_end = None;
    state.pos = start;

    // `get` rather than indexing: an out-of-range cursor ends the scan
    // instead of panicking.
    while let Some(ch) = state
        .src
        .get(state.pos..state.pos_max)
        .and_then(|rest| rest.chars().next())
    {
        if ch == ']' {
            label_end = Some(state.pos);
            break;
        }
        state.md.inline.skip_token(state);
    }

    // restore old state
    state.pos = old_pos;
    label_end
}

View File

@@ -0,0 +1,89 @@
//! A [markdown_it] plugin for parsing footnotes
//!
//! ```
//! let parser = &mut markdown_it::MarkdownIt::new();
//! md_footnote::add(parser);
//! let node = parser.parse("[^note]\n\n[^note]: A footnote\n");
//! ```
use std::collections::HashMap;
use markdown_it::{MarkdownIt, parser::extset::RootExt};
pub mod back_refs;
pub mod collect;
pub mod definitions;
pub mod inline;
pub mod references;
// Silence lints
#[cfg(test)]
use md_dev as _;
#[cfg(test)]
use testing as _;
/// Add the full footnote plugin to the parser.
///
/// Registers, in this order: definitions, references, inline footnotes,
/// collection, and back-references.
pub fn add(md: &mut MarkdownIt) {
    let installers: [fn(&mut MarkdownIt); 5] = [
        definitions::add,
        references::add,
        inline::add,
        collect::add,
        back_refs::add,
    ];
    for install in installers {
        install(md);
    }
}
/// Bookkeeping for footnote definitions and references,
/// stored in the root node.
#[derive(Debug, Default)]
pub struct FootnoteMap {
    /// Last definition ID handed out (IDs start at 1).
    def_counter: usize,
    /// Last reference ID handed out (IDs start at 1).
    ref_counter: usize,
    /// Definition ID for each labelled definition.
    label_to_def: HashMap<String, usize>,
    /// Reference IDs pointing at each definition, in creation order.
    def_to_refs: HashMap<usize, Vec<usize>>,
}

impl RootExt for FootnoteMap {}

impl FootnoteMap {
    /// Allocate an ID for a definition with the given label.
    ///
    /// Returns `None`, without consuming an ID, if the label is already
    /// defined.
    pub fn add_def(&mut self, label: &str) -> Option<usize> {
        if self.label_to_def.contains_key(label) {
            return None;
        }
        let def_id = self.def_counter + 1;
        self.def_counter = def_id;
        self.label_to_def.insert(label.to_owned(), def_id);
        Some(def_id)
    }

    /// Allocate an ID for a reference to `label` and return
    /// `(def_id, ref_id)`.
    ///
    /// Returns `None`, without consuming an ID, if no definition exists for
    /// the label.
    pub fn add_ref(&mut self, label: &str) -> Option<(usize, usize)> {
        let def_id = *self.label_to_def.get(label)?;
        self.ref_counter += 1;
        let ref_id = self.ref_counter;
        self.def_to_refs.entry(def_id).or_default().push(ref_id);
        Some((def_id, ref_id))
    }

    /// Allocate a definition and its single reference for an inline
    /// footnote, returning `(def_id, ref_id)`.
    pub fn add_inline_def(&mut self) -> (usize, usize) {
        self.def_counter += 1;
        self.ref_counter += 1;
        let ids = (self.def_counter, self.ref_counter);
        self.def_to_refs.insert(ids.0, vec![ids.1]);
        ids
    }

    /// Return the IDs of all references to the given definition ID
    /// (empty if there are none).
    pub fn referenced_by(&self, def_id: usize) -> Vec<usize> {
        self.def_to_refs
            .get(&def_id)
            .cloned()
            .unwrap_or_default()
    }
}

View File

@@ -0,0 +1,108 @@
//! Plugin to parse footnote references
//!
//! ```rust
//! let parser = &mut markdown_it::MarkdownIt::new();
//! markdown_it::plugins::cmark::add(parser);
//! md_footnote::references::add(parser);
//! md_footnote::definitions::add(parser);
//! let root = parser.parse("[^label]\n\n[^label]: This is a footnote");
//! let mut names = vec![];
//! root.walk(|node,_| { names.push(node.name()); });
//! assert_eq!(names, vec![
//! "markdown_it::parser::core::root::Root",
//! "markdown_it::plugins::cmark::block::paragraph::Paragraph",
//! "md_footnote::references::FootnoteReference",
//! "md_footnote::definitions::FootnoteDefinition",
//! "markdown_it::plugins::cmark::block::paragraph::Paragraph",
//! "markdown_it::parser::inline::builtin::skip_text::Text"
//! ]);
//! ```
use markdown_it::parser::inline::{InlineRule, InlineState};
use markdown_it::{MarkdownIt, Node, NodeValue, Renderer};
use crate::FootnoteMap;
/// Register the footnote reference (`[^label]`) rule with `md`.
pub fn add(md: &mut MarkdownIt) {
    // Inline rule: hooked into the inline sub-parser.
    md.inline.add_rule::<FootnoteReferenceScanner>();
}
/// AST node for a footnote reference.
#[derive(Debug)]
pub struct FootnoteReference {
    /// Label as written in `[^label]`; `None` for inline footnotes.
    pub label: Option<String>,
    /// ID of this reference, rendered as `id="fnrefN"`.
    pub ref_id: usize,
    /// ID of the definition this reference points at.
    pub def_id: usize,
}

impl NodeValue for FootnoteReference {
    /// Render as
    /// `<sup class="footnote-ref"><a href="#fnD" id="fnrefR">[D]</a></sup>`.
    fn render(&self, node: &Node, fmt: &mut dyn Renderer) {
        let mut sup_attrs = node.attrs.clone();
        sup_attrs.push(("class", "footnote-ref".to_owned()));

        let link_attrs = [
            ("href", format!("#fn{}", self.def_id)),
            ("id", format!("fnref{}", self.ref_id)),
        ];
        let caption = format!("[{}]", self.def_id);

        fmt.open("sup", &sup_attrs);
        fmt.open("a", &link_attrs);
        fmt.text(&caption);
        fmt.close("a");
        fmt.close("sup");
    }
}
/// Inline rule that recognises `[^label]` references to known definitions.
struct FootnoteReferenceScanner;

impl InlineRule for FootnoteReferenceScanner {
    const MARKER: char = '[';

    /// Parse a footnote reference at the cursor.
    ///
    /// Returns the reference node and the consumed length, or `None` when
    /// the text is not of the form `[^label]`, the label is empty or
    /// contains a space, or no definition exists for the label.
    fn run(state: &mut InlineState<'_, '_>) -> Option<(Node, usize)> {
        let after_opener = state.src[state.pos..state.pos_max].strip_prefix("[^")?;

        // The label runs up to the first `]`. Meeting a space first means
        // this is not a reference. Backslashes are taken literally.
        let stop = after_opener.find(|c: char| c == ']' || c == ' ')?;
        if stop == 0 || !after_opener[stop..].starts_with(']') {
            return None;
        }
        let label = after_opener[..stop].to_owned();

        let (def_id, ref_id) = state
            .root_ext
            .get_or_insert_default::<FootnoteMap>()
            .add_ref(&label)?;

        // Consumed bytes: the label plus `[^` and `]`.
        let length = label.len() + 3;
        let reference = Node::new(FootnoteReference {
            label: Some(label),
            ref_id,
            def_id,
        });
        Some((reference, length))
    }
}