mirror of
synced 2025-02-21 22:49:24 -08:00
Split tokenizer
This commit is contained in:
@ -73,7 +73,7 @@ fn main() -> Result<(), std::io::Error> {
write!(stdout, "\n")?;
let g = parser::parse(&s);
let g = parser::evaluate(&s);
match g {
@ -1,10 +1,12 @@
mod tokenize;
mod treeify;
mod groupify;
mod evaluate;
use crate::parser::tokenize::tokenize;
use crate::parser::treeify::treeify;
use crate::parser::evaluate::evaluate;
use crate::parser::tokenize::p_tokenize;
use crate::parser::groupify::p_groupify;
use crate::parser::treeify::p_treeify;
use crate::parser::evaluate::p_evaluate;
use std::collections::VecDeque;
@ -29,6 +31,8 @@ pub enum Token {
/// Each of these will become one of the operators below.
PreOperator(LineLocation, Operator),
/// Used only until operators are parsed.
/// PreGroups aren't needed once we have a tree.
PreGroup(LineLocation, VecDeque<Token>),
@ -212,7 +216,6 @@ pub struct LineLocation {
/// If we cannot parse a string, one of these is returned.
pub enum ParserError {
@ -233,11 +236,12 @@ pub enum ParserError {
/// *what* the error is.
/// - `Ok(Token)` otherwise, where `Token` is the top of an expression tree.
pub fn parse(s: &String) -> Result<Token, (LineLocation, ParserError)> {
pub fn evaluate(s: &String) -> Result<Token, (LineLocation, ParserError)> {
let mut g: Token = tokenize(s)?;
g = treeify(g)?;
g = evaluate(g)?;
let tokens = p_tokenize(s);
let mut g = p_groupify(tokens)?;
g = p_treeify(g)?;
g = p_evaluate(g)?;
return Ok(g);
@ -26,7 +26,7 @@ fn get_at_coords<'a>(g: &'a mut Token, coords: &Vec<usize>) -> &'a mut Token {
pub fn evaluate(
pub fn p_evaluate(
mut g: Token,
) -> Result<Token, (LineLocation, ParserError)> {
let mut coords: Vec<usize> = Vec::with_capacity(16);
@ -89,6 +89,8 @@ pub fn evaluate(
Token::PreWord(_,_) |
Token::PreOperator(_,_) |
Token::PreGroup(_,_) |
Token::PreGroupStart(_) |
Token::PreGroupEnd(_) |
=> panic!()
Normal file
Normal file
@ -0,0 +1,145 @@
use std::collections::VecDeque;
use crate::parser::Token;
use crate::parser::LineLocation;
use crate::parser::ParserError;
use crate::parser::Operator;
/// Looks backwards at the elements of g.
/// - Inserts ImplicitMultiply
/// - Removes multiple PreNegatives
/// - Applies PreNegative to Numbers
/// - Parses factorials
/// - Checks syntax
fn lookback(
g: &mut VecDeque<Token>
) -> Result<(), (LineLocation, ParserError)> {
if g.len() >= 2 {
let b: Token = g.pop_back().unwrap();
let a: Token = g.pop_back().unwrap();
match (&a, &b) {
// Insert ImplicitMultiply
(Token::PreGroup(_,_), Token::PreGroup(l ,_)) |
(Token::PreGroup(_,_), Token::Number(l,_)) |
(Token::Number(_,_), Token::PreGroup(l,_)) |
(Token::Constant(_,_,_), Token::Number(l,_)) |
(Token::Number(_,_), Token::Constant(l,_,_)) |
(Token::Constant(_,_,_), Token::PreGroup(l,_)) |
(Token::PreGroup(_,_), Token::Constant(l,_,_)) |
(Token::Constant(_,_,_), Token::Constant(l,_,_))
=> {
let LineLocation { pos: i, .. } = l;
LineLocation{pos: i-1, len: 0},
// The following are syntax errors
(Token::Number(la, _), Token::Number(lb,_))
=> {
let LineLocation { pos: posa, .. } = *la;
let LineLocation { pos: posb, len: lenb } = *lb;
return Err((
LineLocation{pos: posa, len: posb - posa + lenb},
// The following are fine
(Token::PreOperator(_,_), _) |
(_, Token::PreOperator(_,_))
=> { g.push_back(a); g.push_back(b); },
// If we get this far, we found a Token
// that shouldn't be here.
_ => panic!()
return Ok(());
pub fn p_groupify(mut g: VecDeque<Token>) -> Result<Token, (LineLocation, ParserError)> {
// Vector of grouping levels
let mut levels: Vec<(LineLocation, VecDeque<Token>)> = Vec::with_capacity(8);
levels.push((LineLocation{pos: 0, len: 0}, VecDeque::with_capacity(8)));
// Makes sure parentheses are matched
let mut i_level = 0;
while g.len() > 0 {
let t = g.pop_front().unwrap();
let (l_now, v_now) = levels.last_mut().unwrap();
match &t {
Token::PreOperator(_, _) => {
Token::PreNumber(l, s) => {
let n = match s.parse() {
Ok(n) => n,
Err(_) => return Err((*l, ParserError::BadNumber))
v_now.push_back(Token::Number(*l, n));
Token::PreWord(l, s) => {
v_now.push_back(match &s[..] {
"mod" => { Token::PreOperator(*l, Operator::ModuloLong) },
"pi" => { Token::Constant(*l, 3.141592653, String::from("π")) },
_ => { return Err((*l, ParserError::Syntax)); }
Token::PreGroupStart(l) => {
levels.push((*l, VecDeque::with_capacity(8)));
i_level += 1;
Token::PreGroupEnd(l) => {
let LineLocation{pos: posa, ..} = *l_now;
let LineLocation{pos: posb, len: lenb} = l;
let l = LineLocation {
pos: posa,
len: lenb + posb - posa
if i_level == 0 {
return Err((l, ParserError::ExtraCloseParen))
i_level -= 1;
// Catch empty groups
if v_now.len() == 0 {
return Err((l, ParserError::EmptyGroup))
let (_, v) = levels.pop().unwrap();
let (_, v_now) = levels.last_mut().unwrap();
v_now.push_back(Token::PreGroup(l, v));
_ => panic!()
if levels.len() != 1 {
let (l, _) = levels.pop().unwrap();
return Err((l, ParserError::MissingCloseParen))
let (_, v) = levels.pop().unwrap();
return Ok(Token::Root(v));
@ -2,7 +2,6 @@ use std::collections::VecDeque;
use crate::parser::Token;
use crate::parser::LineLocation;
use crate::parser::ParserError;
use crate::parser::Operator;
/// Updates the length of a Token's LineLocation.
@ -10,7 +9,8 @@ use crate::parser::Operator;
fn update_line_location(mut t: Token, stop_i: usize) -> Token {
match t {
Token::PreGroup(ref mut l, _) |
Token::PreGroupStart(ref mut l) |
Token::PreGroupEnd(ref mut l) |
Token::PreOperator(ref mut l, _) |
Token::PreNumber(ref mut l, _) |
Token::PreWord(ref mut l, _)
@ -27,137 +27,20 @@ fn update_line_location(mut t: Token, stop_i: usize) -> Token {
return t;
/// Looks backwards at the elements of g.
/// - Inserts ImplicitMultiply
/// - Removes multiple PreNegatives
/// - Applies PreNegative to Numbers
/// - Parses factorials
/// - Checks syntax
fn lookback(
g: &mut VecDeque<Token>
) -> Result<(), (LineLocation, ParserError)> {
if g.len() >= 2 {
let b: Token = g.pop_back().unwrap();
let a: Token = g.pop_back().unwrap();
match (&a, &b) {
// Insert ImplicitMultiply
(Token::PreGroup(_,_), Token::PreGroup(l ,_)) |
(Token::PreGroup(_,_), Token::Number(l,_)) |
(Token::Number(_,_), Token::PreGroup(l,_)) |
(Token::Constant(_,_,_), Token::Number(l,_)) |
(Token::Number(_,_), Token::Constant(l,_,_)) |
(Token::Constant(_,_,_), Token::PreGroup(l,_)) |
(Token::PreGroup(_,_), Token::Constant(l,_,_)) |
(Token::Constant(_,_,_), Token::Constant(l,_,_))
=> {
let LineLocation { pos: i, .. } = l;
LineLocation{pos: i-1, len: 0},
// The following are syntax errors
(Token::Number(la, _), Token::Number(lb,_))
=> {
let LineLocation { pos: posa, .. } = *la;
let LineLocation { pos: posb, len: lenb } = *lb;
return Err((
LineLocation{pos: posa, len: posb - posa + lenb},
// The following are fine
(Token::PreOperator(_,_), _) |
(_, Token::PreOperator(_,_))
=> { g.push_back(a); g.push_back(b); },
// If we get this far, we found a Token
// that shouldn't be here.
_ => panic!()
return Ok(());
/// Pushes (and potentially processes) a token we just read to a vector.
/// - Converts all `PreNumbers` to `Numbers`, returning a BadNumber error if necessary
/// - Converts all `PreWords` to other tokens.
fn push_token(
g_now: &mut VecDeque<Token>,
i: usize,
t: Option<Token>
) -> Result<(), (LineLocation, ParserError)>{
if t.is_none() {
return Ok(());
} else {
let t: Token = update_line_location(t.unwrap(), i);
g_now.push_back(match t {
Token::PreNumber(l, s) => {
let n = match s.parse() {
Ok(n) => n,
Err(_) => return Err((l, ParserError::BadNumber))
Token::Number(l, n)
Token::PreWord(l, s) => {
if s == "mod" {
Token::PreOperator(l, Operator::ModuloLong)
} else if s == "pi" {
Token::Constant(l, 3.141592653, String::from("π"))
} else {
return Err((l, ParserError::Syntax));
Token::PreOperator(_, _) => t,
Token::PreGroup(_, _) => t,
_ => panic!()
return Ok(());
/// Turns a string into Tokens. First stage of parsing.
pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
pub fn p_tokenize(input: &String) -> VecDeque<Token> {
let mut t: Option<Token> = None; // The current token we're reading
let mut g: Vec<Token> = Vec::with_capacity(8); // Vector of "grouping levels"
let mut i_level = 0;
g.push(Token::PreGroup(LineLocation{pos: 0, len: 0}, VecDeque::with_capacity(8)));
let mut g: VecDeque<Token> = VecDeque::with_capacity(32);
for (i, c) in input.chars().enumerate() {
// The grouping level we're on now
let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() {
Token::PreGroup(_, ref mut x) => x,
_ => panic!()
match c {
'!' => {
push_token(g_now, i, t)?;
t = Some(Token::PreOperator(
LineLocation{pos: i, len: 1},
// The minus sign can be both a Negative and an Operator.
// Needs special treatment.
'-' => {
push_token(g_now, i, t)?;
match g_now.back() {
if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); }
match g.back() {
// If previous token was any of the following,
// this is the "minus" operator
Some(Token::Number(_, _)) |
@ -192,42 +75,28 @@ pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
// If we're not building a number, finalize
// previous token and start one.
_ => {
push_token(g_now, i, t)?;
if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); }
t = Some(Token::PreNumber(LineLocation{pos: i, len: 0}, String::from(c)));
// Word
'A'..='Z' |
'a'..='z' => {
match &mut t {
Some(Token::PreWord(_, val)) => {
_ => {
push_token(g_now, i, t)?;
t = Some(Token::PreWord(LineLocation{pos: i, len: 0}, String::from(c)));
// Operator
// Always one character
'+'|'%'|'^' => {
push_token(g_now, i, t)?;
'^'|'!' => {
if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); }
t = Some(Token::PreOperator(
LineLocation{pos: i, len: 1},
LineLocation{pos: i, len: 0},
match c {
'^' => Operator::Power,
'%' => Operator::Modulo,
'*'|'×' => Operator::Multiply,
'/'|'÷' => Operator::Divide,
'+' => Operator::Add,
'!' => Operator::Factorial,
_ => panic!()
@ -235,67 +104,41 @@ pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
// Group
'(' => {
push_token(g_now, i, t)?; t = None;
g.push(Token::PreGroup(LineLocation{pos: i, len: 0}, VecDeque::with_capacity(8)));
i_level += 1;
if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); }
t = Some(Token::PreGroupStart(LineLocation{pos: i, len: 0}));
')' => {
// Catch extra close parens
if i_level == 0 {
return Err((
LineLocation{pos: i, len: 1},
i_level -= 1;
// Catch empty groups
if t.is_none() {
let mut last = g.pop().unwrap();
last = update_line_location(last, i+1);
let Token::PreGroup(l, _) = last else {panic!()};
return Err((
push_token(g_now, i, t)?;
t = Some(g.pop().unwrap());
if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); }
t = Some(Token::PreGroupEnd(LineLocation{pos: i, len: 0}));
// Space. Basic separator.
' ' => {
push_token(g_now, i, t)?; t = None;
if t.is_some() {
g.push_back(update_line_location(t.unwrap(), i));
t = None;
// Invalid character
_ => { return Err((LineLocation{pos: i, len: 1}, ParserError::InvalidChar)); }
// Word
//'A'..='Z' |
_ => {
match &mut t {
Some(Token::PreWord(_, val)) => {
_ => {
if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); }
t = Some(Token::PreWord(LineLocation{pos: i, len: 0}, String::from(c)));
let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() {
Token::PreGroup(_, ref mut x) => x,
_ => panic!()
push_token(g_now, input.len(), t)?;
if t.is_some() { g.push_back(update_line_location(t.unwrap(), input.len())); }
if g.len() != 1 {
let q: LineLocation = match g.last_mut().unwrap() {
Token::PreGroup(l, _) => *l,
_ => panic!()
let LineLocation{pos:p, ..} = q;
return Err((
pos: p,
len: input.len() - p
return Ok(g.pop().unwrap());
return g;
@ -346,7 +346,7 @@ fn inner_treeify(
) -> Result<Token, (LineLocation, ParserError)> {
let g_inner: &mut VecDeque<Token> = match g {
Token::PreGroup(_, ref mut x) => x,
Token::Root(ref mut x) => x,
_ => panic!()
@ -394,7 +394,7 @@ fn inner_treeify(
return Ok(g);
pub fn treeify(
pub fn p_treeify(
mut g: Token,
) -> Result<Token, (LineLocation, ParserError)> {
let mut v: VecDeque<Token> = VecDeque::new();
Reference in New Issue
Block a user