Split tokenizer

pull/2/head
Mark 2023-03-24 23:06:44 -07:00
parent cafe1c9c64
commit 8430b0d15a
Signed by: Mark
GPG Key ID: AD62BB059C2AAEE4
6 changed files with 200 additions and 206 deletions

View File

@ -73,7 +73,7 @@ fn main() -> Result<(), std::io::Error> {
RawTerminal::suspend_raw_mode(&stdout)?; RawTerminal::suspend_raw_mode(&stdout)?;
write!(stdout, "\n")?; write!(stdout, "\n")?;
let g = parser::parse(&s); let g = parser::evaluate(&s);
RawTerminal::activate_raw_mode(&stdout)?; RawTerminal::activate_raw_mode(&stdout)?;
match g { match g {

View File

@ -1,10 +1,12 @@
mod tokenize; mod tokenize;
mod treeify; mod treeify;
mod groupify;
mod evaluate; mod evaluate;
use crate::parser::tokenize::tokenize; use crate::parser::tokenize::p_tokenize;
use crate::parser::treeify::treeify; use crate::parser::groupify::p_groupify;
use crate::parser::evaluate::evaluate; use crate::parser::treeify::p_treeify;
use crate::parser::evaluate::p_evaluate;
use std::collections::VecDeque; use std::collections::VecDeque;
@ -29,6 +31,8 @@ pub enum Token {
/// Each of these will become one of the operators below. /// Each of these will become one of the operators below.
PreOperator(LineLocation, Operator), PreOperator(LineLocation, Operator),
PreGroupStart(LineLocation),
PreGroupEnd(LineLocation),
/// Used only until operators are parsed. /// Used only until operators are parsed.
/// PreGroups aren't needed once we have a tree. /// PreGroups aren't needed once we have a tree.
PreGroup(LineLocation, VecDeque<Token>), PreGroup(LineLocation, VecDeque<Token>),
@ -212,7 +216,6 @@ pub struct LineLocation {
/// If we cannot parse a string, one of these is returned. /// If we cannot parse a string, one of these is returned.
#[derive(Debug)] #[derive(Debug)]
pub enum ParserError { pub enum ParserError {
InvalidChar,
MissingCloseParen, MissingCloseParen,
ExtraCloseParen, ExtraCloseParen,
EmptyGroup, EmptyGroup,
@ -233,11 +236,12 @@ pub enum ParserError {
/// *what* the error is. /// *what* the error is.
/// ///
/// - `Ok(Token)` otherwise, where `Token` is the top of an expression tree. /// - `Ok(Token)` otherwise, where `Token` is the top of an expression tree.
pub fn parse(s: &String) -> Result<Token, (LineLocation, ParserError)> { pub fn evaluate(s: &String) -> Result<Token, (LineLocation, ParserError)> {
let mut g: Token = tokenize(s)?; let tokens = p_tokenize(s);
g = treeify(g)?; let mut g = p_groupify(tokens)?;
g = evaluate(g)?; g = p_treeify(g)?;
g = p_evaluate(g)?;
return Ok(g); return Ok(g);
} }

View File

@ -26,7 +26,7 @@ fn get_at_coords<'a>(g: &'a mut Token, coords: &Vec<usize>) -> &'a mut Token {
} }
pub fn evaluate( pub fn p_evaluate(
mut g: Token, mut g: Token,
) -> Result<Token, (LineLocation, ParserError)> { ) -> Result<Token, (LineLocation, ParserError)> {
let mut coords: Vec<usize> = Vec::with_capacity(16); let mut coords: Vec<usize> = Vec::with_capacity(16);
@ -89,6 +89,8 @@ pub fn evaluate(
Token::PreWord(_,_) | Token::PreWord(_,_) |
Token::PreOperator(_,_) | Token::PreOperator(_,_) |
Token::PreGroup(_,_) | Token::PreGroup(_,_) |
Token::PreGroupStart(_) |
Token::PreGroupEnd(_) |
Token::Root(_) Token::Root(_)
=> panic!() => panic!()
}; };

145
src/parser/groupify.rs Normal file
View File

@ -0,0 +1,145 @@
use std::collections::VecDeque;
use crate::parser::Token;
use crate::parser::LineLocation;
use crate::parser::ParserError;
use crate::parser::Operator;
/// Inspects the two most recently pushed tokens of `g` and fixes up the
/// boundary between them.
///
/// - Two adjacent operands (`Number`, `Constant` or `PreGroup`) get an
///   `ImplicitMultiply` operator inserted between them. The operator has
///   length zero and sits one column before the right-hand operand.
/// - Two adjacent `Number`s are a syntax error. The reported location runs
///   from the start of the first number to the end of the second.
/// - If either token is a `PreOperator`, the pair is left as it was.
///
/// Does nothing when `g` holds fewer than two tokens.
///
/// # Errors
/// Returns `ParserError::Syntax` for two adjacent numbers. In that case the
/// two inspected tokens have already been removed from `g`.
///
/// # Panics
/// Panics if the pair contains any other combination of tokens.
#[inline(always)]
fn lookback(
    g: &mut VecDeque<Token>
) -> Result<(), (LineLocation, ParserError)> {
    // Nothing to compare against yet.
    if g.len() < 2 {
        return Ok(());
    }

    let right: Token = g.pop_back().unwrap();
    let left: Token = g.pop_back().unwrap();

    match (&left, &right) {
        // Two numbers in a row. This arm must come before the generic
        // operand arm below, which would otherwise also match it.
        (Token::Number(first, _), Token::Number(second, _)) => {
            let start = first.pos;
            return Err((
                LineLocation { pos: start, len: second.pos - start + second.len },
                ParserError::Syntax
            ));
        },

        // Any other pair of operands: multiply them implicitly.
        (
            Token::PreGroup(..) | Token::Number(..) | Token::Constant(..),
            Token::PreGroup(at, _) | Token::Number(at, _) | Token::Constant(at, _, _)
        ) => {
            g.push_back(left);
            g.push_back(Token::PreOperator(
                LineLocation { pos: at.pos - 1, len: 0 },
                Operator::ImplicitMultiply
            ));
            g.push_back(right);
        },

        // An operator on either side needs no fixing.
        (Token::PreOperator(..), _) |
        (_, Token::PreOperator(..)) => {
            g.push_back(left);
            g.push_back(right);
        },

        // Anything else is a token that should never reach this stage.
        _ => panic!()
    }

    Ok(())
}
/// Turns a flat sequence of tokens into a tree of nested groups.
///
/// Tokens are consumed front to back:
/// - `PreOperator` is kept as-is.
/// - `PreNumber` is parsed into a `Number`.
/// - `PreWord` is resolved: `"mod"` becomes the `ModuloLong` operator and
///   `"pi"` becomes a `Constant`.
/// - `PreGroupStart` opens a new nesting level.
/// - `PreGroupEnd` closes the current level and stores it in its parent as a
///   `PreGroup`. The group's location runs from the opening paren to the end
///   of the closing paren.
///
/// After every token added to a level, `lookback` is run on that level.
///
/// # Errors
/// - `BadNumber` if a `PreNumber` does not parse.
/// - `Syntax` for an unknown word, or whatever `lookback` reports.
/// - `ExtraCloseParen` for a `PreGroupEnd` with no open group. The location
///   is that of the offending paren itself.
/// - `EmptyGroup` for a group with nothing inside it.
/// - `MissingCloseParen` if a group is still open at the end of input. The
///   location is that of the innermost unclosed opening paren.
///
/// # Panics
/// Panics if `g` contains any token kind other than the five listed above.
///
/// On success, returns a `Token::Root` holding the top-level tokens.
pub fn p_groupify(mut g: VecDeque<Token>) -> Result<Token, (LineLocation, ParserError)> {
    // Stack of grouping levels. Each entry is the location of the paren that
    // opened the level plus the tokens collected in it so far. The bottom
    // entry is the implicit root level; its location is only a placeholder.
    let mut levels: Vec<(LineLocation, VecDeque<Token>)> = Vec::with_capacity(8);
    levels.push((LineLocation{pos: 0, len: 0}, VecDeque::with_capacity(8)));

    while let Some(t) = g.pop_front() {
        // Work out which token (if any) gets appended to the current level.
        let finished: Token = match t {
            Token::PreOperator(_, _) => t,

            Token::PreNumber(l, s) => match s.parse() {
                Ok(n) => Token::Number(l, n),
                Err(_) => return Err((l, ParserError::BadNumber))
            },

            Token::PreWord(l, s) => match &s[..] {
                "mod" => Token::PreOperator(l, Operator::ModuloLong),
                // NOTE(review): this literal is a truncated value of pi.
                // Consider the standard-library constant once the payload
                // type of `Token::Constant` is confirmed.
                "pi" => Token::Constant(l, 3.141592653, String::from("π")),
                _ => return Err((l, ParserError::Syntax))
            },

            Token::PreGroupStart(l) => {
                levels.push((l, VecDeque::with_capacity(8)));
                continue;
            },

            Token::PreGroupEnd(l_end) => {
                // Only the root level is left, so there is nothing to close.
                // Report the paren itself: the root's placeholder location
                // must not leak into the error span.
                if levels.len() == 1 {
                    return Err((l_end, ParserError::ExtraCloseParen))
                }

                let (l_start, v) = levels.pop().unwrap();
                let l = LineLocation {
                    pos: l_start.pos,
                    len: l_end.len + l_end.pos - l_start.pos
                };

                // Catch empty groups
                if v.is_empty() {
                    return Err((l, ParserError::EmptyGroup))
                }

                Token::PreGroup(l, v)
            },

            _ => panic!()
        };

        let (_, v_now) = levels.last_mut().expect("the root level is never popped inside the loop");
        v_now.push_back(finished);
        lookback(v_now)?;
    }

    // The innermost level still on the stack is either the root (all groups
    // were closed) or a group that never saw its closing paren.
    let (l, v) = levels.pop().expect("the root level is never popped inside the loop");
    if !levels.is_empty() {
        return Err((l, ParserError::MissingCloseParen))
    }

    return Ok(Token::Root(v));
}

View File

@ -2,7 +2,6 @@ use std::collections::VecDeque;
use crate::parser::Token; use crate::parser::Token;
use crate::parser::LineLocation; use crate::parser::LineLocation;
use crate::parser::ParserError;
use crate::parser::Operator; use crate::parser::Operator;
/// Updates the length of a Token's LineLocation. /// Updates the length of a Token's LineLocation.
@ -10,7 +9,8 @@ use crate::parser::Operator;
#[inline(always)] #[inline(always)]
fn update_line_location(mut t: Token, stop_i: usize) -> Token { fn update_line_location(mut t: Token, stop_i: usize) -> Token {
match t { match t {
Token::PreGroup(ref mut l, _) | Token::PreGroupStart(ref mut l) |
Token::PreGroupEnd(ref mut l) |
Token::PreOperator(ref mut l, _) | Token::PreOperator(ref mut l, _) |
Token::PreNumber(ref mut l, _) | Token::PreNumber(ref mut l, _) |
Token::PreWord(ref mut l, _) Token::PreWord(ref mut l, _)
@ -27,137 +27,20 @@ fn update_line_location(mut t: Token, stop_i: usize) -> Token {
return t; return t;
} }
/// Inspects the two most recently pushed tokens of `g` and fixes up the
/// boundary between them.
///
/// - Two adjacent operands (`Number`, `Constant` or `PreGroup`) get an
///   `ImplicitMultiply` operator inserted between them. The operator has
///   length zero and sits one column before the right-hand operand.
/// - Two adjacent `Number`s are a syntax error. The reported location runs
///   from the start of the first number to the end of the second.
/// - If either token is a `PreOperator`, the pair is left as it was.
///
/// Does nothing when `g` holds fewer than two tokens.
///
/// # Errors
/// Returns `ParserError::Syntax` for two adjacent numbers. In that case the
/// two inspected tokens have already been removed from `g`.
///
/// # Panics
/// Panics if the pair contains any other combination of tokens.
#[inline(always)]
fn lookback(
    g: &mut VecDeque<Token>
) -> Result<(), (LineLocation, ParserError)> {
    // Nothing to compare against yet.
    if g.len() < 2 {
        return Ok(());
    }

    let right: Token = g.pop_back().unwrap();
    let left: Token = g.pop_back().unwrap();

    match (&left, &right) {
        // Two numbers in a row. This arm must come before the generic
        // operand arm below, which would otherwise also match it.
        (Token::Number(first, _), Token::Number(second, _)) => {
            let start = first.pos;
            return Err((
                LineLocation { pos: start, len: second.pos - start + second.len },
                ParserError::Syntax
            ));
        },

        // Any other pair of operands: multiply them implicitly.
        (
            Token::PreGroup(..) | Token::Number(..) | Token::Constant(..),
            Token::PreGroup(at, _) | Token::Number(at, _) | Token::Constant(at, _, _)
        ) => {
            g.push_back(left);
            g.push_back(Token::PreOperator(
                LineLocation { pos: at.pos - 1, len: 0 },
                Operator::ImplicitMultiply
            ));
            g.push_back(right);
        },

        // An operator on either side needs no fixing.
        (Token::PreOperator(..), _) |
        (_, Token::PreOperator(..)) => {
            g.push_back(left);
            g.push_back(right);
        },

        // Anything else is a token that should never reach this stage.
        _ => panic!()
    }

    Ok(())
}
/// Finalizes a freshly read token and appends it to `g_now`.
///
/// - `None` is ignored and `Ok(())` is returned.
/// - The token's location is first passed through `update_line_location`
///   together with `i`.
/// - `PreNumber` is parsed into a `Number`.
/// - `PreWord` is resolved: `"mod"` becomes the `ModuloLong` operator and
///   `"pi"` becomes a `Constant`.
/// - `PreOperator` and `PreGroup` are appended unchanged.
///
/// After the token is appended, `lookback` is run on `g_now` and its result
/// is returned.
///
/// # Errors
/// - `BadNumber` if a `PreNumber` does not parse.
/// - `Syntax` for an unknown word, or whatever `lookback` reports.
///
/// # Panics
/// Panics if `t` holds any other kind of token.
fn push_token(
    g_now: &mut VecDeque<Token>,
    i: usize,
    t: Option<Token>
) -> Result<(), (LineLocation, ParserError)>{
    // No pending token: nothing to do.
    let Some(pending) = t else {
        return Ok(());
    };

    let finished: Token = match update_line_location(pending, i) {
        Token::PreNumber(l, s) => match s.parse() {
            Ok(n) => Token::Number(l, n),
            Err(_) => return Err((l, ParserError::BadNumber))
        },

        Token::PreWord(l, s) => match s.as_str() {
            "mod" => Token::PreOperator(l, Operator::ModuloLong),
            "pi" => Token::Constant(l, 3.141592653, String::from("π")),
            _ => return Err((l, ParserError::Syntax))
        },

        // These need no conversion.
        keep @ (Token::PreOperator(_, _) | Token::PreGroup(_, _)) => keep,

        _ => panic!()
    };

    g_now.push_back(finished);
    lookback(g_now)
}
/// Turns a string into Tokens. First stage of parsing. /// Turns a string into Tokens. First stage of parsing.
pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> { pub fn p_tokenize(input: &String) -> VecDeque<Token> {
let mut t: Option<Token> = None; // The current token we're reading let mut t: Option<Token> = None; // The current token we're reading
let mut g: Vec<Token> = Vec::with_capacity(8); // Vector of "grouping levels" let mut g: VecDeque<Token> = VecDeque::with_capacity(32);
let mut i_level = 0;
g.push(Token::PreGroup(LineLocation{pos: 0, len: 0}, VecDeque::with_capacity(8)));
for (i, c) in input.chars().enumerate() { for (i, c) in input.chars().enumerate() {
// The grouping level we're on now
let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() {
Token::PreGroup(_, ref mut x) => x,
_ => panic!()
};
match c { match c {
'!' => {
push_token(g_now, i, t)?;
t = Some(Token::PreOperator(
LineLocation{pos: i, len: 1},
Operator::Factorial
));
},
// The minus sign can be both a Negative and an Operator. // The minus sign can be both a Negative and an Operator.
// Needs special treatment. // Needs special treatment.
'-' => { '-' => {
push_token(g_now, i, t)?; if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); }
match g_now.back() { match g.back() {
// If previous token was any of the following, // If previous token was any of the following,
// this is the "minus" operator // this is the "minus" operator
Some(Token::Number(_, _)) | Some(Token::Number(_, _)) |
@ -192,42 +75,28 @@ pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
// If we're not building a number, finalize // If we're not building a number, finalize
// previous token and start one. // previous token and start one.
_ => { _ => {
push_token(g_now, i, t)?; if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); }
t = Some(Token::PreNumber(LineLocation{pos: i, len: 0}, String::from(c))); t = Some(Token::PreNumber(LineLocation{pos: i, len: 0}, String::from(c)));
} }
}; };
}, },
// Word
'A'..='Z' |
'a'..='z' => {
match &mut t {
Some(Token::PreWord(_, val)) => {
val.push(c);
},
_ => {
push_token(g_now, i, t)?;
t = Some(Token::PreWord(LineLocation{pos: i, len: 0}, String::from(c)));
}
};
},
// Operator // Operator
// Always one character // Always one character
'*'|'×'| '*'|'×'|
'/'|'÷'| '/'|'÷'|
'+'|'%'|'^' => { '+'|'%'|
push_token(g_now, i, t)?; '^'|'!' => {
if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); }
t = Some(Token::PreOperator( t = Some(Token::PreOperator(
LineLocation{pos: i, len: 1}, LineLocation{pos: i, len: 0},
match c { match c {
'^' => Operator::Power, '^' => Operator::Power,
'%' => Operator::Modulo, '%' => Operator::Modulo,
'*'|'×' => Operator::Multiply, '*'|'×' => Operator::Multiply,
'/'|'÷' => Operator::Divide, '/'|'÷' => Operator::Divide,
'+' => Operator::Add, '+' => Operator::Add,
'!' => Operator::Factorial,
_ => panic!() _ => panic!()
} }
)); ));
@ -235,67 +104,41 @@ pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
// Group // Group
'(' => { '(' => {
push_token(g_now, i, t)?; t = None; if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); }
g.push(Token::PreGroup(LineLocation{pos: i, len: 0}, VecDeque::with_capacity(8))); t = Some(Token::PreGroupStart(LineLocation{pos: i, len: 0}));
i_level += 1;
}, },
')' => { ')' => {
// Catch extra close parens if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); }
if i_level == 0 { t = Some(Token::PreGroupEnd(LineLocation{pos: i, len: 0}));
return Err((
LineLocation{pos: i, len: 1},
ParserError::ExtraCloseParen
))
}
i_level -= 1;
// Catch empty groups
if t.is_none() {
let mut last = g.pop().unwrap();
last = update_line_location(last, i+1);
let Token::PreGroup(l, _) = last else {panic!()};
return Err((
l,
ParserError::EmptyGroup
))
}
push_token(g_now, i, t)?;
t = Some(g.pop().unwrap());
}, },
// Space. Basic separator. // Space. Basic separator.
' ' => { ' ' => {
push_token(g_now, i, t)?; t = None; if t.is_some() {
g.push_back(update_line_location(t.unwrap(), i));
t = None;
}
} }
// Invalid character // Word
_ => { return Err((LineLocation{pos: i, len: 1}, ParserError::InvalidChar)); } //'A'..='Z' |
//'a'..='z'
_ => {
match &mut t {
Some(Token::PreWord(_, val)) => {
val.push(c);
},
_ => {
if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); }
t = Some(Token::PreWord(LineLocation{pos: i, len: 0}, String::from(c)));
}
};
}
}; };
} }
if t.is_some() { g.push_back(update_line_location(t.unwrap(), input.len())); }
let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() {
Token::PreGroup(_, ref mut x) => x,
_ => panic!()
};
push_token(g_now, input.len(), t)?;
if g.len() != 1 { return g;
let q: LineLocation = match g.last_mut().unwrap() {
Token::PreGroup(l, _) => *l,
_ => panic!()
};
let LineLocation{pos:p, ..} = q;
return Err((
LineLocation{
pos: p,
len: input.len() - p
},
ParserError::MissingCloseParen
))
}
return Ok(g.pop().unwrap());
} }

View File

@ -346,7 +346,7 @@ fn inner_treeify(
) -> Result<Token, (LineLocation, ParserError)> { ) -> Result<Token, (LineLocation, ParserError)> {
let g_inner: &mut VecDeque<Token> = match g { let g_inner: &mut VecDeque<Token> = match g {
Token::PreGroup(_, ref mut x) => x, Token::Root(ref mut x) => x,
_ => panic!() _ => panic!()
}; };
@ -394,7 +394,7 @@ fn inner_treeify(
return Ok(g); return Ok(g);
} }
pub fn treeify( pub fn p_treeify(
mut g: Token, mut g: Token,
) -> Result<Token, (LineLocation, ParserError)> { ) -> Result<Token, (LineLocation, ParserError)> {
let mut v: VecDeque<Token> = VecDeque::new(); let mut v: VecDeque<Token> = VecDeque::new();