From 8430b0d15af5f3c27331d1bbb504c149518a4bb4 Mon Sep 17 00:00:00 2001 From: Mark Date: Fri, 24 Mar 2023 23:06:44 -0700 Subject: [PATCH] Split tokenizer --- src/main.rs | 2 +- src/parser.rs | 20 ++-- src/parser/evaluate.rs | 4 +- src/parser/groupify.rs | 145 ++++++++++++++++++++++++++ src/parser/tokenize.rs | 231 +++++++---------------------------------- src/parser/treeify.rs | 4 +- 6 files changed, 200 insertions(+), 206 deletions(-) create mode 100644 src/parser/groupify.rs diff --git a/src/main.rs b/src/main.rs index 2578b87..e37c381 100644 --- a/src/main.rs +++ b/src/main.rs @@ -73,7 +73,7 @@ fn main() -> Result<(), std::io::Error> { RawTerminal::suspend_raw_mode(&stdout)?; write!(stdout, "\n")?; - let g = parser::parse(&s); + let g = parser::evaluate(&s); RawTerminal::activate_raw_mode(&stdout)?; match g { diff --git a/src/parser.rs b/src/parser.rs index 1d820f3..10b2250 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,10 +1,12 @@ mod tokenize; mod treeify; +mod groupify; mod evaluate; -use crate::parser::tokenize::tokenize; -use crate::parser::treeify::treeify; -use crate::parser::evaluate::evaluate; +use crate::parser::tokenize::p_tokenize; +use crate::parser::groupify::p_groupify; +use crate::parser::treeify::p_treeify; +use crate::parser::evaluate::p_evaluate; use std::collections::VecDeque; @@ -29,6 +31,8 @@ pub enum Token { /// Each of these will become one of the operators below. PreOperator(LineLocation, Operator), + PreGroupStart(LineLocation), + PreGroupEnd(LineLocation), /// Used only until operators are parsed. /// PreGroups aren't needed once we have a tree. PreGroup(LineLocation, VecDeque), @@ -212,7 +216,6 @@ pub struct LineLocation { /// If we cannot parse a string, one of these is returned. #[derive(Debug)] pub enum ParserError { - InvalidChar, MissingCloseParen, ExtraCloseParen, EmptyGroup, @@ -233,11 +236,12 @@ pub enum ParserError { /// *what* the error is. /// /// - `Ok(Token)` otherwise, where `Token` is the top of an expression tree. -pub fn parse(s: &String) -> Result { +pub fn evaluate(s: &String) -> Result { - let mut g: Token = tokenize(s)?; - g = treeify(g)?; - g = evaluate(g)?; + let tokens = p_tokenize(s); + let mut g = p_groupify(tokens)?; + g = p_treeify(g)?; + g = p_evaluate(g)?; return Ok(g); } \ No newline at end of file diff --git a/src/parser/evaluate.rs b/src/parser/evaluate.rs index b495be6..5354bba 100644 --- a/src/parser/evaluate.rs +++ b/src/parser/evaluate.rs @@ -26,7 +26,7 @@ fn get_at_coords<'a>(g: &'a mut Token, coords: &Vec) -> &'a mut Token { } -pub fn evaluate( +pub fn p_evaluate( mut g: Token, ) -> Result { let mut coords: Vec = Vec::with_capacity(16); @@ -89,6 +89,8 @@ pub fn evaluate( Token::PreWord(_,_) | Token::PreOperator(_,_) | Token::PreGroup(_,_) | + Token::PreGroupStart(_) | + Token::PreGroupEnd(_) | Token::Root(_) => panic!() }; diff --git a/src/parser/groupify.rs b/src/parser/groupify.rs new file mode 100644 index 0000000..8a808e6 --- /dev/null +++ b/src/parser/groupify.rs @@ -0,0 +1,145 @@ +use std::collections::VecDeque; + +use crate::parser::Token; +use crate::parser::LineLocation; +use crate::parser::ParserError; +use crate::parser::Operator; + +/// Looks backwards at the elements of g. +/// - Inserts ImplicitMultiply +/// - Removes multiple PreNegatives +/// - Applies PreNegative to Numbers +/// - Parses factorials +/// - Checks syntax +#[inline(always)] +fn lookback( + g: &mut VecDeque +) -> Result<(), (LineLocation, ParserError)> { + if g.len() >= 2 { + let b: Token = g.pop_back().unwrap(); + let a: Token = g.pop_back().unwrap(); + + match (&a, &b) { + // Insert ImplicitMultiply + (Token::PreGroup(_,_), Token::PreGroup(l ,_)) | + (Token::PreGroup(_,_), Token::Number(l,_)) | + (Token::Number(_,_), Token::PreGroup(l,_)) | + (Token::Constant(_,_,_), Token::Number(l,_)) | + (Token::Number(_,_), Token::Constant(l,_,_)) | + (Token::Constant(_,_,_), Token::PreGroup(l,_)) | + (Token::PreGroup(_,_), Token::Constant(l,_,_)) | + (Token::Constant(_,_,_), Token::Constant(l,_,_)) + => { + g.push_back(a); + let LineLocation { pos: i, .. } = l; + g.push_back(Token::PreOperator( + LineLocation{pos: i-1, len: 0}, + Operator::ImplicitMultiply + )); + g.push_back(b); + }, + + // The following are syntax errors + (Token::Number(la, _), Token::Number(lb,_)) + => { + let LineLocation { pos: posa, .. } = *la; + let LineLocation { pos: posb, len: lenb } = *lb; + return Err(( + LineLocation{pos: posa, len: posb - posa + lenb}, + ParserError::Syntax + )); + } + + // The following are fine + (Token::PreOperator(_,_), _) | + (_, Token::PreOperator(_,_)) + => { g.push_back(a); g.push_back(b); }, + + // If we get this far, we found a Token + // that shouldn't be here. + _ => panic!() + } + }; + return Ok(()); +} + + +pub fn p_groupify(mut g: VecDeque) -> Result { + // Vector of grouping levels + let mut levels: Vec<(LineLocation, VecDeque)> = Vec::with_capacity(8); + levels.push((LineLocation{pos: 0, len: 0}, VecDeque::with_capacity(8))); + + // Makes sure parenthesis are matched + let mut i_level = 0; + + while g.len() > 0 { + let t = g.pop_front().unwrap(); + let (l_now, v_now) = levels.last_mut().unwrap(); + + match &t { + Token::PreOperator(_, _) => { + v_now.push_back(t); + lookback(v_now)?; + }, + + Token::PreNumber(l, s) => { + let n = match s.parse() { + Ok(n) => n, + Err(_) => return Err((*l, ParserError::BadNumber)) + }; + v_now.push_back(Token::Number(*l, n)); + lookback(v_now)?; + }, + + Token::PreWord(l, s) => { + v_now.push_back(match &s[..] { + "mod" => { Token::PreOperator(*l, Operator::ModuloLong) }, + "pi" => { Token::Constant(*l, 3.141592653, String::from("π")) }, + _ => { return Err((*l, ParserError::Syntax)); } + }); + lookback(v_now)?; + }, + + Token::PreGroupStart(l) => { + levels.push((*l, VecDeque::with_capacity(8))); + i_level += 1; + }, + + Token::PreGroupEnd(l) => { + let LineLocation{pos: posa, ..} = *l_now; + let LineLocation{pos: posb, len: lenb} = l; + + let l = LineLocation { + pos: posa, + len: lenb + posb - posa + }; + + if i_level == 0 { + return Err((l, ParserError::ExtraCloseParen)) + } + i_level -= 1; + + // Catch empty groups + if v_now.len() == 0 { + return Err((l, ParserError::EmptyGroup)) + } + + let (_, v) = levels.pop().unwrap(); + let (_, v_now) = levels.last_mut().unwrap(); + + v_now.push_back(Token::PreGroup(l, v)); + lookback(v_now)?; + }, + + _ => panic!() + } + } + + if levels.len() != 1 { + let (l, _) = levels.pop().unwrap(); + return Err((l, ParserError::MissingCloseParen)) + } + + let (_, v) = levels.pop().unwrap(); + return Ok(Token::Root(v)); +} \ No newline at end of file diff --git a/src/parser/tokenize.rs b/src/parser/tokenize.rs index 93d4570..fef7524 100644 --- a/src/parser/tokenize.rs +++ b/src/parser/tokenize.rs @@ -2,7 +2,6 @@ use std::collections::VecDeque; use crate::parser::Token; use crate::parser::LineLocation; -use crate::parser::ParserError; use crate::parser::Operator; /// Updates the length of a Token's LineLocation. @@ -10,7 +9,8 @@ use crate::parser::Operator; #[inline(always)] fn update_line_location(mut t: Token, stop_i: usize) -> Token { match t { - Token::PreGroup(ref mut l, _) | + Token::PreGroupStart(ref mut l) | + Token::PreGroupEnd(ref mut l) | Token::PreOperator(ref mut l, _) | Token::PreNumber(ref mut l, _) | Token::PreWord(ref mut l, _) @@ -27,137 +27,20 @@ fn update_line_location(mut t: Token, stop_i: usize) -> Token { return t; } - -/// Looks backwards at the elements of g. -/// - Inserts ImplicitMultiply -/// - Removes multiple PreNegatives -/// - Applies PreNegative to Numbers -/// - Parses factorials -/// - Checks syntax -#[inline(always)] -fn lookback( - g: &mut VecDeque -) -> Result<(), (LineLocation, ParserError)> { - if g.len() >= 2 { - let b: Token = g.pop_back().unwrap(); - let a: Token = g.pop_back().unwrap(); - - match (&a, &b) { - // Insert ImplicitMultiply - (Token::PreGroup(_,_), Token::PreGroup(l ,_)) | - (Token::PreGroup(_,_), Token::Number(l,_)) | - (Token::Number(_,_), Token::PreGroup(l,_)) | - (Token::Constant(_,_,_), Token::Number(l,_)) | - (Token::Number(_,_), Token::Constant(l,_,_)) | - (Token::Constant(_,_,_), Token::PreGroup(l,_)) | - (Token::PreGroup(_,_), Token::Constant(l,_,_)) | - (Token::Constant(_,_,_), Token::Constant(l,_,_)) - => { - g.push_back(a); - let LineLocation { pos: i, .. } = l; - g.push_back(Token::PreOperator( - LineLocation{pos: i-1, len: 0}, - Operator::ImplicitMultiply - )); - g.push_back(b); - }, - - // The following are syntax errors - (Token::Number(la, _), Token::Number(lb,_)) - => { - let LineLocation { pos: posa, .. } = *la; - let LineLocation { pos: posb, len: lenb } = *lb; - return Err(( - LineLocation{pos: posa, len: posb - posa + lenb}, - ParserError::Syntax - )); - } - - // The following are fine - (Token::PreOperator(_,_), _) | - (_, Token::PreOperator(_,_)) - => { g.push_back(a); g.push_back(b); }, - - // If we get this far, we found a Token - // that shouldn't be here. - _ => panic!() - } - }; - return Ok(()); -} - - - -/// Pushes (and potentially processes) a token we just read to a vector. -/// - Converts all `PreNumbers` to `Numbers`, returning a BadNumber error if necessary -/// - Converts all `PreWords` to other tokens. -fn push_token( - g_now: &mut VecDeque, - i: usize, - t: Option -) -> Result<(), (LineLocation, ParserError)>{ - if t.is_none() { - return Ok(()); - } else { - let t: Token = update_line_location(t.unwrap(), i); - g_now.push_back(match t { - Token::PreNumber(l, s) => { - let n = match s.parse() { - Ok(n) => n, - Err(_) => return Err((l, ParserError::BadNumber)) - }; - Token::Number(l, n) - }, - Token::PreWord(l, s) => { - if s == "mod" { - Token::PreOperator(l, Operator::ModuloLong) - } else if s == "pi" { - Token::Constant(l, 3.141592653, String::from("π")) - } else { - return Err((l, ParserError::Syntax)); - } - }, - Token::PreOperator(_, _) => t, - Token::PreGroup(_, _) => t, - _ => panic!() - }); - - lookback(g_now)?; - } - return Ok(()); -} - - /// Turns a string into Tokens. First stage of parsing. -pub fn tokenize(input: &String) -> Result { +pub fn p_tokenize(input: &String) -> VecDeque { let mut t: Option = None; // The current token we're reading - let mut g: Vec = Vec::with_capacity(8); // Vector of "grouping levels" - let mut i_level = 0; - g.push(Token::PreGroup(LineLocation{pos: 0, len: 0}, VecDeque::with_capacity(8))); + let mut g: VecDeque = VecDeque::with_capacity(32); for (i, c) in input.chars().enumerate() { - // The grouping level we're on now - let g_now: &mut VecDeque = match g.last_mut().unwrap() { - Token::PreGroup(_, ref mut x) => x, - _ => panic!() - }; - match c { - '!' => { - push_token(g_now, i, t)?; - t = Some(Token::PreOperator( - LineLocation{pos: i, len: 1}, - Operator::Factorial - )); - }, - // The minus sign can be both a Negative and an Operator. // Needs special treatment. '-' => { - push_token(g_now, i, t)?; - match g_now.back() { + if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); } + match g.back() { // If previous token was any of the following, // this is the "minus" operator Some(Token::Number(_, _)) | @@ -192,42 +75,28 @@ pub fn tokenize(input: &String) -> Result { // If we're not building a number, finalize // previous token and start one. _ => { - push_token(g_now, i, t)?; + if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); } t = Some(Token::PreNumber(LineLocation{pos: i, len: 0}, String::from(c))); } }; }, - // Word - 'A'..='Z' | - 'a'..='z' => { - match &mut t { - Some(Token::PreWord(_, val)) => { - val.push(c); - }, - - _ => { - push_token(g_now, i, t)?; - t = Some(Token::PreWord(LineLocation{pos: i, len: 0}, String::from(c))); - } - }; - }, - // Operator // Always one character - '*'|'×'| '/'|'÷'| - '+'|'%'|'^' => { - push_token(g_now, i, t)?; + '+'|'%'| + '^'|'!' => { + if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); } t = Some(Token::PreOperator( - LineLocation{pos: i, len: 1}, + LineLocation{pos: i, len: 0}, match c { '^' => Operator::Power, '%' => Operator::Modulo, '*'|'×' => Operator::Multiply, '/'|'÷' => Operator::Divide, '+' => Operator::Add, + '!' => Operator::Factorial, _ => panic!() } )); @@ -235,67 +104,41 @@ pub fn tokenize(input: &String) -> Result { // Group '(' => { - push_token(g_now, i, t)?; t = None; - g.push(Token::PreGroup(LineLocation{pos: i, len: 0}, VecDeque::with_capacity(8))); - i_level += 1; + if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); } + t = Some(Token::PreGroupStart(LineLocation{pos: i, len: 0})); }, ')' => { - // Catch extra close parens - if i_level == 0 { - return Err(( - LineLocation{pos: i, len: 1}, - ParserError::ExtraCloseParen - )) - } - i_level -= 1; - - // Catch empty groups - if t.is_none() { - let mut last = g.pop().unwrap(); - last = update_line_location(last, i+1); - let Token::PreGroup(l, _) = last else {panic!()}; - return Err(( - l, - ParserError::EmptyGroup - )) - } - - push_token(g_now, i, t)?; - t = Some(g.pop().unwrap()); + if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); } + t = Some(Token::PreGroupEnd(LineLocation{pos: i, len: 0})); }, // Space. Basic seperator. ' ' => { - push_token(g_now, i, t)?; t = None; + if t.is_some() { + g.push_back(update_line_location(t.unwrap(), i)); + t = None; + } } - // Invalid character - _ => { return Err((LineLocation{pos: i, len: 1}, ParserError::InvalidChar)); } + // Word + //'A'..='Z' | + //'a'..='z' + _ => { + match &mut t { + Some(Token::PreWord(_, val)) => { + val.push(c); + }, + + _ => { + if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); } + t = Some(Token::PreWord(LineLocation{pos: i, len: 0}, String::from(c))); + } + }; + } }; } - - let g_now: &mut VecDeque = match g.last_mut().unwrap() { - Token::PreGroup(_, ref mut x) => x, - _ => panic!() - }; - push_token(g_now, input.len(), t)?; + if t.is_some() { g.push_back(update_line_location(t.unwrap(), input.len())); } - if g.len() != 1 { - let q: LineLocation = match g.last_mut().unwrap() { - Token::PreGroup(l, _) => *l, - _ => panic!() - }; - - let LineLocation{pos:p, ..} = q; - return Err(( - LineLocation{ - pos: p, - len: input.len() - p - }, - ParserError::MissingCloseParen - )) - } - - return Ok(g.pop().unwrap()); + return g; } \ No newline at end of file diff --git a/src/parser/treeify.rs b/src/parser/treeify.rs index e66579c..8760b3f 100644 --- a/src/parser/treeify.rs +++ b/src/parser/treeify.rs @@ -346,7 +346,7 @@ fn inner_treeify( ) -> Result { let g_inner: &mut VecDeque = match g { - Token::PreGroup(_, ref mut x) => x, + Token::Root(ref mut x) => x, _ => panic!() }; @@ -394,7 +394,7 @@ fn inner_treeify( return Ok(g); } -pub fn treeify( +pub fn p_treeify( mut g: Token, ) -> Result { let mut v: VecDeque = VecDeque::new();