From 9f53847ed3c3e5bf11d752bdd3b8e1ed01ff0a2d Mon Sep 17 00:00:00 2001 From: Mark Date: Wed, 22 Mar 2023 15:06:58 -0700 Subject: [PATCH] Added lookback parsing --- src/parser.rs | 11 ++- src/parser/tokenize.rs | 149 +++++++++++++++++++++-------------------- src/parser/treeify.rs | 9 +-- 3 files changed, 84 insertions(+), 85 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index ff919e8..b97b636 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -30,14 +30,16 @@ pub enum Token { /// PreGroups aren't needed once we have a tree. PreGroup(LineLocation, VecDeque), + PreNegative(LineLocation), + PreFactorial(LineLocation), Number(LineLocation, f64), Multiply(VecDeque), Divide(VecDeque), Add(VecDeque), Subtract(VecDeque), - Factorial(VecDeque), - Negative(VecDeque), + //Factorial(VecDeque), + //Negative(VecDeque), Power(VecDeque), Modulo(VecDeque), } @@ -56,9 +58,7 @@ pub enum Operators { Multiply, ImplicitMultiply, Modulo, // Mod invoked with % - Power, - Negative, - Factorial, + Power } /// Specifies the location of a token in an input string. @@ -77,7 +77,6 @@ pub enum ParserError { InvalidChar, MissingCloseParen, Syntax, - InvalidImplicitMultiply, BadNumber } diff --git a/src/parser/tokenize.rs b/src/parser/tokenize.rs index 78789a6..d049454 100644 --- a/src/parser/tokenize.rs +++ b/src/parser/tokenize.rs @@ -13,7 +13,10 @@ fn update_line_location(mut t: Token, stop_i: usize) -> Token { Token::PreGroup(ref mut l, _) | Token::PreOperator(ref mut l, _) | Token::PreNumber(ref mut l, _) | - Token::PreWord(ref mut l, _) => { + Token::PreWord(ref mut l, _) | + Token::PreNegative(ref mut l) | + Token::PreFactorial(ref mut l) + => { let LineLocation{pos, .. } = l; *l = LineLocation{ pos: *pos, @@ -27,30 +30,33 @@ fn update_line_location(mut t: Token, stop_i: usize) -> Token { } -/// Look at the last two elements of `g`: -/// - if one is an operator, do nothing. -/// - if they are a valid implicit multiplication pair, add an ImplicitMultiply between them -/// - if they aren't, throw an error. +/// Looks backwards at the elements of g. +/// - Inserts ImplicitMultiply +/// - Removes multiple PreNegatives +/// - Applies PreNegative to Numbers +/// - Parses factorials +/// - Checks syntax #[inline(always)] -fn insert_implicit( +fn lookback( g: &mut VecDeque ) -> Result<(), (LineLocation, ParserError)> { if g.len() >= 2 { let b: Token = g.pop_back().unwrap(); - let a: &Token = g.back().unwrap(); + let a: Token = g.pop_back().unwrap(); - match (a, &b) { + match (&a, &b) { - // Not implicit multiplication, ignore - (Token::PreOperator(_,_), _) | - (_, Token::PreOperator(_,_)) - => { g.push_back(b); }, + ( // Delete consecutive negatives + Token::PreNegative(_), + Token::PreNegative(_) + ) => {}, - // Valid implicit multiplications - (Token::PreGroup(_,_), Token::PreGroup(ref l,_)) | - (Token::PreGroup(_,_), Token::Number(ref l,_)) | - (Token::Number(_,_), Token::PreGroup(ref l,_)) + // Insert ImplicitMultiply + (Token::PreGroup(_,_), Token::PreGroup(l ,_)) | + (Token::PreGroup(_,_), Token::Number(l,_)) | + (Token::Number(_,_), Token::PreGroup(l,_)) => { + g.push_back(a); let LineLocation { pos: i, .. } = l; g.push_back(Token::PreOperator( LineLocation{pos: i-1, len: 0}, @@ -59,16 +65,35 @@ fn insert_implicit( g.push_back(b); }, - // Invalid implicit multiplications - (Token::Number(_,_), Token::Number(l,_)) + // The following are syntax errors + (Token::PreOperator(la,_), Token::PreOperator(lb,_)) | + (Token::Number(la, _), Token::Number(lb,_)) | + (Token::PreNegative(la), Token::PreOperator(lb,_)) | + (Token::PreOperator(la, _), Token::PreFactorial(lb)) | + (Token::PreNegative(la), Token::PreFactorial(lb)) => { - let LineLocation { pos: i, .. } = l; + let LineLocation { pos: posa, .. } = *la; + let LineLocation { pos: posb, len: lenb } = *lb; return Err(( - LineLocation{pos: i-1, len: 2}, - ParserError::InvalidImplicitMultiply + LineLocation{pos: posa, len: posb - posa + lenb}, + ParserError::Syntax )); - }, + } + // The following are fine + (Token::PreOperator(_,_), Token::PreNegative(_)) | + (Token::PreOperator(_,_), Token::Number(_,_)) | + (Token::Number(_,_), Token::PreOperator(_,_)) | + (Token::PreOperator(_,_), Token::PreGroup(_,_)) | + (Token::PreGroup(_,_), Token::PreOperator(_,_)) | + (Token::PreNegative(_), Token::PreGroup(_,_)) | + (Token::PreNegative(_), Token::Number(_,_)) | + (Token::PreGroup(_,_), Token::PreFactorial(_)) | + (Token::Number(_,_), Token::PreFactorial(_)) + => { g.push_back(a); g.push_back(b); }, + + // If we get this far, we found a Token + // that shouldn't be here. _ => panic!() } }; @@ -105,9 +130,13 @@ fn push_token( } }, Token::PreOperator(_, _) => t, + Token::PreGroup(_, _) => t, + Token::PreNegative(_) => t, + Token::PreFactorial(_) => t, _ => panic!() }); - insert_implicit(g_now)?; + + lookback(g_now)?; } return Ok(()); } @@ -130,41 +159,29 @@ pub fn tokenize(input: &String) -> Result { match c { '!' => { - if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; } - g_now.push_back( - Token::PreOperator( - LineLocation{pos: i, len: 1}, - Operators::Factorial - ) - ); + push_token(g_now, i, t)?; + t = Some(Token::PreFactorial(LineLocation{pos: i, len: 1})); }, // The minus sign can be both a Negative and an Operator. // Needs special treatment. '-' => { - push_token(g_now, i, t)?; t = None; + push_token(g_now, i, t)?; match g_now.back() { // If previous token was any of the following, // this is the "minus" operator - Some(Token::PreNumber(_, _)) | + Some(Token::Number(_, _)) | Some(Token::PreGroup(_, _)) | Some(Token::PreWord(_, _)) => { - g_now.push_back( - Token::PreOperator( - LineLocation{pos: i, len: 1}, - Operators::Subtract - ) - ); + t = Some(Token::PreOperator( + LineLocation{pos: i, len: 1}, + Operators::Subtract + )); }, // Otherwise, this is a negative sign. _ => { - g_now.push_back( - Token::PreOperator( - LineLocation{pos: i, len: 1}, - Operators::Negative - ) - ); + t = Some(Token::PreNegative(LineLocation{pos: i, len: 1})); } }; }, @@ -192,16 +209,12 @@ pub fn tokenize(input: &String) -> Result { 'A'..='Z' | 'a'..='z' => { match &mut t { - // If we're already building a number, - // append. Some(Token::PreWord(_, val)) => { val.push(c); }, - // If we're not building a number, finalize - // previous token and start one. _ => { - if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); } + push_token(g_now, i, t)?; t = Some(Token::PreWord(LineLocation{pos: i, len: 0}, String::from(c))); } }; @@ -210,21 +223,18 @@ pub fn tokenize(input: &String) -> Result { // Operator // Always one character '+' | '*' | '/' | '^' | '%' => { - // Finalize previous token - push_token(g_now, i, t)?; t = None; - g_now.push_back( - Token::PreOperator( - LineLocation{pos: i, len: 1}, - match c { - '^' => Operators::Power, - '%' => Operators::Modulo, - '*' => Operators::Multiply, - '/' => Operators::Divide, - '+' => Operators::Add, - _ => panic!() - } - ) - ); + push_token(g_now, i, t)?; + t = Some(Token::PreOperator( + LineLocation{pos: i, len: 1}, + match c { + '^' => Operators::Power, + '%' => Operators::Modulo, + '*' => Operators::Multiply, + '/' => Operators::Divide, + '+' => Operators::Add, + _ => panic!() + } + )); } // Group @@ -233,15 +243,8 @@ pub fn tokenize(input: &String) -> Result { g.push(Token::PreGroup(LineLocation{pos: i, len: 0}, VecDeque::with_capacity(8))); }, ')' => { - push_token(g_now, i, t)?; t = None; - let new_group: Token = g.pop().unwrap(); - - let g_now: &mut VecDeque = match g.last_mut().unwrap() { - Token::PreGroup(_, ref mut x) => x, - _ => panic!() - }; - - g_now.push_back(update_line_location(new_group, i+1)); + push_token(g_now, i, t)?; + t = Some(g.pop().unwrap()); }, // Space. Basic seperator. diff --git a/src/parser/treeify.rs b/src/parser/treeify.rs index 91253c5..9e1a9d5 100644 --- a/src/parser/treeify.rs +++ b/src/parser/treeify.rs @@ -14,7 +14,6 @@ pub fn treeify( Token::PreGroup(_, ref mut x) => x, _ => panic!() }; - let mut new: VecDeque = VecDeque::with_capacity(8); let mut i = 1; while g_inner.len() > 1 { @@ -53,8 +52,8 @@ pub fn treeify( let this = g_inner.remove(i-1).unwrap(); let right = g_inner.remove(i-1).unwrap(); - let (l, k) = match this { - Token::PreOperator(l, k) => (l, k), + let k = match this { + Token::PreOperator(_, k) => k, _ => panic!() }; @@ -71,10 +70,8 @@ pub fn treeify( Operators::Multiply => Token::Multiply(new_token_args), Operators::ImplicitMultiply => Token::Multiply(new_token_args), Operators::Modulo => Token::Modulo(new_token_args), - Operators::Power => Token::Power(new_token_args), Operators::ModuloLong => Token::Modulo(new_token_args), - Operators::Negative => panic!(), - Operators::Factorial => panic!(), + Operators::Power => Token::Power(new_token_args), Operators::Null => panic!() } );