From c743d28f6c1f1407c09c5d05cc5d25c09e56886c Mon Sep 17 00:00:00 2001 From: Mark Date: Wed, 22 Mar 2023 22:51:48 -0700 Subject: [PATCH] Reworked parser, added support for unary operators --- src/parser.rs | 15 +- src/parser/tokenize.rs | 36 ++-- src/parser/treeify.rs | 376 +++++++++++++++++++++++++++++++++++------ 3 files changed, 343 insertions(+), 84 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index b97b636..d4fbb3e 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -30,16 +30,13 @@ pub enum Token { /// PreGroups aren't needed once we have a tree. PreGroup(LineLocation, VecDeque), - PreNegative(LineLocation), - PreFactorial(LineLocation), - Number(LineLocation, f64), Multiply(VecDeque), Divide(VecDeque), Add(VecDeque), Subtract(VecDeque), - //Factorial(VecDeque), - //Negative(VecDeque), + Factorial(VecDeque), + Negative(VecDeque), Power(VecDeque), Modulo(VecDeque), } @@ -50,15 +47,17 @@ pub enum Token { #[derive(Debug)] #[derive(Copy, Clone)] pub enum Operators { - Null = 0, - ModuloLong, // Mod invoked with "mod" + ModuloLong = 0, // Mod invoked with "mod" Subtract, Add, Divide, Multiply, ImplicitMultiply, Modulo, // Mod invoked with % - Power + Power, + + Negative, + Factorial } /// Specifies the location of a token in an input string. diff --git a/src/parser/tokenize.rs b/src/parser/tokenize.rs index d049454..b3f1daa 100644 --- a/src/parser/tokenize.rs +++ b/src/parser/tokenize.rs @@ -13,9 +13,7 @@ fn update_line_location(mut t: Token, stop_i: usize) -> Token { Token::PreGroup(ref mut l, _) | Token::PreOperator(ref mut l, _) | Token::PreNumber(ref mut l, _) | - Token::PreWord(ref mut l, _) | - Token::PreNegative(ref mut l) | - Token::PreFactorial(ref mut l) + Token::PreWord(ref mut l, _) => { let LineLocation{pos, .. } = l; *l = LineLocation{ @@ -45,12 +43,6 @@ fn lookback( let a: Token = g.pop_back().unwrap(); match (&a, &b) { - - ( // Delete consecutive negatives - Token::PreNegative(_), - Token::PreNegative(_) - ) => {}, - // Insert ImplicitMultiply (Token::PreGroup(_,_), Token::PreGroup(l ,_)) | (Token::PreGroup(_,_), Token::Number(l,_)) | @@ -66,11 +58,7 @@ fn lookback( }, // The following are syntax errors - (Token::PreOperator(la,_), Token::PreOperator(lb,_)) | - (Token::Number(la, _), Token::Number(lb,_)) | - (Token::PreNegative(la), Token::PreOperator(lb,_)) | - (Token::PreOperator(la, _), Token::PreFactorial(lb)) | - (Token::PreNegative(la), Token::PreFactorial(lb)) + (Token::Number(la, _), Token::Number(lb,_)) => { let LineLocation { pos: posa, .. } = *la; let LineLocation { pos: posb, len: lenb } = *lb; @@ -81,15 +69,11 @@ fn lookback( } // The following are fine - (Token::PreOperator(_,_), Token::PreNegative(_)) | + (Token::PreOperator(_,_), Token::PreOperator(_,_)) | (Token::PreOperator(_,_), Token::Number(_,_)) | (Token::Number(_,_), Token::PreOperator(_,_)) | (Token::PreOperator(_,_), Token::PreGroup(_,_)) | - (Token::PreGroup(_,_), Token::PreOperator(_,_)) | - (Token::PreNegative(_), Token::PreGroup(_,_)) | - (Token::PreNegative(_), Token::Number(_,_)) | - (Token::PreGroup(_,_), Token::PreFactorial(_)) | - (Token::Number(_,_), Token::PreFactorial(_)) + (Token::PreGroup(_,_), Token::PreOperator(_,_)) => { g.push_back(a); g.push_back(b); }, // If we get this far, we found a Token @@ -131,8 +115,6 @@ fn push_token( }, Token::PreOperator(_, _) => t, Token::PreGroup(_, _) => t, - Token::PreNegative(_) => t, - Token::PreFactorial(_) => t, _ => panic!() }); @@ -160,7 +142,10 @@ pub fn tokenize(input: &String) -> Result { match c { '!' => { push_token(g_now, i, t)?; - t = Some(Token::PreFactorial(LineLocation{pos: i, len: 1})); + t = Some(Token::PreOperator( + LineLocation{pos: i, len: 1}, + Operators::Factorial + )); }, // The minus sign can be both a Negative and an Operator. @@ -181,7 +166,10 @@ pub fn tokenize(input: &String) -> Result { // Otherwise, this is a negative sign. _ => { - t = Some(Token::PreNegative(LineLocation{pos: i, len: 1})); + t = Some(Token::PreOperator( + LineLocation{pos: i, len: 1}, + Operators::Negative + )); } }; }, diff --git a/src/parser/treeify.rs b/src/parser/treeify.rs index 9e1a9d5..69fdbdf 100644 --- a/src/parser/treeify.rs +++ b/src/parser/treeify.rs @@ -5,49 +5,103 @@ use crate::parser::LineLocation; use crate::parser::ParserError; use crate::parser::Operators; - -pub fn treeify( - g: &mut Token, -) -> Result<(), (LineLocation, ParserError)> { - - let g_inner: &mut VecDeque = match g { - Token::PreGroup(_, ref mut x) => x, +#[inline(always)] +fn get_line_location(t: &Token) -> &LineLocation { + match t { + Token::PreNumber(l, _) | + Token::PreWord(l, _) | + Token::PreOperator(l, _) | + Token::PreGroup(l, _) + => l, _ => panic!() + } +} + +#[inline(always)] +fn select_op(k: Operators, new_token_args: VecDeque) -> Token { + match k { + Operators::Subtract => Token::Subtract(new_token_args), + Operators::Add => Token::Add(new_token_args), + Operators::Divide => Token::Divide(new_token_args), + Operators::Multiply => Token::Multiply(new_token_args), + Operators::ImplicitMultiply => Token::Multiply(new_token_args), + Operators::Modulo => Token::Modulo(new_token_args), + Operators::ModuloLong => Token::Modulo(new_token_args), + Operators::Power => Token::Power(new_token_args), + Operators::Negative => Token::Negative(new_token_args), + Operators::Factorial => Token::Factorial(new_token_args) + } +} + +fn treeify_binary( + mut i: usize, + g_inner: &mut VecDeque +) -> Result { + + let this: &Token = &g_inner[i]; + let right: &Token = { + if i < g_inner.len()-1 { + &g_inner[i+1] + } else { + // This binary operator is at the end of the expression. + let l = match this { + Token::PreOperator(l, _) => l, + _ => panic!() + }; + return Err((*l, ParserError::Syntax)); + } }; - let mut i = 1; - while g_inner.len() > 1 { - let a: isize; - if i == 1 { - a = Operators::Null as isize; - } else { - let q: Operators = match g_inner[i-2] { - Token::PreOperator(_, x) => x, - _ => panic!() - }; - a = q as isize; - } + if let Token::PreOperator(l, o) = right { + match o { + // Binary operators + Operators::ModuloLong | + Operators::Subtract | + Operators::Add | + Operators::Divide | + Operators::Multiply | + Operators::ImplicitMultiply | + Operators::Modulo | + Operators::Power | + // Right unary operators + Operators::Factorial + => { + // Binary and right-unary operators cannot + // follow a binary operator. + let LineLocation { pos: posa, .. } = *get_line_location(&this); + let LineLocation { pos: posb, len: lenb } = *l; + return Err(( + LineLocation{pos: posa, len: posb - posa + lenb}, + ParserError::Syntax + )); + }, - let b: isize = match g_inner[i] { - Token::PreOperator(_, x) => x, + // Left unary operators + Operators::Negative => { + i += 1; + return Ok(i); + } + }; + } else { + + // Precedence of this operator + let this_val: isize = match this { + Token::PreOperator(_, q) => *q as isize, _ => panic!() - } as isize; + }; - let c: isize; - if i >= g_inner.len()-2 { - c = Operators::Null as isize; - } else { - let q: Operators = match g_inner[i+2] { - Token::PreOperator(_, x) => x, + // Precedence of the operator contesting the right argument. + let right_val = if i < g_inner.len()-2 { + match &g_inner[i+2] { + Token::PreOperator(_, q) => Some(*q as isize), _ => panic!() - }; - c = q as isize; - } + } + } else { None }; - println!("{}, {:?}", i, g_inner); - if b >= a && b >= c { - // This operator owns both its arguments. + + if right_val.is_none() || this_val > right_val.unwrap() { + // This operator has higher precedence, it takes both arguments let left = g_inner.remove(i-1).unwrap(); let this = g_inner.remove(i-1).unwrap(); let right = g_inner.remove(i-1).unwrap(); @@ -61,27 +115,245 @@ pub fn treeify( new_token_args.push_back(left); new_token_args.push_back(right); - g_inner.insert( - i-1, - match k { - Operators::Subtract => Token::Subtract(new_token_args), - Operators::Add => Token::Add(new_token_args), - Operators::Divide => Token::Divide(new_token_args), - Operators::Multiply => Token::Multiply(new_token_args), - Operators::ImplicitMultiply => Token::Multiply(new_token_args), - Operators::Modulo => Token::Modulo(new_token_args), - Operators::ModuloLong => Token::Modulo(new_token_args), - Operators::Power => Token::Power(new_token_args), - Operators::Null => panic!() - } - ); - if i >= 3 { i -= 2; } + g_inner.insert(i-1, select_op(k, new_token_args)); + + if i > 1 { i -= 2; } else { i = 0; } + return Ok(i); } else { - // This operator has lower precedence than another. - // skip it for now. + // The operator to the right has higher precedence. + // Move on, don't to anything yet. i += 2; + return Ok(i); + }; + }; +} + + +fn treeify_unaryleft( + mut i: usize, + g_inner: &mut VecDeque +) -> Result { + + let this: &Token = &g_inner[i]; + let right: &Token = { + if i < g_inner.len()-1 { + &g_inner[i+1] + } else { + let l = match this { + Token::PreOperator(l, _) => l, + _ => panic!() + }; + return Err((*l, ParserError::Syntax)); + } + }; + + + if let Token::PreOperator(l, o) = right { + match o { + // Binary operators + Operators::ModuloLong | + Operators::Subtract | + Operators::Add | + Operators::Divide | + Operators::Multiply | + Operators::ImplicitMultiply | + Operators::Modulo | + Operators::Power | + // Right unary operators + Operators::Factorial + => { + // Binary and right-unary operators cannot + // follow a binary operator. + let LineLocation { pos: posa, .. } = *get_line_location(&this); + let LineLocation { pos: posb, len: lenb } = *l; + return Err(( + LineLocation{pos: posa, len: posb - posa + lenb}, + ParserError::Syntax + )); + }, + + // Left unary operators + Operators::Negative => { + i += 1; + return Ok(i); + } + }; + } else { + + // Precedence of this operator + let this_val: isize = match this { + Token::PreOperator(_, q) => *q as isize, + _ => panic!() + }; + + // Precedence of the operator contesting its argument + let right_val = if i < g_inner.len()-2 { + match &g_inner[i+2] { + Token::PreOperator(_, q) => Some(*q as isize), + _ => panic!() + } + } else { None }; + + + if right_val.is_none() || this_val > right_val.unwrap() { + let this = g_inner.remove(i).unwrap(); + let right = g_inner.remove(i).unwrap(); + + let k = match this { + Token::PreOperator(_, k) => k, + _ => panic!() + }; + + let mut new_token_args: VecDeque = VecDeque::with_capacity(3); + new_token_args.push_back(right); + + g_inner.insert(i, select_op(k, new_token_args)); + + if i > 0 { i -= 1; } else { i = 0; } + return Ok(i); + } else { + // The operator to the right has higher precedence. + // Move on, don't to anything yet. + i += 2; + return Ok(i); + }; + }; +} + +fn treeify_unaryright( + mut i: usize, + g_inner: &mut VecDeque +) -> Result { + + let this: &Token = &g_inner[i]; + let left: &Token = { + if i > 0 { + &g_inner[i-1] + } else { + let l = match this { + Token::PreOperator(l, _) => l, + _ => panic!() + }; + return Err((*l, ParserError::Syntax)); + } + }; + + + // We need to check the element after unary right operators too. + // Bad syntax like `3!3` won't be caught otherwise. + let right: Option<&Token> = { + if i < g_inner.len()-1 { + Some(&g_inner[i+1]) + } else {None} + }; + + if right.is_some() { + if let Token::PreOperator(l, o) = right.unwrap() { + match o { + // Left unary operators + Operators::Negative => { + let LineLocation { pos: posa, .. } = *get_line_location(&this); + let LineLocation { pos: posb, len: lenb } = *l; + return Err(( + LineLocation{pos: posa, len: posb - posa + lenb}, + ParserError::Syntax + )); + }, + _ => {}, + }; + } else { + return Err(( + *get_line_location(&this), + ParserError::Syntax + )); } - println!("{}", i); } + + if let Token::PreOperator(l, _) = left { + let LineLocation { pos: posa, .. } = *get_line_location(&this); + let LineLocation { pos: posb, len: lenb } = *l; + return Err(( + LineLocation{pos: posa, len: posb - posa + lenb}, + ParserError::Syntax + )); + + } else { + + // Precedence of this operator + let this_val: isize = match this { + Token::PreOperator(_, q) => *q as isize, + _ => panic!() + }; + + // Precedence of the operator contesting its argument. + let left_val = if i >= 2 { + match &g_inner[i-2] { + Token::PreOperator(_, q) => Some(*q as isize), + _ => panic!() + } + } else { None }; + + + if left_val.is_none() || this_val > left_val.unwrap() { + let this = g_inner.remove(i).unwrap(); + let left = g_inner.remove(i-1).unwrap(); + + let k = match this { + Token::PreOperator(_, k) => k, + _ => panic!() + }; + + let mut new_token_args: VecDeque = VecDeque::with_capacity(3); + new_token_args.push_back(left); + + g_inner.insert(i-1, select_op(k, new_token_args)); + + if i > 2 { i -= 2; } else { i = 0; } + return Ok(i); + } else { + // The operator to the right has higher precedence. + // Move on, don't to anything yet. + i += 1; + return Ok(i); + }; + }; +} + +pub fn treeify( + g: &mut Token, +) -> Result<(), (LineLocation, ParserError)> { + + let g_inner: &mut VecDeque = match g { + Token::PreGroup(_, ref mut x) => x, + _ => panic!() + }; + + let mut i: usize = 0; + while g_inner.len() > 1 { + let this_op = match &g_inner[i] { + Token::PreOperator(_, o) => o, + _ => { i+=1; continue; } + }; + + match this_op { + Operators::ModuloLong | + Operators::Subtract | + Operators::Add | + Operators::Divide | + Operators::Multiply | + Operators::ImplicitMultiply | + Operators::Modulo | + Operators::Power + => { i = treeify_binary(i, g_inner)?; }, + + Operators::Negative + => { i = treeify_unaryleft(i, g_inner)?; }, + + Operators::Factorial + => { i = treeify_unaryright(i, g_inner)?; } + + }; + } + return Ok(()); } \ No newline at end of file