From af85e8a6d5ef051af6c2061295643414666bd452 Mon Sep 17 00:00:00 2001 From: Mark Date: Wed, 22 Mar 2023 10:53:38 -0700 Subject: [PATCH] Rewrote parser --- src/parser.rs | 80 ++++++++++++++------ src/parser/fold_operators.rs | 4 +- src/parser/replace_pre.rs | 33 --------- src/parser/tokenize.rs | 139 +++++++++++++++++++++++++++++------ src/parser/treeify.rs | 90 +++++++++++++++++++++++ src/parser/unwrap_groups.rs | 38 ---------- 6 files changed, 267 insertions(+), 117 deletions(-) delete mode 100644 src/parser/replace_pre.rs create mode 100644 src/parser/treeify.rs delete mode 100644 src/parser/unwrap_groups.rs diff --git a/src/parser.rs b/src/parser.rs index 846258f..ff919e8 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,34 +1,37 @@ mod tokenize; -mod replace_pre; -mod fold_operators; -mod unwrap_groups; +mod treeify; use crate::parser::tokenize::tokenize; -use crate::parser::replace_pre::replace_pre; -use crate::parser::fold_operators::fold_operators; -use crate::parser::unwrap_groups::unwrap_groups; +use crate::parser::treeify::treeify; use std::collections::VecDeque; +/// Tokens represent logical objects in an expression. +/// +/// Tokens starting with `Pre*` are intermediate tokens, and +/// will never show up in a fully-parsed expression tree. #[derive(Debug)] pub enum Token { - // Used only while tokenizing. - // All of these are replaced with one of the tokens below. - // - // If parsing is successful, - // - all PreGroups will vanish - // - all PreOperators will become Operators - // - all PreNumbers will become Numbers - PreGroup(LineLocation, VecDeque), - PreOperator(LineLocation, String), + /// Used only while tokenizing. + /// Will be replaced with a Number once we finish. PreNumber(LineLocation, String), + + /// Used only while tokenizing. + /// Will be replaced with one of the Tokens below once we finish. PreWord(LineLocation, String), - Number(f64), + /// Used only until operators are parsed. 
+ /// Each of these will become one of the operators below. + PreOperator(LineLocation, Operators), - // Operators + /// Used only until operators are parsed. + /// PreGroups aren't needed once we have a tree. + PreGroup(LineLocation, VecDeque), + + + Number(LineLocation, f64), Multiply(VecDeque), Divide(VecDeque), Add(VecDeque), @@ -39,6 +42,27 @@ pub enum Token { Modulo(VecDeque), } + +/// Operator types, in order of increasing priority. +/// The Null operator MUST be equal to zero. +#[derive(Debug)] +#[derive(Copy, Clone)] +pub enum Operators { + Null = 0, + ModuloLong, // Mod invoked with "mod" + Subtract, + Add, + Divide, + Multiply, + ImplicitMultiply, + Modulo, // Mod invoked with % + Power, + Negative, + Factorial, +} + +/// Specifies the location of a token in an input string. +/// Used to locate ParserErrors. #[derive(Debug)] #[derive(Copy, Clone)] pub struct LineLocation { @@ -46,22 +70,34 @@ pub struct LineLocation { pub len: usize } +/// Types of parser errors. +/// If we cannot parse a string, one of these is returned. #[derive(Debug)] pub enum ParserError { InvalidChar, MissingCloseParen, Syntax, - BadNumber // Cannot parse a number + InvalidImplicitMultiply, + BadNumber } - +/// Parse a user string. This is the only method that should be used +/// outside this module. +/// +/// # Arguments: +/// `s`: the string to parse. Must be trimmed. +/// +/// # Returns: +/// - `Err(LineLocation, ParserError)` if we couldn't parse this string. +/// `LineLocation` specifies *where* the error is, and `ParserError` specifies +/// *what* the error is. +/// +/// - `Ok(Token)` otherwise, where `Token` is the top of an expression tree. 
pub fn parse(s: &String) -> Result { let mut g: Token = tokenize(s)?; - replace_pre(&mut g)?; - fold_operators(&mut g)?; - unwrap_groups(&mut g)?; + treeify(&mut g)?; return Ok(g); } \ No newline at end of file diff --git a/src/parser/fold_operators.rs b/src/parser/fold_operators.rs index 56a19c0..ba025f7 100644 --- a/src/parser/fold_operators.rs +++ b/src/parser/fold_operators.rs @@ -1,4 +1,4 @@ -use std::collections::VecDeque; +/*use std::collections::VecDeque; use crate::parser::Token; use crate::parser::LineLocation; @@ -176,4 +176,4 @@ pub fn fold_operators(exp: &mut Token) -> Result<(), (LineLocation, ParserError) )?; return Ok(()); -} \ No newline at end of file +}*/ \ No newline at end of file diff --git a/src/parser/replace_pre.rs b/src/parser/replace_pre.rs deleted file mode 100644 index 8fc6424..0000000 --- a/src/parser/replace_pre.rs +++ /dev/null @@ -1,33 +0,0 @@ -use crate::parser::Token; -use crate::parser::LineLocation; -use crate::parser::ParserError; - - -pub fn replace_pre(g: &mut Token) -> Result<(), (LineLocation, ParserError)> { - - match g { - Token::PreGroup(_, ref mut vec) => { - for i in vec.iter_mut() { - replace_pre(i)?; - } - }, - Token::PreNumber(l, s) => { - let n = match s.parse() { - Ok(n) => n, - Err(_) => return Err((*l, ParserError::BadNumber)) - }; - *g = Token::Number(n); - } - Token::PreWord(l, ref s) => { - if s == "mod" { - *g = Token::PreOperator(*l, String::from("mod")); - } else { - return Err((*l, ParserError::Syntax)); - } - }, - Token::PreOperator(_, _) => {}, - _ => { panic!(); } - }; - - return Ok(()); -} \ No newline at end of file diff --git a/src/parser/tokenize.rs b/src/parser/tokenize.rs index 2c16900..78789a6 100644 --- a/src/parser/tokenize.rs +++ b/src/parser/tokenize.rs @@ -3,7 +3,10 @@ use std::collections::VecDeque; use crate::parser::Token; use crate::parser::LineLocation; use crate::parser::ParserError; +use crate::parser::Operators; +/// Updates the length of a Token's LineLocation. 
+/// Run whenever a token is finished. #[inline(always)] fn update_line_location(mut t: Token, stop_i: usize) -> Token { match t { @@ -24,7 +27,93 @@ fn update_line_location(mut t: Token, stop_i: usize) -> Token { } +/// Look at the last two elements of `g`: +/// - if one is an operator, do nothing. +/// - if they are a valid implicit multiplication pair, add an ImplicitMultiply between them +/// - if they aren't, throw an error. +#[inline(always)] +fn insert_implicit( + g: &mut VecDeque +) -> Result<(), (LineLocation, ParserError)> { + if g.len() >= 2 { + let b: Token = g.pop_back().unwrap(); + let a: &Token = g.back().unwrap(); + match (a, &b) { + + // Not implicit multiplication, ignore + (Token::PreOperator(_,_), _) | + (_, Token::PreOperator(_,_)) + => { g.push_back(b); }, + + // Valid implicit multiplications + (Token::PreGroup(_,_), Token::PreGroup(ref l,_)) | + (Token::PreGroup(_,_), Token::Number(ref l,_)) | + (Token::Number(_,_), Token::PreGroup(ref l,_)) + => { + let LineLocation { pos: i, .. } = l; + g.push_back(Token::PreOperator( + LineLocation{pos: i-1, len: 0}, + Operators::ImplicitMultiply + )); + g.push_back(b); + }, + + // Invalid implicit multiplications + (Token::Number(_,_), Token::Number(l,_)) + => { + let LineLocation { pos: i, .. } = l; + return Err(( + LineLocation{pos: i-1, len: 2}, + ParserError::InvalidImplicitMultiply + )); + }, + + _ => panic!() + } + }; + return Ok(()); +} + + + +/// Pushes (and potentially processes) a token we just read to a vector. +/// - Converts all `PreNumbers` to `Numbers`, returning a BadNumber error if necessary +/// - Converts all `PreWords` to other tokens. 
+fn push_token( + g_now: &mut VecDeque, + i: usize, + t: Option +) -> Result<(), (LineLocation, ParserError)>{ + if t.is_none() { + return Ok(()); + } else { + let t: Token = update_line_location(t.unwrap(), i); + g_now.push_back(match t { + Token::PreNumber(l, s) => { + let n = match s.parse() { + Ok(n) => n, + Err(_) => return Err((l, ParserError::BadNumber)) + }; + Token::Number(l, n) + }, + Token::PreWord(l, s) => { + if s == "mod" { + Token::PreOperator(l, Operators::ModuloLong) + } else { + return Err((l, ParserError::Syntax)); + } + }, + Token::PreOperator(_, _) => t, + _ => panic!() + }); + insert_implicit(g_now)?; + } + return Ok(()); +} + + +/// Turns a string into Tokens. First stage of parsing. pub fn tokenize(input: &String) -> Result { let mut t: Option = None; // The current token we're reading let mut g: Vec = Vec::with_capacity(8); // Vector of "grouping levels" @@ -45,15 +134,15 @@ pub fn tokenize(input: &String) -> Result { g_now.push_back( Token::PreOperator( LineLocation{pos: i, len: 1}, - String::from("!") + Operators::Factorial ) ); }, - // Minus sign can be both a Negative and an Operator. + // The minus sign can be both a Negative and an Operator. // Needs special treatment. '-' => { - if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; } + push_token(g_now, i, t)?; t = None; match g_now.back() { // If previous token was any of the following, // this is the "minus" operator @@ -63,7 +152,7 @@ pub fn tokenize(input: &String) -> Result { g_now.push_back( Token::PreOperator( LineLocation{pos: i, len: 1}, - String::from(c) + Operators::Subtract ) ); }, @@ -73,7 +162,7 @@ pub fn tokenize(input: &String) -> Result { g_now.push_back( Token::PreOperator( LineLocation{pos: i, len: 1}, - String::from("neg") + Operators::Negative ) ); } @@ -93,7 +182,7 @@ pub fn tokenize(input: &String) -> Result { // If we're not building a number, finalize // previous token and start one. 
_ => { - if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); } + push_token(g_now, i, t)?; t = Some(Token::PreNumber(LineLocation{pos: i, len: 0}, String::from(c))); } }; @@ -118,27 +207,33 @@ pub fn tokenize(input: &String) -> Result { }; }, - - // Operation + // Operator // Always one character - '+' | - '*' | - '/' | - '^' | - '%' => { + '+' | '*' | '/' | '^' | '%' => { // Finalize previous token - if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; } - g_now.push_back(Token::PreOperator(LineLocation{pos: i, len: 1}, String::from(c))); + push_token(g_now, i, t)?; t = None; + g_now.push_back( + Token::PreOperator( + LineLocation{pos: i, len: 1}, + match c { + '^' => Operators::Power, + '%' => Operators::Modulo, + '*' => Operators::Multiply, + '/' => Operators::Divide, + '+' => Operators::Add, + _ => panic!() + } + ) + ); } - // Groups - // Always one character + // Group '(' => { - if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; } + push_token(g_now, i, t)?; t = None; g.push(Token::PreGroup(LineLocation{pos: i, len: 0}, VecDeque::with_capacity(8))); }, ')' => { - if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; } + push_token(g_now, i, t)?; t = None; let new_group: Token = g.pop().unwrap(); let g_now: &mut VecDeque = match g.last_mut().unwrap() { @@ -151,10 +246,10 @@ pub fn tokenize(input: &String) -> Result { // Space. Basic seperator. 
' ' => { - if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; } + push_token(g_now, i, t)?; t = None; } - // Invalid token + // Invalid character _ => { return Err((LineLocation{pos: i, len: 1}, ParserError::InvalidChar)); } }; } @@ -164,7 +259,7 @@ pub fn tokenize(input: &String) -> Result { Token::PreGroup(_, ref mut x) => x, _ => panic!() }; - if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), input.len())); } + push_token(g_now, input.len(), t)?; if g.len() != 1 { let q: LineLocation = match g.last_mut().unwrap() { diff --git a/src/parser/treeify.rs b/src/parser/treeify.rs new file mode 100644 index 0000000..91253c5 --- /dev/null +++ b/src/parser/treeify.rs @@ -0,0 +1,90 @@ +use std::collections::VecDeque; + +use crate::parser::Token; +use crate::parser::LineLocation; +use crate::parser::ParserError; +use crate::parser::Operators; + + +pub fn treeify( + g: &mut Token, +) -> Result<(), (LineLocation, ParserError)> { + + let g_inner: &mut VecDeque = match g { + Token::PreGroup(_, ref mut x) => x, + _ => panic!() + }; + let mut new: VecDeque = VecDeque::with_capacity(8); + + let mut i = 1; + while g_inner.len() > 1 { + + let a: isize; + if i == 1 { + a = Operators::Null as isize; + } else { + let q: Operators = match g_inner[i-2] { + Token::PreOperator(_, x) => x, + _ => panic!() + }; + a = q as isize; + } + + let b: isize = match g_inner[i] { + Token::PreOperator(_, x) => x, + _ => panic!() + } as isize; + + let c: isize; + if i >= g_inner.len()-2 { + c = Operators::Null as isize; + } else { + let q: Operators = match g_inner[i+2] { + Token::PreOperator(_, x) => x, + _ => panic!() + }; + c = q as isize; + } + + println!("{}, {:?}", i, g_inner); + if b >= a && b >= c { + // This operator owns both its arguments. 
+ let left = g_inner.remove(i-1).unwrap(); + let this = g_inner.remove(i-1).unwrap(); + let right = g_inner.remove(i-1).unwrap(); + + let (l, k) = match this { + Token::PreOperator(l, k) => (l, k), + _ => panic!() + }; + + let mut new_token_args: VecDeque = VecDeque::with_capacity(3); + new_token_args.push_back(left); + new_token_args.push_back(right); + + g_inner.insert( + i-1, + match k { + Operators::Subtract => Token::Subtract(new_token_args), + Operators::Add => Token::Add(new_token_args), + Operators::Divide => Token::Divide(new_token_args), + Operators::Multiply => Token::Multiply(new_token_args), + Operators::ImplicitMultiply => Token::Multiply(new_token_args), + Operators::Modulo => Token::Modulo(new_token_args), + Operators::Power => Token::Power(new_token_args), + Operators::ModuloLong => Token::Modulo(new_token_args), + Operators::Negative => panic!(), + Operators::Factorial => panic!(), + Operators::Null => panic!() + } + ); + if i >= 3 { i -= 2; } + } else { + // This operator has lower precedence than another. + // skip it for now. 
+ i += 2; + } + println!("{}", i); + } + return Ok(()); +} \ No newline at end of file diff --git a/src/parser/unwrap_groups.rs b/src/parser/unwrap_groups.rs deleted file mode 100644 index 6065dd7..0000000 --- a/src/parser/unwrap_groups.rs +++ /dev/null @@ -1,38 +0,0 @@ -use crate::parser::Token; -use crate::parser::ParserError; -use crate::parser::LineLocation; - -pub fn unwrap_groups(g: &mut Token) -> Result<(), (LineLocation, ParserError)> { - - match g { - // If g is a PreGroup, unwrap it - Token::PreGroup(l, ref mut vec) => { - if vec.len() != 1 { - return Err((*l, ParserError::Syntax)); - } - - let mut i = vec.pop_front().unwrap(); - unwrap_groups(&mut i)?; - *g = i; - }, - - // If g has sub-elements, recursive call - Token::Multiply(ref mut vec) | - Token::Divide(ref mut vec) | - Token::Add(ref mut vec) | - Token::Subtract(ref mut vec) | - Token::Factorial(ref mut vec) | - Token::Negative(ref mut vec) | - Token::Power(ref mut vec) | - Token::Modulo(ref mut vec) => { - for i in vec.iter_mut() { - unwrap_groups(i)?; - } - }, - - // Otherwise, skip g. - _ => {} - }; - - return Ok(()); -} \ No newline at end of file