From 2d9eeffb39fd0618e20387355e728414540ecb4f Mon Sep 17 00:00:00 2001 From: Mark Date: Tue, 21 Mar 2023 19:37:02 -0700 Subject: [PATCH] Reorganized code, added basic error handling --- src/main.rs | 24 ++++++++---- src/parser.rs | 63 ++++++++++++++++++++++++++---- src/parser/fold_operators.rs | 11 ++++-- src/parser/replace_pre.rs | 14 ++++--- src/parser/tokenize.rs | 74 +++++++++++------------------------- src/parser/unwrap_groups.rs | 10 +++-- 6 files changed, 117 insertions(+), 79 deletions(-) diff --git a/src/main.rs b/src/main.rs index e0ea3cd..5376072 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,8 @@ use std::io::Write; use std::sync::Arc; use std::sync::atomic::{AtomicBool, Ordering}; + + use termcolor::{ Color, ColorChoice, @@ -13,6 +15,9 @@ use termcolor::{ }; mod parser; +use crate::parser::Token; +//use crate::parser::ParserError; +use crate::parser::LineLocation; const PROMPT_PREFIX: &str = "==> "; @@ -77,23 +82,26 @@ fn main() -> Result<(), std::io::Error> { continue; } - // Tokenize input. + // Parse input. // Fail if we encounter invalid characters. - let mut g = match parser::tokenize::tokenize(&input) { - Ok(v) => v, - Err(_) => { + let g: Token = match parser::parse(&input) { + Ok(g) => g, + Err((l, e)) => { + let LineLocation{pos, len} = l; + + let s = " "; + let m = "^"; + println!("{}{} {:?}", s.repeat(pos + 4), m.repeat(len), e); + stdout.flush()?; continue; } }; - stdout.set_color(ColorSpec::new().set_fg(Some(Color::Green)))?; write!(stdout, "\n => ")?; stdout.reset()?; write!(stdout, "Got {input}\n\n\n")?; - - parser::parse(&mut g).expect("Could not parse"); - + writeln!(stdout, "Tokenized: {g:#?}")?; } diff --git a/src/parser.rs b/src/parser.rs index 82c68f0..846258f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,18 +1,67 @@ -pub mod tokenize; +mod tokenize; mod replace_pre; mod fold_operators; mod unwrap_groups; -use crate::parser::tokenize::Token; +use crate::parser::tokenize::tokenize; use crate::parser::replace_pre::replace_pre; use crate::parser::fold_operators::fold_operators; use crate::parser::unwrap_groups::unwrap_groups; +use std::collections::VecDeque; -pub fn parse(g: &mut Token) -> Result<(), ()> { - replace_pre(g)?; - fold_operators(g)?; - unwrap_groups(g)?; - return Ok(()); +#[derive(Debug)] +pub enum Token { + + // Used only while tokenizing. + // All of these are replaced with one of the tokens below. + // + // If parsing is successful, + // - all PreGroups will vanish + // - all PreOperators will become Operators + // - all PreNumbers will become Numbers + PreGroup(LineLocation, VecDeque), + PreOperator(LineLocation, String), + PreNumber(LineLocation, String), + PreWord(LineLocation, String), + + Number(f64), + + // Operators + Multiply(VecDeque), + Divide(VecDeque), + Add(VecDeque), + Subtract(VecDeque), + Factorial(VecDeque), + Negative(VecDeque), + Power(VecDeque), + Modulo(VecDeque), +} + +#[derive(Debug)] +#[derive(Copy, Clone)] +pub struct LineLocation { + pub pos: usize, + pub len: usize +} + +#[derive(Debug)] +pub enum ParserError { + InvalidChar, + MissingCloseParen, + Syntax, + BadNumber // Cannot parse a number +} + + + +pub fn parse(s: &String) -> Result { + + let mut g: Token = tokenize(s)?; + replace_pre(&mut g)?; + fold_operators(&mut g)?; + unwrap_groups(&mut g)?; + + return Ok(g); } \ No newline at end of file diff --git a/src/parser/fold_operators.rs b/src/parser/fold_operators.rs index a205339..56a19c0 100644 --- a/src/parser/fold_operators.rs +++ b/src/parser/fold_operators.rs @@ -1,5 +1,10 @@ use std::collections::VecDeque; -use crate::parser::tokenize::Token; + +use crate::parser::Token; +use crate::parser::LineLocation; +use crate::parser::ParserError; + + enum OperatorType { Binary, // A binary operator, like a + b @@ -12,7 +17,7 @@ fn fold_operators_once( op_type: &OperatorType, check: fn(&str) -> bool, new_token: fn(&str, VecDeque) -> Token, -) -> Result<(), ()> { +) -> Result<(), (LineLocation, ParserError)> { // Groups to process let mut t_vec: VecDeque<&mut Token> = VecDeque::with_capacity(32); @@ -122,7 +127,7 @@ fn fold_operators_once( return Ok(()); } -pub fn fold_operators(exp: &mut Token) -> Result<(), ()> { +pub fn fold_operators(exp: &mut Token) -> Result<(), (LineLocation, ParserError)> { fold_operators_once( exp, &OperatorType::UnaryLeft, |s| s=="!", diff --git a/src/parser/replace_pre.rs b/src/parser/replace_pre.rs index cca0d42..8fc6424 100644 --- a/src/parser/replace_pre.rs +++ b/src/parser/replace_pre.rs @@ -1,6 +1,9 @@ -use crate::parser::tokenize::Token; +use crate::parser::Token; +use crate::parser::LineLocation; +use crate::parser::ParserError; -pub fn replace_pre(g: &mut Token) -> Result<(), ()> { + +pub fn replace_pre(g: &mut Token) -> Result<(), (LineLocation, ParserError)> { match g { Token::PreGroup(_, ref mut vec) => { @@ -8,10 +11,10 @@ pub fn replace_pre(g: &mut Token) -> Result<(), ()> { replace_pre(i)?; } }, - Token::PreNumber(_, s) => { + Token::PreNumber(l, s) => { let n = match s.parse() { Ok(n) => n, - Err(_) => panic!() + Err(_) => return Err((*l, ParserError::BadNumber)) }; *g = Token::Number(n); } @@ -19,8 +22,7 @@ pub fn replace_pre(g: &mut Token) -> Result<(), ()> { if s == "mod" { *g = Token::PreOperator(*l, String::from("mod")); } else { - return Err(()); - //new.push_back(t); + return Err((*l, ParserError::Syntax)); } }, Token::PreOperator(_, _) => {}, diff --git a/src/parser/tokenize.rs b/src/parser/tokenize.rs index 069284d..2c16900 100644 --- a/src/parser/tokenize.rs +++ b/src/parser/tokenize.rs @@ -1,44 +1,8 @@ use std::collections::VecDeque; -#[derive(Debug)] -#[derive(Copy, Clone)] -pub struct LineLocation { - pos: usize, - len: usize -} - -#[derive(Debug)] -pub enum Token { - - // Only used after tokenizing - PreGroup(LineLocation, VecDeque), - PreOperator(LineLocation, String), - PreNumber(LineLocation, String), - PreWord(LineLocation, String), - - // All PreGroups should vanish after operator folding - // All PreOperators should become Operators - // All PreNumbers should become Numbers - // All PreWords should become TODO. - - // Only used in tree - - Number(f64), - - // Functions - - // Operators - Multiply(VecDeque), - Divide(VecDeque), - Add(VecDeque), - Subtract(VecDeque), - Factorial(VecDeque), - Negative(VecDeque), - Power(VecDeque), - Modulo(VecDeque), - - //Function(String, VecDeque), -} +use crate::parser::Token; +use crate::parser::LineLocation; +use crate::parser::ParserError; #[inline(always)] fn update_line_location(mut t: Token, stop_i: usize) -> Token { @@ -60,16 +24,8 @@ fn update_line_location(mut t: Token, stop_i: usize) -> Token { } -/// Turn a string into a set of tokens. -/// Does not check syntax. Fails if `input` contains an invalid character. -// -// # Arguments: -// `input`: A string like `(-3*2.2)/3` -// -// # Returns: -// * `Ok(Vec)` if we were successful. -// * `Err(())` if we couldn't tokenize this string. -pub fn tokenize(input: &String) -> Result { + +pub fn tokenize(input: &String) -> Result { let mut t: Option = None; // The current token we're reading let mut g: Vec = Vec::with_capacity(8); // Vector of "grouping levels" g.push(Token::PreGroup(LineLocation{pos: 0, len: 0}, VecDeque::with_capacity(8))); @@ -190,7 +146,7 @@ pub fn tokenize(input: &String) -> Result { _ => panic!() }; - g_now.push_back(update_line_location(new_group, i)); + g_now.push_back(update_line_location(new_group, i+1)); }, // Space. Basic seperator. @@ -199,7 +155,7 @@ pub fn tokenize(input: &String) -> Result { } // Invalid token - _ => { return Err(()); } + _ => { return Err((LineLocation{pos: i, len: 1}, ParserError::InvalidChar)); } }; } @@ -210,5 +166,21 @@ pub fn tokenize(input: &String) -> Result { }; if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), input.len())); } + if g.len() != 1 { + let q: LineLocation = match g.last_mut().unwrap() { + Token::PreGroup(l, _) => *l, + _ => panic!() + }; + + let LineLocation{pos:p, ..} = q; + return Err(( + LineLocation{ + pos: p, + len: input.len() - p + }, + ParserError::MissingCloseParen + )) + } + return Ok(g.pop().unwrap()); } \ No newline at end of file diff --git a/src/parser/unwrap_groups.rs b/src/parser/unwrap_groups.rs index a79ecbd..6065dd7 100644 --- a/src/parser/unwrap_groups.rs +++ b/src/parser/unwrap_groups.rs @@ -1,12 +1,14 @@ -use crate::parser::tokenize::Token; +use crate::parser::Token; +use crate::parser::ParserError; +use crate::parser::LineLocation; -pub fn unwrap_groups(g: &mut Token) -> Result<(), ()> { +pub fn unwrap_groups(g: &mut Token) -> Result<(), (LineLocation, ParserError)> { match g { // If g is a PreGroup, unwrap it - Token::PreGroup(_, ref mut vec) => { + Token::PreGroup(l, ref mut vec) => { if vec.len() != 1 { - panic!(); + return Err((*l, ParserError::Syntax)); } let mut i = vec.pop_front().unwrap();