From 79be72903f427fc0bb75228a46118900f3ae3543 Mon Sep 17 00:00:00 2001
From: Mark
Date: Sun, 19 Mar 2023 17:46:30 -0700
Subject: [PATCH] Updated parser

---
 src/main.rs     | 133 +++++++++++++++++++++++++++++++++++++++++++++++-
 src/tokenize.rs |  66 +++++++++++++++---------
 2 files changed, 174 insertions(+), 25 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index 313e832..1e1b351 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -2,6 +2,7 @@ use std::io;
 use std::io::Write;
 //use std::io::Read;
 use std::sync::Arc;
+use std::collections::VecDeque;
 use std::sync::atomic::{AtomicBool, Ordering};
 
 use termcolor::{
@@ -59,6 +60,75 @@ fn prompt(
 	Ok(())
 }
 
+
+fn treefold(
+	mut exp: tokenize::Token, // Must be a group
+	check: fn(&tokenize::Token) -> bool,
+	op_type: u8,
+	new_token: fn(VecDeque<tokenize::Token>) -> tokenize::Token,
+) -> Result<tokenize::Token, ()> {
+
+	// Groups to process
+	let mut t_vec: VecDeque<&mut tokenize::Token> = VecDeque::with_capacity(32);
+	t_vec.push_back(&mut exp);
+
+	let mut out: Option<tokenize::Token> = None;
+
+	while t_vec.len() > 0 {
+
+		// The group we're currently working with
+		let g: &mut tokenize::Token = t_vec.pop_front().unwrap();
+		let g_inner: &mut VecDeque<tokenize::Token> = match g {
+			tokenize::Token::Group(ref mut x) => x,
+			_ => panic!()
+		};
+
+		let mut new: VecDeque<tokenize::Token> = VecDeque::with_capacity(8);
+
+		// Build new group array
+		while g_inner.len() > 0 {
+			let t: tokenize::Token = match g_inner.pop_front() {
+				Some(o) => o,
+				None => break
+			};
+
+			if check(&t) {
+				match op_type {
+					0 => {},
+					1 => {},
+					2 => {
+						let last: tokenize::Token = new.pop_back().unwrap();
+						let next: tokenize::Token = g_inner.pop_front().unwrap().clone();
+
+						let mut new_token_args: VecDeque<tokenize::Token> = VecDeque::with_capacity(2);
+						new_token_args.push_back(last);
+						new_token_args.push_back(next);
+						new.push_back(new_token(new_token_args));
+					},
+					_ => panic!()
+				};
+			} else {
+				new.push_back(t.clone());
+			}
+		}
+
+		*g_inner = new;
+	}
+	return Ok(exp);
+}
+
+
+fn is_mult(t: &tokenize::Token) -> bool {
+	match t {
+		tokenize::Token::Operator(s) => {s == "*"},
+		_ => false
+	}
+}
+
+fn new_mult(v: VecDeque<tokenize::Token>) -> tokenize::Token {
+	tokenize::Token::Mult(v)
+}
+
 fn main() -> Result<(), std::io::Error> {
 	let mut stdout = StandardStream::stdout(ColorChoice::Always);
 
@@ -78,7 +148,7 @@ fn main() -> Result<(), std::io::Error> {
 
 		// Tokenize input.
 		// Fail if we encounter invalid characters.
-		let tokens = match tokenize::tokenize(&input) {
+		let exp = match tokenize::tokenize(&input) {
 			Ok(v) => v,
 			Err(_) => {
 				continue;
@@ -91,8 +161,67 @@ fn main() -> Result<(), std::io::Error> {
 		stdout.reset()?;
 		write!(stdout, "Got {input}\n\n\n")?;
 
-		writeln!(stdout, "Tokenized: {tokens:#?}")?;
+		//writeln!(stdout, "Tokenized: {exp:#?}")?;
+
+		let q = treefold(
+			exp,
+			is_mult,
+			2,
+			new_mult
+		);
+		writeln!(stdout, "{q:#?}")?;
+
+
+		/*
+		// Groups to process
+		let mut t_vec: VecDeque<tokenize::Token> = VecDeque::with_capacity(32);
+		t_vec.push_back(exp);
+
+		while t_vec.len() > 0 {
+			let g: tokenize::Token = t_vec.pop_front().unwrap();
+			let mut g_inner: Vec<tokenize::Token> = match g {
+				tokenize::Token::Group(x) => x,
+				_ => panic!()
+			};
+
+			let mut new: Vec<tokenize::Token> = Vec::with_capacity(8);
+
+			// Parse binary operators
+			for o in ["*", "/", "+", "-"] {
+				let mut i = g_inner.iter();
+				loop {
+					let t = match i.next() {
+						Some(o) => o,
+						None => break
+					};
+
+					match t {
+						tokenize::Token::Operator(s) => {
+							if s == o {
+								let last = new.pop().unwrap();
+								let next = i.next().unwrap();
+
+								new.push(tokenize::Token::Op(
+									String::from(s),
+									Box::new(last.clone()),
+									Box::new(next.clone())
+								))
+							} else {
+								new.push(t.clone());
+							}
+						},
+						_ => {
+							new.push(t.clone());
+						}
+					}
+				}
+				g_inner = new.clone();
+				new = Vec::with_capacity(8);
+			}
+			writeln!(stdout, "{:?}", g_inner)?;
+		}
+		*/
 	}
 
 	writeln!(stdout, "Exiting.")?;
diff --git a/src/tokenize.rs b/src/tokenize.rs
index aa96446..514316a 100644
--- a/src/tokenize.rs
+++ b/src/tokenize.rs
@@ -1,13 +1,26 @@
+use std::collections::VecDeque;
+
+
 #[derive(Debug)]
 #[derive(Clone)]
 pub enum Token {
+
+	// Only used after tokenizing
 	Negative,
+	Factorial,
+	Group(VecDeque<Token>),    // Will be expanded during tree folding
+	Operator(String),          // Will become Ops during tree folding
+
+	// Used in both.
 	Number(String),
-	Operator(String),
 	Word(String),
-	Group(Vec<Token>),
+
+	// Only used in tree
+	Op(String, Box<Token>, Box<Token>),
+	Mult(VecDeque<Token>)
 }
 
+
 /// Turn a string into a set of tokens.
 /// Does not check syntax. Fails if `input` contains an invalid character.
 //
@@ -18,33 +31,40 @@ pub enum Token {
 // * `Ok(Vec<Token>)` if we were successful.
 // * `Err(())` if we couldn't tokenize this string.
 pub fn tokenize(input: &String) -> Result<Token, ()> {
-	let mut t: Option<Token> = None;
-	let mut g: Vec<Token> = Vec::with_capacity(8);
-	g.push(Token::Group(Vec::with_capacity(8)));
+	let mut t: Option<Token> = None; // The current token we're reading
+	let mut g: Vec<Token> = Vec::with_capacity(8); // Vector of "grouping levels"
+	g.push(Token::Group(VecDeque::with_capacity(8)));
 
 	for c in input.chars() {
-		let v_now: &mut Vec<Token> = match g.last_mut().unwrap() {
+
+		// The grouping level we're on now
+		let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() {
 			Token::Group(ref mut x) => x,
 			_ => panic!()
 		};
 
 		match c {
+			'!' => {
+				if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
+				g_now.push_back(Token::Factorial);
+			},
+
 			// Minus sign can be both a Negative and an Operator.
 			// Needs special treatment.
 			'-' => {
-				if t.is_some() { v_now.push(t.unwrap()); t = None; }
-				match v_now.last() {
+				if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
+				match g_now.back() {
 					// If previous token was any of the following,
 					// this is the "minus" operator
 					Some(Token::Number(_)) |
 					Some(Token::Group(_)) |
 					Some(Token::Word(_)) => {
-						v_now.push(Token::Operator(String::from(c)));
+						g_now.push_back(Token::Operator(String::from(c)));
 					},
 
 					// Otherwise, this is a negative sign.
-					_ => { t = Some(Token::Negative); }
+					_ => { g_now.push_back(Token::Negative); }
 				};
 			},
 
@@ -61,7 +81,7 @@ pub fn tokenize(input: &String) -> Result<Token, ()> {
 					// If we're not building a number, finalize
 					// previous token and start one.
 					_ => {
-						if t.is_some() { v_now.push(t.unwrap()); }
+						if t.is_some() { g_now.push_back(t.unwrap()); }
 						t = Some(Token::Number(String::from(c)));
 					}
 				};
@@ -80,7 +100,7 @@ pub fn tokenize(input: &String) -> Result<Token, ()> {
 					// If we're not building a number, finalize
 					// previous token and start one.
 					_ => {
-						if t.is_some() { v_now.push(t.unwrap()); }
+						if t.is_some() { g_now.push_back(t.unwrap()); }
 						t = Some(Token::Word(String::from(c)));
 					}
 				};
 			},
 
@@ -91,31 +111,31 @@ pub fn tokenize(input: &String) -> Result<Token, ()> {
 			// Always one character
 			'+' | '*' | '/' | '^' => {
 				// Finalize previous token
-				if t.is_some() { v_now.push(t.unwrap()); t = None; }
-				v_now.push(Token::Operator(String::from(c)));
+				if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
+				g_now.push_back(Token::Operator(String::from(c)));
 			}
 
 			// Groups
 			// Always one character
 			'(' => {
-				if t.is_some() { v_now.push(t.unwrap()); t = None; }
-				g.push(Token::Group(Vec::with_capacity(8)));
+				if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
+				g.push(Token::Group(VecDeque::with_capacity(8)));
			},
 			')' => {
-				if t.is_some() { v_now.push(t.unwrap()); t = None; }
+				if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
 				let new_group: Token = g.pop().unwrap();
 
-				let v_now: &mut Vec<Token> = match g.last_mut().unwrap() {
+				let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() {
 					Token::Group(ref mut x) => x,
 					_ => panic!()
 				};
 
-				v_now.push(new_group);
+				g_now.push_back(new_group);
 			},
 
 			// Space. Basic seperator.
 			' ' => {
-				if t.is_some() { v_now.push(t.unwrap()); t = None; }
+				if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
 			}
 
 			// Invalid token
@@ -124,11 +144,11 @@ pub fn tokenize(input: &String) -> Result<Token, ()> {
 	}
 
-	let v_now: &mut Vec<Token> = match g.last_mut().unwrap() {
+	let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() {
 		Token::Group(ref mut x) => x,
 		_ => panic!()
 	};
-	if t.is_some() { v_now.push(t.unwrap()); }
+	if t.is_some() { g_now.push_back(t.unwrap()); }
 
-	return Ok(Token::Group(v_now.to_vec()));
+	return Ok(g.pop().unwrap());
 }
\ No newline at end of file
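
The treefold() pass added in src/main.rs walks a Group's token queue left to right and, wherever `check` matches, replaces the matching operator and its two neighbours with a single tree node built by `new_token` (op_type 2 is the binary case; 0 and 1 are still stubs). The standalone sketch below shows the same left-to-right fold with a stripped-down Token type; `fold_binary` and the simplified variants are illustrative names for this note only, not code from the patch.

use std::collections::VecDeque;

#[derive(Debug)]
enum Token {
	Number(String),
	Operator(String),
	Mult(VecDeque<Token>),
}

// Walk a flat token queue left to right. Whenever `check` matches a token,
// pop the node to its left (already processed) and the token to its right
// (not yet processed), and replace all three with one node built by `new_token`.
fn fold_binary(
	mut tokens: VecDeque<Token>,
	check: fn(&Token) -> bool,
	new_token: fn(VecDeque<Token>) -> Token,
) -> VecDeque<Token> {
	let mut out: VecDeque<Token> = VecDeque::with_capacity(tokens.len());

	while let Some(t) = tokens.pop_front() {
		if check(&t) {
			let left = out.pop_back().expect("operator needs a left operand");
			let right = tokens.pop_front().expect("operator needs a right operand");

			let mut args = VecDeque::with_capacity(2);
			args.push_back(left);
			args.push_back(right);

			// The new node goes back on the output queue, so a later matching
			// operator can fold it again: chains fold left-associatively.
			out.push_back(new_token(args));
		} else {
			out.push_back(t);
		}
	}
	out
}

fn main() {
	// "2 * 3 * 4" as a flat token queue
	let tokens = VecDeque::from([
		Token::Number("2".into()),
		Token::Operator("*".into()),
		Token::Number("3".into()),
		Token::Operator("*".into()),
		Token::Number("4".into()),
	]);

	// Fold every "*" into a Mult node: Mult([Mult([2, 3]), 4])
	let folded = fold_binary(
		tokens,
		|t| matches!(t, Token::Operator(s) if s == "*"),
		Token::Mult,
	);
	println!("{folded:#?}");
}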
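The reworked tokenize() keeps a stack of "grouping levels": '(' pushes a fresh group, ')' pops the current group and appends it to the one underneath, and the final result is the single group left on the stack. A minimal sketch of that stack discipline, assuming a simplified Token and one token per digit (the real tokenizer also accumulates multi-character numbers and words):

use std::collections::VecDeque;

#[derive(Debug)]
enum Token {
	Number(String),
	Operator(String),
	Group(VecDeque<Token>),
}

fn tokenize_groups(input: &str) -> Result<Token, ()> {
	// g[0] is the outermost group; g.last_mut() is the group we're filling now.
	let mut g: Vec<VecDeque<Token>> = vec![VecDeque::new()];

	for c in input.chars() {
		match c {
			// New grouping level
			'(' => g.push(VecDeque::new()),

			// Close the current level and attach it to its parent.
			// Popping the outermost level means parentheses were unbalanced.
			')' => {
				let done = g.pop().ok_or(())?;
				g.last_mut().ok_or(())?.push_back(Token::Group(done));
			},

			'0'..='9' => g.last_mut().ok_or(())?.push_back(Token::Number(c.to_string())),

			'+' | '-' | '*' | '/' => {
				g.last_mut().ok_or(())?.push_back(Token::Operator(c.to_string()));
			},

			' ' => {},
			_ => return Err(()),
		}
	}

	// Balanced input leaves exactly one level on the stack.
	if g.len() != 1 { return Err(()); }
	Ok(Token::Group(g.pop().unwrap()))
}

fn main() {
	// Group([Number("1"), Operator("+"), Group([Number("2"), Operator("*"), Number("3")])])
	println!("{:#?}", tokenize_groups("1 + (2 * 3)"));
}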