diff --git a/src/main.rs b/src/main.rs index e83614b..e0ea3cd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -91,8 +91,8 @@ fn main() -> Result<(), std::io::Error> { write!(stdout, "\n => ")?; stdout.reset()?; write!(stdout, "Got {input}\n\n\n")?; - - parser::parse(&mut g).expect("Could not fold"); + + parser::parse(&mut g).expect("Could not parse"); writeln!(stdout, "Tokenized: {g:#?}")?; } diff --git a/src/parser.rs b/src/parser.rs index c6ac76c..82c68f0 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,14 +1,18 @@ pub mod tokenize; -mod replace_words; +mod replace_pre; mod fold_operators; +mod unwrap_groups; use crate::parser::tokenize::Token; -use crate::parser::replace_words::replace_words; +use crate::parser::replace_pre::replace_pre; use crate::parser::fold_operators::fold_operators; +use crate::parser::unwrap_groups::unwrap_groups; + pub fn parse(g: &mut Token) -> Result<(), ()> { - replace_words(g)?; + replace_pre(g)?; fold_operators(g)?; + unwrap_groups(g)?; return Ok(()); } \ No newline at end of file diff --git a/src/parser/fold_operators.rs b/src/parser/fold_operators.rs index 3e00a6c..a205339 100644 --- a/src/parser/fold_operators.rs +++ b/src/parser/fold_operators.rs @@ -23,7 +23,15 @@ fn fold_operators_once( // The group we're currently working with let g: &mut Token = t_vec.pop_front().unwrap(); let g_inner: &mut VecDeque = match g { - Token::PreGroup(ref mut x) => x, + Token::PreGroup(_, ref mut x) | + Token::Multiply(ref mut x) | + Token::Divide(ref mut x) | + Token::Add(ref mut x) | + Token::Subtract(ref mut x) | + Token::Factorial(ref mut x) | + Token::Negative(ref mut x) | + Token::Power(ref mut x) | + Token::Modulo(ref mut x) => x, _ => panic!() }; @@ -31,15 +39,29 @@ fn fold_operators_once( // Build new group array while g_inner.len() > 0 { - let t: Token = match g_inner.pop_front() { + let mut t: Token = match g_inner.pop_front() { Some(o) => o, None => break }; let s: &str; - if let Token::PreOperator(ref x) = t { + if let Token::PreOperator(_, ref x) = t { s = x; } else { + match t { + Token::PreGroup(_, _) | + Token::Multiply(_) | + Token::Divide(_) | + Token::Add(_) | + Token::Subtract(_) | + Token::Factorial(_) | + Token::Negative(_) | + Token::Power(_) | + Token::Modulo(_) => { + fold_operators_once(&mut t, op_type, check, new_token)?; + }, + _ => {} + }; new.push_back(t); continue; } @@ -49,8 +71,8 @@ fn fold_operators_once( OperatorType::UnaryLeft => { let mut last: Token = new.pop_back().unwrap(); - if let Token::PreGroup(_) = last { - fold_operators_once(&mut last, op_type, check, new_token).unwrap(); + if let Token::PreGroup(_, _) = last { + fold_operators_once(&mut last, op_type, check, new_token)?; } let mut new_token_args: VecDeque = VecDeque::with_capacity(1); @@ -60,8 +82,8 @@ fn fold_operators_once( OperatorType::UnaryRight => { let mut next: Token = g_inner.pop_front().unwrap(); - if let Token::PreGroup(_) = next { - fold_operators_once(&mut next, op_type, check, new_token).unwrap(); + if let Token::PreGroup(_, _) = next { + fold_operators_once(&mut next, op_type, check, new_token)?; } let mut new_token_args: VecDeque = VecDeque::with_capacity(1); @@ -73,11 +95,11 @@ fn fold_operators_once( let mut next: Token = g_inner.pop_front().unwrap(); // TODO: append to t_vec and do this without recursion. - if let Token::PreGroup(_) = last { - fold_operators_once(&mut last, op_type, check, new_token).unwrap(); + if let Token::PreGroup(_, _) = last { + fold_operators_once(&mut last, op_type, check, new_token)?; } - if let Token::PreGroup(_) = next { - fold_operators_once(&mut next, op_type, check, new_token).unwrap(); + if let Token::PreGroup(_, _) = next { + fold_operators_once(&mut next, op_type, check, new_token)?; } let mut new_token_args: VecDeque = VecDeque::with_capacity(2); @@ -87,6 +109,9 @@ fn fold_operators_once( } }; } else { + if let Token::PreGroup(_, _) = t { + fold_operators_once(&mut t, op_type, check, new_token)?; + } new.push_back(t); } } @@ -139,7 +164,6 @@ pub fn fold_operators(exp: &mut Token) -> Result<(), ()> { } )?; - fold_operators_once( exp, &OperatorType::Binary, |s| s=="mod", diff --git a/src/parser/replace_pre.rs b/src/parser/replace_pre.rs new file mode 100644 index 0000000..cca0d42 --- /dev/null +++ b/src/parser/replace_pre.rs @@ -0,0 +1,31 @@ +use crate::parser::tokenize::Token; + +pub fn replace_pre(g: &mut Token) -> Result<(), ()> { + + match g { + Token::PreGroup(_, ref mut vec) => { + for i in vec.iter_mut() { + replace_pre(i)?; + } + }, + Token::PreNumber(_, s) => { + let n = match s.parse() { + Ok(n) => n, + Err(_) => panic!() + }; + *g = Token::Number(n); + } + Token::PreWord(l, ref s) => { + if s == "mod" { + *g = Token::PreOperator(*l, String::from("mod")); + } else { + return Err(()); + //new.push_back(t); + } + }, + Token::PreOperator(_, _) => {}, + _ => { panic!(); } + }; + + return Ok(()); +} \ No newline at end of file diff --git a/src/parser/replace_words.rs b/src/parser/replace_words.rs deleted file mode 100644 index f53a05d..0000000 --- a/src/parser/replace_words.rs +++ /dev/null @@ -1,47 +0,0 @@ -use std::collections::VecDeque; -use crate::parser::tokenize::Token; - -pub fn replace_words(g: &mut Token) -> Result<(), ()> { - let g_inner: &mut VecDeque = match g { - Token::PreGroup(ref mut x) => x, - _ => panic!() - }; - - let mut new: VecDeque = VecDeque::with_capacity(8); - - while g_inner.len() > 0 { - let mut t: Token = match g_inner.pop_front() { - Some(o) => o, - None => break - }; - - match t { - Token::PreGroup(_) => { - replace_words(&mut t)?; - new.push_back(t); - }, - Token::PreNumber(ref s) => { - let n = match s.parse() { - Ok(n) => n, - Err(_) => panic!() - }; - - new.push_back(Token::Number(n)); - } - Token::PreWord(ref s) => { - if s == "to" { - new.push_back(Token::PreOperator(String::from("to"))); - } else if s == "mod" { - new.push_back(Token::PreOperator(String::from("mod"))); - } else { - new.push_back(t); - } - }, - _ => { new.push_back(t); } - }; - } - - *g_inner = new; - - return Ok(()); -} \ No newline at end of file diff --git a/src/parser/tokenize.rs b/src/parser/tokenize.rs index c0cf05b..069284d 100644 --- a/src/parser/tokenize.rs +++ b/src/parser/tokenize.rs @@ -1,15 +1,20 @@ use std::collections::VecDeque; +#[derive(Debug)] +#[derive(Copy, Clone)] +pub struct LineLocation { + pos: usize, + len: usize +} #[derive(Debug)] -#[derive(Clone)] pub enum Token { // Only used after tokenizing - PreGroup(VecDeque), - PreOperator(String), - PreNumber(String), - PreWord(String), + PreGroup(LineLocation, VecDeque), + PreOperator(LineLocation, String), + PreNumber(LineLocation, String), + PreWord(LineLocation, String), // All PreGroups should vanish after operator folding // All PreOperators should become Operators @@ -17,7 +22,12 @@ pub enum Token { // All PreWords should become TODO. // Only used in tree + Number(f64), + + // Functions + + // Operators Multiply(VecDeque), Divide(VecDeque), Add(VecDeque), @@ -30,6 +40,25 @@ pub enum Token { //Function(String, VecDeque), } +#[inline(always)] +fn update_line_location(mut t: Token, stop_i: usize) -> Token { + match t { + Token::PreGroup(ref mut l, _) | + Token::PreOperator(ref mut l, _) | + Token::PreNumber(ref mut l, _) | + Token::PreWord(ref mut l, _) => { + let LineLocation{pos, .. } = l; + *l = LineLocation{ + pos: *pos, + len: stop_i - *pos, + }; + }, + _ => panic!() + }; + + return t; +} + /// Turn a string into a set of tokens. /// Does not check syntax. Fails if `input` contains an invalid character. @@ -43,38 +72,55 @@ pub enum Token { pub fn tokenize(input: &String) -> Result { let mut t: Option = None; // The current token we're reading let mut g: Vec = Vec::with_capacity(8); // Vector of "grouping levels" - g.push(Token::PreGroup(VecDeque::with_capacity(8))); + g.push(Token::PreGroup(LineLocation{pos: 0, len: 0}, VecDeque::with_capacity(8))); - for c in input.chars() { + for (i, c) in input.chars().enumerate() { // The grouping level we're on now let g_now: &mut VecDeque = match g.last_mut().unwrap() { - Token::PreGroup(ref mut x) => x, + Token::PreGroup(_, ref mut x) => x, _ => panic!() }; match c { '!' => { - if t.is_some() { g_now.push_back(t.unwrap()); t = None; } - g_now.push_back(Token::PreOperator(String::from("!"))); + if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; } + g_now.push_back( + Token::PreOperator( + LineLocation{pos: i, len: 1}, + String::from("!") + ) + ); }, // Minus sign can be both a Negative and an Operator. // Needs special treatment. '-' => { - if t.is_some() { g_now.push_back(t.unwrap()); t = None; } + if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; } match g_now.back() { // If previous token was any of the following, // this is the "minus" operator - Some(Token::PreNumber(_)) | - Some(Token::PreGroup(_)) | - Some(Token::PreWord(_)) => { - g_now.push_back(Token::PreOperator(String::from(c))); + Some(Token::PreNumber(_, _)) | + Some(Token::PreGroup(_, _)) | + Some(Token::PreWord(_, _)) => { + g_now.push_back( + Token::PreOperator( + LineLocation{pos: i, len: 1}, + String::from(c) + ) + ); }, // Otherwise, this is a negative sign. - _ => { g_now.push_back(Token::PreOperator(String::from("neg"))); } + _ => { + g_now.push_back( + Token::PreOperator( + LineLocation{pos: i, len: 1}, + String::from("neg") + ) + ); + } }; }, @@ -84,15 +130,15 @@ pub fn tokenize(input: &String) -> Result { match &mut t { // If we're already building a number, // append. - Some(Token::PreNumber(val)) => { + Some(Token::PreNumber(_, val)) => { val.push(if c == ',' {'.'} else {c}); }, // If we're not building a number, finalize // previous token and start one. _ => { - if t.is_some() { g_now.push_back(t.unwrap()); } - t = Some(Token::PreNumber(String::from(c))); + if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); } + t = Some(Token::PreNumber(LineLocation{pos: i, len: 0}, String::from(c))); } }; }, @@ -103,15 +149,15 @@ pub fn tokenize(input: &String) -> Result { match &mut t { // If we're already building a number, // append. - Some(Token::PreWord(val)) => { + Some(Token::PreWord(_, val)) => { val.push(c); }, // If we're not building a number, finalize // previous token and start one. _ => { - if t.is_some() { g_now.push_back(t.unwrap()); } - t = Some(Token::PreWord(String::from(c))); + if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); } + t = Some(Token::PreWord(LineLocation{pos: i, len: 0}, String::from(c))); } }; }, @@ -125,31 +171,31 @@ pub fn tokenize(input: &String) -> Result { '^' | '%' => { // Finalize previous token - if t.is_some() { g_now.push_back(t.unwrap()); t = None; } - g_now.push_back(Token::PreOperator(String::from(c))); + if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; } + g_now.push_back(Token::PreOperator(LineLocation{pos: i, len: 1}, String::from(c))); } // Groups // Always one character '(' => { - if t.is_some() { g_now.push_back(t.unwrap()); t = None; } - g.push(Token::PreGroup(VecDeque::with_capacity(8))); + if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; } + g.push(Token::PreGroup(LineLocation{pos: i, len: 0}, VecDeque::with_capacity(8))); }, ')' => { - if t.is_some() { g_now.push_back(t.unwrap()); t = None; } + if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; } let new_group: Token = g.pop().unwrap(); let g_now: &mut VecDeque = match g.last_mut().unwrap() { - Token::PreGroup(ref mut x) => x, + Token::PreGroup(_, ref mut x) => x, _ => panic!() }; - g_now.push_back(new_group); + g_now.push_back(update_line_location(new_group, i)); }, // Space. Basic seperator. ' ' => { - if t.is_some() { g_now.push_back(t.unwrap()); t = None; } + if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; } } // Invalid token @@ -159,10 +205,10 @@ pub fn tokenize(input: &String) -> Result { let g_now: &mut VecDeque = match g.last_mut().unwrap() { - Token::PreGroup(ref mut x) => x, + Token::PreGroup(_, ref mut x) => x, _ => panic!() }; - if t.is_some() { g_now.push_back(t.unwrap()); } + if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), input.len())); } return Ok(g.pop().unwrap()); } \ No newline at end of file diff --git a/src/parser/unwrap_groups.rs b/src/parser/unwrap_groups.rs new file mode 100644 index 0000000..a79ecbd --- /dev/null +++ b/src/parser/unwrap_groups.rs @@ -0,0 +1,36 @@ +use crate::parser::tokenize::Token; + +pub fn unwrap_groups(g: &mut Token) -> Result<(), ()> { + + match g { + // If g is a PreGroup, unwrap it + Token::PreGroup(_, ref mut vec) => { + if vec.len() != 1 { + panic!(); + } + + let mut i = vec.pop_front().unwrap(); + unwrap_groups(&mut i)?; + *g = i; + }, + + // If g has sub-elements, recursive call + Token::Multiply(ref mut vec) | + Token::Divide(ref mut vec) | + Token::Add(ref mut vec) | + Token::Subtract(ref mut vec) | + Token::Factorial(ref mut vec) | + Token::Negative(ref mut vec) | + Token::Power(ref mut vec) | + Token::Modulo(ref mut vec) => { + for i in vec.iter_mut() { + unwrap_groups(i)?; + } + }, + + // Otherwise, skip g. + _ => {} + }; + + return Ok(()); +} \ No newline at end of file