Fixed parser bugs

Added PreGroup unwrapper
Added token location tracking
pull/2/head
Mark 2023-03-21 19:02:18 -07:00
parent 83961409a7
commit b942e9dcf9
Signed by: Mark
GPG Key ID: AD62BB059C2AAEE4
7 changed files with 190 additions and 96 deletions

View File

@ -91,8 +91,8 @@ fn main() -> Result<(), std::io::Error> {
write!(stdout, "\n => ")?;
stdout.reset()?;
write!(stdout, "Got {input}\n\n\n")?;
parser::parse(&mut g).expect("Could not fold");
parser::parse(&mut g).expect("Could not parse");
writeln!(stdout, "Tokenized: {g:#?}")?;
}

View File

@ -1,14 +1,18 @@
pub mod tokenize;
mod replace_words;
mod replace_pre;
mod fold_operators;
mod unwrap_groups;
use crate::parser::tokenize::Token;
use crate::parser::replace_words::replace_words;
use crate::parser::replace_pre::replace_pre;
use crate::parser::fold_operators::fold_operators;
use crate::parser::unwrap_groups::unwrap_groups;
pub fn parse(g: &mut Token) -> Result<(), ()> {
replace_words(g)?;
replace_pre(g)?;
fold_operators(g)?;
unwrap_groups(g)?;
return Ok(());
}

View File

@ -23,7 +23,15 @@ fn fold_operators_once(
// The group we're currently working with
let g: &mut Token = t_vec.pop_front().unwrap();
let g_inner: &mut VecDeque<Token> = match g {
Token::PreGroup(ref mut x) => x,
Token::PreGroup(_, ref mut x) |
Token::Multiply(ref mut x) |
Token::Divide(ref mut x) |
Token::Add(ref mut x) |
Token::Subtract(ref mut x) |
Token::Factorial(ref mut x) |
Token::Negative(ref mut x) |
Token::Power(ref mut x) |
Token::Modulo(ref mut x) => x,
_ => panic!()
};
@ -31,15 +39,29 @@ fn fold_operators_once(
// Build new group array
while g_inner.len() > 0 {
let t: Token = match g_inner.pop_front() {
let mut t: Token = match g_inner.pop_front() {
Some(o) => o,
None => break
};
let s: &str;
if let Token::PreOperator(ref x) = t {
if let Token::PreOperator(_, ref x) = t {
s = x;
} else {
match t {
Token::PreGroup(_, _) |
Token::Multiply(_) |
Token::Divide(_) |
Token::Add(_) |
Token::Subtract(_) |
Token::Factorial(_) |
Token::Negative(_) |
Token::Power(_) |
Token::Modulo(_) => {
fold_operators_once(&mut t, op_type, check, new_token)?;
},
_ => {}
};
new.push_back(t);
continue;
}
@ -49,8 +71,8 @@ fn fold_operators_once(
OperatorType::UnaryLeft => {
let mut last: Token = new.pop_back().unwrap();
if let Token::PreGroup(_) = last {
fold_operators_once(&mut last, op_type, check, new_token).unwrap();
if let Token::PreGroup(_, _) = last {
fold_operators_once(&mut last, op_type, check, new_token)?;
}
let mut new_token_args: VecDeque<Token> = VecDeque::with_capacity(1);
@ -60,8 +82,8 @@ fn fold_operators_once(
OperatorType::UnaryRight => {
let mut next: Token = g_inner.pop_front().unwrap();
if let Token::PreGroup(_) = next {
fold_operators_once(&mut next, op_type, check, new_token).unwrap();
if let Token::PreGroup(_, _) = next {
fold_operators_once(&mut next, op_type, check, new_token)?;
}
let mut new_token_args: VecDeque<Token> = VecDeque::with_capacity(1);
@ -73,11 +95,11 @@ fn fold_operators_once(
let mut next: Token = g_inner.pop_front().unwrap();
// TODO: append to t_vec and do this without recursion.
if let Token::PreGroup(_) = last {
fold_operators_once(&mut last, op_type, check, new_token).unwrap();
if let Token::PreGroup(_, _) = last {
fold_operators_once(&mut last, op_type, check, new_token)?;
}
if let Token::PreGroup(_) = next {
fold_operators_once(&mut next, op_type, check, new_token).unwrap();
if let Token::PreGroup(_, _) = next {
fold_operators_once(&mut next, op_type, check, new_token)?;
}
let mut new_token_args: VecDeque<Token> = VecDeque::with_capacity(2);
@ -87,6 +109,9 @@ fn fold_operators_once(
}
};
} else {
if let Token::PreGroup(_, _) = t {
fold_operators_once(&mut t, op_type, check, new_token)?;
}
new.push_back(t);
}
}
@ -139,7 +164,6 @@ pub fn fold_operators(exp: &mut Token) -> Result<(), ()> {
}
)?;
fold_operators_once(
exp, &OperatorType::Binary,
|s| s=="mod",

31
src/parser/replace_pre.rs Normal file
View File

@ -0,0 +1,31 @@
use crate::parser::tokenize::Token;
/// Recursively rewrites tokenizer output (`Pre*` tokens) in place.
///
/// * `PreGroup`: every child token is processed recursively.
/// * `PreNumber`: the text is parsed as an `f64` and the token becomes
///   `Token::Number`.
/// * `PreWord`: the word `"mod"` becomes a `PreOperator` carrying the same
///   location; any other word is rejected.
/// * `PreOperator`: left untouched.
///
/// # Errors
///
/// Returns `Err(())` when a number literal cannot be parsed (for example
/// `"1.2.3"`) or when a word other than `"mod"` is encountered.
///
/// # Panics
///
/// Panics if `g` is any other variant.
pub fn replace_pre(g: &mut Token) -> Result<(), ()> {
    match g {
        Token::PreGroup(_, vec) => {
            for i in vec.iter_mut() {
                replace_pre(i)?;
            }
        }
        Token::PreNumber(_, s) => {
            // The number text comes straight from user input, so a malformed
            // literal is reported as an error instead of aborting the program.
            let n = s.parse::<f64>().map_err(|_| ())?;
            *g = Token::Number(n);
        }
        Token::PreWord(l, s) => {
            if s.as_str() == "mod" {
                *g = Token::PreOperator(*l, String::from("mod"));
            } else {
                return Err(());
            }
        }
        Token::PreOperator(_, _) => {}
        _ => panic!("replace_pre called on a token that is not a Pre* token"),
    };

    Ok(())
}

View File

@ -1,47 +0,0 @@
use std::collections::VecDeque;
use crate::parser::tokenize::Token;
/// Rewrites the direct children of the group `g`, recursing into nested groups.
///
/// * `PreGroup` children are processed recursively.
/// * `PreNumber` children are parsed as `f64` and become `Token::Number`.
/// * `PreWord` children spelling `"to"` or `"mod"` become a `PreOperator`
///   with the same text; other words are kept unchanged.
/// * Every other token is kept unchanged.
///
/// # Errors
///
/// Returns `Err(())` when a number literal cannot be parsed. In that case
/// the contents of `g` are left partially consumed.
///
/// # Panics
///
/// Panics if `g` is not a `PreGroup`.
pub fn replace_words(g: &mut Token) -> Result<(), ()> {
    let g_inner: &mut VecDeque<Token> = match g {
        Token::PreGroup(ref mut x) => x,
        _ => panic!("replace_words called on a token that is not a PreGroup"),
    };

    let mut new: VecDeque<Token> = VecDeque::with_capacity(8);

    while let Some(mut t) = g_inner.pop_front() {
        let replaced: Token = match t {
            Token::PreGroup(_) => {
                replace_words(&mut t)?;
                t
            }
            // Number text is user input: report a malformed literal as an
            // error rather than panicking.
            Token::PreNumber(ref s) => Token::Number(s.parse::<f64>().map_err(|_| ())?),
            Token::PreWord(ref s) if s == "to" || s == "mod" => Token::PreOperator(s.clone()),
            _ => t,
        };
        new.push_back(replaced);
    }

    *g_inner = new;
    Ok(())
}

View File

@ -1,15 +1,20 @@
use std::collections::VecDeque;
/// Span of a token within the input line.
#[derive(Debug, Copy, Clone)]
pub struct LineLocation {
    /// Index at which the token starts, as assigned by the tokenizer.
    pos: usize,
    /// Extent of the token, counted from `pos`.
    len: usize,
}
#[derive(Debug)]
#[derive(Clone)]
pub enum Token {
// Only used after tokenizing
PreGroup(VecDeque<Token>),
PreOperator(String),
PreNumber(String),
PreWord(String),
PreGroup(LineLocation, VecDeque<Token>),
PreOperator(LineLocation, String),
PreNumber(LineLocation, String),
PreWord(LineLocation, String),
// All PreGroups should vanish after operator folding
// All PreOperators should become Operators
@ -17,7 +22,12 @@ pub enum Token {
// All PreWords should become TODO.
// Only used in tree
Number(f64),
// Functions
// Operators
Multiply(VecDeque<Token>),
Divide(VecDeque<Token>),
Add(VecDeque<Token>),
@ -30,6 +40,25 @@ pub enum Token {
//Function(String, VecDeque<Token>),
}
#[inline(always)]
fn update_line_location(mut t: Token, stop_i: usize) -> Token {
match t {
Token::PreGroup(ref mut l, _) |
Token::PreOperator(ref mut l, _) |
Token::PreNumber(ref mut l, _) |
Token::PreWord(ref mut l, _) => {
let LineLocation{pos, .. } = l;
*l = LineLocation{
pos: *pos,
len: stop_i - *pos,
};
},
_ => panic!()
};
return t;
}
/// Turn a string into a set of tokens.
/// Does not check syntax. Fails if `input` contains an invalid character.
@ -43,38 +72,55 @@ pub enum Token {
pub fn tokenize(input: &String) -> Result<Token, ()> {
let mut t: Option<Token> = None; // The current token we're reading
let mut g: Vec<Token> = Vec::with_capacity(8); // Vector of "grouping levels"
g.push(Token::PreGroup(VecDeque::with_capacity(8)));
g.push(Token::PreGroup(LineLocation{pos: 0, len: 0}, VecDeque::with_capacity(8)));
for c in input.chars() {
for (i, c) in input.chars().enumerate() {
// The grouping level we're on now
let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() {
Token::PreGroup(ref mut x) => x,
Token::PreGroup(_, ref mut x) => x,
_ => panic!()
};
match c {
'!' => {
if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
g_now.push_back(Token::PreOperator(String::from("!")));
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; }
g_now.push_back(
Token::PreOperator(
LineLocation{pos: i, len: 1},
String::from("!")
)
);
},
// Minus sign can be both a Negative and an Operator.
// Needs special treatment.
'-' => {
if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; }
match g_now.back() {
// If previous token was any of the following,
// this is the "minus" operator
Some(Token::PreNumber(_)) |
Some(Token::PreGroup(_)) |
Some(Token::PreWord(_)) => {
g_now.push_back(Token::PreOperator(String::from(c)));
Some(Token::PreNumber(_, _)) |
Some(Token::PreGroup(_, _)) |
Some(Token::PreWord(_, _)) => {
g_now.push_back(
Token::PreOperator(
LineLocation{pos: i, len: 1},
String::from(c)
)
);
},
// Otherwise, this is a negative sign.
_ => { g_now.push_back(Token::PreOperator(String::from("neg"))); }
_ => {
g_now.push_back(
Token::PreOperator(
LineLocation{pos: i, len: 1},
String::from("neg")
)
);
}
};
},
@ -84,15 +130,15 @@ pub fn tokenize(input: &String) -> Result<Token, ()> {
match &mut t {
// If we're already building a number,
// append.
Some(Token::PreNumber(val)) => {
Some(Token::PreNumber(_, val)) => {
val.push(if c == ',' {'.'} else {c});
},
// If we're not building a number, finalize
// previous token and start one.
_ => {
if t.is_some() { g_now.push_back(t.unwrap()); }
t = Some(Token::PreNumber(String::from(c)));
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); }
t = Some(Token::PreNumber(LineLocation{pos: i, len: 0}, String::from(c)));
}
};
},
@ -103,15 +149,15 @@ pub fn tokenize(input: &String) -> Result<Token, ()> {
match &mut t {
// If we're already building a word,
// append.
Some(Token::PreWord(val)) => {
Some(Token::PreWord(_, val)) => {
val.push(c);
},
// If we're not building a word, finalize
// previous token and start one.
_ => {
if t.is_some() { g_now.push_back(t.unwrap()); }
t = Some(Token::PreWord(String::from(c)));
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); }
t = Some(Token::PreWord(LineLocation{pos: i, len: 0}, String::from(c)));
}
};
},
@ -125,31 +171,31 @@ pub fn tokenize(input: &String) -> Result<Token, ()> {
'^' |
'%' => {
// Finalize previous token
if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
g_now.push_back(Token::PreOperator(String::from(c)));
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; }
g_now.push_back(Token::PreOperator(LineLocation{pos: i, len: 1}, String::from(c)));
}
// Groups
// Always one character
'(' => {
if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
g.push(Token::PreGroup(VecDeque::with_capacity(8)));
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; }
g.push(Token::PreGroup(LineLocation{pos: i, len: 0}, VecDeque::with_capacity(8)));
},
')' => {
if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; }
let new_group: Token = g.pop().unwrap();
let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() {
Token::PreGroup(ref mut x) => x,
Token::PreGroup(_, ref mut x) => x,
_ => panic!()
};
g_now.push_back(new_group);
g_now.push_back(update_line_location(new_group, i));
},
// Space. Basic separator.
' ' => {
if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; }
}
// Invalid token
@ -159,10 +205,10 @@ pub fn tokenize(input: &String) -> Result<Token, ()> {
let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() {
Token::PreGroup(ref mut x) => x,
Token::PreGroup(_, ref mut x) => x,
_ => panic!()
};
if t.is_some() { g_now.push_back(t.unwrap()); }
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), input.len())); }
return Ok(g.pop().unwrap());
}

View File

@ -0,0 +1,36 @@
use crate::parser::tokenize::Token;
/// Removes `PreGroup` wrappers from the tree rooted at `g`, in place.
///
/// * A `PreGroup` holding exactly one token is replaced by that token,
///   after the token itself has been unwrapped recursively.
/// * Operator tokens (`Multiply`, `Divide`, `Add`, `Subtract`, `Factorial`,
///   `Negative`, `Power`, `Modulo`) have each of their arguments unwrapped.
/// * Every other token is left as it is.
///
/// # Errors
///
/// Returns `Err(())` when a `PreGroup` does not contain exactly one token
/// (for example an empty group), or when unwrapping a nested token fails.
pub fn unwrap_groups(g: &mut Token) -> Result<(), ()> {
    match g {
        // If g is a PreGroup, unwrap it
        Token::PreGroup(_, vec) => {
            // A group that is empty or still holds several tokens cannot be
            // collapsed into a single token; report it instead of panicking.
            if vec.len() != 1 {
                return Err(());
            }

            let mut inner = vec.pop_front().ok_or(())?;
            unwrap_groups(&mut inner)?;
            *g = inner;
        }

        // If g has sub-elements, recursive call
        Token::Multiply(vec)
        | Token::Divide(vec)
        | Token::Add(vec)
        | Token::Subtract(vec)
        | Token::Factorial(vec)
        | Token::Negative(vec)
        | Token::Power(vec)
        | Token::Modulo(vec) => {
            for i in vec.iter_mut() {
                unwrap_groups(i)?;
            }
        }

        // Otherwise, skip g.
        _ => {}
    };

    Ok(())
}