Fixed parser bugs

Added PreGroup unwrapper
Added token location tracking
pull/2/head
Mark 2023-03-21 19:02:18 -07:00
parent 83961409a7
commit b942e9dcf9
Signed by: Mark
GPG Key ID: AD62BB059C2AAEE4
7 changed files with 190 additions and 96 deletions

View File

@ -92,7 +92,7 @@ fn main() -> Result<(), std::io::Error> {
stdout.reset()?; stdout.reset()?;
write!(stdout, "Got {input}\n\n\n")?; write!(stdout, "Got {input}\n\n\n")?;
parser::parse(&mut g).expect("Could not fold"); parser::parse(&mut g).expect("Could not parse");
writeln!(stdout, "Tokenized: {g:#?}")?; writeln!(stdout, "Tokenized: {g:#?}")?;
} }

View File

@ -1,14 +1,18 @@
pub mod tokenize; pub mod tokenize;
mod replace_words; mod replace_pre;
mod fold_operators; mod fold_operators;
mod unwrap_groups;
use crate::parser::tokenize::Token; use crate::parser::tokenize::Token;
use crate::parser::replace_words::replace_words; use crate::parser::replace_pre::replace_pre;
use crate::parser::fold_operators::fold_operators; use crate::parser::fold_operators::fold_operators;
use crate::parser::unwrap_groups::unwrap_groups;
pub fn parse(g: &mut Token) -> Result<(), ()> { pub fn parse(g: &mut Token) -> Result<(), ()> {
replace_words(g)?; replace_pre(g)?;
fold_operators(g)?; fold_operators(g)?;
unwrap_groups(g)?;
return Ok(()); return Ok(());
} }

View File

@ -23,7 +23,15 @@ fn fold_operators_once(
// The group we're currently working with // The group we're currently working with
let g: &mut Token = t_vec.pop_front().unwrap(); let g: &mut Token = t_vec.pop_front().unwrap();
let g_inner: &mut VecDeque<Token> = match g { let g_inner: &mut VecDeque<Token> = match g {
Token::PreGroup(ref mut x) => x, Token::PreGroup(_, ref mut x) |
Token::Multiply(ref mut x) |
Token::Divide(ref mut x) |
Token::Add(ref mut x) |
Token::Subtract(ref mut x) |
Token::Factorial(ref mut x) |
Token::Negative(ref mut x) |
Token::Power(ref mut x) |
Token::Modulo(ref mut x) => x,
_ => panic!() _ => panic!()
}; };
@ -31,15 +39,29 @@ fn fold_operators_once(
// Build new group array // Build new group array
while g_inner.len() > 0 { while g_inner.len() > 0 {
let t: Token = match g_inner.pop_front() { let mut t: Token = match g_inner.pop_front() {
Some(o) => o, Some(o) => o,
None => break None => break
}; };
let s: &str; let s: &str;
if let Token::PreOperator(ref x) = t { if let Token::PreOperator(_, ref x) = t {
s = x; s = x;
} else { } else {
match t {
Token::PreGroup(_, _) |
Token::Multiply(_) |
Token::Divide(_) |
Token::Add(_) |
Token::Subtract(_) |
Token::Factorial(_) |
Token::Negative(_) |
Token::Power(_) |
Token::Modulo(_) => {
fold_operators_once(&mut t, op_type, check, new_token)?;
},
_ => {}
};
new.push_back(t); new.push_back(t);
continue; continue;
} }
@ -49,8 +71,8 @@ fn fold_operators_once(
OperatorType::UnaryLeft => { OperatorType::UnaryLeft => {
let mut last: Token = new.pop_back().unwrap(); let mut last: Token = new.pop_back().unwrap();
if let Token::PreGroup(_) = last { if let Token::PreGroup(_, _) = last {
fold_operators_once(&mut last, op_type, check, new_token).unwrap(); fold_operators_once(&mut last, op_type, check, new_token)?;
} }
let mut new_token_args: VecDeque<Token> = VecDeque::with_capacity(1); let mut new_token_args: VecDeque<Token> = VecDeque::with_capacity(1);
@ -60,8 +82,8 @@ fn fold_operators_once(
OperatorType::UnaryRight => { OperatorType::UnaryRight => {
let mut next: Token = g_inner.pop_front().unwrap(); let mut next: Token = g_inner.pop_front().unwrap();
if let Token::PreGroup(_) = next { if let Token::PreGroup(_, _) = next {
fold_operators_once(&mut next, op_type, check, new_token).unwrap(); fold_operators_once(&mut next, op_type, check, new_token)?;
} }
let mut new_token_args: VecDeque<Token> = VecDeque::with_capacity(1); let mut new_token_args: VecDeque<Token> = VecDeque::with_capacity(1);
@ -73,11 +95,11 @@ fn fold_operators_once(
let mut next: Token = g_inner.pop_front().unwrap(); let mut next: Token = g_inner.pop_front().unwrap();
// TODO: append to t_vec and do this without recursion. // TODO: append to t_vec and do this without recursion.
if let Token::PreGroup(_) = last { if let Token::PreGroup(_, _) = last {
fold_operators_once(&mut last, op_type, check, new_token).unwrap(); fold_operators_once(&mut last, op_type, check, new_token)?;
} }
if let Token::PreGroup(_) = next { if let Token::PreGroup(_, _) = next {
fold_operators_once(&mut next, op_type, check, new_token).unwrap(); fold_operators_once(&mut next, op_type, check, new_token)?;
} }
let mut new_token_args: VecDeque<Token> = VecDeque::with_capacity(2); let mut new_token_args: VecDeque<Token> = VecDeque::with_capacity(2);
@ -87,6 +109,9 @@ fn fold_operators_once(
} }
}; };
} else { } else {
if let Token::PreGroup(_, _) = t {
fold_operators_once(&mut t, op_type, check, new_token)?;
}
new.push_back(t); new.push_back(t);
} }
} }
@ -139,7 +164,6 @@ pub fn fold_operators(exp: &mut Token) -> Result<(), ()> {
} }
)?; )?;
fold_operators_once( fold_operators_once(
exp, &OperatorType::Binary, exp, &OperatorType::Binary,
|s| s=="mod", |s| s=="mod",

31
src/parser/replace_pre.rs Normal file
View File

@ -0,0 +1,31 @@
use crate::parser::tokenize::Token;
/// Rewrites raw tokenizer output in place, turning `Pre*` tokens into
/// their parsed forms.
///
/// * `PreGroup`: every contained token is processed recursively.
/// * `PreNumber`: the text is parsed as a float and the token becomes
///   `Token::Number`.
/// * `PreWord`: the word `mod` becomes a `PreOperator` that keeps the
///   word's location; any other word is rejected.
/// * `PreOperator`: left untouched.
///
/// # Errors
/// Returns `Err(())` when a number cannot be parsed or when an unknown
/// word is encountered. Tokens visited before the failure stay rewritten.
///
/// # Panics
/// Panics if `g` is not one of the four `Pre*` variants listed above.
pub fn replace_pre(g: &mut Token) -> Result<(), ()> {
    match g {
        Token::PreGroup(_, vec) => {
            for i in vec.iter_mut() {
                replace_pre(i)?;
            }
        },

        Token::PreNumber(_, s) => {
            // The text comes straight from user input, so a malformed
            // number is reported to the caller instead of aborting.
            let n = s.parse::<f64>().map_err(|_| ())?;
            *g = Token::Number(n);
        },

        Token::PreWord(l, s) => {
            if s.as_str() != "mod" {
                return Err(());
            }
            *g = Token::PreOperator(*l, String::from("mod"));
        },

        Token::PreOperator(_, _) => {},

        // Only `Pre*` tokens are expected here.
        _ => { panic!("replace_pre called on a token that is not a Pre* variant"); }
    };

    return Ok(());
}

View File

@ -1,47 +0,0 @@
use std::collections::VecDeque;
use crate::parser::tokenize::Token;
/// Rewrites the direct children of a `PreGroup` in place.
///
/// * Nested `PreGroup`s are processed recursively.
/// * `PreNumber` text is parsed as a float and replaced by `Token::Number`.
/// * The words `to` and `mod` become `PreOperator`s carrying the same text;
///   every other word is left as a `PreWord`.
/// * All other tokens are left unchanged.
///
/// # Errors
/// Returns `Err(())` when a number cannot be parsed. Tokens visited before
/// the failure stay rewritten; no token is dropped from the group.
///
/// # Panics
/// Panics if `g` is not a `PreGroup`.
pub fn replace_words(g: &mut Token) -> Result<(), ()> {
    let g_inner: &mut VecDeque<Token> = match g {
        Token::PreGroup(x) => x,
        _ => panic!()
    };

    for t in g_inner.iter_mut() {
        match t {
            Token::PreGroup(_) => {
                replace_words(t)?;
            },

            Token::PreNumber(s) => {
                // The text comes from user input: report a malformed
                // number to the caller instead of aborting.
                let n = s.parse::<f64>().map_err(|_| ())?;
                *t = Token::Number(n);
            },

            Token::PreWord(s) => {
                if matches!(s.as_str(), "to" | "mod") {
                    // Reuse the existing string rather than allocating a copy.
                    let op = std::mem::take(s);
                    *t = Token::PreOperator(op);
                }
            },

            _ => {}
        };
    }

    return Ok(());
}

View File

@ -1,15 +1,20 @@
use std::collections::VecDeque; use std::collections::VecDeque;
/// Span of a token inside the tokenizer's input string.
#[derive(Debug, Copy, Clone)]
pub struct LineLocation {
    /// Index at which the token starts.
    pos: usize,

    /// Number of positions the token covers, counted from `pos`.
    len: usize,
}
#[derive(Debug)] #[derive(Debug)]
#[derive(Clone)]
pub enum Token { pub enum Token {
// Only used after tokenizing // Only used after tokenizing
PreGroup(VecDeque<Token>), PreGroup(LineLocation, VecDeque<Token>),
PreOperator(String), PreOperator(LineLocation, String),
PreNumber(String), PreNumber(LineLocation, String),
PreWord(String), PreWord(LineLocation, String),
// All PreGroups should vanish after operator folding // All PreGroups should vanish after operator folding
// All PreOperators should become Operators // All PreOperators should become Operators
@ -17,7 +22,12 @@ pub enum Token {
// All PreWords should become TODO. // All PreWords should become TODO.
// Only used in tree // Only used in tree
Number(f64), Number(f64),
// Functions
// Operators
Multiply(VecDeque<Token>), Multiply(VecDeque<Token>),
Divide(VecDeque<Token>), Divide(VecDeque<Token>),
Add(VecDeque<Token>), Add(VecDeque<Token>),
@ -30,6 +40,25 @@ pub enum Token {
//Function(String, VecDeque<Token>), //Function(String, VecDeque<Token>),
} }
#[inline(always)]
fn update_line_location(mut t: Token, stop_i: usize) -> Token {
match t {
Token::PreGroup(ref mut l, _) |
Token::PreOperator(ref mut l, _) |
Token::PreNumber(ref mut l, _) |
Token::PreWord(ref mut l, _) => {
let LineLocation{pos, .. } = l;
*l = LineLocation{
pos: *pos,
len: stop_i - *pos,
};
},
_ => panic!()
};
return t;
}
/// Turn a string into a set of tokens. /// Turn a string into a set of tokens.
/// Does not check syntax. Fails if `input` contains an invalid character. /// Does not check syntax. Fails if `input` contains an invalid character.
@ -43,38 +72,55 @@ pub enum Token {
pub fn tokenize(input: &String) -> Result<Token, ()> { pub fn tokenize(input: &String) -> Result<Token, ()> {
let mut t: Option<Token> = None; // The current token we're reading let mut t: Option<Token> = None; // The current token we're reading
let mut g: Vec<Token> = Vec::with_capacity(8); // Vector of "grouping levels" let mut g: Vec<Token> = Vec::with_capacity(8); // Vector of "grouping levels"
g.push(Token::PreGroup(VecDeque::with_capacity(8))); g.push(Token::PreGroup(LineLocation{pos: 0, len: 0}, VecDeque::with_capacity(8)));
for c in input.chars() { for (i, c) in input.chars().enumerate() {
// The grouping level we're on now // The grouping level we're on now
let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() { let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() {
Token::PreGroup(ref mut x) => x, Token::PreGroup(_, ref mut x) => x,
_ => panic!() _ => panic!()
}; };
match c { match c {
'!' => { '!' => {
if t.is_some() { g_now.push_back(t.unwrap()); t = None; } if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; }
g_now.push_back(Token::PreOperator(String::from("!"))); g_now.push_back(
Token::PreOperator(
LineLocation{pos: i, len: 1},
String::from("!")
)
);
}, },
// Minus sign can be both a Negative and an Operator. // Minus sign can be both a Negative and an Operator.
// Needs special treatment. // Needs special treatment.
'-' => { '-' => {
if t.is_some() { g_now.push_back(t.unwrap()); t = None; } if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; }
match g_now.back() { match g_now.back() {
// If previous token was any of the following, // If previous token was any of the following,
// this is the "minus" operator // this is the "minus" operator
Some(Token::PreNumber(_)) | Some(Token::PreNumber(_, _)) |
Some(Token::PreGroup(_)) | Some(Token::PreGroup(_, _)) |
Some(Token::PreWord(_)) => { Some(Token::PreWord(_, _)) => {
g_now.push_back(Token::PreOperator(String::from(c))); g_now.push_back(
Token::PreOperator(
LineLocation{pos: i, len: 1},
String::from(c)
)
);
}, },
// Otherwise, this is a negative sign. // Otherwise, this is a negative sign.
_ => { g_now.push_back(Token::PreOperator(String::from("neg"))); } _ => {
g_now.push_back(
Token::PreOperator(
LineLocation{pos: i, len: 1},
String::from("neg")
)
);
}
}; };
}, },
@ -84,15 +130,15 @@ pub fn tokenize(input: &String) -> Result<Token, ()> {
match &mut t { match &mut t {
// If we're already building a number, // If we're already building a number,
// append. // append.
Some(Token::PreNumber(val)) => { Some(Token::PreNumber(_, val)) => {
val.push(if c == ',' {'.'} else {c}); val.push(if c == ',' {'.'} else {c});
}, },
// If we're not building a number, finalize // If we're not building a number, finalize
// previous token and start one. // previous token and start one.
_ => { _ => {
if t.is_some() { g_now.push_back(t.unwrap()); } if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); }
t = Some(Token::PreNumber(String::from(c))); t = Some(Token::PreNumber(LineLocation{pos: i, len: 0}, String::from(c)));
} }
}; };
}, },
@ -103,15 +149,15 @@ pub fn tokenize(input: &String) -> Result<Token, ()> {
match &mut t { match &mut t {
// If we're already building a word, // If we're already building a word,
// append. // append.
Some(Token::PreWord(val)) => { Some(Token::PreWord(_, val)) => {
val.push(c); val.push(c);
}, },
// If we're not building a word, finalize // If we're not building a word, finalize
// previous token and start one. // previous token and start one.
_ => { _ => {
if t.is_some() { g_now.push_back(t.unwrap()); } if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); }
t = Some(Token::PreWord(String::from(c))); t = Some(Token::PreWord(LineLocation{pos: i, len: 0}, String::from(c)));
} }
}; };
}, },
@ -125,31 +171,31 @@ pub fn tokenize(input: &String) -> Result<Token, ()> {
'^' | '^' |
'%' => { '%' => {
// Finalize previous token // Finalize previous token
if t.is_some() { g_now.push_back(t.unwrap()); t = None; } if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; }
g_now.push_back(Token::PreOperator(String::from(c))); g_now.push_back(Token::PreOperator(LineLocation{pos: i, len: 1}, String::from(c)));
} }
// Groups // Groups
// Always one character // Always one character
'(' => { '(' => {
if t.is_some() { g_now.push_back(t.unwrap()); t = None; } if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; }
g.push(Token::PreGroup(VecDeque::with_capacity(8))); g.push(Token::PreGroup(LineLocation{pos: i, len: 0}, VecDeque::with_capacity(8)));
}, },
')' => { ')' => {
if t.is_some() { g_now.push_back(t.unwrap()); t = None; } if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; }
let new_group: Token = g.pop().unwrap(); let new_group: Token = g.pop().unwrap();
let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() { let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() {
Token::PreGroup(ref mut x) => x, Token::PreGroup(_, ref mut x) => x,
_ => panic!() _ => panic!()
}; };
g_now.push_back(new_group); g_now.push_back(update_line_location(new_group, i));
}, },
// Space. Basic separator. // Space. Basic separator.
' ' => { ' ' => {
if t.is_some() { g_now.push_back(t.unwrap()); t = None; } if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; }
} }
// Invalid token // Invalid token
@ -159,10 +205,10 @@ pub fn tokenize(input: &String) -> Result<Token, ()> {
let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() { let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() {
Token::PreGroup(ref mut x) => x, Token::PreGroup(_, ref mut x) => x,
_ => panic!() _ => panic!()
}; };
if t.is_some() { g_now.push_back(t.unwrap()); } if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), input.len())); }
return Ok(g.pop().unwrap()); return Ok(g.pop().unwrap());
} }

View File

@ -0,0 +1,36 @@
use crate::parser::tokenize::Token;
/// Removes `PreGroup` wrappers from a token tree, in place.
///
/// * A `PreGroup` holding exactly one token is replaced by that token,
///   after the token itself has been unwrapped recursively.
/// * Operator tokens (`Multiply`, `Divide`, `Add`, `Subtract`, `Factorial`,
///   `Negative`, `Power`, `Modulo`) have each argument unwrapped
///   recursively.
/// * Every other token is left as it is.
///
/// # Errors
/// Returns `Err(())` when a `PreGroup` does not hold exactly one token
/// (for example an empty group, or a group whose contents were not folded
/// into a single expression). On error the tree may be left partially
/// unwrapped.
pub fn unwrap_groups(g: &mut Token) -> Result<(), ()> {
    match g {
        // If g is a PreGroup, unwrap it
        Token::PreGroup(_, vec) => {
            // A group that did not collapse to one token cannot be
            // unwrapped; report it instead of aborting.
            if vec.len() != 1 {
                return Err(());
            }

            let mut inner = vec.pop_front().ok_or(())?;
            unwrap_groups(&mut inner)?;
            *g = inner;
        },

        // If g has sub-elements, recursive call
        Token::Multiply(vec) |
        Token::Divide(vec) |
        Token::Add(vec) |
        Token::Subtract(vec) |
        Token::Factorial(vec) |
        Token::Negative(vec) |
        Token::Power(vec) |
        Token::Modulo(vec) => {
            for i in vec.iter_mut() {
                unwrap_groups(i)?;
            }
        },

        // Otherwise, skip g.
        _ => {}
    };

    return Ok(());
}