Rewrote parser

pull/2/head
Mark 2023-03-22 10:53:38 -07:00
parent 2d9eeffb39
commit af85e8a6d5
Signed by: Mark
GPG Key ID: AD62BB059C2AAEE4
6 changed files with 267 additions and 117 deletions

View File

@ -1,34 +1,37 @@
mod tokenize; mod tokenize;
mod replace_pre; mod treeify;
mod fold_operators;
mod unwrap_groups;
use crate::parser::tokenize::tokenize; use crate::parser::tokenize::tokenize;
use crate::parser::replace_pre::replace_pre; use crate::parser::treeify::treeify;
use crate::parser::fold_operators::fold_operators;
use crate::parser::unwrap_groups::unwrap_groups;
use std::collections::VecDeque; use std::collections::VecDeque;
/// Tokens represent logical objects in an expression.
///
/// Tokens starting with `Pre*` are intermediate tokens, and
/// will never show up in a fully-parsed expression tree.
#[derive(Debug)] #[derive(Debug)]
pub enum Token { pub enum Token {
// Used only while tokenizing. /// Used only while tokenizing.
// All of these are replaced with one of the tokens below. /// Will be replaced with a Number once we finish.
//
// If parsing is successful,
// - all PreGroups will vanish
// - all PreOperators will become Operators
// - all PreNumbers will become Numbers
PreGroup(LineLocation, VecDeque<Token>),
PreOperator(LineLocation, String),
PreNumber(LineLocation, String), PreNumber(LineLocation, String),
/// Used only while tokenizing.
/// Will be replaced with one of the Tokens below once we finish.
PreWord(LineLocation, String), PreWord(LineLocation, String),
Number(f64), /// Used only until operators are parsed.
/// Each of these will become one of the operators below.
PreOperator(LineLocation, Operators),
// Operators /// Used only until operators are parsed.
/// PreGroups aren't needed once we have a tree.
PreGroup(LineLocation, VecDeque<Token>),
Number(LineLocation, f64),
Multiply(VecDeque<Token>), Multiply(VecDeque<Token>),
Divide(VecDeque<Token>), Divide(VecDeque<Token>),
Add(VecDeque<Token>), Add(VecDeque<Token>),
@ -39,6 +42,27 @@ pub enum Token {
Modulo(VecDeque<Token>), Modulo(VecDeque<Token>),
} }
/// Operator kinds, listed from lowest to highest priority.
///
/// Discriminants are assigned implicitly, counting up from `Null`,
/// so the declaration order below IS the priority order.
/// `Null` MUST stay equal to zero.
#[derive(Debug, Copy, Clone)]
pub enum Operators {
    Null = 0,
    /// Modulo invoked with the word "mod".
    ModuloLong,
    Subtract,
    Add,
    Divide,
    Multiply,
    ImplicitMultiply,
    /// Modulo invoked with `%`.
    Modulo,
    Power,
    Negative,
    Factorial,
}
/// Specifies the location of a token in an input string.
/// Used to locate ParserErrors.
#[derive(Debug)] #[derive(Debug)]
#[derive(Copy, Clone)] #[derive(Copy, Clone)]
pub struct LineLocation { pub struct LineLocation {
@ -46,22 +70,34 @@ pub struct LineLocation {
pub len: usize pub len: usize
} }
/// Types of parser errors.
/// If we cannot parse a string, one of these is returned.
#[derive(Debug)] #[derive(Debug)]
pub enum ParserError { pub enum ParserError {
InvalidChar, InvalidChar,
MissingCloseParen, MissingCloseParen,
Syntax, Syntax,
BadNumber // Cannot parse a number InvalidImplicitMultiply,
BadNumber
} }
/// Parse a user string. This is the only method that should be used
/// outside this module.
///
/// # Arguments:
/// `s`: the string to parse. Must be trimmed.
///
/// # Returns:
/// - `Err(LineLocation, ParserError)` if we couldn't parse this string.
/// `LineLocation` specifies *where* the error is, and `ParserError` specifies
/// *what* the error is.
///
/// - `Ok(Token)` otherwise, where `Token` is the top of an expression tree.
pub fn parse(s: &String) -> Result<Token, (LineLocation, ParserError)> { pub fn parse(s: &String) -> Result<Token, (LineLocation, ParserError)> {
let mut g: Token = tokenize(s)?; let mut g: Token = tokenize(s)?;
replace_pre(&mut g)?; treeify(&mut g)?;
fold_operators(&mut g)?;
unwrap_groups(&mut g)?;
return Ok(g); return Ok(g);
} }

View File

@ -1,4 +1,4 @@
use std::collections::VecDeque; /*use std::collections::VecDeque;
use crate::parser::Token; use crate::parser::Token;
use crate::parser::LineLocation; use crate::parser::LineLocation;
@ -176,4 +176,4 @@ pub fn fold_operators(exp: &mut Token) -> Result<(), (LineLocation, ParserError)
)?; )?;
return Ok(()); return Ok(());
} }*/

View File

@ -1,33 +0,0 @@
use crate::parser::Token;
use crate::parser::LineLocation;
use crate::parser::ParserError;
/// Replaces the intermediate tokens produced by the tokenizer, in place.
///
/// - `PreGroup`: every child is processed recursively, front to back,
///   stopping at the first error.
/// - `PreNumber`: the text is parsed and the token becomes a `Number`.
/// - `PreWord`: the word "mod" becomes a `PreOperator`; any other word is rejected.
/// - `PreOperator`: left as it is.
///
/// # Returns:
/// - `Err((LineLocation, ParserError::BadNumber))` if a number cannot be parsed.
/// - `Err((LineLocation, ParserError::Syntax))` if a word other than "mod" is found.
/// - `Ok(())` otherwise.
///
/// # Panics:
/// If `g` is any other kind of token.
pub fn replace_pre(g: &mut Token) -> Result<(), (LineLocation, ParserError)> {
    // Work out what `g` should turn into first, then overwrite it once
    // every borrow into the old token has ended.
    let replacement: Token = match g {
        Token::PreGroup(_, children) => {
            return children.iter_mut().try_for_each(replace_pre);
        }

        Token::PreNumber(loc, text) => {
            let value = text
                .parse::<f64>()
                .map_err(|_| (*loc, ParserError::BadNumber))?;
            Token::Number(value)
        }

        Token::PreWord(loc, word) => {
            if word.as_str() != "mod" {
                return Err((*loc, ParserError::Syntax));
            }
            Token::PreOperator(*loc, String::from("mod"))
        }

        // Operators are resolved by a later stage.
        Token::PreOperator(_, _) => return Ok(()),

        _ => panic!(),
    };

    *g = replacement;
    Ok(())
}

View File

@ -3,7 +3,10 @@ use std::collections::VecDeque;
use crate::parser::Token; use crate::parser::Token;
use crate::parser::LineLocation; use crate::parser::LineLocation;
use crate::parser::ParserError; use crate::parser::ParserError;
use crate::parser::Operators;
/// Updates the length of a Token's LineLocation.
/// Run whenever a token is finished.
#[inline(always)] #[inline(always)]
fn update_line_location(mut t: Token, stop_i: usize) -> Token { fn update_line_location(mut t: Token, stop_i: usize) -> Token {
match t { match t {
@ -24,7 +27,93 @@ fn update_line_location(mut t: Token, stop_i: usize) -> Token {
} }
/// Inspects the two most recent tokens in `g` and decides whether an
/// implicit multiplication belongs between them.
///
/// - Fewer than two tokens: nothing to do.
/// - Either token is a `PreOperator`: nothing to do.
/// - group·group, group·number or number·group: an `ImplicitMultiply`
///   operator is inserted between the two. Its location is the position
///   just before the second token, with length zero.
/// - number·number: returns `ParserError::InvalidImplicitMultiply`.
///   The second number is dropped from `g` in this case.
///
/// # Panics:
/// If the pair is any other combination of tokens.
#[inline(always)]
fn insert_implicit(
    g: &mut VecDeque<Token>
) -> Result<(), (LineLocation, ParserError)> {
    /// What to do with the newest token once its neighbour has been inspected.
    /// The payload is the position just before the newest token.
    enum Verdict {
        Keep,
        Multiply(usize),
        Reject(usize),
    }

    if g.len() < 2 {
        return Ok(());
    }

    // Take the newest token out so we can look at it and its
    // predecessor at the same time.
    let newest: Token = g.pop_back().unwrap();

    let verdict = match (g.back().unwrap(), &newest) {
        // Not implicit multiplication
        (Token::PreOperator(_, _), _) | (_, Token::PreOperator(_, _)) => Verdict::Keep,

        // Valid implicit multiplications
        (Token::PreGroup(_, _), Token::PreGroup(loc, _))
        | (Token::PreGroup(_, _), Token::Number(loc, _))
        | (Token::Number(_, _), Token::PreGroup(loc, _)) => Verdict::Multiply(loc.pos - 1),

        // Invalid implicit multiplication
        (Token::Number(_, _), Token::Number(loc, _)) => Verdict::Reject(loc.pos - 1),

        _ => panic!(),
    };

    match verdict {
        Verdict::Keep => g.push_back(newest),
        Verdict::Multiply(pos) => {
            g.push_back(Token::PreOperator(
                LineLocation { pos, len: 0 },
                Operators::ImplicitMultiply,
            ));
            g.push_back(newest);
        }
        Verdict::Reject(pos) => {
            return Err((
                LineLocation { pos, len: 2 },
                ParserError::InvalidImplicitMultiply,
            ));
        }
    }

    Ok(())
}
/// Finishes the token currently being read and appends it to `g_now`.
///
/// Does nothing when `t` is `None`. Otherwise the token's location is
/// finalized with `update_line_location(t, i)` and then:
/// - a `PreNumber` is parsed into a `Number`
///   (`ParserError::BadNumber` if the text is not a valid number),
/// - a `PreWord` equal to "mod" becomes a `ModuloLong` operator
///   (`ParserError::Syntax` for any other word),
/// - a `PreOperator` is pushed unchanged.
///
/// After pushing, `insert_implicit` is run on `g_now` and its result returned.
///
/// # Panics:
/// If `t` holds any other kind of token.
fn push_token(
    g_now: &mut VecDeque<Token>,
    i: usize,
    t: Option<Token>
) -> Result<(), (LineLocation, ParserError)> {
    let finished: Token = match t {
        Some(raw) => update_line_location(raw, i),
        None => return Ok(()),
    };

    let converted: Token = match finished {
        Token::PreNumber(l, s) => match s.parse::<f64>() {
            Ok(n) => Token::Number(l, n),
            Err(_) => return Err((l, ParserError::BadNumber)),
        },

        Token::PreWord(l, s) => {
            // "mod" is the only word we understand.
            if s != "mod" {
                return Err((l, ParserError::Syntax));
            }
            Token::PreOperator(l, Operators::ModuloLong)
        }

        operator @ Token::PreOperator(_, _) => operator,

        _ => panic!(),
    };

    g_now.push_back(converted);
    insert_implicit(g_now)
}
/// Turns a string into Tokens. First stage of parsing.
pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> { pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
let mut t: Option<Token> = None; // The current token we're reading let mut t: Option<Token> = None; // The current token we're reading
let mut g: Vec<Token> = Vec::with_capacity(8); // Vector of "grouping levels" let mut g: Vec<Token> = Vec::with_capacity(8); // Vector of "grouping levels"
@ -45,15 +134,15 @@ pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
g_now.push_back( g_now.push_back(
Token::PreOperator( Token::PreOperator(
LineLocation{pos: i, len: 1}, LineLocation{pos: i, len: 1},
String::from("!") Operators::Factorial
) )
); );
}, },
// Minus sign can be both a Negative and an Operator. // The minus sign can be both a Negative and an Operator.
// Needs special treatment. // Needs special treatment.
'-' => { '-' => {
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; } push_token(g_now, i, t)?; t = None;
match g_now.back() { match g_now.back() {
// If previous token was any of the following, // If previous token was any of the following,
// this is the "minus" operator // this is the "minus" operator
@ -63,7 +152,7 @@ pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
g_now.push_back( g_now.push_back(
Token::PreOperator( Token::PreOperator(
LineLocation{pos: i, len: 1}, LineLocation{pos: i, len: 1},
String::from(c) Operators::Subtract
) )
); );
}, },
@ -73,7 +162,7 @@ pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
g_now.push_back( g_now.push_back(
Token::PreOperator( Token::PreOperator(
LineLocation{pos: i, len: 1}, LineLocation{pos: i, len: 1},
String::from("neg") Operators::Negative
) )
); );
} }
@ -93,7 +182,7 @@ pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
// If we're not building a number, finalize // If we're not building a number, finalize
// previous token and start one. // previous token and start one.
_ => { _ => {
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); } push_token(g_now, i, t)?;
t = Some(Token::PreNumber(LineLocation{pos: i, len: 0}, String::from(c))); t = Some(Token::PreNumber(LineLocation{pos: i, len: 0}, String::from(c)));
} }
}; };
@ -118,27 +207,33 @@ pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
}; };
}, },
// Operator
// Operation
// Always one character // Always one character
'+' | '+' | '*' | '/' | '^' | '%' => {
'*' |
'/' |
'^' |
'%' => {
// Finalize previous token // Finalize previous token
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; } push_token(g_now, i, t)?; t = None;
g_now.push_back(Token::PreOperator(LineLocation{pos: i, len: 1}, String::from(c))); g_now.push_back(
Token::PreOperator(
LineLocation{pos: i, len: 1},
match c {
'^' => Operators::Power,
'%' => Operators::Modulo,
'*' => Operators::Multiply,
'/' => Operators::Divide,
'+' => Operators::Add,
_ => panic!()
}
)
);
} }
// Groups // Group
// Always one character
'(' => { '(' => {
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; } push_token(g_now, i, t)?; t = None;
g.push(Token::PreGroup(LineLocation{pos: i, len: 0}, VecDeque::with_capacity(8))); g.push(Token::PreGroup(LineLocation{pos: i, len: 0}, VecDeque::with_capacity(8)));
}, },
')' => { ')' => {
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; } push_token(g_now, i, t)?; t = None;
let new_group: Token = g.pop().unwrap(); let new_group: Token = g.pop().unwrap();
let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() { let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() {
@ -151,10 +246,10 @@ pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
// Space. Basic seperator. // Space. Basic seperator.
' ' => { ' ' => {
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; } push_token(g_now, i, t)?; t = None;
} }
// Invalid token // Invalid character
_ => { return Err((LineLocation{pos: i, len: 1}, ParserError::InvalidChar)); } _ => { return Err((LineLocation{pos: i, len: 1}, ParserError::InvalidChar)); }
}; };
} }
@ -164,7 +259,7 @@ pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
Token::PreGroup(_, ref mut x) => x, Token::PreGroup(_, ref mut x) => x,
_ => panic!() _ => panic!()
}; };
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), input.len())); } push_token(g_now, input.len(), t)?;
if g.len() != 1 { if g.len() != 1 {
let q: LineLocation = match g.last_mut().unwrap() { let q: LineLocation = match g.last_mut().unwrap() {

90
src/parser/treeify.rs Normal file
View File

@ -0,0 +1,90 @@
use std::collections::VecDeque;
use crate::parser::Token;
use crate::parser::LineLocation;
use crate::parser::ParserError;
use crate::parser::Operators;
/// Binding strength of an operator; larger values bind tighter.
///
/// The levels follow the declaration order of `Operators`, except that
/// `Add`/`Subtract` share one level and `Multiply`/`Divide` share another.
/// Using the raw discriminants instead would rank `Subtract` below `Add`,
/// which makes `1 - 2 + 3` fold as `1 - (2 + 3)`.
fn precedence(op: Operators) -> isize {
    match op {
        Operators::Null => 0,
        Operators::ModuloLong => 1,
        Operators::Subtract | Operators::Add => 2,
        Operators::Divide | Operators::Multiply => 3,
        Operators::ImplicitMultiply => 4,
        Operators::Modulo => 5,
        Operators::Power => 6,
        Operators::Negative => 7,
        Operators::Factorial => 8,
    }
}

/// Returns the precedence of the operator stored at `tokens[index]`.
///
/// # Panics:
/// If `index` is out of bounds, or the token there is not a `PreOperator`.
fn precedence_at(tokens: &VecDeque<Token>, index: usize) -> isize {
    match &tokens[index] {
        Token::PreOperator(_, op) => precedence(*op),
        _ => panic!("treeify: expected an operator at index {}", index),
    }
}

/// Folds the flat `operand, operator, operand, ...` sequence inside a
/// `PreGroup` into a tree of binary operator tokens, in place.
///
/// Operators are expected at every odd index. An operator is folded with
/// its two neighbours as soon as it binds at least as tightly as the
/// operators on either side of it, so operators of equal precedence
/// associate left to right.
///
/// # Arguments:
/// `g`: the group to process. It stays a `PreGroup`; once the loop ends
/// it holds at most one token. Nested `PreGroup`s are used as operands
/// as-is, this function does not descend into them.
///
/// # Returns:
/// Currently always `Ok(())`.
///
/// # Panics:
/// - if `g` is not a `PreGroup`,
/// - if a token at an odd index is not a `PreOperator`,
/// - if an operator is missing one of its operands,
/// - if the operator being folded is `Negative`, `Factorial` or `Null`:
///   unary operators are not handled here.
///
/// NOTE(review): `Power` also folds left to right, so `2^3^2` becomes
/// `(2^3)^2`. Confirm this is intended.
pub fn treeify(
    g: &mut Token,
) -> Result<(), (LineLocation, ParserError)> {
    let g_inner: &mut VecDeque<Token> = match g {
        Token::PreGroup(_, x) => x,
        _ => panic!("treeify: expected a PreGroup"),
    };

    let null: isize = precedence(Operators::Null);

    // Index of the operator under consideration. Always odd.
    let mut i = 1;
    while g_inner.len() > 1 {
        // Precedence of the operator to the left (a), this operator (b)
        // and the operator to the right (c). A missing neighbour counts
        // as Null, which never outranks anything.
        let a: isize = if i == 1 {
            null
        } else {
            precedence_at(g_inner, i - 2)
        };
        let b: isize = precedence_at(g_inner, i);
        let c: isize = if i >= g_inner.len() - 2 {
            null
        } else {
            precedence_at(g_inner, i + 2)
        };

        if b >= a && b >= c {
            // This operator owns both its arguments.
            // Each removal shifts the next token into slot i-1.
            let left = g_inner.remove(i - 1).unwrap();
            let this = g_inner.remove(i - 1).unwrap();
            let right = g_inner.remove(i - 1).unwrap();

            let k: Operators = match this {
                Token::PreOperator(_, k) => k,
                _ => panic!("treeify: expected an operator"),
            };

            let mut new_token_args: VecDeque<Token> = VecDeque::with_capacity(2);
            new_token_args.push_back(left);
            new_token_args.push_back(right);

            let folded: Token = match k {
                Operators::Subtract => Token::Subtract(new_token_args),
                Operators::Add => Token::Add(new_token_args),
                Operators::Divide => Token::Divide(new_token_args),
                Operators::Multiply => Token::Multiply(new_token_args),
                Operators::ImplicitMultiply => Token::Multiply(new_token_args),
                Operators::Modulo => Token::Modulo(new_token_args),
                Operators::Power => Token::Power(new_token_args),
                Operators::ModuloLong => Token::Modulo(new_token_args),
                Operators::Negative => panic!("treeify: cannot fold a unary Negative"),
                Operators::Factorial => panic!("treeify: cannot fold a unary Factorial"),
                Operators::Null => panic!("treeify: cannot fold a Null operator"),
            };
            g_inner.insert(i - 1, folded);

            // Step back so the operator on the left is reconsidered
            // now that its right-hand neighbour has changed.
            if i >= 3 { i -= 2; }
        } else {
            // This operator has lower precedence than another.
            // Skip it for now.
            i += 2;
        }
    }

    Ok(())
}

View File

@ -1,38 +0,0 @@
use crate::parser::Token;
use crate::parser::ParserError;
use crate::parser::LineLocation;
/// Removes every `PreGroup` wrapper from an expression tree, in place.
///
/// - A `PreGroup` must contain exactly one token. That token is unwrapped
///   recursively and then takes the group's place.
/// - Operator tokens have each of their arguments unwrapped in order,
///   stopping at the first error.
/// - Every other token is left alone.
///
/// # Returns:
/// - `Err((LineLocation, ParserError::Syntax))` if a group does not contain
///   exactly one token. `LineLocation` is the location of that group.
/// - `Ok(())` otherwise.
pub fn unwrap_groups(g: &mut Token) -> Result<(), (LineLocation, ParserError)> {
    match g {
        // A group is replaced by its (unwrapped) sole element.
        Token::PreGroup(loc, contents) => {
            if contents.len() != 1 {
                return Err((*loc, ParserError::Syntax));
            }

            let mut only = contents.pop_front().unwrap();
            unwrap_groups(&mut only)?;
            *g = only;
            Ok(())
        }

        // Tokens with arguments: recurse into each one.
        Token::Multiply(args)
        | Token::Divide(args)
        | Token::Add(args)
        | Token::Subtract(args)
        | Token::Factorial(args)
        | Token::Negative(args)
        | Token::Power(args)
        | Token::Modulo(args) => args.iter_mut().try_for_each(unwrap_groups),

        // Nothing to unwrap.
        _ => Ok(()),
    }
}