mirror of https://github.com/rm-dr/daisy
Rewrote parser
parent
2d9eeffb39
commit
af85e8a6d5
|
@ -1,34 +1,37 @@
|
|||
mod tokenize;
|
||||
mod replace_pre;
|
||||
mod fold_operators;
|
||||
mod unwrap_groups;
|
||||
mod treeify;
|
||||
|
||||
use crate::parser::tokenize::tokenize;
|
||||
use crate::parser::replace_pre::replace_pre;
|
||||
use crate::parser::fold_operators::fold_operators;
|
||||
use crate::parser::unwrap_groups::unwrap_groups;
|
||||
use crate::parser::treeify::treeify;
|
||||
|
||||
use std::collections::VecDeque;
|
||||
|
||||
|
||||
/// Tokens represent logical objects in an expression.
|
||||
///
|
||||
/// Tokens starting with `Pre*` are intermediate tokens, and
|
||||
/// will never show up in a fully-parsed expression tree.
|
||||
#[derive(Debug)]
|
||||
pub enum Token {
|
||||
|
||||
// Used only while tokenizing.
|
||||
// All of these are replaced with one of the tokens below.
|
||||
//
|
||||
// If parsing is successful,
|
||||
// - all PreGroups will vanish
|
||||
// - all PreOperators will become Operators
|
||||
// - all PreNumbers will become Numbers
|
||||
PreGroup(LineLocation, VecDeque<Token>),
|
||||
PreOperator(LineLocation, String),
|
||||
/// Used only while tokenizing.
|
||||
/// Will be replaced with a Number once we finish.
|
||||
PreNumber(LineLocation, String),
|
||||
|
||||
/// Used only while tokenizing.
|
||||
/// Will be replaced with one of the Tokens below once we finish.
|
||||
PreWord(LineLocation, String),
|
||||
|
||||
Number(f64),
|
||||
/// Used only until operators are parsed.
|
||||
/// Each of these will become one of the operators below.
|
||||
PreOperator(LineLocation, Operators),
|
||||
|
||||
// Operators
|
||||
/// Used only until operators are parsed.
|
||||
/// PreGroups aren't needed once we have a tree.
|
||||
PreGroup(LineLocation, VecDeque<Token>),
|
||||
|
||||
|
||||
Number(LineLocation, f64),
|
||||
Multiply(VecDeque<Token>),
|
||||
Divide(VecDeque<Token>),
|
||||
Add(VecDeque<Token>),
|
||||
|
@ -39,6 +42,27 @@ pub enum Token {
|
|||
Modulo(VecDeque<Token>),
|
||||
}
|
||||
|
||||
|
||||
/// Every operator kind the parser knows about, listed from the
/// lowest priority to the highest.
///
/// `Null` MUST keep the discriminant zero.
#[derive(Debug, Copy, Clone)]
pub enum Operators {
    Null = 0,
    /// Modulo invoked with the word "mod".
    ModuloLong,
    Subtract,
    Add,
    Divide,
    Multiply,
    ImplicitMultiply,
    /// Modulo invoked with `%`.
    Modulo,
    Power,
    Negative,
    Factorial,
}
|
||||
|
||||
/// Specifies the location of a token in an input string.
|
||||
/// Used to locate ParserErrors.
|
||||
#[derive(Debug)]
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct LineLocation {
|
||||
|
@ -46,22 +70,34 @@ pub struct LineLocation {
|
|||
pub len: usize
|
||||
}
|
||||
|
||||
/// Types of parser errors.
/// If we cannot parse a string, one of these is returned.
#[derive(Debug)]
pub enum ParserError {
    /// The input contains a character the tokenizer does not recognize.
    InvalidChar,
    /// NOTE(review): presumably reported for an unclosed `(`; the code
    /// that raises it is not visible in this view.
    MissingCloseParen,
    /// Generic syntax error, e.g. a word that is not a known operator.
    Syntax,
    /// Two numbers were written next to each other with no operator between them.
    InvalidImplicitMultiply,
    /// Cannot parse a number.
    BadNumber,
}
|
||||
|
||||
|
||||
|
||||
/// Parse a user string. This is the only method that should be used
|
||||
/// outside this module.
|
||||
///
|
||||
/// # Arguments:
|
||||
/// `s`: the string to parse. Must be trimmed.
|
||||
///
|
||||
/// # Returns:
|
||||
/// - `Err(LineLocation, ParserError)` if we couldn't parse this string.
|
||||
/// `LineLocation` specifies *where* the error is, and `ParserError` specifies
|
||||
/// *what* the error is.
|
||||
///
|
||||
/// - `Ok(Token)` otherwise, where `Token` is the top of an expression tree.
|
||||
pub fn parse(s: &String) -> Result<Token, (LineLocation, ParserError)> {
|
||||
|
||||
let mut g: Token = tokenize(s)?;
|
||||
replace_pre(&mut g)?;
|
||||
fold_operators(&mut g)?;
|
||||
unwrap_groups(&mut g)?;
|
||||
treeify(&mut g)?;
|
||||
|
||||
return Ok(g);
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
use std::collections::VecDeque;
|
||||
/*use std::collections::VecDeque;
|
||||
|
||||
use crate::parser::Token;
|
||||
use crate::parser::LineLocation;
|
||||
|
@ -176,4 +176,4 @@ pub fn fold_operators(exp: &mut Token) -> Result<(), (LineLocation, ParserError)
|
|||
)?;
|
||||
|
||||
return Ok(());
|
||||
}
|
||||
}*/
|
|
@ -1,33 +0,0 @@
|
|||
use crate::parser::Token;
|
||||
use crate::parser::LineLocation;
|
||||
use crate::parser::ParserError;
|
||||
|
||||
|
||||
pub fn replace_pre(g: &mut Token) -> Result<(), (LineLocation, ParserError)> {
|
||||
|
||||
match g {
|
||||
Token::PreGroup(_, ref mut vec) => {
|
||||
for i in vec.iter_mut() {
|
||||
replace_pre(i)?;
|
||||
}
|
||||
},
|
||||
Token::PreNumber(l, s) => {
|
||||
let n = match s.parse() {
|
||||
Ok(n) => n,
|
||||
Err(_) => return Err((*l, ParserError::BadNumber))
|
||||
};
|
||||
*g = Token::Number(n);
|
||||
}
|
||||
Token::PreWord(l, ref s) => {
|
||||
if s == "mod" {
|
||||
*g = Token::PreOperator(*l, String::from("mod"));
|
||||
} else {
|
||||
return Err((*l, ParserError::Syntax));
|
||||
}
|
||||
},
|
||||
Token::PreOperator(_, _) => {},
|
||||
_ => { panic!(); }
|
||||
};
|
||||
|
||||
return Ok(());
|
||||
}
|
|
@ -3,7 +3,10 @@ use std::collections::VecDeque;
|
|||
use crate::parser::Token;
|
||||
use crate::parser::LineLocation;
|
||||
use crate::parser::ParserError;
|
||||
use crate::parser::Operators;
|
||||
|
||||
/// Updates the length of a Token's LineLocation.
|
||||
/// Run whenever a token is finished.
|
||||
#[inline(always)]
|
||||
fn update_line_location(mut t: Token, stop_i: usize) -> Token {
|
||||
match t {
|
||||
|
@ -24,7 +27,93 @@ fn update_line_location(mut t: Token, stop_i: usize) -> Token {
|
|||
}
|
||||
|
||||
|
||||
/// Look at the last two elements of `g`:
|
||||
/// - if one is an operator, do nothing.
|
||||
/// - if they are a valid implicit multiplication pair, add an ImplicitMultiply between them
|
||||
/// - if they aren't, throw an error.
|
||||
#[inline(always)]
|
||||
fn insert_implicit(
|
||||
g: &mut VecDeque<Token>
|
||||
) -> Result<(), (LineLocation, ParserError)> {
|
||||
if g.len() >= 2 {
|
||||
let b: Token = g.pop_back().unwrap();
|
||||
let a: &Token = g.back().unwrap();
|
||||
|
||||
match (a, &b) {
|
||||
|
||||
// Not implicit multiplication, ignore
|
||||
(Token::PreOperator(_,_), _) |
|
||||
(_, Token::PreOperator(_,_))
|
||||
=> { g.push_back(b); },
|
||||
|
||||
// Valid implicit multiplications
|
||||
(Token::PreGroup(_,_), Token::PreGroup(ref l,_)) |
|
||||
(Token::PreGroup(_,_), Token::Number(ref l,_)) |
|
||||
(Token::Number(_,_), Token::PreGroup(ref l,_))
|
||||
=> {
|
||||
let LineLocation { pos: i, .. } = l;
|
||||
g.push_back(Token::PreOperator(
|
||||
LineLocation{pos: i-1, len: 0},
|
||||
Operators::ImplicitMultiply
|
||||
));
|
||||
g.push_back(b);
|
||||
},
|
||||
|
||||
// Invalid implicit multiplications
|
||||
(Token::Number(_,_), Token::Number(l,_))
|
||||
=> {
|
||||
let LineLocation { pos: i, .. } = l;
|
||||
return Err((
|
||||
LineLocation{pos: i-1, len: 2},
|
||||
ParserError::InvalidImplicitMultiply
|
||||
));
|
||||
},
|
||||
|
||||
_ => panic!()
|
||||
}
|
||||
};
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Pushes (and potentially processes) a token we just read to a vector.
|
||||
/// - Converts all `PreNumbers` to `Numbers`, returning a BadNumber error if necessary
|
||||
/// - Converts all `PreWords` to other tokens.
|
||||
fn push_token(
|
||||
g_now: &mut VecDeque<Token>,
|
||||
i: usize,
|
||||
t: Option<Token>
|
||||
) -> Result<(), (LineLocation, ParserError)>{
|
||||
if t.is_none() {
|
||||
return Ok(());
|
||||
} else {
|
||||
let t: Token = update_line_location(t.unwrap(), i);
|
||||
g_now.push_back(match t {
|
||||
Token::PreNumber(l, s) => {
|
||||
let n = match s.parse() {
|
||||
Ok(n) => n,
|
||||
Err(_) => return Err((l, ParserError::BadNumber))
|
||||
};
|
||||
Token::Number(l, n)
|
||||
},
|
||||
Token::PreWord(l, s) => {
|
||||
if s == "mod" {
|
||||
Token::PreOperator(l, Operators::ModuloLong)
|
||||
} else {
|
||||
return Err((l, ParserError::Syntax));
|
||||
}
|
||||
},
|
||||
Token::PreOperator(_, _) => t,
|
||||
_ => panic!()
|
||||
});
|
||||
insert_implicit(g_now)?;
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
|
||||
/// Turns a string into Tokens. First stage of parsing.
|
||||
pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
|
||||
let mut t: Option<Token> = None; // The current token we're reading
|
||||
let mut g: Vec<Token> = Vec::with_capacity(8); // Vector of "grouping levels"
|
||||
|
@ -45,15 +134,15 @@ pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
|
|||
g_now.push_back(
|
||||
Token::PreOperator(
|
||||
LineLocation{pos: i, len: 1},
|
||||
String::from("!")
|
||||
Operators::Factorial
|
||||
)
|
||||
);
|
||||
},
|
||||
|
||||
// Minus sign can be both a Negative and an Operator.
|
||||
// The minus sign can be both a Negative and an Operator.
|
||||
// Needs special treatment.
|
||||
'-' => {
|
||||
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; }
|
||||
push_token(g_now, i, t)?; t = None;
|
||||
match g_now.back() {
|
||||
// If previous token was any of the following,
|
||||
// this is the "minus" operator
|
||||
|
@ -63,7 +152,7 @@ pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
|
|||
g_now.push_back(
|
||||
Token::PreOperator(
|
||||
LineLocation{pos: i, len: 1},
|
||||
String::from(c)
|
||||
Operators::Subtract
|
||||
)
|
||||
);
|
||||
},
|
||||
|
@ -73,7 +162,7 @@ pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
|
|||
g_now.push_back(
|
||||
Token::PreOperator(
|
||||
LineLocation{pos: i, len: 1},
|
||||
String::from("neg")
|
||||
Operators::Negative
|
||||
)
|
||||
);
|
||||
}
|
||||
|
@ -93,7 +182,7 @@ pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
|
|||
// If we're not building a number, finalize
|
||||
// previous token and start one.
|
||||
_ => {
|
||||
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); }
|
||||
push_token(g_now, i, t)?;
|
||||
t = Some(Token::PreNumber(LineLocation{pos: i, len: 0}, String::from(c)));
|
||||
}
|
||||
};
|
||||
|
@ -118,27 +207,33 @@ pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
|
|||
};
|
||||
},
|
||||
|
||||
|
||||
// Operation
|
||||
// Operator
|
||||
// Always one character
|
||||
'+' |
|
||||
'*' |
|
||||
'/' |
|
||||
'^' |
|
||||
'%' => {
|
||||
'+' | '*' | '/' | '^' | '%' => {
|
||||
// Finalize previous token
|
||||
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; }
|
||||
g_now.push_back(Token::PreOperator(LineLocation{pos: i, len: 1}, String::from(c)));
|
||||
push_token(g_now, i, t)?; t = None;
|
||||
g_now.push_back(
|
||||
Token::PreOperator(
|
||||
LineLocation{pos: i, len: 1},
|
||||
match c {
|
||||
'^' => Operators::Power,
|
||||
'%' => Operators::Modulo,
|
||||
'*' => Operators::Multiply,
|
||||
'/' => Operators::Divide,
|
||||
'+' => Operators::Add,
|
||||
_ => panic!()
|
||||
}
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
// Groups
|
||||
// Always one character
|
||||
// Group
|
||||
'(' => {
|
||||
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; }
|
||||
push_token(g_now, i, t)?; t = None;
|
||||
g.push(Token::PreGroup(LineLocation{pos: i, len: 0}, VecDeque::with_capacity(8)));
|
||||
},
|
||||
')' => {
|
||||
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; }
|
||||
push_token(g_now, i, t)?; t = None;
|
||||
let new_group: Token = g.pop().unwrap();
|
||||
|
||||
let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() {
|
||||
|
@ -151,10 +246,10 @@ pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
|
|||
|
||||
// Space. Basic separator.
|
||||
' ' => {
|
||||
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), i)); t = None; }
|
||||
push_token(g_now, i, t)?; t = None;
|
||||
}
|
||||
|
||||
// Invalid token
|
||||
// Invalid character
|
||||
_ => { return Err((LineLocation{pos: i, len: 1}, ParserError::InvalidChar)); }
|
||||
};
|
||||
}
|
||||
|
@ -164,7 +259,7 @@ pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
|
|||
Token::PreGroup(_, ref mut x) => x,
|
||||
_ => panic!()
|
||||
};
|
||||
if t.is_some() { g_now.push_back(update_line_location(t.unwrap(), input.len())); }
|
||||
push_token(g_now, input.len(), t)?;
|
||||
|
||||
if g.len() != 1 {
|
||||
let q: LineLocation = match g.last_mut().unwrap() {
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
use std::collections::VecDeque;
|
||||
|
||||
use crate::parser::Token;
|
||||
use crate::parser::LineLocation;
|
||||
use crate::parser::ParserError;
|
||||
use crate::parser::Operators;
|
||||
|
||||
|
||||
pub fn treeify(
|
||||
g: &mut Token,
|
||||
) -> Result<(), (LineLocation, ParserError)> {
|
||||
|
||||
let g_inner: &mut VecDeque<Token> = match g {
|
||||
Token::PreGroup(_, ref mut x) => x,
|
||||
_ => panic!()
|
||||
};
|
||||
let mut new: VecDeque<Token> = VecDeque::with_capacity(8);
|
||||
|
||||
let mut i = 1;
|
||||
while g_inner.len() > 1 {
|
||||
|
||||
let a: isize;
|
||||
if i == 1 {
|
||||
a = Operators::Null as isize;
|
||||
} else {
|
||||
let q: Operators = match g_inner[i-2] {
|
||||
Token::PreOperator(_, x) => x,
|
||||
_ => panic!()
|
||||
};
|
||||
a = q as isize;
|
||||
}
|
||||
|
||||
let b: isize = match g_inner[i] {
|
||||
Token::PreOperator(_, x) => x,
|
||||
_ => panic!()
|
||||
} as isize;
|
||||
|
||||
let c: isize;
|
||||
if i >= g_inner.len()-2 {
|
||||
c = Operators::Null as isize;
|
||||
} else {
|
||||
let q: Operators = match g_inner[i+2] {
|
||||
Token::PreOperator(_, x) => x,
|
||||
_ => panic!()
|
||||
};
|
||||
c = q as isize;
|
||||
}
|
||||
|
||||
println!("{}, {:?}", i, g_inner);
|
||||
if b >= a && b >= c {
|
||||
// This operator owns both its arguments.
|
||||
let left = g_inner.remove(i-1).unwrap();
|
||||
let this = g_inner.remove(i-1).unwrap();
|
||||
let right = g_inner.remove(i-1).unwrap();
|
||||
|
||||
let (l, k) = match this {
|
||||
Token::PreOperator(l, k) => (l, k),
|
||||
_ => panic!()
|
||||
};
|
||||
|
||||
let mut new_token_args: VecDeque<Token> = VecDeque::with_capacity(3);
|
||||
new_token_args.push_back(left);
|
||||
new_token_args.push_back(right);
|
||||
|
||||
g_inner.insert(
|
||||
i-1,
|
||||
match k {
|
||||
Operators::Subtract => Token::Subtract(new_token_args),
|
||||
Operators::Add => Token::Add(new_token_args),
|
||||
Operators::Divide => Token::Divide(new_token_args),
|
||||
Operators::Multiply => Token::Multiply(new_token_args),
|
||||
Operators::ImplicitMultiply => Token::Multiply(new_token_args),
|
||||
Operators::Modulo => Token::Modulo(new_token_args),
|
||||
Operators::Power => Token::Power(new_token_args),
|
||||
Operators::ModuloLong => Token::Modulo(new_token_args),
|
||||
Operators::Negative => panic!(),
|
||||
Operators::Factorial => panic!(),
|
||||
Operators::Null => panic!()
|
||||
}
|
||||
);
|
||||
if i >= 3 { i -= 2; }
|
||||
} else {
|
||||
// This operator has lower precedence than another.
|
||||
// skip it for now.
|
||||
i += 2;
|
||||
}
|
||||
println!("{}", i);
|
||||
}
|
||||
return Ok(());
|
||||
}
|
|
@ -1,38 +0,0 @@
|
|||
use crate::parser::Token;
|
||||
use crate::parser::ParserError;
|
||||
use crate::parser::LineLocation;
|
||||
|
||||
pub fn unwrap_groups(g: &mut Token) -> Result<(), (LineLocation, ParserError)> {
|
||||
|
||||
match g {
|
||||
// If g is a PreGroup, unwrap it
|
||||
Token::PreGroup(l, ref mut vec) => {
|
||||
if vec.len() != 1 {
|
||||
return Err((*l, ParserError::Syntax));
|
||||
}
|
||||
|
||||
let mut i = vec.pop_front().unwrap();
|
||||
unwrap_groups(&mut i)?;
|
||||
*g = i;
|
||||
},
|
||||
|
||||
// If g has sub-elements, recursive call
|
||||
Token::Multiply(ref mut vec) |
|
||||
Token::Divide(ref mut vec) |
|
||||
Token::Add(ref mut vec) |
|
||||
Token::Subtract(ref mut vec) |
|
||||
Token::Factorial(ref mut vec) |
|
||||
Token::Negative(ref mut vec) |
|
||||
Token::Power(ref mut vec) |
|
||||
Token::Modulo(ref mut vec) => {
|
||||
for i in vec.iter_mut() {
|
||||
unwrap_groups(i)?;
|
||||
}
|
||||
},
|
||||
|
||||
// Otherwise, skip g.
|
||||
_ => {}
|
||||
};
|
||||
|
||||
return Ok(());
|
||||
}
|
Loading…
Reference in New Issue