Reworked parser, added support for unary operators

pull/2/head
Mark 2023-03-22 22:51:48 -07:00
parent 9f53847ed3
commit c743d28f6c
Signed by: Mark
GPG Key ID: AD62BB059C2AAEE4
3 changed files with 343 additions and 84 deletions

View File

@ -30,16 +30,13 @@ pub enum Token {
/// PreGroups aren't needed once we have a tree. /// PreGroups aren't needed once we have a tree.
PreGroup(LineLocation, VecDeque<Token>), PreGroup(LineLocation, VecDeque<Token>),
PreNegative(LineLocation),
PreFactorial(LineLocation),
Number(LineLocation, f64), Number(LineLocation, f64),
Multiply(VecDeque<Token>), Multiply(VecDeque<Token>),
Divide(VecDeque<Token>), Divide(VecDeque<Token>),
Add(VecDeque<Token>), Add(VecDeque<Token>),
Subtract(VecDeque<Token>), Subtract(VecDeque<Token>),
//Factorial(VecDeque<Token>), Factorial(VecDeque<Token>),
//Negative(VecDeque<Token>), Negative(VecDeque<Token>),
Power(VecDeque<Token>), Power(VecDeque<Token>),
Modulo(VecDeque<Token>), Modulo(VecDeque<Token>),
} }
@ -50,15 +47,17 @@ pub enum Token {
#[derive(Debug)] #[derive(Debug)]
#[derive(Copy, Clone)] #[derive(Copy, Clone)]
pub enum Operators { pub enum Operators {
Null = 0, ModuloLong = 0, // Mod invoked with "mod"
ModuloLong, // Mod invoked with "mod"
Subtract, Subtract,
Add, Add,
Divide, Divide,
Multiply, Multiply,
ImplicitMultiply, ImplicitMultiply,
Modulo, // Mod invoked with % Modulo, // Mod invoked with %
Power Power,
Negative,
Factorial
} }
/// Specifies the location of a token in an input string. /// Specifies the location of a token in an input string.

View File

@ -13,9 +13,7 @@ fn update_line_location(mut t: Token, stop_i: usize) -> Token {
Token::PreGroup(ref mut l, _) | Token::PreGroup(ref mut l, _) |
Token::PreOperator(ref mut l, _) | Token::PreOperator(ref mut l, _) |
Token::PreNumber(ref mut l, _) | Token::PreNumber(ref mut l, _) |
Token::PreWord(ref mut l, _) | Token::PreWord(ref mut l, _)
Token::PreNegative(ref mut l) |
Token::PreFactorial(ref mut l)
=> { => {
let LineLocation{pos, .. } = l; let LineLocation{pos, .. } = l;
*l = LineLocation{ *l = LineLocation{
@ -45,12 +43,6 @@ fn lookback(
let a: Token = g.pop_back().unwrap(); let a: Token = g.pop_back().unwrap();
match (&a, &b) { match (&a, &b) {
( // Delete consecutive negatives
Token::PreNegative(_),
Token::PreNegative(_)
) => {},
// Insert ImplicitMultiply // Insert ImplicitMultiply
(Token::PreGroup(_,_), Token::PreGroup(l ,_)) | (Token::PreGroup(_,_), Token::PreGroup(l ,_)) |
(Token::PreGroup(_,_), Token::Number(l,_)) | (Token::PreGroup(_,_), Token::Number(l,_)) |
@ -66,11 +58,7 @@ fn lookback(
}, },
// The following are syntax errors // The following are syntax errors
(Token::PreOperator(la,_), Token::PreOperator(lb,_)) | (Token::Number(la, _), Token::Number(lb,_))
(Token::Number(la, _), Token::Number(lb,_)) |
(Token::PreNegative(la), Token::PreOperator(lb,_)) |
(Token::PreOperator(la, _), Token::PreFactorial(lb)) |
(Token::PreNegative(la), Token::PreFactorial(lb))
=> { => {
let LineLocation { pos: posa, .. } = *la; let LineLocation { pos: posa, .. } = *la;
let LineLocation { pos: posb, len: lenb } = *lb; let LineLocation { pos: posb, len: lenb } = *lb;
@ -81,15 +69,11 @@ fn lookback(
} }
// The following are fine // The following are fine
(Token::PreOperator(_,_), Token::PreNegative(_)) | (Token::PreOperator(_,_), Token::PreOperator(_,_)) |
(Token::PreOperator(_,_), Token::Number(_,_)) | (Token::PreOperator(_,_), Token::Number(_,_)) |
(Token::Number(_,_), Token::PreOperator(_,_)) | (Token::Number(_,_), Token::PreOperator(_,_)) |
(Token::PreOperator(_,_), Token::PreGroup(_,_)) | (Token::PreOperator(_,_), Token::PreGroup(_,_)) |
(Token::PreGroup(_,_), Token::PreOperator(_,_)) | (Token::PreGroup(_,_), Token::PreOperator(_,_))
(Token::PreNegative(_), Token::PreGroup(_,_)) |
(Token::PreNegative(_), Token::Number(_,_)) |
(Token::PreGroup(_,_), Token::PreFactorial(_)) |
(Token::Number(_,_), Token::PreFactorial(_))
=> { g.push_back(a); g.push_back(b); }, => { g.push_back(a); g.push_back(b); },
// If we get this far, we found a Token // If we get this far, we found a Token
@ -131,8 +115,6 @@ fn push_token(
}, },
Token::PreOperator(_, _) => t, Token::PreOperator(_, _) => t,
Token::PreGroup(_, _) => t, Token::PreGroup(_, _) => t,
Token::PreNegative(_) => t,
Token::PreFactorial(_) => t,
_ => panic!() _ => panic!()
}); });
@ -160,7 +142,10 @@ pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
match c { match c {
'!' => { '!' => {
push_token(g_now, i, t)?; push_token(g_now, i, t)?;
t = Some(Token::PreFactorial(LineLocation{pos: i, len: 1})); t = Some(Token::PreOperator(
LineLocation{pos: i, len: 1},
Operators::Factorial
));
}, },
// The minus sign can be both a Negative and an Operator. // The minus sign can be both a Negative and an Operator.
@ -181,7 +166,10 @@ pub fn tokenize(input: &String) -> Result<Token, (LineLocation, ParserError)> {
// Otherwise, this is a negative sign. // Otherwise, this is a negative sign.
_ => { _ => {
t = Some(Token::PreNegative(LineLocation{pos: i, len: 1})); t = Some(Token::PreOperator(
LineLocation{pos: i, len: 1},
Operators::Negative
));
} }
}; };
}, },

View File

@ -5,49 +5,103 @@ use crate::parser::LineLocation;
use crate::parser::ParserError; use crate::parser::ParserError;
use crate::parser::Operators; use crate::parser::Operators;
#[inline(always)]
pub fn treeify( fn get_line_location(t: &Token) -> &LineLocation {
g: &mut Token, match t {
) -> Result<(), (LineLocation, ParserError)> { Token::PreNumber(l, _) |
Token::PreWord(l, _) |
let g_inner: &mut VecDeque<Token> = match g { Token::PreOperator(l, _) |
Token::PreGroup(_, ref mut x) => x, Token::PreGroup(l, _)
=> l,
_ => panic!() _ => panic!()
}
}
/// Builds the tree node for operator `k`, using `new_token_args` as the
/// node's arguments.
///
/// `ImplicitMultiply` produces the same node as `Multiply`, and `ModuloLong`
/// produces the same node as `Modulo`.
#[inline(always)]
fn select_op(k: Operators, new_token_args: VecDeque<Token>) -> Token {
    // Choose the variant constructor first, then apply it exactly once.
    // The match has no catch-all arm, so adding an operator forces an update here.
    let build: fn(VecDeque<Token>) -> Token = match k {
        Operators::Subtract => Token::Subtract,
        Operators::Add => Token::Add,
        Operators::Divide => Token::Divide,
        Operators::Multiply | Operators::ImplicitMultiply => Token::Multiply,
        Operators::Modulo | Operators::ModuloLong => Token::Modulo,
        Operators::Power => Token::Power,
        Operators::Negative => Token::Negative,
        Operators::Factorial => Token::Factorial,
    };

    build(new_token_args)
}
fn treeify_binary(
mut i: usize,
g_inner: &mut VecDeque<Token>
) -> Result<usize, (LineLocation, ParserError)> {
let this: &Token = &g_inner[i];
let right: &Token = {
if i < g_inner.len()-1 {
&g_inner[i+1]
} else {
// This binary operator is at the end of the expression.
let l = match this {
Token::PreOperator(l, _) => l,
_ => panic!()
};
return Err((*l, ParserError::Syntax));
}
}; };
let mut i = 1;
while g_inner.len() > 1 {
let a: isize; if let Token::PreOperator(l, o) = right {
if i == 1 { match o {
a = Operators::Null as isize; // Binary operators
} else { Operators::ModuloLong |
let q: Operators = match g_inner[i-2] { Operators::Subtract |
Token::PreOperator(_, x) => x, Operators::Add |
_ => panic!() Operators::Divide |
}; Operators::Multiply |
a = q as isize; Operators::ImplicitMultiply |
} Operators::Modulo |
Operators::Power |
// Right unary operators
Operators::Factorial
=> {
// Binary and right-unary operators cannot
// follow a binary operator.
let LineLocation { pos: posa, .. } = *get_line_location(&this);
let LineLocation { pos: posb, len: lenb } = *l;
return Err((
LineLocation{pos: posa, len: posb - posa + lenb},
ParserError::Syntax
));
},
let b: isize = match g_inner[i] { // Left unary operators
Token::PreOperator(_, x) => x, Operators::Negative => {
i += 1;
return Ok(i);
}
};
} else {
// Precedence of this operator
let this_val: isize = match this {
Token::PreOperator(_, q) => *q as isize,
_ => panic!() _ => panic!()
} as isize; };
let c: isize; // Precedence of the operator contesting the right argument.
if i >= g_inner.len()-2 { let right_val = if i < g_inner.len()-2 {
c = Operators::Null as isize; match &g_inner[i+2] {
} else { Token::PreOperator(_, q) => Some(*q as isize),
let q: Operators = match g_inner[i+2] {
Token::PreOperator(_, x) => x,
_ => panic!() _ => panic!()
}; }
c = q as isize; } else { None };
}
println!("{}, {:?}", i, g_inner);
if b >= a && b >= c { if right_val.is_none() || this_val > right_val.unwrap() {
// This operator owns both its arguments. // This operator has higher precedence, it takes both arguments
let left = g_inner.remove(i-1).unwrap(); let left = g_inner.remove(i-1).unwrap();
let this = g_inner.remove(i-1).unwrap(); let this = g_inner.remove(i-1).unwrap();
let right = g_inner.remove(i-1).unwrap(); let right = g_inner.remove(i-1).unwrap();
@ -61,27 +115,245 @@ pub fn treeify(
new_token_args.push_back(left); new_token_args.push_back(left);
new_token_args.push_back(right); new_token_args.push_back(right);
g_inner.insert( g_inner.insert(i-1, select_op(k, new_token_args));
i-1,
match k { if i > 1 { i -= 2; } else { i = 0; }
Operators::Subtract => Token::Subtract(new_token_args), return Ok(i);
Operators::Add => Token::Add(new_token_args),
Operators::Divide => Token::Divide(new_token_args),
Operators::Multiply => Token::Multiply(new_token_args),
Operators::ImplicitMultiply => Token::Multiply(new_token_args),
Operators::Modulo => Token::Modulo(new_token_args),
Operators::ModuloLong => Token::Modulo(new_token_args),
Operators::Power => Token::Power(new_token_args),
Operators::Null => panic!()
}
);
if i >= 3 { i -= 2; }
} else { } else {
// This operator has lower precedence than another. // The operator to the right has higher precedence.
// skip it for now. // Move on, don't do anything yet.
i += 2; i += 2;
return Ok(i);
};
};
}
/// Handles the left-unary operator (e.g. a negative sign) stored at
/// `g_inner[i]`.
///
/// Depending on what follows the operator, this either
/// - reports a syntax error (nothing follows it, or a binary / right-unary
///   operator follows it),
/// - leaves the sequence untouched and returns a later index to look at, or
/// - replaces the operator and its argument with a single tree node.
///
/// Returns the index at which the caller should continue scanning.
///
/// # Panics
/// Panics if `g_inner[i]` is not a `Token::PreOperator`, or if the token
/// two places to the right exists and is not a `Token::PreOperator`.
fn treeify_unaryleft(
    mut i: usize,
    g_inner: &mut VecDeque<Token>
) -> Result<usize, (LineLocation, ParserError)> {
    let this: &Token = &g_inner[i];

    // A left-unary operator at the very end of the expression has no argument.
    if i + 1 >= g_inner.len() {
        return match this {
            Token::PreOperator(l, _) => Err((*l, ParserError::Syntax)),
            _ => panic!()
        };
    }

    if let Token::PreOperator(l, o) = &g_inner[i + 1] {
        return match o {
            // Another left-unary operator follows: let it claim its
            // argument first, this operator is revisited afterwards.
            Operators::Negative => Ok(i + 1),

            // Binary operators
            Operators::ModuloLong |
            Operators::Subtract |
            Operators::Add |
            Operators::Divide |
            Operators::Multiply |
            Operators::ImplicitMultiply |
            Operators::Modulo |
            Operators::Power |
            // Right unary operators
            Operators::Factorial
            => {
                // Binary and right-unary operators cannot directly
                // follow a left-unary operator. The reported span runs
                // from the start of this operator to the end of the
                // offending one.
                let start = get_line_location(this).pos;
                Err((
                    LineLocation{pos: start, len: l.pos - start + l.len},
                    ParserError::Syntax
                ))
            }
        };
    }

    // Precedence of this operator
    let this_val: isize = match this {
        Token::PreOperator(_, q) => *q as isize,
        _ => panic!()
    };

    // Precedence of the operator contesting its argument, if there is one.
    let right_val: Option<isize> = match g_inner.get(i + 2) {
        Some(Token::PreOperator(_, q)) => Some(*q as isize),
        Some(_) => panic!(),
        None => None
    };

    let owns_argument = match right_val {
        None => true,
        Some(contender) => this_val > contender
    };

    if !owns_argument {
        // The operator to the right has equal or higher precedence.
        // Move on, don't do anything yet.
        i += 2;
        return Ok(i);
    }

    // Pull out the operator and its argument (which slides into slot `i`
    // once the operator is removed) and put a single node in their place.
    let this = g_inner.remove(i).unwrap();
    let right = g_inner.remove(i).unwrap();
    let k = match this {
        Token::PreOperator(_, k) => k,
        _ => panic!()
    };

    // NOTE(review): only one argument is ever stored here; capacity 3 is
    // kept as in the original.
    let mut new_token_args: VecDeque<Token> = VecDeque::with_capacity(3);
    new_token_args.push_back(right);
    g_inner.insert(i, select_op(k, new_token_args));

    // Step back one slot (but not below zero) so that an operator to the
    // left gets re-examined.
    Ok(i.saturating_sub(1))
}
fn treeify_unaryright(
mut i: usize,
g_inner: &mut VecDeque<Token>
) -> Result<usize, (LineLocation, ParserError)> {
let this: &Token = &g_inner[i];
let left: &Token = {
if i > 0 {
&g_inner[i-1]
} else {
let l = match this {
Token::PreOperator(l, _) => l,
_ => panic!()
};
return Err((*l, ParserError::Syntax));
}
};
// We need to check the element after unary right operators too.
// Bad syntax like `3!3` won't be caught otherwise.
let right: Option<&Token> = {
if i < g_inner.len()-1 {
Some(&g_inner[i+1])
} else {None}
};
if right.is_some() {
if let Token::PreOperator(l, o) = right.unwrap() {
match o {
// Left unary operators
Operators::Negative => {
let LineLocation { pos: posa, .. } = *get_line_location(&this);
let LineLocation { pos: posb, len: lenb } = *l;
return Err((
LineLocation{pos: posa, len: posb - posa + lenb},
ParserError::Syntax
));
},
_ => {},
};
} else {
return Err((
*get_line_location(&this),
ParserError::Syntax
));
} }
println!("{}", i);
} }
if let Token::PreOperator(l, _) = left {
let LineLocation { pos: posa, .. } = *get_line_location(&this);
let LineLocation { pos: posb, len: lenb } = *l;
return Err((
LineLocation{pos: posa, len: posb - posa + lenb},
ParserError::Syntax
));
} else {
// Precedence of this operator
let this_val: isize = match this {
Token::PreOperator(_, q) => *q as isize,
_ => panic!()
};
// Precedence of the operator contesting its argument.
let left_val = if i >= 2 {
match &g_inner[i-2] {
Token::PreOperator(_, q) => Some(*q as isize),
_ => panic!()
}
} else { None };
if left_val.is_none() || this_val > left_val.unwrap() {
let this = g_inner.remove(i).unwrap();
let left = g_inner.remove(i-1).unwrap();
let k = match this {
Token::PreOperator(_, k) => k,
_ => panic!()
};
let mut new_token_args: VecDeque<Token> = VecDeque::with_capacity(3);
new_token_args.push_back(left);
g_inner.insert(i-1, select_op(k, new_token_args));
if i > 2 { i -= 2; } else { i = 0; }
return Ok(i);
} else {
// The operator to the left has equal or higher precedence.
// Move on, don't do anything yet.
i += 1;
return Ok(i);
};
};
}
pub fn treeify(
g: &mut Token,
) -> Result<(), (LineLocation, ParserError)> {
let g_inner: &mut VecDeque<Token> = match g {
Token::PreGroup(_, ref mut x) => x,
_ => panic!()
};
let mut i: usize = 0;
while g_inner.len() > 1 {
let this_op = match &g_inner[i] {
Token::PreOperator(_, o) => o,
_ => { i+=1; continue; }
};
match this_op {
Operators::ModuloLong |
Operators::Subtract |
Operators::Add |
Operators::Divide |
Operators::Multiply |
Operators::ImplicitMultiply |
Operators::Modulo |
Operators::Power
=> { i = treeify_binary(i, g_inner)?; },
Operators::Negative
=> { i = treeify_unaryleft(i, g_inner)?; },
Operators::Factorial
=> { i = treeify_unaryright(i, g_inner)?; }
};
}
return Ok(()); return Ok(());
} }