Restructured packages

This commit is contained in:
2023-06-11 13:53:45 -07:00
parent 813c1e7292
commit af2eb39fd5
25 changed files with 850 additions and 833 deletions

View File

@ -0,0 +1,96 @@
use std::collections::VecDeque;
use super::super::{
PreToken,
LineLocation
};
/// Rewrites tokens that have a prettier spelling (`*` -> `×`, `alpha` -> `α`, ...)
/// and reports every replacement that was made.
///
/// Returns `(subs, tokens)`:
/// - `subs` holds one entry per replaced token: the token's location *before*
///   any substitution, paired with the replacement text. Entries are ordered
///   from the last token to the first (presumably so a caller can apply them
///   in order without invalidating earlier positions — confirm against callers).
/// - `tokens` is the input sequence in its original order, with the replaced
///   text written into the tokens and every location shifted left by the
///   number of characters removed in front of it.
pub fn find_subs(
    mut g: VecDeque<PreToken>,
) -> (
    Vec<(LineLocation, String)>,
    VecDeque<PreToken>
) {
    // Array of replacements
    let mut r: Vec<(LineLocation, String)> = Vec::with_capacity(8);

    // Characters removed by the substitutions seen so far.
    // Only tokens *after* a substitution move, so we must walk front to back.
    let mut offset: usize = 0;

    for t in g.iter_mut() {
        let target: Option<&'static str> = match &mut *t {
            PreToken::PreOperator(_, s) => {
                // NOTE(review): the replacements for "sqrt" and "rt" are empty
                // strings as written here — possibly glyphs that were lost
                // somewhere along the way. Confirm.
                let target = match &s[..] {
                    "*" => Some("×"),
                    "/" => Some("÷"),
                    "sqrt" => Some(""),
                    "rt" => Some(""),
                    _ => None
                };

                // Update token contents too.
                // This makes sure that errors also contain the updated text.
                if let Some(text) = target { *s = String::from(text); }
                target
            },

            PreToken::PreWord(_, s) => {
                let target = match &s[..] {
                    // Greek letters
                    "alpha" => Some("α"),
                    "beta" => Some("β"),
                    "gamma" => Some("γ"),
                    "delta" => Some("δ"),
                    "epsilon" => Some("ε"),
                    "zeta" => Some("ζ"),
                    "eta" => Some("η"),
                    "theta" => Some("θ"),
                    //"iota" => Some("ι"),
                    //"kappa" => Some("κ"),
                    "lambda" => Some("λ"),
                    "mu" => Some("μ"),
                    //"nu" => Some("ν"),
                    "xi" => Some("ξ"),
                    //"omicron" => Some("ο"),
                    "pi" => Some("π"),
                    "rho" => Some("ρ"),
                    "sigma" => Some("σ"),
                    "tau" => Some("τ"),
                    //"upsilon" => Some("υ"),
                    "phi" => Some("φ"),
                    "chi" => Some("χ"),
                    //"psi" => Some("ψ"), Conflict with pound / square inch
                    "omega" => Some("ω"),
                    _ => None
                };

                if let Some(text) = target { *s = String::from(text); }
                target
            },

            _ => None
        };

        let l = t.get_mut_line_location();
        match target {
            None => {
                // Even if nothing changed, we need to update token location
                *l = LineLocation{pos: l.pos - offset, len: l.len};
            },
            Some(text) => {
                // Record the location as it was before any substitution.
                r.push((*l, String::from(text)));

                // Remember the old length *before* overwriting the location,
                // otherwise the difference below is always zero.
                let old_len = l.len;
                let new_len = text.chars().count();
                *l = LineLocation{pos: l.pos - offset, len: new_len};

                // No replacement above is longer than the text it replaces;
                // saturate so a future longer one cannot underflow here.
                offset += old_len.saturating_sub(new_len);
            }
        }
    }

    // Hand the replacements back last-token-first.
    r.reverse();
    return (r, g);
}

View File

@ -0,0 +1,263 @@
use std::collections::VecDeque;
use super::super::{
PreToken,
LineLocation,
ParserError,
Operator
};
/// Resolves the meaning of `+` and `-` operator tokens in `g`, in place.
///
/// A sign is in "sign position" when it is the first token, or when the token
/// before it is an operator that is binary or not left-associative. In sign
/// position a `-` is renamed to `neg` (keeping its location) and a `+` is
/// deleted. Afterwards every adjacent pair of `neg` operators is removed.
///
/// As written this never fails; it always returns `Ok(())`.
fn lookback_signs(
    g: &mut VecDeque<PreToken>
) -> Result<(), (LineLocation, ParserError)> {
    // What to do with the token under the cursor.
    enum Sign {
        Keep,
        Negate(LineLocation),
        Discard
    }

    // Pass 1: turn `-` into `neg` and drop `+` wherever they act as signs.
    let mut idx: usize = 0;
    while idx < g.len() {
        let sign_allowed = if idx == 0 {
            true
        } else {
            match (&g[idx - 1], &g[idx]) {
                (PreToken::PreOperator(_, before), PreToken::PreOperator(_, _)) => {
                    Operator::from_string(before)
                        .map_or(false, |o| o.is_binary() || !o.is_left_associative())
                },
                _ => false
            }
        };

        let verdict = match &g[idx] {
            PreToken::PreOperator(loc, sign) if sign_allowed => {
                if sign == "-" {
                    Sign::Negate(*loc)
                } else if sign == "+" {
                    Sign::Discard
                } else {
                    Sign::Keep
                }
            },
            _ => Sign::Keep
        };

        match verdict {
            Sign::Negate(loc) => {
                g[idx] = PreToken::PreOperator(loc, String::from("neg"));
                idx += 1;
            },
            Sign::Discard => {
                // The deque got shorter, so the cursor already points
                // at the next token. Do not advance.
                let _ = g.remove(idx);
            },
            Sign::Keep => { idx += 1; }
        }
    }

    // Pass 2: two `neg`s in a row cancel out.
    let mut idx: usize = 1;
    while idx < g.len() {
        let double_negative = matches!(
            (&g[idx - 1], &g[idx]),
            (PreToken::PreOperator(_, a), PreToken::PreOperator(_, b))
                if (a == "neg") && (b == "neg")
        );

        if double_negative {
            // Drop both and look at the same position again.
            let _ = g.remove(idx - 1);
            let _ = g.remove(idx - 1);
        } else {
            idx += 1;
        }
    }

    return Ok(());
}
// Inserts implicit operators
fn lookback(
g: &mut VecDeque<PreToken>
) -> Result<(), (LineLocation, ParserError)> {
lookback_signs(g)?;
let mut i: usize = 0;
while i < g.len() {
if i >= 1 {
let a: PreToken = g.remove(i-1).unwrap();
let b: PreToken = g.remove(i-1).unwrap();
match (&a, &b) {
// Insert ImplicitMultiply
(PreToken::PreGroup(_,_), PreToken::PreGroup(l ,_))
| (PreToken::PreGroup(_,_), PreToken::PreQuantity(l,_))
| (PreToken::PreQuantity(_,_), PreToken::PreGroup(l,_))
| (PreToken::PreGroup(_,_), PreToken::PreWord(l,_))
| (PreToken::PreWord(_,_), PreToken::PreGroup(l,_))
| (PreToken::PreQuantity(_,_), PreToken::PreWord(l,_))
| (PreToken::PreWord(_,_), PreToken::PreQuantity(l,_))
| (PreToken::PreWord(_,_), PreToken::PreWord(l,_))
=> {
let loc = LineLocation{pos: l.pos-1, len: 0};
g.insert(i-1, b);
g.insert(i-1, PreToken::PreOperator(
loc,
String::from("i*")
));
g.insert(i-1, a);
},
// Insert implicit multiplications for right-unary operators
(PreToken::PreQuantity(_,_), PreToken::PreOperator(l,s))
| (PreToken::PreGroup(_,_), PreToken::PreOperator(l,s))
| (PreToken::PreWord(_,_), PreToken::PreOperator(l,s))
=> {
let o = Operator::from_string(s);
let loc = LineLocation{pos: l.pos-1, len: 0};
g.insert(i-1, b);
if o.is_some() {
let o = o.unwrap();
if (!o.is_binary()) && (!o.is_left_associative()) {
g.insert(i-1, PreToken::PreOperator(
loc,
String::from("i*")
));
}
}
g.insert(i-1, a);
},
// Insert implicit multiplications for left-unary operators.
(PreToken::PreOperator(_,s), PreToken::PreQuantity(l,_))
| (PreToken::PreOperator(_,s), PreToken::PreGroup(l,_))
| (PreToken::PreOperator(_,s), PreToken::PreWord(l,_))
=> {
let o = Operator::from_string(s);
let loc = LineLocation{pos: l.pos-1, len: 0};
g.insert(i-1, b);
if o.is_some() {
let o = o.unwrap();
if (!o.is_binary()) && o.is_left_associative() {
g.insert(i-1, PreToken::PreOperator(
loc,
String::from("i*")
));
}
}
g.insert(i-1, a);
},
// The following are syntax errors
(PreToken::PreQuantity(la,_), PreToken::PreQuantity(lb,_))
=> {
return Err((
LineLocation{pos: la.pos, len: lb.pos - la.pos + lb.len},
ParserError::Syntax
));
},
_ => {g.insert(i-1, b); g.insert(i-1, a);}
}
}
i += 1;
}
return Ok(());
}
/// Nests a flat token stream into groups, one per pair of parentheses.
///
/// Every finished group (including the outermost one) is run through
/// `lookback` before it is stored. The result is a single `PreGroup` at
/// location `{pos: 0, len: 0}` holding the whole expression.
///
/// Parentheses still open at the end of input are closed automatically
/// instead of being reported as `ParserError::MissingCloseParen`.
///
/// # Errors
/// - `ParserError::ExtraCloseParen` for a `)` with no matching `(`.
/// - `ParserError::EmptyGroup` for a group with nothing inside.
/// - Anything `lookback` reports.
pub fn groupify(
    mut g: VecDeque<PreToken>
) -> Result<
    PreToken,
    (LineLocation, ParserError)
> {
    // Stack of groups being built: where each one opened, and its contents.
    // The bottom entry is the top-level expression and is never popped
    // inside the loops.
    let mut levels: Vec<(LineLocation, VecDeque<PreToken>)> = Vec::with_capacity(8);
    levels.push((LineLocation{pos: 0, len: 0}, VecDeque::with_capacity(8)));

    while let Some(token) = g.pop_front() {
        match token {
            PreToken::PreGroupStart(open) => {
                levels.push((open, VecDeque::with_capacity(8)));
            },

            PreToken::PreGroupEnd(close) => {
                let (open_pos, is_empty) = {
                    let (open, inner) = levels.last().unwrap();
                    (open.pos, inner.len() == 0)
                };

                // From where the current level started to the closing paren.
                let span = LineLocation {
                    pos: open_pos,
                    len: close.len + close.pos - open_pos
                };

                // Only the top level is left: nothing to close.
                if levels.len() == 1 { return Err((span, ParserError::ExtraCloseParen)) }
                if is_empty { return Err((span, ParserError::EmptyGroup)) }

                let (_, mut inner) = levels.pop().unwrap();
                lookback(&mut inner)?;

                let (_, parent) = levels.last_mut().unwrap();
                parent.push_back(PreToken::PreGroup(span, inner));
            },

            other => {
                let (_, current) = levels.last_mut().unwrap();
                current.push_back(other);
            }
        }
    }

    // Auto-close parenthesis
    while levels.len() > 1 {
        let (opened_at, mut inner) = levels.pop().unwrap();
        if inner.len() == 0 { return Err((opened_at, ParserError::EmptyGroup)) }
        lookback(&mut inner)?;

        let (_, parent) = levels.last_mut().unwrap();
        parent.push_back(PreToken::PreGroup(opened_at, inner));
    }

    let (_, mut root) = levels.pop().unwrap();
    lookback(&mut root)?;

    return Ok(PreToken::PreGroup(LineLocation{pos:0, len:0}, root));
}

11
src/parser/stage/mod.rs Normal file
View File

@ -0,0 +1,11 @@
mod tokenize;
mod find_subs;
mod groupify;
mod treeify;
pub (in super) use self::{
tokenize::tokenize,
find_subs::find_subs,
groupify::groupify,
treeify::treeify,
};

View File

@ -0,0 +1,183 @@
use std::collections::VecDeque;
use super::super::{
PreToken,
LineLocation,
Operator
};
/// Called whenever a token is finished.
///
/// Does nothing if `t` is `None`. Otherwise sets the token's length so that
/// it ends at `stop_i`, then appends it to `g` with two adjustments:
/// - a quantity ending in `e` is split into the quantity without the `e`
///   followed by the word `e`;
/// - a word that `Operator::from_string` recognizes becomes an operator.
///
/// # Panics
/// If `t` is a `PreGroup` or a `Container`.
#[inline(always)]
fn push_token(g: &mut VecDeque<PreToken>, t: Option<PreToken>, stop_i: usize) {
    let Some(mut token) = t else { return };

    // The token runs from where it started up to `stop_i`.
    match &mut token {
        PreToken::PreGroupStart(l)
        | PreToken::PreGroupEnd(l)
        | PreToken::PreOperator(l, _)
        | PreToken::PreQuantity(l, _)
        | PreToken::PreWord(l, _)
        => {
            l.len = stop_i - l.pos;
        },

        PreToken::PreGroup(_,_)
        | PreToken::Container(_)
        => panic!()
    };

    // `2e` isn't exponential notation, it's 2*e.
    // If a number ends in `e`, disconnect the `e` and make it a word.
    if let PreToken::PreQuantity(l, s) = &token {
        if s.ends_with('e') {
            let digits = &s[..s.len()-1];
            g.push_back(PreToken::PreQuantity(
                LineLocation { pos: l.pos, len: l.len-1 },
                String::from(digits)
            ));
            g.push_back(PreToken::PreWord(
                LineLocation { pos: l.pos + l.len - 1, len: 1 },
                String::from("e")
            ));
            return;
        }
    }

    // Words that name an operator are operators.
    let as_operator = match &token {
        PreToken::PreWord(l, s) if Operator::from_string(s).is_some() => {
            Some(PreToken::PreOperator(*l, s.clone()))
        },
        _ => None
    };
    if let Some(op) = as_operator { token = op; }

    g.push_back(token);
}
/// Turns a string into Tokens. First stage of parsing.
///
/// Reads `input` one character at a time, growing the current token while
/// the character fits it and handing it to `push_token` when it does not.
/// Token positions are character indices, not byte offsets.
///
/// Character classes:
/// - `0`-`9`, `.` and `,` build a quantity; a comma is always stored as a dot;
/// - `e` extends a word or a quantity, otherwise it starts a word;
/// - `-` and `+` extend a quantity that ends in `e` (as in `2e-2`),
///   otherwise they start an operator;
/// - `*`, `×`, `/`, `÷`, `^`, `!`, `%` build an operator;
/// - `(` and `)` open and close a group;
/// - a space ends the current token;
/// - anything else builds a word.
pub fn tokenize(input: &String) -> VecDeque<PreToken> {
    let mut t: Option<PreToken> = None; // The current token we're reading
    let mut g: VecDeque<PreToken> = VecDeque::with_capacity(32);

    for (i, c) in input.chars().enumerate() {
        match c {
            // Number
            // Commas act just like dots.
            ',' | '.' | '0'..='9' => {
                // Normalize here so a comma that *starts* a number
                // is treated the same as one in the middle of it.
                let digit = if c == ',' {'.'} else {c};
                match &mut t {
                    // If we're already building a number,
                    // append.
                    Some(PreToken::PreQuantity(_, val)) => {
                        val.push(digit);
                    },

                    // If we're not building a number, finalize
                    // previous token and start one.
                    _ => {
                        push_token(&mut g, t, i);
                        t = Some(PreToken::PreQuantity(LineLocation{pos: i, len: 0}, String::from(digit)));
                    }
                };
            },

            // 'e' needs special treatment.
            // Can be both a word or a number.
            'e' => {
                match &mut t {
                    Some(PreToken::PreWord(_, val))
                    | Some(PreToken::PreQuantity(_, val)) => { val.push(c); },
                    _ => {
                        push_token(&mut g, t, i);
                        t = Some(PreToken::PreWord(LineLocation{pos: i, len: 0}, String::from(c)));
                    }
                };
            }

            // The minus sign also needs special treatment.
            // It can be the `neg` operator, the `minus` operator,
            // or it can specify a negative exponent.
            '-' | '+' => {
                match &mut t {
                    // If the current number ends in an `e`,
                    // this sign belongs to the exponent,
                    // like 2e-2 = 0.02.
                    Some(PreToken::PreQuantity(_, val)) if val.ends_with('e') => {
                        val.push(c);
                    },

                    // Otherwise, end whatever we were reading.
                    // This may be a sign or an addition/subtraction.
                    _ => {
                        push_token(&mut g, t, i);
                        t = Some(PreToken::PreOperator(
                            LineLocation{pos: i, len: 1},
                            String::from(c)
                        ));
                    }
                };
            },

            // Operator
            '*'|'×'|'/'|'÷'|
            '^'|'!'|'%'
            => {
                match &mut t {
                    Some(PreToken::PreOperator(_, val)) => { val.push(c); },
                    _ => {
                        push_token(&mut g, t, i);
                        t = Some(PreToken::PreOperator(LineLocation{pos: i, len: 0}, String::from(c)));
                    }
                };
            },

            // Group
            '(' => {
                push_token(&mut g, t, i);
                t = Some(PreToken::PreGroupStart(LineLocation{pos: i, len: 0}));
            },
            ')' => {
                push_token(&mut g, t, i);
                t = Some(PreToken::PreGroupEnd(LineLocation{pos: i, len: 0}));
            },

            // Space. Basic separator.
            ' ' => {
                push_token(&mut g, t, i);
                t = None;
            }

            // Word
            _ => {
                match &mut t {
                    Some(PreToken::PreWord(_, val)) => { val.push(c); },
                    _ => {
                        push_token(&mut g, t, i);
                        t = Some(PreToken::PreWord(LineLocation{pos: i, len: 0}, String::from(c)));
                    }
                };
            }
        };
    }

    // Flush whatever token was still open at the end of input.
    push_token(&mut g, t, input.chars().count());

    return g;
}

350
src/parser/stage/treeify.rs Normal file
View File

@ -0,0 +1,350 @@
use std::collections::VecDeque;
use super::super::{
PreToken,
ParserError,
LineLocation,
Token,
Operator
};
fn treeify_binary(
i: usize,
g_inner: &mut VecDeque<PreToken>
) -> Result<bool, (LineLocation, ParserError)> {
let this: &PreToken = &g_inner[i];
if i == 0 {
// This binary operator is at the end of an expression.
let l = match this {
PreToken::PreOperator(l, _) => l,
_ => panic!()
};
return Err((*l, ParserError::Syntax));
}
let left = {
if i > 0 {
&g_inner[i-1]
} else {
let l = match this {
PreToken::PreOperator(l, _) => l,
_ => panic!()
};
return Err((*l, ParserError::Syntax));
}
};
let right = {
if i < g_inner.len()-1 {
&g_inner[i+1]
} else {
let l = match this {
PreToken::PreOperator(l, _) => l,
_ => panic!()
};
return Err((*l, ParserError::Syntax));
}
};
if let PreToken::PreOperator(l, s) = left {
let o = Operator::from_string(s);
if o.is_none() { return Err((*l, ParserError::Syntax)); }
let o = o.unwrap();
if {
(!o.is_binary()) &&
o.is_left_associative()
} {
return Ok(false);
} else {
let tl = *this.get_line_location();
return Err((
LineLocation{pos: l.pos, len: tl.pos - l.pos + tl.len},
ParserError::Syntax
));
}
}
if let PreToken::PreOperator(l, s) = right {
let o = Operator::from_string(s);
if o.is_none() { return Err((*l, ParserError::Syntax)); }
let o = o.unwrap();
if {
(!o.is_binary()) &&
!o.is_left_associative()
} {
return Ok(false);
} else {
let tl = *this.get_line_location();
return Err((
LineLocation{pos: tl.pos, len: l.pos - tl.pos + l.len},
ParserError::Syntax
));
}
}
// This operator
let this_op = {
let PreToken::PreOperator(l, s) = this else {panic!()};
let o = Operator::from_string(s);
if o.is_none() { return Err((*l, ParserError::Syntax)); }
o.unwrap()
};
// The operators contesting our arguments
let left_op = if i > 1 {
let PreToken::PreOperator(l, s) = &g_inner[i-2] else {panic!()};
let o = Operator::from_string(s);
if o.is_none() { return Err((*l, ParserError::Syntax)); }
Some(o.unwrap())
} else { None };
let right_op = if i < g_inner.len()-2 {
let PreToken::PreOperator(l, s) = &g_inner[i+2] else {panic!()};
let o = Operator::from_string(s);
if o.is_none() { return Err((*l, ParserError::Syntax)); }
Some(o.unwrap())
} else { None };
if {
(left_op.is_none() || this_op >= left_op.unwrap()) &&
(right_op.is_none() || this_op >= right_op.unwrap())
} {
// This operator has higher precedence, it takes both arguments
let left_pre = g_inner.remove(i-1).unwrap();
let this_pre = g_inner.remove(i-1).unwrap();
let right_pre = g_inner.remove(i-1).unwrap();
let left: Token; let right: Token;
if let PreToken::PreGroup(_, _) = right_pre { right = treeify(right_pre)?; } else {right = right_pre.to_token()?;}
if let PreToken::PreGroup(_, _) = left_pre { left = treeify(left_pre)?; } else {left = left_pre.to_token()?;}
let o = {
let PreToken::PreOperator(_, s) = this_pre else {panic!()};
let o = Operator::from_string(&s);
if o.is_none() { panic!() }
o.unwrap()
};
let mut new_token_args: VecDeque<Token> = VecDeque::with_capacity(2);
new_token_args.push_back(left);
new_token_args.push_back(right);
g_inner.insert(i-1, PreToken::Container(o.into_token(new_token_args)));
return Ok(true);
} else {
return Ok(false);
};
}
/// Tries to fold the unary operator at index `i` of `g_inner`, together with
/// its argument, into a single `Container` token.
///
/// When `left_associative` is true the argument is the token to the left of
/// the operator, otherwise it is the token to the right.
///
/// Returns `Ok(true)` if the two tokens were replaced by one, and
/// `Ok(false)` if nothing was changed because the operator on the far side
/// of the argument has to be folded first.
///
/// # Errors
/// `ParserError::Syntax` when the argument is missing, when the argument is
/// itself an operator, when the token on the other side of this operator is
/// not an operator, or when an operator name is not recognized. Errors from
/// converting the argument are passed through.
///
/// # Panics
/// If the token at `i`, or the token two places away on the argument's side,
/// is expected to be an operator and is not.
fn treeify_unary(
    i: usize,
    g_inner: &mut VecDeque<PreToken>,
    left_associative: bool
) -> Result<bool, (LineLocation, ParserError)> {
    let this: &PreToken = &g_inner[i];

    // Where our argument lives, if there is room for one.
    let arg_index: Option<usize> = if left_associative {
        i.checked_sub(1)
    } else if i + 1 < g_inner.len() {
        Some(i + 1)
    } else {
        None
    };

    let Some(arg_index) = arg_index else {
        let PreToken::PreOperator(l, _) = this else { panic!() };
        return Err((*l, ParserError::Syntax));
    };
    let next: &PreToken = &g_inner[arg_index];

    // We need to check the element after unary operators too.
    // Bad syntax like `3!3` won't be caught otherwise.
    let prev: Option<&PreToken> = if left_associative {
        g_inner.get(i + 1)
    } else {
        i.checked_sub(1).map(|p| &g_inner[p])
    };

    if let Some(p) = prev {
        if !matches!(p, PreToken::PreOperator(_,_)) {
            return Err((
                *this.get_line_location(),
                ParserError::Syntax
            ));
        }
    }

    // An operator cannot be the argument of another operator.
    if let PreToken::PreOperator(l, _) = next {
        let tl = *this.get_line_location();

        // Report both tokens. The argument may be on either side of us,
        // so build the span from the smaller start to the larger end
        // rather than assuming it lies to the right.
        let start = tl.pos.min(l.pos);
        let end = (tl.pos + tl.len).max(l.pos + l.len);
        return Err((
            LineLocation{pos: start, len: end - start},
            ParserError::Syntax
        ));
    }

    // This operator
    let this_op = {
        let PreToken::PreOperator(l, s) = this else {panic!()};
        match Operator::from_string(s) {
            Some(o) => o,
            None => return Err((*l, ParserError::Syntax))
        }
    };

    // The operator contesting our argument sits on the far side of it.
    let contender_index: Option<usize> = if left_associative {
        if i > 1 { Some(i - 2) } else { None }
    } else {
        if i + 2 < g_inner.len() { Some(i + 2) } else { None }
    };

    let next_op = match contender_index {
        Some(k) => {
            let PreToken::PreOperator(l, s) = &g_inner[k] else {panic!()};
            match Operator::from_string(s) {
                Some(o) => Some(o),
                None => return Err((*l, ParserError::Syntax))
            }
        },
        None => None
    };

    if !next_op.map_or(true, |other| this_op > other) {
        // The contesting operator binds at least as tightly.
        // Move on, don't do anything yet.
        return Ok(false);
    }

    let this_pre = g_inner.remove(i).unwrap();

    // With the operator gone, the argument is at i-1 (left) or has
    // shifted down into i (right). The result goes into the same slot.
    let slot = if left_associative { i - 1 } else { i };
    let next_pre = g_inner.remove(slot).unwrap();

    let arg: Token = if let PreToken::PreGroup(_, _) = next_pre {
        treeify(next_pre)?
    } else {
        next_pre.to_token()?
    };

    let PreToken::PreOperator(_, name) = this_pre else {panic!()};
    let Some(o) = Operator::from_string(&name) else { panic!() };

    let mut new_token_args: VecDeque<Token> = VecDeque::with_capacity(1);
    new_token_args.push_back(arg);

    g_inner.insert(slot, PreToken::Container(o.into_token(new_token_args)));

    return Ok(true);
}
/// Turns a group of tokens into a tree of `Token`s.
///
/// Sweeps back and forth over the group until a single token is left:
/// a left-to-right sweep folds left-associative operators; when the cursor
/// runs off the right end, a right-to-left sweep folds the remaining ones;
/// when that runs off the left end the left-to-right sweep starts again.
/// Folding is done by `treeify_binary` and `treeify_unary`.
///
/// # Errors
/// - `ParserError::EmptyGroup` if the group contains no tokens.
/// - `ParserError::Syntax` for an unknown operator name, or if the one
///   remaining token is an operator.
/// - Anything reported while folding or converting tokens.
///
/// # Panics
/// If `g` is not a `PreGroup`.
pub fn treeify(
    mut g: PreToken,
) -> Result<Token, (LineLocation, ParserError)> {
    let (group_location, g_inner): (LineLocation, &mut VecDeque<PreToken>) = match g {
        PreToken::PreGroup(l, ref mut x) => (l, x),
        _ => panic!()
    };

    // Nothing to build a tree from. Without this check the final
    // `pop_front().unwrap()` would panic (e.g. on blank input).
    if g_inner.is_empty() {
        return Err((group_location, ParserError::EmptyGroup));
    }

    let mut left_associative = true;

    // Cursor. Signed, because the right-to-left sweep steps past index 0.
    let mut j: i64 = 0;

    while g_inner.len() > 1 {
        // Turn around whenever the cursor leaves the group.
        if j <= -1 {
            left_associative = true;
            j = 0;
        } else if j >= g_inner.len() as i64 {
            left_associative = false;
            j = (g_inner.len() - 1) as i64;
        }

        let i = j as usize;

        // Convert preoperators
        // If not an operator, move on.
        let this_op = match &g_inner[i] {
            PreToken::PreOperator(l, s) => {
                match Operator::from_string(s) {
                    Some(o) => o,
                    None => return Err((*l, ParserError::Syntax))
                }
            },
            _ => {
                if left_associative { j += 1 } else { j -= 1 };
                continue;
            }
        };

        if left_associative {
            let changed = if this_op.is_left_associative() {
                if this_op.is_binary() {
                    treeify_binary(i, g_inner)?
                } else {
                    treeify_unary(i, g_inner, left_associative)?
                }
            } else {
                false
            };

            // We only need to change j if we don't treeify.
            // If the array length changes, j will point to the next
            // element automatically.
            if !changed { j += 1; }
        } else {
            if !this_op.is_left_associative() {
                if this_op.is_binary() {
                    treeify_binary(i, g_inner)?;
                } else {
                    treeify_unary(i, g_inner, left_associative)?;
                }
            }
            // On the way back the cursor always moves, folded or not.
            j -= 1
        }
    }

    // Exactly one token is left at this point.
    let g = g_inner.pop_front().unwrap();
    return match g {
        // Catch edge cases
        PreToken::PreOperator(l, _) => {
            Err((l, ParserError::Syntax))
        },
        PreToken::PreGroup(_,_) => {
            treeify(g)
        },
        _ => { Ok(g.to_token()?) }
    };
}