Updated parser

pull/2/head
Mark 2023-03-19 17:46:30 -07:00
parent 883401cf66
commit 79be72903f
Signed by: Mark
GPG Key ID: AD62BB059C2AAEE4
2 changed files with 174 additions and 25 deletions

View File

@ -2,6 +2,7 @@ use std::io;
use std::io::Write;
//use std::io::Read;
use std::sync::Arc;
use std::collections::VecDeque;
use std::sync::atomic::{AtomicBool, Ordering};
use termcolor::{
@ -59,6 +60,75 @@ fn prompt(
Ok(())
}
/// Fold tokens matching `check` in a token tree into combined tokens.
///
/// `exp` must be a `Token::Group`; any other variant panics.
/// `check` selects the tokens to fold, `op_type` selects the fold
/// strategy (only `2` — binary infix — is implemented; `0` and `1` are
/// accepted but currently do nothing), and `new_token` builds the
/// replacement token from the collected operands.
///
/// Returns `Ok` with the rewritten expression. The `Err` variant is
/// never produced by the current code; it is reserved for future
/// syntax checking.
///
/// Panics (for `op_type == 2`) if a matching operator token is the
/// first or last token of its group — there is no operand to consume.
fn treefold(
    mut exp: tokenize::Token, // Must be a group
    check: fn(&tokenize::Token) -> bool,
    op_type: u8,
    new_token: fn(VecDeque<tokenize::Token>) -> tokenize::Token,
) -> Result<tokenize::Token, ()> {
    // Queue of groups left to process.
    // NOTE(review): nested groups encountered below are never enqueued
    // here, so only the top-level group is folded — confirm whether
    // recursing into sub-groups is intended.
    let mut t_vec: VecDeque<&mut tokenize::Token> = VecDeque::with_capacity(32);
    t_vec.push_back(&mut exp);
    while let Some(g) = t_vec.pop_front() {
        // The group we're currently working with.
        let g_inner: &mut VecDeque<tokenize::Token> = match g {
            tokenize::Token::Group(ref mut x) => x,
            _ => panic!()
        };
        // Rewritten token list for this group.
        let mut new: VecDeque<tokenize::Token> = VecDeque::with_capacity(8);
        while let Some(t) = g_inner.pop_front() {
            if check(&t) {
                match op_type {
                    0 => {},
                    1 => {},
                    // Binary infix: consume the previous (already emitted)
                    // and next (not yet consumed) tokens as operands.
                    2 => {
                        let last: tokenize::Token = new.pop_back().unwrap();
                        let next: tokenize::Token = g_inner.pop_front().unwrap();
                        let mut new_token_args: VecDeque<tokenize::Token> = VecDeque::with_capacity(2);
                        new_token_args.push_back(last);
                        new_token_args.push_back(next);
                        new.push_back(new_token(new_token_args));
                    },
                    _ => panic!()
                };
            } else {
                // `t` is owned here; no clone needed to keep it.
                new.push_back(t);
            }
        }
        *g_inner = new;
    }
    Ok(exp)
}
/// Returns `true` iff the token is the `*` operator.
fn is_mult(t: &tokenize::Token) -> bool {
    matches!(t, tokenize::Token::Operator(s) if s == "*")
}
/// Wrap a sequence of factors into a `Token::Mult` node.
/// Used as the `new_token` callback when folding `*` with `treefold`.
fn new_mult(v: VecDeque<tokenize::Token>) -> tokenize::Token {
	tokenize::Token::Mult(v)
}
fn main() -> Result<(), std::io::Error> {
let mut stdout = StandardStream::stdout(ColorChoice::Always);
@ -78,7 +148,7 @@ fn main() -> Result<(), std::io::Error> {
// Tokenize input.
// Fail if we encounter invalid characters.
let tokens = match tokenize::tokenize(&input) {
let exp = match tokenize::tokenize(&input) {
Ok(v) => v,
Err(_) => {
continue;
@ -91,8 +161,67 @@ fn main() -> Result<(), std::io::Error> {
stdout.reset()?;
write!(stdout, "Got {input}\n\n\n")?;
writeln!(stdout, "Tokenized: {tokens:#?}")?;
//writeln!(stdout, "Tokenized: {exp:#?}")?;
let q = treefold(
exp,
is_mult,
2,
new_mult
);
writeln!(stdout, "{q:#?}")?;
/*
// Groups to process
let mut t_vec: VecDeque<tokenize::Token> = VecDeque::with_capacity(32);
t_vec.push_back(exp);
while t_vec.len() > 0 {
let g: tokenize::Token = t_vec.pop_front().unwrap();
let mut g_inner: Vec<tokenize::Token> = match g {
tokenize::Token::Group(x) => x,
_ => panic!()
};
let mut new: Vec<tokenize::Token> = Vec::with_capacity(8);
// Parse binary operators
for o in ["*", "/", "+", "-"] {
let mut i = g_inner.iter();
loop {
let t = match i.next() {
Some(o) => o,
None => break
};
match t {
tokenize::Token::Operator(s) => {
if s == o {
let last = new.pop().unwrap();
let next = i.next().unwrap();
new.push(tokenize::Token::Op(
String::from(s),
Box::new(last.clone()),
Box::new(next.clone())
))
} else {
new.push(t.clone());
}
},
_ => {
new.push(t.clone());
}
}
}
g_inner = new.clone();
new = Vec::with_capacity(8);
}
writeln!(stdout, "{:?}", g_inner)?;
}
*/
}
writeln!(stdout, "Exiting.")?;

View File

@ -1,13 +1,26 @@
use std::collections::VecDeque;
#[derive(Debug)]
#[derive(Clone)]
// NOTE(review): this span appears to interleave the old and new sides of
// a diff — `Operator(String)` and `Group(..)` each occur twice below,
// which cannot compile. Keep exactly one of each duplicated variant
// (the VecDeque-based `Group` and the "tree folding" comments look like
// the newer side — confirm against the actual post-commit file).
pub enum Token {
// Only used after tokenizing
Negative,
Factorial,
Group(VecDeque<Token>), // Will be expanded during tree folding
Operator(String), // Will become Ops during tree folding
// Used in both.
Number(String),
Operator(String), // NOTE(review): duplicate of the variant above — diff artifact
Word(String),
Group(Vec<Token>), // NOTE(review): duplicate Group — diff artifact
// Only used in tree
Op(String, Box<Token>, Box<Token>),
Mult(VecDeque<Token>)
}
/// Turn a string into a set of tokens.
/// Does not check syntax. Fails if `input` contains an invalid character.
//
@ -18,33 +31,40 @@ pub enum Token {
// * `Ok(Vec<token>)` if we were successful.
// * `Err(())` if we couldn't tokenize this string.
pub fn tokenize(input: &String) -> Result<Token, ()> {
let mut t: Option<Token> = None;
let mut g: Vec<Token> = Vec::with_capacity(8);
g.push(Token::Group(Vec::with_capacity(8)));
let mut t: Option<Token> = None; // The current token we're reading
let mut g: Vec<Token> = Vec::with_capacity(8); // Vector of "grouping levels"
g.push(Token::Group(VecDeque::with_capacity(8)));
for c in input.chars() {
let v_now: &mut Vec<Token> = match g.last_mut().unwrap() {
// The grouping level we're on now
let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() {
Token::Group(ref mut x) => x,
_ => panic!()
};
match c {
'!' => {
if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
g_now.push_back(Token::Factorial);
},
// Minus sign can be both a Negative and an Operator.
// Needs special treatment.
'-' => {
if t.is_some() { v_now.push(t.unwrap()); t = None; }
match v_now.last() {
if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
match g_now.back() {
// If previous token was any of the following,
// this is the "minus" operator
Some(Token::Number(_)) |
Some(Token::Group(_)) |
Some(Token::Word(_)) => {
v_now.push(Token::Operator(String::from(c)));
g_now.push_back(Token::Operator(String::from(c)));
},
// Otherwise, this is a negative sign.
_ => { t = Some(Token::Negative); }
_ => { g_now.push_back(Token::Negative); }
};
},
@ -61,7 +81,7 @@ pub fn tokenize(input: &String) -> Result<Token, ()> {
// If we're not building a number, finalize
// previous token and start one.
_ => {
if t.is_some() { v_now.push(t.unwrap()); }
if t.is_some() { g_now.push_back(t.unwrap()); }
t = Some(Token::Number(String::from(c)));
}
};
@ -80,7 +100,7 @@ pub fn tokenize(input: &String) -> Result<Token, ()> {
// If we're not building a number, finalize
// previous token and start one.
_ => {
if t.is_some() { v_now.push(t.unwrap()); }
if t.is_some() { g_now.push_back(t.unwrap()); }
t = Some(Token::Word(String::from(c)));
}
};
@ -91,31 +111,31 @@ pub fn tokenize(input: &String) -> Result<Token, ()> {
// Always one character
'+' | '*' | '/' | '^' => {
// Finalize previous token
if t.is_some() { v_now.push(t.unwrap()); t = None; }
v_now.push(Token::Operator(String::from(c)));
if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
g_now.push_back(Token::Operator(String::from(c)));
}
// Groups
// Always one character
'(' => {
if t.is_some() { v_now.push(t.unwrap()); t = None; }
g.push(Token::Group(Vec::with_capacity(8)));
if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
g.push(Token::Group(VecDeque::with_capacity(8)));
},
')' => {
if t.is_some() { v_now.push(t.unwrap()); t = None; }
if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
let new_group: Token = g.pop().unwrap();
let v_now: &mut Vec<Token> = match g.last_mut().unwrap() {
let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() {
Token::Group(ref mut x) => x,
_ => panic!()
};
v_now.push(new_group);
g_now.push_back(new_group);
},
// Space. Basic seperator.
' ' => {
if t.is_some() { v_now.push(t.unwrap()); t = None; }
if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
}
// Invalid token
@ -124,11 +144,11 @@ pub fn tokenize(input: &String) -> Result<Token, ()> {
}
let v_now: &mut Vec<Token> = match g.last_mut().unwrap() {
let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() {
Token::Group(ref mut x) => x,
_ => panic!()
};
if t.is_some() { v_now.push(t.unwrap()); }
if t.is_some() { g_now.push_back(t.unwrap()); }
return Ok(Token::Group(v_now.to_vec()));
return Ok(g.pop().unwrap());
}