Updated parser

pull/2/head
Mark 2023-03-19 17:46:30 -07:00
parent 883401cf66
commit 79be72903f
Signed by: Mark
GPG Key ID: AD62BB059C2AAEE4
2 changed files with 174 additions and 25 deletions

View File

@ -2,6 +2,7 @@ use std::io;
use std::io::Write; use std::io::Write;
//use std::io::Read; //use std::io::Read;
use std::sync::Arc; use std::sync::Arc;
use std::collections::VecDeque;
use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::atomic::{AtomicBool, Ordering};
use termcolor::{ use termcolor::{
@ -59,6 +60,75 @@ fn prompt(
Ok(()) Ok(())
} }
/// Fold matching operator tokens in a token tree into composite nodes.
///
/// Walks `exp` (which must be a `Group`), recursing into nested groups,
/// and wherever `check` matches a token, combines surrounding tokens
/// according to `op_type` using the `new_token` constructor.
///
/// `op_type`:
/// * `0`, `1` — reserved; the matched token is currently dropped (stubs).
/// * `2` — binary infix: the previous and the next token become the two
///   arguments passed to `new_token`.
///
/// Returns `Err(())` if `exp` is not a group, `op_type` is unknown, or a
/// binary operator is missing an operand (e.g. `* 2` or `2 *`).
fn treefold(
    exp: tokenize::Token, // Must be a group
    check: fn(&tokenize::Token) -> bool,
    op_type: u8,
    new_token: fn(VecDeque<tokenize::Token>) -> tokenize::Token,
) -> Result<tokenize::Token, ()> {
    let mut inner: VecDeque<tokenize::Token> = match exp {
        tokenize::Token::Group(x) => x,
        // Not a group: report the caller error instead of panicking.
        _ => return Err(()),
    };
    // Rebuilt contents of this group.
    let mut new: VecDeque<tokenize::Token> = VecDeque::with_capacity(inner.len());
    while let Some(t) = inner.pop_front() {
        // Fold nested groups first so operands are already reduced
        // by the time we combine them.
        let t = match t {
            g @ tokenize::Token::Group(_) => treefold(g, check, op_type, new_token)?,
            other => other,
        };
        if check(&t) {
            match op_type {
                0 => {}
                1 => {}
                2 => {
                    // Binary infix: previous and next tokens are the operands.
                    // A missing operand (operator at the start or end of the
                    // group) is a syntax error, not a panic.
                    let last: tokenize::Token = new.pop_back().ok_or(())?;
                    let next: tokenize::Token = match inner.pop_front().ok_or(())? {
                        // The right operand may itself be an unreduced group.
                        g @ tokenize::Token::Group(_) => treefold(g, check, op_type, new_token)?,
                        other => other,
                    };
                    let mut new_token_args: VecDeque<tokenize::Token> = VecDeque::with_capacity(2);
                    new_token_args.push_back(last);
                    new_token_args.push_back(next);
                    new.push_back(new_token(new_token_args));
                }
                // Unknown fold mode.
                _ => return Err(()),
            }
        } else {
            new.push_back(t);
        }
    }
    Ok(tokenize::Token::Group(new))
}
/// Returns `true` if `t` is the `*` operator token.
/// Used as the `check` predicate argument to `treefold`.
fn is_mult(t: &tokenize::Token) -> bool {
    matches!(t, tokenize::Token::Operator(s) if s == "*")
}
/// Wrap a sequence of operand tokens in a multiplication node.
/// Used as the `new_token` constructor argument to `treefold`.
fn new_mult(v: VecDeque<tokenize::Token>) -> tokenize::Token {
    tokenize::Token::Mult(v)
}
fn main() -> Result<(), std::io::Error> { fn main() -> Result<(), std::io::Error> {
let mut stdout = StandardStream::stdout(ColorChoice::Always); let mut stdout = StandardStream::stdout(ColorChoice::Always);
@ -78,7 +148,7 @@ fn main() -> Result<(), std::io::Error> {
// Tokenize input. // Tokenize input.
// Fail if we encounter invalid characters. // Fail if we encounter invalid characters.
let tokens = match tokenize::tokenize(&input) { let exp = match tokenize::tokenize(&input) {
Ok(v) => v, Ok(v) => v,
Err(_) => { Err(_) => {
continue; continue;
@ -91,8 +161,67 @@ fn main() -> Result<(), std::io::Error> {
stdout.reset()?; stdout.reset()?;
write!(stdout, "Got {input}\n\n\n")?; write!(stdout, "Got {input}\n\n\n")?;
writeln!(stdout, "Tokenized: {tokens:#?}")?; //writeln!(stdout, "Tokenized: {exp:#?}")?;
let q = treefold(
exp,
is_mult,
2,
new_mult
);
writeln!(stdout, "{q:#?}")?;
/*
// Groups to process
let mut t_vec: VecDeque<tokenize::Token> = VecDeque::with_capacity(32);
t_vec.push_back(exp);
while t_vec.len() > 0 {
let g: tokenize::Token = t_vec.pop_front().unwrap();
let mut g_inner: Vec<tokenize::Token> = match g {
tokenize::Token::Group(x) => x,
_ => panic!()
};
let mut new: Vec<tokenize::Token> = Vec::with_capacity(8);
// Parse binary operators
for o in ["*", "/", "+", "-"] {
let mut i = g_inner.iter();
loop {
let t = match i.next() {
Some(o) => o,
None => break
};
match t {
tokenize::Token::Operator(s) => {
if s == o {
let last = new.pop().unwrap();
let next = i.next().unwrap();
new.push(tokenize::Token::Op(
String::from(s),
Box::new(last.clone()),
Box::new(next.clone())
))
} else {
new.push(t.clone());
}
},
_ => {
new.push(t.clone());
}
}
}
g_inner = new.clone();
new = Vec::with_capacity(8);
}
writeln!(stdout, "{:?}", g_inner)?;
}
*/
} }
writeln!(stdout, "Exiting.")?; writeln!(stdout, "Exiting.")?;

View File

@ -1,13 +1,26 @@
use std::collections::VecDeque;
#[derive(Debug)] #[derive(Debug)]
#[derive(Clone)] #[derive(Clone)]
pub enum Token { pub enum Token {
// Only used after tokenizing
Negative, Negative,
Factorial,
Group(VecDeque<Token>), // Will be expanded during tree folding
Operator(String), // Will become Ops during tree folding
// Used in both.
Number(String), Number(String),
Operator(String),
Word(String), Word(String),
Group(Vec<Token>),
// Only used in tree
Op(String, Box<Token>, Box<Token>),
Mult(VecDeque<Token>)
} }
/// Turn a string into a set of tokens. /// Turn a string into a set of tokens.
/// Does not check syntax. Fails if `input` contains an invalid character. /// Does not check syntax. Fails if `input` contains an invalid character.
// //
@ -18,33 +31,40 @@ pub enum Token {
// * `Ok(Token)` if we were successful. // * `Ok(Token)` if we were successful.
// * `Err(())` if we couldn't tokenize this string. // * `Err(())` if we couldn't tokenize this string.
pub fn tokenize(input: &String) -> Result<Token, ()> { pub fn tokenize(input: &String) -> Result<Token, ()> {
let mut t: Option<Token> = None; let mut t: Option<Token> = None; // The current token we're reading
let mut g: Vec<Token> = Vec::with_capacity(8); let mut g: Vec<Token> = Vec::with_capacity(8); // Vector of "grouping levels"
g.push(Token::Group(Vec::with_capacity(8))); g.push(Token::Group(VecDeque::with_capacity(8)));
for c in input.chars() { for c in input.chars() {
let v_now: &mut Vec<Token> = match g.last_mut().unwrap() {
// The grouping level we're on now
let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() {
Token::Group(ref mut x) => x, Token::Group(ref mut x) => x,
_ => panic!() _ => panic!()
}; };
match c { match c {
'!' => {
if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
g_now.push_back(Token::Factorial);
},
// Minus sign can be both a Negative and an Operator. // Minus sign can be both a Negative and an Operator.
// Needs special treatment. // Needs special treatment.
'-' => { '-' => {
if t.is_some() { v_now.push(t.unwrap()); t = None; } if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
match v_now.last() { match g_now.back() {
// If previous token was any of the following, // If previous token was any of the following,
// this is the "minus" operator // this is the "minus" operator
Some(Token::Number(_)) | Some(Token::Number(_)) |
Some(Token::Group(_)) | Some(Token::Group(_)) |
Some(Token::Word(_)) => { Some(Token::Word(_)) => {
v_now.push(Token::Operator(String::from(c))); g_now.push_back(Token::Operator(String::from(c)));
}, },
// Otherwise, this is a negative sign. // Otherwise, this is a negative sign.
_ => { t = Some(Token::Negative); } _ => { g_now.push_back(Token::Negative); }
}; };
}, },
@ -61,7 +81,7 @@ pub fn tokenize(input: &String) -> Result<Token, ()> {
// If we're not building a number, finalize // If we're not building a number, finalize
// previous token and start one. // previous token and start one.
_ => { _ => {
if t.is_some() { v_now.push(t.unwrap()); } if t.is_some() { g_now.push_back(t.unwrap()); }
t = Some(Token::Number(String::from(c))); t = Some(Token::Number(String::from(c)));
} }
}; };
@ -80,7 +100,7 @@ pub fn tokenize(input: &String) -> Result<Token, ()> {
// If we're not building a number, finalize // If we're not building a number, finalize
// previous token and start one. // previous token and start one.
_ => { _ => {
if t.is_some() { v_now.push(t.unwrap()); } if t.is_some() { g_now.push_back(t.unwrap()); }
t = Some(Token::Word(String::from(c))); t = Some(Token::Word(String::from(c)));
} }
}; };
@ -91,31 +111,31 @@ pub fn tokenize(input: &String) -> Result<Token, ()> {
// Always one character // Always one character
'+' | '*' | '/' | '^' => { '+' | '*' | '/' | '^' => {
// Finalize previous token // Finalize previous token
if t.is_some() { v_now.push(t.unwrap()); t = None; } if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
v_now.push(Token::Operator(String::from(c))); g_now.push_back(Token::Operator(String::from(c)));
} }
// Groups // Groups
// Always one character // Always one character
'(' => { '(' => {
if t.is_some() { v_now.push(t.unwrap()); t = None; } if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
g.push(Token::Group(Vec::with_capacity(8))); g.push(Token::Group(VecDeque::with_capacity(8)));
}, },
')' => { ')' => {
if t.is_some() { v_now.push(t.unwrap()); t = None; } if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
let new_group: Token = g.pop().unwrap(); let new_group: Token = g.pop().unwrap();
let v_now: &mut Vec<Token> = match g.last_mut().unwrap() { let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() {
Token::Group(ref mut x) => x, Token::Group(ref mut x) => x,
_ => panic!() _ => panic!()
}; };
v_now.push(new_group); g_now.push_back(new_group);
}, },
// Space. Basic separator. // Space. Basic separator.
' ' => { ' ' => {
if t.is_some() { v_now.push(t.unwrap()); t = None; } if t.is_some() { g_now.push_back(t.unwrap()); t = None; }
} }
// Invalid token // Invalid token
@ -124,11 +144,11 @@ pub fn tokenize(input: &String) -> Result<Token, ()> {
} }
let v_now: &mut Vec<Token> = match g.last_mut().unwrap() { let g_now: &mut VecDeque<Token> = match g.last_mut().unwrap() {
Token::Group(ref mut x) => x, Token::Group(ref mut x) => x,
_ => panic!() _ => panic!()
}; };
if t.is_some() { v_now.push(t.unwrap()); } if t.is_some() { g_now.push_back(t.unwrap()); }
return Ok(Token::Group(v_now.to_vec())); return Ok(g.pop().unwrap());
} }