daisy/src/parser/tokenize.rs
2023-03-25 10:32:51 -07:00

142 lines
3.4 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use std::collections::VecDeque;
use crate::parser::Token;
use crate::parser::LineLocation;
use crate::parser::Operator;
/// Fills in the `len` of a finished pre-token's `LineLocation`.
///
/// Call this once a token is complete. `stop_i` is the index one past the
/// token's last character, so the stored length is `stop_i - pos`; the
/// token's `pos` is left untouched. `stop_i` is expected to be at or past
/// `pos`.
///
/// # Panics
///
/// Panics if `t` is not one of `PreGroupStart`, `PreGroupEnd`,
/// `PreOperator`, `PreNumber` or `PreWord`.
#[inline(always)]
fn update_line_location(mut t: Token, stop_i: usize) -> Token {
    // Every pre-token variant carries its location as the first field.
    let loc = match &mut t {
        Token::PreGroupStart(loc)
        | Token::PreGroupEnd(loc)
        | Token::PreOperator(loc, _)
        | Token::PreNumber(loc, _)
        | Token::PreWord(loc, _) => loc,
        _ => panic!(),
    };

    loc.len = stop_i - loc.pos;
    t
}
/// Turns a string into Tokens. First stage of parsing.
pub fn p_tokenize(input: &String) -> VecDeque<Token> {
let mut t: Option<Token> = None; // The current token we're reading
let mut g: VecDeque<Token> = VecDeque::with_capacity(32);
for (i, c) in input.chars().enumerate() {
match c {
// The minus sign can be both a Negative and an Operator.
// Needs special treatment.
'-' => {
if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); }
match g.back() {
// If previous token was any of the following,
// this is the "minus" operator
Some(Token::Number(_, _)) |
Some(Token::PreGroup(_, _)) |
Some(Token::PreWord(_, _)) => {
t = Some(Token::PreOperator(
LineLocation{pos: i, len: 1},
Operator::Subtract
));
},
// Otherwise, this is a negative sign.
_ => {
t = Some(Token::PreOperator(
LineLocation{pos: i, len: 1},
Operator::Negative
));
}
};
},
// Number.
// Commas act just like dots.
',' | '.' | '0'..='9' => {
match &mut t {
// If we're already building a number,
// append.
Some(Token::PreNumber(_, val)) => {
val.push(if c == ',' {'.'} else {c});
},
// If we're not building a number, finalize
// previous token and start one.
_ => {
if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); }
t = Some(Token::PreNumber(LineLocation{pos: i, len: 0}, String::from(c)));
}
};
},
// Operator
// Always one character
'*'|'/'|'+'|
'^'|'!'|'%'
=> {
if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); }
t = Some(Token::PreOperator(
LineLocation{pos: i, len: 0},
match c {
'^' => Operator::Power,
'%' => Operator::Modulo,
'*'|'×' => Operator::Multiply,
'/'|'÷' => Operator::Divide,
'+' => Operator::Add,
'!' => Operator::Factorial,
_ => panic!()
}
));
}
// Group
'(' => {
if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); }
t = Some(Token::PreGroupStart(LineLocation{pos: i, len: 0}));
},
')' => {
if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); }
t = Some(Token::PreGroupEnd(LineLocation{pos: i, len: 0}));
},
// Space. Basic seperator.
' ' => {
if t.is_some() {
g.push_back(update_line_location(t.unwrap(), i));
t = None;
}
}
// Word
//'A'..='Z' |
//'a'..='z'
_ => {
match &mut t {
Some(Token::PreWord(_, val)) => {
val.push(c);
},
_ => {
if t.is_some() { g.push_back(update_line_location(t.unwrap(), i)); }
t = Some(Token::PreWord(LineLocation{pos: i, len: 0}, String::from(c)));
}
};
}
};
}
if t.is_some() { g.push_back(update_line_location(t.unwrap(), input.len())); }
return g;
}