2022-02-13 19:21:32 +05:30

461 lines
17 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
This is a slightly modified version of the grammar defined in Satriani,
the official reference implementation of Rockstar. Modifications made:
- Add very short comment for each grammar rule
- Adapt grammar to parse the program line-by-line
- Organize the rules into broad categories
- Adjust action returns to be easier to use in TypeScript
Apart from line-by-line parsing, all changes are cosmetic and the
actual grammar should be identical to Satriani. The rule names should not
be changed too much, so that matching any upstream updates is easy.
Satriani grammar: https://github.com/RockstarLang/rockstar/blob/main/satriani/rockstar.peg
After updating, run `node generate-parser.js` in this directory.
*/
{
/* initialiser code - this is JS that runs before the generated parser starts parsing */
// Reserved words, grouped by the part of the grammar that uses them.
// A word may appear in several groups; the Set keeps a single copy of each.
const variablePrefixWords = ['a', 'an', 'the', 'my', 'your', 'our'];
const pronounWords = [
  'it', 'he', 'she', 'him', 'her', 'they', 'them',
  'ze', 'hir', 'zie', 'zir', 'xe', 'xem', 've', 'ver',
];
const literalWords = [
  'mysterious',
  'null', 'nothing', 'nowhere', 'nobody', 'gone',
  'true', 'right', 'yes', 'ok',
  'false', 'wrong', 'no', 'lies',
  'maybe', 'definitely', // reserved for future use
  'empty', 'silent', 'silence',
];
const assignmentWords = [
  'let', 'be', 'put', 'into', 'in', // expression
  'is', 'are', 'was', 'were', 'say', 'says', 'said', // poetic
];
const operationWords = [
  'at', 'rock', 'with', 'roll', 'into', 'push', 'pop', 'like', // arrays
  'cut', 'split', 'shatter', 'join', 'unite', 'cast', 'burn', // strings
  'build', 'up', 'knock', 'down', // increment/decrement
  'plus', 'with', 'minus', 'without', 'times', 'of', 'over', 'between', // arithmetic
  'and', // list arithmetic
  'turn', 'up', 'down', 'round', 'around', // rounding
  'and', 'or', 'nor', 'not', // logical
];
const comparisonWords = [
  'is', "isn't", 'isnt', "ain't", 'aint',
  'arent', "aren't", 'wasnt', "wasn't", 'werent', "weren't",
  'not',
  'than',
  'higher', 'greater', 'bigger', 'stronger',
  'lower', 'less', 'smaller', 'weaker',
  'as',
  'high', 'great', 'big', 'strong',
  'low', 'little', 'small', 'weak',
];
const inputOutputWords = [
  'listen', 'to',
  'say', 'shout', 'whisper', 'scream',
];
const controlFlowWords = [
  'if', 'else',
  'while', 'until',
  'break', 'continue',
  'break', 'it', 'down',
  'take', 'it', 'to', 'the', 'top',
  'take',
];
const functionWords = [
  'takes', 'wants',
  'give', 'return', 'send', 'back',
  'taking',
];

const keywords = new Set([].concat(
  variablePrefixWords,
  pronounWords,
  literalWords,
  assignmentWords,
  operationWords,
  comparisonWords,
  inputOutputWords,
  controlFlowWords,
  functionWords
));

/**
 * Report whether a word is reserved, ignoring letter case.
 * @param {string} string - word to look up
 * @returns {boolean} true when the lowercased word is in `keywords`
 */
function isKeyword(string) {
  const lowered = string.toLowerCase();
  return keywords.has(lowered);
}
}
/* Rule for the entire program */
// Matches zero or more `line`s and returns their results as { list: [...] }.
program = p:line * { return { list: p } }
/* Rule for a single block of the program */
// Alternatives are tried in order:
//   1. a statement, with optional whitespace/comments around it, ended by EOL or EOF;
//      returns whatever `statement` returned
//   2. optional whitespace/comments followed by EOL -> { type: 'blank' }
//   3. at least one whitespace/comment run followed by EOF -> { type: 'blank' }
line = _* s:statement _* (EOL / EOF) { return s }
/ _* EOL { return { type: 'blank' } }
/ _+ EOF {return { type: 'blank' } }
/* Utility types for whitespace and comments */
// A single space or tab.
whitespace = [ \t]
// Parenthesised comment. The body is any run of characters other than ')',
// so a comment ends at the first ')' and cannot nest.
comment = '(' [^)]* ')'
// One or more whitespace characters and/or comments. Never matches the empty string.
_ = (whitespace / comment)+
// Either a whitespace/comment run or one punctuation character out of ; , ? ! & .
noise = (_ / [;,?!&.])
// Any amount of noise, then an optional '\r' and a mandatory '\n'.
EOL "end of line" = noise* '\r'? '\n'
// Succeeds only when there is no input left.
EOF = !.
// Optionally matches a whitespace/comment run followed by everything up to
// (but not including) the next '\n'. Matches the empty string otherwise.
ignore_rest_of_line = (_[^\n]*)?
/* Rule for a single statement */
// Skips leading whitespace/comments, then returns the result of the first
// alternative that matches. PEG choice is ordered, so the sequence below matters.
statement = _* s:(break / continue / function_decl / function_call
/ function_return / loop / if / else / operation / expression) { return s }
/*********************************
FLOW CONTROL
*********************************/
/* Rule for an if-statement */
// To run inline statements in a separate step, we need location information
// inline_statement wraps a statement as { s, start }, where `start` is the
// start offset reported by location() for this rule's match.
inline_statement = s:statement { return {s: s, start: location().start.offset }}
// Result: { type: 'if', condition, statement, split }.
// `statement` and `split` come from the optional inline statement; when it is
// absent both evaluate to the falsy value of `s` (via `s && ...`).
if = 'if'i _ e:expression s:inline_statement?
{ return {
type: 'if',
condition: e,
statement: s && s.s,
split: s && s.start,
} }
/* Rule for an else-statement */
// First alternative: 'else' followed by whitespace and an inline statement,
// returned in `statement`.
// Second alternative: bare 'else' (optionally surrounded by whitespace/comments);
// `statement` is null.
else = _* 'else'i _ a:statement { return { type: 'else', statement: a } }
/ _* 'else'i _* {return { type: 'else', statement: null } }
/* Rule for starting a while-loop */
// Case-insensitive 'while' or 'until'.
loop_keyword = ('while'i / 'until'i)
// Returns { type: 'loop', condition }. The matched keyword is not part of the
// result, so 'while' and 'until' produce the same node.
loop = loop_keyword _ e:expression { return { type: 'loop', condition: e } }
/* Rule for the loop break statement */
// Whatever follows 'break' on the line is consumed by ignore_rest_of_line and discarded.
break = 'break'i ignore_rest_of_line { return { type: 'break' } }
/* Rule for the loop continue statement */
// Accepts 'continue' (rest of the line ignored) or the exact phrase
// 'take it to the top'; both are case-insensitive.
continue = ('continue'i ignore_rest_of_line / 'take it to the top'i)
{ return { type: 'continue' } }
/* Rule for function declaration statement */
// Case-insensitive 'takes' or 'wants'.
takes = ('takes'i / 'wants'i)
// <variable> takes <variable list>.
// Returns { type: 'function_decl', name, args } where `args` is the array
// produced by variable_list.
function_decl = name:variable _ takes _ args:variable_list
{ return { type: 'function_decl', name: name, args: args } }
/* Rule for function return statement */
// 'give back' is listed before 'give' so that the longer phrase is tried first.
return = 'return'i / 'give back'i / 'send'i / 'give'i
// <return keyword> <expression>, optionally followed by a trailing 'back'.
// Returns { type: 'return', expression }.
function_return = return _ e:expression (_ 'back'i)?
{ return { type: 'return', expression: e } }
/*********************************
VARIABLES AND ASSIGNMENT
*********************************/
/* Keywords used to refer to last assigned variable */
// Four-letter spellings are listed first, then three-letter, then two-letter,
// so a shorter pronoun never pre-empts a longer one that shares its prefix.
// The trailing `&(...)` is a lookahead: the pronoun must be followed by an
// `is` form, whitespace/comment, end of line or end of input, none of which
// is consumed here.
// Returns { pronoun: <lowercased word> }.
pronoun "pronoun" = pronoun:(
'they'i / 'them'i
/ 'she'i / 'him'i / 'her'i / 'hir'i / 'zie'i / 'zir'i / 'xem'i / 'ver'i
/ 'ze'i / 've'i / 'xe'i / 'it'i / 'he'i
) &(is / _ / EOL / EOF)
{ return { pronoun: pronoun.toLowerCase() } }
/* Prefix for defining common variables */
// Case-insensitive. 'an' is listed before 'a' so the longer prefix is tried first.
common_prefix "common variable prefix" = ( 'an'i / 'a'i / 'the'i / 'my'i / 'your'i / 'our'i)
/* Set of recognized uppercase letters */
// ASCII A-Z plus accented Latin capitals. U+0132 (IJ ligature) must stay a
// single code point: if it is decomposed into the two ASCII letters "IJ" the
// ligature itself silently drops out of the class.
uppercase_letter "uppercase letter" = [A-ZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞĀĂĄĆĈĊČĎĐĒĔĖĘĚĜĞĠĢĤĦĨĪĬĮİĲĴĶĸĹĻĽĿŁŃŅŇŊŌŎŐŒŔŖŘŚŜŞŠŢŤŦŨŪŬŮŰŲŴŶŸŹŻŽ]
/* Set of recognized lowercase letters */
// ASCII a-z plus accented Latin small letters. U+0133 (ij ligature) and
// U+0149 (n preceded by apostrophe) must stay single code points: a
// compatibility-decomposed copy turns U+0149 into U+02BC + "n", which would
// make the modifier apostrophe U+02BC count as a letter.
lowercase_letter "lowercase letter" = [a-zàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþāăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıĳĵķĸĺļľŀłńņňŋōŏőœŕŗřśŝşšţťŧũūŭůűųŵŷÿźżžŉß]
/* Set of recognized letters */
// Any character from either class above.
letter "letter" = uppercase_letter / lowercase_letter
/* Rule for variable identifiers */
// Ordered choice; the first matching form wins.
variable "variable" = common_variable / proper_variable / pronoun / simple_variable
/* Name of a common variable */
// <prefix> <letters>. Returns "<prefix>_<name>" in lower case.
common_variable = prefix:common_prefix _ name:$(letter+)
{ return (prefix + '_' + name).toLowerCase() };
/* Rule for name of a simple variable */
// One or more letters. The semantic predicate rejects the match when the
// word is a keyword. Returns the name in lower case.
simple_variable = name:$(letter letter*) !{ return isKeyword(name) } { return name.toLowerCase() }
/* Rule for a single word in proper variable */
// An uppercase letter followed by any letters, rejected when the word is a
// keyword. Returns the word exactly as matched.
proper_noun = noun:$(uppercase_letter letter*) !{ return isKeyword(noun) } { return noun }
/* Rule for name of a proper variable */
// One or more proper nouns separated by exactly one space character each.
// Returns the matched text with spaces replaced by '_' and lowercased.
proper_variable = head:$(proper_noun (' ' $proper_noun)*)
{ return head.replace(/ /g, '_').toLowerCase() }
/* Rule for a list of variables */
// Either an expression-list separator or the word 'and' with whitespace on both sides.
variable_list_separator "separator" = expression_list_separator / _ 'and'i _
// Right-recursive list: returns a flat array of the variables in source order.
// A single variable yields a one-element array.
variable_list "variable list" = head:variable variable_list_separator tail:variable_list
{ return [head].concat(tail) }
/ arg:variable { return [arg] }
/* Rule for part of array access clause */
// ' at <expression>'; returns the index expression.
indexer = _ 'at'i _ i:expression { return i };
/* Rule for possible target of a value assignment */
// A variable with an optional indexer. Returns { variable, index };
// `index` holds the result of the optional indexer.
assignable "assignable variable"
= v:variable i:indexer?
{ return { variable: v, index: i }; }
/* Operators allowed in compound assignment */
// Each alternative returns its operator symbol ('+', '-', '*' or '/').
compoundable_operator "operator" = add / subtract / multiply / divide
/* Rule for assignment statements */
// Alternatives are tried in the order written.
// <target> is <literal or poetic number>
assignment "assignment statement" = target:assignable is _* e:(literal / poetic_number)
{ return { type: "assign", target: target, expression: e} }
// <target> says/say/said <poetic string>; the keyword must be followed by one space.
/ target:assignable _ ('says 'i / 'say 'i / 'said 'i) e:poetic_string
{ return { type: "assign", target: target, expression: e} }
// put <expression> into <target>
/ 'put'i _ e:expression into target:assignable
{ return { type: "assign", target: target, expression: e} }
// let <target> be <operator> <expression>: compound assignment.
// NOTE(review): the expression built here is { binary: {...} } with
// lhs { lookup: target }, while other rules in this file emit nodes tagged
// with a `type` field -- confirm the consumer handles both shapes.
/ 'let'i _ target:assignable _ 'be'i o:compoundable_operator e:expression {
return {
type: "assign",
target: target,
expression: { binary: { op: o, lhs: { lookup: target }, rhs: e } }
}
}
// let <target> be <expression>
/ 'let'i _ t:assignable _ 'be'i _ e:expression
{ return { type: "assign", target: t, expression: e} }
// rock/push <expression> into <variable>
/ push _ e:expression into v:variable
{ return { type: "enlist", variable: v, expression: e } }
// rock/push <variable> like <literal or poetic number>
/ push _ v:variable _ 'like'i _ e:(literal / poetic_number)
{ return { type: "enlist", variable: v, expression: e } }
// rock/push <variable> [with] <expression>
/ push _ v:variable (_ 'with'i)? _ e:expression
{ return { type: "enlist", variable: v, expression: e } }
// rock/push <variable> with nothing to add; the node has no `expression`.
/ push _ v:variable
{ return { type: "enlist", variable: v } }
// roll/pop <variable> into <target>: assigns the delist node to the target.
/ e:delist into target:assignable
{ return { type: "assign", target: target, expression: e } }
/*********************************
EXPRESSION TREE
*********************************/
/* Rule for a list of expressions */
// Either ', and' (optional whitespace before, mandatory after) or one of
// '&', ',' or "'n'" with optional whitespace on both sides.
expression_list_separator "separator" = (_? ', and'i _ / _?('&' / ',' / "'n'"i)_?)
// Right-recursive list of simple expressions; always returns an array,
// with one element when there is no separator.
expression_list "expression list" = head:simple_expression expression_list_separator tail:expression_list
{ return [head].concat(tail) }
/ arg:simple_expression { return [arg] }
/* Root rule for expression tree */
expression "expression" = boolean
/* Rule for a boolean operation clause */
// The chain below is nor -> or -> and -> equality_check. Each level is
// right-recursive and falls through to the next level when its keyword is
// absent. Binary nodes have the shape { type: "binary", op, lhs, rhs }.
boolean = nor
/* Rule for NOR operation */
nor = lhs:or _ 'nor'i _ rhs:nor {
return { type: "binary", op: 'nor', lhs: lhs, rhs: rhs } }
/ or
/* Rule for OR operation */
or = lhs:and _ 'or'i _ rhs:or {
return { type: "binary", op: 'or', lhs: lhs, rhs: rhs } }
/ and
/* Rule for AND operation */
and = lhs:equality_check _ 'and'i _ rhs:and {
return { type: "binary", op: 'and', lhs: lhs, rhs: rhs } }
/ equality_check
/* Keywords for equality/inequality check */
// Contractions 's / 're attach directly to the preceding word; '=' and the
// word forms need whitespace before them. All forms need whitespace after.
is = ("'s"i / "'re"i / _ ('=' / 'is'i / 'was'i / 'are'i / 'were'i)) _
// Negated forms, with and without the apostrophe, surrounded by whitespace.
isnt = _ (
'isnt'i / "isn't"i /
'aint'i / "ain't"i /
'arent'i / "aren't"i /
'wasnt'i / "wasn't"i /
'werent'i / "weren't"i
) _
/* Rule for equality/inequality check */
// `isnt` is tried before `is`. Returns 'ne' or 'eq'.
eq = isnt { return 'ne' } / is { return 'eq' }
// Right-recursive. Returns { type: "comparison", comparator, lhs, rhs },
// or falls through to `not` when no equality keyword follows.
equality_check = lhs:not c:eq rhs:equality_check
{ return { type: "comparison", comparator: c, lhs: lhs, rhs: rhs } }
/ not
/* Rule for NOT operation */
// 'not' may be repeated; each occurrence wraps the operand in
// { type: "not", expression }. Without 'not' the rule falls through to `comparison`.
not = 'not'i _ e:not { return { type: "not", expression: e } }
/ comparison
/* Keywords for comparison operators */
// Comparative forms, used as "is <greater|smaller> than".
greater = ('higher'i / 'greater'i / 'bigger'i / 'stronger'i)
smaller = ('lower'i / 'less'i / 'smaller'i / 'weaker'i)
// Positive forms, used as "is as <great|small> as".
great = ('high'i / 'great'i / 'big'i / 'strong'i)
small = ('low'i / 'little'i / 'small'i / 'weak'i)
// Returns 'gt', 'lt', 'ge' or 'le' for the four phrasings respectively.
comparator = is greater _ 'than'i _ { return 'gt' }
/ is smaller _ 'than'i _ { return 'lt' }
/ is 'as'i _ great _ 'as'i _ { return 'ge' }
/ is 'as'i _ small _ 'as'i _ { return 'le' }
/* Rule for comparison clause */
// Right-recursive. Returns { type: "comparison", comparator, lhs, rhs },
// or falls through to `arithmetic` when no comparator follows.
comparison = lhs:arithmetic c:comparator rhs:comparison
{ return { type: "comparison", comparator: c, lhs: lhs, rhs: rhs } }
/ arithmetic
/* Rule for plus/minus arithmetic clause */
// A product followed by one or more (operator, product) pairs. The pairs are
// folded from the left, so "a + b - c" nests as ((a + b) - c). Each step
// produces { type: "binary", op, lhs, rhs }. A lone product is returned as is.
arithmetic = first:product rest:((add / subtract) product)+
{ return rest.reduce(function(memo, curr) {
return { type: "binary", op: curr[0], lhs: memo, rhs: curr[1] }
}, first); }
/ product
/* Rule for multiply/divide arithmetic clause */
// A simple expression followed by one or more (operator, expression_list)
// pairs, folded from the left. The right-hand side of each step is the array
// returned by expression_list.
// NOTE(review): this rule emits { binary: { op, lhs, rhs } } whereas
// `arithmetic` and the boolean rules emit { type: "binary", op, lhs, rhs } --
// confirm the consumer expects both shapes.
// Without an operator the rule yields an expression_list (an array); the
// final `simple_expression` alternative comes after it in the ordered choice.
product = first:simple_expression rest:((multiply / divide) expression_list)+
{ return rest.reduce(function(memo, curr) {
return { binary: { op: curr[0], lhs: memo, rhs: curr[1] } };
}, first); }
/ expression_list
/ simple_expression
/* Rule for the leaf of an expression tree */
// Ordered choice: function calls and keyword constants are tried before
// variable lookups, then other literals, then pronouns.
simple_expression = function_call / constant / lookup / literal / pronoun
/* Rule for function call expression */
// <variable> taking <expression list>.
// Returns { type: "call", name, args }; `args` is always an array (a
// non-array value is wrapped in one).
function_call "function call" = name:variable _ 'taking'i _ args:expression_list
{ return { type: "call", name: name, args: Array.isArray(args) ? args : [args] } }
/* Rule for a constant literal */
literal "literal" = constant / number / string
/* Rule for keyword-based constant literals */
constant "constant" = null / true / false / empty_string / mysterious
// Each of the next four rules returns { constant: <value> }.
null = ('null'i / 'nothing'i / 'nowhere'i / 'nobody'i / 'gone'i) { return { constant: null } }
// `!letter` stops `true` and `false` from matching the start of a longer
// word. The null and empty_string rules have no such guard.
true = ('true'i / 'ok'i / 'right'i / 'yes'i) !letter { return { constant: true } }
false = ('false'i / 'lies'i / 'wrong'i / 'no'i) !letter { return { constant: false } }
empty_string = ('empty'i / 'silent'i / 'silence'i) { return { constant: "" } }
// Unlike the rules above, this returns the bare string '__MYSTERIOUS__'
// rather than an object.
mysterious = 'mysterious'i { return '__MYSTERIOUS__' }
/* Rule for a numeric literal */
// First form: optional '-', digits, optional fractional part. A trailing '.'
// is consumed but is not part of the text handed to parseFloat.
// Second form: a leading '.' followed by digits (no sign).
// Both return { number: <parsed float> }.
number "number" = n:$('-'?[0-9]+ ('.' [0-9]+)?) '.'?
{ return {number: parseFloat(n)} }
/ n:$('.' [0-9]+)
{ return {number: parseFloat(n) } }
/* Rule for a string literal */
// Double-quoted text containing any characters except '"'; there is no
// escape syntax. Returns { string: <text between the quotes> }.
string "string" = '"' s:$[^"]* '"' { return {string: s }; }
/* 'TODO remove */
/*********************************
OPERATION STATEMENTS
*********************************/
/* Rule for single-operation statements */
// Ordered choice over the statement kinds that perform one operation.
operation "operation statement" = readline / output / crement / mutation / assignment / rounding
/* Rule for STDIN operation statement */
// 'listen to <target>' returns { type: "stdin", target }.
// Bare 'listen' returns { type: "stdin" } with no target.
readline "stdin statement" = 'listen to'i _ target:assignable
{ return { type: "stdin", target: target } }
/ 'listen'i { return { type: "stdin" } }
/* Rule for STDOUT statement */
// say/shout/whisper/scream <expression>. Returns { type: "stdout", output }.
output "stdout statement" = ('say'i/'shout'i/'whisper'i/'scream'i) _ e:expression
{ return { type: "stdout", output: e } }
/* Rule for increment/decrement statements */
crement "increment/decrement statement" = increment / decrement
/* Rule for increment statement */
// build <variable> up [up ...]. `multiple` is the number of 'up' words
// matched; noise between repetitions is skipped.
increment = 'build'i _ v:variable _ t:('up'i noise*)+
{ return { type: "increment", variable: v, multiple: t.length } }
/* Rule for decrement statement */
// knock <variable> down [down ...]. `multiple` is the number of 'down' words matched.
decrement = 'knock'i _ v:variable _ t:('down'i noise*)+
{ return { type: "decrement", variable: v, multiple: t.length } }
// Keyword aliases for the three mutation kinds. Each rule returns its
// canonical name: 'split', 'cast' or 'join'.
split = ('cut'i / 'split'i / 'shatter'i) { return 'split' }
cast = ('cast'i / 'burn'i) { return 'cast' }
join = ('join'i / 'unite'i) { return 'join' }
/* Rule for mutation operation statements */
mutator "mutation keyword" = split / cast / join
// Optional ' with <expression>' or ' using <expression>' clause; returns the expression.
modifier = _ ('with'i / 'using'i) _ m:expression { return m }
// First form: <mutator> <expression> into <target> [modifier] -- the result
// of mutating the source expression is assigned to the target.
// Second form: <mutator> <target> [modifier] -- the target is mutated in
// place, with a { lookup: target } node as the source.
// NOTE(review): both forms return { assign: {...} } with nested
// { mutation: {...} } and no `type` field, unlike the `assignment` rule,
// which returns { type: "assign", ... } -- confirm the consumer handles this shape.
mutation "mutation statement" = op:mutator _ s:expression into t:assignable m:modifier?
{ return { assign: { target: t, expression: { mutation: { type: op, source: s, modifier: m } } } } ; }
/ op:mutator _ s:assignable m:modifier?
{ return { assign: { target: s, expression: { mutation: { type: op, source: { lookup: s }, modifier: m } } } } ; }
/* Rule for rounding operation statements */
// Every alternative returns { type: "rounding", variable, direction } with
// direction 'down', 'up' or 'nearest'. The direction word may come after or
// before the variable.
rounding "rounding statement" = floor / ceil / math_round
// turn <variable> down / turn down <variable>
floor = 'turn'i _ v:variable _ 'down'i
{ return { type: "rounding", variable: v, direction: 'down' } }
/ 'turn'i _ 'down'i _ v:variable
{ return { type: "rounding", variable: v, direction: 'down' } }
// turn <variable> up / turn up <variable>
ceil = 'turn'i _ v:variable _ 'up'i
{ return { type: "rounding", variable: v, direction: 'up' } }
/ 'turn'i _ 'up'i _ v:variable
{ return { type: "rounding", variable: v, direction: 'up' } }
// turn <variable> round|around / turn round|around <variable>
math_round = 'turn'i _ v:variable _ ('round'i/'around'i)
{ return { type: "rounding", variable: v, direction: 'nearest' } }
/ 'turn'i _ ('round'i/'around'i) _ v:variable
{ return { type: "rounding", variable: v, direction: 'nearest' } }
/*********************************
KITCHEN SINK
*********************************/
/* Keywords for arithmetic operators */
// Note that operator aliases explicitly include a trailing space,
// otherwise 'with' is a prefix code for 'without' and confuses the parser.
// Each rule allows optional whitespace/comments on both sides and returns the
// operator symbol; the symbolic forms ('+', '-', '*', '/') need no trailing space.
add = _* ('+' / 'plus 'i / 'with 'i) _* { return '+' }
subtract = _* ('-' / 'minus 'i / 'without 'i) _* { return '-' }
multiply = _* ('*' / 'times 'i / 'of 'i) _* { return '*' }
divide = _* ('/' / 'over 'i / 'between 'i) _* { return '/' }
// Keywords that add an item to an array.
push = ('rock'i / 'push'i )
// Keywords that take an item off an array.
pop = ('roll'i / 'pop'i)
// 'into' is listed before 'in' so the longer word is tried first;
// whitespace is required on both sides.
into = _ ('into'i / 'in'i) _
/* Rule representing array dequeue clause */
// roll/pop <variable>. Returns { type: "delist", variable }.
delist "array roll" = pop _ v:variable
{ return { type: "delist", variable: v } }
/* Rule for variable in expression tree leaf */
// Tried in order: a delist clause (returned unchanged), an indexed access
// '<variable> at <expression>', then a plain variable. The latter two return
// { type: "lookup", variable } with `index` present only for indexed access.
lookup "variable or array element" = d:delist { return d; }
/ v:variable _ 'at'i _ i:expression
{ return { type: "lookup", variable: v, index: i } }
/ v:variable
{ return { type: "lookup", variable: v } }
/* Rule for poetic string literal */
// Everything up to the next '\r' or '\n', taken verbatim. Returns { string }.
poetic_string "poetic string" = s:$[^\r\n]* { return { string: s } }
/* Rule for poetic number literal */
// Integer digits, optionally followed by a decimal part, with separators
// allowed around each piece. The digit strings are joined with '.' and passed
// to parseFloat. Returns { number }.
poetic_number "poetic number" = poetic_digit_separator* n:poetic_digits poetic_digit_separator* d:poetic_decimal? poetic_digit_separator*
{ return { number: parseFloat(d?n+'.'+d:n)}}
/* Rule for poetic decimal literal */
// First alternative: '.' followed by decimal digits; returns the digit string.
// NOTE(review): the second alternative (a '.' with no digits after it) has no
// action, so `d` in poetic_number is then the raw match array rather than a
// digit string -- confirm that the resulting parseFloat input is intended.
poetic_decimal = '.' poetic_decimal_digit_separator* d:poetic_decimal_digits poetic_decimal_digit_separator* {return d}
/ '.' poetic_decimal_digit_separator*
/* Separator used in poetic literals */
// Whitespace/comments, an ASCII digit, or one of ' , ; : ? ! + _ /
poetic_digit_separator = ( _ / [0-9\',;:?!+_/] )
// One or more poetic digits separated by at least one separator each;
// returns the digits concatenated into one string.
poetic_digits = poetic_digit_separator* head:poetic_digit poetic_digit_separator+ tail:poetic_digits
{ return head + tail }
/ d: poetic_digit
{ return d }
// After the decimal point a further '.' also counts as a separator.
poetic_decimal_digit_separator = ( _ / poetic_digit_separator / '.')
// Same shape as poetic_digits, using the decimal separator set.
poetic_decimal_digits = poetic_decimal_digit_separator* head:poetic_digit poetic_decimal_digit_separator+ tail:poetic_decimal_digits
{ return head + tail }
/ d: poetic_digit
{ return d }
// A word made of ASCII letters, hyphens and apostrophes. Its digit is the
// count of letters and hyphens (apostrophes are not counted) modulo 10,
// returned as a one-character string.
poetic_digit = t:[A-Za-z\-']+
{ return (t.filter(c => /[A-Za-z\-]/.test(c)).length%10).toString() }