461 lines
17 KiB
Plaintext
461 lines
17 KiB
Plaintext
/*
|
||
This is a slightly modified version of the grammar defined in Satriani,
|
||
the official reference implementation of Rockstar. Modifications made:
|
||
|
||
- Add very short comment for each grammar rule
|
||
- Adapt grammar to parse the program line-by-line
|
||
- Organize the rules into broad categories
|
||
- Adjust action returns to be easier to use in TypeScript
|
||
|
||
Apart from line-by-line parsing, all changes are cosmetic and the
|
||
actual grammar should be identical to Satriani. The rule names should not
|
||
be changed too much, so that matching any upstream updates is easy.
|
||
|
||
Satriani grammar: https://github.com/RockstarLang/rockstar/blob/main/satriani/rockstar.peg
|
||
After updating, run `node generate-parser.js` in this directory.
|
||
*/
|
||
|
||
{
  /*
   * Initializer: plain JavaScript that PEG.js executes before the generated
   * parser starts parsing. Everything declared here is visible to the rule
   * actions and predicates below.
   */

  // Reserved words, grouped by the language feature that uses them. A word may
  // show up in more than one group; the Set below keeps a single copy of each.
  const variablePrefixes = ['a', 'an', 'the', 'my', 'your', 'our'];

  const pronouns = [
    'it', 'he', 'she', 'him', 'her', 'they', 'them',
    'ze', 'hir', 'zie', 'zir', 'xe', 'xem', 've', 'ver',
  ];

  const literalValues = [
    'mysterious',
    'null', 'nothing', 'nowhere', 'nobody', 'gone',
    'true', 'right', 'yes', 'ok',
    'false', 'wrong', 'no', 'lies',
    'maybe', 'definitely', // reserved for future use
    'empty', 'silent', 'silence',
  ];

  const assignmentWords = [
    'let', 'be', 'put', 'into', 'in', // expression
    'is', 'are', 'was', 'were', 'say', 'says', 'said', // poetic
  ];

  const operationWords = [
    'at', 'rock', 'with', 'roll', 'into', 'push', 'pop', 'like', // arrays
    'cut', 'split', 'shatter', 'join', 'unite', 'cast', 'burn', // strings
    'build', 'up', 'knock', 'down', // increment/decrement
    'plus', 'with', 'minus', 'without', 'times', 'of', 'over', 'between', // arithmetic
    'and', // list arithmetic
    'turn', 'up', 'down', 'round', 'around', // rounding
    'and', 'or', 'nor', 'not', // logical
  ];

  const comparisonWords = [
    'is', "isn't", 'isnt', "ain't", 'aint',
    'arent', "aren't", 'wasnt', "wasn't", 'werent', "weren't",
    'not',
    'than',
    'higher', 'greater', 'bigger', 'stronger',
    'lower', 'less', 'smaller', 'weaker',
    'as',
    'high', 'great', 'big', 'strong',
    'low', 'little', 'small', 'weak',
  ];

  const ioWords = [
    'listen', 'to',
    'say', 'shout', 'whisper', 'scream',
  ];

  const controlFlowWords = [
    'if', 'else',
    'while', 'until',
    'break', 'continue',
    'break', 'it', 'down',
    'take', 'it', 'to', 'the', 'top',
    'take',
  ];

  const functionWords = [
    'takes', 'wants',
    'give', 'return', 'send', 'back',
    'taking',
  ];

  const keywords = new Set([].concat(
    variablePrefixes,
    pronouns,
    literalValues,
    assignmentWords,
    operationWords,
    comparisonWords,
    ioWords,
    controlFlowWords,
    functionWords
  ));

  // Case-insensitive membership test against the reserved-word set.
  function isKeyword(string) {
    const lowered = string.toLowerCase();
    return keywords.has(lowered);
  }
}
|
||
|
||
/* Rule for the entire program */
|
||
// Zero or more lines; the per-line results are returned as `{ list: [...] }`.
program = lines:line*
    { return { list: lines }; }
|
||
|
||
/* Rule for a single block of the program */
|
||
// A line is one of:
//   - a statement, optionally padded, terminated by EOL or EOF
//   - nothing but optional whitespace/comments, terminated by EOL
//   - at least one whitespace/comment run directly before EOF
// The last two produce a `blank` node.
line
    = _* stmt:statement _* (EOL / EOF) { return stmt; }
    / _* EOL { return { type: 'blank' }; }
    / _+ EOF { return { type: 'blank' }; }
|
||
|
||
/* Utility types for whitespace and comments */
|
||
// Horizontal whitespace only; line breaks are matched by EOL.
whitespace = [ \t]

// Parenthesised comment. The body stops at the first ')'.
comment = '(' [^)]* ')'

// One or more whitespace characters and/or comments.
_ = (whitespace / comment)+

// Whitespace, comments or punctuation that may precede a line break.
noise = _ / [;,?!&.]

// Line break (LF or CRLF), optionally preceded by noise.
EOL "end of line" = noise* '\r'? '\n'

// End of input: succeeds only when no character is left.
EOF = !.

// Optional remainder of the current line. When present it must start with
// whitespace or a comment.
ignore_rest_of_line = (_ [^\n]*)?
|
||
|
||
/* Rule for a single statement */
|
||
// Skips leading whitespace/comments, then tries each statement kind in the
// order listed and returns the result of the first one that matches.
statement
    = _* stmt:(
          break
        / continue
        / function_decl
        / function_call
        / function_return
        / loop
        / if
        / else
        / operation
        / expression
      )
      { return stmt; }
|
||
|
||
/*********************************
|
||
FLOW CONTROL
|
||
*********************************/
|
||
|
||
/* Rule for an if-statement */
|
||
// To run inline statements in a separate step, we need location information
|
||
// Pairs the parsed statement with the input offset at which its match begins.
inline_statement = stmt:statement
    {
        const begin = location().start.offset;
        return { s: stmt, start: begin };
    }
|
||
// `if <condition>` optionally followed by a statement on the same line.
// `statement` and `split` are null when no inline statement is present.
if = 'if'i _ cond:expression inline:inline_statement?
    {
        return {
            type: 'if',
            condition: cond,
            statement: inline ? inline.s : null,
            split: inline ? inline.start : null,
        };
    }
|
||
|
||
/* Rule for an else-statement */
|
||
// `else` followed by an inline statement, or a bare `else` (statement: null).
else
    = _* 'else'i _ body:statement
      { return { type: 'else', statement: body }; }
    / _* 'else'i _*
      { return { type: 'else', statement: null }; }
|
||
|
||
/* Rule for starting a while-loop */
|
||
// Either loop keyword, matched case-insensitively. The rule's value is the
// matched text as it appears in the input.
loop_keyword = ('while'i / 'until'i)
|
||
// Start of a loop: `while <condition>` or `until <condition>`.
// The loop node carries no keyword, so an `until` loop is expressed by wrapping
// its condition in a `not` node (same shape the `not` rule produces). This way
// `until X` runs while X is false instead of being treated like `while X`.
loop = kw:loop_keyword _ e:expression
    {
        const condition = kw.toLowerCase() === 'until'
            ? { type: "not", expression: e }
            : e;
        return { type: 'loop', condition: condition };
    }
|
||
|
||
/* Rule for the loop break statement */
|
||
// Anything after the keyword on the same line is skipped by
// ignore_rest_of_line, so longer phrases such as "break it down" also match.
break = 'break'i ignore_rest_of_line { return { type: 'break' } }
|
||
|
||
/* Rule for the loop continue statement */
|
||
// `continue` (rest of the line ignored) or the exact phrase
// `take it to the top`.
continue
    = (
          'continue'i ignore_rest_of_line
        / 'take it to the top'i
      )
      { return { type: 'continue' }; }
|
||
|
||
/* Rule for function declaration statement */
|
||
// Keyword separating a function name from its parameter list.
takes = 'takes'i / 'wants'i

// `<name> takes <variable list>`
function_decl = fname:variable _ takes _ params:variable_list
    { return { type: 'function_decl', name: fname, args: params }; }
|
||
|
||
/* Rule for function return statement */
|
||
// Return keywords. 'give back' is listed before 'give' so the longer phrase
// is tried first.
return = 'return'i / 'give back'i / 'send'i / 'give'i

// `<return keyword> <expression>` with an optional trailing `back`.
function_return = return _ value:expression (_ 'back'i)?
    { return { type: 'return', expression: value }; }
|
||
|
||
/*********************************
|
||
VARIABLES AND ASSIGNMENT
|
||
*********************************/
|
||
|
||
/* Keywords used to refer to last assigned variable */
|
||
// Longer pronouns come before shorter ones that share a prefix (e.g. 'xem'
// before 'xe'). The lookahead requires an `is` keyword, whitespace/comment,
// or end of line/input right after the pronoun, without consuming it.
pronoun "pronoun"
    = word:(
          'they'i / 'them'i
        / 'she'i / 'him'i / 'her'i / 'hir'i / 'zie'i / 'zir'i / 'xem'i / 'ver'i
        / 'ze'i / 've'i / 'xe'i / 'it'i / 'he'i
      )
      &(is / _ / EOL / EOF)
      { return { pronoun: word.toLowerCase() }; }
|
||
|
||
/* Prefix for defining common variables */
|
||
// 'an' is tried before 'a' so the longer article wins.
common_prefix "common variable prefix"
    = 'an'i / 'a'i / 'the'i / 'my'i / 'your'i / 'our'i
|
||
|
||
/* Set of recognized uppercase letters */
|
||
// A-Z plus accented capitals from Latin-1 and Latin Extended-A.
// 'Ĳ' (U+0132) is a single ligature character; it had been garbled into the
// two ASCII letters "IJ", which dropped the ligature from the class.
uppercase_letter "uppercase letter" = [A-ZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞĀĂĄĆĈĊČĎĐĒĔĖĘĚĜĞĠĢĤĦĨĪĬĮİĲĴĶĸĹĻĽĿŁŃŅŇŊŌŎŐŒŔŖŘŚŜŞŠŢŤŦŨŪŬŮŰŲŴŶŸŹŻŽ]
|
||
|
||
/* Set of recognized lowercase letters */
|
||
// a-z plus accented lowercase letters from Latin-1 and Latin Extended-A.
// 'ĳ' (U+0133) and 'ŉ' (U+0149) are single characters; they had been garbled
// into "ij" and "ʼn", which dropped both letters and wrongly admitted the
// modifier apostrophe U+02BC as a letter.
lowercase_letter "lowercase letter" = [a-zàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþāăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıĳĵķĸĺļľŀłńņňŋōŏőœŕŗřśŝşšţťŧũūŭůűųŵŷÿźżžŉß]
|
||
|
||
/* Set of recognized letters */
|
||
// Any single character from either letter class above.
letter "letter" = uppercase_letter / lowercase_letter
|
||
|
||
/* Rule for variable identifiers */
|
||
// Alternatives are tried in this order. common_variable, proper_variable and
// simple_variable yield a lowercased string; pronoun yields `{ pronoun }`.
variable "variable" = common_variable / proper_variable / pronoun / simple_variable
|
||
|
||
/* Name of a common variable */
|
||
// `<prefix> <word>` -> "prefix_word", lowercased.
common_variable = article:common_prefix _ noun:$(letter+)
    { return [article, noun].join('_').toLowerCase(); }
|
||
|
||
/* Rule for name of a simple variable */
|
||
// A single word of letters that is not a reserved keyword; the result is
// lowercased.
simple_variable = word:$(letter+) !{ return isKeyword(word); }
    { return word.toLowerCase(); }
|
||
|
||
/* Rule for a single word in proper variable */
|
||
// A capitalised word that is not a reserved keyword; returned unchanged.
proper_noun = word:$(uppercase_letter letter*) !{ return isKeyword(word); }
    { return word; }
|
||
|
||
/* Rule for name of a proper variable */
|
||
// One or more proper nouns separated by single spaces. The matched text is
// returned with spaces replaced by underscores, lowercased.
proper_variable = words:$(proper_noun (' ' proper_noun)*)
    { return words.split(' ').join('_').toLowerCase(); }
|
||
|
||
/* Rule for a list of variables */
|
||
// Any expression-list separator, or the word `and` surrounded by whitespace.
variable_list_separator "separator"
    = expression_list_separator
    / _ 'and'i _

// One or more variables joined by separators, returned as a flat array.
variable_list "variable list"
    = first:variable variable_list_separator rest:variable_list
      { return [first, ...rest]; }
    / only:variable
      { return [only]; }
|
||
|
||
/* Rule for part of array access clause */
|
||
// ` at <expression>`; yields the index expression.
indexer = _ 'at'i _ position:expression
    { return position; }
|
||
|
||
/* Rule for possible target of a value assignment */
|
||
// A variable with an optional `at <expression>` index. `index` is null when
// no indexer is present.
assignable "assignable variable"
    = name:variable at:indexer?
      { return { variable: name, index: at }; }
|
||
|
||
/* Operators allowed in compound assignment */
|
||
// Yields the operator symbol returned by the matched arithmetic keyword rule.
compoundable_operator "operator" = add / subtract / multiply / divide
|
||
|
||
/* Rule for assignment statements */
|
||
// Assignment-like statements. Alternatives are tried in the order written.
assignment "assignment statement"
    // <target> is <literal | poetic number>
    = target:assignable is _* e:(literal / poetic_number)
      { return { type: "assign", target: target, expression: e }; }

    // <target> says <rest of line as string>
    / target:assignable _ ('says 'i / 'say 'i / 'said 'i) e:poetic_string
      { return { type: "assign", target: target, expression: e }; }

    // put <expression> into <target>
    / 'put'i _ e:expression into target:assignable
      { return { type: "assign", target: target, expression: e }; }

    // let <target> be <operator> <expression>  (compound assignment)
    // The generated nodes use the same `type`-tagged shapes as the binary and
    // lookup nodes produced elsewhere in this grammar, so consumers see one
    // node format. The lookup omits `index` when the target has none,
    // mirroring the `lookup` rule.
    / 'let'i _ target:assignable _ 'be'i o:compoundable_operator e:expression
      {
          const current = target.index === null
              ? { type: "lookup", variable: target.variable }
              : { type: "lookup", variable: target.variable, index: target.index };
          return {
              type: "assign",
              target: target,
              expression: { type: "binary", op: o, lhs: current, rhs: e }
          };
      }

    // let <target> be <expression>
    / 'let'i _ t:assignable _ 'be'i _ e:expression
      { return { type: "assign", target: t, expression: e }; }

    // rock <expression> into <variable>
    / push _ e:expression into v:variable
      { return { type: "enlist", variable: v, expression: e }; }

    // rock <variable> like <literal | poetic number>
    / push _ v:variable _ 'like'i _ e:(literal / poetic_number)
      { return { type: "enlist", variable: v, expression: e }; }

    // rock <variable> [with] <expression>
    / push _ v:variable (_ 'with'i)? _ e:expression
      { return { type: "enlist", variable: v, expression: e }; }

    // rock <variable>
    / push _ v:variable
      { return { type: "enlist", variable: v }; }

    // roll <variable> into <target>
    / e:delist into target:assignable
      { return { type: "assign", target: target, expression: e }; }
|
||
|
||
/*********************************
|
||
EXPRESSION TREE
|
||
*********************************/
|
||
|
||
/* Rule for a list of expressions */
|
||
// Either ", and" followed by whitespace, or one of `&`, `,`, `'n'` with
// optional whitespace on both sides.
expression_list_separator "separator"
    = _? ', and'i _
    / _? ('&' / ',' / "'n'"i) _?

// One or more simple expressions joined by separators, as a flat array.
expression_list "expression list"
    = first:simple_expression expression_list_separator rest:expression_list
      { return [first, ...rest]; }
    / only:simple_expression
      { return [only]; }
|
||
|
||
/* Root rule for expression tree */
|
||
// Entry point for expressions; delegates directly to `boolean`.
expression "expression" = boolean
|
||
|
||
/* Rule for a boolean operation clause */
|
||
// Delegates to `nor`, the first rule in the nor -> or -> and chain.
boolean = nor
|
||
|
||
/* Rule for NOR operation */
|
||
// `<or> nor <nor>`; recursion on the right-hand side. Falls back to `or`.
nor
    = left:or _ 'nor'i _ right:nor
      { return { type: "binary", op: 'nor', lhs: left, rhs: right }; }
    / or
|
||
|
||
/* Rule for OR operation */
|
||
// `<and> or <or>`; recursion on the right-hand side. Falls back to `and`.
or
    = left:and _ 'or'i _ right:or
      { return { type: "binary", op: 'or', lhs: left, rhs: right }; }
    / and
|
||
|
||
/* Rule for AND operation */
|
||
// `<equality_check> and <and>`; recursion on the right-hand side. Falls back
// to `equality_check`.
and
    = left:equality_check _ 'and'i _ right:and
      { return { type: "binary", op: 'and', lhs: left, rhs: right }; }
    / equality_check
|
||
|
||
/* Keywords for equality/inequality check */
|
||
// Either a contraction ('s / 're) directly after the previous token, or
// whitespace followed by '=', is, was, are or were. Trailing whitespace or a
// comment is required in both cases.
is = ("'s"i / "'re"i / _ ('=' / 'is'i / 'was'i / 'are'i / 'were'i)) _
|
||
// Negated equality keywords, with and without the apostrophe. Whitespace or
// a comment is required on both sides.
isnt
    = _ (
          'isnt'i   / "isn't"i
        / 'aint'i   / "ain't"i
        / 'arent'i  / "aren't"i
        / 'wasnt'i  / "wasn't"i
        / 'werent'i / "weren't"i
      ) _
|
||
|
||
/* Rule for equality/inequality check */
|
||
// Equality operator: 'ne' for a negated keyword, 'eq' otherwise. `isnt` is
// tried first.
eq
    = isnt { return 'ne'; }
    / is { return 'eq'; }

/* Rule for equality/inequality check */
// `<not> <eq> <equality_check>`; recursion on the right-hand side. Falls
// back to `not`.
equality_check
    = left:not op:eq right:equality_check
      { return { type: "comparison", comparator: op, lhs: left, rhs: right }; }
    / not
|
||
|
||
/* Rule for NOT operation */
|
||
// `not <not>` (may be stacked), otherwise a plain comparison.
not
    = 'not'i _ operand:not
      { return { type: "not", expression: operand }; }
    / comparison
|
||
|
||
/* Keywords for comparison operators */
|
||
// Comparative adjectives, used as `is <word> than`.
greater = 'higher'i / 'greater'i / 'bigger'i / 'stronger'i
smaller = 'lower'i / 'less'i / 'smaller'i / 'weaker'i

// Base adjectives, used as `is as <word> as`.
great = 'high'i / 'great'i / 'big'i / 'strong'i
small = 'low'i / 'little'i / 'small'i / 'weak'i

// Maps a comparison phrase to its operator code: gt, lt, ge or le.
comparator
    = is greater _ 'than'i _      { return 'gt'; }
    / is smaller _ 'than'i _      { return 'lt'; }
    / is 'as'i _ great _ 'as'i _  { return 'ge'; }
    / is 'as'i _ small _ 'as'i _  { return 'le'; }
|
||
|
||
/* Rule for comparison clause */
|
||
// `<arithmetic> <comparator> <comparison>`; recursion on the right-hand
// side. Falls back to `arithmetic`.
comparison
    = left:arithmetic op:comparator right:comparison
      { return { type: "comparison", comparator: op, lhs: left, rhs: right }; }
    / arithmetic
|
||
|
||
/* Rule for plus/minus arithmetic clause */
|
||
// A product followed by one or more `+`/`-` terms, folded left to right into
// nested binary nodes. Falls back to a single product.
arithmetic
    = head:product tail:((add / subtract) product)+
      {
          return tail.reduce(
              (lhs, [op, rhs]) => ({ type: "binary", op: op, lhs: lhs, rhs: rhs }),
              head
          );
      }
    / product
|
||
|
||
/* Rule for multiply/divide arithmetic clause */
|
||
// A simple expression followed by one or more `*`/`/` terms, folded left to
// right into nested binary nodes. Each right-hand side is an expression list
// (an array).
//
// The nodes use the same `{ type: "binary", op, lhs, rhs }` shape that
// `arithmetic`, `and`, `or` and `nor` produce, so a single expression tree
// never mixes two binary-node formats.
//
// Note: `expression_list` already accepts a lone simple expression (returning
// a one-element array), so the final `simple_expression` alternative is never
// reached. It is kept to stay in line with the upstream grammar.
product
    = first:simple_expression rest:((multiply / divide) expression_list)+
      {
          return rest.reduce(function(memo, curr) {
              return { type: "binary", op: curr[0], lhs: memo, rhs: curr[1] };
          }, first);
      }
    / expression_list
    / simple_expression
|
||
|
||
/* Rule for the leaf of an expression tree */
|
||
// Alternatives are tried in this order; function_call comes first because a
// call also begins with a variable.
simple_expression = function_call / constant / lookup / literal / pronoun
|
||
|
||
/* Rule for function call expression */
|
||
// `<name> taking <expression list>`; `args` is always an array.
function_call "function call" = callee:variable _ 'taking'i _ params:expression_list
    {
        const argList = Array.isArray(params) ? params : [params];
        return { type: "call", name: callee, args: argList };
    }
|
||
|
||
/* Rule for a constant literal */
|
||
// Keyword constants are tried before numeric and quoted-string literals.
literal "literal" = constant / number / string
|
||
|
||
/* Rule for keyword-based constant literals */
|
||
// Dispatches to the keyword-constant rules below, in this order.
constant "constant" = null / true / false / empty_string / mysterious

// Aliases for null.
null
    = ('null'i / 'nothing'i / 'nowhere'i / 'nobody'i / 'gone'i)
      { return { constant: null }; }

// Aliases for true; must not be directly followed by another letter.
true
    = ('true'i / 'ok'i / 'right'i / 'yes'i) !letter
      { return { constant: true }; }

// Aliases for false; must not be directly followed by another letter.
false
    = ('false'i / 'lies'i / 'wrong'i / 'no'i) !letter
      { return { constant: false }; }

// Aliases for the empty string.
empty_string
    = ('empty'i / 'silent'i / 'silence'i)
      { return { constant: "" }; }

// Yields the sentinel string '__MYSTERIOUS__' rather than a constant node.
mysterious
    = 'mysterious'i
      { return '__MYSTERIOUS__'; }
|
||
|
||
/* Rule for a numeric literal */
|
||
// Optional minus sign, digits and an optional fraction, with one optional
// trailing '.' that is consumed but not part of the value; or a fraction
// with no leading digits (e.g. ".5").
number "number"
    = digits:$('-'? [0-9]+ ('.' [0-9]+)?) '.'?
      { return { number: parseFloat(digits) }; }
    / fraction:$('.' [0-9]+)
      { return { number: parseFloat(fraction) }; }
|
||
|
||
/* Rule for a string literal */
|
||
// Double-quoted text with no escape sequences; the body ends at the next '"'.
string "string" = '"' text:$[^"]* '"'
    { return { string: text }; }
|
||
|
||
/* 'TODO remove */
|
||
|
||
/*********************************
|
||
OPERATION STATEMENTS
|
||
*********************************/
|
||
|
||
/* Rule for single-operation statements */
|
||
// Alternatives are tried in this order and the first match wins.
operation "operation statement" = readline / output / crement / mutation / assignment / rounding
|
||
|
||
/* Rule for STDIN operation statement */
|
||
// `listen to <target>` reads into a target; a bare `listen` has no target.
readline "stdin statement"
    = 'listen to'i _ dest:assignable
      { return { type: "stdin", target: dest }; }
    / 'listen'i
      { return { type: "stdin" }; }
|
||
|
||
/* Rule for STDOUT statement */
|
||
// `say` / `shout` / `whisper` / `scream` followed by the expression to print.
output "stdout statement"
    = ('say'i / 'shout'i / 'whisper'i / 'scream'i) _ value:expression
      { return { type: "stdout", output: value }; }
|
||
|
||
/* Rule for increment/decrement statements */
|
||
// Either `build ... up` or `knock ... down`.
crement "increment/decrement statement" = increment / decrement
|
||
|
||
/* Rule for increment statement */
|
||
// `build <variable> up[, up ...]`; `multiple` is the number of `up` words.
increment = 'build'i _ target:variable _ ups:('up'i noise*)+
    { return { type: "increment", variable: target, multiple: ups.length }; }
|
||
|
||
/* Rule for decrement statement */
|
||
// `knock <variable> down[, down ...]`; `multiple` is the number of `down`
// words.
decrement = 'knock'i _ target:variable _ downs:('down'i noise*)+
    { return { type: "decrement", variable: target, multiple: downs.length }; }
|
||
|
||
// Each mutation keyword rule collapses its aliases into one canonical name.
split
    = ('cut'i / 'split'i / 'shatter'i)
      { return 'split'; }

cast
    = ('cast'i / 'burn'i)
      { return 'cast'; }

join
    = ('join'i / 'unite'i)
      { return 'join'; }
|
||
|
||
/* Rule for mutation operation statements */
|
||
// Yields the canonical name ('split', 'cast' or 'join') returned by the
// matched keyword rule.
mutator "mutation keyword" = split / cast / join
|
||
// ` with <expression>` or ` using <expression>`; yields the expression.
modifier = _ ('with'i / 'using'i) _ argument:expression
    { return argument; }
|
||
// Two forms:
//   <mutator> <expression> into <target> [with <modifier>]
//   <mutator> <target> [with <modifier>]      (mutates the target in place)
// `modifier` is null when absent.
// NOTE(review): unlike the other statement rules, these nodes are keyed by
// `assign` / `mutation` / `lookup` and carry no top-level `type` field.
// Confirm the consumer expects this shape.
mutation "mutation statement"
    = kind:mutator _ src:expression into dest:assignable mod:modifier?
      {
          const mutated = { mutation: { type: kind, source: src, modifier: mod } };
          return { assign: { target: dest, expression: mutated } };
      }
    / kind:mutator _ dest:assignable mod:modifier?
      {
          const mutated = { mutation: { type: kind, source: { lookup: dest }, modifier: mod } };
          return { assign: { target: dest, expression: mutated } };
      }
|
||
|
||
/* Rule for rounding operation statements */
|
||
// `turn` statements: round down, round up, or round to nearest.
rounding "rounding statement" = floor / ceil / math_round
|
||
|
||
// `turn <variable> down` or `turn down <variable>`.
floor
    = 'turn'i _ target:variable _ 'down'i
      { return { type: "rounding", variable: target, direction: 'down' }; }
    / 'turn'i _ 'down'i _ target:variable
      { return { type: "rounding", variable: target, direction: 'down' }; }
|
||
|
||
// `turn <variable> up` or `turn up <variable>`.
ceil
    = 'turn'i _ target:variable _ 'up'i
      { return { type: "rounding", variable: target, direction: 'up' }; }
    / 'turn'i _ 'up'i _ target:variable
      { return { type: "rounding", variable: target, direction: 'up' }; }
|
||
|
||
// `turn <variable> round|around` or `turn round|around <variable>`.
math_round
    = 'turn'i _ target:variable _ ('round'i / 'around'i)
      { return { type: "rounding", variable: target, direction: 'nearest' }; }
    / 'turn'i _ ('round'i / 'around'i) _ target:variable
      { return { type: "rounding", variable: target, direction: 'nearest' }; }
|
||
|
||
/*********************************
|
||
KITCHEN SINK
|
||
*********************************/
|
||
|
||
/* Keywords for arithmetic operators */
|
||
// Note that operator aliases explicitly include a trailing space,
|
||
// otherwise 'with' is a prefix code for 'without' and confuses the parser.
|
||
// Each operator rule accepts the symbol or a word alias (which includes its
// trailing space), allows surrounding whitespace/comments, and yields the
// operator symbol.
add
    = _* ('+' / 'plus 'i / 'with 'i) _*
      { return '+'; }

subtract
    = _* ('-' / 'minus 'i / 'without 'i) _*
      { return '-'; }

multiply
    = _* ('*' / 'times 'i / 'of 'i) _*
      { return '*'; }

divide
    = _* ('/' / 'over 'i / 'between 'i) _*
      { return '/'; }
|
||
|
||
// Array enqueue keywords.
push = 'rock'i / 'push'i

// Array dequeue keywords.
pop = 'roll'i / 'pop'i

// `into` or `in`, with whitespace/comment required on both sides. 'into' is
// tried first so it is not cut short by 'in'.
into = _ ('into'i / 'in'i) _
|
||
|
||
/* Rule representing array dequeue clause */
|
||
// `roll <variable>` / `pop <variable>`.
delist "array roll" = pop _ source:variable
    { return { type: "delist", variable: source }; }
|
||
|
||
/* Rule for variable in expression tree leaf */
|
||
// An array roll, an indexed variable (`<variable> at <expression>`), or a
// plain variable. The plain form has no `index` property.
lookup "variable or array element"
    = delist
    / name:variable _ 'at'i _ position:expression
      { return { type: "lookup", variable: name, index: position }; }
    / name:variable
      { return { type: "lookup", variable: name }; }
|
||
|
||
/* Rule for poetic string literal */
|
||
// Everything up to (not including) the next CR or LF, taken verbatim.
poetic_string "poetic string" = text:$[^\r\n]*
    { return { string: text }; }
|
||
|
||
/* Rule for poetic number literal */
|
||
// Integer digits from the words before the first '.', optional decimal
// digits from the words after it. Separators around both parts are skipped.
poetic_number "poetic number"
    = poetic_digit_separator* whole:poetic_digits poetic_digit_separator*
      frac:poetic_decimal? poetic_digit_separator*
      {
          const text = frac ? whole + '.' + frac : whole;
          return { number: parseFloat(text) };
      }
|
||
|
||
/* Rule for poetic decimal literal */
|
||
// The part of a poetic number after the first '.'.
// Yields the decimal digits as a string. When the '.' is followed only by
// separators there are no decimal digits, and the rule yields '' explicitly.
// Without an action this alternative would yield the raw match array, which
// poetic_number would then concatenate into the text passed to parseFloat.
poetic_decimal
    = '.' poetic_decimal_digit_separator* d:poetic_decimal_digits poetic_decimal_digit_separator*
      { return d; }
    / '.' poetic_decimal_digit_separator*
      { return ''; }
|
||
|
||
/* Separator used in poetic literals */
|
||
// Whitespace/comments, ASCII digits and the listed punctuation may appear
// between poetic words. poetic_digits discards them, so they add nothing to
// the value.
poetic_digit_separator = ( _ / [0-9\',;:?!+_/] )
|
||
|
||
// One digit per word; the digits are concatenated as a string, left to right.
poetic_digits
    = poetic_digit_separator* first:poetic_digit poetic_digit_separator+ rest:poetic_digits
      { return first + rest; }
    / only:poetic_digit
      { return only; }
|
||
|
||
// After the decimal point, further '.' characters count as separators too.
poetic_decimal_digit_separator = _ / poetic_digit_separator / '.'

// One digit per word after the decimal point, concatenated as a string.
poetic_decimal_digits
    = poetic_decimal_digit_separator* first:poetic_digit poetic_decimal_digit_separator+ rest:poetic_decimal_digits
      { return first + rest; }
    / only:poetic_digit
      { return only; }
|
||
|
||
// One poetic word: ASCII letters, hyphens and apostrophes. Its digit is the
// character count modulo 10, with apostrophes left out of the count.
poetic_digit = chars:[A-Za-z\-']+
    {
        const counted = chars.filter((ch) => ch !== "'");
        return String(counted.length % 10);
    }
|