220 lines
8.1 KiB
Scala
220 lines
8.1 KiB
Scala
package wacc
|
||
|
||
import parsley.Result
|
||
import parsley.Parsley
|
||
import parsley.Parsley.{atomic, many, notFollowedBy, pure}
|
||
import parsley.combinator.{countSome, sepBy}
|
||
import parsley.expr.{precedence, SOps, InfixL, InfixN, InfixR, Prefix, Atoms}
|
||
import parsley.errors.combinator._
|
||
import parsley.errors.patterns.VerifiedErrors
|
||
import parsley.syntax.zipped._
|
||
import parsley.cats.combinator.{some}
|
||
import cats.data.NonEmptyList
|
||
import parsley.errors.DefaultErrorBuilder
|
||
import parsley.errors.ErrorBuilder
|
||
import parsley.errors.tokenextractors.LexToken
|
||
import parsley.character.char
|
||
|
||
object parser {
|
||
import lexer.implicits.implicitSymbol
|
||
import lexer.{ident, integer, charLit, stringLit, negateCheck, errTokens}
|
||
import ast._
|
||
|
||
// error extensions
|
||
extension [A](p: Parsley[A]) {

  /** Labels this parser and attaches an explanation in a single call.
    *
    * @param label
    *   text passed to `label`
    * @param explanation
    *   text passed to `explain`
    * @return
    *   `p` with the label applied first and the explanation applied to the result
    */
  def labelAndExplain(label: String, explanation: String): Parsley[A] =
    p.label(label).explain(explanation)

  /** Applies the standard label for `t` and, for `LabelType.Expr` only, a longer explanation of
    * what a valid expression looks like. Every other label type receives just the label.
    */
  def labelAndExplain(t: LabelType): Parsley[A] = {
    val labelled = labelWithType(t)
    t match {
      case LabelType.Expr =>
        labelled.explain(
          "a valid expression can start with: null, literals, identifiers, unary operators, or parentheses. " +
            "Expressions can also contain array indexing and binary operators. " +
            "Pair extraction is not allowed in expressions, only in assignments."
        )
      case _ => labelled
    }
  }

  /** Applies the standard label text associated with `t`. */
  def labelWithType(t: LabelType): Parsley[A] = {
    val text = t match {
      case LabelType.Expr => "valid expression"
      case LabelType.Pair => "valid pair"
    }
    p.label(text)
  }
}
|
||
|
||
/** Categories of grammar construct that have a standard error label (see `labelWithType`). */
enum LabelType {
  case Expr, Pair
}
|
||
|
||
/** Matches an opening parenthesis only in order to reject it with an explanation. It is offered
  * as an alternative to the additive operators in `<expr>`, so `f(...)` written without `call`
  * yields this message.
  */
val _parensCheck: Parsley[Nothing] =
  char('(').verifiedExplain("functions can only be called using 'call' keyword")
|
||
|
||
/** Error builder used by `parse`: the default string builder, with unexpected tokens extracted
  * using the lexer's `errTokens`.
  */
implicit val builder: ErrorBuilder[String] =
  new DefaultErrorBuilder with LexToken {
    override def tokens = errTokens
  }
|
||
// `<program>` wrapped by `lexer.fully` (presumably leading whitespace and end-of-input handling;
// see the lexer for the exact behaviour).
private val wholeProgram = lexer.fully(`<program>`)

/** Parses a complete source text.
  *
  * @param input
  *   the program source
  * @return
  *   the parsed `Program`, or a rendered error message on failure
  */
def parse(input: String): Result[String, Program] = wholeProgram.parse(input)
|
||
|
||
// Expressions
|
||
/** Expression parser, assembled from a precedence table.
  *
  * The levels are chained with `+:` and end in `<atom>`. As written they run from `||`, through
  * `&&`, equality, comparison, additive and multiplicative operators, down to the prefix
  * operators. Each level's fixity is given by its tag (`InfixR`, `InfixN`, `InfixL`, `Prefix`).
  */
private lazy val `<expr>`: Parsley[Expr] = precedence {
  SOps(InfixR)(Or from "||") +:
    SOps(InfixR)(And from "&&") +:
    SOps(InfixN)(Eq from "==", Neq from "!=") +:
    SOps(InfixN)(
      Less from "<",
      LessEq from "<=",
      Greater from ">",
      GreaterEq from ">="
    ) +:
    // Additive level: each operator also offers `_parensCheck`, so a '(' found where an
    // operator could appear is reported with the "call keyword" explanation.
    SOps(InfixL)(
      ((Add from "+").label("binary operator") | _parensCheck),
      ((Sub from "-").label("binary operator") | _parensCheck)
    ) +:
    SOps(InfixL)(Mul from "*", Div from "/", Mod from "%") +:
    SOps(Prefix)(
      Not from "!",
      // notFollowedBy(negateCheck) ensures that negative numbers are parsed as a single int literal
      (Negate from (notFollowedBy(negateCheck) ~> "-")).hide,
      Len from "len",
      Ord from "ord",
      Chr from "chr"
    ) +:
    `<atom>`
}
|
||
|
||
// Atoms
|
||
/** Atomic expressions that terminate the precedence table: integer, boolean, character and
  * string literals, the `null` pair literal, an identifier (optionally indexed), and a
  * parenthesised expression. The literal alternatives carry explicit labels.
  */
private lazy val `<atom>`: Atoms[Expr6] = Atoms(
  IntLiter(integer).label("integer literal"),
  BoolLiter(("true" as true) | ("false" as false)).label("boolean literal"),
  CharLiter(charLit).label("character literal"),
  StrLiter(stringLit).label("string literal"),
  PairLiter from "null",
  `<ident-or-array-elem>`,
  // Recursion back into the full expression grammar.
  Parens("(" ~> `<expr>` <~ ")")
)
|
||
// An identifier. A run of one or more `*` / `&` symbols in identifier position is rejected with
// a dedicated "pointer operators" explanation instead of a generic error.
private val `<ident>` =
  Ident(ident) | some("*" | "&").verifiedExplain("pointer operators are not allowed")
// An identifier optionally followed by array indices; when no indices follow, the identifier is
// returned unchanged (`identity`).
private lazy val `<ident-or-array-elem>` =
  `<ident>` <**> (`<array-indices>` </> identity)
// One or more bracketed index expressions, `[e1][e2]...`, to be applied to a preceding identifier.
private val `<array-indices>` = ArrayElem(some("[" ~> `<expr>` <~ "]"))
|
||
|
||
// Types
|
||
// A full type: a base type or a `pair(...)` type, optionally followed by array brackets.
private lazy val `<type>`: Parsley[Type] =
  (`<base-type>` | (`<pair-type>` ~> `<pair-elems-type>`)) <**> (`<array-type>` </> identity)
// One of the four primitive type keywords.
private val `<base-type>` =
  (IntType from "int") | (BoolType from "bool") | (CharType from "char") | (StringType from "string")
// One or more `[]` suffixes; `countSome` counts the bracket pairs and the result is applied to
// the element type parsed before it (via `<**>` at the use sites).
private lazy val `<array-type>` =
  ArrayType(countSome("[" ~> "]"))
// The `pair` keyword on its own.
private val `<pair-type>` = "pair"
// The parenthesised element types that follow `pair`: `(fst-type, snd-type)`.
private val `<pair-elems-type>`: Parsley[PairType] = PairType(
  "(" ~> `<pair-elem-type>` <~ ",",
  `<pair-elem-type>` <~ ")"
)
// A type allowed inside a pair: a base type (optionally an array), or the `pair` keyword.
// After `pair`, either a full `(…)` element list followed by mandatory array brackets is parsed
// (the untyped placeholder is then discarded in favour of the array type), or the bare keyword
// stands as an untyped pair.
private lazy val `<pair-elem-type>` =
  (`<base-type>` <**> (`<array-type>` </> identity)) |
    ((UntypedPairType from `<pair-type>`) <**>
      ((`<pair-elems-type>` <**> `<array-type>`)
        .map(arr => (_: UntypedPairType) => arr) </> identity))
|
||
|
||
/* Statements
   Atomic is used in two places here:
   1. Atomic for the function return type - the code may be a variable declaration instead. If
      we were to factor out the type, the resulting code would be rather messy. It can only
      fail once in the entire program, so it creates minimal overhead.
   2. Atomic for the missing-return-type check on functions - there is no easy way around an
      explicit invalid-syntax check; this happens at most once per program, so it is not a
      major concern.
*/
|
||
/** A whole program: `begin`, zero or more function declarations, the main statement list, `end`.
  *
  * Each iteration of `many` tries, in order:
  *   - a function declaration, recognised by `type ident (` under `atomic` so that a variable
  *     declaration (which starts the same way) can still be parsed by the main body;
  *   - an `ident (` with no preceding type, which is rejected with a dedicated explanation.
  *
  * The missing-return-type check has to live inside `many`: `many` succeeds with an empty list
  * when nothing matches, so an alternative placed after it would never be attempted.
  */
private lazy val `<program>` = Program(
  "begin" ~> many(
    atomic(
      `<type>`.label("function declaration") <~> `<ident>` <~ "("
    ) <**> `<partial-func-decl>` |
      atomic(`<ident>` <~ "(").verifiedExplain("function declaration is missing return type")
  ).label("function declaration"),
  `<stmt>`.label(
    "main program body"
  ) <~ "end"
)
|
||
// The remainder of a function declaration once `type ident (` has been consumed by `<program>`:
// the comma-separated parameters, `)`, `is`, the body and `end`. The result is combined with the
// already-parsed type and name via `<**>` at the call site.
private lazy val `<partial-func-decl>` =
  FuncDecl(
    sepBy(`<param>`, ",") <~ ")" <~ "is",
    // Reject a body whose final statement does not return (see `isReturning`).
    `<stmt>`.guardAgainst {
      case stmts if !stmts.isReturning => Seq("all functions must end in a returning statement")
    } <~ "end"
  )
// A single function parameter: a type followed by an identifier.
private lazy val `<param>` = Param(`<type>`, `<ident>`)
|
||
/** A non-empty, `;`-separated sequence of statements.
  *
  * The first statement is mandatory; further statements are each introduced by `;`. `many`
  * already yields an empty list when no `;` follows, so no separate empty fallback is needed.
  */
// NOTE(review): the first statement is labelled "main program body" even when this parser is
// used for nested bodies (functions, if/while branches) - confirm that wording is intended.
private lazy val `<stmt>`: Parsley[NonEmptyList[Stmt]] =
  (
    `<basic-stmt>`.label("main program body"),
    many(";" ~> `<basic-stmt>`.label("statement after ';'"))
  ).zipped(NonEmptyList.apply)
|
||
|
||
/** A single statement. Alternatives are tried in the order written: the keyword-led forms
  * first, then a variable declaration (starts with a type), and finally an assignment.
  */
private lazy val `<basic-stmt>` =
  (Skip from "skip")
    | Read("read" ~> `<lvalue>`)
    | Free("free" ~> `<expr>`.labelAndExplain(LabelType.Expr))
    | Return("return" ~> `<expr>`.labelAndExplain(LabelType.Expr))
    | Exit("exit" ~> `<expr>`.labelAndExplain(LabelType.Expr))
    // `print` and `println` share a node; the second argument records whether a newline follows.
    | Print("print" ~> `<expr>`.labelAndExplain(LabelType.Expr), pure(false))
    | Print("println" ~> `<expr>`.labelAndExplain(LabelType.Expr), pure(true))
    | If(
      "if" ~> `<expr>`.labelWithType(LabelType.Expr) <~ "then",
      `<stmt>` <~ "else",
      `<stmt>` <~ "fi"
    )
    | While("while" ~> `<expr>`.labelWithType(LabelType.Expr) <~ "do", `<stmt>` <~ "done")
    | Block("begin" ~> `<stmt>` <~ "end")
    | VarDecl(
      `<type>`,
      // After `type ident`, an '(' instead of '=' means a function declaration was written in
      // the statement section; report that explicitly.
      `<ident>` <~ ("=" | "(".verifiedExplain(
        "all function declarations must be above the main program body"
      )),
      `<rvalue>`.label("valid initial value for variable")
    )
    // TODO: Can we inline the name of the variable in the message
    | Assign(`<lvalue>` <~ "=", `<rvalue>`)
|
||
/** Assignment target: a pair element (`fst` / `snd`), or an identifier with optional indices. */
private lazy val `<lvalue>`: Parsley[LValue] = `<pair-elem>` | `<ident-or-array-elem>`

/** Right-hand side of a declaration or assignment. Alternatives, in the order tried: array
  * literal, `newpair(e1, e2)`, pair element, `call f(args)`, and finally a plain expression.
  */
private lazy val `<rvalue>`: Parsley[RValue] = (
  `<array-liter>`
    | NewPair("newpair" ~> "(" ~> `<expr>` <~ ",", `<expr>` <~ ")")
    | `<pair-elem>`
    | Call("call" ~> `<ident>` <~ "(", sepBy(`<expr>`, ",") <~ ")")
    | `<expr>`.labelWithType(LabelType.Expr)
)
|
||
/** Pair projection: `fst` or `snd` applied to an lvalue, which is labelled with the standard
  * pair label.
  */
private lazy val `<pair-elem>` = (
  Fst("fst" ~> `<lvalue>`.labelWithType(LabelType.Pair))
    | Snd("snd" ~> `<lvalue>`.labelWithType(LabelType.Pair))
)
|
||
/** Array literal: a bracketed, comma-separated (possibly empty) list of expressions. */
private lazy val `<array-liter>` =
  ArrayLiter("[" ~> sepBy(`<expr>`, ",") <~ "]")
|
||
|
||
extension (stmts: NonEmptyList[Stmt]) {

  /** Determines whether a statement list is guaranteed to return on every path. This is required
    * because all functions must end via a "return" or "exit" statement.
    *
    * Only the last statement is inspected:
    *   - `return` / `exit` are returning;
    *   - `if` is returning when both of its branches are;
    *   - a nested `begin ... end` block is returning when its body is.
    *
    * A `while` loop is never considered returning: its body may execute zero times, so a
    * `return` inside it gives no guarantee.
    *
    * @return
    *   true if the statement list is guaranteed to end in a return or exit, false otherwise
    */
  def isReturning: Boolean = stmts.last match {
    case Return(_) | Exit(_)       => true
    case If(_, thenStmt, elseStmt) => thenStmt.isReturning && elseStmt.isReturning
    case Block(body)               => body.isReturning
    case _                         => false
  }
}
|
||
}
|