wacc_37/src/main/wacc/parser.scala

187 lines
6.7 KiB
Scala

package wacc
import parsley.Result
import parsley.Parsley
import parsley.Parsley.{atomic, many, notFollowedBy, pure}
import parsley.combinator.{countSome, sepBy}
import parsley.expr.{precedence, SOps, InfixL, InfixN, InfixR, Prefix, Atoms}
import parsley.errors.combinator._
import parsley.syntax.zipped._
import parsley.cats.combinator.{some}
import cats.data.NonEmptyList
import parsley.errors.DefaultErrorBuilder
import parsley.errors.ErrorBuilder
import parsley.errors.tokenextractors.LexToken
object parser {
import lexer.implicits.implicitSymbol
import lexer.{ident, integer, charLit, stringLit, negateCheck, errTokens}
import ast._
// error extensions
extension [A](p: Parsley[A]) {
  /** Labels this parser with the fixed name associated with the given [[LabelType]]. */
  def labelWithType(t: LabelType): Parsley[A] = {
    val name = t match {
      case LabelType.Expr => "valid expression"
      case LabelType.Pair => "valid pair"
    }
    p.label(name)
  }

  /** Applies `label` and then `explain` to this parser in a single call.
    *
    * @param label       the label attached to this parser
    * @param explanation the explanation attached after labelling
    */
  def labelAndExplain(label: String, explanation: String): Parsley[A] =
    p.label(label).explain(explanation)

  /** Labels this parser according to `t`; expressions additionally get a fixed explanation
    * describing what a valid expression looks like, other label types are only labelled.
    */
  def labelAndExplain(t: LabelType): Parsley[A] = {
    val labelled = labelWithType(t)
    t match {
      case LabelType.Expr =>
        labelled.explain(
          "a valid expression can start with: null, literals, identifiers, unary operators, or parentheses. " +
            "Expressions can also contain array indexing and binary operators. " +
            "Pair extraction is not allowed in expressions, only in assignments."
        )
      case LabelType.Pair => labelled
    }
  }
}
/** Kinds of grammar element that have a predefined error label (see `labelWithType`). */
enum LabelType {
  case Expr, Pair
}
// Error builder producing String error messages: the default builder combined with the
// LexToken extractor, which is given the lexer's `errTokens`.
implicit val builder: ErrorBuilder[String] = {
  final class LexTokenErrorBuilder extends DefaultErrorBuilder with LexToken {
    override def tokens = errTokens
  }
  new LexTokenErrorBuilder
}
// The top-level parser: the `<program>` rule wrapped by `lexer.fully`.
private val parser = lexer.fully(`<program>`)

/** Parses a complete source text.
  *
  * @param input the program text to parse
  * @return the parsed [[Program]] on success, or an error message built by the implicit `builder`
  */
def parse(input: String): Result[String, Program] = parser.parse(input)
// Expressions
// Expression parser built from a parsley precedence table.
// Levels are chained with `+:` and end in `<atom>`; they are listed from the loosest-binding
// operators (`||`) down to the tightest-binding ones (prefix operators), then the atoms.
private lazy val `<expr>`: Parsley[Expr] = precedence {
// logical or / and: right-associative
SOps(InfixR)(Or from "||") +:
SOps(InfixR)(And from "&&") +:
// equality and comparison: non-associative
SOps(InfixN)(Eq from "==", Neq from "!=") +:
SOps(InfixN)(
Less from "<",
LessEq from "<=",
Greater from ">",
GreaterEq from ">="
) +:
// additive operators: left-associative, reported as "binary operator" in errors
SOps(InfixL)(
(Add from "+").label("binary operator"),
(Sub from "-").label("binary operator")
) +:
// multiplicative operators: left-associative
SOps(InfixL)(Mul from "*", Div from "/", Mod from "%") +:
// prefix (unary) operators
SOps(Prefix)(
Not from "!",
// unary minus is only tried when `negateCheck` (from the lexer) does not match here,
// and is hidden from error messages
// NOTE(review): presumably this keeps negative integer literals out of Negate; confirm in lexer
(Negate from (notFollowedBy(negateCheck) ~> "-")).hide,
Len from "len",
Ord from "ord",
Chr from "chr"
) +:
`<atom>`
}
// Atoms
// Atomic expressions: literals, the `null` pair literal, identifiers (optionally indexed)
// and parenthesised expressions. Literal parsers carry their own error labels.
private lazy val `<atom>`: Atoms[Expr6] = Atoms(
  IntLiter(integer).label("integer literal"),
  BoolLiter("true".as(true) | "false".as(false)).label("boolean literal"),
  CharLiter(charLit).label("character literal"),
  StrLiter(stringLit).label("string literal"),
  PairLiter.from("null"),
  `<ident-or-array-elem>`,
  Parens("(" ~> `<expr>` <~ ")")
)
// An identifier: the lexer's `ident` token passed to the Ident bridge.
private val `<ident>` = Ident(ident)
// An identifier optionally followed by array indices.
// `<ident>` is parsed first; if `<array-indices>` follows, its result is applied to the
// identifier, otherwise `identity` leaves the identifier unchanged.
private lazy val `<ident-or-array-elem>` =
`<ident>` <**> (`<array-indices>` </> identity)
// One or more bracketed index expressions ("[" <expr> "]"), passed to the ArrayElem bridge.
// It is used on the right of `<**>` in `<ident-or-array-elem>`, where its result is applied
// to the preceding identifier.
private val `<array-indices>` = ArrayElem(some("[" ~> `<expr>` <~ "]"))
// Types
// A type: either a base type, or the "pair" keyword followed by its parenthesised element
// types. Either form may be followed by array brackets (`<array-type>`); when none follow,
// `identity` keeps the parsed type unchanged.
private lazy val `<type>`: Parsley[Type] =
(`<base-type>` | (`<pair-type>` ~> `<pair-elems-type>`)) <**> (`<array-type>` </> identity)
// The four base types, each recognised by its keyword.
private val `<base-type>` =
  IntType.from("int") |
    BoolType.from("bool") |
    CharType.from("char") |
    StringType.from("string")
// Array suffix of a type: counts one or more "[" "]" pairs and passes the count to the
// ArrayType bridge. It is used on the right of `<**>`, so the result is applied to the
// type parsed just before it.
private lazy val `<array-type>` =
ArrayType(countSome("[" ~> "]"))
// The "pair" keyword, kept as a plain String.
// NOTE(review): presumably turned into a parser at its use sites by the imported
// lexer.implicits.implicitSymbol conversion; confirm in lexer.
private val `<pair-type>` = "pair"
// The element types of a pair: "(" <pair-elem-type> "," <pair-elem-type> ")",
// passed to the PairType bridge. The leading "pair" keyword is consumed by the caller.
private val `<pair-elems-type>`: Parsley[PairType] = PairType(
"(" ~> `<pair-elem-type>` <~ ",",
`<pair-elem-type>` <~ ")"
)
// The type of one pair element. Two alternatives:
//  1. a base type, optionally followed by array brackets;
//  2. the "pair" keyword, giving UntypedPairType, optionally followed by a fully written
//     pair type that must itself be followed by array brackets. In that case the resulting
//     array type replaces the UntypedPairType (the `.map` discards its argument).
// So a nested pair is either the bare "pair" keyword or an array of pairs; a fully typed
// pair without array brackets is not accepted here.
private lazy val `<pair-elem-type>` =
(`<base-type>` <**> (`<array-type>` </> identity)) |
((UntypedPairType from `<pair-type>`) <**>
((`<pair-elems-type>` <**> `<array-type>`)
.map(arr => (_: UntypedPairType) => arr) </> identity))
// Statements
// A whole program: "begin", zero or more function declarations, the main body, "end".
private lazy val `<program>` = Program(
// A function header (<type> <ident> "(") is parsed atomically, so a failure inside the
// header does not consume input and the text can be re-read as part of the main body.
// The rest of the declaration comes from `<partial-func-decl>`.
"begin" ~> many(
atomic(`<type>`.label("function declaration") <~> `<ident>` <~ "(") <**> `<partial-func-decl>`
).label("function declaration"),
// If the main body would start with <ident> "(", report a function that lacks its return
// type; otherwise parse the main body statements.
(atomic(`<ident>` <~ "(") ~> fail("function is missing return type") | `<stmt>`.label(
"main program body"
)) <~ "end"
)
// The remainder of a function declaration once `<type> <ident> "("` has been consumed:
// the comma-separated parameters, ")", "is", the body and "end".
// The body is rejected with a dedicated message when it is not returning (see `isReturning`).
private lazy val `<partial-func-decl>` = FuncDecl(
  sepBy(`<param>`, ",") <~ ")" <~ "is",
  `<stmt>`.guardAgainst {
    case body if !body.isReturning =>
      Seq("All functions must end in a returning statement")
  } <~ "end"
)
// A single function parameter: a type followed by an identifier.
private lazy val `<param>` = Param(`<type>`, `<ident>`)
// A non-empty sequence of basic statements separated by ';'.
// The first statement becomes the head of the NonEmptyList, the statements after each ';'
// become its tail.
private lazy val `<stmt>`: Parsley[NonEmptyList[Stmt]] =
  (
    `<basic-stmt>`.label("main program body"),
    // `many` already yields an empty list when no "; <stmt>" follows, and it only fails after
    // consuming input (where an alternative could not recover), so no `</> Nil` is needed.
    many(";" ~> `<basic-stmt>`.label("statement after ';'"))
  ).zipped(NonEmptyList.apply)
// A single statement. Alternatives are tried in the order written:
// skip, read, free, return, exit, print, println, if/then/else/fi, while/do/done,
// begin/end block, variable declaration, assignment.
// The expressions of free/return/exit/print/println are labelled and explained
// (labelAndExplain); the conditions of if/while are only labelled (labelWithType).
// print and println share the Print bridge and differ only in the boolean flag.
private lazy val `<basic-stmt>` =
(Skip from "skip")
| Read("read" ~> `<lvalue>`)
| Free("free" ~> `<expr>`.labelAndExplain(LabelType.Expr))
| Return("return" ~> `<expr>`.labelAndExplain(LabelType.Expr))
| Exit("exit" ~> `<expr>`.labelAndExplain(LabelType.Expr))
| Print("print" ~> `<expr>`.labelAndExplain(LabelType.Expr), pure(false))
| Print("println" ~> `<expr>`.labelAndExplain(LabelType.Expr), pure(true))
| If(
"if" ~> `<expr>`.labelWithType(LabelType.Expr) <~ "then",
`<stmt>` <~ "else",
`<stmt>` <~ "fi"
)
| While("while" ~> `<expr>`.labelWithType(LabelType.Expr) <~ "do", `<stmt>` <~ "done")
| Block("begin" ~> `<stmt>` <~ "end")
| VarDecl(`<type>`, `<ident>` <~ "=", `<rvalue>`.label("valid initial value for variable"))
// TODO: can the variable's name be inlined into the message above?
| Assign(`<lvalue>` <~ "=", `<rvalue>`)
// An assignable target: a pair element (fst/snd ...) or an identifier optionally
// followed by array indices.
private lazy val `<lvalue>`: Parsley[LValue] =
`<pair-elem>` | `<ident-or-array-elem>`
// A value that may appear on the right of a declaration or assignment. Alternatives are
// tried in the order written: array literal, newpair(<expr>, <expr>), pair element,
// call <ident>(<comma-separated exprs>), or a plain expression (labelled "valid expression").
private lazy val `<rvalue>`: Parsley[RValue] =
`<array-liter>` |
NewPair(
"newpair" ~> "(" ~> `<expr>` <~ ",",
`<expr>` <~ ")"
) |
`<pair-elem>` |
Call(
"call" ~> `<ident>` <~ "(",
sepBy(`<expr>`, ",") <~ ")"
) | `<expr>`.labelWithType(LabelType.Expr)
// A pair element access: "fst" or "snd" followed by an lvalue.
// The operand is labelled through the shared LabelType.Pair label ("valid pair") instead of
// repeating the string, so the wording is defined in one place (`labelWithType`).
private lazy val `<pair-elem>` =
  Fst("fst" ~> `<lvalue>`.labelWithType(LabelType.Pair)) |
    Snd("snd" ~> `<lvalue>`.labelWithType(LabelType.Pair))
// An array literal: "[" followed by zero or more comma-separated expressions and "]",
// passed to the ArrayLiter bridge.
private lazy val `<array-liter>` = ArrayLiter(
"[" ~> sepBy(`<expr>`, ",") <~ "]"
)
extension (stmts: NonEmptyList[Stmt]) {
  /** Whether this statement sequence is guaranteed to leave the function.
    *
    * Only the last statement is inspected. It is returning when it is a `return` or `exit`,
    * an `if` whose two branches are both returning, or a nested block that is returning.
    *
    * @return `true` if every path through the last statement ends in `return` or `exit`
    */
  def isReturning: Boolean = stmts.last match {
    case Return(_) | Exit(_)          => true
    case If(_, thenStmt, elseStmt)    => thenStmt.isReturning && elseStmt.isReturning
    case Block(body)                  => body.isReturning
    // A loop body may execute zero times, so control can fall past the loop even when the
    // body itself is returning; a trailing `while` therefore never counts as returning.
    case While(_, _)                  => false
    case _                            => false
  }
}
}