wacc_37/src/main/wacc/parser.scala
2025-02-07 18:48:34 +00:00

225 lines
8.2 KiB
Scala
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package wacc
import parsley.Result
import parsley.Parsley
import parsley.Parsley.{atomic, many, notFollowedBy, pure, unit}
import parsley.combinator.{countSome, sepBy}
import parsley.expr.{precedence, SOps, InfixL, InfixN, InfixR, Prefix, Atoms}
import parsley.errors.combinator._
import parsley.errors.patterns.VerifiedErrors
import parsley.syntax.zipped._
import parsley.cats.combinator.{some}
import cats.data.NonEmptyList
import parsley.errors.DefaultErrorBuilder
import parsley.errors.ErrorBuilder
import parsley.errors.tokenextractors.LexToken
object parser {
import lexer.implicits.implicitSymbol
import lexer.{ident, integer, charLit, stringLit, negateCheck, errTokens}
import ast._
// error extensions
extension [A](p: Parsley[A]) {

  /** Attaches a label to `p` and then a reason, in a single call.
    *
    * @param label       text passed to `label`
    * @param explanation text passed to `explain`
    */
  def labelAndExplain(label: String, explanation: String): Parsley[A] =
    p.label(label).explain(explanation)

  /** Labels `p` according to `t`. Expression parsers additionally receive an explanation of what
    * a valid expression looks like; every other label type is labelled only.
    */
  def labelAndExplain(t: LabelType): Parsley[A] = {
    val labelled = labelWithType(t)
    t match {
      case LabelType.Expr =>
        labelled.explain(
          "a valid expression can start with: null, literals, identifiers, unary operators, or parentheses. " +
            "Expressions can also contain array indexing and binary operators. " +
            "Pair extraction is not allowed in expressions, only in assignments."
        )
      case _ => labelled
    }
  }

  /** Labels `p` with the fixed text associated with `t`. */
  def labelWithType(t: LabelType): Parsley[A] = {
    val text = t match {
      case LabelType.Expr => "valid expression"
      case LabelType.Pair => "valid pair"
    }
    p.label(text)
  }
}
/** Kinds of parser that have a predefined error label (see `labelWithType`). */
enum LabelType {
  case Expr, Pair
}
// Implicit error builder that renders parse failures as `String`s, matching the
// `Result[String, Program]` returned by `parse`. It extends parsley's `DefaultErrorBuilder`
// with the `LexToken` mix-in, whose `tokens` member is supplied by the lexer's `errTokens`.
// NOTE(review): `LexToken` presumably uses these token parsers to pick the "unexpected" item
// shown in error messages - confirm against the parsley documentation.
implicit val builder: ErrorBuilder[String] = new DefaultErrorBuilder with LexToken {
def tokens = errTokens
}
// Public entry point: runs the whole-program parser over `input`. The result is parsley's
// `Result`, holding a `String` error message on failure or the parsed `Program` on success.
def parse(input: String): Result[String, Program] = parser.parse(input)
// The `<program>` parser wrapped in `lexer.fully`. Inside this object the name `parser` refers
// to this val rather than to the enclosing object.
// NOTE(review): `lexer.fully` presumably skips leading whitespace and demands end of input -
// confirm in the lexer.
private val parser = lexer.fully(`<program>`)
// Expressions
// Expression parser, built from a parsley precedence table.
// Levels are chained with `+:` and end at `<atom>`; following parsley's `+:` convention the
// first level listed (`||`) binds loosest and the prefix operators bind tightest.
// Fixity per level: `||` and `&&` are right-associative (InfixR), the equality and comparison
// levels are non-associative (InfixN), and the additive and multiplicative levels are
// left-associative (InfixL).
private lazy val `<expr>`: Parsley[Expr] = precedence {
SOps(InfixR)(Or from "||") +:
SOps(InfixR)(And from "&&") +:
SOps(InfixN)(Eq from "==", Neq from "!=") +:
SOps(InfixN)(
Less from "<",
LessEq from "<=",
Greater from ">",
GreaterEq from ">="
) +:
SOps(InfixL)(
// both additive operators are reported under the shared label "binary operator"
(Add from "+").label("binary operator"),
(Sub from "-").label("binary operator")
) +:
SOps(InfixL)(Mul from "*", Div from "/", Mod from "%") +:
SOps(Prefix)(
Not from "!",
// notFollowedBy(negateCheck) ensures that negative numbers are parsed as a single int literal
// `.hide` keeps unary minus out of the "expected" items of error messages
(Negate from (notFollowedBy(negateCheck) ~> "-")).hide,
Len from "len",
Ord from "ord",
Chr from "chr"
) +:
`<atom>`
}
// Atoms
// Operand forms at the bottom of the expression precedence table: integer, boolean, character
// and string literals (each with its own error label), the `null` pair literal, an identifier
// optionally indexed as an array element, and a parenthesised sub-expression.
private lazy val `<atom>`: Atoms[Expr6] = Atoms(
IntLiter(integer).label("integer literal"),
BoolLiter(("true" as true) | ("false" as false)).label("boolean literal"),
CharLiter(charLit).label("character literal"),
StrLiter(stringLit).label("string literal"),
PairLiter from "null",
`<ident-or-array-elem>`,
Parens("(" ~> `<expr>` <~ ")")
)
// Parses an identifier into an `Ident` node. As a fallback, a run of one or more `*` or `&`
// symbols is recognised only in order to fail with the reason
// "pointer operators are not allowed".
private val `<ident>` =
Ident(ident) | some("*" | "&").verifiedExplain("pointer operators are not allowed")
// An identifier, optionally followed by one or more array indices.
// If a `(` directly follows the identifier the parse fails with a reason telling the user to use
// the 'call' keyword; otherwise `unit` succeeds without consuming anything.
// `<array-indices> </> identity`: when indices are present their result is applied to the
// identifier via `<**>`; when absent the identifier is returned unchanged.
private lazy val `<ident-or-array-elem>` =
(`<ident>` <~ ("(".verifiedExplain(
"functions can only be called using 'call' keyword"
) | unit)) <**> (`<array-indices>` </> identity)
// One or more bracketed index expressions (`[e]`), handed to the `ArrayElem` bridge.
// NOTE(review): the bridge presumably yields a function awaiting the indexed identifier, since
// the result is applied with `<**>` in `<ident-or-array-elem>` - confirm in ast.
private val `<array-indices>` = ArrayElem(some("[" ~> `<expr>` <~ "]"))
// Types
// A full type: a base type or a `pair(...)` type, optionally followed by an array suffix.
// `<array-type> </> identity`: when no `[]` follows, the parsed type is returned unchanged.
private lazy val `<type>`: Parsley[Type] =
(`<base-type>` | (`<pair-type>` ~> `<pair-elems-type>`)) <**> (`<array-type>` </> identity)
// The four base types, selected by keyword: `int`, `bool`, `char` and `string`.
private val `<base-type>` =
(IntType from "int") | (BoolType from "bool") | (CharType from "char") | (StringType from "string")
// Array suffix: counts one or more `[]` pairs and passes the count to the `ArrayType` bridge.
// NOTE(review): the bridge presumably yields a function awaiting the element type, since the
// result is applied with `<**>` by its users - confirm in ast.
private lazy val `<array-type>` =
ArrayType(countSome("[" ~> "]"))
// The `pair` keyword. This is a plain string; it becomes a parser at each use site through the
// implicit symbol conversion imported from the lexer.
private val `<pair-type>` = "pair"
// The parenthesised element types of a pair type: `(` first `,` second `)`.
// The leading `pair` keyword is consumed by the caller, not here.
private val `<pair-elems-type>`: Parsley[PairType] = PairType(
"(" ~> `<pair-elem-type>` <~ ",",
`<pair-elem-type>` <~ ")"
)
// Type allowed as an element of a pair. Two forms:
//  - a base type, optionally followed by an array suffix;
//  - the `pair` keyword. On its own it yields `UntypedPairType`. If it is followed by
//    parenthesised element types AND an array suffix, the result is that array type and the
//    `UntypedPairType` placeholder is discarded by the `.map(arr => _ => arr)` step.
// The array suffix is mandatory in the second case (there is no `</> identity` on it), so
// parenthesised element types without a trailing `[]` are not accepted here.
private lazy val `<pair-elem-type>` =
(`<base-type>` <**> (`<array-type>` </> identity)) |
((UntypedPairType from `<pair-type>`) <**>
((`<pair-elems-type>` <**> `<array-type>`)
.map(arr => (_: UntypedPairType) => arr) </> identity))
/* Statements
   Atomic is used in two places here:
   1. Atomic for function return type - the code may be a variable declaration instead. If we
      were to factor out the type, the resulting code would be rather messy. It can only fail
      once in the entire program so it creates minimal overhead.
   2. Atomic for function missing return type check - there is no easy way around an explicit
      invalid syntax check. It is sequenced after the list of function declarations, so it runs
      at most once per program and this is not a major concern.
 */

/** A whole program: `begin`, zero or more function declarations, the main body, `end`.
  *
  * A function declaration is recognised by the prefix `type ident (`; that prefix is atomic so a
  * variable declaration at the start of the main body (`type ident = ...`) can still be parsed.
  * Once the declarations are exhausted, an identifier immediately followed by `(` is reported as
  * a function declaration that is missing its return type.
  */
private lazy val `<program>` = Program(
  "begin" ~> (
    many(
      atomic(
        `<type>`.label("function declaration") <~> `<ident>` <~ "("
      ) <**> `<partial-func-decl>`
    ).label("function declaration") <~
      // `many` succeeds with an empty list when nothing matches, so this check must follow it
      // in sequence: as an alternative (`|`) to `many` it could never be reached. When the check
      // does not fire it consumes nothing and `unit` lets parsing continue with the main body.
      (atomic(`<ident>` <~ "(").verifiedExplain(
        "function declaration is missing return type"
      ) | unit)
  ),
  `<stmt>`.label(
    "main program body"
  ) <~ "end"
)
// The remainder of a function declaration after its `type ident (` prefix, which `<program>`
// parses separately: comma-separated parameters, `)`, `is`, the body, `end`.
// The body is rejected through `guardAgainst` with the message below when it is not returning
// (see `isReturning`).
// The bridge result is applied to the already-parsed (type, ident) pair via `<**>` in `<program>`.
private lazy val `<partial-func-decl>` =
FuncDecl(
sepBy(`<param>`, ",") <~ ")" <~ "is",
`<stmt>`.guardAgainst {
case stmts if !stmts.isReturning => Seq("all functions must end in a returning statement")
} <~ "end"
)
// A single function parameter: a type followed by an identifier.
private lazy val `<param>` = Param(`<type>`, `<ident>`)
/** A non-empty sequence of statements separated by `;`.
  *
  * The first statement carries the generic label "statement". This parser is shared by function
  * bodies, `if`/`while` bodies and nested blocks, so it must not describe itself as the main
  * program body; `<program>` applies the "main program body" label to its own use of this parser.
  *
  * `many` already yields an empty list when no `;` follows, so no default value is needed for
  * the tail.
  *
  * @return the parsed statements, head first, as a `NonEmptyList`
  */
private lazy val `<stmt>`: Parsley[NonEmptyList[Stmt]] =
  (
    `<basic-stmt>`.label("statement"),
    many(";" ~> `<basic-stmt>`.label("statement after ';'"))
  ).zipped(NonEmptyList.apply)
// A single statement; sequencing with `;` is handled by `<stmt>`.
// Alternatives, in order: skip, read, free, return, exit, print, println, if/then/else/fi,
// while/do/done, begin/end block, variable declaration, assignment.
// The final alternative is not a statement: a bare `call f` is recognised only to fail with a
// reason saying the call's result must be assigned.
// The operands of free/return/exit/print/println use `labelAndExplain` (label plus explanation);
// the `if` and `while` conditions use `labelWithType` (label only).
// `Print` receives `pure(false)` for `print` and `pure(true)` for `println`.
private lazy val `<basic-stmt>` =
(Skip from "skip")
| Read("read" ~> `<lvalue>`)
| Free("free" ~> `<expr>`.labelAndExplain(LabelType.Expr))
| Return("return" ~> `<expr>`.labelAndExplain(LabelType.Expr))
| Exit("exit" ~> `<expr>`.labelAndExplain(LabelType.Expr))
| Print("print" ~> `<expr>`.labelAndExplain(LabelType.Expr), pure(false))
| Print("println" ~> `<expr>`.labelAndExplain(LabelType.Expr), pure(true))
| If(
"if" ~> `<expr>`.labelWithType(LabelType.Expr) <~ "then",
`<stmt>` <~ "else",
`<stmt>` <~ "fi"
)
| While("while" ~> `<expr>`.labelWithType(LabelType.Expr) <~ "do", `<stmt>` <~ "done")
| Block("begin" ~> `<stmt>` <~ "end")
| VarDecl(
`<type>`,
// a `(` where `=` was expected looks like a function declaration placed inside the body
`<ident>` <~ ("=" | "(".verifiedExplain(
"all function declarations must be above the main program body"
)),
`<rvalue>`.label("valid initial value for variable")
)
| Assign(
// a `(` where `=` was expected looks like a function call written without `call`
`<lvalue>` <~ ("=" | "(".verifiedExplain(
"function calls must use the 'call' keyword and the result must be assigned to a variable"
)),
`<rvalue>`
) |
("call" ~> `<ident>`).verifiedExplain(
"function calls' results must be assigned to a variable"
)
// An assignable target: a pair element (`fst`/`snd`), or an identifier optionally followed by
// array indices.
private lazy val `<lvalue>`: Parsley[LValue] =
`<pair-elem>` | `<ident-or-array-elem>`
// The right-hand side of a declaration or assignment. Alternatives, in order: an array literal,
// `newpair(e1, e2)`, a pair element (`fst`/`snd`), a function call `call f(args)` with
// comma-separated argument expressions, or a plain expression (labelled "valid expression").
private lazy val `<rvalue>`: Parsley[RValue] =
`<array-liter>` |
NewPair(
"newpair" ~> "(" ~> `<expr>` <~ ",",
`<expr>` <~ ")"
) |
`<pair-elem>` |
Call(
"call" ~> `<ident>` <~ "(",
sepBy(`<expr>`, ",") <~ ")"
) | `<expr>`.labelWithType(LabelType.Expr)
// Pair element access: `fst` or `snd` applied to an lvalue, which is labelled "valid pair".
// `<lvalue>` itself starts with `<pair-elem>`, so the two definitions are mutually recursive;
// the reference to `<lvalue>` sits on the right of `~>` in both alternatives.
// NOTE(review): the label text duplicates what `labelWithType(LabelType.Pair)` produces.
private lazy val `<pair-elem>` =
Fst("fst" ~> `<lvalue>`.label("valid pair"))
| Snd("snd" ~> `<lvalue>`.label("valid pair"))
// An array literal: comma-separated expressions (possibly none) between `[` and `]`.
private lazy val `<array-liter>` = ArrayLiter(
"[" ~> sepBy(`<expr>`, ",") <~ "]"
)
extension (stmts: NonEmptyList[Stmt]) {

  /** Determines whether a statement list is guaranteed to leave the function on every path.
    * This is required because all functions must end via a "return" or "exit" statement.
    *
    * Only the last statement of the list is inspected. It is returning when it is
    *   - a `return` or `exit` statement,
    *   - an `if` whose two branches are both returning, or
    *   - a nested `begin ... end` block whose body is returning.
    *
    * A `while` loop is never returning, even if its body is: the condition may be false on entry,
    * in which case the body does not run and control falls off the end of the function.
    *
    * @return
    *   true if the statement list ends in a returning statement, false otherwise
    */
  def isReturning: Boolean = stmts.last match {
    case Return(_) | Exit(_)       => true
    case If(_, thenStmt, elseStmt) => thenStmt.isReturning && elseStmt.isReturning
    case Block(body)               => body.isReturning
    case _                         => false
  }
}
}