From bf163e52fe5212bdcaeab97c0a9af1472319b622 Mon Sep 17 00:00:00 2001
From: Jonny
Date: Sat, 1 Feb 2025 02:21:39 +0000
Subject: [PATCH] feat: initial lexer implementation

---
 src/main/wacc/lexer.scala | 51 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 48 insertions(+), 3 deletions(-)

diff --git a/src/main/wacc/lexer.scala b/src/main/wacc/lexer.scala
index f633bf6..907053b 100644
--- a/src/main/wacc/lexer.scala
+++ b/src/main/wacc/lexer.scala
@@ -1,16 +1,61 @@
 package wacc
 
 import parsley.Parsley
-import parsley.token.Lexer
+import parsley.token.{Basic, Lexer}
 import parsley.token.descriptions.*
 
 object lexer {
   private val desc = LexicalDesc.plain.copy(
-    // your configuration goes here
+    nameDesc = NameDesc.plain.copy(
+      identifierStart = Basic(c => c.isLetter || c == '_'),
+      identifierLetter = Basic(c => c.isLetterOrDigit || c == '_')
+    ),
+    symbolDesc = SymbolDesc.plain.copy(
+      hardKeywords = Set(
+        "begin", "end", "is", "skip", "if", "then", "else", "fi", "while", "do",
+        "done", "read", "free", "return", "exit", "print", "println", "true",
+        "false", "int", "bool", "char", "string", "pair", "newpair", "fst",
+        "snd", "call", "chr", "ord", "len", "null"
+      ),
+      hardOperators = Set(
+        "+", "-", "*", "/", "%", ">", "<", ">=", "<=", "==", "!=", "&&", "||",
+        "!"
+      )
+    ),
+    spaceDesc = SpaceDesc.plain.copy(
+      lineCommentStart = "#"
+    ),
+    // TODO - See BNF 1.1 and Table 5 2.3.6
+    textDesc = TextDesc.plain.copy(
+      graphicCharacter =
+        Basic(c => c >= ' ' && c != '\\' && c != '\'' && c != '"'),
+      escapeSequences = EscapeDesc.plain.copy(
+        literals = Set('\\', '"', '\''),
+        mapping = Map(
+          "0" -> '\u0000',
+          "b" -> '\b',
+          "t" -> '\t',
+          "n" -> '\n',
+          "f" -> '\f',
+          "r" -> '\r'
+        )
+      )
+    )
   )
+
   private val lexer = Lexer(desc)
 
-  val integer = lexer.lexeme.integer.decimal
+  // Enforce 32-bit signed integer range - see 1.5
+  // TODO By default leadingZerosAllowed = true in NumericDesc - Wacc doesnt specify (I think) but should reach consensus
+  val integer = lexer.lexeme.integer.decimal32[Int]
+
+  // TODO Check if textDesc can handle this
+  val charLit = lexer.lexeme.character.ascii
+
+  // TODO Check if textDesc can handle this
+  val stringLit = lexer.lexeme.string.ascii
+
   val implicits = lexer.lexeme.symbol.implicits
+
   def fully[A](p: Parsley[A]): Parsley[A] = lexer.fully(p)
 }