diff --git a/docs/ytd 12-bit computer.md b/docs/ytd 12-bit computer.md index fc2d846..1d81b0f 100644 --- a/docs/ytd 12-bit computer.md +++ b/docs/ytd 12-bit computer.md @@ -114,8 +114,44 @@ unsigned int fixed float Number Literals +``` +number ::= decinteger | bininteger | octinteger | hexinteger | pointfloat | exponentfloat +decinteger ::= nonzerodigit (["_"] digit)* | "0"+ (["_"] "0")* +bininteger ::= "0" ("b" | "B") (["_"] bindigit)+ +octinteger ::= "0" ("o" | "O") (["_"] octdigit)+ +hexinteger ::= "0" ("x" | "X") (["_"] hexdigit)+ +nonzerodigit ::= "1"..."9" +digit ::= "0"..."9" +bindigit ::= "0" | "1" +octdigit ::= "0"..."7" +hexdigit ::= digit | "a"..."f" | "A"..."F" +pointfloat ::= ([digitpart] fraction) | (digitpart ".") +exponentfloat ::= (digitpart | pointfloat) exponent +digitpart ::= digit (["_"] digit)* +fraction ::= "." digitpart +exponent ::= ("e" | "E") ["+" | "-"] digitpart +``` + +Character Literals + +Character Literals must be on one line and begin and end with `'`. They can only +have one printable ascii character or escape code. + String Literals +String Literals must be on one line and begin and end with `"`. They can have a +unspecified number of printable ascii characters and escape codes. + +Escape Codes + +| Code | Meaning | +| - | - | +| `\n` | Line Feed | +| `\r` | Return Carriage | +| `\0` | Null Character | +| `\'` | Single Quotation Mark (In char literals) | +| `\"` | Double Quotation Mark (In str literals) | + #### Punctuation ``` @@ -168,3 +204,20 @@ String Literals { } [ ] ; : ``` + +#### Escape Codes + +| Code | Meaning | +| - | - | +| `\0` | Null Character | +| `\a` | Alert (Beep, Bell) | +| `\b` | Backspace | +| `\e` | Escape Character | +| `\f` | Formfeed Page Break* | +| `\n` | Line Feed | +| `\r` | Return Carriage | +| `\t` | Horizontal Tab | +| `\v` | Vertical Tab* | +| `\\` | Backslash | +| `\'` | Single Quotation Mark (In char literals) | +| `\"` | Double Quotation Mark (In str literals) | diff --git a/pytd12dk/compiler/lexer.py b/pytd12dk/compiler/lexer.py index e93d8f3..dded81d 100644 --- a/pytd12dk/compiler/lexer.py +++ b/pytd12dk/compiler/lexer.py @@ -2,7 +2,7 @@ # Feb 2024 from enum import Enum -from typing import ClassVar, Sequence, TextIO +from typing import ClassVar, Sequence from .compiler_types import CompilerError, FileInfo @@ -19,6 +19,16 @@ class _InterTokenType(Enum): Punctuation = 'Punctuation' +class _NumberLiteralType(Enum): + Number = 'Number' + Real = 'Real' + Exp = 'Exp' + Base = 'Base' + Binary = 'Binary' + Octal = 'Octal' + Hex = 'Hex' + + _OnlyNewLineTerminatedTokens = ( _InterTokenType.Directive, _InterTokenType.SingleLineComment, @@ -49,9 +59,59 @@ _Keywords = ( _Num_Start = "0123456789" -_Num_Second = _Num_Start + "box._Ee" +_Num_Start_Next = { + _NumberLiteralType.Number: { + '.': _NumberLiteralType.Real, + '0': _NumberLiteralType.Base, + } +} -_Num_Continue = _Num_Start + "._" "ABCDEF" "abcdef" +_Num_Second = { + _NumberLiteralType.Number: _Num_Start + ".eE_", + _NumberLiteralType.Real: _Num_Start + "eE_", + _NumberLiteralType.Base: "bBoOxX", +} + +_Num_Second_Next = { + _NumberLiteralType.Number: { + '.': _NumberLiteralType.Real, + 'e': _NumberLiteralType.Exp, + 'E': _NumberLiteralType.Exp, + }, + _NumberLiteralType.Real: { + 'e': _NumberLiteralType.Exp, + 'E': _NumberLiteralType.Exp, + }, + _NumberLiteralType.Base: { + 'b': _NumberLiteralType.Binary, + 'B': _NumberLiteralType.Binary, + 'o': _NumberLiteralType.Octal, + 'O': _NumberLiteralType.Octal, + 'x': _NumberLiteralType.Hex, + 'X': _NumberLiteralType.Hex, + } +} + +_Num_Continue = { + _NumberLiteralType.Number: _Num_Start + ".eE_", + _NumberLiteralType.Real: _Num_Start + "eE_", + _NumberLiteralType.Exp: _Num_Start + "_", + _NumberLiteralType.Binary: "01_", + _NumberLiteralType.Octal: "01234567_", + _NumberLiteralType.Hex: _Num_Start + "abcdefABCDEF_", +} + +_Num_Continue_Next = { + _NumberLiteralType.Number: { + '.': _NumberLiteralType.Real, + 'e': _NumberLiteralType.Exp, + 'E': _NumberLiteralType.Exp, + }, + _NumberLiteralType.Real: { + 'e': _NumberLiteralType.Exp, + 'E': _NumberLiteralType.Exp, + } +} _Punctuation_Any = "@$+-*/%~&|^<>=!?{}[]().->,;:" @@ -95,14 +155,13 @@ class StringLiteral(Token): _type = 'StringLiteral' class Punctuation(Token): _type = 'Punctuation' -def lexer(file: str | TextIO, filename: str) -> Sequence[Token]: - if not isinstance(file, str): - file = file.read() +def lexer(file: str, filename: str) -> Sequence[Token]: tokens: list[Token] = [] current: str = "" current_line: int = 0 current_col: int = 0 escaped: bool = False + number_type: _NumberLiteralType = _NumberLiteralType.Number token_type: _InterTokenType = _InterTokenType.Generic for line, line_str in enumerate(file.splitlines()): @@ -149,11 +208,14 @@ def lexer(file: str | TextIO, filename: str) -> Sequence[Token]: tokens.append(Identifier(current, fi)) token_type = _InterTokenType.Generic elif token_type is _InterTokenType.NumberLiteral: - if ( - (len(current) == 2 and char in _Num_Second) ^ - (char in _Num_Continue) - ): + if len(current) == 2 and char in _Num_Second[number_type]: current += char + if char in _Num_Second_Next[number_type]: + number_type = _Num_Second_Next[number_type][char] + elif char in _Num_Continue: + current += char + if char in _Num_Continue_Next[number_type]: + number_type = _Num_Continue_Next[number_type][char] else: fi = FileInfo( filename, current_line, current_col, len(current)) @@ -214,10 +276,17 @@ def lexer(file: str | TextIO, filename: str) -> Sequence[Token]: token_type = _InterTokenType.MultiLineComment elif char in _ID_Start: token_type = _InterTokenType.Word - elif char == '.' and line_str[col+1] in _Num_Second: + elif ( + char == '.' and + line_str[col+1] in _Num_Second[_NumberLiteralType.Real] + ): token_type = _InterTokenType.NumberLiteral + if char in _Num_Start_Next[number_type]: + number_type = _Num_Start_Next[number_type][char] elif char in _Num_Start: token_type = _InterTokenType.NumberLiteral + if char in _Num_Start_Next[number_type]: + number_type = _Num_Start_Next[number_type][char] elif char == "'": token_type = _InterTokenType.CharLiteral elif char == '"':