def _skip_multi_line_comment(self) -> None:
    """Consume a block comment up to and including its ``|#`` terminator.

    Assumes the caller has positioned the lexer on the opening ``#|``
    pair (the two leading _advance calls skip it — TODO confirm against
    the call site in _skip_whitespace_or_comments).

    Raises:
        err.LexerError: RS_EOF_IN_BLOCK_COMMENT if input ends before the
            closing ``|#`` is found. The error token carries the
            position recorded just after the opening delimiter.
    """
    self._advance()
    self._advance()
    # Remember where the comment body starts, for error reporting.
    line_no = self._line_no
    column = self._column
    while True:
        if self._current_char == '|':
            next_char = self._peek()
            if next_char == '#':
                # Found the '|#' terminator: consume both characters.
                self._advance()
                self._advance()
                break
            elif next_char is None:
                # '|' is the very last character — unterminated comment.
                raise err.LexerError(
                    error_code=err.ErrorCode.RS_EOF_IN_BLOCK_COMMENT,
                    token=t.Token(type=t.TokenType.INVALID,
                                  value=None,
                                  line_no=line_no,
                                  column=column))
        elif self._current_char is None:
            # Ran off the end of input inside the comment body.
            raise err.LexerError(
                error_code=err.ErrorCode.RS_EOF_IN_BLOCK_COMMENT,
                token=t.Token(type=t.TokenType.INVALID,
                              value=None,
                              line_no=line_no,
                              column=column))
        self._advance()
def _string(self) -> t.Token:
    """Consume a double-quoted string literal and return a STRING token.

    The lexer must be positioned on the opening '"'. The returned token's
    position is that of the opening quote, and its value excludes both
    quote characters.

    Raises:
        err.LexerError: RS_EXPECTED_DOUBLE_QUOTE if input ends before a
            closing '"' is seen.
    """
    line_no = self._line_no
    column = self._column
    self._advance()  # step past the opening quote
    chars = []
    while self._current_char is not None and self._current_char != '"':
        chars.append(self._current_char)
        self._advance()
    text = ''.join(chars)
    if self._current_char is None:
        # Hit end of input without a closing quote.
        raise err.LexerError(
            error_code=err.ErrorCode.RS_EXPECTED_DOUBLE_QUOTE,
            token=t.Token(type=t.TokenType.INVALID,
                          value=text,
                          line_no=line_no,
                          column=column))
    self._advance()  # step past the closing quote
    return t.Token(type=t.TokenType.STRING,
                   value=text,
                   line_no=line_no,
                   column=column)
def _boolean(self) -> t.Token:
    """Consume a ``#``-prefixed boolean literal and return a BOOLEAN token.

    Accepts exactly #T, #t, #true, #F, #f, #false; anything else that
    starts with '#' raises RS_BAD_SYNTAX.

    Raises:
        err.LexerError: RS_BAD_SYNTAX when the consumed text is not one of
            the accepted spellings, or when input ends mid-literal.
    """
    line_no = self._line_no
    column = self._column
    boolean = self._current_char  # starts with the '#' character
    self._advance()
    while self._current_char is not None and not self._current_char.isspace(
    ):
        current_char = self._current_char
        if current_char in ['"', "'", '`', '#']:
            # A quote/hash glued onto the literal makes it malformed:
            # fold it into the text (so the error message shows it) and
            # stop scanning. NOTE(review): char is appended but not
            # advanced past — presumably re-lexed by the caller; confirm.
            boolean += self._current_char
            break
        elif current_char in ['(', ')', '{', '}', '[', ']']:
            # Brackets delimit the literal and belong to the next token.
            break
        boolean += self._current_char
        self._advance()
        # Substring check: stop as soon as the accumulated text can no
        # longer be a prefix of '#true' or '#false' (case-insensitive).
        lowered = boolean.lower()
        if lowered not in '#true' and lowered not in '#false':
            break
    if self._current_char is None or boolean not in [
            '#T', '#t', '#true', '#F', '#f', '#false'
    ]:
        # NOTE(review): a valid literal followed immediately by EOF also
        # lands here (current_char is None) — confirm this is intended.
        raise err.LexerError(error_code=err.ErrorCode.RS_BAD_SYNTAX,
                             token=t.Token(type=t.TokenType.INVALID,
                                           value=boolean,
                                           line_no=line_no,
                                           column=column),
                             text=boolean)
    if boolean in ['#T', '#t', '#true']:
        return t.Token(type=t.TokenType.BOOLEAN,
                       value=True,
                       line_no=line_no,
                       column=column)
    elif boolean in ['#F', '#f', '#false']:
        return t.Token(type=t.TokenType.BOOLEAN,
                       value=False,
                       line_no=line_no,
                       column=column)
def _process_next_token(self) -> tp.Optional[t.Token]:
    """Skip whitespace/comments and lex the next token from the input.

    Returns:
        The next token, or None when the input is exhausted.

    Raises:
        err.LexerError: FEATURE_NOT_IMPLEMENTED for a bare '|'.
        err.IllegalStateError: if no lexing rule matches the current
            character (should be unreachable).
    """
    token = None
    while self._current_char:
        try:
            self._skip_whitespace_or_comments()
        except err.ReachedEOF:
            break
        # A number starts with a digit, a '.', or a '-' immediately
        # followed by a digit or '.'. Guard the lookahead: _peek()
        # returns None at end of input, and the original code called
        # .isdigit() on it directly, crashing on a trailing '-'.
        if self._current_char == '-':
            next_char = self._peek()
            starts_negative_number = next_char is not None and (
                next_char.isdigit() or next_char == '.')
        else:
            starts_negative_number = False
        if (self._current_char.isdigit() or self._current_char == '.'
                or starts_negative_number):
            token = self._number()
            break
        # Any character outside NON_ID_CHARS begins an identifier.
        if self._current_char not in self.NON_ID_CHARS:
            token = self._identifier()
            break
        if self._current_char == '#':
            token = self._boolean()
            break
        if self._current_char == '"':
            token = self._string()
            break
        if self._current_char == "'":
            token = t.Token(type=t.TokenType.QUOTE,
                            value=self._current_char,
                            line_no=self._line_no,
                            column=self._column)
            self._advance()
            break
        # All three bracket styles lex to a single LPAREN/RPAREN kind.
        if self._current_char in ['(', '{', '[']:
            token = t.Token(type=t.TokenType.LPAREN,
                            value=self._current_char,
                            line_no=self._line_no,
                            column=self._column)
            self._advance()
            break
        if self._current_char in [')', '}', ']']:
            token = t.Token(type=t.TokenType.RPAREN,
                            value=self._current_char,
                            line_no=self._line_no,
                            column=self._column)
            self._advance()
            break
        if self._current_char == '|':
            raise err.LexerError(
                error_code=err.ErrorCode.FEATURE_NOT_IMPLEMENTED,
                token=t.Token(type=t.TokenType.INVALID,
                              value="''",
                              line_no=self._line_no,
                              column=self._column))
        raise err.IllegalStateError
    return token
def _number(self) -> t.Token:
    """Return a number token from a number consumed from the input (or an
    ID if not a valid number).

    Consumes characters up to the next whitespace or NON_ID_CHARS
    delimiter and classifies the text as, in order of attempt:
    RATIONAL ('num/den' with non-negative integer parts), INTEGER,
    DECIMAL (float), or — if none parse — a NAME token holding the raw
    text.

    Raises:
        err.LexerError: DIVISION_BY_ZERO for a rational with a zero
            denominator.
    """
    line_no = self._line_no
    column = self._column
    # Optional leading minus sign.
    if self._current_char == '-':
        number = '-'
        self._advance()
    else:
        number = ''
    is_rational = False
    numerator = ''
    denominator = ''
    while (self._current_char is not None
           and not self._current_char.isspace()
           and self._current_char not in self.NON_ID_CHARS):
        if self._current_char == '/':
            # Everything before the slash is the numerator; keep
            # accumulating the full text in `number` as a fallback NAME.
            is_rational = True
            numerator = number
            number += self._current_char
            self._advance()
            continue
        if is_rational:
            denominator += self._current_char
        number += self._current_char
        self._advance()
    if is_rational:
        try:
            numerator = int(numerator)
            denominator = int(denominator)
            # A '-' after the slash (e.g. '1/-2') parses but is not a
            # valid rational spelling; reuse the ValueError path.
            if denominator < 0:
                raise ValueError
        except ValueError:
            # Not a well-formed rational: treat the whole text as a name.
            return t.Token(type=t.TokenType.NAME,
                           value=number,
                           line_no=line_no,
                           column=column)
        else:
            token = t.Token(type=t.TokenType.RATIONAL,
                            value=(numerator, denominator),
                            line_no=line_no,
                            column=column)
            if denominator == 0:
                raise err.LexerError(
                    error_code=err.ErrorCode.DIVISION_BY_ZERO, token=token)
            return token
    else:
        try:
            number = int(number)
        except ValueError:
            try:
                number = float(number)
            except ValueError:
                # Neither int nor float: fall back to a NAME token.
                return t.Token(type=t.TokenType.NAME,
                               value=number,
                               line_no=line_no,
                               column=column)
            else:
                return t.Token(type=t.TokenType.DECIMAL,
                               value=number,
                               line_no=line_no,
                               column=column)
        else:
            return t.Token(type=t.TokenType.INTEGER,
                           value=number,
                           line_no=line_no,
                           column=column)
def data(
    self
) -> Union[ast.Bool, ast.Dec, ast.Int, ast.List, ast.Rat, ast.Str,
           ast.Sym]:
    """
    data: BOOLEAN | DECIMAL | INTEGER | LIST | RATIONAL | STRING | SYMBOL

    Parses one datum. Simple literals map directly to their AST node.
    A QUOTE token introduces either a quoted list ('(...)), a quoted
    literal, or a quoted name (a symbol).

    Raises:
        err.LexerError: RS_SYMBOL_FOUND_EOF if input ends inside a
            quoted list.
        err.IllegalStateError: on a token kind this rule cannot handle
            (NOTE(review): this includes a nested quote inside a quoted
            list — confirm that is intentional).
    """
    token = self.current_token
    if token.type is t.TokenType.BOOLEAN:
        self.eat(t.TokenType.BOOLEAN)
        return ast.Bool(token)
    elif token.type is t.TokenType.DECIMAL:
        self.eat(t.TokenType.DECIMAL)
        return ast.Dec(token)
    elif token.type is t.TokenType.INTEGER:
        self.eat(t.TokenType.INTEGER)
        return ast.Int(token)
    elif token.type is t.TokenType.RATIONAL:
        self.eat(t.TokenType.RATIONAL)
        return ast.Rat(token)
    elif token.type is t.TokenType.STRING:
        self.eat(t.TokenType.STRING)
        return ast.Str(token)
    elif token.type is t.TokenType.QUOTE:
        self.eat(t.TokenType.QUOTE)
        next_token = self.current_token
        if next_token.type is t.TokenType.LPAREN:
            # Quoted list. Track nesting with an explicit stack of
            # element lists; prims_stack[0] collects the outermost list.
            self.eat(t.TokenType.LPAREN)
            prims_stack = [[]]
            open_parens = 1
            while open_parens > 0:
                curr_token = self.current_token
                if curr_token.type is t.TokenType.EOF:
                    # Unterminated quoted list.
                    raise err.LexerError(
                        error_code=err.ErrorCode.RS_SYMBOL_FOUND_EOF,
                        token=t.Token(type=t.TokenType.INVALID,
                                      value="'",
                                      line_no=curr_token.line_no,
                                      column=curr_token.column))
                elif curr_token.type is t.TokenType.LPAREN:
                    # Enter a nested list: push a fresh element list.
                    open_parens += 1
                    self.eat(t.TokenType.LPAREN)
                    prims = []
                    prims_stack.append(prims)
                    continue
                elif curr_token.type is t.TokenType.RPAREN:
                    open_parens -= 1
                    self.eat(t.TokenType.RPAREN)
                    if open_parens > 0:
                        # Close the nested list and append it to its
                        # parent's element list. All nested lists share
                        # the original quote token for position info.
                        expr = ast.List(token, prims_stack[-1])
                        prims_stack = prims_stack[:-1]
                        prims_stack[-1].append(expr)
                    continue
                prims = prims_stack[-1]
                if curr_token.type in [
                        t.TokenType.BOOLEAN, t.TokenType.DECIMAL,
                        t.TokenType.INTEGER, t.TokenType.RATIONAL,
                        t.TokenType.STRING
                ]:
                    # Literals inside a quoted list recurse through the
                    # literal branches above.
                    prims.append(self.data())
                elif curr_token.type is t.TokenType.NAME:
                    # Names inside a quoted list become symbols.
                    self.eat(t.TokenType.NAME)
                    name_token = t.Token(type=t.TokenType.SYMBOL,
                                         value=curr_token.value,
                                         line_no=curr_token.line_no,
                                         column=curr_token.column)
                    prims.append(ast.Sym(name_token))
                else:
                    raise err.IllegalStateError
            node = ast.List(token, prims_stack[0])
            return node
        elif next_token.type in [
                t.TokenType.BOOLEAN, t.TokenType.DECIMAL,
                t.TokenType.INTEGER, t.TokenType.RATIONAL,
                t.TokenType.STRING
        ]:
            # Quoting a literal yields the literal itself.
            return self.data()
        elif next_token.type is t.TokenType.NAME:
            # Quoting a name yields a symbol positioned at the quote.
            self.eat(t.TokenType.NAME)
            name_token = t.Token(type=t.TokenType.SYMBOL,
                                 value=next_token.value,
                                 line_no=token.line_no,
                                 column=token.column)
            return ast.Sym(name_token)
        else:
            raise err.IllegalStateError
    else:
        self.error_unexpected_token(token=token)