def test_match_bitvector_captures_args_out_of_order():
    # The register field (BBBB) appears before the int field (aaaa) in the
    # pattern, yet captures come back ordered by letter, not by position.
    captured = match_bitvector("0101 BBBB aaaa cccc", 0b0101011010111111)
    expected = [
        Token(Token.INT, 0b1011),
        Token(Token.REGISTER, 0b0110),
        Token(Token.INT, 0b1111),
    ]
    assert captured == expected
def test_lexer_with_invalid_hex_escape():
    lex = lex_helper("'\\xa' \"\\xgh\"")
    # The bad \x escape in the char literal leaves it over-long (one
    # warning, ERROR token); the one in the string adds a second warning.
    assert len(lex.messages.warnings) == 1
    assert eq(lex.tkn, Token(Token.ERROR, "over-long character literal"))
    assert eq(lex.next_token(), Token(Token.STRING, "xgh"))
    assert len(lex.messages.warnings) == 2
def test_match_bitvector_captures_multiple_args():
    # Three distinct capture letters; uppercase B marks a register field.
    captured = match_bitvector("0101 aaaa BBBB cccc", 0b0101011010111111)
    expected = [
        Token(Token.INT, 0b0110),
        Token(Token.REGISTER, 0b1011),
        Token(Token.INT, 0b1111),
    ]
    assert captured == expected
def test_lexer_with_negative_integer():
    lex = lex_helper("-1")
    # A leading minus lexes as its own token, followed by the digits;
    # EOF is sticky and may be requested repeatedly.
    expected = [
        Token(Token.MINUS, "-"),
        Token(Token.INT, "1"),
        Token(Token.EOF, ""),
        Token(Token.EOF, ""),
    ]
    assert eq(lex.tkn, expected[0])
    for tok in expected[1:]:
        assert eq(lex.next_token(), tok)
def test_lexer_with_include():
    lex = lex_helper('#include <HERA.h> #include "lib.hera"')
    # Angle-bracket targets lex as BRACKETED, quoted targets as STRING.
    expected = [
        Token(Token.INCLUDE, "#include"),
        Token(Token.BRACKETED, "HERA.h"),
        Token(Token.INCLUDE, "#include"),
        Token(Token.STRING, "lib.hera"),
        Token(Token.EOF, ""),
    ]
    assert eq(lex.tkn, expected[0])
    for tok in expected[1:]:
        assert eq(lex.next_token(), tok)
def test_lexer_with_string():
    lexer = lex_helper("""\
"a double quote: \\", a backslash: \\\\"
""")
    # Escapes inside the literal are decoded: \" becomes " and \\ becomes \.
    assert eq(lexer.tkn, Token(Token.STRING, 'a double quote: ", a backslash: \\'))
    assert eq(lexer.next_token(), Token(Token.EOF, ""))
def test_convert_register_branch_with_integer():
    # Simulates BR(l) where l = 1000.  Since 1000 = 3 * 256 + 232, the
    # target is materialized in R11 via SETLO/SETHI before the branch.
    converted = helper("BR(1000)")
    assert converted == [
        SETLO(Token.R(11), Token.Int(232)),
        SETHI(Token.R(11), Token.Int(3)),
        BR(Token.R(11)),
    ]
def test_lexer_with_multiline_comment():
    # NOTE(review): newline placement inside the literal reconstructed from
    # a collapsed source line; the comment spanning lines is the point.
    lexer = lex_helper("""\
1 /* a multiline
comment */ 2""")
    # The entire /* ... */ region is skipped; only 1 and 2 are tokens.
    assert eq(lexer.tkn, Token(Token.INT, "1"))
    assert eq(lexer.next_token(), Token(Token.INT, "2"))
    assert eq(lexer.next_token(), Token(Token.EOF, ""))
def test_match_bitvector_captures_split_arg():
    # Each letter occurs in several non-adjacent bit positions; the
    # scattered bits are reassembled into a single value per letter.
    captured = match_bitvector("dcba abcd dcba abcd", 0b0111000011100000)
    expected = [
        Token(Token.INT, 0b1000),
        Token(Token.INT, 0b1010),
        Token(Token.INT, 0b1010),
        Token(Token.INT, 0b0010),
    ]
    assert captured == expected
def test_typecheck_single_error():
    # Only SETHI's immediate (1000) is out of byte range, so typechecking
    # the two-op program reports exactly one error.
    ops = [
        SETLO(Token.R(1), Token.Int(10)),
        SETHI(Token.R(1), Token.Int(1000)),
    ]
    _, messages = typecheck(ops)
    assert len(messages.errors) == 1
    assert "integer must be in range [-128, 256)" in messages.errors[0][0]
def test_lexer_with_multiple_single_line_comments():
    lexer = lex_helper("""\
1 // one
2 // two
// no three
4 // four""")
    # Everything after // on each line is discarded, including the whole
    # "// no three" line, so the token stream is just 1, 2, 4.
    expected = [
        Token(Token.INT, "1"),
        Token(Token.INT, "2"),
        Token(Token.INT, "4"),
        Token(Token.EOF, ""),
    ]
    assert eq(lexer.tkn, expected[0])
    for tok in expected[1:]:
        assert eq(lexer.next_token(), tok)
def consume_bracketed(self) -> None:
    """Lex a ``<...>`` bracketed expression and set ``self.tkn``.

    Assumes the current character is the opening ``<``.  On success the
    token value is the text between the brackets and the lexer is left one
    past the closing ``>``; if input ends first, an ERROR token is set.
    """
    self.next_char()
    loc = self.get_location()
    begin = self.position
    # Scan forward to (but not past) the closing angle bracket.
    while self.position < len(self.text) and self.text[self.position] != ">":
        self.next_char()
    if self.position == len(self.text):
        self.tkn = Token(Token.ERROR, "unclosed bracketed expression", loc)
        return
    self.tkn = Token(Token.BRACKETED, self.text[begin : self.position], loc)
    self.next_char()
def consume_str(self) -> None:
    """Lex a double-quoted string literal starting at the current position.

    Leaves the lexer one past the closing quote and sets ``self.tkn`` to a
    STRING token, or to an ERROR token if the literal is never closed.
    """
    loc = self.get_location()
    self.next_char()
    body = self.consume_delimited('"')
    if self.position == len(self.text):
        # Ran off the end of the input without a closing quote.
        self.tkn = Token(Token.ERROR, "unclosed string literal", loc)
        return
    self.next_char()
    self.tkn = Token(Token.STRING, body, loc)
def test_typecheck_multiple_errors():
    ops = [
        ADD(Token.R(1), Token.Int(10)),
        INC(Token.R(3), Token.Int(1), Token.Int(2)),
    ]
    _, messages = typecheck(ops)
    # ADD has too few args and a non-register second arg; INC has too many.
    assert len(messages.errors) == 3
    first, second, third = (e[0] for e in messages.errors[:3])
    assert "ADD" in first
    assert "too few" in first
    assert "expected register" in second
    assert "INC" in third
    assert "too many" in third
def set_token(self, typ: str, *, length=1) -> None:
    """Set ``self.tkn`` to a token of type ``typ`` taken from the input.

    The token's value is the ``length``-character substring starting at the
    current position; afterwards the lexer sits one past the token's end.
    """
    loc = self.get_location()
    end = self.position + length
    value = self.text[self.position : end]
    # Advance one character at a time so location tracking stays correct.
    for _ in range(length):
        self.next_char()
    self.tkn = Token(typ, value, loc)
def consume_char(self) -> None:
    """Lex a single-quoted character literal starting at the current position.

    Sets ``self.tkn`` to a CHAR token for a one-character literal or a
    two-character backslash escape; otherwise to an ERROR token (unclosed
    or over-long literal).  Leaves the lexer one past the closing quote.
    """
    loc = self.get_location()
    self.next_char()
    body = self.consume_delimited("'")
    if self.position == len(self.text):
        self.tkn = Token(Token.ERROR, "unclosed character literal", loc)
        return
    self.next_char()
    if len(body) == 1:
        # Plain character, e.g. 'a'.
        self.tkn = Token(Token.CHAR, body, loc)
    elif len(body) == 2 and body[0] == "\\":
        # Backslash escape: keep only the character after the backslash.
        self.tkn = Token(Token.CHAR, body[1], loc)
    else:
        self.tkn = Token(Token.ERROR, "over-long character literal", loc)
def test_convert_CALL():
    # Simulates CALL(R12, l) where l = 1000.  Since 1000 = 3 * 256 + 232,
    # the target is loaded into R13 before the register-form CALL.
    converted = helper("CALL(R12, 1000)")
    assert converted == [
        SETLO(Token.R(13), Token.Int(232)),
        SETHI(Token.R(13), Token.Int(3)),
        CALL(Token.R(12), Token.R(13)),
    ]
def test_convert_ops_with_constant():
    # SET with a symbol bound to 100 expands to SETLO(100) / SETHI(0).
    converted, messages = convert_ops([SET(Token.R(1), Token.Sym("n"))], {"n": 100})
    assert len(messages.errors) == 0
    expected = [
        SETLO(Token.R(1), Token.Int(100)),
        SETHI(Token.R(1), Token.Int(0)),
    ]
    assert converted == expected
def test_substitute_label_with_other_op():
    # The symbol operand is swapped for its value from the label table.
    labels = {"N": 10}
    op = INC(Token.R(1), Token.Sym("N"))
    assert substitute_label(op, labels) == INC(Token.R(1), Token.Int(10))
def test_get_labels_with_invalid_code(settings):
    # CALL's operand is invalid, yet this pass neither collects labels
    # nor reports errors for it.
    labels, messages = get_labels([CALL(Token.Sym("l"))], settings)
    assert len(labels) == 0
    assert len(messages.errors) == 0
def test_substitute_label_with_SETHI():
    # SETHI's symbol operand is likewise replaced by its table value.
    labels = {"N": 10}
    op = SETHI(Token.R(1), Token.Sym("N"))
    assert substitute_label(op, labels) == SETHI(Token.R(1), Token.Int(10))
def test_operation_length_of_CALL_with_label():
    # A label target needs a SETLO/SETHI pair before the CALL itself.
    op = CALL(Token.R(12), Token.Sym("l"))
    assert operation_length(op) == 3
def test_operation_length_of_CALL_with_register():
    # A register target needs no expansion: exactly one instruction.
    op = CALL(Token.R(12), Token.R(13))
    assert operation_length(op) == 1
def test_operation_length_of_NOT():
    # NOT is a pseudo-op expanding to three real instructions.
    op = NOT(Token.R(5), Token.R(7))
    assert operation_length(op) == 3
def test_operation_lentgh_of_FLAGS():
    # NOTE(review): "lentgh" typo in the test name; kept for stability,
    # rename to "length" when convenient.
    op = FLAGS(Token.R(3))
    assert operation_length(op) == 2
def test_operation_length_of_NEG():
    # NEG expands to two real instructions.
    op = NEG(Token.R(7), Token.R(15))
    assert operation_length(op) == 2
def test_operation_length_of_CMP():
    # CMP expands to two real instructions.
    op = CMP(Token.R(1), Token.R(0))
    assert operation_length(op) == 2
def test_operation_length_of_MOVE():
    # MOVE maps onto a single real instruction.
    op = MOVE(Token.R(1), Token.R(2))
    assert operation_length(op) == 1
def test_operation_length_of_SETRF():
    # SETRF expands to four real instructions.
    op = SETRF(Token.R(1), Token.Int(10))
    assert operation_length(op) == 4
def test_operation_length_of_register_branch_with_register():
    # A register-form branch needs no expansion: one instruction.
    op = BNZ(Token.R(1))
    assert operation_length(op) == 1