def test_literal_escape_hex(self):
    """Test whether Lexer.lex_string() can tokenize interpreted string
    literal with a hexadecimal escape sequence."""
    # Valid hex escapes: (source, expected end offset, expected literal).
    for source, expected_end, expected_lit in (
            ('"\\x00"', 6, '\0'),
            ('"\\xff"', 6, '\xff'),
    ):
        end, lit = Lexer.lex_string(source, 0)
        self.assertEqual(end, expected_end)
        self.assertEqual(lit, expected_lit)
    # Truncated or non-hex-digit escapes must raise LexerError pointing at
    # the backslash (line 1, column 2).
    for source in ('"\\x', '"\\x" ', '"\\x0" ', '"\\xg" ', '"\\x0g"'):
        with self.assertRaises(LexerError) as ctx:
            Lexer.lex_string(source, 0)
        self.assertEqual(ctx.exception.line, 1)
        self.assertEqual(ctx.exception.column, 2)
def test_literal_escape_big_u(self):
    """Test whether Lexer.lex_string() can tokenize interpreted string
    literal with a big u escape sequence."""
    end, lit = Lexer.lex_string('"\\U00000000"', 0)
    self.assertEqual(end, 12)
    self.assertEqual(lit, '\0')
    end, lit = Lexer.lex_string('"\\U0001ffff"', 0)
    self.assertEqual(end, 12)
    self.assertEqual(lit, py3_str(u'\U0001ffff'))
    # Malformed \U escapes — truncated input or a non-hex digit at each
    # position — must raise LexerError pointing at the backslash.
    # (The original list carried '"\\U000g" ' twice; the duplicate test
    # vector added no coverage and has been removed.)
    tests = [
        '"\\U',
        '"\\U" ',
        '"\\U0" ',
        '"\\Ug" ',
        '"\\U0g" ',
        '"\\U00g" ',
        '"\\U000g" ',
        '"\\U0000g" ',
        '"\\U00000g" ',
        '"\\U000000g" ',
        '"\\U0000000g"',
    ]
    for test in tests:
        with self.assertRaises(LexerError) as ctx:
            Lexer.lex_string(test, 0)
        self.assertEqual(ctx.exception.line, 1)
        self.assertEqual(ctx.exception.column, 2)
def test_lex_space(self):
    """Test whether Lexer.lex() can lex whitespaces."""
    # Each whitespace character on its own, then a mixed run of all four;
    # a run is consumed as a single SPACE token.
    for source, expected_end in (
            (' ', 1),
            ('\t', 1),
            ('\r', 1),
            ('\n', 1),
            ('\n \r\t\n', 5),
    ):
        token, end, lit = Lexer.lex(source, 0)
        self.assertEqual(token, Token.SPACE)
        self.assertEqual(end, expected_end)
        self.assertEqual(lit, None)
def test_lex_string(self):
    """Test whether Lexer.lex() can lex a string."""
    # Interpreted (double-quoted) string.
    token, end, lit = Lexer.lex('"a"', 0)
    self.assertEqual(token, Token.STRING)
    self.assertEqual(end, 3)
    self.assertEqual(lit, 'a')
    # Raw (backquoted) string; may span multiple lines.
    token, end, lit = Lexer.lex('`a\nb`', 0)
    self.assertEqual(token, Token.STRING)
    self.assertEqual(end, 5)
    self.assertEqual(lit, 'a\nb')
def test_lex_char(self):
    """Test whether Lexer.lex() can lex a character."""
    # Every single-character punctuation token; each lexes to end offset 1
    # with no literal value.
    cases = (
        ('(', Token.LPAREN),
        (')', Token.RPAREN),
        ('[', Token.LBRACKET),
        (']', Token.RBRACKET),
        ('{', Token.LBRACE),
        ('}', Token.RBRACE),
        (':', Token.COLON),
        ('=', Token.ASSIGN),
        ('+', Token.PLUS),
        (',', Token.COMMA),
    )
    for source, expected_token in cases:
        token, end, lit = Lexer.lex(source, 0)
        self.assertEqual(token, expected_token)
        self.assertEqual(end, 1)
        self.assertEqual(lit, None)
def test_lexer(self):
    """Test token, start, end, literal, and consume()."""
    lexer = Lexer('a b //a\n "c"', 0)
    # Expected (start, end, token, literal) before each consume(); the
    # line comment and the whitespace around it are skipped automatically.
    expectations = (
        (0, 1, Token.IDENT, 'a'),
        (2, 3, Token.IDENT, 'b'),
        (9, 12, Token.STRING, 'c'),
    )
    for start, end, token, literal in expectations:
        self.assertEqual(lexer.start, start)
        self.assertEqual(lexer.end, end)
        self.assertEqual(lexer.token, token)
        self.assertEqual(lexer.literal, literal)
        lexer.consume(token)
    # After the last token the lexer reports EOF with an empty span.
    self.assertEqual(lexer.start, 12)
    self.assertEqual(lexer.end, 12)
    self.assertEqual(lexer.token, Token.EOF)
    self.assertEqual(lexer.literal, None)
def test_lexer_offset(self):
    """Test the offset argument of Lexer.__init__()."""
    # Starting at offset 2 skips 'a ' entirely, so 'b' is the first token.
    lexer = Lexer('a b', 2)
    self.assertEqual(lexer.start, 2)
    self.assertEqual(lexer.end, 3)
    self.assertEqual(lexer.token, Token.IDENT)
    self.assertEqual(lexer.literal, 'b')
    lexer.consume(Token.IDENT)
    # Nothing remains: EOF with an empty span at the end of input.
    self.assertEqual(lexer.start, 3)
    self.assertEqual(lexer.end, 3)
    self.assertEqual(lexer.token, Token.EOF)
    self.assertEqual(lexer.literal, None)
    lexer.consume(Token.EOF)
def test_list_two_elements(self):
    """Parse an assignment of a two-element list of strings."""
    parser = Parser(Lexer('a = ["x", "y"]'))
    parser.parse()
    name, value = parser.var_defs[0][0], parser.var_defs[0][1]
    self.assertEqual(name, 'a')
    self.assertEqual(repr(value), repr(['x', 'y']))
def test_interpreted_string_literal(self):
    """Test whether Lexer.lex_string() can tokenize interpreted string
    literal."""
    # Plain characters pass through unchanged ('n' here is a literal 'n',
    # not an escape).
    for source, expected_lit in (('"a"', 'a'), ('"n"', 'n')):
        end, lit = Lexer.lex_string(source, 0)
        self.assertEqual(end, 3)
        self.assertEqual(lit, expected_lit)
    # A trailing lone backslash is an error reported at the backslash.
    with self.assertRaises(LexerError) as ctx:
        Lexer.lex_string('"\\', 0)
    self.assertEqual(ctx.exception.line, 1)
    self.assertEqual(ctx.exception.column, 2)
def test_dict_empty(self):
    """Parse an assignment of an empty dict."""
    parser = Parser(Lexer('a = {}'))
    parser.parse()
    name, value = parser.var_defs[0][0], parser.var_defs[0][1]
    self.assertEqual(name, 'a')
    self.assertEqual(repr(value), repr({}))
def test_dict_one_element(self):
    """Parse an assignment of a one-element dict."""
    parser = Parser(Lexer('a = {x: "1"}'))
    parser.parse()
    name, value = parser.var_defs[0][0], parser.var_defs[0][1]
    self.assertEqual(name, 'a')
    # Dict keys are bare identifiers, so the expected repr is spelled out
    # rather than built from a Python dict.
    self.assertEqual(repr(value), "{x: '1'}")
def test_dict_two_elements_comma(self):
    """Parse a two-element dict with a trailing comma."""
    parser = Parser(Lexer('a = {x: "1", y: "2",}'))
    parser.parse()
    name, value = parser.var_defs[0][0], parser.var_defs[0][1]
    self.assertEqual(name, 'a')
    # The trailing comma must not appear in the parsed representation.
    self.assertEqual(repr(value), "{x: '1', y: '2'}")
def test_lex_ident(self):
    """Test whether Lexer.lex() can lex an identifier."""
    token, end, lit = Lexer.lex('ident', 0)
    self.assertEqual(token, Token.IDENT)
    # The whole word is consumed and returned as the literal.
    self.assertEqual(end, 5)
    self.assertEqual(lit, 'ident')
def test_list_empty(self):
    """Parse an assignment of an empty list."""
    parser = Parser(Lexer('a = []'))
    parser.parse()
    name, value = parser.var_defs[0][0], parser.var_defs[0][1]
    self.assertEqual(name, 'a')
    self.assertEqual(repr(value), repr([]))
def test_assign_string(self):
    """Parse an assignment of a plain string value."""
    parser = Parser(Lexer('a = "example"'))
    parser.parse()
    name, value = parser.var_defs[0][0], parser.var_defs[0][1]
    self.assertEqual(name, 'a')
    self.assertEqual(repr(value), repr('example'))
def test_lex_assign_plus(self):
    """Test whether Lexer.lex() can lex `+=` without problems."""
    # '+=' must lex as one ASSIGNPLUS token, not PLUS followed by ASSIGN.
    token, end, lit = Lexer.lex('+=', 0)
    self.assertEqual(token, Token.ASSIGNPLUS)
    self.assertEqual(end, 2)
    self.assertEqual(lit, None)
def test_list_one_element_comma(self):
    """Parse a one-element list with a trailing comma."""
    parser = Parser(Lexer('a = ["x",]'))
    parser.parse()
    name, value = parser.var_defs[0][0], parser.var_defs[0][1]
    self.assertEqual(name, 'a')
    # The trailing comma must not add an extra element.
    self.assertEqual(repr(value), repr(['x']))
def test_lex_offset(self):
    """Test the offset argument of Lexer.lex()."""
    source = 'a "b"'
    # Thread each token's end offset back in as the next call's offset.
    expected = (
        (Token.IDENT, 1, 'a'),
        (Token.SPACE, 2, None),
        (Token.STRING, 5, 'b'),
    )
    offset = 0
    for expected_token, expected_end, expected_lit in expected:
        token, offset, lit = Lexer.lex(source, offset)
        self.assertEqual(token, expected_token)
        self.assertEqual(offset, expected_end)
        self.assertEqual(lit, expected_lit)
def test_literal_escape_octal(self):
    """Test whether Lexer.lex_string() can tokenize interpreted string
    literal with an octal escape sequence."""
    # Valid three-digit octal escapes, lowest and highest byte values.
    for source, expected_lit in (('"\\000"', '\0'), ('"\\377"', '\377')):
        end, lit = Lexer.lex_string(source, 0)
        self.assertEqual(end, 6)
        self.assertEqual(lit, expected_lit)
    # Truncated or non-octal-digit sequences raise at the backslash.
    for source in ('"\\0', '"\\0" ', '"\\09" ', '"\\009"'):
        with self.assertRaises(LexerError) as ctx:
            Lexer.lex_string(source, 0)
        self.assertEqual(ctx.exception.line, 1)
        self.assertEqual(ctx.exception.column, 2)
def test_raw_string_lit(self):
    """Test whether Lexer.lex_string() can tokenize raw string literal."""
    end, lit = Lexer.lex_string('`a`', 0)
    self.assertEqual(end, 3)
    self.assertEqual(lit, 'a')
    # Raw strings may contain newlines.
    end, lit = Lexer.lex_string('`a\nb`', 0)
    self.assertEqual(end, 5)
    self.assertEqual(lit, 'a\nb')
    # Lexing may start at a non-zero offset (the second literal here).
    end, lit = Lexer.lex_string('"a""b"', 3)
    self.assertEqual(end, 6)
    self.assertEqual(lit, 'b')
    # An unterminated raw string is an error at the end of input.
    with self.assertRaises(LexerError) as ctx:
        Lexer.lex_string('`a', 0)
    self.assertEqual(ctx.exception.line, 1)
    self.assertEqual(ctx.exception.column, 3)
    # Unlike raw strings, an interpreted (double-quoted) string must not
    # contain a bare newline.
    with self.assertRaises(LexerError) as ctx:
        Lexer.lex_string('"a\nb"', 0)
    self.assertEqual(ctx.exception.line, 1)
    self.assertEqual(ctx.exception.column, 3)
def test_lexer_path(self):
    """Test the path attribute of the Lexer object."""
    # The keyword argument is stored verbatim on the instance.
    lexer = Lexer('content', path='test_path')
    self.assertEqual(lexer.path, 'test_path')
def test_literal_escape_char(self):
    """Test whether Lexer.lex_string() can tokenize interpreted string
    literal with a escaped character."""
    # Every supported single-character escape: (source, expected literal).
    # Each source is four characters long: quote, backslash, char, quote.
    cases = (
        ('"\\a"', '\a'),
        ('"\\b"', '\b'),
        ('"\\f"', '\f'),
        ('"\\n"', '\n'),
        ('"\\r"', '\r'),
        ('"\\t"', '\t'),
        ('"\\v"', '\v'),
        ('"\\\\"', '\\'),
        ('"\\\'"', '\''),
        ('"\\""', '"'),
    )
    for source, expected_lit in cases:
        end, lit = Lexer.lex_string(source, 0)
        self.assertEqual(end, 4)
        self.assertEqual(lit, expected_lit)
    # An unrecognized escape such as \? raises at the backslash.
    with self.assertRaises(LexerError) as ctx:
        Lexer.lex_string('"\\?"', 0)
    self.assertEqual(ctx.exception.line, 1)
    self.assertEqual(ctx.exception.column, 2)
def test_lex_comment(self):
    """Test whether Lexer.lex() can lex comments."""
    # (source, expected end offset); the token is always COMMENT and the
    # literal is always None.
    cases = (
        ('// abcd', 7),
        ('// abcd\nnext', 7),   # a line comment stops before the newline
        ('/*a\nb*/', 7),        # block comments may span multiple lines
        ('/*a\n *b*/', 9),
        ('/*a**b*/', 8),        # interior '*' runs must not end the comment
        ('/*a***b*/', 9),
        ('/**/', 4),            # shortest possible block comment
        ('/***/', 5),
        ('/**a*/', 6),
        ('/*a**/', 6),
        ('/***a*/', 7),
        ('/*a***/', 7),
    )
    for source, expected_end in cases:
        token, end, lit = Lexer.lex(source, 0)
        self.assertEqual(token, Token.COMMENT)
        self.assertEqual(end, expected_end)
        self.assertEqual(lit, None)
def test_compute_line_column(self):
    """Test the line and column computation."""
    source = 'ab\ncde\nfg\n'
    # (offset, expected line, expected column); both are 1-based.
    cases = (
        (0, 1, 1), (1, 1, 2), (2, 1, 3),              # line 1: 'ab'
        (3, 2, 1), (4, 2, 2), (5, 2, 3), (6, 2, 4),   # line 2: 'cde'
        (7, 3, 1), (8, 3, 2), (9, 3, 3),              # line 3: 'fg'
        (10, 4, 1),                                   # line 4 (empty line)
    )
    for pos, expected_line, expected_column in cases:
        line, column = Lexer.compute_line_column(source, pos)
        self.assertEqual(line, expected_line)
        self.assertEqual(column, expected_column)