def test_relex_altered_string(self):
    # Lexer with double-quoted strings, integers and a plus-sign token.
    lexer = IncrementalLexer(""" "\"[^\"]*\"":str "[0-9]+":INT "\+":PLUS """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    text1 = TextNode(Terminal("123+\"\""))
    bos.insert_after(text1)
    lexer.relex(text1)
    # Relexing splits the raw text into INT, PLUS and an empty string token.
    assert bos.next_term.symbol == Terminal("123")
    assert bos.next_term.lookup == "INT"
    assert bos.next_term.lookahead == 1
    assert bos.next_term.next_term.symbol == Terminal("+")
    assert bos.next_term.next_term.lookup == "PLUS"
    assert bos.next_term.next_term.lookahead == 0
    assert bos.next_term.next_term.next_term.symbol == Terminal("\"\"")
    assert bos.next_term.next_term.next_term.lookup == "str"
    assert bos.next_term.next_term.next_term.lookahead == 0
    # Alter the string token's content and relex it again; this must not raise.
    string = bos.next_term.next_term.next_term
    string.symbol.name = "\"abc\""
    lexer.relex(string)
def test_multitoken_real_lbox_cut_off_string(self):
    # Lexer with double-quoted strings and lowercase identifiers.
    lexer = IncrementalLexer(""" "\"[^\"]*\"":str "[a-z]+":var """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    # A string that is interrupted by a language box and continues after it.
    text1 = TextNode(Terminal("\"abc"))
    lbox = TextNode(MagicTerminal("<SQL>"))
    text2 = TextNode(Terminal("d\"ef\"g"))
    bos.insert_after(text1)
    text1.insert_after(lbox)
    lbox.insert_after(text2)
    # The trailing `"g` cannot be lexed, but the string spanning the box
    # should still have been merged into a MultiTextNode.
    pytest.raises(LexingError, lexer.relex, text1)
    assert type(bos.next_term) is MultiTextNode
    assert bos.next_term.children[0] is text1
    assert bos.next_term.children[1] is lbox
    assert bos.next_term.children[2] is text2
    assert text2.symbol.name == "d\""
    assert bos.next_term.next_term.symbol.name == "ef"
    leftover = bos.next_term.next_term.next_term
    assert leftover.symbol.name == "\"g"
    # Remove the stray quote and relex; everything now lexes cleanly.
    leftover.symbol.name = "g"
    leftover.changed = True
    lexer.relex(leftover)
    assert bos.next_term.lookup == "str"
    assert bos.next_term == mk_multitextnode(
        [Terminal("\"abc"), MagicTerminal("<SQL>"), Terminal("d\"")])
    assert bos.next_term.next_term.symbol.name == "efg"
def test_relex(self):
    """Relexing a freshly inserted text node yields the expected token stream."""
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    new = TextNode(Terminal("1 + 2 * 3"))
    bos.insert_after(new)
    self.relex(new)
    assert ast.parent.symbol == Nonterminal("Root")
    assert isinstance(ast.parent.children[0], BOS)
    assert isinstance(ast.parent.children[-1], EOS)
    # (token text, expected lookahead or None if not checked)
    expected = [("1", 1), (" ", 1), ("+", 0), (" ", None), ("2", None),
                (" ", None), ("*", None), (" ", None), ("3", None)]
    node = bos.next_term
    for name, lookahead in expected:
        assert node.symbol == Terminal(name)
        if lookahead is not None:
            assert node.lookahead == lookahead
        node = node.next_term
    assert isinstance(node, EOS)
def test_triplequotes1(self):
    # Lexer where triple-quoted strings take precedence over plain strings.
    lexer = IncrementalLexer(""" "\"\"\"[^\"]*\"\"\"":triplestring "\"[^\"]*\"":string "[a-z]+":var """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    text1 = TextNode(Terminal("\"\"\""))
    text2 = TextNode(Terminal("abc"))
    text3 = TextNode(Terminal("\"\"\""))
    bos.insert_after(text1)
    text1.insert_after(text2)
    text2.insert_after(text3)
    lexer.relex(text1)
    # The three adjacent nodes merge into a single triplestring token.
    assert bos.next_term.symbol == Terminal("\"\"\"abc\"\"\"")
    assert bos.next_term.lookup == "triplestring"
    # Inserting an embedded `"""` leaves an unlexable trailing quote.
    bos.next_term.symbol.name = "\"\"\"ab\"\"\"c\"\"\""
    pytest.raises(LexingError, lexer.relex, bos.next_term)
    # Dropping the final quote makes the tail lexable again.
    bos.next_term.symbol.name = "\"\"\"ab\"\"\"c\"\""
    lexer.relex(bos.next_term)
def test_multi(self):
    """A comment containing a newline is returned as a multi-part token."""
    ast = AST()
    ast.init()
    node = TextNode(Terminal('--[[test\rtest]]'))
    ast.parent.children[0].insert_after(node)
    token_iter = self.lexer.get_token_iter(node)
    expected = (['--[[test', '\r', 'test]]'], "mcomment", 0,
                [TextNode(Terminal('--[[test\rtest]]'))], 0)
    assert next(token_iter) == expected
def test_simple(self):
    bos = self.ast.parent.children[0]
    new = TextNode(Terminal("1+2"))
    bos.insert_after(new)
    self.lexer.relex(new)
    assert self.parser.inc_parse([]) == True
    assert self.ast.parent.symbol == Nonterminal("Root")
    assert isinstance(self.ast.parent.children[0], BOS)
    assert isinstance(self.ast.parent.children[-1], EOS)
    bos = self.ast.parent.children[0]
    # Build the expected parse tree for "1+2" by hand and compare it against
    # the tree produced by the incremental parser.
    root = TextNode(Nonterminal("Root"))
    bos = BOS(Terminal(""))  # NOTE: rebinds `bos` for the expected tree
    eos = EOS(FinishSymbol())
    Start = TextNode(Nonterminal("Startrule"))
    root.set_children([bos, Start, eos])
    E1 = TextNode(Nonterminal("E"))
    Start.set_children([TextNode(N("WS")), E1])
    E1.set_children(self.make_nodes([N("E"), T("+"), N("WS"), N("T")]))
    E2 = E1.children[0]
    E2.set_children(self.make_nodes([N("T")]))
    T1 = E2.children[0]
    T1.set_children(self.make_nodes([N("P")]))
    P1 = T1.children[0]
    P1.set_children(self.make_nodes([T("1"), N("WS")]))
    T2 = E1.children[3]
    T2.set_children(self.make_nodes([N("P")]))
    P2 = T2.children[0]
    P2.set_children(self.make_nodes([T("2"), N("WS")]))
    self.compare_trees(self.ast.parent, root)
def test_simple(self):
    """A comment without newlines lexes as a single plain token."""
    ast = AST()
    ast.init()
    node = TextNode(Terminal('--[[testtest]]'))
    ast.parent.children[0].insert_after(node)
    token_iter = self.lexer.get_token_iter(node)
    expected = ('--[[testtest]]', "mcomment", 0,
                [TextNode(Terminal('--[[testtest]]'))], 0)
    assert next(token_iter) == expected
def test_lookahead(self):
    """An unterminated multiline comment falls back to a short comment token."""
    ast = AST()
    ast.init()
    node = TextNode(Terminal('--[[test\rtest'))
    ast.parent.children[0].insert_after(node)
    token_iter = self.lexer.get_token_iter(node)
    expected = ('--[[test', "scomment", 6,
                [TextNode(Terminal('--[[test\rtest'))], -5)
    assert next(token_iter) == expected
def test_simple3(self):
    """Three quotes lex as an empty double-quoted string with a leftover quote."""
    ast = AST()
    ast.init()
    node = TextNode(Terminal('"""'))
    ast.parent.children[0].insert_after(node)
    token_iter = self.lexer.get_token_iter(node)
    assert next(token_iter) == ('""', "dstring", 2, [TextNode(Terminal('"""'))], -1)
def test_simple(self):
    """A plain identifier lexes to a single NAME token."""
    ast = AST()
    ast.init()
    node = TextNode(Terminal("asd"))
    ast.parent.children[0].insert_after(node)
    token_iter = self.lexer.get_token_iter(node)
    assert next(token_iter) == ("asd", "NAME", 1, [TextNode(Terminal("asd"))], 0)
def test_normal_and_multi_to_multi2(self):
    # Lexer for strings that may contain newlines and \x80 placeholders.
    lexer = IncrementalLexer(""" "\"[a-z\r\x80]*\"":str "[a-z]+":var """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    # Two normal nodes followed by an already existing MultiTextNode.
    n1 = TextNode(Terminal("\"abc"))
    n2 = TextNode(Terminal("\r"))
    n3 = mk_multitextnode(
        [Terminal("def"), Terminal("\r"), Terminal("gh\"")])
    bos.insert_after(n1)
    n1.insert_after(n2)
    n2.insert_after(n3)
    lexer.relex(n1)
    # All pieces merge into one MultiTextNode covering the whole string.
    assert bos.next_term == mk_multitextnode([
        Terminal("\"abc"), Terminal("\r"), Terminal("def"), Terminal("\r"),
        Terminal("gh\"")
    ])
    assert bos.next_term.next_term is eos
def test_stringwrapper(self):
    """StringWrapper indexes and slices transparently across adjacent nodes."""
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    nodes = [TextNode(Terminal(part)) for part in ("abc", "+", "1", "*", "3456")]
    prev = bos
    for node in nodes:
        prev.insert_after(node)
        prev = node
    wrapper = StringWrapper(nodes[0])
    # Single-character indexing crosses node boundaries.
    for idx, ch in [(0, "a"), (2, "c"), (3, "+"), (4, "1"),
                    (5, "*"), (6, "3"), (9, "6")]:
        assert wrapper[idx] == ch
    # Slicing behaves exactly like slicing the concatenated string.
    s = "abc+1*3456"
    for i in range(len(s)):
        for j in range(len(s)):
            assert wrapper[i:j] == s[i:j]
            print(i, j, wrapper[i:j])
def test_backwards_lexing(self):
    """Relexing merges previously lexed colon tokens backwards into `::=`."""
    lexer = IncrementalLexer(""" "::=":doublecolon "=":equal ":":singlecolon """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    first = TextNode(Terminal(":"))
    bos.insert_after(first)
    lexer.relex(first)
    assert bos.next_term.symbol.name == ":"
    assert bos.next_term.lookup == "singlecolon"
    assert first.lookahead == 1
    # A second colon stays a separate singlecolon token.
    second = TextNode(Terminal(":"))
    first.insert_after(second)
    lexer.relex(second)
    assert second.lookahead == 1
    assert bos.next_term.symbol.name == ":"
    assert bos.next_term.next_term.symbol.name == ":"
    # Appending `=` causes both colons to merge with it into `::=`.
    third = TextNode(Terminal("="))
    second.insert_after(third)
    lexer.relex(third)
    assert bos.next_term.symbol.name == "::="
    assert isinstance(bos.next_term.next_term, EOS)
def parse(self, tokens):
    """Run the LR parser over *tokens*.

    Each token is a tuple whose second element is the token type used for
    table lookups. Returns True when the parser reaches an Accept action,
    False on a table-lookup error.
    """
    self.log = []
    tokens = iter(tokens)
    # BUG FIX: `tokens.next()` is Python 2 only and raises AttributeError on
    # Python 3; use the builtin next() (as the rest of this codebase does).
    token = next(tokens)
    la = Terminal(token[1])
    while True:
        self.log.append(token)
        elem = self.stable.lookup(self.state, la)
        if type(elem) is Shift:
            self.state = elem.action
            self.stack.append((la, self.state))
            try:
                token = next(tokens)
                la = Terminal(token[1])
            except StopIteration:
                # Input exhausted: switch lookahead to the finish symbol.
                la = FinishSymbol()
        elif type(elem) is Reduce:
            # Pop the rule's right-hand side, then follow the goto entry.
            for i in range(elem.amount()):
                self.stack.pop()
            self.state = self.stack[-1][1]
            goto = self.stable.lookup(self.state, elem.action.left)
            assert goto is not None
            self.state = goto.action
            self.stack.append((elem.action.left, self.state))
        elif type(elem) is Accept:
            return True
        else:
            # No valid action: parse error.
            return False
def test_simple(self):
    """A complete triple-quoted string lexes as one MLS token."""
    ast = AST()
    ast.init()
    new = TextNode(Terminal('"""abc"""'))
    ast.parent.children[0].insert_after(new)
    it = self.lexer.get_token_iter(new)
    # BUG FIX: `it.next()` is Python 2 only; use the builtin next() as the
    # sibling tests in this file already do.
    assert next(it) == ('"""abc"""', "MLS", 0,
                        [TextNode(Terminal('"""abc"""'))], 0)
def test_terminal():
    """Equality of terminal and nonterminal symbols is based on their names."""
    assert Nonterminal("E") == Nonterminal("E")
    assert Terminal("a") == Terminal("a")
def is_finished(self):
    """Check whether the parser could terminate from its current state."""
    snapshot = list(self.state)
    if self.temp_parse(snapshot, Terminal("NEWLINE")):
        # After a NEWLINE the parser must also accept end-of-input.
        return bool(self.syntaxtable.lookup(snapshot[-1], FinishSymbol()))
    # Otherwise a DEDENT alone suffices.
    return bool(self.temp_parse(snapshot, Terminal("DEDENT")))
def get_lookup(self, la):
    """Return the symbol used for parse-table lookups for node *la*.

    Falls back to the node's own symbol when it carries no lookup string.
    """
    symbol = Terminal(la.lookup) if la.lookup != "" else la.symbol
    if isinstance(symbol, IndentationTerminal):
        # XXX hack: change parsing table to accept IndentationTerminals
        symbol = Terminal(symbol.name)
    return symbol
def get_lookup(la):
    """Return the symbol used for parse-table lookups for node *la*.

    Falls back to the node's own symbol when it carries no lookup string;
    IndentationTerminals are converted to plain Terminals.
    """
    if la.lookup != "":
        return Terminal(la.lookup)
    if isinstance(la.symbol, IndentationTerminal):
        return Terminal(la.symbol.name)
    return la.symbol
def test_multitoken_relex_merge(self):
    # Lexer for strings that may contain lowercase letters, \r and \x80.
    lexer = IncrementalLexer(""" "\"[a-z\r\x80]*\"":str """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    text = TextNode(Terminal("\"abc\rde\rf\""))
    bos.insert_after(text)
    lexer.relex(text)
    # The string splits at newlines into a MultiTextNode.
    assert bos.next_term.lookup == "str"
    assert bos.next_term == mk_multitextnode([
        Terminal("\"abc"), Terminal("\r"), Terminal("de"), Terminal("\r"),
        Terminal("f\"")
    ])
    bos.next_term.children.pop(3)  # remove a newline
    bos.next_term.update_children()
    child0 = bos.next_term.children[0]
    child1 = bos.next_term.children[1]
    child2 = bos.next_term.children[2]
    lexer.relex(bos.next_term)
    # Relexing merges "de" and "f\"" while reusing the surviving children.
    assert bos.next_term == mk_multitextnode(
        [Terminal("\"abc"), Terminal("\r"), Terminal("def\"")])
    assert bos.next_term.children[0] is child0
    assert bos.next_term.children[1] is child1
    assert bos.next_term.children[2] is child2
def test_relex3(self):
    """Relexing merges edited nodes but leaves already-correct tokens alone."""
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    n1 = TextNode(Terminal("1+2"))
    n2 = TextNode(Terminal("345"))
    n3 = TextNode(Terminal("6+"))
    n4 = TextNode(Terminal("789"))  # this should never be touched
    n4.lookup = "INT"
    n5 = TextNode(Terminal("+"))  # this should never be touched
    n5.lookup = "plus"
    prev = bos
    for node in (n1, n2, n3, n4, n5):
        prev.insert_after(node)
        prev = node
    self.relex(n1)
    assert ast.parent.symbol == Nonterminal("Root")
    assert isinstance(ast.parent.children[0], BOS)
    assert isinstance(ast.parent.children[-1], EOS)
    node = bos.next_term
    for name in ("1", "+", "23456", "+"):
        assert node.symbol == Terminal(name)
        node = node.next_term
    # check that 789 hasn't been relexed: same node, same symbol object
    assert node is n4
    assert node.symbol is n4.symbol
def test_lexingerror(self):
    """Lexing yields the valid INT prefix, then raises on the leftover text."""
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    node = TextNode(Terminal("1b"))
    bos.insert_after(node)
    token_iter = self.lexer.get_token_iter(node)
    assert next(token_iter) == ("1", "INT", 1, [TextNode(Terminal("1b"))], -1)
    with pytest.raises(LexingError):
        next(token_iter)
def test_option_rule():
    """An optional element is desugared into a helper nonterminal."""
    p = Parser(""" A ::= "a" [ "b" ] "g" """)
    p.parse()
    print(p.rules)
    expected_a = [[Terminal("a"), Nonterminal("A_option")]]
    expected_option = [[Terminal("b"), Terminal("g")], [Terminal("g")]]
    assert p.rules[Nonterminal("A")].alternatives == expected_a
    assert p.rules[Nonterminal("A_option")].alternatives == expected_option
def test_loop_rule():
    """A repetition is desugared into a right-recursive helper nonterminal."""
    p = Parser(""" A ::= "a" { "b" } "g" """)
    p.parse()
    print(p.rules)
    expected_a = [[Terminal("a"), Nonterminal("A_loop")]]
    expected_loop = [[Terminal("b"), Nonterminal("A_loop")], [Terminal("g")]]
    assert p.rules[Nonterminal("A")].alternatives == expected_a
    assert p.rules[Nonterminal("A_loop")].alternatives == expected_loop
def test_more_complex_grammar():
    """Alternatives mixing terminals and nonterminals are parsed correctly."""
    p = Parser(""" name ::= "ID" | "&" "ID" | splice | insert """)
    p.parse()
    expected = [
        [Terminal("ID")],
        [Terminal("&"), Terminal("ID")],
        [Nonterminal("splice")],
        [Nonterminal("insert")],
    ]
    assert p.rules[Nonterminal("name")].alternatives == expected
def test_group_rule():
    """A parenthesised group is desugared into chained helper nonterminals."""
    p = Parser(""" A ::= "a" ( "b" | "c" ) "g" """)
    p.parse()
    print(p.rules)
    expected_a = [[Terminal("a"), Nonterminal("A_group1")]]
    expected_g1 = [[Terminal("b"), Nonterminal("A_group2")],
                   [Terminal("c"), Nonterminal("A_group2")]]
    expected_g2 = [[Terminal("g")]]
    assert p.rules[Nonterminal("A")].alternatives == expected_a
    assert p.rules[Nonterminal("A_group1")].alternatives == expected_g1
    assert p.rules[Nonterminal("A_group2")].alternatives == expected_g2
def test_token_iter2(self):
    """Adjacent INT nodes are merged into one token by the token iterator."""
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    new = TextNode(Terminal("12"))
    new2 = TextNode(Terminal("34"))
    bos.insert_after(new)
    new.insert_after(new2)
    # BUG FIX: the `.next` bound method only exists on Python 2 iterators;
    # use the builtin next() on the iterator instead.
    token_iter = self.lexer.lexer.get_token_iter(new)
    assert next(token_iter) == ("1234", "INT", 1, [
        TextNode(Terminal("12")), TextNode(Terminal("34"))
    ], 0)
def test_multitoken_reuse1(self):
    # Lexer for strings that may contain lowercase letters, \r and \x80.
    lexer = IncrementalLexer(""" "\"[a-z\r\x80]*\"":str """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    text = TextNode(Terminal("\"abc\rdef\""))
    bos.insert_after(text)
    lexer.relex(text)
    # The multiline string becomes a MultiTextNode; its first child is the
    # original node.
    assert bos.next_term.lookup == "str"
    assert bos.next_term == mk_multitextnode(
        [Terminal("\"abc"), Terminal("\r"), Terminal("def\"")])
    assert bos.next_term.children[0] is text
    bos.next_term.children[
        2].symbol.name = "de\rf\""  # insert another newline
    child0 = bos.next_term.children[0]
    child1 = bos.next_term.children[1]
    child2 = bos.next_term.children[2]
    mt = bos.next_term
    lexer.relex(bos.next_term)
    assert bos.next_term == mk_multitextnode([
        Terminal("\"abc"), Terminal("\r"), Terminal("de"), Terminal("\r"),
        Terminal("f\"")
    ])
    # test if nodes within a MultiTextNode are reused
    assert bos.next_term.children[0] is child0
    assert bos.next_term.children[1] is child1
    assert bos.next_term.children[2] is child2
    child3 = bos.next_term.children[3]
    child4 = bos.next_term.children[4]
    # The sibling chain inside the MultiTextNode must be correctly linked,
    # terminated with None at both ends.
    assert child0.prev_term is None
    assert child0.next_term is child1
    assert child1.prev_term is child0
    assert child1.next_term is child2
    assert child2.prev_term is child1
    assert child2.next_term is child3
    assert child3.prev_term is child2
    assert child3.next_term is child4
    assert child4.prev_term is child3
    assert child4.next_term is None
    assert bos.next_term is mt  # reused the MultiTextNode
def test_normal_to_normal_and_multi(self):
    # Lexer with double-quoted strings and lowercase identifiers.
    lexer = IncrementalLexer(""" "\"[^\"]*\"":str "[a-z]+":var """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    text = TextNode(Terminal("\"abc\rdef\""))
    bos.insert_after(text)
    lexer.relex(text)
    # The multiline string becomes a MultiTextNode split at the newline.
    assert bos.next_term.lookup == "str"
    assert bos.next_term == mk_multitextnode(
        [Terminal("\"abc"), Terminal("\r"), Terminal("def\"")])
    # Moving the opening quote into the middle splits the token into a plain
    # node ("ab") followed by a new multiline string starting at the quote.
    bos.next_term.children[0].symbol.name = "ab\"c"
    lexer.relex(bos.next_term)
    assert bos.next_term == TextNode(Terminal("ab"))
    assert bos.next_term.next_term == mk_multitextnode(
        [Terminal("\"c"), Terminal("\r"), Terminal("def\"")])
def test_relex_stop(self):
    """Relexing stops once it reaches tokens that are already lexed correctly."""
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    new = TextNode(Terminal("1+2"))
    old1 = TextNode(Terminal("*"))
    old2 = TextNode(Terminal("3"))
    old2.lookup = "INT"
    bos.insert_after(new)
    new.insert_after(old1)
    old1.insert_after(old2)
    self.relex(new)
    assert ast.parent.symbol == Nonterminal("Root")
    assert isinstance(ast.parent.children[0], BOS)
    assert isinstance(ast.parent.children[-1], EOS)
    node = bos.next_term
    for name in ("1", "+", "2", "*", "3"):
        assert node.symbol == Terminal(name)
        node = node.next_term
    assert isinstance(node, EOS)