def test_simple(self):
    """Lexing a full Lua-style multiline comment yields one 'mcomment' token."""
    tree = AST()
    tree.init()
    node = TextNode(Terminal('--[[testtest]]'))
    tree.parent.children[0].insert_after(node)
    tokens = self.lexer.get_token_iter(node)
    assert next(tokens) == ('--[[testtest]]', "mcomment", 0,
                            [TextNode(Terminal('--[[testtest]]'))], 0)
def test_normal_to_normal_and_multi(self):
    # A string token spanning a newline is stored as a MultiTextNode; editing
    # its first child so the opening quote moves must re-split the stream into
    # a plain 'var' token followed by a new multiline string.
    lexer = IncrementalLexer(""" "\"[^\"]*\"":str "[a-z]+":var """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    text = TextNode(Terminal("\"abc\rdef\""))
    bos.insert_after(text)
    lexer.relex(text)
    assert bos.next_term.lookup == "str"
    assert bos.next_term == mk_multitextnode(
        [Terminal("\"abc"), Terminal("\r"), Terminal("def\"")])
    # Move the quote: "abc -> ab"c, then relex the containing MultiTextNode.
    bos.next_term.children[0].symbol.name = "ab\"c"
    lexer.relex(bos.next_term)
    assert bos.next_term == TextNode(Terminal("ab"))
    assert bos.next_term.next_term == mk_multitextnode(
        [Terminal("\"c"), Terminal("\r"), Terminal("def\"")])
def test_simple(self):
    """A plain identifier lexes to a single NAME token with lookahead 1."""
    tree = AST()
    tree.init()
    node = TextNode(Terminal("asd"))
    tree.parent.children[0].insert_after(node)
    tokens = self.lexer.get_token_iter(node)
    assert next(tokens) == ("asd", "NAME", 1, [TextNode(Terminal("asd"))], 0)
def test_multi(self):
    """A comment spanning a newline is reported as a list of split lexemes."""
    tree = AST()
    tree.init()
    node = TextNode(Terminal('--[[test\rtest]]'))
    tree.parent.children[0].insert_after(node)
    tokens = self.lexer.get_token_iter(node)
    assert next(tokens) == (['--[[test', '\r', 'test]]'], "mcomment", 0,
                            [TextNode(Terminal('--[[test\rtest]]'))], 0)
def test_simple3(self):
    """Three quotes lex to an empty dstring plus one unconsumed character."""
    tree = AST()
    tree.init()
    node = TextNode(Terminal('"""'))
    tree.parent.children[0].insert_after(node)
    tokens = self.lexer.get_token_iter(node)
    assert next(tokens) == ('""', "dstring", 2, [TextNode(Terminal('"""'))], -1)
def test_normal_and_multi_to_normal_and_multi(self):
    # Relexing a normal node followed by a MultiTextNode may shift characters
    # between them while reusing both nodes (node identity asserted below).
    lexer = IncrementalLexer(""" "\"[a-z\r\x80]*\"":str "[a-z]+":var """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    n1 = TextNode(Terminal("ab"))
    n2 = mk_multitextnode(
        [Terminal("cd\"e"), Terminal("\r"), Terminal("fg\"")])
    bos.insert_after(n1)
    n1.insert_after(n2)
    lexer.relex(n1)
    # 'ab' absorbed 'cd'; the string token now starts at the quote.
    assert bos.next_term.symbol.name == "abcd"
    assert bos.next_term is n1
    assert bos.next_term.next_term.lookup == "str"
    assert bos.next_term.next_term == mk_multitextnode(
        [Terminal("\"e"), Terminal("\r"), Terminal("fg\"")])
    assert bos.next_term.next_term is n2
    assert bos.next_term.next_term.next_term is eos
def test_normal_and_multi_to_multi3(self):
    # A normal node ending in a newline merges with the following
    # MultiTextNode into one string token spanning both newlines.
    lexer = IncrementalLexer(""" "\"[a-z\r\x80]*\"":str "[a-z]+":var """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    n1 = TextNode(Terminal("\"abc\r"))
    n2 = mk_multitextnode(
        [Terminal("def"), Terminal("\r"), Terminal("gh\"")])
    bos.insert_after(n1)
    n1.insert_after(n2)
    lexer.relex(n1)
    assert bos.next_term == mk_multitextnode([
        Terminal("\"abc"), Terminal("\r"), Terminal("def"),
        Terminal("\r"), Terminal("gh\"")
    ])
    assert bos.next_term.next_term is eos
def test_lookahead(self):
    """An unterminated comment falls back to 'scomment' with the lookahead
    that was consumed and the unread tail reported as a negative offset."""
    tree = AST()
    tree.init()
    node = TextNode(Terminal('--[[test\rtest'))
    tree.parent.children[0].insert_after(node)
    tokens = self.lexer.get_token_iter(node)
    assert next(tokens) == ('--[[test', "scomment", 6,
                            [TextNode(Terminal('--[[test\rtest'))], -5)
def test_simple(self):
    """A complete triple-quoted string lexes to a single MLS token.

    Fixed: this test used the Python 2-only iterator method ``it.next()``;
    it now uses the builtin ``next()`` as every other test in this file does.
    """
    ast = AST()
    ast.init()
    new = TextNode(Terminal('"""abc"""'))
    ast.parent.children[0].insert_after(new)
    it = self.lexer.get_token_iter(new)
    assert next(it) == ('"""abc"""', "MLS", 0,
                        [TextNode(Terminal('"""abc"""'))], 0)
def create_token(self, name):
    """Build an indentation token node for `name`; returns None for any
    unrecognised name."""
    kinds = {
        "newline": "NEWLINE",
        "indent": "INDENT",
        "dedent": "DEDENT",
        "unbalanced": "UNBALANCED",
    }
    if name in kinds:
        return TextNode(IndentationTerminal(kinds[name]))
def test_multitoken_real_lbox_relex(self):
    # A language box embedded in a string becomes a child of a MultiTextNode;
    # inserting a newline before it splits the surrounding children.
    lexer = IncrementalLexer(""" "\"[^\"]*\"":str """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    text1 = TextNode(Terminal("\"abc"))
    lbox = TextNode(MagicTerminal("<SQL>"))
    text2 = TextNode(Terminal("def\""))
    bos.insert_after(text1)
    text1.insert_after(lbox)
    lbox.insert_after(text2)
    lexer.relex(text1)
    assert bos.next_term.lookup == "str"
    assert bos.next_term == mk_multitextnode(
        [Terminal("\"abc"), MagicTerminal("<SQL>"), Terminal("def\"")])
    # Insert a newline into the first child and relex the MultiTextNode.
    bos.next_term.children[0].symbol.name = "\"ab\rc"
    lexer.relex(bos.next_term)
    assert bos.next_term == mk_multitextnode([
        Terminal("\"ab"), Terminal("\r"), Terminal("c"),
        MagicTerminal("<SQL>"), Terminal("def\"")
    ])
def test_triplequotes1(self):
    # Editing inside a triple-quoted string: an edit that leaves an
    # unlexable tail raises LexingError; a lexable edit relexes cleanly.
    lexer = IncrementalLexer(""" "\"\"\"[^\"]*\"\"\"":triplestring "\"[^\"]*\"":string "[a-z]+":var """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    text1 = TextNode(Terminal("\"\"\""))
    text2 = TextNode(Terminal("abc"))
    text3 = TextNode(Terminal("\"\"\""))
    bos.insert_after(text1)
    text1.insert_after(text2)
    text2.insert_after(text3)
    lexer.relex(text1)
    # The three inserted nodes merge into one triplestring token.
    assert bos.next_term.symbol == Terminal("\"\"\"abc\"\"\"")
    assert bos.next_term.lookup == "triplestring"
    bos.next_term.symbol.name = "\"\"\"ab\"\"\"c\"\"\""
    pytest.raises(LexingError, lexer.relex, bos.next_term)
    bos.next_term.symbol.name = "\"\"\"ab\"\"\"c\"\""
    lexer.relex(bos.next_term)
def test_simple(self):
    # Parse '1+2' incrementally and compare the resulting parse tree
    # against a manually built reference tree.
    bos = self.ast.parent.children[0]
    new = TextNode(Terminal("1+2"))
    bos.insert_after(new)
    self.lexer.relex(new)
    assert self.parser.inc_parse([]) == True
    assert self.ast.parent.symbol == Nonterminal("Root")
    assert isinstance(self.ast.parent.children[0], BOS)
    assert isinstance(self.ast.parent.children[-1], EOS)
    bos = self.ast.parent.children[0]
    # Expected tree: Root -> [BOS, Startrule, EOS], with E -> E + WS T
    # deriving '1+2'.
    root = TextNode(Nonterminal("Root"))
    bos = BOS(Terminal(""))
    eos = EOS(FinishSymbol())
    Start = TextNode(Nonterminal("Startrule"))
    root.set_children([bos, Start, eos])
    E1 = TextNode(Nonterminal("E"))
    Start.set_children([TextNode(N("WS")), E1])
    E1.set_children(self.make_nodes([N("E"), T("+"), N("WS"), N("T")]))
    E2 = E1.children[0]
    E2.set_children(self.make_nodes([N("T")]))
    T1 = E2.children[0]
    T1.set_children(self.make_nodes([N("P")]))
    P1 = T1.children[0]
    P1.set_children(self.make_nodes([T("1"), N("WS")]))
    T2 = E1.children[3]
    T2.set_children(self.make_nodes([N("P")]))
    P2 = T2.children[0]
    P2.set_children(self.make_nodes([T("2"), N("WS")]))
    self.compare_trees(self.ast.parent, root)
def test_multitoken_real_lbox_cut_off_string(self):
    # A later quote cuts the string short: the leftover '"g' cannot be lexed
    # (LexingError); after repairing it the tree relexes cleanly.
    lexer = IncrementalLexer(""" "\"[^\"]*\"":str "[a-z]+":var """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    text1 = TextNode(Terminal("\"abc"))
    lbox = TextNode(MagicTerminal("<SQL>"))
    text2 = TextNode(Terminal("d\"ef\"g"))
    bos.insert_after(text1)
    text1.insert_after(lbox)
    lbox.insert_after(text2)
    pytest.raises(LexingError, lexer.relex, text1)
    # Despite the error the first part was merged into a MultiTextNode
    # reusing the original nodes.
    assert type(bos.next_term) is MultiTextNode
    assert bos.next_term.children[0] is text1
    assert bos.next_term.children[1] is lbox
    assert bos.next_term.children[2] is text2
    assert text2.symbol.name == "d\""
    assert bos.next_term.next_term.symbol.name == "ef"
    leftover = bos.next_term.next_term.next_term
    assert leftover.symbol.name == "\"g"
    # Repair the unlexable tail and relex it.
    leftover.symbol.name = "g"
    leftover.changed = True
    lexer.relex(leftover)
    assert bos.next_term.lookup == "str"
    assert bos.next_term == mk_multitextnode(
        [Terminal("\"abc"), MagicTerminal("<SQL>"), Terminal("d\"")])
    assert bos.next_term.next_term.symbol.name == "efg"
def test_backwards_lexing(self):
    """Typing ':', ':', '=' one after another must merge backwards into '::='."""
    lexer = IncrementalLexer(""" "::=":doublecolon "=":equal ":":singlecolon """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    first = TextNode(Terminal(":"))
    bos.insert_after(first)
    lexer.relex(first)
    assert bos.next_term.symbol.name == ":"
    assert bos.next_term.lookup == "singlecolon"
    assert first.lookahead == 1
    second = TextNode(Terminal(":"))
    first.insert_after(second)
    lexer.relex(second)
    assert second.lookahead == 1
    assert bos.next_term.symbol.name == ":"
    assert bos.next_term.next_term.symbol.name == ":"
    equals = TextNode(Terminal("="))
    second.insert_after(equals)
    lexer.relex(equals)
    assert bos.next_term.symbol.name == "::="
    assert isinstance(bos.next_term.next_term, EOS)
def test_multitoken_real_lbox_relex_cut_off_string(self):
    # Editing a child of a MultiTextNode containing a language box: an edit
    # leaving an unlexable tail raises; a lexable edit shrinks the string
    # and splits off a trailing 'var' token.
    lexer = IncrementalLexer(""" "\"[^\"]*\"":str "[a-z]+":var """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    text1 = TextNode(Terminal("\"abc"))
    lbox = TextNode(MagicTerminal("<SQL>"))
    text2 = TextNode(Terminal("def\""))
    bos.insert_after(text1)
    text1.insert_after(lbox)
    lbox.insert_after(text2)
    lexer.relex(text1)
    assert bos.next_term.lookup == "str"
    assert bos.next_term == mk_multitextnode(
        [Terminal("\"abc"), MagicTerminal("<SQL>"), Terminal("def\"")])
    assert bos.next_term.lookahead == 0
    bos.next_term.children[2].symbol.name = "d\"ef\""
    pytest.raises(LexingError, lexer.relex, bos.next_term)
    bos.next_term.children[2].symbol.name = "d\"ef"
    lexer.relex(bos.next_term)
    assert bos.next_term == mk_multitextnode(
        [Terminal("\"abc"), MagicTerminal("<SQL>"), Terminal("d\"")])
    assert bos.next_term.next_term.symbol.name == "ef"
def test_lexingerror(self):
    """'1b' yields INT '1' (with one unread char) and then fails to lex."""
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]
    node = TextNode(Terminal("1b"))
    bos.insert_after(node)
    tokens = self.lexer.get_token_iter(node)
    assert next(tokens) == ("1", "INT", 1, [TextNode(Terminal("1b"))], -1)
    with pytest.raises(LexingError):
        next(tokens)
def parse_after(self, la, split=None, maxtoks=1, maxdist=0):
    """Checks if la can be parsed in the current state. If la is whitespace,
    continue until we can parse the next non-whitespace token.

    Succeeds once at least `maxtoks` tokens and `maxdist` characters have
    been shifted, or on Accept; fails on a parse error before anything was
    shifted."""
    parsed_tokens = 0
    parsed_distance = 0
    if split:
        # Relex the remainder of the previous terminal and use the first
        # resulting token as the lookahead instead of `la` itself.
        token = self.lexer.lex(la.prev_term.symbol.name[split:])
        tmpla = la
        la = TextNode(Terminal(token[0][1]))
        la.next_term = tmpla
    while True:
        lookup = get_lookup(la)
        element = self.syntaxtable.lookup(self.state[-1], lookup)
        # If we see the errornode here and the parse table action is
        # either Shift or Accept, then the inserted language box has fixed
        # the error without wrapping it inside the box
        if la is self.errornode and type(element) in [Shift, Accept]:
            self.seen_error = True
        if type(element) is Reduce:
            # Pop the reduced states and follow the goto entry.
            for i in range(element.amount()):
                self.state.pop()
            goto = self.syntaxtable.lookup(self.state[-1],
                                           element.action.left)
            assert goto is not None
            self.state.append(goto.action)
            continue
        if type(element) is Shift:
            # if whitespace continue
            if la.lookup in ws_tokens:
                self.state.append(element.action)
                self.abs_parse_distance += len(la.symbol.name)
                parsed_distance += len(la.symbol.name)
                la = la.next_term
                continue
            self.state.append(element.action)
            self.abs_parse_distance += len(la.symbol.name)
            parsed_tokens += 1
            parsed_distance += len(la.symbol.name)
            if parsed_tokens >= maxtoks and parsed_distance >= maxdist:
                return True
            la = la.next_term
            continue
        if type(element) is Accept:
            return True
        # Parse error: succeed only if something was already shifted.
        if parsed_tokens > 0:
            return True
        return False
def relex_import(self, startnode, version = 0):
    """Optimised relex for freshly imported files.

    Lexes the whole of `startnode`'s text in one go and rebuilds the
    terminal chain between bos and eos, marking everything changed."""
    success = self.lex(startnode.symbol.name)
    bos = startnode.prev_term  # bos
    parent = bos.parent
    eos = parent.children.pop()
    last_node = bos
    for match in success:
        if match is success[0]:
            # reuse old node for first node to mimic the behaviour of a
            # normal relex
            node = startnode
            node.symbol.name = match[0]
        else:
            node = TextNode(Terminal(match[0]))
        node.lookup = match[1]
        parent.children.append(node)
        # Thread the doubly linked terminal chain.
        last_node.next_term = node
        last_node.right = node
        node.left = last_node
        node.prev_term = last_node
        node.parent = parent
        last_node = node
        node.mark_changed()
    parent.children.append(eos)
    last_node.right = eos  # link to eos
    last_node.next_term = eos
    eos.left = last_node
    eos.prev_term = last_node
    bos.mark_changed()
    eos.mark_changed()
    parent.mark_changed()
def manual_relex(self, bos, pattern):
    """To avoid a bootstrapping loop (inclexer depends on Lexer and thus
    RegexParser), we need to lex the regex grammar manually.

    Appends one TextNode per matched rule after `bos`.

    Fixed: if no rule in `self.lrules` matched at the current position,
    `pos` never advanced and the loop span forever; we now raise a
    descriptive ValueError instead.
    """
    import re
    pos = 0
    while pos < len(pattern):
        for name, regex in self.lrules:
            r = re.match(regex, pattern[pos:])
            if r:
                n = TextNode(Terminal(r.group(0)))
                n.lookup = name
                bos.insert_after(n)
                bos = n
                pos += len(r.group(0))
                break
        else:
            # No rule matched; previously this looped forever here.
            raise ValueError(
                "Cannot lex %r at position %d" % (pattern, pos))
def test_token_iter_newline(self):
    """Three nodes forming a string across a newline yield one combined
    'string' token, after which the iterator is exhausted."""
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]
    part1 = TextNode(Terminal("'a"))
    part2 = TextNode(Terminal("\r"))
    part3 = TextNode(Terminal("b'"))
    bos.insert_after(part1)
    part1.insert_after(part2)
    part2.insert_after(part3)
    tokens = self.lexer.get_token_iter(part1)
    assert next(tokens) == (["'a", "\r", "b'"], "string", 0,
                            [TextNode(Terminal("'a")),
                             TextNode(Terminal("\r")),
                             TextNode(Terminal("b'"))], 0)
    with pytest.raises(StopIteration):
        next(tokens)
def incparse_inc_parse_top(self): self.incparser.stack[0].indent = [0] # init bos with indent self.last_indent = [0] self.multimode = None self.multinewlines = [] self.lookahead = {} self.deleted = set() bos = self.incparser.previous_version.parent.children[0] eos = self.incparser.previous_version.parent.children[-1] d = eos.prev_term while isinstance(d.symbol, IndentationTerminal): d = d.prev_term self.last_token_before_eos = d if isinstance(d, BOS): # if file is empty, delete left over indentation tokens n = d.next_term while isinstance(n.symbol, IndentationTerminal): self.deleted.add(n) n.mark_changed() n = n.next_term # fix indentation after bos. Should result in an error for whitespace # at the beginning if bos.next_term.lookup == "<ws>": self.pseudo_insert(bos, TextNode(IndentationTerminal("INDENT"))) elif isinstance(bos.next_term.symbol, IndentationTerminal): self.deleted.add(bos.next_term) bos.next_term.mark_changed()
def test_multitoken_relex_merge(self):
    # Deleting a newline child from a MultiTextNode merges its neighbours
    # while reusing the surviving child nodes.
    lexer = IncrementalLexer(""" "\"[a-z\r\x80]*\"":str """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    text = TextNode(Terminal("\"abc\rde\rf\""))
    bos.insert_after(text)
    lexer.relex(text)
    assert bos.next_term.lookup == "str"
    assert bos.next_term == mk_multitextnode([
        Terminal("\"abc"), Terminal("\r"), Terminal("de"),
        Terminal("\r"), Terminal("f\"")
    ])
    bos.next_term.children.pop(3)  # remove a newline
    bos.next_term.update_children()
    child0 = bos.next_term.children[0]
    child1 = bos.next_term.children[1]
    child2 = bos.next_term.children[2]
    lexer.relex(bos.next_term)
    assert bos.next_term == mk_multitextnode(
        [Terminal("\"abc"), Terminal("\r"), Terminal("def\"")])
    # Surviving children must be reused, not recreated.
    assert bos.next_term.children[0] is child0
    assert bos.next_term.children[1] is child1
    assert bos.next_term.children[2] is child2
def test_lookahead(self):
    # Relexing 'a' edited into 'aa' must look back one token (merging into
    # 'aaa') but no further: child[1] is deliberately poisoned to make any
    # overreach crash.
    lexer = IncrementalLexer(""" "aaa":aaa "a":a "b":b """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    text = TextNode(Terminal("baab"))
    bos.insert_after(text)
    lexer.relex(text)
    assert ast.parent.children[1].symbol.name == "b"
    assert ast.parent.children[2].symbol.name == "a"
    assert ast.parent.children[3].symbol.name == "a"
    assert ast.parent.children[4].symbol.name == "b"
    ast.parent.children[1].symbol = None  # Check that lookback doesn't overreach
    ast.parent.children[3].symbol.name = "aa"
    lexer.relex(ast.parent.children[3])
    assert ast.parent.children[2].symbol.name == "aaa"
    assert ast.parent.children[3].symbol.name == "aa"
    assert ast.parent.children[3].deleted is True
    assert ast.parent.children[4].symbol.name == "b"
def test_relex2(self):
    """Appending characters to existing tokens relexes them incrementally."""
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]
    first = TextNode(Terminal("1"))
    bos.insert_after(first)
    self.relex(first)
    assert bos.next_term.symbol == Terminal("1")
    first.symbol.name = "1+"
    self.relex(first)
    assert bos.next_term.symbol == Terminal("1")
    plus = bos.next_term.next_term
    assert plus.symbol == Terminal("+")
    plus.symbol.name = "+2"
    self.relex(plus)
    cursor = bos.next_term
    assert cursor.symbol == Terminal("1")
    cursor = cursor.next_term
    assert cursor.symbol == Terminal("+")
    cursor = cursor.next_term
    assert cursor.symbol == Terminal("2")
def test_relex(self):
    """Relexing '1 + 2 * 3' produces alternating number/ws/operator tokens."""
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]
    node = TextNode(Terminal("1 + 2 * 3"))
    bos.insert_after(node)
    self.relex(node)
    assert tree.parent.symbol == Nonterminal("Root")
    assert isinstance(tree.parent.children[0], BOS)
    assert isinstance(tree.parent.children[-1], EOS)
    # Expected (lexeme, lookahead) pairs; None means lookahead unchecked.
    expected = [("1", 1), (" ", 1), ("+", 0), (" ", None), ("2", None),
                (" ", None), ("*", None), (" ", None), ("3", None)]
    term = bos.next_term
    for name, la in expected:
        assert term.symbol == Terminal(name)
        if la is not None:
            assert term.lookahead == la
        term = term.next_term
    assert isinstance(term, EOS)
def test_relex_altered_string(self):
    """Editing the text of an existing string token relexes without error."""
    lexer = IncrementalLexer(""" "\"[^\"]*\"":str "[0-9]+":INT "\+":PLUS """)
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]
    eos = tree.parent.children[1]
    start = TextNode(Terminal("123+\"\""))
    bos.insert_after(start)
    lexer.relex(start)
    number = bos.next_term
    assert number.symbol == Terminal("123")
    assert number.lookup == "INT"
    assert number.lookahead == 1
    plus = number.next_term
    assert plus.symbol == Terminal("+")
    assert plus.lookup == "PLUS"
    assert plus.lookahead == 0
    string = plus.next_term
    assert string.symbol == Terminal("\"\"")
    assert string.lookup == "str"
    assert string.lookahead == 0
    string.symbol.name = "\"abc\""
    lexer.relex(string)
def relex_import(self, startnode, version = 0):
    """
    Replace a node with the tokens of its name
    :param startnode: node to expand
    :param version: version assigned to each created node
    :return:
    """
    success = self.lex(startnode.symbol.name)
    bos = startnode.prev_term  # bos
    startnode.parent.remove_child(startnode)
    parent = bos.parent
    eos = parent.children.pop()
    last_node = bos
    # Append one TextNode per lexed token, threading the terminal chain.
    for match in success:
        node = TextNode(Terminal(match[0]))
        node.version = version
        node.lookup = match[1]
        parent.children.append(node)
        last_node.next_term = node
        last_node.right = node
        node.left = last_node
        node.prev_term = last_node
        node.parent = parent
        last_node = node
    parent.children.append(eos)
    last_node.right = eos  # link to eos
    last_node.next_term = eos
    eos.left = last_node
    eos.prev_term = last_node
def merge_back(self, read_nodes, generated_tokens):
    """Write the generated tokens back into the nodes they were read from.

    Reuses existing nodes where possible, appends empty nodes when more
    tokens than nodes were produced, and removes leftover nodes. Returns
    True if anything changed.

    Fixed: used the Python 2-only iterator method ``it.next()``; now uses
    the builtin ``next()`` / plain iteration.
    """
    any_changes = False
    # insert new nodes into tree
    it = iter(read_nodes)
    last_node = None
    for t in generated_tokens:
        try:
            node = next(it)
        except StopIteration:
            # More tokens than nodes: grow the tree after the last node.
            node = TextNode(Terminal(""))
            last_node.insert_after(node)
            any_changes = True
        last_node = node
        node.symbol.name = t.source
        if node.lookup != t.name or t.source.find("*/") > 0:
            any_changes = True
            node.mark_changed()
        else:
            node.mark_version()
        node.lookup = t.name
        node.lookahead = t.lookahead
    # delete left over nodes
    for node in it:
        node.parent.remove_child(node)
        any_changes = True
    return any_changes
def init_ast(self, magic_parent=None):
    """Create a fresh AST containing only BOS and EOS under a Root node."""
    bos = BOS(Terminal(""), 0, [])
    eos = EOS(FinishSymbol(), 0, [])
    for sentinel in (bos, eos):
        sentinel.magic_parent = magic_parent
    bos.next_term = eos
    eos.prev_term = bos
    self.previous_version = AST(TextNode(Nonterminal("Root"), 0, [bos, eos]))
def test_bug_two_newlines_delete_one(self):
    # Regression test: with two newlines inside a string token, emptying one
    # newline child and relexing must merge the surrounding children.
    lexer = IncrementalLexer(""" "\"[^\"]*\"":str "[0-1]+":INT "\+":plus """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    text1 = TextNode(Terminal("1+\"abc\""))
    bos.insert_after(text1)
    lexer.relex(text1)
    assert bos.next_term == TextNode(Terminal("1"))
    assert bos.next_term.next_term == TextNode(Terminal("+"))
    assert bos.next_term.next_term.next_term == TextNode(
        Terminal("\"abc\""))
    s = bos.next_term.next_term.next_term
    s.symbol.name = "\"a\rb\rc\""
    lexer.relex(s)
    assert bos.next_term.next_term.next_term == mk_multitextnode([
        Terminal("\"a"), Terminal("\r"), Terminal("b"),
        Terminal("\r"), Terminal("c\"")
    ])
    # Delete the second newline by emptying its text.
    bos.next_term.next_term.next_term.children[3].symbol.name = ""
    assert bos.next_term.next_term.next_term == mk_multitextnode([
        Terminal("\"a"), Terminal("\r"), Terminal("b"),
        Terminal(""), Terminal("c\"")
    ])
    lexer.relex(bos.next_term.next_term.next_term)
    assert bos.next_term.next_term.next_term == mk_multitextnode(
        [Terminal("\"a"), Terminal("\r"), Terminal("bc\"")])
def test_multitoken_reuse1(self):
    # Inserting another newline into a MultiTextNode child must reuse the
    # existing children and the MultiTextNode itself, relinking the
    # prev_term/next_term chain of all five children.
    lexer = IncrementalLexer(""" "\"[a-z\r\x80]*\"":str """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    text = TextNode(Terminal("\"abc\rdef\""))
    bos.insert_after(text)
    lexer.relex(text)
    assert bos.next_term.lookup == "str"
    assert bos.next_term == mk_multitextnode(
        [Terminal("\"abc"), Terminal("\r"), Terminal("def\"")])
    assert bos.next_term.children[0] is text
    bos.next_term.children[2].symbol.name = "de\rf\""  # insert another newline
    child0 = bos.next_term.children[0]
    child1 = bos.next_term.children[1]
    child2 = bos.next_term.children[2]
    mt = bos.next_term
    lexer.relex(bos.next_term)
    assert bos.next_term == mk_multitextnode([
        Terminal("\"abc"), Terminal("\r"), Terminal("de"),
        Terminal("\r"), Terminal("f\"")
    ])
    # test if nodes within a MultiTextNode are reused
    assert bos.next_term.children[0] is child0
    assert bos.next_term.children[1] is child1
    assert bos.next_term.children[2] is child2
    child3 = bos.next_term.children[3]
    child4 = bos.next_term.children[4]
    # The internal terminal chain must be fully relinked.
    assert child0.prev_term is None
    assert child0.next_term is child1
    assert child1.prev_term is child0
    assert child1.next_term is child2
    assert child2.prev_term is child1
    assert child2.next_term is child3
    assert child3.prev_term is child2
    assert child3.next_term is child4
    assert child4.prev_term is child3
    assert child4.next_term is None
    assert bos.next_term is mt  # reused the MultiTextNode
def test_backwards_lexing(self):
    """Lexing must look backwards: adding '=' after '::' merges into '::='."""
    lexer = IncrementalLexer(""" "::=":doublecolon "=":equal ":":singlecolon """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]

    def insert_and_relex(after, char):
        # Insert a fresh terminal after `after` and relex it.
        node = TextNode(Terminal(char))
        after.insert_after(node)
        lexer.relex(node)
        return node

    text = insert_and_relex(bos, ":")
    assert bos.next_term.symbol.name == ":"
    assert bos.next_term.lookup == "singlecolon"
    assert text.lookahead == 1
    text2 = insert_and_relex(text, ":")
    assert text2.lookahead == 1
    assert bos.next_term.symbol.name == ":"
    assert bos.next_term.next_term.symbol.name == ":"
    insert_and_relex(text2, "=")
    assert bos.next_term.symbol.name == "::="
    assert isinstance(bos.next_term.next_term, EOS)
def relex_import(self, startnode):
    # Replace `startnode` with one TextNode per token of its name,
    # rebuilding the terminal chain between bos and eos.
    success = self.lex(startnode.symbol.name)
    bos = startnode.prev_term  # bos
    startnode.parent.remove_child(startnode)
    parent = bos.parent
    eos = parent.children.pop()
    last_node = bos
    for match in success:
        node = TextNode(Terminal(match[0]))
        node.lookup = match[1]
        parent.children.append(node)
        last_node.next_term = node
        last_node.right = node
        node.left = last_node
        node.prev_term = last_node
        node.parent = parent
        last_node = node
    parent.children.append(eos)
    last_node.right = eos  # link to eos
    last_node.next_term = eos
    eos.left = last_node
    eos.prev_term = last_node
def merge_back(self, read_nodes, generated_tokens):
    """Merge relexed tokens back into the tree, marking indentation-relevant
    neighbours so the incremental parser re-checks the affected lines.

    Returns True if any node changed.

    Fixed: used the Python 2-only iterator method ``it.next()``; now uses
    the builtin ``next()`` / plain iteration.
    """
    any_changes = False
    # insert new nodes into tree
    it = iter(read_nodes)
    last_node = None
    for t in generated_tokens:
        try:
            node = next(it)
        except StopIteration:
            node = TextNode(Terminal(""))
            last_node.insert_after(node)
            any_changes = True
        last_node = node
        node.symbol.name = t.source
        node.indent = None
        if node.lookup != t.name:
            node.mark_changed()
            any_changes = True
        else:
            node.mark_version()
        # we need to invalidate the newline if we changed whitespace or
        # logical nodes that come after it
        if node.lookup == "<ws>" or node.lookup != t.name:
            prev = node.prev_term
            while isinstance(prev.symbol, IndentationTerminal):
                prev = prev.prev_term
            if prev.lookup == "<return>":
                prev.mark_changed()
                any_changes = True
            elif isinstance(prev, BOS):
                # if there is no return, re-indentation won't be triggered
                # in the incremental parser so we have to mark the next
                # terminal. possibly only use case: bos <ws> pass DEDENT eos
                node.next_term.mark_changed()
        # XXX this should become necessary with incparse optimisations turned on
        if node.lookup == "\\" and node.next_term.lookup == "<return>":
            node.next_term.mark_changed()
            any_changes = True
        node.lookup = t.name
        node.lookahead = t.lookahead
    # delete left over nodes
    for node in it:
        node.parent.remove_child(node)
        any_changes = True
    return any_changes
def import_file(self, text):
    """Load `text` as the buffer content: lex it, rescan linebreaks,
    repair indentation and reparse from scratch."""
    # init
    self.cursor = Cursor(self.get_bos(), 0, 0)
    # NOTE(review): `del p` only unbinds the loop variable; it does not
    # remove anything from self.parsers — confirm intent.
    for p in self.parsers[1:]:
        del p
    # convert linebreaks
    text = text.replace("\r\n", "\r")
    text = text.replace("\n", "\r")
    parser = self.parsers[0][0]
    lexer = self.parsers[0][1]
    # lex text into tokens
    bos = parser.previous_version.parent.children[0]
    new = TextNode(Terminal(text))
    bos.insert_after(new)
    root = new.get_root()
    lexer.relex_import(new)
    self.rescan_linebreaks(0)
    im = self.parsers[0][4]
    if im:
        im.repair_full()
    self.reparse(bos)
    self.changed = True
    return
def pasteText(self, text):
    # Insert pasted text at the cursor, normalising linebreaks, then
    # relex/reparse around the edited node and advance the cursor.
    oldpos = self.cursor.get_x()
    node = self.get_node_from_cursor()
    next_node = node.next_term
    if self.hasSelection():
        self.deleteSelection()
    # Normalise all linebreak styles to \r.
    text = text.replace("\r\n", "\r")
    text = text.replace("\n", "\r")
    if self.cursor.inside():
        internal_position = self.cursor.pos
        node.insert(text, internal_position)
        self.cursor.pos += len(text)
    else:
        #XXX same code as in key_normal
        pos = 0
        if isinstance(node, BOS) or node.symbol.name == "\r" or isinstance(node.symbol, MagicTerminal):
            # insert new node: [bos] [newtext] [next node]
            old = node
            node = TextNode(Terminal(""))
            old.insert_after(node)
            self.cursor.pos = len(text)
        else:
            pos = len(node.symbol.name)
            self.cursor.pos += len(text)
        node.insert(text, pos)
        self.cursor.node = node
    self.relex(node)
    self.post_keypress("")
    self.reparse(node)
    self.cursor.fix()
    self.cursor.line += text.count("\r")
    self.changed = True
def test_stringwrapper(self):
    """StringWrapper exposes a chain of terminals as one indexable string."""
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]
    eos = tree.parent.children[1]
    pieces = ["abc", "+", "1", "*", "3456"]
    prev = bos
    first = None
    for piece in pieces:
        node = TextNode(Terminal(piece))
        if first is None:
            first = node
        prev.insert_after(node)
        prev = node
    wrapper = StringWrapper(first)
    for idx, ch in [(0, "a"), (2, "c"), (3, "+"), (4, "1"), (5, "*"),
                    (6, "3"), (9, "6")]:
        assert wrapper[idx] == ch
    s = "abc+1*3456"
    for i in range(len(s)):
        for j in range(len(s)):
            assert wrapper[i:j] == s[i:j]
            print(i, j, wrapper[i:j])
def test_relex3(self):
    """Relexing merges adjacent edited nodes but stops before unchanged ones."""
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]
    parts = [TextNode(Terminal(t)) for t in ("1+2", "345", "6+", "789", "+")]
    prev = bos
    for part in parts:
        prev.insert_after(part)
        prev = part
    untouched = parts[3]  # '789' must never be relexed
    self.relex(parts[0])
    assert tree.parent.symbol == Nonterminal("Root")
    assert isinstance(tree.parent.children[0], BOS)
    assert isinstance(tree.parent.children[-1], EOS)
    term = bos.next_term
    assert term.symbol == Terminal("1")
    term = term.next_term
    assert term.symbol == Terminal("+")
    term = term.next_term
    assert term.symbol == Terminal("23456")
    term = term.next_term
    assert term.symbol == Terminal("+")
    # check that 789 hasn't been relexed
    assert term.next_term is untouched
    assert term.next_term.symbol is untouched.symbol
def relex(self, node):
    """Relex the token stream around `node` after an edit.

    Finds the earliest node whose lookahead reaches into `node`, re-reads
    tokens from there and merges the result back into the tree. Returns
    True if any node changed.

    Fixed: the merge-back phase used the Python 2-only iterator method
    ``it.next()``; it now uses the builtin ``next()`` / plain iteration.
    """
    # find farthest node that has lookahead into node
    # start munching tokens and spit out nodes
    # if generated node already exists => stop
    # (only if we passed edited node)

    # find node to start relexing
    startnode = node
    nodes = self.find_preceeding_nodes(node)
    if nodes:
        node = nodes[0]
    past_startnode = node is startnode
    if isinstance(node, EOS):
        # nothing to do here
        return False

    # relex
    read_nodes = []
    generated_tokens = []
    pos = 0   # characters consumed from the nodes read so far
    read = 0  # characters consumed by the tokens generated so far
    current_node = node
    next_token = self.lexer.get_token_iter(StringWrapper(node))
    while True:
        token = next_token()
        if token.source == "":
            read_nodes.append(current_node)
            break
        read += len(token.source)
        # special case when inserting a newline into a string, the lexer
        # creates a single token. We need to make sure that that newline
        # gets lexed into its own token
        if len(token.source) > 1 and token.source.find("\r") >= 0:
            l = token.source.split("\r")
            for e in l:
                t = self.lexer.tokenize(e)
                generated_tokens.extend(t)
                if e is not l[-1]:
                    newline = self.lexer.tokenize("\r")
                    generated_tokens.extend(newline)
        else:
            generated_tokens.append(token)
        while read > pos + len(current_node.symbol.name):
            pos += len(current_node.symbol.name)
            read_nodes.append(current_node)
            current_node = current_node.next_term
            if current_node is startnode:
                past_startnode = True
        # Stop once we are past the edit and token/node boundaries align.
        if past_startnode and read == pos + len(current_node.symbol.name):
            read_nodes.append(current_node)
            break

    any_changes = False
    # insert new nodes into tree
    it = iter(read_nodes)
    last_node = None
    for t in generated_tokens:
        try:
            node = next(it)
        except StopIteration:
            node = TextNode(Terminal(""))
            last_node.insert_after(node)
            any_changes = True
        last_node = node
        node.symbol.name = t.source
        if node.lookup != t.name:
            any_changes = True
        node.lookup = t.name
        node.lookahead = t.lookahead
    # delete left over nodes
    for node in it:
        node.parent.remove_child(node)
        any_changes = True
    return any_changes
def key_normal(self, text, undo_mode = True):
    """Handle a normal keypress: insert `text` at the cursor, auto-indent
    after a return, then relex/reparse around the edit. Returns the
    indentation that was inserted (0 if none)."""
    indentation = 0
    if self.hasSelection():
        self.deleteSelection()
    edited_node = self.cursor.node
    if text == "\r":
        # Auto-indent: copy the whitespace of the current line.
        root = self.cursor.node.get_root()
        im = self.get_indentmanager(root)
        if im:
            bol = im.get_line_start(self.cursor.node)
            indentation = im.count_whitespace(bol)
        else:
            indentation = self.get_indentation(self.cursor.line)
        if indentation is None:
            indentation = 0
        text += " " * indentation
    node = self.get_node_from_cursor()
    if node.image and not node.plain_mode:
        self.leave_languagebox()
        node = self.get_node_from_cursor()
    # edit node
    if self.cursor.inside():
        internal_position = self.cursor.pos  #len(node.symbol.name) - (x - self.cursor.x)
        node.insert(text, internal_position)
    else:
        # append to node: [node newtext] [next node]
        pos = 0
        if str(text).startswith("\r"):
            newnode = TextNode(Terminal(""))
            node.insert_after(newnode)
            node = newnode
            self.cursor.pos = 0
        elif isinstance(node, BOS) or node.symbol.name == "\r":
            # insert new node: [bos] [newtext] [next node]
            old = node
            if old.next_term:
                # skip over IndentationTerminals
                old = old.next_term
                while isinstance(old.symbol, IndentationTerminal):
                    old = old.next_term
                old = old.prev_term
            node = TextNode(Terminal(""))
            old.insert_after(node)
            self.cursor.pos = 0
        elif isinstance(node.symbol, MagicTerminal):
            old = node
            node = TextNode(Terminal(""))
            old.insert_after(node)
            self.cursor.pos = 0
        else:
            pos = self.cursor.pos  #len(node.symbol.name)
        node.insert(text, pos)
        self.cursor.node = node
    self.cursor.pos += len(text)
    need_reparse = self.relex(node)
    self.cursor.fix()
    self.fix_cursor_on_image()
    # post_keypress must run with the originally edited node as cursor node.
    temp = self.cursor.node
    self.cursor.node = edited_node
    need_reparse |= self.post_keypress(text)
    self.cursor.node = temp
    self.reparse(node, need_reparse)
    if undo_mode:
        self.undomanager.add('insert', text, self.cursor.copy())
    self.changed = True
    return indentation