Example #1
    def test_token_iter_newline_lbox(self):
        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        new1 = TextNode(Terminal("'a"))
        new2 = TextNode(Terminal("\r"))
        new3 = TextNode(Terminal("b"))
        new4 = TextNode(MagicTerminal("<SQL>"))
        new5 = TextNode(Terminal("c'"))
        bos.insert_after(new1)
        new1.insert_after(new2)
        new2.insert_after(new3)
        new3.insert_after(new4)
        new4.insert_after(new5)

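        # The iterator should return the nodes, including the carriage return
        # and the language box, as a single "string" token, with the box
        # represented by the placeholder lbph.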
        it = self.lexer.get_token_iter(new1)
        assert it.next() == (["'a", "\r", "b", lbph, "c'"], "string", 1, [
            TextNode(Terminal("'a")),
            TextNode(Terminal("\r")),
            TextNode(Terminal("b")),
            TextNode(MagicTerminal("<SQL>")),
            TextNode(Terminal("c'"))
        ])
        with pytest.raises(StopIteration):
            it.next()
Example #2
    def test_multitoken_real_lbox_relex(self):
        lexer = IncrementalLexer("""
"\"[^\"]*\"":str
        """)

        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        eos = ast.parent.children[1]
        text1 = TextNode(Terminal("\"abc"))
        lbox = TextNode(MagicTerminal("<SQL>"))
        text2 = TextNode(Terminal("def\""))
        bos.insert_after(text1)
        text1.insert_after(lbox)
        lbox.insert_after(text2)
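        # Relexing should merge all three nodes into a single MultiTextNode
        # lexed as "str", since the string literal spans the language box.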
        lexer.relex(text1)
        assert bos.next_term.lookup == "str"
        assert bos.next_term == mk_multitextnode(
            [Terminal("\"abc"),
             MagicTerminal("<SQL>"),
             Terminal("def\"")])

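        # Editing the first child to contain a carriage return splits it into
        # three children at the "\r", while the whole token remains a single
        # multitext node.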
        bos.next_term.children[0].symbol.name = "\"ab\rc"
        lexer.relex(bos.next_term)

        assert bos.next_term == mk_multitextnode([
            Terminal("\"ab"),
            Terminal("\r"),
            Terminal("c"),
            MagicTerminal("<SQL>"),
            Terminal("def\"")
        ])
Example #3
    def test_multitoken_real_lbox_cut_off_string(self):
        lexer = IncrementalLexer("""
"\"[^\"]*\"":str
"[a-z]+":var
        """)

        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        eos = ast.parent.children[1]
        text1 = TextNode(Terminal("\"abc"))
        lbox = TextNode(MagicTerminal("<SQL>"))
        text2 = TextNode(Terminal("d\"ef\"g"))
        bos.insert_after(text1)
        text1.insert_after(lbox)
        lbox.insert_after(text2)
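        # The quote after "d" closes the string early and the final '"g' has
        # no matching closing quote, so relexing raises a LexingError and
        # leaves '"g' behind as a leftover node.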
        pytest.raises(LexingError, lexer.relex, text1)
        assert type(bos.next_term) is MultiTextNode
        assert bos.next_term.children[0] is text1
        assert bos.next_term.children[1] is lbox
        assert bos.next_term.children[2] is text2
        assert text2.symbol.name == "d\""
        assert bos.next_term.next_term.symbol.name == "ef"
        leftover = bos.next_term.next_term.next_term
        assert leftover.symbol.name == "\"g"

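        # Removing the stray quote and relexing the leftover repairs the
        # remainder, which now lexes as a single "efg" var token.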
        leftover.symbol.name = "g"
        leftover.changed = True
        lexer.relex(leftover)
        assert bos.next_term.lookup == "str"
        assert bos.next_term == mk_multitextnode(
            [Terminal("\"abc"),
             MagicTerminal("<SQL>"),
             Terminal("d\"")])
        assert bos.next_term.next_term.symbol.name == "efg"
Example #4
    def test_multitoken_real_lbox_relex_cut_off_string(self):
        lexer = IncrementalLexer("""
"\"[^\"]*\"":str
"[a-z]+":var
        """)

        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        eos = ast.parent.children[1]
        text1 = TextNode(Terminal("\"abc"))
        lbox = TextNode(MagicTerminal("<SQL>"))
        text2 = TextNode(Terminal("def\""))
        bos.insert_after(text1)
        text1.insert_after(lbox)
        lbox.insert_after(text2)
        lexer.relex(text1)
        assert bos.next_term.lookup == "str"
        assert bos.next_term == mk_multitextnode(
            [Terminal("\"abc"),
             MagicTerminal("<SQL>"),
             Terminal("def\"")])
        assert bos.next_term.lookahead == 0

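        # The edit closes the string at 'd"', but the trailing quote opens a
        # new, unterminated string, so relexing fails.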
        bos.next_term.children[2].symbol.name = "d\"ef\""
        pytest.raises(LexingError, lexer.relex, bos.next_term)

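        # Without the trailing quote the relex succeeds: the multitext token
        # is cut off at 'd"' and the remaining "ef" becomes a separate token.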
        bos.next_term.children[2].symbol.name = "d\"ef"
        lexer.relex(bos.next_term)

        assert bos.next_term == mk_multitextnode(
            [Terminal("\"abc"),
             MagicTerminal("<SQL>"),
             Terminal("d\"")])
        assert bos.next_term.next_term.symbol.name == "ef"
Example #5
    def test_multitoken_real_lbox_multiple(self):
        lexer = IncrementalLexer("""
"\"[^\"]*\"":str
        """)

        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        eos = ast.parent.children[1]
        n1 = TextNode(Terminal("\"abc"))
        n2 = TextNode(MagicTerminal("<SQL>"))
        n3 = TextNode(Terminal("def"))
        n4 = TextNode(MagicTerminal("<Calc>"))
        n5 = TextNode(Terminal("ghi\""))
        bos.insert_after(n1)
        n1.insert_after(n2)
        n2.insert_after(n3)
        n3.insert_after(n4)
        n4.insert_after(n5)
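        # A string literal spanning two different language boxes should still
        # relex into a single multitext "str" token.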
        lexer.relex(n1)
        assert bos.next_term.lookup == "str"
        assert bos.next_term == mk_multitextnode([
            Terminal("\"abc"),
            MagicTerminal("<SQL>"),
            Terminal("def"),
            MagicTerminal("<Calc>"),
            Terminal("ghi\"")
        ])
Example #6
 def heuristic_line(self, errornode):
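     # Walk backwards from the error to the start of the line (or BOS). At
     # each position where a box for `sub` could be shifted, run the
     # sub-language recogniser and record every end point that covers the
     # error and still parses after the box.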
     valid = []
     pv = self.op.prev_version
     for sub in self.langs:
         lbox = MagicTerminal("<{}>".format(sub))
         node = errornode.prev_term
         while True:
             element = self.op.syntaxtable.lookup(node.state, lbox)
             if type(element) in [Reduce, Shift]:
                 r = self.langs[sub]
                 r.mode_limit_tokens_new = self.mode_limit_tokens_new
                 start = node.next_term
                 result = r.parse(start)
                 if r.possible_ends:
                     for e, enddist, split in r.possible_ends:
                         if e.lookup == "<ws>" or e.lookup == "<return>":
                             continue
                         if self.contains_errornode(start, e, errornode):
                             if self.parse_after_lbox_h2(
                                     lbox, e, start, pv, split):
                                 total_distance = self.abs_parse_distance + enddist
                                 valid.append(
                                     (start, e, sub, total_distance, split))
             if node.lookup == "<return>" or type(node) is BOS:
                 break
             node = node.prev_term
     return valid
Example #7
 def heuristic_history(self, errornode):
     valid = []
     ws = ["<ws>", "<return>"]
     searched = set()
     pv = self.op.prev_version
     for sub in self.langs:
         lbox = MagicTerminal("<{}>".format(sub))
         parent = errornode.parent
         while parent is not None:
             if parent.get_attr("parent", pv) is None:  # Root
                 # If we've reached the root, try inserting the box after
                 # BOS, i.e. the beginning of the file
                 left = parent.get_attr("children", pv)[0]  # bos
             else:
                 left = parent.get_attr("left", pv)
             while left and type(left.symbol) is Nonterminal and len(
                     left.get_attr("children", pv)) == 0:
                 # If left is an empty nonterminal, keep going left until we
                 # find a non-empty nonterminal or a terminal
                 left = left.get_attr("left", pv)
             if left:
                 state = left.state
                 element = self.op.syntaxtable.lookup(state, lbox)
                 if type(element) in [Reduce, Shift]:
                     term = self.find_terminal(left, pv)
                     if term and term not in searched:
                          tleft = term.prev_term  # left's rightmost terminal
                         if type(term) is EOS:
                             parent = parent.get_attr("parent", pv)
                             continue
                         while term and term.lookup in ws:
                             # skip whitespace
                             term = term.next_term
                         element = self.op.syntaxtable.lookup(
                             tleft.state, lbox)
                         if type(element) not in [Reduce, Shift]:
                              # Usually, if `lbox` can be shifted after `left`,
                              # it should also be shiftable after `left`'s
                              # rightmost terminal. However, that terminal might
                              # have changed and caused an error that was
                              # isolated, in which case `lbox` isn't valid
                              # after all.
                             parent = parent.get_attr("parent", pv)
                             continue
                         r = self.langs[sub]
                         r.mode_limit_tokens_new = self.mode_limit_tokens_new
                         result = r.parse(term)
                         if r.possible_ends:
                             for e, enddist, split in r.possible_ends:
                                 if e.lookup in ws:
                                     continue
                                 if (self.contains_errornode(term, e, errornode) \
                                         and self.parse_after_lbox_h2(lbox, e, parent, pv, split)):
                                     total_distance = self.abs_parse_distance + enddist
                                     valid.append((term, e, sub,
                                                   total_distance, split))
                                     searched.add(term)
             parent = parent.get_attr("parent", pv)
     return valid
Example #8
    def test_multitoken_relex_to_normal(self):
        lexer = IncrementalLexer("""
"\"[^\"]*\"":str
"[0-9]+":INT
        """)

        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        eos = ast.parent.children[1]
        text1 = mk_multitextnode([Terminal("123"), MagicTerminal("<SQL>")])
        bos.insert_after(text1)
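        # "123" lexes as a complete INT on its own, so relexing splits the
        # multitext node back into an ordinary token followed by the
        # language box.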
        lexer.relex(text1)
        assert bos.next_term.lookup == "INT"
        assert bos.next_term.symbol == Terminal("123")
        assert bos.next_term.lookahead == 1
        assert bos.next_term.next_term.symbol == MagicTerminal("<SQL>")
Example #9
    def test_token_iter_lbox(self):
        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        new = TextNode(Terminal("12"))
        new2 = TextNode(MagicTerminal("<SQL>"))
        new3 = TextNode(Terminal("34"))
        bos.insert_after(new)
        new.insert_after(new2)
        new2.insert_after(new3)

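        # Each node is returned as its own token; the language box comes back
        # as the placeholder lbph with an empty lookup.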
        it = self.lexer.get_token_iter(new)
        assert next(it) == ("12", "INT", 1, [TextNode(Terminal("12"))], 0)
        assert next(it) == (lbph, "", 0, [TextNode(MagicTerminal("<SQL>"))], 0)
        assert next(it) == ("34", "INT", 1, [TextNode(Terminal("34"))], 0)
        with pytest.raises(Exception):
            next(it)
Example #10
    def test_token_iter_lbox(self):
        lexer = IncrementalLexer("""
"[0-9]+":INT
        """)
        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        new = TextNode(Terminal("12"))
        new2 = TextNode(MagicTerminal("<SQL>"))
        new3 = TextNode(Terminal("34"))
        bos.insert_after(new)
        new.insert_after(new2)
        new2.insert_after(new3)

        next_token = lexer.lexer.get_token_iter(new).next
        assert next_token() == ("12", "INT", 1, [TextNode(Terminal("12"))], 0)
        assert next_token() == (lbph, "", 0,
                                [TextNode(MagicTerminal("<SQL>"))], 0)
        assert next_token() == ("34", "INT", 1, [TextNode(Terminal("34"))], 0)
Example #11
    def test_token_iter_lbox_multi(self):
        lexer = IncrementalLexer("""
"[0-9]+":INT
"\"[^\"]*\"":STRING
        """)
        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        new = TextNode(Terminal("\"abc"))
        new2 = TextNode(MagicTerminal("<SQL>"))
        new3 = TextNode(Terminal("def\""))
        bos.insert_after(new)
        new.insert_after(new2)
        new2.insert_after(new3)

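        # The iterator should return one STRING token covering both text
        # nodes and the language box placeholder.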
        next_token = lexer.lexer.get_token_iter(new).next
        assert next_token() == (["\"abc", lbph, "def\""], "STRING", 0, [
            TextNode(Terminal("\"abc")),
            TextNode(MagicTerminal("<SQL>")),
            TextNode(Terminal("def\""))
        ], 0)
Example #12
 def parse_symbol(self, node):
     node = node.children[0]
     if node.lookup == "nonterminal":
         return Nonterminal(node.symbol.name)
     elif node.lookup == "terminal":
         if node.symbol.name != "\"<eos>\"":
             self.terminals.add(node.symbol.name[1:-1])
         return Terminal(node.symbol.name[1:-1])
     elif node.lookup == "languagebox":
         return MagicTerminal(node.symbol.name)
     elif node.symbol.name == "function":
         return self.parse_function(node)
Example #13
    def test_relex_altered_comment(self):
        lexer = IncrementalLexer("""
"#[^\r]*":comment
"[0-9]+":INT
"\+":PLUS
        """)

        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        eos = ast.parent.children[1]
        text1 = TextNode(Terminal("1+"))
        text2 = TextNode(Terminal("#abc"))
        text3 = TextNode(MagicTerminal("<SQL>"))
        bos.insert_after(text1)
        text1.insert_after(text2)
        text2.insert_after(text3)
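        # Relexing splits "1+" into separate tokens and merges the comment
        # with the adjacent language box into a multitext node.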
        lexer.relex(text1)
        assert bos.next_term.symbol == Terminal("1")
        assert bos.next_term.next_term.symbol == Terminal("+")
        assert bos.next_term.next_term.next_term == mk_multitextnode(
            [Terminal("#abc"), MagicTerminal("<SQL>")])
Example #14
    def test_lexer_returns_nodes(self):
        lexer = IncrementalLexer("""
"\"[^\"]*\"":str
        """)

        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        eos = ast.parent.children[1]
        text1 = TextNode(Terminal("\"abc"))
        lbox = TextNode(MagicTerminal("<SQL>"))
        text2 = TextNode(Terminal("def\""))
        bos.insert_after(text1)
        text1.insert_after(lbox)
        lbox.insert_after(text2)
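        # After relexing, the three nodes should be merged into a single
        # multitext "str" node with no lookahead.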
        lexer.relex(text1)
        assert bos.next_term.lookup == "str"
        assert bos.next_term == mk_multitextnode(
            [Terminal("\"abc"),
             MagicTerminal("<SQL>"),
             Terminal("def\"")])
        assert bos.next_term.lookahead == 0
Example #15
    def test_token_iter_lbox2(self):
        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        new = TextNode(Terminal("12"))
        new2 = TextNode(Terminal("'string with"))
        new3 = TextNode(MagicTerminal("<SQL>"))
        new4 = TextNode(Terminal("inside'"))
        bos.insert_after(new)
        new.insert_after(new2)
        new2.insert_after(new3)
        new3.insert_after(new4)

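        # The INT token is returned on its own; the string token spans the
        # language box and is returned as a list of parts.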
        it = self.lexer.get_token_iter(new)
        assert it.next() == ("12", "INT", 1, [TextNode(Terminal("12"))])
        assert it.next() == (["'string with", lbph, "inside'"], "string", 1, [
            TextNode(Terminal("'string with")),
            TextNode(MagicTerminal("<SQL>")),
            TextNode(Terminal("inside'"))
        ])
        with pytest.raises(StopIteration):
            it.next()
Example #16
 def parse_rule(self, node):
     name = node.children[0].symbol.name
     self.current_rulename = name
     alternatives = self.parse_alternatives(node.children[4])
     symbol = Nonterminal(name)
     if self.start_symbol is None:
         self.start_symbol = symbol
     if self.change_startrule and symbol.name == self.change_startrule:
         self.start_symbol = symbol
     r = Rule(symbol)
     for a in alternatives:
         r.add_alternative(a[0], a[1], a[2])
     # add additional alternatives to the grammar (grammar extension feature, e.g. languageboxes)
     if symbol.name in self.extra_alternatives:
         for n in self.extra_alternatives[symbol.name]:
             r.add_alternative([MagicTerminal(n), Nonterminal("WS")], None)
     self.rules[symbol] = r
Example #17
 def heuristic_stack(self, errornode):
     # Find position on stack where lbox would be valid
     valid = []
     for sub in self.langs:
         lbox = MagicTerminal("<{}>".format(sub))
         cut = len(self.op.stack) - 1
         while cut >= 0:
             top = self.op.stack[cut]
             if isinstance(top, EOS):
                 top = top.parent.children[0]  # bos
                 state = 0
             else:
                 state = self.op.stack[cut].state
             # get all possible sublangs
             element = self.op.syntaxtable.lookup(state, lbox)
             if type(element) in [Reduce, Shift]:
                 term = self.find_terminal(top)
                 if type(term) is EOS:
                     cut = cut - 1
                     continue
                 if term:
                     n = term
                     # See if we can get a valid language box using the Recogniser
                     r = self.langs[sub]
                     r.mode_limit_tokens_new = self.mode_limit_tokens_new
                     result = r.parse(n)
                     if r.possible_ends:
                         # Filter results and test if remaining file can be
                         # parsed after shifting the language box
                         for e, enddist, split in r.possible_ends:
                             if e.lookup == "<ws>" or e.lookup == "<return>":
                                 continue
                             if (self.contains_errornode(n, e, errornode) \
                                 and self.parse_after_lbox_h1(lbox, e, cut, split=split, distance=10)) \
                                 or self.parse_after_lbox_h1(lbox, e, cut, errornode):
                                 # Either the error was solved by
                                 # moving it into the box or a box
                                 # was created before it, allowing
                                 # the error to be shifted
                                 total_distance = self.abs_parse_distance + enddist
                                 valid.append(
                                     (n, e, sub, total_distance, split))
             cut = cut - 1
     return valid
Example #18
 def heuristic_line(self, errornode):
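     # Collect every candidate language box position on the error's line;
     # candidates that end on whitespace or newline tokens are skipped.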
     valid = []
     for sub in self.langs:
         lbox = MagicTerminal("<{}>".format(sub))
         node = errornode.prev_term
         while True:
             element = self.op.syntaxtable.lookup(node.state, lbox)
             if type(element) in [Reduce, Shift]:
                 r = self.langs[sub]
                 r.mode_limit_tokens_new = self.mode_limit_tokens_new
                 start = node.next_term
                 result = r.parse(start)
                 if r.possible_ends:
                     for e, enddist, split in r.possible_ends:
                         if e.lookup == "<ws>" or e.lookup == "<return>":
                             continue
                         valid.append((start, e, sub, enddist, split, lbox,
                                       errornode))
             if node.lookup == "<return>" or type(
                     node) is BOS or node.ismultinode():
                 break
             node = node.prev_term
     return valid