Exemplo n.º 1
0
def closure_1(grammar, state_set):
    """Build the LR(1) closure of ``state_set`` (currently disabled).

    The leading ``assert False`` makes every call fail with an
    ``AssertionError`` unless assertions are stripped; the body below is
    kept for reference.

    It copies the elements of ``state_set`` into a fresh ``StateSet`` and,
    for every element whose next symbol is a nonterminal, adds one
    ``LR1Element`` per alternative of that nonterminal.  The lookahead of the
    new elements is the union of ``old2_first`` over the element's remaining
    symbols followed by each of its lookaheads.  Finally ``merge()`` is
    called on the result and the result is returned.
    """
    assert False
    closure = StateSet()
    # Step 1: start from the elements that were passed in
    for item in state_set.elements:
        closure.add(item)
    # Step 2: expand nonterminals (the set grows while it is iterated)
    for item in closure:
        nonterminal = item.next_symbol()
        if not isinstance(nonterminal, Nonterminal):
            continue

        lookaheads = set()
        for la in item.lookahead:
            lookaheads |= old2_first(
                grammar, list(item.remaining_symbols()) + [la])

        for alternative in grammar[nonterminal].alternatives:
            # an empty alternative is represented by a single epsilon symbol
            if alternative == []:
                alternative = [Epsilon()]
            new_item = LR1Element(
                Production(nonterminal, alternative), 0, lookaheads)
            if alternative == [epsilon]:
                # epsilon productions start with the dot already moved
                new_item.d = 1
            closure.add(new_item)
    # merge states that only differ in their lookahead
    closure.merge()
    return closure
Exemplo n.º 2
0
 def amount(self):
     """Return the symbol count of ``self.action.right``.

     A trailing ``<eos>`` terminal is not counted, and a right-hand side
     consisting only of epsilon counts as zero symbols.
     """
     rhs = self.action.right
     count = len(rhs)
     if count > 0 and rhs[-1] == Terminal("<eos>"):
         # ignore the end-of-stream marker at the end
         return count - 1
     if rhs == [Epsilon()]:
         return 0
     return count
Exemplo n.º 3
0
def test_closure_0():
    """Check ``helper1.closure_0`` for three single-element start sets."""

    def closure_from(production, dot):
        # Wrap one State in a StateSet and compute its closure.
        seed = StateSet()
        seed.add(State(production, dot))
        return helper1.closure_0(seed)

    def check(closure, expected):
        # Every case expects exactly four elements in the closure.
        assert len(closure.elements) == 4
        for production, dot in expected:
            assert State(production, dot) in closure

    # first state Z ::= .S
    closure = closure_from(
        Production(Nonterminal("Z"), [Nonterminal("S")]), 0)
    check(closure, [
        (Production(Z, [S]), 0),
        (Production(S, [S, b]), 0),
        (Production(S, [b, A, a]), 0),
        (Production(S, [a]), 0),
    ])

    # F ::= .C D f (includes an epsilon production with the dot at 1)
    closure = closure_from(Production(F, [C, D, f]), 0)
    check(closure, [
        (Production(F, [C, D, f]), 0),
        (Production(C, [D, A]), 0),
        (Production(D, [d]), 0),
        (Production(D, [Epsilon()]), 1),
    ])

    # C ::= D .A
    closure = closure_from(Production(C, [D, A]), 1)
    check(closure, [
        (Production(C, [D, A]), 1),
        (Production(A, [a, S, c]), 0),
        (Production(A, [a, S, b]), 0),
        (Production(A, [a]), 0),
    ])
Exemplo n.º 4
0
    def inc_parse(self, line_indents=[], needs_reparse=False, state=0, stack = []):
        """Run one incremental (or out-of-context) parse over the previous tree.

        Per-run bookkeeping (``validating``, ``reused_nodes``, ``loopcount``,
        the error lists, ...) is reset, the parse stack is initialised and the
        tree below ``self.previous_version.parent`` is consumed lookahead by
        lookahead until the parse is accepted, fails, or -- when ``self.ooc``
        is set -- the out-of-context abort condition is reached.

        Parameters:
            line_indents: not used in this method body.
            needs_reparse: when true, every nonterminal lookahead is broken
                down instead of being reused.
            state: parser state to start in.
            stack: initial parse stack; when empty, a new stack containing
                only the ``eos`` node is used.

        Returns:
            True if the parse was accepted without recorded error nodes (or
            the out-of-context parse reached its target state), False
            otherwise.  ``self.last_status`` is set to the same value.
        """
        # NOTE(review): `line_indents` and `stack` use mutable default values.
        # Neither default is mutated in this body (an empty `stack` is replaced
        # by a fresh list below), but `None` defaults would be the safer idiom.
        logging.debug("============ NEW %s PARSE ================= ", "OOC" if self.ooc else "INCREMENTAL")
        logging.debug("= starting in state %s ", state)
        self.validating = False
        self.reused_nodes = set()
        self.current_state = state
        self.previous_version.parent.isolated = None
        # First and last child of the root are used as the begin/end markers.
        bos = self.previous_version.parent.children[0]
        eos = self.previous_version.parent.children[-1]
        if not stack:
            self.stack = [eos]
        else:
            self.stack = stack
        eos.state = 0
        self.loopcount = 0
        self.needs_reparse = needs_reparse
        self.error_nodes = []
        self.error_pres = []
        # The recovery manager is rooted at the out-of-context root if one is
        # set, otherwise at the root of the previous version.
        if self.ooc:
            rmroot = self.ooc[1]
        else:
            rmroot = self.previous_version.parent
        self.rm = RecoveryManager(self.prev_version, rmroot, self.stack, self.syntaxtable)

        # Local switch between the optimised and the unoptimised nonterminal
        # handling further below.
        USE_OPT = True


        la = self.pop_lookahead(bos)
        while(True):
            logging.debug("\x1b[35mProcessing\x1b[0m %s %s %s %s", la, la.changed, id(la), la.indent)
            self.loopcount += 1



            # Abort condition for out-of-context analysis. If we reached the state of the
            # node that is being analysed and the lookahead matches the node's
            # lookahead from the previous parse, we are done
            if self.ooc:
                logging.debug("ooc %s %s", self.ooc, id(self.ooc))
                logging.debug("la %s", la)
                logging.debug("cs %s", self.current_state)
                if la is self.ooc[0]:
                    if isinstance(la.symbol, Nonterminal):
                        # if OOC is Nonterminal, use first terminal to apply
                        # reductions
                        first_term = la.find_first_terminal(self.prev_version)
                        lookup = self.get_lookup(first_term)
                    else:
                        lookup = self.get_lookup(la)
                    while True:
                        # OOC is complete if we reached the expected state and
                        # there are no more reductions left to do
                        if self.current_state == self.ooc[2] and len(self.stack) == 2:
                            logging.debug("======= OOC parse successfull =========")
                            self.last_status = True
                            return True
                        # Otherwise apply more reductions to reach the wanted
                        # state or an error occurs
                        element = self.syntaxtable.lookup(self.current_state, lookup)
                        if not isinstance(element, Reduce):
                            logging.debug("No more reductions")
                            break
                        else:
                            self.reduce(element)
                    logging.debug("======= OOC parse failed =========")
                    self.last_status = False
                    return False

            # Terminals, the finish symbol and epsilon are handed to
            # parse_terminal; its result is "Accept", "Error", None or the
            # next lookahead.
            if isinstance(la.symbol, Terminal) or isinstance(la.symbol, FinishSymbol) or la.symbol == Epsilon():
                    lookup_symbol = self.get_lookup(la)
                    result = self.parse_terminal(la, lookup_symbol)
                    if result == "Accept":
                        logging.debug("============ INCREMENTAL PARSE END (ACCEPT) ================= ")
                        # With error recovery we can end up in the accepting
                        # state despite errors occurring during the parse.
                        if len(self.error_nodes) == 0:
                            self.last_status = True
                            return True
                        self.last_status = False
                        return False
                    elif result == "Error":
                        logging.debug("============ INCREMENTAL PARSE END (ERROR) ================= ")
                        self.last_status = False
                        return False
                    elif result != None:
                        la = result

            else: # Nonterminal
                # Note: this checks the `needs_reparse` argument, not the
                # attribute stored on self above.
                if la.has_changes() or needs_reparse or la.has_errors() or self.iso_context_changed(la):
                    la = self.left_breakdown(la)
                else:
                    if USE_OPT:
                        goto = self.syntaxtable.lookup(self.current_state, la.symbol)
                        # Only opt-shift if the nonterminal has children to
                        # avoid a bug in the retainability algorithm. See
                        # test/test_eco.py::Test_RetainSubtree::test_bug1
                        if goto and la.children: # can we shift this Nonterminal in the current state?
                            logging.debug("OPTShift: %s in state %s -> %s", la.symbol, self.current_state, goto)
                            follow_id = goto.action
                            self.stack.append(la)
                            la.deleted = False
                            la.state = follow_id #XXX this fixed goto error (I should think about storing the states on the stack instead of inside the elements)
                            la.exists = True
                            self.current_state = follow_id
                            logging.debug("USE_OPT: set state to %s", self.current_state)
                            if la.isolated:
                                # When skipping previously isolated subtrees,
                                # traverse their children to find the error
                                # nodes and report them back to the editor.
                                self.find_nested_error(la)
                            la = self.pop_lookahead(la)
                            self.validating = True
                            continue
                        else:
                            #XXX can be made faster by providing more information in syntax tables
                            first_term = la.find_first_terminal(self.prev_version)

                            # Use the subtree's first terminal to decide
                            # whether a reduction is pending; otherwise break
                            # the subtree down.
                            lookup_symbol = self.get_lookup(first_term)
                            element = self.syntaxtable.lookup(self.current_state, lookup_symbol)
                            if isinstance(element, Reduce):
                                logging.debug("OPT Reduce: %s", element)
                                self.reduce(element)
                            else:
                                la = self.left_breakdown(la)
                    else:
                        # PARSER WITHOUT OPTIMISATION
                        # (not reached while USE_OPT is hard-coded to True)
                        if la.lookup != "":
                            lookup_symbol = Terminal(la.lookup)
                        else:
                            lookup_symbol = la.symbol
                        # NOTE(review): `element` is looked up but not used in
                        # this branch.
                        element = self.syntaxtable.lookup(self.current_state, lookup_symbol)

                        if self.shiftable(la):
                            logging.debug("\x1b[37mis shiftable\x1b[0m")
                            self.stack.append(la)
                            self.current_state = la.state
                            self.right_breakdown()
                            la = self.pop_lookahead(la)
                        else:
                            la = self.left_breakdown(la)
Exemplo n.º 5
0
        |
"""

# Parse the grammar text defined above and keep the resulting rules.
p = Parser(grammar)
p.parse()
r = p.rules

# Grammar symbols used by the hand-written table below.
b, c, d = (Terminal(name) for name in ("b", "c", "d"))
S, A = (Nonterminal(name) for name in ("S", "A"))

# Productions: S ::= b A d, A ::= c, A ::= <epsilon>
S_bAd = Production(S, [b, A, d])
A_c = Production(A, [c])
A_None = Production(A, [Epsilon()])

# Hand-written table, keyed by (state, symbol).
syntaxtable = {
    (state, symbol): action
    for state, symbol, action in (
        (0, b, Shift(2)),
        (0, S, Goto(1)),
        (1, FinishSymbol(), Accept()),
        (2, c, Shift(4)),
        (2, A, Goto(3)),
        (2, d, Reduce(A_None)),
        (3, d, Shift(5)),
        (4, d, Reduce(A_c)),
        (5, FinishSymbol(), Reduce(S_bAd)),
    )
}


def test_build():
Exemplo n.º 6
0
 def closure_1(self, state_set):
     """Compute the LR(1) closure of ``state_set``.

     Elements are tracked as LR(0) items; their lookaheads are kept in a
     separate dictionary and attached again when the result is built.

     For every non-final element whose next symbol is a nonterminal, one
     ``LR0Element`` per alternative of that nonterminal is created (dot at
     position 0, or 1 for an epsilon alternative).  Its lookahead is the
     union of ``self.first(remaining symbols + [l])`` over all lookaheads
     ``l`` of the originating element.  Elements that are new, or that
     gained new lookahead symbols, are processed again in the next round
     until nothing changes.

     Parameters:
         state_set: object providing ``elements`` and
             ``get_lookahead(element)``.

     Returns:
         A new ``StateSet`` containing every element of the closure, each
         added together with its accumulated lookahead set.
     """
     la_dict = {}
     result = set()
     # Step 1: seed the closure with the given elements and their lookaheads
     # NOTE(review): la_dict stores whatever object get_lookahead() returns;
     # if that is the state set's own set, the `|=` below updates it in
     # place -- confirm this is intended.
     for element in state_set.elements:
         la_dict[element] = state_set.get_lookahead(element)
         result.add(element)
     # Step 2: expand until a round produces no new or updated elements
     pending = set(result)
     while pending:
         newelements = set()
         for state in pending:
             if state.isfinal():
                 continue
             symbol = state.next_symbol()
             if not isinstance(symbol, Nonterminal):
                 continue
             # lookahead for all items derived from `state`
             f = set()
             for l in la_dict[state]:
                 betaL = list(state.remaining_symbols())
                 betaL.append(l)
                 f |= self.first(betaL)
             rule = self.grammar[symbol]
             for i, a in enumerate(rule.alternatives):
                 # create epsilon symbol if alternative is empty
                 if a == []:
                     a = [Epsilon()]
                 p = Production(symbol, a, rule.annotations[i],
                                rule.precs[i])
                 if i in rule.inserts:
                     insert = rule.inserts[i]
                     p.inserts[insert[0]] = insert[1]
                 s = LR0Element(p, 0)
                 if a == [epsilon]:
                     # epsilon items start with the dot already moved
                     s.d = 1
                 # NEW ELEMENT:
                 # 1. completely new (+lookahead): add to result
                 # 2. new lookahead: update lookahead in la_dict
                 # -> add to new working set
                 # 3. already known: ignore
                 if s in result:
                     if f.issubset(la_dict[s]):
                         # lookahead in combination with state already known
                         continue
                     la_dict[s] |= f  # new lookahead
                 else:
                     la_dict[s] = set(f)  # completely new
                 result.add(s)
                 newelements.add(s)
         pending = newelements
     # add lookaheads
     final_result = StateSet()
     for element in result:
         final_result.add(element, la_dict[element])
     return final_result
Exemplo n.º 7
0
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

from .state import State, StateSet, LR0Element, LR1Element
from .production import Production
from grammar_parser.gparser import Terminal, Nonterminal, Epsilon
from .syntaxtable import FinishSymbol

# Module-level Epsilon instance, created once at import time.
epsilon = Epsilon()


def noprint(*args, **kwargs):
    """Accept any positional and keyword arguments and do nothing."""
    return None


class Helper(object):
    def __init__(self, grammar):
        """Store ``grammar`` and initialise the helper's tables and counters.

        ``first_dict`` and ``follow_dict`` start out empty and
        ``calculate_first()`` / ``calculate_follow()`` are called right away
        (presumably to fill them -- those methods are not shown here).
        """
        self.grammar = grammar
        self.closure_time = 0
        self.first_dict = {}
        self.follow_dict = {}
        # The dictionaries above are created before these calls; keep this
        # order in case the calculations rely on them.
        self.calculate_first()
        self.calculate_follow()
        self.goto_count = {}
Exemplo n.º 8
0
    def inc_parse(self, line_indents=[], reparse=False):
        """Run one incremental parse over the previous version of the tree.

        The parser state is reset (fresh stack holding a ``FinishSymbol``
        node, state 0, empty undo log) and the tree below
        ``self.previous_version.parent`` is consumed lookahead by lookahead.

        Parameters:
            line_indents: not used in this method body.
            reparse: when true, every nonterminal lookahead is broken down
                instead of being reused.

        Returns:
            True when ``parse_terminal`` reports ``"Accept"``, False when it
            reports ``"Error"``.  ``self.last_status`` is set to the same
            value.

        Raises:
            AssertionError: if a terminal lookahead is marked as changed.
        """
        # NOTE(review): `line_indents` has a mutable default value; it is not
        # touched here, but a `None` default would be the safer idiom.
        logging.debug("============ NEW INCREMENTAL PARSE ================= ")
        self.validating = False
        self.error_node = None
        self.stack = []
        self.undo = []
        self.current_state = 0
        self.stack.append(Node(FinishSymbol(), 0, []))
        # The first child of the root is used as the begin marker.
        bos = self.previous_version.parent.children[0]
        self.loopcount = 0

        # Local switch between the optimised and the unoptimised nonterminal
        # handling further below.
        USE_OPT = True

        self.pm.do_incparse_inc_parse_top()

        la = self.pop_lookahead(bos)
        while (True):
            logging.debug("\x1b[35mProcessing\x1b[0m %s %s %s %s", la,
                          la.changed, id(la), la.indent)
            self.loopcount += 1
            if isinstance(la.symbol, Terminal) or isinstance(
                    la.symbol, FinishSymbol) or la.symbol == Epsilon():
                if la.changed:
                    assert False  # with prelexing you should never end up here!
                else:
                    # parse_terminal returns "Accept", "Error", None or the
                    # next lookahead.
                    lookup_symbol = self.get_lookup(la)
                    result = self.parse_terminal(la, lookup_symbol)
                    if result == "Accept":
                        self.last_status = True
                        return True
                    elif result == "Error":
                        self.last_status = False
                        return False
                    elif result != None:
                        la = result

            else:  # Nonterminal
                if la.changed or reparse:
                    # deconstruct the
                    #la.changed = False # as all nonterminals that have changed are being rebuild, there is no need to change this flag (this also solves problems with comments)
                    # Record the node and its flag value in the undo log.
                    self.undo.append((la, 'changed', True))
                    la = self.left_breakdown(la)
                else:
                    if USE_OPT:
                        #Follow parsing/syntax table
                        goto = self.syntaxtable.lookup(self.current_state,
                                                       la.symbol)
                        if goto:  # can we shift this Nonterminal in the current state?
                            logging.debug("OPTShift: %s in state %s -> %s",
                                          la.symbol, self.current_state, goto)
                            self.pm.do_incparse_optshift(la)
                            follow_id = goto.action
                            self.stack.append(la)
                            la.state = follow_id  #XXX this fixed goto error (I should think about storing the states on the stack instead of inside the elements)
                            self.current_state = follow_id
                            logging.debug("USE_OPT: set state to %s",
                                          self.current_state)
                            la = self.pop_lookahead(la)
                            self.validating = True
                            continue
                        else:
                            #XXX can be made faster by providing more information in syntax tables
                            first_term = la.find_first_terminal()

                            # Use the subtree's first terminal to decide
                            # whether a reduction is pending; otherwise break
                            # the subtree down.
                            lookup_symbol = self.get_lookup(first_term)
                            element = self.syntaxtable.lookup(
                                self.current_state, lookup_symbol)
                            if isinstance(element, Reduce):
                                self.reduce(element)
                            else:
                                la = self.left_breakdown(la)
                    else:
                        # PARSER WITHOUT OPTIMISATION
                        # (not reached while USE_OPT is hard-coded to True)
                        if la.lookup != "":
                            lookup_symbol = Terminal(la.lookup)
                        else:
                            lookup_symbol = la.symbol
                        # NOTE(review): `element` is looked up but not used in
                        # this branch.
                        element = self.syntaxtable.lookup(
                            self.current_state, lookup_symbol)

                        if self.shiftable(la):
                            logging.debug("\x1b[37mis shiftable\x1b[0m")
                            self.stack.append(la)
                            self.current_state = la.state
                            self.right_breakdown()
                            la = self.pop_lookahead(la)
                        else:
                            la = self.left_breakdown(la)
        # The loop above has no `break`, so this line is never reached; the
        # method always leaves through one of the `return` statements.
        logging.debug("============ INCREMENTAL PARSE END ================= ")