Exemplo n.º 1
0
    def __init__(self, start_symbol, grammar, lr_type=0):
        """Prepare an empty state graph for ``grammar``.

        ``start_symbol`` is wrapped in a production with ``None`` on the
        left-hand side to form the initial element.  ``lr_type`` picks the
        closure/goto pair taken from the ``Helper`` instance:

        * ``LR0``: ``closure_0`` / ``goto_0``; the start set holds the
          initial element without a lookahead.
        * ``LR1`` or ``LALR``: ``closure_1`` / ``goto_1``; the initial
          element is added with a lookahead set containing one
          ``FinishSymbol``.

        For any other ``lr_type`` the attributes ``closure``, ``goto`` and
        ``start_set`` are not assigned here.
        """
        self.start_symbol = start_symbol
        self.grammar = grammar

        # Graph bookkeeping, all empty until the graph is built.
        self.state_sets = []
        self.edges = {}
        self.ids = {}
        self.todo = []
        self.done = set()
        self.maybe_compatible = {}

        # Timing and counting statistics, all starting at zero.
        self.goto_time = self.add_time = self.closure_time = 0
        self.closure_count = self.addcount = 0
        self.weakly = self.weakly_count = 0
        self.mergetime = 0

        self.helper = Helper(grammar)
        if lr_type == LR0:
            self.closure = self.helper.closure_0
            self.goto = self.helper.goto_0
            initial = LR0Element(Production(None, [self.start_symbol]), 0)
            self.start_set = StateSet([initial])
        elif lr_type in (LR1, LALR):
            self.closure = self.helper.closure_1
            self.goto = self.helper.goto_1
            self.start_set = StateSet()
            initial = LR0Element(Production(None, [self.start_symbol]), 0)
            self.start_set.add(initial, {FinishSymbol()})
Exemplo n.º 2
0
def closure_1(grammar, state_set):
    """Disabled closure computation with lookaheads for ``state_set``.

    The leading ``assert False`` makes every call fail with
    ``AssertionError`` before any work is done (unless assertions are
    stripped, e.g. with ``-O``).  The body below the assertion is kept as
    written: it copies the elements of ``state_set`` into a fresh
    ``StateSet``, expands every element whose next symbol is a
    ``Nonterminal`` and finally merges elements that only differ in their
    lookahead.
    """
    assert False
    closure = StateSet()
    # Step 1: seed the closure with the given elements
    for element in state_set.elements:
        closure.add(element)
    # Step 2: expand elements that expect a nonterminal next
    for element in closure:
        next_sym = element.next_symbol()
        if not isinstance(next_sym, Nonterminal):
            continue
        # lookahead for the new elements: union of the first sets of
        # (remaining symbols + l) over every lookahead l of this element
        lookahead = set()
        for la in element.lookahead:
            beta_la = list(element.remaining_symbols())
            beta_la.append(la)
            lookahead |= old2_first(grammar, beta_la)

        for alternative in grammar[next_sym].alternatives:
            # an empty alternative is represented by an epsilon symbol
            if alternative == []:
                alternative = [Epsilon()]
            new_element = LR1Element(
                Production(next_sym, alternative), 0, lookahead)
            if alternative == [epsilon]:
                new_element.d = 1
            closure.add(new_element)
    # merge states that only differ in their lookahead
    closure.merge()
    return closure
Exemplo n.º 3
0
 def closure_0(self, state_set):
     """Return the closure of ``state_set`` (no lookaheads) as a ``StateSet``.

     Starting from the elements of ``state_set``, every element whose next
     symbol is a ``Nonterminal`` contributes one new element (position 0)
     per alternative of that nonterminal.  Expansion is repeated on the
     newly found elements until a pass discovers nothing new.
     """
     # 1) The closure always contains the original elements
     closure = set(state_set.elements)
     # 2) Expand only the elements found in the previous pass
     frontier = closure
     while True:
         discovered = set()
         for element in frontier:
             next_sym = element.next_symbol()
             if not isinstance(next_sym, Nonterminal):
                 continue
             for alternative in self.grammar[next_sym].alternatives:
                 # an empty alternative is represented by epsilon
                 if alternative == []:
                     alternative = [epsilon]
                 candidate = State(Production(next_sym, alternative), 0)
                 if alternative == [epsilon]:
                     candidate.d = 1
                 discovered.add(candidate)
         # keep only elements not yet part of the closure
         frontier = discovered - closure
         closure |= frontier
         if not frontier:  # fixed point reached
             break
     return StateSet(closure)
Exemplo n.º 4
0
def goto_1(grammar, state_set, symbol):
    """Return the closure of ``state_set`` advanced over ``symbol``.

    Every element of ``state_set`` whose next symbol equals ``symbol`` is
    cloned, its position ``d`` is moved forward by one, and the resulting
    set is passed to ``closure_1``.
    """
    advanced = StateSet()
    for element in state_set:
        if element.next_symbol() == symbol:
            shifted = element.clone()
            shifted.d += 1
            advanced.add(shifted)
    return closure_1(grammar, advanced)
Exemplo n.º 5
0
 def goto_0(self, state_set, symbol):
     """Return the closure of ``state_set`` advanced over ``symbol``.

     Elements of ``state_set.elements`` whose next symbol equals ``symbol``
     are cloned with their position ``d`` increased by one; the collected
     clones are then closed with ``self.closure_0``.
     """
     advanced = StateSet()
     for element in state_set.elements:
         if element.next_symbol() == symbol:
             shifted = element.clone()
             shifted.d += 1
             advanced.add(shifted)
     return self.closure_0(advanced)
Exemplo n.º 6
0
 def goto_1(self, state_set, symbol):
     """Return the closure of ``state_set`` advanced over ``symbol``.

     Also counts, in ``self.goto_count``, how often this method was called
     for the pair (identity of ``state_set``, ``symbol``) and prints debug
     output at the start and end of the goto step.
     """
     # The counter is keyed by object identity, not by set contents.
     key = (id(state_set), symbol)
     try:
         self.goto_count[key] += 1
     except KeyError:
         self.goto_count[key] = 1
     print("goto", state_set, symbol,
           self.goto_count[key])
     advanced = StateSet()
     for element in state_set:
         if element.next_symbol() == symbol:
             shifted = element.clone()
             shifted.d += 1
             advanced.add(shifted)
     print("goto END")
     return self.closure_1(advanced)
Exemplo n.º 7
0
def closure_0(grammar, state_set):
    """Return the closure of ``state_set`` (no lookaheads) as a ``StateSet``.

    The elements of ``state_set`` are copied into a new ``StateSet``.  While
    iterating over that set, every element whose next symbol is a
    ``Nonterminal`` adds one element (position 0) per alternative of that
    nonterminal to the very set being iterated.
    """
    closure = StateSet()
    # 1) The closure always contains the original elements
    for element in state_set.elements:
        closure.add(element)
    # 2) Expand elements that expect a nonterminal next.
    #    NOTE(review): this adds to ``closure`` while iterating over it, so
    #    whether newly added elements get expanded too depends on how
    #    StateSet iterates - confirm.
    for element in closure:
        next_sym = element.next_symbol()
        if not isinstance(next_sym, Nonterminal):
            continue
        for alternative in grammar[next_sym].alternatives:
            # an empty alternative is represented by epsilon
            if alternative == []:
                alternative = [epsilon]
            candidate = State(Production(next_sym, alternative), 0)
            if alternative == [epsilon]:
                candidate.d = 1
            closure.add(candidate)
    return closure
Exemplo n.º 8
0
    def build(self):
        """Build the state graph starting from ``self.start_set``.

        Phase 1 works through the ids on ``self.todo``.  For each id the
        closure of the stored state set is computed, every non-final element
        is cloned with its position advanced by one, and the clones are
        grouped by the symbol they were advanced over.  Each group is handed
        to ``self.add`` together with the originating id and the symbol.

        Phase 2 replaces every stored state set by its closure and rebuilds
        ``self.ids`` so that each closed state set maps to the id of the
        state set it was computed from.

        Timing and counting statistics are written to the log.  When done,
        ``self.closure`` and ``self.goto`` are set to ``None``.
        """
        # NOTE(review): the counter reset here lives on State, while the
        # value logged below is StateSet._hashtime - confirm which is meant.
        State._hashtime = 0
        start = time()
        # The start set is stored as is under id 0; its closure is computed
        # when id 0 is taken from the todo list.
        start_set = self.start_set
        self.state_sets.append(start_set)
        self.ids[start_set] = 0
        _id = 0
        self.todo.append(_id)
        while self.todo:
            self.addcount += 1
            _id = self.todo.pop()
            self.done.add(_id)
            closure_start = time()
            state_set = self.closure(self.state_sets[_id])
            self.closure_count += 1
            closure_end = time()
            self.closure_time += closure_end - closure_start
            new_gotos = {}
            goto_start = time()
            # create new sets first, then calculate closure
            for lrelement in state_set.elements:
                symbol = lrelement.next_symbol()
                if not symbol:  # state is final
                    continue
                #XXX optimisation: create all configurations before building
                new_element = lrelement.clone()
                new_element.d += 1
                # the advanced element keeps the lookahead of its origin
                new_element_la = state_set.get_lookahead(lrelement)
                stateset = new_gotos.setdefault(symbol, StateSet())
                stateset.add(new_element, new_element_la)

            # now calculate closure and add result to state_sets
            goto_end = time()
            self.goto_time += goto_end - goto_start

            for symbol, new_state_set in new_gotos.items():
                add_start = time()
                self.add(_id, symbol, new_state_set)
                add_end = time()
                self.add_time += add_end - add_start

        end = time()
        logging.info("add time %s", self.add_time)
        logging.info("closure time %s", self.closure_time)
        logging.info("closure time helper %s", self.helper.closure_time)
        logging.info("goto time %s", self.goto_time)
        logging.info("hashtime %s", StateSet._hashtime)
        logging.info("addcount %s", self.addcount)
        logging.info("states %s", len(self.state_sets))
        logging.info("weakly %s", self.weakly)
        logging.info("weakly count %s", self.weakly_count)
        logging.info("mergetime %s", self.mergetime)

        # apply closure
        logging.info("Apply closure to states")
        clstart = time()
        new_state_sets = []
        new_ids = {}
        for state in self.state_sets:
            _id = self.ids[state]
            new_state = self.closure(state)
            new_state_sets.append(new_state)
            # Keep the state set -> id mapping intact; previously the closed
            # state set was mapped to itself and the looked-up id was dropped.
            new_ids[new_state] = _id
        self.state_sets = new_state_sets
        logging.info("after closure %s", len(new_state_sets))
        logging.info("edges %s", len(set(self.edges.values())))
        self.ids = new_ids
        logging.info(time() - clstart)

        # "end" was taken before the final closure pass, so the reported
        # duration covers phase 1 only.
        logging.info("Finished building Stategraph in %s", end - start)
        self.closure = None
        self.goto = None
Exemplo n.º 9
0
 def closure_1(self, state_set):
     """Return the closure of ``state_set`` including lookaheads.

     The elements of ``state_set`` and their lookaheads seed the closure.
     For every non-final element whose next symbol is a ``Nonterminal``,
     one element (position 0) is created per alternative of that
     nonterminal.  Its lookahead is the union of ``self.first`` applied to
     the element's remaining symbols followed by each of the element's
     lookahead symbols.  Elements that are new, or that gained lookahead
     symbols, are processed again in the next pass; the loop ends when a
     pass changes nothing.

     Returns a new ``StateSet`` holding every element together with its
     collected lookahead.
     """
     la_dict = {}
     result = set()
     # Step 1: seed with the given elements and their lookaheads
     for element in state_set.elements:
         la_dict[element] = state_set.get_lookahead(element)
         result.add(element)
     # Step 2: expand until no element or lookahead is added.  The working
     # set is a copy because ``result`` grows while it is being iterated.
     temp = set(result)
     while temp:
         newelements = set()
         for state in temp:
             if state.isfinal():
                 continue
             symbol = state.next_symbol()
             if not isinstance(symbol, Nonterminal):
                 continue
             f = set()
             for l in la_dict[state]:
                 betaL = list(state.remaining_symbols())
                 betaL.append(l)
                 f |= self.first(betaL)
             rule = self.grammar[symbol]
             for i, a in enumerate(rule.alternatives):
                 # create epsilon symbol if alternative is empty
                 if a == []:
                     a = [Epsilon()]
                 p = Production(symbol, a,
                                rule.annotations[i],
                                rule.precs[i])
                 # ``in`` instead of has_key(), which Python 3 removed
                 if i in rule.inserts:
                     insert = rule.inserts[i]
                     p.inserts[insert[0]] = insert[1]
                 s = LR0Element(p, 0)
                 # NOTE(review): the alternative is built with Epsilon()
                 # but compared against ``epsilon`` - confirm they compare
                 # equal.
                 if a == [epsilon]:
                     s.d = 1
                 # NEW ELEMENT:
                 # 1. completely new (+lookahead): add to result
                 # 2. new lookahead: update lookahead in la_dict
                 # -> add to new working set
                 # 3. already known: ignore
                 if s in result:
                     if f.issubset(la_dict[s]):
                         # lookahead in combination with state already known
                         continue
                     la_dict[s] |= f  # new lookahead
                 else:
                     la_dict[s] = set(f)  # completely new
                 result.add(s)
                 newelements.add(s)
         temp = newelements
     # add lookaheads
     final_result = StateSet()
     for element in result:
         final_result.add(element, la_dict[element])
     return final_result