Exemplo n.º 1
0
    def __init__(self, start_symbol, grammar, lr_type=0):
        """Prepare an empty state graph for ``grammar`` rooted at ``start_symbol``.

        ``lr_type`` selects which closure/goto pair of the ``Helper`` is used
        and how the initial state set is built.  For a value that is neither
        ``LR0`` nor ``LR1``/``LALR`` the attributes ``closure``, ``goto`` and
        ``start_set`` are not set at all.
        """
        self.grammar = grammar
        self.start_symbol = start_symbol

        # Graph bookkeeping: known state sets, transitions between them and
        # the work list used while building.
        self.state_sets = []
        self.edges = {}
        self.ids = {}
        self.todo = []
        self.done = set()
        self.maybe_compatible = {}

        # Profiling counters, all starting at zero.
        for counter in ("goto_time", "add_time", "closure_time",
                        "closure_count", "addcount", "weakly",
                        "weakly_count", "mergetime"):
            setattr(self, counter, 0)

        self.helper = Helper(grammar)
        if lr_type == LR0:
            self.closure = self.helper.closure_0
            self.goto = self.helper.goto_0
            initial_element = LR0Element(Production(None, [start_symbol]), 0)
            self.start_set = StateSet([initial_element])
        elif lr_type == LR1 or lr_type == LALR:
            self.closure = self.helper.closure_1
            self.goto = self.helper.goto_1
            # The LR(1)/LALR start element carries the finish symbol as its
            # only lookahead.
            self.start_set = initial_set = StateSet()
            initial_set.add(
                LR0Element(Production(None, [start_symbol]), 0),
                {FinishSymbol()},
            )
Exemplo n.º 2
0
def goto_1(grammar, state_set, symbol):
    """Return the closure of all elements of ``state_set`` advanced over ``symbol``.

    Every element whose next symbol equals ``symbol`` is cloned, its position
    ``d`` is moved one step forward, and the clones are collected in a new
    ``StateSet`` which is then passed through ``closure_1``.
    """
    advanced = StateSet()
    matching = (item for item in state_set if item.next_symbol() == symbol)
    for item in matching:
        shifted = item.clone()
        shifted.d += 1
        advanced.add(shifted)
    return closure_1(grammar, advanced)
Exemplo n.º 3
0
 def goto_0(self, state_set, symbol):
     """Return the LR(0) closure of ``state_set`` advanced over ``symbol``.

     Elements of ``state_set.elements`` whose next symbol equals ``symbol``
     are cloned with their position ``d`` incremented; the resulting set is
     handed to ``self.closure_0``.
     """
     advanced = StateSet()
     matching = (item for item in state_set.elements
                 if item.next_symbol() == symbol)
     for item in matching:
         shifted = item.clone()
         shifted.d += 1
         advanced.add(shifted)
     return self.closure_0(advanced)
Exemplo n.º 4
0
 def goto_1(self, state_set, symbol):
     """Return the LR(1) closure of ``state_set`` advanced over ``symbol``.

     Also counts, per ``(id(state_set), symbol)`` pair, how often this method
     was called and prints that count as debug output before and after the
     elements are advanced.
     """
     key = (id(state_set), symbol)
     calls = self.goto_count.get(key, 0) + 1
     self.goto_count[key] = calls
     print("goto", state_set, symbol, calls)

     advanced = StateSet()
     matching = (item for item in state_set if item.next_symbol() == symbol)
     for item in matching:
         shifted = item.clone()
         shifted.d += 1
         advanced.add(shifted)
     print("goto END")
     return self.closure_1(advanced)
Exemplo n.º 5
0
def closure_1(grammar, state_set):
    """Disabled LR(1) closure of ``state_set`` for ``grammar``.

    The leading ``assert False`` makes every call raise ``AssertionError``
    (unless assertions are stripped, e.g. with ``-O``), so the body below is
    effectively dead code.  It copies the input elements into a new
    ``StateSet``, then for every element whose next symbol is a
    ``Nonterminal`` adds one ``LR1Element`` per alternative of that
    nonterminal, and finally calls ``result.merge()``.
    """
    # Function is deliberately switched off; everything below is unreachable
    # while assertions are enabled.
    assert False
    result = StateSet()
    # Step 1: the closure contains the elements it is computed from
    for state in state_set.elements:
        result.add(state)
    # Step 2: expand elements that stand in front of a nonterminal
    # NOTE(review): ``result`` is extended while it is being iterated;
    # presumably StateSet iteration is expected to pick up the new elements
    # -- confirm against StateSet.
    for state in result:
        symbol = state.next_symbol()
        if isinstance(symbol, Nonterminal):
            # lookahead of the new elements: union of old2_first over
            # "remaining symbols followed by l" for each lookahead l
            f = set()
            for l in state.lookahead:
                betaL = []
                betaL.extend(state.remaining_symbols())
                betaL.append(l)
                f |= old2_first(grammar, betaL)

            alternatives = grammar[symbol].alternatives
            for a in alternatives:
                # create epsilon symbol if alternative is empty
                if a == []:
                    a = [Epsilon()]
                p = Production(symbol, a)
                s = LR1Element(p, 0, f)
                # NOTE(review): the empty alternative was replaced by
                # ``[Epsilon()]`` above but is compared against ``[epsilon]``
                # here; this only matches if the two compare equal -- verify.
                if a == [epsilon]:
                    s.d = 1
                result.add(s)
    # merge states that only differ in their lookahead
    result.merge()
    return result
Exemplo n.º 6
0
def closure_0(grammar, state_set):
    """Return the LR(0) closure of ``state_set`` for ``grammar``.

    The closure starts out as a copy of ``state_set.elements``.  For every
    element standing in front of a ``Nonterminal``, one ``State`` per
    alternative of that nonterminal is added; an empty alternative is
    represented by ``[epsilon]`` and its state starts at position 1.
    """
    closed = StateSet()
    # 1) The closure contains the set it is computed from.
    for seed in state_set.elements:
        closed.add(seed)
    # 2) Expand every element whose next symbol is a nonterminal.  Elements
    #    are added to ``closed`` while it is being iterated, exactly as
    #    before.
    for item in closed:
        nonterminal = item.next_symbol()
        if not isinstance(nonterminal, Nonterminal):
            continue
        for rhs in grammar[nonterminal].alternatives:
            # An empty alternative is represented by the epsilon symbol.
            if rhs == []:
                rhs = [epsilon]
            fresh = State(Production(nonterminal, rhs), 0)
            if rhs == [epsilon]:
                fresh.d = 1
            closed.add(fresh)
    return closed
Exemplo n.º 7
0
class StateGraph(object):
    """Graph of LR state sets connected by symbol-labelled edges.

    A state is identified by its index in ``state_sets``.  ``edges`` maps
    ``(from_id, symbol)`` to the index of the target state and ``ids`` maps a
    state set to its index.
    """

    def __init__(self, start_symbol, grammar, lr_type=0):
        """Prepare an empty graph for ``grammar`` rooted at ``start_symbol``.

        ``lr_type`` selects the closure/goto pair taken from the ``Helper``
        and the shape of the initial state set.  For a value that is neither
        ``LR0`` nor ``LR1``/``LALR`` the attributes ``closure``, ``goto`` and
        ``start_set`` are left unset.
        """
        self.grammar = grammar
        self.start_symbol = start_symbol
        self.state_sets = []
        self.edges = {}
        self.ids = {}
        self.todo = []              # ids that still have to be processed
        self.done = set()           # ids that have been processed
        self.maybe_compatible = {}  # symbol -> ids of states reached via it

        # profiling counters
        self.goto_time = 0
        self.add_time = 0
        self.closure_time = 0
        self.closure_count = 0
        self.addcount = 0
        self.weakly = 0
        self.weakly_count = 0
        self.mergetime = 0

        helper = Helper(grammar)
        self.helper = helper
        if lr_type == LR0:
            self.closure = helper.closure_0
            self.goto = helper.goto_0
            self.start_set = StateSet([LR0Element(Production(None, [self.start_symbol]), 0)])
        elif lr_type == LR1 or lr_type == LALR:
            self.closure = helper.closure_1
            self.goto = helper.goto_1
            self.start_set = StateSet()
            self.start_set.add(LR0Element(Production(None, [self.start_symbol]), 0), set([FinishSymbol()]))

    def build(self):
        """Construct all state sets and edges reachable from ``start_set``.

        Work-list algorithm: an id is popped from ``todo``, the closure of
        its state set is computed, every non-final element is advanced by one
        position and grouped by its next symbol, and each group is handed to
        ``add``.  Afterwards every stored state set is replaced by its
        closure, ``ids`` is rebuilt, and ``closure``/``goto`` are cleared.
        """
        # NOTE(review): the counter is reset on ``State`` but reported from
        # ``StateSet`` below -- confirm which class owns ``_hashtime``.
        State._hashtime = 0
        start = time()
        self.state_sets.append(self.start_set)
        self.ids[self.start_set] = 0
        self.todo.append(0)
        while self.todo:
            self.addcount += 1
            _id = self.todo.pop()
            self.done.add(_id)
            closure_start = time()
            state_set = self.closure(self.state_sets[_id])
            self.closure_count += 1
            closure_end = time()
            self.closure_time += closure_end - closure_start
            new_gotos = {}
            goto_start = time()
            # create new sets first, then calculate closure
            for lrelement in state_set.elements:
                symbol = lrelement.next_symbol()
                if not symbol:  # state is final
                    continue
                #XXX optimisation: create all configurations before building
                new_element = lrelement.clone()
                new_element.d += 1
                new_element_la = state_set.get_lookahead(lrelement)
                stateset = new_gotos.setdefault(symbol, StateSet())
                stateset.add(new_element, new_element_la)

            goto_end = time()
            self.goto_time += goto_end - goto_start

            # hand the (not yet closed) goto sets to ``add``
            for symbol, new_state_set in new_gotos.items():
                add_start = time()
                self.add(_id, symbol, new_state_set)
                add_end = time()
                self.add_time += add_end - add_start

        end = time()
        logging.info("add time %s", self.add_time)
        logging.info("closure time %s", self.closure_time)
        logging.info("closure time helper %s", self.helper.closure_time)
        logging.info("goto time %s", self.goto_time)
        logging.info("hashtime %s", StateSet._hashtime)
        logging.info("addcount %s", self.addcount)
        logging.info("states %s", len(self.state_sets))
        logging.info("weakly %s", self.weakly)
        logging.info("weakly count %s", self.weakly_count)
        logging.info("mergetime %s", self.mergetime)

        # apply closure
        logging.info("Apply closure to states")
        clstart = time()
        new_state_sets = []
        new_ids = {}
        # A state's id is its position in ``state_sets`` (see ``add``), so
        # the position is used directly.  The closed state set must map to
        # that id, not to itself.
        for _id, state in enumerate(self.state_sets):
            new_state = self.closure(state)
            new_state_sets.append(new_state)
            new_ids[new_state] = _id
        self.state_sets = new_state_sets
        logging.info("after closure %s", len(new_state_sets))
        logging.info("edges %s", len(set(self.edges.values())))
        self.ids = new_ids
        logging.info(time() - clstart)

        logging.info("Finished building Stategraph in %s", end-start)
        self.closure = None
        self.goto = None

    def weakly_compatible(self, s1, s2):
        """Return True if ``s1`` and ``s2`` may be merged.

        The sets must have equal ``elements``.  For every pair of elements
        ``I``, ``J`` the merge is rejected when a lookahead of ``I`` in one
        set overlaps a lookahead of ``J`` in the other set while the
        lookaheads of ``I`` and ``J`` are disjoint within both ``s1`` and
        ``s2``.
        """
        self.weakly_count += 1
        core = s1.elements
        if core != s2.elements:
            return False
        if len(core) == 1:
            return True
        self.weakly -= time()  # together with the += below: elapsed time
        core = list(core)
        for i in range(0, len(core)-1):
            I = core[i]
            for j in range(i+1, len(core)):
                J = core[j]
                if ((s1.lookaheads[I] & s2.lookaheads[J] or s1.lookaheads[J] & s2.lookaheads[I])
                    and not s1.lookaheads[I] & s1.lookaheads[J]
                    and not s2.lookaheads[I] & s2.lookaheads[J]):
                    self.weakly += time()
                    return False
        self.weakly += time()
        return True

    def find_stateset_without_lookahead(self, state_set):
        """Return the first stored state set for which
        ``state_set.equals(ss, True)`` holds, or None if there is none."""
        for ss in self.state_sets:
            if state_set.equals(ss, True):
                return ss
        return None

    def merge_lookahead(self, old, new):
        """Add the lookaheads of ``new`` to the matching elements of ``old``.

        Returns True if at least one lookahead set of ``old`` grew.
        """
        self.mergetime -= time()
        changed = False
        for element in new.elements:
            la1 = new.get_lookahead(element)
            la2 = old.get_lookahead(element)
            if la1 - la2:
                changed = True
                old.lookaheads[element] = la2 | la1

        self.mergetime += time()
        return changed

    def add(self, from_id, symbol, state_set):
        """Register ``state_set`` as the target of ``(from_id, symbol)``.

        The set is merged into every weakly compatible state already reached
        via ``symbol``; a state whose lookaheads changed and which was
        already processed is put back on the todo list.  If nothing was
        compatible, the set becomes a new state.
        """
        merged = False
        # only check states that can be reached by symbol
        # NOTE(review): the loop does not stop after the first merge, so the
        # edge ends up pointing at the last compatible candidate -- confirm
        # this is intended.
        for _id in self.maybe_compatible.setdefault(symbol, set()):
            candidate = self.state_sets[_id]
            if self.weakly_compatible(state_set, candidate):
                merged = True
                changed = self.merge_lookahead(candidate, state_set)
                self.edges[(from_id, symbol)] = _id
                if changed and _id in self.done:
                    # re-process: only needed for states already done
                    self.todo.append(_id)
                    self.done.remove(_id)

        if not merged:
            # add normally and put on todo list
            self.state_sets.append(state_set)
            _id = len(self.state_sets)-1
            self.edges[(from_id, symbol)] = _id
            self.ids[state_set] = _id
            self.todo.append(_id)

            # add to maybe compatible
            mc = self.maybe_compatible.setdefault(symbol, set())
            mc.add(_id)

    def oldadd(self, from_id, symbol, state_set):
        """Plain LR(1) variant of ``add``: reuse a state only if the same
        state set is already registered in ``ids``."""
        add_start = time()
        _id = self.ids.get(state_set)
        if _id is None:  # new state
            self.addcount += 1
            self.state_sets.append(state_set)
            _id = len(self.state_sets)-1
            self.ids[state_set] = _id
            self.todo.append(_id)
        self.edges[(from_id, symbol)] = _id
        add_end = time()
        self.add_time += add_end - add_start

    def follow(self, from_id, symbol):
        """Return the id reached from ``from_id`` via ``symbol`` or None."""
        return self.edges.get((from_id, symbol))

    def get_symbols(self):
        """Return the set of all symbols that label an edge."""
        return set(symbol for _, symbol in self.edges)

    def get_state_set(self, i):
        """Return the state set with id ``i``."""
        return self.state_sets[i]

    def convert_lalr(self):
        """Merge all state sets ``s2`` for which ``s1.equals(s2, False)``.

        The elements of the absorbed set are added to the surviving one,
        edges from or to the absorbed state are redirected to the survivor,
        and finally the absorbed states are removed.  Because ids are list
        positions, ``edges`` and ``ids`` are renumbered after the removal.
        """
        removed = set()
        count = len(self.state_sets)
        for i in range(count):
            if i in removed:
                continue
            s1 = self.state_sets[i]
            for j in range(count):
                if j in removed:
                    continue
                s2 = self.state_sets[j]
                if s1 is not s2 and s1.equals(s2, False):
                    for e in s2:
                        s1.add(e)  # this should automatically merge the lookahead of the states
                    s1.merge()
                    # Redirect edges touching j.  A new mapping is built
                    # because a dict must not be changed while it is
                    # iterated.
                    redirected = {}
                    for (fromid, symbol), to in self.edges.items():
                        if fromid == j:
                            fromid = i
                        if to == j:
                            to = i
                        redirected[(fromid, symbol)] = to
                    self.edges.clear()
                    self.edges.update(redirected)
                    removed.add(j)
        if not removed:
            return

        # Drop absorbed states and renumber the survivors.
        new_index = {}
        kept = []
        for old_id, state_set in enumerate(self.state_sets):
            if old_id not in removed:
                new_index[old_id] = len(kept)
                kept.append(state_set)
        self.state_sets[:] = kept

        renumbered = {}
        for (fromid, symbol), to in self.edges.items():
            renumbered[(new_index[fromid], symbol)] = new_index[to]
        self.edges.clear()
        self.edges.update(renumbered)
        self.ids = dict((state_set, _id) for _id, state_set in enumerate(kept))
Exemplo n.º 8
0
class StateGraph(object):
    """Graph of LR state sets connected by symbol-labelled edges.

    A state is identified by its index in ``state_sets``.  ``edges`` maps
    ``(from_id, symbol)`` to the index of the target state and ``ids`` maps a
    state set to its index.
    """

    def __init__(self, start_symbol, grammar, lr_type=0):
        """Prepare an empty graph for ``grammar`` rooted at ``start_symbol``.

        ``lr_type`` selects the closure/goto pair taken from the ``Helper``
        and the shape of the initial state set.  For a value that is neither
        ``LR0`` nor ``LR1``/``LALR`` the attributes ``closure``, ``goto`` and
        ``start_set`` are left unset.
        """
        self.grammar = grammar
        self.start_symbol = start_symbol
        self.state_sets = []
        self.edges = {}
        self.ids = {}
        self.todo = []              # ids that still have to be processed
        self.done = set()           # ids that have been processed
        self.maybe_compatible = {}  # symbol -> ids of states reached via it

        # profiling counters
        self.goto_time = 0
        self.add_time = 0
        self.closure_time = 0
        self.closure_count = 0
        self.addcount = 0
        self.weakly = 0
        self.weakly_count = 0
        self.mergetime = 0

        helper = Helper(grammar)
        self.helper = helper
        if lr_type == LR0:
            self.closure = helper.closure_0
            self.goto = helper.goto_0
            self.start_set = StateSet([LR0Element(Production(None, [self.start_symbol]), 0)])
        elif lr_type == LR1 or lr_type == LALR:
            self.closure = helper.closure_1
            self.goto = helper.goto_1
            self.start_set = StateSet()
            self.start_set.add(LR0Element(Production(None, [self.start_symbol]), 0), set([FinishSymbol()]))

    def build(self):
        """Construct all state sets and edges reachable from ``start_set``.

        Work-list algorithm: an id is popped from ``todo``, the closure of
        its state set is computed, every non-final element is advanced by one
        position and grouped by its next symbol, and each group is handed to
        ``add``.  Afterwards every stored state set is replaced by its
        closure, ``ids`` is rebuilt, and ``closure``/``goto`` are cleared.
        """
        # NOTE(review): the counter is reset on ``State`` but reported from
        # ``StateSet`` below -- confirm which class owns ``_hashtime``.
        State._hashtime = 0
        start = time()
        # The start set is stored unclosed; its closure is computed when its
        # id is taken from the todo list.
        self.state_sets.append(self.start_set)
        self.ids[self.start_set] = 0
        self.todo.append(0)
        while self.todo:
            self.addcount += 1
            _id = self.todo.pop()
            self.done.add(_id)
            closure_start = time()
            state_set = self.closure(self.state_sets[_id])
            self.closure_count += 1
            closure_end = time()
            self.closure_time += closure_end - closure_start
            new_gotos = {}
            goto_start = time()
            # create new sets first, then calculate closure
            for lrelement in state_set.elements:
                symbol = lrelement.next_symbol()
                if not symbol:  # state is final
                    continue
                #XXX optimisation: create all configurations before building
                new_element = lrelement.clone()
                new_element.d += 1
                new_element_la = state_set.get_lookahead(lrelement)
                stateset = new_gotos.setdefault(symbol, StateSet())
                stateset.add(new_element, new_element_la)

            goto_end = time()
            self.goto_time += goto_end - goto_start

            # hand the (not yet closed) goto sets to ``add``
            for symbol, new_state_set in new_gotos.items():
                add_start = time()
                self.add(_id, symbol, new_state_set)
                add_end = time()
                self.add_time += add_end - add_start

        end = time()
        logging.info("add time %s", self.add_time)
        logging.info("closure time %s", self.closure_time)
        logging.info("closure time helper %s", self.helper.closure_time)
        logging.info("goto time %s", self.goto_time)
        logging.info("hashtime %s", StateSet._hashtime)
        logging.info("addcount %s", self.addcount)
        logging.info("states %s", len(self.state_sets))
        logging.info("weakly %s", self.weakly)
        logging.info("weakly count %s", self.weakly_count)
        logging.info("mergetime %s", self.mergetime)

        # apply closure
        logging.info("Apply closure to states")
        clstart = time()
        new_state_sets = []
        new_ids = {}
        # A state's id is its position in ``state_sets`` (see ``add``), so
        # the position is used directly.  The closed state set must map to
        # that id, not to itself.
        for _id, state in enumerate(self.state_sets):
            new_state = self.closure(state)
            new_state_sets.append(new_state)
            new_ids[new_state] = _id
        self.state_sets = new_state_sets
        logging.info("after closure %s", len(new_state_sets))
        logging.info("edges %s", len(set(self.edges.values())))
        self.ids = new_ids
        logging.info(time() - clstart)

        logging.info("Finished building Stategraph in %s", end-start)
        self.closure = None
        self.goto = None

    def weakly_compatible(self, s1, s2):
        """Return True if ``s1`` and ``s2`` may be merged.

        The sets must have equal ``elements``.  For every pair of elements
        ``I``, ``J`` the merge is rejected when a lookahead of ``I`` in one
        set overlaps a lookahead of ``J`` in the other set while the
        lookaheads of ``I`` and ``J`` are disjoint within both ``s1`` and
        ``s2``.
        """
        self.weakly_count += 1
        core = s1.elements
        if core != s2.elements:
            return False
        if len(core) == 1:
            return True
        self.weakly -= time()  # together with the += below: elapsed time
        core = list(core)
        for i in range(0, len(core)-1):
            I = core[i]
            for j in range(i+1, len(core)):
                J = core[j]
                if ((s1.lookaheads[I] & s2.lookaheads[J] or s1.lookaheads[J] & s2.lookaheads[I])
                    and not s1.lookaheads[I] & s1.lookaheads[J]
                    and not s2.lookaheads[I] & s2.lookaheads[J]):
                    self.weakly += time()
                    return False
        self.weakly += time()
        return True

    def find_stateset_without_lookahead(self, state_set):
        """Return the first stored state set for which
        ``state_set.equals(ss, True)`` holds, or None if there is none."""
        for ss in self.state_sets:
            if state_set.equals(ss, True):
                return ss
        return None

    def merge_lookahead(self, old, new):
        """Add the lookaheads of ``new`` to the matching elements of ``old``.

        Returns True if at least one lookahead set of ``old`` grew.
        """
        self.mergetime -= time()
        changed = False
        for element in new.elements:
            la1 = new.get_lookahead(element)
            la2 = old.get_lookahead(element)
            if la1 - la2:
                changed = True
                old.lookaheads[element] = la2 | la1

        self.mergetime += time()
        return changed

    def add(self, from_id, symbol, state_set):
        """Register ``state_set`` as the target of ``(from_id, symbol)``.

        The set is merged into every weakly compatible state already reached
        via ``symbol``; a state whose lookaheads changed and which was
        already processed is put back on the todo list.  If nothing was
        compatible, the set becomes a new state.
        """
        merged = False
        # only check states that can be reached by symbol
        # NOTE(review): the loop does not stop after the first merge, so the
        # edge ends up pointing at the last compatible candidate -- confirm
        # this is intended.
        for _id in self.maybe_compatible.setdefault(symbol, set()):
            candidate = self.state_sets[_id]
            if self.weakly_compatible(state_set, candidate):
                merged = True
                changed = self.merge_lookahead(candidate, state_set)
                self.edges[(from_id, symbol)] = _id
                if changed and _id in self.done:
                    # re-process: only needed for states already done
                    self.todo.append(_id)
                    self.done.remove(_id)

        if not merged:
            # add normally and put on todo list
            self.state_sets.append(state_set)
            _id = len(self.state_sets)-1
            self.edges[(from_id, symbol)] = _id
            self.ids[state_set] = _id
            self.todo.append(_id)

            # add to maybe compatible
            mc = self.maybe_compatible.setdefault(symbol, set())
            mc.add(_id)

    def oldadd(self, from_id, symbol, state_set):
        """Plain LR(1) variant of ``add``: reuse a state only if the same
        state set is already registered in ``ids``."""
        add_start = time()
        _id = self.ids.get(state_set)
        if _id is None:  # new state
            self.addcount += 1
            self.state_sets.append(state_set)
            _id = len(self.state_sets)-1
            self.ids[state_set] = _id
            self.todo.append(_id)
        self.edges[(from_id, symbol)] = _id
        add_end = time()
        self.add_time += add_end - add_start

    def follow(self, from_id, symbol):
        """Return the id reached from ``from_id`` via ``symbol`` or None."""
        return self.edges.get((from_id, symbol))

    def get_symbols(self):
        """Return the set of all symbols that label an edge."""
        return set(symbol for _, symbol in self.edges)

    def get_state_set(self, i):
        """Return the state set with id ``i``."""
        return self.state_sets[i]

    def convert_lalr(self):
        """Merge all state sets ``s2`` for which ``s1.equals(s2, False)``.

        The elements of the absorbed set are added to the surviving one,
        edges from or to the absorbed state are redirected to the survivor,
        and finally the absorbed states are removed.  Because ids are list
        positions, ``edges`` and ``ids`` are renumbered after the removal.
        """
        removed = set()
        count = len(self.state_sets)
        for i in range(count):
            if i in removed:
                continue
            s1 = self.state_sets[i]
            for j in range(count):
                if j in removed:
                    continue
                s2 = self.state_sets[j]
                if s1 is not s2 and s1.equals(s2, False):
                    for e in s2:
                        s1.add(e)  # this should automatically merge the lookahead of the states
                    s1.merge()
                    # Redirect edges touching j.  A new mapping is built
                    # because a dict must not be changed while it is
                    # iterated.
                    redirected = {}
                    for (fromid, symbol), to in self.edges.items():
                        if fromid == j:
                            fromid = i
                        if to == j:
                            to = i
                        redirected[(fromid, symbol)] = to
                    self.edges.clear()
                    self.edges.update(redirected)
                    removed.add(j)
        if not removed:
            return

        # Drop absorbed states and renumber the survivors.
        new_index = {}
        kept = []
        for old_id, state_set in enumerate(self.state_sets):
            if old_id not in removed:
                new_index[old_id] = len(kept)
                kept.append(state_set)
        self.state_sets[:] = kept

        renumbered = {}
        for (fromid, symbol), to in self.edges.items():
            renumbered[(new_index[fromid], symbol)] = new_index[to]
        self.edges.clear()
        self.edges.update(renumbered)
        self.ids = dict((state_set, _id) for _id, state_set in enumerate(kept))
Exemplo n.º 9
0
 def closure_1(self, state_set):
     """Return the LR(1) closure of ``state_set`` as a new ``StateSet``.

     Elements are tracked in a plain set and their lookaheads in a separate
     dict.  Starting from the elements of ``state_set``, every non-final
     element standing in front of a ``Nonterminal`` contributes one
     ``LR0Element`` per alternative of that nonterminal; its lookahead is the
     union of ``self.first(remaining symbols + [l])`` over the lookaheads
     ``l`` of the originating element.  Elements that are new, or whose
     lookahead grew, are expanded again in the next pass until a pass adds
     nothing.
     """
     la_dict = {}
     result = set()
     # Step 1: the closure contains the elements it is computed from
     # NOTE(review): the lookahead sets returned by ``get_lookahead`` are
     # stored without copying, so the ``|=`` below may modify them in place
     # -- confirm this is acceptable for callers.
     for element in state_set.elements:
         la_dict[element] = state_set.get_lookahead(element)
         result.add(element)
     # Step 2: expand until a pass yields no new or changed element
     pending = set(result)
     while pending:
         newelements = set()
         for state in pending:
             if state.isfinal():
                 continue
             symbol = state.next_symbol()
             if not isinstance(symbol, Nonterminal):
                 continue
             f = set()
             for l in la_dict[state]:
                 betaL = []
                 betaL.extend(state.remaining_symbols())
                 betaL.append(l)
                 f |= self.first(betaL)
             rule = self.grammar[symbol]
             for i, a in enumerate(rule.alternatives):
                 # create epsilon symbol if alternative is empty
                 if a == []:
                     a = [Epsilon()]
                 p = Production(symbol, a, rule.annotations[i], rule.precs[i])
                 # ``in`` instead of ``has_key``: the latter does not exist
                 # on Python 3 dictionaries
                 if i in rule.inserts:
                     insert = rule.inserts[i]
                     p.inserts[insert[0]] = insert[1]
                 s = LR0Element(p, 0)
                 if a == [epsilon]:
                     s.d = 1
                 # NEW ELEMENT:
                 # 1. completely new (+lookahead): add to result
                 # 2. new lookahead: update lookahead in la_dict
                 # -> add to new working set
                 # 3. already known: ignore
                 if s in result:
                     if f.issubset(la_dict[s]):
                         continue
                     la_dict[s] |= f
                 else:
                     la_dict[s] = set(f)
                 result.add(s)
                 newelements.add(s)
         pending = newelements
     # attach the collected lookaheads to the elements
     final_result = StateSet()
     for element in result:
         final_result.add(element, la_dict[element])
     return final_result