Example #1
    def _substitute(self, tree, nodes, node_stack):
        '''
        Let (parent(tree), parent(parent(tree)), ..., parent(...(parent(tree))...))
        be the parental hierarchy of tree. It can be denoted as (P1, ..., P(n)) where
        P(i) is the node id of the i-th ancestor of tree.

        The substitution algorithm searches for a P(i) with repl_id == P(i). If found, it
        replaces P(i) in P(i+1) with nodes = (N1, ..., N(k)). It is guaranteed that the
        node id of each N(j) is repl_id.
        '''
        repl_id = nodes[0][0]
        if tree[0] == repl_id:
            return (tree, node_stack)
        if is_token(repl_id):          # replace token
            if is_token(tree[0]):
                tree[:] = nodes[0]
                return (tree, node_stack)
        else:
            i = len(node_stack)-1
            while i>=0:
                P = node_stack[i]
                i-=1
                if repl_id == P[0]:
                    try:
                        nd_list = node_stack[i]
                        i-=1
                        for j, N in enumerate(nd_list):
                            if id(N) == id(P):
                                nd_list[:] = nd_list[:j]+list(nodes)+nd_list[j+1:]
                                return (nd_list, node_stack[i-1:])
                    except IndexError:    # nothing to pop from node_stack
                        P[:] = nodes[0]
                        return (P, node_stack[i-1:])
        self._create_translation_error(tree, nodes, node_stack)
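
The token branch above relies on slice assignment to replace a node in place. A minimal standalone illustration (with hypothetical node ids and a hypothetical [node_id, value, lineno] token layout) of why that matters: the list object keeps its identity, so a parent CST that references it sees the new contents.

old_token = [1, 'old_name', 3]            # hypothetical token node
new_nodes = [[1, 'new_name', 3]]          # replacement carries the same node id
parent    = [257, old_token]              # hypothetical parent keeps a reference

old_token[:] = new_nodes[0]               # in-place replacement, identity preserved
print(parent)                             # -> [257, [1, 'new_name', 3]]
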
Example #2
        def check(r, state, tracer, visited):
            R   = set()
            C   = {}
            msg = ""
            must_trace = False
            states     = tracer.select(state[0])
            selection  = list(set(s[0] for s in states if s and s[0]!=FIN))
            for i,s in enumerate(selection):
                if is_token(s):
                    S = set([s])
                else:
                    S = nfamodule.reachables[s]
                if R&S:
                    for u in C:
                        if C[u]&S:
                            if is_token(s):
                                msg = "%s : %s -> FirstSet(%s) /\\ {%s} = %s\n"%(r, state, u, s, C[u]&S)
                            elif is_token(u):
                                msg = "%s : %s -> {%s} /\\ FirstSet(%s) = %s\n"%(r, state, u, s, C[u]&S)
                            else:
                                msg = "%s : %s -> FirstSet(%s) /\\ FirstSet(%s) = %s\n"%(r, state, u, s, C[u]&S)
                            lineno = sys._getframe(0).f_lineno +1
                            if print_warning:
                                warnings.warn_explicit(msg, NeedsMoreExpansionWarning, "nfadatagen.py", lineno)
                            backtracking.add(r)
                    break
                else:
                    R.update(S)
                    C[s] = S

            for state in states:
                if state[0] is not FIN and state not in visited:
                    visited.add(state)
                    subtracer = tracer.clone()
                    check(r, state, subtracer, visited)
Example #3
    def _substitute(self, tree, nodes, node_stack):
        '''
        Let (parent(tree), parent(parent(tree)), ..., parent(...(parent(tree))...))
        be the parental hierarchy of tree. It can be denoted as (P1, ..., P(n)) where
        P(i) is the node id of the i-th ancestor of tree.

        The substitution algorithm searches for a P(i) with repl_id == P(i). If found, it
        replaces P(i) in P(i+1) with nodes = (N1, ..., N(k)). It is guaranteed that the
        node id of each N(j) is repl_id.
        '''
        repl_id = nodes[0][0]
        if tree[0] == repl_id:
            return (tree, node_stack)
        if is_token(repl_id):  # replace token
            if is_token(tree[0]):
                tree[:] = nodes[0]
                return (tree, node_stack)
        else:
            i = len(node_stack) - 1
            while i >= 0:
                P = node_stack[i]
                i -= 1
                if repl_id == P[0]:
                    try:
                        nd_list = node_stack[i]
                        i -= 1
                        for j, N in enumerate(nd_list):
                            if id(N) == id(P):
                                nd_list[:] = nd_list[:j] + list(
                                    nodes) + nd_list[j + 1:]
                                return (nd_list, node_stack[i - 1:])
                    except IndexError:  # nothing to pop from node_stack
                        P[:] = nodes[0]
                        return (P, node_stack[i - 1:])
        self._create_translation_error(tree, nodes, node_stack)
Example #4
    def node_cmp(cls, tree, node_id):
        '''
        Compares the first node of the CST tree with node_id.

        @param tree:  CST
        @param node_id: integer representing a py_symbol or a py_token

        @return:
                 - 0 if node_id is tree-root.
                 - -1 if tree is a py_token or node_id cannot be the node_id of any subtree.
                 - 1 otherwise
        '''
        tree_id = tree[0] % LANGLET_ID_OFFSET
        node_id = node_id % LANGLET_ID_OFFSET
        if tree_id == node_id:  # do we still want this? it makes nodes of two different langlets
            return 0  # comparable.
        elif is_token(tree_id):  # is token
            return -1
        if cls.activated:
            if is_symbol(node_id):
                try:
                    s0 = hierarchy[tree_id]  # global ???
                    s1 = hierarchy[node_id]
                    if s0 > s1:
                        return -1
                except KeyError:
                    return 1
            else:
                return 1
        else:
            return 1
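
node_cmp first reduces both ids modulo LANGLET_ID_OFFSET. A toy illustration of that normalization with an assumed offset value (the real constant is defined elsewhere in the project):

LANGLET_ID_OFFSET = 512                        # assumed value, for illustration only

base_id    = 257                               # hypothetical symbol id
shifted_id = base_id + 3 * LANGLET_ID_OFFSET   # same node id seen from another langlet

# The modulo maps both ids onto the same base id, which makes them comparable.
print(shifted_id % LANGLET_ID_OFFSET == base_id % LANGLET_ID_OFFSET)   # -> True
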
Example #5
    def node_cmp(cls, tree, node_id):
        '''
        Compares the first node of the CST tree with node_id.

        @param tree:  CST
        @param node_id: integer representing a py_symbol or a py_token

        @return:
                 - 0 if node_id is tree-root.
                 - -1 if tree is a py_token or node_id cannot be the node_id of any subtree.
                 - 1 otherwise
        '''
        tree_id = tree[0] % LANGLET_ID_OFFSET
        node_id = node_id % LANGLET_ID_OFFSET
        if tree_id == node_id:           # do we still want this? it makes nodes of two different langlets
            return 0                     # comparable.
        elif is_token(tree_id): # is token
            return -1
        if cls.activated:
            if is_symbol(node_id):
                try:
                    s0 = hierarchy[tree_id]    # global ???
                    s1 = hierarchy[node_id]
                    if s0>s1:
                        return -1
                except KeyError:
                    return 1
            else:
                return 1
        else:
            return 1
Example #6
    def check_first_follow_conflict(self):
        '''
        A FirstFollowConflict warning is raised when a conflict occurs.

        Background ::

            This method checks for first/follow conflicts. A first/follow conflict is an ambiguity that occurs
            in grammars like

               G: A* A

            Given the word 'AA' one can either match both characters with A* ( which leads to an error ) or
            match the first character with A* and the second with A.

            A first/follow conflict shall always be removed, and it has to be removed manually!
        '''
        last_sets  = self.nfadata.compute_last_set()
        fin_cycles = self.nfadata.compute_fin_cycles()

        warned = set()

        def format_stream(T, k):
            stream = []
            for t in T[:k+2]:
                if isinstance(t, int):
                    stream.append(self.node_name(t))
                else:
                    stream.append("'"+t+"'")
            return stream[0]+': '+' '.join(stream[1:])

        for r, traces in self._cyclefree_traces().items():
            for T in traces:
                for i in range(1,len(T)-1):
                    A = T[i]
                    B = T[i+1]
                    if (r,A,B) in warned or A == B:
                        continue
                    if is_token(B):
                        if B in last_sets.get(A, set()):
                            if B in fin_cycles[A]:
                                warn_text = "%s -> LastSet(%s) /\\ set([%s]) != {}"%(self.node_name(r),self.node_name(A),self.node_name(B)) #, format_stream(T,i))

                                warnings.warn_explicit(warn_text, FirstFollowConflict, "nfadatagen.py", sys._getframe(0).f_lineno-1)
                                warned.add((r,A,B))
                                break
                    else:
                        S = last_sets.get(A, set()) & self.nfadata.reachables.get(B, set())
                        if S:
                            C = S & fin_cycles[A]
                            if C:
                                warn_text = "%s -> LastSet(%s) /\\ FirstSet(%s) != {}"%(self.node_name(r), self.node_name(A),self.node_name(B)) #,format_stream(T,i))

                                warnings.warn_explicit(warn_text, FirstFollowConflict, "nfadatagen.py", sys._getframe(0).f_lineno-1)
                                warned.add((r,A,B))
                                print "                  /\\", set([self.node_name(c) for c in C])
                                break
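
The core test in this method is an intersection between the set of ids that can end an A-phrase and the set of ids that can start the following phrase. A toy sketch of that check with hypothetical node-id sets:

last_A  = {1, 3}        # hypothetical LastSet(A)
first_B = {3, 5}        # hypothetical FirstSet(B), i.e. reachables[B]

conflict = last_A & first_B
if conflict:
    print("first/follow conflict on tokens:", conflict)   # -> {3}
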
Example #7
def find_token_gen(tree, depth=MAX_DEPTH):
    if is_token(tree[0]):
        yield tree
    elif depth < 0:
        return  # stop descending; raising StopIteration here would be a RuntimeError under PEP 479 (Python 3.7+)
    for sub in tree[1:]:
        if isinstance(sub, list):
            for item in find_token_gen(sub, depth=depth - 1):
                if item:
                    yield item
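
A usage sketch with a hypothetical CST. It assumes the list layout used above (the first element of a node is its id, token nodes carry their string and line number) and an is_token that classifies ids below 256 as tokens:

tree = [257,                                  # hypothetical symbol node
        [1, 'x', 1],                          # hypothetical token nodes: [id, value, lineno]
        [258, [2, '+', 1], [1, 'y', 1]]]

for tok in find_token_gen(tree):
    print(tok)
# -> [1, 'x', 1]
#    [2, '+', 1]
#    [1, 'y', 1]
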
Example #8
def find_token_gen(tree, depth = MAX_DEPTH):
    if is_token(tree[0]):
        yield tree
    elif depth < 0:
        return  # stop descending; raising StopIteration here would be a RuntimeError under PEP 479 (Python 3.7+)
    for sub in tree[1:]:
        if isinstance(sub, list):
            for item in find_token_gen(sub, depth=depth-1):
                if item:
                    yield item
Example #9
def find_token_chains_gen(tree, depth = MAX_DEPTH, chain=[]):

    if is_token(tree[0]):
        yield Chain(chain+[tree])
    elif depth < 0:
        return  # stop descending; raising StopIteration here would be a RuntimeError under PEP 479 (Python 3.7+)
    for sub in tree[1:]:
        if isinstance(sub, list):
            for item in find_token_chains_gen(sub, depth=depth-1, chain = chain+[tree]):
                if item:
                    yield item
Example #10
def find_token_chain(tree, depth = MAX_DEPTH, chain = []):
    if is_token(tree[0]):
        return Chain(chain+[tree])
    if depth<0:
        return
    for sub in tree[1:-1]:
        res = find_token_chain(sub, depth=depth-1, chain = chain+[tree])
        if res:
            return res
    if isinstance(tree[-1], list):
        return find_token_chain(tree[-1], depth=depth-1, chain = chain+[tree])
Example #11
def find_token_chains_gen(tree, depth=MAX_DEPTH, chain=[]):

    if is_token(tree[0]):
        yield Chain(chain + [tree])
    elif depth < 0:
        return  # stop descending; raising StopIteration here would be a RuntimeError under PEP 479 (Python 3.7+)
    for sub in tree[1:]:
        if isinstance(sub, list):
            for item in find_token_chains_gen(sub,
                                              depth=depth - 1,
                                              chain=chain + [tree]):
                if item:
                    yield item
Example #12
def create_referrer(symbols):
    '''
    The referrer is a dictionary that maps each symbol to the set of rules referring to it.
    '''
    referrer = {}
    for r, sym in symbols.items():
        for s in sym:
            if is_token(s):
                continue
            R = referrer.get(s, set())
            R.add(r)
            referrer[s] = R
    return referrer
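
A usage sketch with hypothetical grammar data, assuming is_token treats ids below 256 as tokens: symbols maps each rule id to the ids occurring on its right-hand side, and the result maps each referenced non-terminal back to the rules that mention it.

symbols = {
    257: {258, 1},     # hypothetical rule 257 references rule 258 and token 1
    258: {1, 2},       # hypothetical rule 258 references tokens only
}

referrer = create_referrer(symbols)
print(referrer)        # -> {258: {257}}  (token ids are skipped)
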
Example #13
def create_referrer(symbols):
    '''
    The referrer is a dictionary that maps each symbol to the set of rules referring to it.
    '''
    referrer = {}
    for r, sym in symbols.items():
        for s in sym:
            if is_token(s):
                continue
            R = referrer.get(s,set())
            R.add(r)
            referrer[s] = R
    return referrer
Example #14
def find_token_chain(tree, depth=MAX_DEPTH, chain=[]):
    if is_token(tree[0]):
        return Chain(chain + [tree])
    if depth < 0:
        return
    for sub in tree[1:-1]:
        res = find_token_chain(sub, depth=depth - 1, chain=chain + [tree])
        if res:
            return res
    if isinstance(tree[-1], list):
        return find_token_chain(tree[-1],
                                depth=depth - 1,
                                chain=chain + [tree])
Example #15
    def _create_translation_error(self, tree, nodes, node_stack):
        repl_id = nodes[0][0]
        if is_token(repl_id):
            raise TranslationError(
                "Cannot substitute non-terminal %s by terminal %s" %
                ((tree[0], self.langlet.get_node_name(tree[0])),
                 (repl_id, self.langlet.get_node_name(repl_id))))
        else:
            trace = [(tree[0], self.langlet.get_node_name(tree[0]))]
            while node_stack:
                P = node_stack.pop()
                trace.append((P[0], self.langlet.get_node_name(P[0])))
            trace = str(trace)

            S = "Failed to substitute node %s by %s.\n  Node %s must be one of the nodes or a projection in:  \n\n%s" % (
                (tree[0], self.langlet.get_node_name(tree[0])),
                (repl_id, self.langlet.get_node_name(repl_id)),
                (repl_id, self.langlet.get_node_name(repl_id)), trace)
        raise TranslationError(S)
Example #16
    def _create_translation_error(self, tree, nodes, node_stack):
        repl_id = nodes[0][0]
        if is_token(repl_id):
            raise TranslationError("Cannot substitute non-terminal %s by terminal %s"%(
                 (tree[0], self.langlet.get_node_name(tree[0])),
                 (repl_id, self.langlet.get_node_name(repl_id))
                ))
        else:
            trace = [(tree[0], self.langlet.get_node_name(tree[0]))]
            while node_stack:
                P = node_stack.pop()
                trace.append((P[0], self.langlet.get_node_name(P[0])))
            trace = str(trace)

            S = "Failed to substitute node %s by %s.\n  Node %s must be one of the nodes or a projection in:  \n\n%s"%(
                 (tree[0], self.langlet.get_node_name(tree[0])),
                 (repl_id, self.langlet.get_node_name(repl_id)),
                 (repl_id, self.langlet.get_node_name(repl_id)),
                 trace)
        raise TranslationError( S )
Example #17
def find_node(tree, nid, depth = MAX_DEPTH, exclude = ()):
    '''
    Finds one node with the given node id.

    (Non-recursive depth-first search implementation.)
    '''
    if is_token(tree[0]):
        if nid == tree[0]:
            return tree
        else:
            return
    dq = deque()
    for sub in tree[1:]:
        dq.append((sub, depth-1))
    while dq:
        node, depth = dq.popleft()
        s = node[0]
        if s == nid:
            return node
        elif is_symbol(s) and s not in exclude and depth>0:
            subnodes = zip(node[:0:-1], [depth-1]*(len(node)-1))
            dq.extendleft(subnodes)
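
A usage sketch on a hypothetical CST, again assuming ids below 256 count as tokens and ids at or above 256 as symbols for is_token/is_symbol:

tree = [257,
        [258, [1, 'x', 1]],                   # hypothetical subtree with id 258
        [2, '+', 1],
        [258, [1, 'y', 1]]]

print(find_node(tree, 258))   # -> [258, [1, 'x', 1]]  (first match in depth-first order)
print(find_node(tree, 1))     # -> [1, 'x', 1]
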
Example #18
def find_node(tree, nid, depth=MAX_DEPTH, exclude=()):
    '''
    Finds one node with the given node id.

    (Non-recursive depth-first search implementation.)
    '''
    if is_token(tree[0]):
        if nid == tree[0]:
            return tree
        else:
            return
    dq = deque()
    for sub in tree[1:]:
        dq.append((sub, depth - 1))
    while dq:
        node, depth = dq.popleft()
        s = node[0]
        if s == nid:
            return node
        elif is_symbol(s) and s not in exclude and depth > 0:
            subnodes = zip(node[:0:-1], [depth - 1] * (len(node) - 1))
            dq.extendleft(subnodes)
Example #19
    def expand(self, rule=0, visited=None):
        '''
        Algorithm ::

            For each transition

                S -> {L1, ..., Ln}

            in NFA[rule] with at least two follow states L1, L2, ... and Li != None we determine
            the corresponding selection

                sel = {s1, ..., sk}

            From sel we build Ri = s1.reach \/ s2.reach \/ ... \/ si.reach successively.

            If Ri intersects with s(i+1).reach, find the first sj, j = 1, ..., i, with

                    sj.reach /\ s(i+1).reach != {}

            Now embed the smaller of the two NFAs into NFA[rule].

            Repeat this procedure for each transition T until R(n-1) /\ sn.reach = {}.
        '''

        if not rule:
            rule = self.start_symbol
        if visited is None:
            visited = set()

        visited.add(rule)
        more = True
        must_select = False
        while more:
            more = False
            selections = self._all_selections(rule)

            if len(selections)>TRAIL_MAX_ALLOWED_STATES:
                raise OverflowError("NFA size > TRAIL_MAX_ALLOWED_STATES. Cannot expand rule `%s : %s`"%(rule, self.node_name(rule)))
            if self.warn_cnt>10:
                raise OverflowError("More than ten expansion warnings issued. Expansion is terminated!")

            for follow in selections:
                selectable = sorted(list(set(s[0] for s in follow if s and s[0]!=FIN)))
                if len(selectable)<=1:
                    continue
                R = set()
                C = {}
                for i,s in enumerate(selectable):
                    tok_s = False
                    if is_token(s):
                        tok_s = True
                        S = set([s])
                    else:
                        S = self.nfadata.reachables[s]
                    if R&S:
                        for u in selectable[:i]:
                            tok_u,U = C[u]
                            if U&S:
                                if tok_s:
                                    k = u
                                elif tok_u:
                                    k = s
                                else:
                                    N_s = self.nfadata.nfas[s][2]
                                    N_u = self.nfadata.nfas[u][2]
                                    k = (s if len(N_s)<=len(N_u) else u)
                                break
                        for state in (state for state in follow if state[0] == k):
                            self.maybe_expand(state[0], visited)
                            self.embedd_nfa(state, rule)
                            more = True
                            break
                        break
                    else:
                        R.update(S)
                        C[s] = (tok_s, S)
                else:
                    continue
                break
            else:
                break
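
A toy illustration of the Ri buildup described in the docstring, using hypothetical reach sets: the reach sets of the selectable follow states are merged into R one by one until one of them overlaps what has already been accumulated, which is exactly the point where expand embeds an NFA.

reach = {               # hypothetical: follow state id -> reachable first set
    1: {10, 11},
    2: {12},
    3: {11, 13},        # overlaps with the reach set of state 1
}

R = set()
for s in sorted(reach):
    S = reach[s]
    if R & S:
        print("conflict at state", s, "overlap:", R & S)   # -> conflict at state 3 overlap: {11}
        break
    R.update(S)
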
Example #20
    def maybe_expand(self, r, visited):
        if r and r not in visited and not is_token(r):
            self.expand(r, visited)