def format_amr(l):
    """Build a hypergraph from a sequence of AMR text fragments.

    The strings in ``l`` are joined with single spaces and the resulting
    text is handed to ``Hgraph.from_string``; whatever that call returns is
    returned unchanged.
    """
    return Hgraph.from_string(' '.join(l))
def format_amr(l):
    """Join the fragments in ``l`` and parse them into a hypergraph.

    ``l`` is an iterable of strings; they are concatenated with a single
    space between them and passed to ``Hgraph.from_string``.
    """
    joined_text = ' '.join(l)
    return Hgraph.from_string(joined_text)
示例#3
0
    def parse_string(self, s, concepts=True):
        """
        Parse the string s and return a new hypergraph.

        The tokens produced by self.lexer.lex(s) drive a small stack-based
        finite state machine. Items of a parenthesized expression are
        collected on a stack and turned into hyperedges when the matching
        right paren is seen.

        Besides building the graph, this method resets the per-parse
        counters on self and adds every re-entrance index found in s to
        self.reentrance_indexes.

        @param s: the textual graph description to parse.
        @param concepts: accepted for interface compatibility; this
            implementation never reads it.
        @return: a new Hgraph with the parsed nodes, edges and roots; its
            external node IDs are renumbered consecutively from 0.
        @raise ParserError: on an unexpected token (including any token that
            follows the completed root expression) or when one node is given
            two different external node IDs.
        """

        # Constants to identify items on the stack
        PNODE = 1  # Parent node
        CNODE = 2  # Child node
        EDGE = 3  # Hyperedge

        hgraph = Hgraph()

        stack = []
        state = 0

        self.id_count = 0
        self.nt_id_count = 0
        self.ext_id_count = 0
        self.seen_nodes = set()
        self.explicit_ext_ids = False

        # States of the finite state parser
        #0, top level
        #1, expecting head nodename
        #2, expecting edge label or node
        #3, expecting further child nodes or right paren
        #4, saw edge label, expecting child node, edge label, right paren
        #5, root expression complete, no further token is allowed

        def unexpected(token, pos):
            # Build (not raise) the error so call sites read "raise unexpected(...)".
            return ParserError("Unexpected token %s at position %i." %
                               (token, pos))

        def insert_node(node, root=False):
            # Insert a node into the graph
            ident, label, ext_id = node
            hgraph[ident]  # looked up only to initialize the entry for this node
            hgraph.node_to_concepts[ident] = label
            if ext_id is not None:
                if (ident in hgraph.external_nodes
                        and hgraph.external_nodes[ident] != ext_id):
                    raise ParserError(
                        "Incompatible external node IDs for node %s." % ident)
                hgraph.external_nodes[ident] = ext_id
                hgraph.rev_external_nodes[ext_id] = ident
            if root:
                hgraph.roots.append(ident)

        def close_node(node, nested_state):
            # Register a finished expression headed by node and return the
            # next parser state: nested_state when an enclosing expression is
            # still open (the node becomes one of its children), 5 when the
            # node is the root.
            if stack:
                insert_node(node)
                stack.append((CNODE, node))
                return nested_state
            insert_node(node, root=True)
            return 5

        def pop_and_transition():
            # Create all edges in a group from the stack, attach them to the
            # graph and return the state the parser must move to.
            #
            # The state is RETURNED rather than assigned here: an assignment
            # to "state" inside this nested function would only bind a local
            # name and leave the parser's state untouched.
            edges = []
            while stack[-1][0] != PNODE:  # Pop all edges
                children = []
                while stack[-1][0] == CNODE:  # Pop all nodes in hyperedge
                    itemtype, node = stack.pop()
                    insert_node(node)
                    children.append(node)
                assert stack[-1][0] == EDGE
                itemtype, edgelabel = stack.pop()
                edges.append((edgelabel, children))

            # Construct the hyperedges
            itemtype, parentnode = stack.pop()
            parent_ident = parentnode[0]
            for edgelabel, children in edges:
                # Children were popped last-to-first; reversing restores the
                # order in which they appeared in the input.
                hyperchild = tuple(
                    ident for ident, label, ext_id in reversed(children))

                if "$" in edgelabel:  # this is a nonterminal Edge
                    new_edge = NonterminalLabel.from_string(edgelabel)
                    if not new_edge.index:
                        new_edge.index = "_%i" % self.nt_id_count
                        self.nt_id_count = self.nt_id_count + 1
                else:
                    new_edge = edgelabel
                hgraph._add_triple(parent_ident, new_edge, hyperchild)

            return close_node(parentnode, 4)

        # Record every "_<digits>" index that is followed by "." and preceded
        # by a character other than ":".
        self.reentrance_indexes.update(re.findall(r'[^:](_[0-9]+)\.', s))

        # Parser transitions start here
        for typ, token, pos in self.lexer.lex(s):

            if state == 0:
                if typ == LexTypes.LPAR:
                    state = 1
                elif typ == LexTypes.NODE:
                    insert_node(self.parse_node(token), root=True)
                    state = 5
                else:
                    raise unexpected(token, pos)

            elif state == 1:
                if typ == LexTypes.NODE:
                    # Push head node
                    stack.append((PNODE, self.parse_node(token)))
                    state = 2
                else:
                    raise unexpected(token, pos)

            elif state == 2:
                if typ == LexTypes.EDGELABEL:
                    stack.append((EDGE, token[1:]))
                    state = 4
                elif typ == LexTypes.NODE:
                    # No edge specified, assume empty label
                    stack.append((EDGE, ""))
                    stack.append((CNODE, self.parse_node(token)))
                    state = 3
                elif typ == LexTypes.LPAR:
                    # No edge specified, assume empty label
                    stack.append((EDGE, ""))
                    state = 1
                elif typ == LexTypes.RPAR:
                    # Expression consisting of a head node only
                    itemtype, node = stack.pop()
                    assert itemtype == PNODE
                    state = close_node(node, 3)
                else:
                    raise unexpected(token, pos)

            elif state == 3:
                if typ == LexTypes.RPAR:  # Pop from stack and add edges
                    state = pop_and_transition()
                elif typ == LexTypes.NODE:
                    stack.append((CNODE, self.parse_node(token)))
                    state = 3
                elif typ == LexTypes.EDGELABEL:
                    stack.append((EDGE, token[1:]))
                    state = 4
                elif typ == LexTypes.LPAR:
                    state = 1
                else:
                    raise unexpected(token, pos)

            elif state == 4:
                if typ == LexTypes.LPAR:
                    state = 1
                elif typ == LexTypes.NODE:
                    stack.append((CNODE, self.parse_node(token)))
                    state = 3
                elif typ == LexTypes.EDGELABEL:
                    stack.append((EDGE, token[1:]))
                elif typ == LexTypes.RPAR:  # Pop from stack and add edges
                    state = pop_and_transition()
                else:
                    raise unexpected(token, pos)

            elif state == 5:
                raise unexpected(token, pos)

        # Normalize external nodes: keep their relative order but renumber
        # the IDs consecutively starting at 0.
        new_ext_nodes = {}
        new_rev_ext_nodes = {}
        by_ext_id = sorted(hgraph.external_nodes.items(),
                           key=lambda item: item[1])
        for new_id, (node, old_id) in enumerate(by_ext_id):
            new_ext_nodes[node] = new_id
            new_rev_ext_nodes[new_id] = node

        hgraph.external_nodes = new_ext_nodes
        hgraph.rev_external_nodes = new_rev_ext_nodes
        return hgraph
    def parse_string(self, s, concepts = True):
        """
        Parse the string s and return a new hypergraph. 

        The token stream of self.lexer.lex(s) is consumed by a stack-based
        finite state machine: the items of each parenthesized expression are
        pushed on a stack and converted into hyperedges when the closing
        paren arrives. The per-parse counters on self are reset first.

        @param s: the textual graph description to parse.
        @param concepts: accepted for interface compatibility; this
            implementation never reads it.
        @return: a new Hgraph with the parsed nodes, edges and roots; its
            external node IDs are renumbered consecutively from 0.
        @raise ParserError: on an unexpected token (including any token that
            follows the completed root expression) or when one node is given
            two different external node IDs.
        """

        # Constants to identify items on the stack
        PNODE = 1 # Parent node
        CNODE = 2 # Child node
        EDGE = 3  # Hyperedge 

        hgraph = Hgraph()

        stack = []
        state = 0

        self.id_count = 0
        self.nt_id_count = 0
        self.ext_id_count = 0
        self.seen_nodes = set()
        self.explicit_ext_ids = False

        # States of the finite state parser
        #0, top level
        #1, expecting head nodename
        #2, expecting edge label or node
        #3, expecting further child nodes or right paren
        #4, saw edge label, expecting child node, edge label, right paren 
        #5, root expression complete, no further token is allowed

        def syntax_error(token, pos):
            # Returns the exception; the caller raises it.
            return ParserError("Unexpected token %s at position %i." % (token, pos))

        def insert_node(node, root=False):
            # Insert a node into the graph
            ident, label, ext_id = node
            hgraph[ident] # accessed only to initialize the dictionary for this node
            hgraph.node_to_concepts[ident] = label
            if ext_id is not None:
                known = hgraph.external_nodes
                if ident in known and known[ident] != ext_id:
                    raise ParserError("Incompatible external node IDs for node %s." % ident)
                hgraph.external_nodes[ident] = ext_id
                hgraph.rev_external_nodes[ext_id] = ident
            if root:
                hgraph.roots.append(ident)

        def finish_expression(node, state_if_nested):
            # Register the head node of a closed expression and tell the
            # caller which state comes next: state_if_nested while an outer
            # expression is still open (node becomes its child), otherwise 5
            # because node is the root.
            if not stack:
                insert_node(node, root = True)
                return 5
            insert_node(node)
            stack.append((CNODE, node))
            return state_if_nested

        def pop_and_transition():
            # Create all edges in a group from the stack, attach them to the 
            # graph and return the next state of the FSA.
            #
            # The next state is returned instead of being assigned here,
            # because assigning to "state" in a nested function only creates
            # a local variable and would never reach the parser loop.
            edges = []
            while stack[-1][0] != PNODE: # Pop all edges
                children = []
                while stack[-1][0] == CNODE: # Pop all nodes in hyperedge
                    itemtype, node = stack.pop()
                    insert_node(node)
                    children.append(node)
                assert stack[-1][0] == EDGE
                itemtype, edgelabel = stack.pop()
                edges.append((edgelabel, children))

            # Construct the hyperedges 
            itemtype, parentnode = stack.pop()
            head_ident = parentnode[0]
            for edgelabel, children in edges:
                # build hyperedge tail; popping reversed the children, so
                # walk them backwards to get input order again
                hyperchild = tuple(child[0] for child in reversed(children))

                if "$" in edgelabel: # this is a nonterminal Edge 
                    new_edge = NonterminalLabel.from_string(edgelabel)
                    if not new_edge.index:
                        new_edge.index = "_%i" % self.nt_id_count
                        self.nt_id_count = self.nt_id_count + 1
                else:
                    new_edge = edgelabel
                hgraph._add_triple(head_ident, new_edge, hyperchild)

            return finish_expression(parentnode, 4)

        # Parser transitions start here
        for typ, token, pos in self.lexer.lex(s):

            if state == 0:
                if typ == LexTypes.LPAR:
                    state = 1
                elif typ == LexTypes.NODE:
                    insert_node(self.parse_node(token), root=True)
                    state = 5
                else:
                    raise syntax_error(token, pos)

            elif state == 1:
                if typ == LexTypes.NODE:
                    stack.append((PNODE, self.parse_node(token))) # Push head node
                    state = 2
                else:
                    raise syntax_error(token, pos)

            elif state == 2:
                if typ == LexTypes.EDGELABEL:
                    stack.append((EDGE, token[1:]))
                    state = 4
                elif typ == LexTypes.NODE:
                    stack.append((EDGE, "")) # No edge specified, assume empty label
                    stack.append((CNODE, self.parse_node(token)))
                    state = 3
                elif typ == LexTypes.LPAR:
                    stack.append((EDGE, "")) # No edge specified, assume empty label
                    state = 1
                elif typ == LexTypes.RPAR:
                    # Expression that consists of a head node only
                    itemtype, node = stack.pop()
                    assert itemtype == PNODE
                    state = finish_expression(node, 3)
                else:
                    raise syntax_error(token, pos)

            elif state == 3:
                if typ == LexTypes.RPAR: # Pop from stack and add edges
                    state = pop_and_transition()
                elif typ == LexTypes.NODE:
                    stack.append((CNODE, self.parse_node(token)))
                    state = 3
                elif typ == LexTypes.EDGELABEL:
                    stack.append((EDGE, token[1:]))
                    state = 4
                elif typ == LexTypes.LPAR:
                    state = 1
                else:
                    raise syntax_error(token, pos)

            elif state == 4:
                if typ == LexTypes.LPAR:
                    state = 1
                elif typ == LexTypes.NODE:
                    stack.append((CNODE, self.parse_node(token)))
                    state = 3
                elif typ == LexTypes.EDGELABEL:
                    stack.append((EDGE, token[1:]))
                elif typ == LexTypes.RPAR: # Pop from stack and add edges
                    state = pop_and_transition()
                else:
                    raise syntax_error(token, pos)

            elif state == 5:
                raise syntax_error(token, pos)

        # Normalize external nodes: same relative order, IDs renumbered
        # consecutively from 0.
        new_ext_nodes = {}
        new_rev_ext_nodes = {}
        ordered = sorted(hgraph.external_nodes.items(), key = lambda pair: pair[1])
        for position, (node, old_index) in enumerate(ordered):
            new_ext_nodes[node] = position
            new_rev_ext_nodes[position] = node

        hgraph.external_nodes = new_ext_nodes
        hgraph.rev_external_nodes = new_rev_ext_nodes
        return hgraph
示例#5
0
    def parse_string(self, s, concepts=True):
        """
        Parse the string s and return a new abstract meaning representation.

        The tokens produced by self.lexer.lex(s) drive a stack-based finite
        state machine. After a top-level expression is closed the machine
        returns to state 0, so several top-level expressions may follow each
        other; each one adds an entry to the roots of the result.

        @param s: the textual AMR description to parse.
        @param concepts: if True, concept labels are recorded in the
            node_to_concepts mapping of the returned L{Hgraph}.
        @return: a new L{Hgraph} holding the parsed nodes and edges.
        @raise ParserError: when a token is not allowed in the current state.
        """

        # Constants to identify items on the stack
        PNODE = 1  # parent (source) node: (PNODE, label, concept)
        CNODE = 2  # child (target) node: (CNODE, label, concept)
        EDGE = 3  # edge label: (EDGE, label)

        amr = Hgraph()
        stack = []
        state = 0

        #0, top level
        #1, expecting source nodename
        #2, expecting concept name or edge label
        #3, expecting concept name
        #4, expecting edge label
        #5, expecting expression, node name or literal string, quantity or special symbol
        #6, expecting right paren or more target nodes
        #7, saw comma, expecting another target node name or a nested expression

        def unexpected(token, pos):
            # Build (not raise) the error so call sites read "raise unexpected(...)".
            return ParserError("Unexpected token %s at position %i." %
                               (token, pos))

        def strip_external(label):
            # A leading '@' marks an external node: record the node and
            # return its label without the marker. Other labels (including
            # the None placeholder of unary edges) are returned unchanged.
            if label is not None and label[0] == '@':
                label = label[1:]
                amr.external_nodes.append(label)
            return label

        def record_concept(label, concept):
            # Store the concept of a node, without overwriting a known
            # concept by None.
            if concepts and (label not in amr.node_to_concepts
                             or concept is not None):
                amr.node_to_concepts[label] = concept

        def finish_node(label, concept):
            # Hand a closed expression to its enclosing expression as a child,
            # or record it as a root when nothing is open; returns next state.
            if stack:
                stack.append((CNODE, label, concept))
                return 6
            amr.roots.append(label)
            return 0

        def close_leaf(with_concept):
            # Close an expression that has no outgoing edges; returns the
            # next state.
            itemtype, label, concept = stack.pop()
            assert itemtype == PNODE
            label = strip_external(label)
            amr[label]  # add only the node
            if with_concept:
                record_concept(label, concept)
            return finish_node(label, concept)

        def close_expression():
            # Pop all edges of the innermost open expression, attach them to
            # the graph and return the next state.
            edges = []
            while stack[-1][0] != PNODE:  # Pop all edges
                children = []
                while stack[-1][0] == CNODE:  # Pop all target nodes of the edge
                    itemtype, child_label, child_concept = stack.pop()
                    children.append(
                        (strip_external(child_label), child_concept))
                assert stack[-1][0] == EDGE
                itemtype, edgelabel = stack.pop()
                edges.append((edgelabel, children))

            itemtype, parent_label, parent_concept = stack.pop()
            # Strip the external marker BEFORE recording the concept, so the
            # concept is stored under the same key the node is known by.
            parent_label = strip_external(parent_label)
            record_concept(parent_label, parent_concept)

            for edgelabel, children in edges:
                # build hyperedge destination
                # NOTE(review): targets were popped last-to-first and are not
                # reversed here, so a multi-target tail is in reverse textual
                # order -- confirm whether callers rely on that.
                hypertarget = []
                for node, concept in children:
                    if node is not None:  # None is the unary-edge placeholder
                        record_concept(node, concept)
                        hypertarget.append(node)

                if edgelabel[0] == '#':  # this is a nonterminal Edge
                    edgelabel = NonterminalLabel(edgelabel[1:])
                amr._add_triple(parent_label, edgelabel, tuple(hypertarget))

            return finish_node(parent_label, parent_concept)

        for typ, token, pos in self.lexer.lex(s):

            if state == 0:
                if typ == LexTypes.LPAR:
                    state = 1
                else:
                    raise unexpected(token, pos)

            elif state == 1:
                if typ == LexTypes.IDENTIFIER:
                    stack.append((PNODE, token, None))  # Push source node
                    state = 2
                else:
                    raise unexpected(token, pos)

            elif state == 2:
                if typ == LexTypes.SLASH:
                    state = 3
                elif typ == LexTypes.EDGELABEL:
                    stack.append((EDGE, token[1:]))
                    state = 5
                elif typ == LexTypes.RPAR:
                    # Node without concept and without edges
                    state = close_leaf(False)
                else:
                    raise unexpected(token, pos)

            elif state == 3:
                if typ == LexTypes.IDENTIFIER:
                    assert stack[-1][0] == PNODE
                    nodelabel = stack.pop()[1]
                    # Push the source node again, now with its concept label
                    stack.append((PNODE, nodelabel, token))
                    state = 4
                else:
                    raise unexpected(token, pos)

            elif state == 4:
                if typ == LexTypes.EDGELABEL:
                    stack.append((EDGE, token[1:]))
                    state = 5
                elif typ == LexTypes.RPAR:
                    # Node with concept but without edges
                    state = close_leaf(True)
                else:
                    raise unexpected(token, pos)

            elif state == 5:
                if typ == LexTypes.LPAR:
                    state = 1
                elif typ == LexTypes.QUANTITY:
                    stack.append((CNODE, Quantity(token), None))
                    state = 6
                elif typ == LexTypes.STRLITERAL:
                    stack.append((CNODE, StrLiteral(token[1:-1]), None))
                    state = 6
                elif typ == LexTypes.LITERAL:
                    stack.append((CNODE, Literal(token[1:]), None))
                    state = 6
                elif typ == LexTypes.IDENTIFIER:
                    stack.append((CNODE, token, None))  # Push target node
                    state = 6
                elif typ == LexTypes.EDGELABEL:  # Unary edge
                    stack.append((CNODE, None, None))
                    stack.append((EDGE, token[1:]))
                    state = 5
                elif typ == LexTypes.RPAR:  # Unary edge
                    stack.append((CNODE, None, None))
                    state = close_expression()
                else:
                    raise unexpected(token, pos)

            elif state == 6:
                if typ == LexTypes.RPAR:  # Pop from stack and add edges
                    state = close_expression()
                elif typ == LexTypes.COMMA:
                    state = 7
                elif typ == LexTypes.EDGELABEL:
                    stack.append((EDGE, token[1:]))
                    state = 5
                else:
                    raise unexpected(token, pos)

            elif state == 7:
                if typ == LexTypes.IDENTIFIER:
                    stack.append((CNODE, token, None))  # Push target node
                    state = 6
                elif typ == LexTypes.LPAR:
                    state = 1
                else:
                    raise unexpected(token, pos)

        return amr
    def parse_string(self, s, concepts = True):
        """
        Parse the string s and return a new abstract meaning representation.

        @concepts if True, method returns an L{Hgraph} object containing concept labels. 
        """

        PNODE = 1
        CNODE = 2
        EDGE = 3

        amr = Hgraph()
        stack = []
        state = 0

        #0, top leve
        #1, expecting source nodename
        #2, expecting concept name or edge label
        #3, lexpecting concept name 
        #4, expecting edge label
        #5, expecting expression, node name or literal string, quantity or special symbol   
        #6, expecting right paren or more target nodes
        #7, expecting right paren

        for type, token, pos in self.lexer.lex(s):

            if state == 0:
                if type == LexTypes.LPAR:
                    state = 1
                else: raise ParserError, "Unexpected token %s at position %i." % (token, pos)

            elif state == 1:
                if type == LexTypes.IDENTIFIER:
                    stack.append((PNODE, token, None)) # Push source node
                    state = 2
                else: raise ParserError, "Unexpected token %s at position %i." % (token, pos)

            elif state == 2:
                if type == LexTypes.SLASH:
                    state = 3
                elif type == LexTypes.EDGELABEL:
                    stack.append((EDGE, token[1:]))
                    state = 5
                elif type == LexTypes.RPAR:
                    forgetme, parentnodelabel, parentconcept = stack.pop()
                    assert forgetme == PNODE
                    if parentnodelabel[0] == '@': 
                        parentnodelabel = parentnodelabel[1:]
                        amr.external_nodes.append(parentnodelabel)
                    foo =  amr[parentnodelabel] # add only the node
                    if stack:
                        stack.append((CNODE, parentnodelabel, parentconcept))
                        state = 6
                    else:    
                        amr.roots.append(parentnodelabel)
                        state = 0

                else: raise ParserError, "Unexpected token %s at position %i." % (token, pos)

            elif state == 3:
                if type == LexTypes.IDENTIFIER:
                    assert stack[-1][0] == PNODE
                    nodelabel = stack.pop()[1]
                    stack.append((PNODE, nodelabel, token)) # Push new source node with concept label
                    state = 4
                else: raise ParserError, "Unexpected token %s at position %i." % (token, pos)

            elif state == 4:
                if type == LexTypes.EDGELABEL:
                    stack.append((EDGE, token[1:]))
                    state = 5
                elif type == LexTypes.RPAR:
                    forgetme, parentnodelabel, parentconcept = stack.pop()
                    assert forgetme == PNODE
                    if parentnodelabel[0] == '@': 
                        parentnodelabel = parentnodelabel[1:]
                        amr.external_nodes.append(parentnodelabel)
                    foo = amr[parentnodelabel] # add only the node
                    if concepts and (not parentnodelabel in amr.node_to_concepts or parentnodelabel is not None): 
                        amr.node_to_concepts[parentnodelabel] = parentconcept    
                    if stack: 
                        stack.append((CNODE, parentnodelabel, parentconcept))
                        state = 6
                    else:    
                        amr.roots.append(parentnodelabel)
                        state = 0
                else: raise ParserError, "Unexpected token %s at position %i." % (token, pos)

            elif state == 5:
                if type == LexTypes.LPAR:
                    state = 1
                elif type == LexTypes.QUANTITY:
                    stack.append((CNODE, Quantity(token), None))
                    state = 6
                elif type == LexTypes.STRLITERAL:
                    stack.append((CNODE, StrLiteral(token[1:-1]), None))
                    state = 6
                elif type == LexTypes.LITERAL:
                    stack.append((CNODE, Literal(token[1:]), None)) 
                    state = 6
                elif type == LexTypes.IDENTIFIER: 
                    stack.append((CNODE, token, None)) # Push new source node with concept label
                    state = 6
                elif type == LexTypes.EDGELABEL:  # Unary edge
                    stack.append((CNODE, None, None))
                    stack.append((EDGE, token[1:]))
                    state = 5
                        
                elif type == LexTypes.RPAR: # Unary edge
                    stack.append((CNODE, None, None))             
                    edges = []
                    while stack[-1][0] != PNODE: # Pop all edges
                        children = []
                        while stack[-1][0] == CNODE: # Pop all external nodes for hyperedge
                            forgetme, childnodelabel, childconcept = stack.pop()
                            if childnodelabel is not None and childnodelabel[0] == '@': #child is external node
                                childnodelabel = childnodelabel[1:]
                                amr.external_nodes.append(childnodelabel)
                            children.append((childnodelabel, childconcept))

                        assert stack[-1][0] == EDGE 
                        forgetme, edgelabel = stack.pop()
                        edges.append((edgelabel, children))
                   
                    forgetme, parentnodelabel, parentconcept = stack.pop()
                    if concepts and (not parentnodelabel in amr.node_to_concepts or parentconcept is not None): 
                        amr.node_to_concepts[parentnodelabel] = parentconcept
                    if parentnodelabel[0] == '@': #parent is external node
                        parentnodelabel = parentnodelabel[1:]
                        amr.external_nodes.append(parentnodelabel)
                    for edgelabel, children in edges: 

                        hypertarget =[] # build hyperedge destination
                        for node, concept in children:
                            if node is not None:
                                if concepts and (not node in amr.node_to_concepts or concept is not None):
                                    amr.node_to_concepts[node] = concept
                                hypertarget.append(node) 
                        hyperchild = tuple(hypertarget)    
                        
                        if edgelabel[0] == '#': # this is a nonterminal Edge 
                            edgelabel = NonterminalLabel(edgelabel[1:])

                        amr._add_triple(parentnodelabel, edgelabel, hyperchild)

                    if stack:
                        state = 6
                        stack.append((CNODE, parentnodelabel, parentconcept))
                    else: 
                        state = 0 
                        amr.roots.append(parentnodelabel)
                     
                else: raise ParserError, "Unexpected token %s at position %i." % (token, pos)

            elif state == 6:
                if type == LexTypes.RPAR: # Pop from stack and add edges

                    edges = []
                    
                    while stack[-1][0] != PNODE: # Pop all edges
                        children = []
                        while stack[-1][0] == CNODE: # Pop all external nodes for hyperedge
                            forgetme, childnodelabel, childconcept = stack.pop()
                            if childnodelabel is not None and childnodelabel[0] == '@': #child is external node
                                childnodelabel = childnodelabel[1:]
                                amr.external_nodes.append(childnodelabel)
                            children.append((childnodelabel, childconcept))

                        assert stack[-1][0] == EDGE 
                        forgetme, edgelabel = stack.pop()
                        edges.append((edgelabel, children))
                   
                    forgetme, parentnodelabel, parentconcept = stack.pop()
                    if concepts and (not parentnodelabel in amr.node_to_concepts or parentconcept is not None): 
                        amr.node_to_concepts[parentnodelabel] = parentconcept
                    if parentnodelabel[0] == '@': #parent is external node
                        parentnodelabel = parentnodelabel[1:]
                        amr.external_nodes.append(parentnodelabel)
                    for edgelabel, children in edges: 

                        hypertarget =[] # build hyperedge destination
                        for node, concept in children:
                            if node is not None: 
                                if concepts and (not node in amr.node_to_concepts or concept is not None):
                                    amr.node_to_concepts[node] = concept
                                hypertarget.append(node) 
                        hyperchild = tuple(hypertarget)    
                        
                        if edgelabel[0] == '#': # this is a nonterminal Edge 
                            edgelabel = NonterminalLabel(edgelabel[1:])
                        amr._add_triple(parentnodelabel, edgelabel, hyperchild)

                    if stack:
                        state = 6
                        stack.append((CNODE, parentnodelabel, parentconcept))
                    else: 
                        state = 0 
                        amr.roots.append(parentnodelabel)
                        
                elif type == LexTypes.COMMA:
                    state = 7

                elif type == LexTypes.EDGELABEL: 
                    stack.append((EDGE, token[1:]))
                    state = 5

                else: raise ParserError, "Unexpected token %s at position %i." % (token, pos)

            elif state == 7: 
                if type == LexTypes.IDENTIFIER:
                    stack.append((CNODE, token, None)) # Push new source node with concept label
                    state = 6
                elif type== LexTypes.LPAR:
                    state = 1
                else: raise ParserError, "Unexpected token %s at position %i." % (token, pos)

        return amr
示例#7
0
def compute_smatch_batch(
    gold_filename, test_filename, starts, method, restart_threshold, concept_edges, precise, missing, detailed
):
    """
     Compute SMATCH on two files with pairwise AMRs, one-AMR-per-line.

     The two files are read in lockstep: line i of the gold file is scored
     against line i of the test file. Per-pair scores are collected as
     (score, number of gold triples) tuples and handed to ``mean`` for the
     final summary (presumably a weighted mean -- confirm against ``mean``).

     Parameters:
       gold_filename      -- path of the gold AMR file.
       test_filename      -- path of the test AMR file.
       starts, method, restart_threshold
                          -- forwarded unchanged to
                             compute_smatch_hill_climbing (used when
                             ``precise`` is false).
       concept_edges      -- if true, both graphs are rebuilt with
                             Hgraph.from_concept_edge_labels after parsing.
       precise            -- if true, score with compute_smatch_precise
                             instead of hill climbing.
       missing            -- if false, a test line that is empty or starts
                             with "#" is scored as P = R = F = 0.0; if true,
                             such entries are left out of the averages.
       detailed           -- if true, print the scores of every pair;
                             otherwise print one progress dot per pair.

     Returns None. The summary is written to stdout. The process exits with
     status 1 if either file cannot be opened.
     """
    try:
        gold_file = open(gold_filename)
    except IOError:
        sys.stderr.write("ERROR: Could not open gold AMR file %s.\n" % gold_filename)
        sys.exit(1)
    try:
        test_file = open(test_filename)
    except IOError:
        gold_file.close()  # do not leak the handle that was already opened
        sys.stderr.write("ERROR: Could not open test AMR file %s.\n" % test_filename)
        sys.exit(1)

    ps, rs, fs = [], [], []
    # NOTE: tiburonfailct and decodefailct are tallied but not part of the
    # summary line; only emptylinect and parsefailct are reported.
    tiburonfailct = 0
    parsefailct = 0
    totalct = 0
    decodefailct = 0
    emptylinect = 0

    try:
        while True:
            gold = gold_file.readline()
            test = test_file.readline().strip()
            if not gold:  # EOF
                break
            gold = gold.strip()
            if not gold:
                sys.stderr.write("WARNING: Empty line in gold AMR file. Skipping entry.\n")
                continue
            totalct += 1

            try:
                amr_gold = Hgraph.from_string(gold)
                if concept_edges:  # rebuild normal AMR with concepts attached to nodes.
                    amr_gold = Hgraph.from_concept_edge_labels(amr_gold)
                weight = len(amr_gold.triples())
            except Exception as e:
                sys.stderr.write("%s\n" % (e,))
                sys.stderr.write("WARNING: Could not parse gold AMR. Skipping entry.\n")
                continue

            if not test or test.startswith("#"):
                # No usable test AMR for this entry. The failure bookkeeping
                # used to sit in a branch that could never execute, so the
                # "empty line" count in the summary was always zero.
                if test == "# Tiburon failed.":
                    tiburonfailct += 1
                elif test == "# Decoding failed.":
                    decodefailct += 1
                emptylinect += 1
                if not missing:
                    rs.append((0.0, weight))
                    ps.append((0.0, weight))
                    fs.append((0.0, weight))
                continue

            try:
                # Parse once; the concept-edge rebuild is the only optional step.
                amr_test = Hgraph.from_string(test)
                if concept_edges:  # rebuild normal AMR with concepts attached to nodes.
                    amr_test = Hgraph.from_concept_edge_labels(amr_test)

                if precise:
                    p, r, f = compute_smatch_precise(amr_gold, amr_test)
                else:
                    p, r, f = compute_smatch_hill_climbing(
                        amr_gold, amr_test, starts=starts, method=method, restart_threshold=restart_threshold
                    )
                if detailed:
                    sys.stdout.write("P:%f R:%f F:%f \n" % (p, r, f))
                else:
                    sys.stdout.write(".")
                    sys.stdout.flush()
                ps.append((p, weight))
                rs.append((r, weight))
                fs.append((f, weight))
            except pyparsing.ParseException:
                # A test AMR that fails to parse is counted but contributes
                # no score, regardless of ``missing``.
                parsefailct += 1
    finally:
        gold_file.close()
        test_file.close()

    sys.stdout.write("\n")
    avgp = mean(ps)
    avgr = mean(rs)
    avgf = mean(fs)
    sys.stdout.write(
        "Total: %i\tFail(empty line): %i\tFail(invalid AMR): %i\n" % (totalct, emptylinect, parsefailct)
    )
    sys.stdout.write("MEAN SMATCH: P:%f R:%f F:%f \n" % (avgp, avgr, avgf))
示例#8
0
def compute_smatch_batch(gold_filename, test_filename, starts, method,
                         restart_threshold, concept_edges, precise, missing,
                         detailed):
    """
     Compute SMATCH on two files with pairwise AMRs, one-AMR-per-line.

     Line i of the gold file is scored against line i of the test file.
     Each pair contributes (score, number of gold triples) tuples to the
     precision, recall and f-score lists, which are then passed to ``mean``
     for the summary (presumably a weighted mean -- confirm against
     ``mean``).

     Parameters:
       gold_filename      -- path of the gold AMR file.
       test_filename      -- path of the test AMR file.
       starts, method, restart_threshold
                          -- passed through to
                             compute_smatch_hill_climbing (only used when
                             ``precise`` is false).
       concept_edges      -- if true, each parsed graph is rebuilt with
                             Hgraph.from_concept_edge_labels.
       precise            -- if true, use compute_smatch_precise.
       missing            -- if false, a test line that is empty or starts
                             with "#" counts as P = R = F = 0.0; if true,
                             it is excluded from the averages.
       detailed           -- if true, print per-pair scores; otherwise
                             print a progress dot for each scored pair.

     Returns None and writes the summary to stdout. Exits with status 1
     when either input file cannot be opened.
     """
    try:
        gold_file = open(gold_filename)
    except IOError:
        sys.stderr.write("ERROR: Could not open gold AMR file %s.\n" %
                         gold_filename)
        sys.exit(1)
    try:
        test_file = open(test_filename)
    except IOError:
        # The gold file is already open at this point; release it first.
        gold_file.close()
        sys.stderr.write("ERROR: Could not open test AMR file %s.\n" %
                         test_filename)
        sys.exit(1)

    ps, rs, fs = [], [], []
    # NOTE: the Tiburon/decoding counters are maintained but only
    # emptylinect and parsefailct appear in the summary line.
    tiburonfailct = 0
    parsefailct = 0
    totalct = 0
    decodefailct = 0
    emptylinect = 0

    try:
        while True:
            gold = gold_file.readline()
            test = test_file.readline().strip()
            if not gold:  # EOF
                break
            gold = gold.strip()
            if not gold:
                sys.stderr.write(
                    "WARNING: Empty line in gold AMR file. Skipping entry.\n")
                continue
            totalct += 1

            try:
                amr_gold = Hgraph.from_string(gold)
                if concept_edges:
                    # rebuild normal AMR with concepts attached to nodes.
                    amr_gold = Hgraph.from_concept_edge_labels(amr_gold)
                gold_size = len(amr_gold.triples())
            except Exception as e:
                sys.stderr.write("%s\n" % (e, ))
                sys.stderr.write(
                    "WARNING: Could not parse gold AMR. Skipping entry.\n")
                continue

            test_available = bool(test) and not test.startswith("#")
            if not test_available:
                # Previously this bookkeeping lived in an unreachable branch,
                # which left the reported "empty line" count stuck at zero.
                if test == "# Tiburon failed.":
                    tiburonfailct += 1
                elif test == "# Decoding failed.":
                    decodefailct += 1
                emptylinect += 1
                if not missing:
                    rs.append((0.0, gold_size))
                    ps.append((0.0, gold_size))
                    fs.append((0.0, gold_size))
                continue

            try:
                # One parse is enough; only the rebuild step is conditional.
                amr_test = Hgraph.from_string(test)
                if concept_edges:
                    # rebuild normal AMR with concepts attached to nodes.
                    amr_test = Hgraph.from_concept_edge_labels(amr_test)

                if precise:
                    p, r, f = compute_smatch_precise(amr_gold, amr_test)
                else:
                    p, r, f = compute_smatch_hill_climbing(
                        amr_gold,
                        amr_test,
                        starts=starts,
                        method=method,
                        restart_threshold=restart_threshold)
                if detailed:
                    sys.stdout.write("P:%f R:%f F:%f \n" % (p, r, f))
                else:
                    sys.stdout.write(".")
                    sys.stdout.flush()
                ps.append((p, gold_size))
                rs.append((r, gold_size))
                fs.append((f, gold_size))
            except pyparsing.ParseException:
                # Unparseable test AMRs are counted, not scored.
                parsefailct += 1
    finally:
        gold_file.close()
        test_file.close()

    sys.stdout.write("\n")
    avgp = mean(ps)
    avgr = mean(rs)
    avgf = mean(fs)
    sys.stdout.write(
        "Total: %i\tFail(empty line): %i\tFail(invalid AMR): %i\n" %
        (totalct, emptylinect, parsefailct))
    sys.stdout.write("MEAN SMATCH: P:%f R:%f F:%f \n" % (avgp, avgr, avgf))