class SimpleDependencyExampleBuilder2(ExampleBuilder):
    """
    Binary example builder driven by the dependency parse.

    One example is produced for every edge of the sentence's dependency graph.
    The example is positive (1) when the interaction graph contains an edge
    between the same two tokens, in either direction, and negative (-1)
    otherwise.
    """
    def __init__(self):
        ExampleBuilder.__init__(self)
        self.featureBuilder = EdgeFeatureBuilder(self.featureSet)

    def buildExamples(self, sentenceGraph):
        """
        Return a list of (id, category, features, extra) tuples, one per
        dependency edge of sentenceGraph.

        No entity-head filtering is applied: every dependency edge yields
        an example.
        """
        examples = []
        for exampleIndex, depEdge in enumerate(sentenceGraph.dependencyGraph.edges()):
            tokenA, tokenB = depEdge[0], depEdge[1]
            # The direction of the interaction is ignored when choosing the class
            interactions = sentenceGraph.interactionGraph
            linked = interactions.has_edge(tokenA, tokenB) or interactions.has_edge(tokenB, tokenA)
            category = 1 if linked else -1
            # Generate features for the edge
            features = self.buildFeatures(depEdge, sentenceGraph)
            # Extra attributes (e.g. for the visualizer): the two tokens are
            # stored ordered by the numeric suffix of their "id" attribute
            indexA = int(tokenA.attrib["id"].split("_")[-1])
            indexB = int(tokenB.attrib["id"].split("_")[-1])
            if indexA < indexB:
                first, second = tokenA, tokenB
            else:
                first, second = tokenB, tokenA
            extra = {"xtype":"edge","type":"i","t1":first,"t2":second}
            exampleId = sentenceGraph.getSentenceId()+".x"+str(exampleIndex)
            examples.append( (exampleId,category,features,extra) )
        return examples

    def buildFeatures(self, depEdge, sentenceGraph):
        """
        Return a new feature dictionary for depEdge, filled in by the
        edge feature builder.
        """
        features = {}
        builder = self.featureBuilder
        # The builder writes into whichever vector is currently attached to it
        builder.setFeatureVector(features)
        builder.buildEdgeFeatures(depEdge, sentenceGraph, "dep_", text=True, POS=True, annType=True, maskNames=True)
        builder.buildAttachedEdgeFeatures(depEdge, sentenceGraph, "", text=False, POS=True, annType=False, maskNames=True)
        builder.buildLinearOrderFeatures(depEdge)
        # Detach the vector again before handing it to the caller
        builder.setFeatureVector(None)
        return features
示例#2
0
class SingleEdgeExampleBuilder(ExampleBuilder):
    """
    Builds examples based on parse dependencies.

    An example is generated for each dependency edge. If there is an annotated
    interaction edge between the two tokens, the example is positive,
    otherwise negative. The behaviour depends on which of the following
    names are found in ``style`` (tested with ``in``):

    * "binary": categories are 1 (interaction) and -1 (no interaction).
      Without it, categories are ids from ``self.classSet`` keyed by the
      interaction type, and negatives use the name "neg" with category 1.
    * "headsOnly": dependencies are skipped unless both tokens have a
      non-None entry in ``sentenceGraph.tokenIsEntityHead``.
    * "directed": each direction of a dependency gets its own example(s),
      and (in non-binary mode) reverse interactions get a "_rev" suffix.
    """
    def __init__(self, style):
        ExampleBuilder.__init__(self)
        self.featureBuilder = EdgeFeatureBuilder(self.featureSet)
        self.style = style
        if "binary" not in style:
            self.classSet = IdSet(1)
            # Register the negative class before any other class name. The
            # lookup is kept outside the assert so that it is still executed
            # when Python runs with -O (which strips assert statements);
            # buildExample hard-codes category 1 for negatives.
            negativeId = self.classSet.getId("neg")
            assert negativeId == 1

    def buildExamples(self, sentenceGraph):
        """
        Return the list of example tuples for all dependency edges of
        sentenceGraph (see buildExample for the tuple layout).

        For each dependency the forward direction is handled first, then the
        reverse one. Every interaction edge found for a direction yields one
        example. In "directed" style a direction without an interaction edge
        yields one negative example; otherwise a single negative example is
        produced only when neither direction had an interaction.
        """
        examples = []
        exampleIndex = 0
        for depEdge in sentenceGraph.dependencyGraph.edges():
            if "headsOnly" in self.style and (
                    sentenceGraph.tokenIsEntityHead[depEdge[0]] is None
                    or sentenceGraph.tokenIsEntityHead[depEdge[1]] is None):
                continue

            edgeFound = False
            directions = ((False, depEdge[0], depEdge[1]),
                          (True, depEdge[1], depEdge[0]))
            for isReverse, tokenA, tokenB in directions:
                if sentenceGraph.interactionGraph.has_edge(tokenA, tokenB):
                    intEdges = sentenceGraph.interactionGraph.get_edge(
                        tokenA, tokenB)
                    for intEdge in intEdges:
                        examples.append(
                            self.buildExample(depEdge, intEdge, isReverse,
                                              exampleIndex, sentenceGraph))
                        exampleIndex += 1
                        edgeFound = True
                elif "directed" in self.style:
                    examples.append(
                        self.buildExample(depEdge, None, None, exampleIndex,
                                          sentenceGraph))
                    exampleIndex += 1

            if not edgeFound and "directed" not in self.style:
                examples.append(
                    self.buildExample(depEdge, None, None, exampleIndex,
                                      sentenceGraph))
                exampleIndex += 1

        return examples

    def buildExample(self, depEdge, intEdge, isReverse, exampleIndex,
                     sentenceGraph):
        """
        Build a single example tuple (id, category, features, extra).

        depEdge       -- the dependency edge the features are built from
        intEdge       -- the matching interaction edge, or None for a negative
        isReverse     -- True if intEdge runs against the dependency direction
        exampleIndex  -- running index used as the suffix of the example id
        sentenceGraph -- the sentence the edge belongs to
        """
        if "binary" in self.style:
            categoryName = "i"
            category = 1 if intEdge is not None else -1
        elif intEdge is not None:
            categoryName = intEdge.attrib["type"]
            if isReverse and "directed" in self.style:
                categoryName += "_rev"
            category = self.classSet.getId(categoryName)
        else:
            categoryName = "neg"
            category = 1

        features = self.buildFeatures(depEdge, sentenceGraph)

        # Define extra attributes f.e. for the visualizer. The tokens are
        # stored ordered by the numeric suffix of their "id" attribute, and
        # "deprev" records whether that swapped the dependency's own order.
        index0 = int(depEdge[0].attrib["id"].split("_")[-1])
        index1 = int(depEdge[1].attrib["id"].split("_")[-1])
        if index0 < index1:
            t1, t2, depReversed = depEdge[0], depEdge[1], False
        else:
            t1, t2, depReversed = depEdge[1], depEdge[0], True
        extra = {
            "xtype": "edge",
            "type": categoryName,
            "t1": t1,
            "t2": t2,
            "deprev": depReversed
        }
        return (sentenceGraph.getSentenceId() + ".x" + str(exampleIndex),
                category, features, extra)

    def buildFeatures(self, depEdge, sentenceGraph):
        """
        Return a new feature dictionary for depEdge, filled in by the
        edge feature builder.
        """
        features = {}
        # The builder writes into whichever vector is currently attached to it
        self.featureBuilder.setFeatureVector(features)
        self.featureBuilder.buildEdgeFeatures(depEdge,
                                              sentenceGraph,
                                              "dep_",
                                              text=True,
                                              POS=True,
                                              annType=True,
                                              maskNames=True)
        self.featureBuilder.buildAttachedEdgeFeatures(depEdge,
                                                      sentenceGraph,
                                                      "",
                                                      text=False,
                                                      POS=True,
                                                      annType=False,
                                                      maskNames=True)
        self.featureBuilder.buildLinearOrderFeatures(depEdge)
        self.featureBuilder.setFeatureVector(None)
        return features
示例#3
0
class GeneralEntityRecognizer(ExampleBuilder):
    """
    Builds one binary example per token for entity (trigger word) recognition.

    Tokens flagged in sentenceGraph.tokenIsName are skipped. A remaining token
    is a positive example (1) when it has a non-None entry in
    sentenceGraph.tokenIsEntityHead, and a negative example (-1) otherwise.
    """

    def __init__(self):
        ExampleBuilder.__init__(self)
        self.edgeFeatureBuilder = EdgeFeatureBuilder(self.featureSet)
        self.entityFeatureBuilder = TokenFeatureBuilder(self.featureSet)

    def buildExamples(self, sentenceGraph, exampleIndex = 0):
        """
        Return a list of (id, category, features, extra) tuples, one for each
        token of sentenceGraph that is not a named entity.

        sentenceGraph -- the sentence to process
        exampleIndex  -- index given to the first generated example; it is
                         incremented by one for every example built
        """
        examples = []

        # Number of named-entity tokens in the sentence, used as a feature
        namedEntityCount = sum(1 for token in sentenceGraph.tokens
                               if sentenceGraph.tokenIsName[token])

        for i, token in enumerate(sentenceGraph.tokens):
            # Recognize only non-named entities (i.e. interaction words)
            if sentenceGraph.tokenIsName[token]:
                continue

            # CLASS
            if sentenceGraph.tokenIsEntityHead[token] is not None:
                category = 1
            else:
                category = -1

            # FEATURES
            # NOTE: the order of the getId calls below is kept fixed, on the
            # assumption that the feature set hands out ids in first-call
            # order -- TODO confirm against the feature set implementation
            features = {}
            # Main features
            textUpper = token.get("text")
            text = textUpper.lower()
            features[self.featureSet.getId("txt_"+text)] = 1
            features[self.featureSet.getId("POS_"+token.get("POS"))] = 1
            stem = PorterStemmer.stem(text)
            features[self.featureSet.getId("stem_"+stem)] = 1
            # Whatever is left of the text after the first len(stem) characters
            features[self.featureSet.getId("nonstem_"+text[len(stem):])] = 1
            # Dictionary features
            if text in intWords:
                features[self.featureSet.getId("dict")] = 1
                features[self.featureSet.getId("dict_def_"+wordDict[text])] = 1
            # Named entity count
            features[self.featureSet.getId("neCount")] = namedEntityCount
            # Linear order features
            self.entityFeatureBuilder.setFeatureVector(features)
            self.entityFeatureBuilder.buildLinearOrderFeatures(i, sentenceGraph, 3, 3)
            # Content
            self.entityFeatureBuilder.buildContentFeatures(i, textUpper, duplets=True, triplets=True)
            self.entityFeatureBuilder.setFeatureVector(None)
            # Attached edges: dependencies entering and leaving the token
            self.edgeFeatureBuilder.setFeatureVector(features)
            for edge in sentenceGraph.dependencyGraph.in_edges(token):
                self.edgeFeatureBuilder.buildEdgeFeatures(edge, sentenceGraph, "in_", text=True, POS=True, annType=False, maskNames=True)
            for edge in sentenceGraph.dependencyGraph.out_edges(token):
                self.edgeFeatureBuilder.buildEdgeFeatures(edge, sentenceGraph, "out_", text=True, POS=True, annType=False, maskNames=True)
            self.edgeFeatureBuilder.setFeatureVector(None)

            extra = {"xtype":"token","t":token}
            examples.append( (sentenceGraph.getSentenceId()+".x"+str(exampleIndex),category,features,extra) )
            exampleIndex += 1
        return examples
class SingleEdgeExampleBuilder(ExampleBuilder):
    """
    Builds examples based on parse dependencies. An example is generated for each dependency.
    If there is an annotated interaction edge between those tokens, then the example is positive,
    otherwise negative.

    The names found in ``style`` (tested with ``in``) select the behaviour:
    "binary" gives categories 1 / -1 instead of class ids from self.classSet,
    "headsOnly" skips dependencies unless both tokens have a non-None entry in
    sentenceGraph.tokenIsEntityHead, and "directed" handles the two directions
    of a dependency as separate examples.
    """
    def __init__(self, style):
        ExampleBuilder.__init__(self)
        self.featureBuilder = EdgeFeatureBuilder(self.featureSet)
        self.style = style
        if "binary" not in style:
            self.classSet = IdSet(1)
            # Register "neg" before any other class name. The call is made
            # outside the assert so it still runs under "python -O", which
            # removes assert statements; buildExample relies on negatives
            # having category 1.
            negId = self.classSet.getId("neg")
            assert negId == 1

    def buildExamples(self, sentenceGraph):
        """
        Return the example tuples built from all dependency edges of sentenceGraph.

        The forward direction of a dependency is processed before the reverse
        direction. Each interaction edge found yields one example. With
        "directed" style, a direction without an interaction yields a negative
        example; without it, one negative example is built only if no
        interaction was found in either direction.
        """
        examples = []
        exampleIndex = 0
        for depEdge in sentenceGraph.dependencyGraph.edges():
            if "headsOnly" in self.style:
                headFlags = sentenceGraph.tokenIsEntityHead
                if headFlags[depEdge[0]] is None or headFlags[depEdge[1]] is None:
                    continue

            edgeFound = False
            for isReverse, source, target in ((False, depEdge[0], depEdge[1]), (True, depEdge[1], depEdge[0])):
                if sentenceGraph.interactionGraph.has_edge(source, target):
                    for intEdge in sentenceGraph.interactionGraph.get_edge(source, target):
                        examples.append( self.buildExample(depEdge, intEdge, isReverse, exampleIndex, sentenceGraph) )
                        exampleIndex += 1
                        edgeFound = True
                elif "directed" in self.style:
                    examples.append( self.buildExample(depEdge, None, None, exampleIndex, sentenceGraph) )
                    exampleIndex += 1

            if not edgeFound and "directed" not in self.style:
                examples.append( self.buildExample(depEdge, None, None, exampleIndex, sentenceGraph) )
                exampleIndex += 1

        return examples

    def buildExample(self, depEdge, intEdge, isReverse, exampleIndex, sentenceGraph):
        """
        Return one example as the tuple (id, category, features, extra).

        intEdge is the interaction edge matching depEdge, or None for a
        negative example. isReverse tells whether intEdge runs against the
        direction of depEdge. exampleIndex becomes the suffix of the id.
        """
        if "binary" in self.style:
            categoryName = "i"
            if intEdge is not None:
                category = 1
            else:
                category = -1
        elif intEdge is not None:
            categoryName = intEdge.attrib["type"]
            if isReverse and "directed" in self.style:
                categoryName += "_rev"
            category = self.classSet.getId(categoryName)
        else:
            categoryName = "neg"
            category = 1

        features = self.buildFeatures(depEdge,sentenceGraph)

        # Define extra attributes f.e. for the visualizer: tokens are ordered by
        # the numeric suffix of their id, "deprev" tells if depEdge was flipped
        depReversed = not (int(depEdge[0].attrib["id"].split("_")[-1]) < int(depEdge[1].attrib["id"].split("_")[-1]))
        if depReversed:
            t1, t2 = depEdge[1], depEdge[0]
        else:
            t1, t2 = depEdge[0], depEdge[1]
        extra = {"xtype":"edge","type":categoryName,"t1":t1,"t2":t2}
        extra["deprev"] = depReversed
        return (sentenceGraph.getSentenceId()+".x"+str(exampleIndex),category,features,extra)

    def buildFeatures(self, depEdge, sentenceGraph):
        """
        Return a new feature dictionary for depEdge, filled in by the edge feature builder.
        """
        features = {}
        # The builder writes into the vector that is currently attached to it
        self.featureBuilder.setFeatureVector(features)
        self.featureBuilder.buildEdgeFeatures(depEdge, sentenceGraph, "dep_", text=True, POS=True, annType=True, maskNames=True)
        self.featureBuilder.buildAttachedEdgeFeatures(depEdge, sentenceGraph, "", text=False, POS=True, annType=False, maskNames=True)
        self.featureBuilder.buildLinearOrderFeatures(depEdge)
        self.featureBuilder.setFeatureVector(None)
        return features
class SingleDependencyTypeExampleBuilder(ExampleBuilder):
    """
    Builds multiclass examples from dependencies between entity head tokens.

    Only dependency edges whose both tokens have a non-None entry in
    sentenceGraph.tokenIsEntityHead are used. The class of an example is the
    type of the matching interaction edge (with a "_rev" suffix when the
    interaction runs against the dependency), or "neg" (category 1) when the
    two tokens have no interaction edge.
    """
    def __init__(self):
        ExampleBuilder.__init__(self)
        self.classSet = IdSet(1)
        # Register "neg" before any other class name. The call is made outside
        # the assert so it still runs under "python -O", which removes assert
        # statements; buildExample relies on negatives having category 1.
        negId = self.classSet.getId("neg")
        assert negId == 1
        self.featureBuilder = EdgeFeatureBuilder(self.featureSet)

    def buildExamples(self, sentenceGraph):
        """
        Return the example tuples built from the dependency edges of sentenceGraph.

        Interaction edges in the direction of the dependency take precedence:
        the reverse direction is only looked at when there is no forward
        interaction, and a negative example is built only when neither exists.
        """
        examples = []
        exampleIndex = 0
        for depEdge in sentenceGraph.dependencyGraph.edges():
            if (sentenceGraph.tokenIsEntityHead[depEdge[0]] is None) or (sentenceGraph.tokenIsEntityHead[depEdge[1]] is None):
                continue

            if sentenceGraph.interactionGraph.has_edge(depEdge[0], depEdge[1]):
                intEdges = sentenceGraph.interactionGraph.get_edge(depEdge[0], depEdge[1])
                isReverse = False
            elif sentenceGraph.interactionGraph.has_edge(depEdge[1], depEdge[0]):
                intEdges = sentenceGraph.interactionGraph.get_edge(depEdge[1], depEdge[0])
                isReverse = True
            else:
                # No interaction in either direction: one negative example
                intEdges = [None]
                isReverse = None

            for intEdge in intEdges:
                examples.append( self.buildExample(depEdge, intEdge, isReverse, exampleIndex, sentenceGraph) )
                exampleIndex += 1

        return examples

    def buildExample(self, depEdge, intEdge, isReverse, exampleIndex, sentenceGraph):
        """
        Return one example as the tuple (id, category, features, extra).

        intEdge is the interaction edge matching depEdge, or None for a
        negative example. isReverse tells whether intEdge runs against the
        direction of depEdge. exampleIndex becomes the suffix of the id.
        """
        if intEdge is not None:
            categoryName = intEdge.attrib["type"]
            if isReverse:
                categoryName += "_rev"
            category = self.classSet.getId(categoryName)
        else:
            categoryName = "neg"
            category = 1

        features = self.buildFeatures(depEdge,sentenceGraph)

        # Define extra attributes f.e. for the visualizer: tokens are ordered by
        # the numeric suffix of their id, "deprev" tells if depEdge was flipped
        if int(depEdge[0].attrib["id"].split("_")[-1]) < int(depEdge[1].attrib["id"].split("_")[-1]):
            t1, t2, depReversed = depEdge[0], depEdge[1], False
        else:
            t1, t2, depReversed = depEdge[1], depEdge[0], True
        extra = {"xtype":"edge","type":categoryName,"t1":t1,"t2":t2}
        extra["deprev"] = depReversed
        return (sentenceGraph.getSentenceId()+".x"+str(exampleIndex),category,features,extra)

    def buildFeatures(self, depEdge, sentenceGraph):
        """
        Return a new feature dictionary for depEdge, filled in by the edge feature builder.
        """
        features = {}
        # The builder writes into the vector that is currently attached to it
        self.featureBuilder.setFeatureVector(features)
        self.featureBuilder.buildEdgeFeatures(depEdge, sentenceGraph, "dep_", text=True, POS=True, annType=True, maskNames=True)

        # Attached edges
        self.featureBuilder.buildAttachedEdgeFeatures(depEdge, sentenceGraph, "", text=False, POS=True, annType=False, maskNames=True)

        # Linear order
        self.featureBuilder.buildLinearOrderFeatures(depEdge)
        self.featureBuilder.setFeatureVector(None)
        return features
class SimpleDependencyExampleBuilder2(ExampleBuilder):
    """
    Generates one binary example for each dependency of a sentence.

    A dependency whose two tokens are also joined by an interaction edge
    (regardless of its direction) becomes a positive example with category 1.
    Every other dependency becomes a negative example with category -1.
    """
    def __init__(self):
        ExampleBuilder.__init__(self)
        self.featureBuilder = EdgeFeatureBuilder(self.featureSet)

    def buildExamples(self, sentenceGraph):
        """
        Return a list with one (id, category, features, extra) tuple for
        every edge in the dependency graph of sentenceGraph.

        All dependencies are used; they are not restricted to those that
        connect entity heads.
        """
        examples = []
        # Loop through all the dependencies in the sentence
        for exampleIndex, depEdge in enumerate(
                sentenceGraph.dependencyGraph.edges()):
            governor = depEdge[0]
            dependent = depEdge[1]
            # Positive when an interaction edge exists in either direction
            if (sentenceGraph.interactionGraph.has_edge(governor, dependent)
                    or sentenceGraph.interactionGraph.has_edge(
                        dependent, governor)):
                category = 1
            else:
                category = -1
            # Generate features for the edge
            features = self.buildFeatures(depEdge, sentenceGraph)
            # Extra attributes f.e. for the visualizer; the tokens are put in
            # the order given by the numeric suffix of their "id" attribute
            governorIndex = int(governor.attrib["id"].split("_")[-1])
            dependentIndex = int(dependent.attrib["id"].split("_")[-1])
            if governorIndex < dependentIndex:
                orderedTokens = (governor, dependent)
            else:
                orderedTokens = (dependent, governor)
            extra = {
                "xtype": "edge",
                "type": "i",
                "t1": orderedTokens[0],
                "t2": orderedTokens[1]
            }
            exampleId = sentenceGraph.getSentenceId() + ".x" + str(
                exampleIndex)
            examples.append((exampleId, category, features, extra))
        return examples

    def buildFeatures(self, depEdge, sentenceGraph):
        """
        Return a new feature dictionary for depEdge, filled in by the
        edge feature builder.
        """
        features = {}
        edgeFeatures = self.featureBuilder
        # The builder writes into whichever vector is currently attached to it
        edgeFeatures.setFeatureVector(features)
        edgeFeatures.buildEdgeFeatures(
            depEdge, sentenceGraph, "dep_",
            text=True, POS=True, annType=True, maskNames=True)
        edgeFeatures.buildAttachedEdgeFeatures(
            depEdge, sentenceGraph, "",
            text=False, POS=True, annType=False, maskNames=True)
        edgeFeatures.buildLinearOrderFeatures(depEdge)
        # Detach the vector again before returning it
        edgeFeatures.setFeatureVector(None)
        return features