class SimpleDependencyExampleBuilder2(ExampleBuilder):
    """
    Produces one binary example for every dependency edge of a sentence.

    An example is positive (1) when the interaction graph contains an edge
    between the same two tokens, in either direction, and negative (-1)
    otherwise. Dependencies are not filtered by entity heads: every edge
    returned by the dependency graph yields exactly one example.
    """

    def __init__(self):
        ExampleBuilder.__init__(self)
        self.featureBuilder = EdgeFeatureBuilder(self.featureSet)

    def buildExamples(self, sentenceGraph):
        """
        Return a list of (id, category, features, extra) tuples, one per
        dependency edge.

        The id is the sentence id followed by ".x" and the running example
        number. "extra" carries additional attributes (per the original
        comment, e.g. for the visualizer): the two tokens are stored as
        "t1"/"t2", ordered by the integer after the last "_" of their "id"
        attribute.
        """
        examples = []
        for exampleIndex, edge in enumerate(sentenceGraph.dependencyGraph.edges()):
            source = edge[0]
            target = edge[1]

            # Direction is ignored: an interaction edge either way is positive
            linked = (sentenceGraph.interactionGraph.has_edge(source, target)
                      or sentenceGraph.interactionGraph.has_edge(target, source))
            category = 1 if linked else -1

            features = self.buildFeatures(edge, sentenceGraph)

            # Store the token with the smaller id number first
            sourceNumber = int(source.attrib["id"].split("_")[-1])
            targetNumber = int(target.attrib["id"].split("_")[-1])
            first, second = (source, target) if sourceNumber < targetNumber else (target, source)
            extra = {"xtype": "edge", "type": "i", "t1": first, "t2": second}

            exampleId = sentenceGraph.getSentenceId() + ".x" + str(exampleIndex)
            examples.append((exampleId, category, features, extra))
        return examples

    def buildFeatures(self, depEdge, sentenceGraph):
        """
        Return a new feature dictionary for one dependency edge.

        The shared feature builder is pointed at the new dictionary, asked
        for edge, attached-edge and linear-order features, and detached
        again before the dictionary is returned.
        """
        vector = {}
        builder = self.featureBuilder
        builder.setFeatureVector(vector)
        builder.buildEdgeFeatures(depEdge, sentenceGraph, "dep_",
                                  text=True, POS=True, annType=True, maskNames=True)
        builder.buildAttachedEdgeFeatures(depEdge, sentenceGraph, "",
                                          text=False, POS=True, annType=False, maskNames=True)
        builder.buildLinearOrderFeatures(depEdge)
        builder.setFeatureVector(None)
        return vector
class SingleEdgeExampleBuilder(ExampleBuilder):
    """
    Builds examples based on parse dependencies.

    Examples are generated for each dependency edge. If the interaction
    graph has an edge between the same tokens the example is positive,
    otherwise negative. The behaviour is controlled by the flags found in
    "style" (tested with the "in" operator):

    "binary"    -- categories are 1 / -1; otherwise a class id is taken from
                   self.classSet, with the negative class hard-coded as 1.
    "directed"  -- both directions of every dependency are considered
                   separately and reverse interactions get a "_rev" suffix.
    "headsOnly" -- dependencies are skipped unless both tokens have a
                   non-None entry in sentenceGraph.tokenIsEntityHead.
    """

    def __init__(self, style):
        ExampleBuilder.__init__(self)
        self.featureBuilder = EdgeFeatureBuilder(self.featureSet)
        self.style = style
        if "binary" not in style:
            self.classSet = IdSet(1)
            # Request the id outside of the assert statement: asserts are
            # removed under "python -O", and buildExample relies on "neg"
            # being class 1 (presumably getId registers the name on first
            # use -- verify against IdSet).
            negativeId = self.classSet.getId("neg")
            assert negativeId == 1

    def buildExamples(self, sentenceGraph):
        """
        Return the list of example tuples for one sentence.

        For each dependency edge the interaction graph is queried first in
        the dependency's own direction and then in the reverse direction.
        Every interaction edge found yields one positive example. In
        "directed" style each direction without an interaction edge yields
        a negative example; otherwise a single negative example is
        generated when neither direction produced a positive one.
        """
        examples = []
        exampleIndex = 0
        directed = "directed" in self.style
        headsOnly = "headsOnly" in self.style
        for depEdge in sentenceGraph.dependencyGraph.edges():
            if headsOnly and (sentenceGraph.tokenIsEntityHead[depEdge[0]] is None
                              or sentenceGraph.tokenIsEntityHead[depEdge[1]] is None):
                continue

            edgeFound = False
            # (source, target, isReverse): forward direction first, then reverse
            directions = ((depEdge[0], depEdge[1], False),
                          (depEdge[1], depEdge[0], True))
            for source, target, isReverse in directions:
                if sentenceGraph.interactionGraph.has_edge(source, target):
                    intEdges = sentenceGraph.interactionGraph.get_edge(source, target)
                    for intEdge in intEdges:
                        examples.append(self.buildExample(depEdge, intEdge, isReverse,
                                                          exampleIndex, sentenceGraph))
                        exampleIndex += 1
                        edgeFound = True
                elif directed:
                    examples.append(self.buildExample(depEdge, None, None,
                                                      exampleIndex, sentenceGraph))
                    exampleIndex += 1

            if not edgeFound and not directed:
                examples.append(self.buildExample(depEdge, None, None,
                                                  exampleIndex, sentenceGraph))
                exampleIndex += 1
        return examples

    def buildExample(self, depEdge, intEdge, isReverse, exampleIndex, sentenceGraph):
        """
        Return one (id, category, features, extra) tuple.

        depEdge      -- the dependency edge the example is built for
        intEdge      -- the matching interaction edge, or None for a
                        negative example
        isReverse    -- True when intEdge runs against the dependency's
                        direction (only used in "directed" style)
        exampleIndex -- running number appended to the sentence id
        """
        if "binary" in self.style:
            categoryName = "i"
            if intEdge is not None:
                category = 1
            else:
                category = -1
        elif intEdge is not None:
            categoryName = intEdge.attrib["type"]
            if isReverse and "directed" in self.style:
                categoryName += "_rev"
            category = self.classSet.getId(categoryName)
        else:
            categoryName = "neg"
            category = 1  # id given to "neg" in __init__

        features = self.buildFeatures(depEdge, sentenceGraph)
        extra = self._buildExtra(depEdge, categoryName)
        exampleId = sentenceGraph.getSentenceId() + ".x" + str(exampleIndex)
        return (exampleId, category, features, extra)

    def _buildExtra(self, depEdge, categoryName):
        """
        Return the extra attributes of an example (per the original
        comment, e.g. for the visualizer).

        The tokens are stored as "t1"/"t2" ordered by the integer after the
        last "_" of their "id" attribute; "deprev" is True when that order
        differs from the dependency's own order.
        """
        firstNumber = int(depEdge[0].attrib["id"].split("_")[-1])
        secondNumber = int(depEdge[1].attrib["id"].split("_")[-1])
        if firstNumber < secondNumber:
            return {"xtype": "edge", "type": categoryName,
                    "t1": depEdge[0], "t2": depEdge[1], "deprev": False}
        return {"xtype": "edge", "type": categoryName,
                "t1": depEdge[1], "t2": depEdge[0], "deprev": True}

    def buildFeatures(self, depEdge, sentenceGraph):
        """
        Return a new feature dictionary for one dependency edge, filled by
        the shared feature builder with edge, attached-edge and linear-order
        features.
        """
        features = {}
        self.featureBuilder.setFeatureVector(features)
        self.featureBuilder.buildEdgeFeatures(depEdge, sentenceGraph, "dep_",
                                              text=True, POS=True, annType=True, maskNames=True)
        self.featureBuilder.buildAttachedEdgeFeatures(depEdge, sentenceGraph, "",
                                                      text=False, POS=True, annType=False,
                                                      maskNames=True)
        self.featureBuilder.buildLinearOrderFeatures(depEdge)
        self.featureBuilder.setFeatureVector(None)
        return features
class GeneralEntityRecognizer(ExampleBuilder):
    """
    Builds one binary example for every token that is not a name.

    Tokens flagged in sentenceGraph.tokenIsName are skipped (per the
    original comment, only non-named entities, i.e. interaction words, are
    recognized). A remaining token is positive (1) when its entry in
    sentenceGraph.tokenIsEntityHead is not None and negative (-1) otherwise.
    """

    def __init__(self):
        ExampleBuilder.__init__(self)
        self.edgeFeatureBuilder = EdgeFeatureBuilder(self.featureSet)
        self.entityFeatureBuilder = TokenFeatureBuilder(self.featureSet)

    def buildExamples(self, sentenceGraph, exampleIndex=0):
        """
        Return a list of (id, category, features, extra) tuples.

        sentenceGraph -- the sentence whose tokens are turned into examples
        exampleIndex  -- number of the first example; it is appended to the
                         sentence id after ".x" and incremented per example
        """
        examples = []
        tokens = sentenceGraph.tokens
        namedEntityCount = sum(1 for token in tokens if sentenceGraph.tokenIsName[token])

        for i, token in enumerate(tokens):
            # Recognize only non-named entities (i.e. interaction words)
            if sentenceGraph.tokenIsName[token]:
                continue

            if sentenceGraph.tokenIsEntityHead[token] is not None:
                category = 1
            else:
                category = -1

            features = self._buildTokenFeatures(i, token, namedEntityCount, sentenceGraph)

            extra = {"xtype": "token", "t": token}
            exampleId = sentenceGraph.getSentenceId() + ".x" + str(exampleIndex)
            examples.append((exampleId, category, features, extra))
            exampleIndex += 1
        return examples

    def _buildTokenFeatures(self, tokenIndex, token, namedEntityCount, sentenceGraph):
        """
        Return the feature dictionary of a single token.

        The order of the featureSet.getId calls is kept stable on purpose,
        in case ids are handed out in request order (verify against the
        feature set implementation).
        """
        getId = self.featureSet.getId
        features = {}

        # Main features: lower-cased text, POS tag, stem and the remainder
        # of the text after the stem's length
        originalText = token.get("text")
        text = originalText.lower()
        features[getId("txt_" + text)] = 1
        features[getId("POS_" + token.get("POS"))] = 1
        stem = PorterStemmer.stem(text)
        features[getId("stem_" + stem)] = 1
        features[getId("nonstem_" + text[len(stem):])] = 1

        # Dictionary features
        # NOTE(review): assumes every word in intWords is also a key of
        # wordDict -- confirm where those are defined.
        if text in intWords:
            features[getId("dict")] = 1
            features[getId("dict_def_" + wordDict[text])] = 1

        # Number of name tokens in the sentence
        features[getId("neCount")] = namedEntityCount

        # Linear order and content features
        self.entityFeatureBuilder.setFeatureVector(features)
        self.entityFeatureBuilder.buildLinearOrderFeatures(tokenIndex, sentenceGraph, 3, 3)
        self.entityFeatureBuilder.buildContentFeatures(tokenIndex, originalText,
                                                       duplets=True, triplets=True)
        self.entityFeatureBuilder.setFeatureVector(None)

        # Features of the dependency edges attached to the token
        self.edgeFeatureBuilder.setFeatureVector(features)
        for edge in sentenceGraph.dependencyGraph.in_edges(token):
            self.edgeFeatureBuilder.buildEdgeFeatures(edge, sentenceGraph, "in_",
                                                      text=True, POS=True, annType=False,
                                                      maskNames=True)
        for edge in sentenceGraph.dependencyGraph.out_edges(token):
            self.edgeFeatureBuilder.buildEdgeFeatures(edge, sentenceGraph, "out_",
                                                      text=True, POS=True, annType=False,
                                                      maskNames=True)
        self.edgeFeatureBuilder.setFeatureVector(None)
        return features
class SingleEdgeExampleBuilder(ExampleBuilder):
    """
    Builds examples based on parse dependencies.

    Examples are generated for each dependency edge. If the interaction
    graph has an edge between the same tokens the example is positive,
    otherwise negative. The behaviour is controlled by the flags found in
    "style" (tested with the "in" operator):

    "binary"    -- categories are 1 / -1; otherwise a class id is taken from
                   self.classSet, with the negative class hard-coded as 1.
    "directed"  -- both directions of every dependency are considered
                   separately and reverse interactions get a "_rev" suffix.
    "headsOnly" -- dependencies are skipped unless both tokens have a
                   non-None entry in sentenceGraph.tokenIsEntityHead.
    """

    def __init__(self, style):
        ExampleBuilder.__init__(self)
        self.featureBuilder = EdgeFeatureBuilder(self.featureSet)
        self.style = style
        if "binary" not in style:
            self.classSet = IdSet(1)
            # Request the id outside of the assert statement: asserts are
            # removed under "python -O", and buildExample relies on "neg"
            # being class 1 (presumably getId registers the name on first
            # use -- verify against IdSet).
            negativeId = self.classSet.getId("neg")
            assert negativeId == 1

    def buildExamples(self, sentenceGraph):
        """
        Return the list of example tuples for one sentence.

        For each dependency edge the interaction graph is queried first in
        the dependency's own direction and then in the reverse direction.
        Every interaction edge found yields one positive example. In
        "directed" style each direction without an interaction edge yields
        a negative example; otherwise a single negative example is
        generated when neither direction produced a positive one.
        """
        examples = []
        exampleIndex = 0
        directed = "directed" in self.style
        headsOnly = "headsOnly" in self.style
        for depEdge in sentenceGraph.dependencyGraph.edges():
            if headsOnly and (sentenceGraph.tokenIsEntityHead[depEdge[0]] is None
                              or sentenceGraph.tokenIsEntityHead[depEdge[1]] is None):
                continue

            edgeFound = False
            # (source, target, isReverse): forward direction first, then reverse
            directions = ((depEdge[0], depEdge[1], False),
                          (depEdge[1], depEdge[0], True))
            for source, target, isReverse in directions:
                if sentenceGraph.interactionGraph.has_edge(source, target):
                    intEdges = sentenceGraph.interactionGraph.get_edge(source, target)
                    for intEdge in intEdges:
                        examples.append(self.buildExample(depEdge, intEdge, isReverse,
                                                          exampleIndex, sentenceGraph))
                        exampleIndex += 1
                        edgeFound = True
                elif directed:
                    examples.append(self.buildExample(depEdge, None, None,
                                                      exampleIndex, sentenceGraph))
                    exampleIndex += 1

            if not edgeFound and not directed:
                examples.append(self.buildExample(depEdge, None, None,
                                                  exampleIndex, sentenceGraph))
                exampleIndex += 1
        return examples

    def buildExample(self, depEdge, intEdge, isReverse, exampleIndex, sentenceGraph):
        """
        Return one (id, category, features, extra) tuple.

        depEdge      -- the dependency edge the example is built for
        intEdge      -- the matching interaction edge, or None for a
                        negative example
        isReverse    -- True when intEdge runs against the dependency's
                        direction (only used in "directed" style)
        exampleIndex -- running number appended to the sentence id
        """
        if "binary" in self.style:
            categoryName = "i"
            if intEdge is not None:
                category = 1
            else:
                category = -1
        elif intEdge is not None:
            categoryName = intEdge.attrib["type"]
            if isReverse and "directed" in self.style:
                categoryName += "_rev"
            category = self.classSet.getId(categoryName)
        else:
            categoryName = "neg"
            category = 1  # id given to "neg" in __init__

        features = self.buildFeatures(depEdge, sentenceGraph)
        extra = self._buildExtra(depEdge, categoryName)
        exampleId = sentenceGraph.getSentenceId() + ".x" + str(exampleIndex)
        return (exampleId, category, features, extra)

    def _buildExtra(self, depEdge, categoryName):
        """
        Return the extra attributes of an example (per the original
        comment, e.g. for the visualizer).

        The tokens are stored as "t1"/"t2" ordered by the integer after the
        last "_" of their "id" attribute; "deprev" is True when that order
        differs from the dependency's own order.
        """
        firstNumber = int(depEdge[0].attrib["id"].split("_")[-1])
        secondNumber = int(depEdge[1].attrib["id"].split("_")[-1])
        if firstNumber < secondNumber:
            return {"xtype": "edge", "type": categoryName,
                    "t1": depEdge[0], "t2": depEdge[1], "deprev": False}
        return {"xtype": "edge", "type": categoryName,
                "t1": depEdge[1], "t2": depEdge[0], "deprev": True}

    def buildFeatures(self, depEdge, sentenceGraph):
        """
        Return a new feature dictionary for one dependency edge, filled by
        the shared feature builder with edge, attached-edge and linear-order
        features.
        """
        features = {}
        self.featureBuilder.setFeatureVector(features)
        self.featureBuilder.buildEdgeFeatures(depEdge, sentenceGraph, "dep_",
                                              text=True, POS=True, annType=True, maskNames=True)
        self.featureBuilder.buildAttachedEdgeFeatures(depEdge, sentenceGraph, "",
                                                      text=False, POS=True, annType=False,
                                                      maskNames=True)
        self.featureBuilder.buildLinearOrderFeatures(depEdge)
        self.featureBuilder.setFeatureVector(None)
        return features
class SingleDependencyTypeExampleBuilder(ExampleBuilder):
    """
    Builds multiclass examples from dependency edges whose two tokens both
    have a non-None entry in sentenceGraph.tokenIsEntityHead.

    The class of an example is the "type" attribute of the matching
    interaction edge, with "_rev" appended when the interaction runs
    against the dependency's direction. Dependencies without an interaction
    edge get the class "neg", hard-coded as id 1.
    """

    def __init__(self):
        ExampleBuilder.__init__(self)
        self.classSet = IdSet(1)
        # Request the id outside of the assert statement: asserts are
        # removed under "python -O", and buildExample relies on "neg" being
        # class 1 (presumably getId registers the name on first use --
        # verify against IdSet).
        negativeId = self.classSet.getId("neg")
        assert negativeId == 1
        self.featureBuilder = EdgeFeatureBuilder(self.featureSet)

    def buildExamples(self, sentenceGraph):
        """
        Return the list of example tuples for one sentence.

        Interaction edges in the dependency's own direction take
        precedence: the reverse direction is only looked at when no forward
        interaction edge exists, and a negative example is generated only
        when neither direction has one.
        """
        examples = []
        exampleIndex = 0
        for depEdge in sentenceGraph.dependencyGraph.edges():
            if (sentenceGraph.tokenIsEntityHead[depEdge[0]] is None
                    or sentenceGraph.tokenIsEntityHead[depEdge[1]] is None):
                continue

            if sentenceGraph.interactionGraph.has_edge(depEdge[0], depEdge[1]):
                intEdges = sentenceGraph.interactionGraph.get_edge(depEdge[0], depEdge[1])
                for intEdge in intEdges:
                    examples.append(self.buildExample(depEdge, intEdge, False,
                                                      exampleIndex, sentenceGraph))
                    exampleIndex += 1
            elif sentenceGraph.interactionGraph.has_edge(depEdge[1], depEdge[0]):
                intEdges = sentenceGraph.interactionGraph.get_edge(depEdge[1], depEdge[0])
                for intEdge in intEdges:
                    examples.append(self.buildExample(depEdge, intEdge, True,
                                                      exampleIndex, sentenceGraph))
                    exampleIndex += 1
            else:
                examples.append(self.buildExample(depEdge, None, None,
                                                  exampleIndex, sentenceGraph))
                exampleIndex += 1
        return examples

    def buildExample(self, depEdge, intEdge, isReverse, exampleIndex, sentenceGraph):
        """
        Return one (id, category, features, extra) tuple.

        depEdge      -- the dependency edge the example is built for
        intEdge      -- the matching interaction edge, or None for a
                        negative example
        isReverse    -- True when intEdge runs against the dependency's
                        direction
        exampleIndex -- running number appended to the sentence id
        """
        if intEdge is not None:
            categoryName = intEdge.attrib["type"]
            if isReverse:
                categoryName += "_rev"
            category = self.classSet.getId(categoryName)
        else:
            categoryName = "neg"
            category = 1  # id given to "neg" in __init__

        features = self.buildFeatures(depEdge, sentenceGraph)
        extra = self._buildExtra(depEdge, categoryName)
        exampleId = sentenceGraph.getSentenceId() + ".x" + str(exampleIndex)
        return (exampleId, category, features, extra)

    def _buildExtra(self, depEdge, categoryName):
        """
        Return the extra attributes of an example (per the original
        comment, e.g. for the visualizer).

        The tokens are stored as "t1"/"t2" ordered by the integer after the
        last "_" of their "id" attribute; "deprev" is True when that order
        differs from the dependency's own order.
        """
        firstNumber = int(depEdge[0].attrib["id"].split("_")[-1])
        secondNumber = int(depEdge[1].attrib["id"].split("_")[-1])
        if firstNumber < secondNumber:
            return {"xtype": "edge", "type": categoryName,
                    "t1": depEdge[0], "t2": depEdge[1], "deprev": False}
        return {"xtype": "edge", "type": categoryName,
                "t1": depEdge[1], "t2": depEdge[0], "deprev": True}

    def buildFeatures(self, depEdge, sentenceGraph):
        """
        Return a new feature dictionary for one dependency edge, filled by
        the shared feature builder.
        """
        features = {}
        self.featureBuilder.setFeatureVector(features)
        self.featureBuilder.buildEdgeFeatures(depEdge, sentenceGraph, "dep_",
                                              text=True, POS=True, annType=True, maskNames=True)
        # Attached edges
        self.featureBuilder.buildAttachedEdgeFeatures(depEdge, sentenceGraph, "",
                                                      text=False, POS=True, annType=False,
                                                      maskNames=True)
        # Linear order
        self.featureBuilder.buildLinearOrderFeatures(depEdge)
        self.featureBuilder.setFeatureVector(None)
        return features
class SimpleDependencyExampleBuilder2(ExampleBuilder):
    """
    Turns each parse dependency of a sentence into one binary example.

    A dependency is a positive case (1) if the interaction graph has an
    edge between its two tokens in either direction; all other
    dependencies are negative cases (-1). No dependency is skipped.
    """

    def __init__(self):
        ExampleBuilder.__init__(self)
        self.featureBuilder = EdgeFeatureBuilder(self.featureSet)

    def buildExamples(self, sentenceGraph):
        """
        Return one (id, category, features, extra) tuple per dependency.

        Ids consist of the sentence id, ".x" and the position of the
        example in the returned list. The "extra" dictionary (per the
        original comment, e.g. for the visualizer) lists the two tokens as
        "t1" and "t2", ordered by the integer that follows the last "_" in
        their "id" attribute.
        """
        result = []
        for dependency in sentenceGraph.dependencyGraph.edges():
            tokenA = dependency[0]
            tokenB = dependency[1]

            # Positive if an interaction exists in either direction
            if sentenceGraph.interactionGraph.has_edge(tokenA, tokenB):
                label = 1
            elif sentenceGraph.interactionGraph.has_edge(tokenB, tokenA):
                label = 1
            else:
                label = -1

            featureVector = self.buildFeatures(dependency, sentenceGraph)

            extra = {"xtype": "edge", "type": "i"}
            numberA = int(tokenA.attrib["id"].split("_")[-1])
            numberB = int(tokenB.attrib["id"].split("_")[-1])
            if numberA < numberB:
                extra["t1"] = tokenA
                extra["t2"] = tokenB
            else:
                extra["t1"] = tokenB
                extra["t2"] = tokenA

            # Exactly one example is appended per dependency, so the list
            # length is the running example number.
            identifier = sentenceGraph.getSentenceId() + ".x" + str(len(result))
            result.append((identifier, label, featureVector, extra))
        return result

    def buildFeatures(self, depEdge, sentenceGraph):
        """
        Return a new feature dictionary for the given dependency edge.

        The feature builder writes edge features (prefix "dep_"),
        attached-edge features and linear-order features into the
        dictionary and is detached from it afterwards.
        """
        collected = {}
        fb = self.featureBuilder
        fb.setFeatureVector(collected)
        fb.buildEdgeFeatures(depEdge, sentenceGraph, "dep_",
                             text=True, POS=True, annType=True, maskNames=True)
        fb.buildAttachedEdgeFeatures(depEdge, sentenceGraph, "",
                                     text=False, POS=True, annType=False, maskNames=True)
        fb.buildLinearOrderFeatures(depEdge)
        fb.setFeatureVector(None)
        return collected