class EdgeExampleBuilder(ExampleBuilder): """ This example builder makes edge examples, i.e. examples describing the event arguments. """ def __init__(self, style=None, types=[], featureSet=None, classSet=None): if featureSet == None: featureSet = IdSet() if classSet == None: classSet = IdSet(1) else: classSet = classSet ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet) assert( classSet.getId("neg") == 1 or (len(classSet.Ids)== 2 and classSet.getId("neg") == -1) ) # Basic style = trigger_features:typed:directed:no_linear:entities:auto_limits:noMasking:maxFeatures self._setDefaultParameters([ "directed", "undirected", "headsOnly", "graph_kernel", "noAnnType", "mask_nodes", "limit_features", "no_auto_limits", "co_features", "genia_features", "bi_features", #"genia_limits", "epi_limits", "id_limits", "rel_limits", "bb_limits", "bi_limits", "co_limits", "genia_task1", "ontology", "nodalida", "bacteria_renaming", "no_trigger_features", "rel_features", "drugbank_features", "ddi_mtmx", "evex", "giuliano", "random", "themeOnly", "causeOnly", "no_path", "token_nodes", "skip_extra_triggers", "headsOnly", "graph_kernel", "no_task", "no_dependency", "disable_entity_features", "disable_terminus_features", "disable_single_element_features", "disable_ngram_features", "disable_path_edge_features", "linear_features", "subset", "binary", "pos_only", "entity_type", "filter_shortest_path", "maskTypeAsProtein", "keep_neg", "metamap", "sdb_merge", "sdb_features", "ontobiotope_features", "no_self_loops", "full_entities", "no_features", "wordnet", "wordvector", "se10t8_undirected", "filter_types", "doc_extra", "entity_extra"]) self.styles = self.getParameters(style) #if style == None: # no parameters given # style["typed"] = style["directed"] = style["headsOnly"] = True self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet, self.styles) # NOTE Temporarily re-enabling predicted range #self.multiEdgeFeatureBuilder.definePredictedValueRange([], None) if self.styles["graph_kernel"]: from FeatureBuilders.GraphKernelFeatureBuilder import GraphKernelFeatureBuilder self.graphKernelFeatureBuilder = GraphKernelFeatureBuilder(self.featureSet) if self.styles["noAnnType"]: self.multiEdgeFeatureBuilder.noAnnType = True if self.styles["mask_nodes"]: self.multiEdgeFeatureBuilder.maskNamedEntities = True else: self.multiEdgeFeatureBuilder.maskNamedEntities = False if not self.styles["limit_features"]: self.multiEdgeFeatureBuilder.maximum = True if self.styles["genia_task1"]: self.multiEdgeFeatureBuilder.filterAnnTypes.add("Entity") self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet) if self.styles["ontology"]: self.multiEdgeFeatureBuilder.ontologyFeatureBuilder = BioInferOntologyFeatureBuilder(self.featureSet) if self.styles["ontobiotope_features"]: self.ontobiotopeFeatureBuilder = OntoBiotopeFeatureBuilder(self.featureSet) if self.styles["nodalida"]: self.nodalidaFeatureBuilder = NodalidaFeatureBuilder(self.featureSet) if self.styles["bacteria_renaming"]: self.bacteriaRenamingFeatureBuilder = BacteriaRenamingFeatureBuilder(self.featureSet) if not self.styles["no_trigger_features"]: self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet, self.styles) self.triggerFeatureBuilder.useNonNameEntities = True if self.styles["noAnnType"]: self.triggerFeatureBuilder.noAnnType = True if self.styles["genia_task1"]: self.triggerFeatureBuilder.filterAnnTypes.add("Entity") #self.bioinferOntologies = OntologyUtils.loadOntologies(OntologyUtils.g_bioInferFileName) if self.styles["rel_features"]: self.relFeatureBuilder = RELFeatureBuilder(featureSet) if self.styles["drugbank_features"]: self.drugFeatureBuilder = DrugFeatureBuilder(featureSet) if self.styles["evex"]: self.evexFeatureBuilder = EVEXFeatureBuilder(featureSet) if self.styles["wordnet"]: self.wordNetFeatureBuilder = WordNetFeatureBuilder(featureSet) if self.styles["wordvector"]: self.wordVectorFeatureBuilder = WordVectorFeatureBuilder(featureSet, self.styles) if self.styles["giuliano"]: self.giulianoFeatureBuilder = GiulianoFeatureBuilder(featureSet) self.types = types if self.styles["random"]: from FeatureBuilders.RandomFeatureBuilder import RandomFeatureBuilder self.randomFeatureBuilder = RandomFeatureBuilder(self.featureSet) def definePredictedValueRange(self, sentences, elementName): self.multiEdgeFeatureBuilder.definePredictedValueRange(sentences, elementName) def getPredictedValueRange(self): return self.multiEdgeFeatureBuilder.predictedRange def filterEdgesByType(self, edges, typesToInclude): if len(typesToInclude) == 0: return edges edgesToKeep = [] for edge in edges: if edge.get("type") in typesToInclude: edgesToKeep.append(edge) return edgesToKeep def getCategoryNameFromTokens(self, sentenceGraph, t1, t2, directed=True): """ Example class. Multiple overlapping edges create a merged type. """ types = set() intEdges = sentenceGraph.interactionGraph.getEdges(t1, t2) if not directed: intEdges = intEdges + sentenceGraph.interactionGraph.getEdges(t2, t1) for intEdge in intEdges: types.add(intEdge[2].get("type")) types = list(types) types.sort() categoryName = "" for name in types: if categoryName != "": categoryName += "---" categoryName += name if categoryName != "": return categoryName else: return "neg" def getCategoryName(self, sentenceGraph, e1, e2, directed=True): """ Example class. Multiple overlapping edges create a merged type. """ interactions = sentenceGraph.getInteractions(e1, e2, True) if not directed and not self.styles["se10t8_undirected"]: interactions = interactions + sentenceGraph.getInteractions(e2, e1, True) types = set() for interaction in interactions: types.add(interaction[2].get("type")) types = list(types) types.sort() categoryName = "" for name in types: if self.styles["causeOnly"] and name != "Cause": continue if self.styles["themeOnly"] and name != "Theme": continue if categoryName != "": categoryName += "---" if self.styles["sdb_merge"]: name = self.mergeForSeeDev(name, self.structureAnalyzer) categoryName += name if categoryName != "": return categoryName else: return "neg" def getBISuperType(self, eType): if eType in ["GeneProduct", "Protein", "ProteinFamily", "PolymeraseComplex"]: return "ProteinEntity" elif eType in ["Gene", "GeneFamily", "GeneComplex", "Regulon", "Site", "Promoter"]: return "GeneEntity" else: return None def getSeeDevSuperTypes(self, eType): if eType in ("Gene", "Gene_Family", "Box", "Promoter"): return ("DNA", "Molecule") elif eType == "RNA": return ("RNA", "DNA_Product", "Molecule") elif eType in ("Protein", "Protein_Family", "Protein_Complex", "Protein_Domain"): return ("Amino_acid_sequence", "DNA_Product", "Molecule") elif eType == "Hormone": return ("Molecule",) elif eType in ("Regulatory_Network", "Pathway"): return ("Dynamic_process",) elif eType in ("Genotype", "Tissue", "Development_Phase"): return ("Biological_context", "Context") elif eType == "Environmental_Factor": return ("Context",) else: raise Exception("Unknown SeeDev type '" + str(eType) + "'") def mergeForSeeDev(self, categoryName, structureAnalyzer): if categoryName in structureAnalyzer.typeMap["forward"]: return structureAnalyzer.typeMap["forward"][categoryName] return categoryName # for tag in ("Regulates", "Exists", "Interacts", "Is", "Occurs"): # if categoryName.startswith(tag): # categoryName = tag # break # return categoryName def processCorpus(self, input, output, gold=None, append=False, allowNewIds=True, structureAnalyzer=None): if self.styles["sdb_merge"]: structureAnalyzer.determineNonOverlappingTypes() self.structureAnalyzer = structureAnalyzer ExampleBuilder.processCorpus(self, input, output, gold, append, allowNewIds, structureAnalyzer) def isValidInteraction(self, e1, e2, structureAnalyzer,forceUndirected=False): return len(structureAnalyzer.getValidEdgeTypes(e1.get("type"), e2.get("type"), forceUndirected=forceUndirected)) > 0 def getGoldCategoryName(self, goldGraph, entityToGold, e1, e2, directed=True): if len(entityToGold[e1]) > 0 and len(entityToGold[e2]) > 0: return self.getCategoryName(goldGraph, entityToGold[e1][0], entityToGold[e2][0], directed=directed) else: return "neg" def filterEdge(self, edge, edgeTypes): import types assert edgeTypes != None if type(edgeTypes) not in [types.ListType, types.TupleType]: edgeTypes = [edgeTypes] if edge[2].get("type") in edgeTypes: return True else: return False def keepExample(self, e1, e2, categoryName, isDirected, structureAnalyzer): makeExample = True if (not self.styles["no_auto_limits"]) and not self.isValidInteraction(e1, e2, structureAnalyzer, forceUndirected=not isDirected): makeExample = False self.exampleStats.filter("auto_limits") if self.styles["genia_task1"] and (e1.get("type") == "Entity" or e2.get("type") == "Entity"): makeExample = False self.exampleStats.filter("genia_task1") if self.styles["pos_only"] and categoryName == "neg": makeExample = False self.exampleStats.filter("pos_only") if self.styles["no_self_loops"] and ((e1 == e2) or (e1.get("headOffset") == e2.get("headOffset"))): makeExample = False self.exampleStats.filter("no_self_loops") return makeExample def getExampleCategoryName(self, e1=None, e2=None, t1=None, t2=None, sentenceGraph=None, goldGraph=None, entityToGold=None, isDirected=True, structureAnalyzer=None): if self.styles["token_nodes"]: categoryName = self.getCategoryNameFromTokens(sentenceGraph, t1, t2, isDirected) else: categoryName = self.getCategoryName(sentenceGraph, e1, e2, isDirected) if goldGraph != None: categoryName = self.getGoldCategoryName(goldGraph, entityToGold, e1, e2, isDirected) if self.styles["filter_types"] != None and categoryName in self.styles["filter_types"]: categoryName = "neg" if self.styles["se10t8_undirected"]: assert e1.get("id").endswith(".e1") assert e2.get("id").endswith(".e2") #if self.styles["sdb_merge"]: # categoryName = self.mergeForSeeDev(categoryName, structureAnalyzer) return categoryName def buildExamplesFromGraph(self, sentenceGraph, outfile, goldGraph = None, structureAnalyzer=None): """ Build examples for a single sentence. Returns a list of examples. See Core/ExampleUtils for example format. """ #examples = [] exampleIndex = 0 # example directionality if self.styles["directed"] == None and self.styles["undirected"] == None: # determine directedness from corpus examplesAreDirected = structureAnalyzer.hasDirectedTargets() if structureAnalyzer != None else True elif self.styles["directed"]: assert self.styles["undirected"] in [None, False] examplesAreDirected = True elif self.styles["undirected"]: assert self.styles["directed"] in [None, False] examplesAreDirected = False if not self.styles["no_trigger_features"]: self.triggerFeatureBuilder.initSentence(sentenceGraph) if self.styles["evex"]: self.evexFeatureBuilder.initSentence(sentenceGraph) # if self.styles["sdb_merge"]: # self.determineNonOverlappingTypes(structureAnalyzer) # Filter entities, if needed sentenceGraph.mergeInteractionGraph(True) entities = sentenceGraph.mergedEntities entityToDuplicates = sentenceGraph.mergedEntityToDuplicates self.exampleStats.addValue("Duplicate entities skipped", len(sentenceGraph.entities) - len(entities)) # Connect to optional gold graph entityToGold = None if goldGraph != None: entityToGold = EvaluateInteractionXML.mapEntities(entities, goldGraph.entities) paths = None if not self.styles["no_path"]: undirected = sentenceGraph.dependencyGraph.toUndirected() paths = undirected if self.styles["filter_shortest_path"] != None: # For DDI use filter_shortest_path=conj_and paths.resetAnalyses() # just in case paths.FloydWarshall(self.filterEdge, {"edgeTypes":self.styles["filter_shortest_path"]}) # Generate examples based on interactions between entities or interactions between tokens if self.styles["token_nodes"]: loopRange = len(sentenceGraph.tokens) else: loopRange = len(entities) for i in range(loopRange-1): for j in range(i+1,loopRange): eI = None eJ = None if self.styles["token_nodes"]: tI = sentenceGraph.tokens[i] tJ = sentenceGraph.tokens[j] else: eI = entities[i] eJ = entities[j] tI = sentenceGraph.entityHeadTokenByEntity[eI] tJ = sentenceGraph.entityHeadTokenByEntity[eJ] if eI.get("type") == "neg" or eJ.get("type") == "neg": continue if self.styles["skip_extra_triggers"]: if eI.get("source") != None or eJ.get("source") != None: continue # only consider paths between entities (NOTE! entities, not only named entities) if self.styles["headsOnly"]: if (len(sentenceGraph.tokenIsEntityHead[tI]) == 0) or (len(sentenceGraph.tokenIsEntityHead[tJ]) == 0): continue examples = self.buildExamplesForPair(tI, tJ, paths, sentenceGraph, goldGraph, entityToGold, eI, eJ, structureAnalyzer, examplesAreDirected) for categoryName, features, extra in examples: # make example if self.styles["binary"]: if categoryName != "neg": category = 1 else: category = -1 extra["categoryName"] = "i" else: category = self.classSet.getId(categoryName) example = [sentenceGraph.getSentenceId()+".x"+str(exampleIndex), category, features, extra] ExampleUtils.appendExamples([example], outfile) exampleIndex += 1 return exampleIndex def buildExamplesForPair(self, token1, token2, paths, sentenceGraph, goldGraph, entityToGold, entity1=None, entity2=None, structureAnalyzer=None, isDirected=True): # define forward categoryName = self.getExampleCategoryName(entity1, entity2, token1, token2, sentenceGraph, goldGraph, entityToGold, isDirected, structureAnalyzer=structureAnalyzer) # make forward forwardExample = None self.exampleStats.beginExample(categoryName) if self.keepExample(entity1, entity2, categoryName, isDirected, structureAnalyzer): forwardExample = self.buildExample(token1, token2, paths, sentenceGraph, categoryName, entity1, entity2, structureAnalyzer, isDirected) if isDirected: # build a separate reverse example (if that is valid) self.exampleStats.endExample() # end forward example # define reverse categoryName = self.getExampleCategoryName(entity2, entity1, token2, token1, sentenceGraph, goldGraph, entityToGold, True, structureAnalyzer=structureAnalyzer) # make reverse self.exampleStats.beginExample(categoryName) reverseExample = None if self.keepExample(entity2, entity1, categoryName, True, structureAnalyzer): reverseExample = self.buildExample(token2, token1, paths, sentenceGraph, categoryName, entity2, entity1, structureAnalyzer, isDirected) self.exampleStats.endExample() return filter(None, [forwardExample, reverseExample]) elif self.styles["se10t8_undirected"]: # undirected example with a directed type self.exampleStats.endExample() return [forwardExample] elif forwardExample != None: # merge features from the reverse example to the forward one reverseExample = self.buildExample(token2, token1, paths, sentenceGraph, categoryName, entity2, entity1, structureAnalyzer, isDirected) forwardExample[1].update(reverseExample[1]) self.exampleStats.endExample() # end merged example return [forwardExample] else: # undirected example that was filtered self.exampleStats.endExample() # end merged example return [] def buildExample(self, token1, token2, paths, sentenceGraph, categoryName, entity1=None, entity2=None, structureAnalyzer=None, isDirected=True): """ Build a single directed example for the potential edge between token1 and token2 """ # define features if not self.styles["no_path"]: path = paths.getPaths(token1, token2) if len(path) > 0: path = path[0] #pathExists = True else: path = [token1, token2] #pathExists = False else: path = [token1, token2] #pathExists = False features = {} if not self.styles["no_features"]: features = self.buildFeatures(sentenceGraph, entity1, entity2, token1, token2, path) # define extra attributes if int(path[0].get("charOffset").split("-")[0]) < int(path[-1].get("charOffset").split("-")[0]): extra = {"xtype":"edge","type":"i","t1":path[0].get("id"),"t2":path[-1].get("id")} extra["deprev"] = False else: extra = {"xtype":"edge","type":"i","t1":path[-1].get("id"),"t2":path[0].get("id")} extra["deprev"] = True if entity1 != None: extra["e1"] = entity1.get("id") if sentenceGraph.mergedEntityToDuplicates != None: extra["e1DuplicateIds"] = ",".join([x.get("id") for x in sentenceGraph.mergedEntityToDuplicates[entity1]]) if entity2 != None: extra["e2"] = entity2.get("id") if sentenceGraph.mergedEntityToDuplicates != None: extra["e2DuplicateIds"] = ",".join([x.get("id") for x in sentenceGraph.mergedEntityToDuplicates[entity2]]) extra["categoryName"] = categoryName if self.styles["bacteria_renaming"]: if entity1.get("text") != None and entity1.get("text") != "": extra["e1t"] = entity1.get("text").replace(" ", "---").replace(":","-COL-") if entity2.get("text") != None and entity2.get("text") != "": extra["e2t"] = entity2.get("text").replace(" ", "---").replace(":","-COL-") if self.styles["doc_extra"]: if hasattr(sentenceGraph, "documentElement") and sentenceGraph.documentElement.get("origId") != None: extra["DOID"] = sentenceGraph.documentElement.get("origId") if self.styles["entity_extra"]: if entity1.get("origId") != None: extra["e1OID"] = entity1.get("origId") if entity2.get("origId") != None: extra["e2OID"] = entity2.get("origId") sentenceOrigId = sentenceGraph.sentenceElement.get("origId") if sentenceOrigId != None: extra["SOID"] = sentenceOrigId extra["directed"] = str(isDirected) if self.styles["sdb_merge"]: extra["sdb_merge"] = "True" #print extra return (categoryName, features, extra) def buildFeatures(self, sentenceGraph, entity1, entity2, token1, token2, path): features = {} if not self.styles["no_trigger_features"]: # F 85.52 -> 85.55 self.triggerFeatureBuilder.setFeatureVector(features) self.triggerFeatureBuilder.tag = "trg1_" self.triggerFeatureBuilder.buildFeatures(token1) self.triggerFeatureBuilder.tag = "trg2_" self.triggerFeatureBuilder.buildFeatures(token2) self.triggerFeatureBuilder.setFeatureVector(None) # REL features if self.styles["rel_features"] and not self.styles["no_task"]: self.relFeatureBuilder.setFeatureVector(features) self.relFeatureBuilder.tag = "rel1_" self.relFeatureBuilder.buildAllFeatures(sentenceGraph.tokens, sentenceGraph.tokens.index(token1)) self.relFeatureBuilder.tag = "rel2_" self.relFeatureBuilder.buildAllFeatures(sentenceGraph.tokens, sentenceGraph.tokens.index(token2)) self.relFeatureBuilder.setFeatureVector(None) if self.styles["bacteria_renaming"] and not self.styles["no_task"]: self.bacteriaRenamingFeatureBuilder.setFeatureVector(features) self.bacteriaRenamingFeatureBuilder.buildPairFeatures(entity1, entity2) #self.bacteriaRenamingFeatureBuilder.buildSubstringFeatures(entity1, entity2) # decreases perf. 74.76 -> 72.41 self.bacteriaRenamingFeatureBuilder.setFeatureVector(None) if self.styles["co_features"] and not self.styles["no_task"]: e1Offset = Range.charOffsetToSingleTuple(entity1.get("charOffset")) e2Offset = Range.charOffsetToSingleTuple(entity2.get("charOffset")) if Range.contains(e1Offset, e2Offset): features[self.featureSet.getId("e1_contains_e2")] = 1 if entity2.get("given") == "True": features[self.featureSet.getId("e1_contains_e2name")] = 1 if Range.contains(e2Offset, e1Offset): features[self.featureSet.getId("e2_contains_e1")] = 1 if entity1.get("given") == "True": features[self.featureSet.getId("e2_contains_e1name")] = 1 if self.styles["drugbank_features"]: self.drugFeatureBuilder.setFeatureVector(features) self.drugFeatureBuilder.tag = "ddi_" self.drugFeatureBuilder.buildPairFeatures(entity1, entity2) if self.styles["ddi_mtmx"]: self.drugFeatureBuilder.buildMTMXFeatures(entity1, entity2) self.drugFeatureBuilder.setFeatureVector(None) if self.styles["graph_kernel"]: self.graphKernelFeatureBuilder.setFeatureVector(features, entity1, entity2) self.graphKernelFeatureBuilder.buildGraphKernelFeatures(sentenceGraph, path) self.graphKernelFeatureBuilder.setFeatureVector(None) if self.styles["entity_type"]: e1Type = self.multiEdgeFeatureBuilder.getEntityType(entity1) e2Type = self.multiEdgeFeatureBuilder.getEntityType(entity2) features[self.featureSet.getId("e1_"+e1Type)] = 1 features[self.featureSet.getId("e2_"+e2Type)] = 1 features[self.featureSet.getId("distance_"+str(len(path)))] = 1 if not self.styles["no_dependency"]: #print "Dep features" self.multiEdgeFeatureBuilder.setFeatureVector(features, entity1, entity2) #self.multiEdgeFeatureBuilder.buildStructureFeatures(sentenceGraph, paths) # remove for fast if not self.styles["disable_entity_features"]: self.multiEdgeFeatureBuilder.buildEntityFeatures(sentenceGraph) self.multiEdgeFeatureBuilder.buildPathLengthFeatures(path) if not self.styles["disable_terminus_features"]: self.multiEdgeFeatureBuilder.buildTerminusTokenFeatures(path, sentenceGraph) # remove for fast if not self.styles["disable_single_element_features"]: self.multiEdgeFeatureBuilder.buildSingleElementFeatures(path, sentenceGraph) if not self.styles["disable_ngram_features"]: #print "NGrams" self.multiEdgeFeatureBuilder.buildPathGrams(2, path, sentenceGraph) # remove for fast self.multiEdgeFeatureBuilder.buildPathGrams(3, path, sentenceGraph) # remove for fast self.multiEdgeFeatureBuilder.buildPathGrams(4, path, sentenceGraph) # remove for fast #self.buildEdgeCombinations(path, edges, sentenceGraph, features) # remove for fast #if edges != None: # self.multiEdgeFeatureBuilder.buildTerminusFeatures(path[0], edges[0][1]+edges[1][0], "t1", sentenceGraph) # remove for fast # self.multiEdgeFeatureBuilder.buildTerminusFeatures(path[-1], edges[len(path)-1][len(path)-2]+edges[len(path)-2][len(path)-1], "t2", sentenceGraph) # remove for fast if not self.styles["disable_path_edge_features"]: self.multiEdgeFeatureBuilder.buildPathEdgeFeatures(path, sentenceGraph) self.multiEdgeFeatureBuilder.buildSentenceFeatures(sentenceGraph) self.multiEdgeFeatureBuilder.setFeatureVector(None) if self.styles["nodalida"]: self.nodalidaFeatureBuilder.setFeatureVector(features, entity1, entity2) shortestPaths = self.nodalidaFeatureBuilder.buildShortestPaths(sentenceGraph.dependencyGraph, path) print shortestPaths if len(shortestPaths) > 0: self.nodalidaFeatureBuilder.buildNGrams(shortestPaths, sentenceGraph) self.nodalidaFeatureBuilder.setFeatureVector(None) if self.styles["linear_features"]: self.tokenFeatureBuilder.setFeatureVector(features) for i in range(len(sentenceGraph.tokens)): if sentenceGraph.tokens[i] == token1: token1Index = i if sentenceGraph.tokens[i] == token2: token2Index = i linearPreTag = "linfw_" if token1Index > token2Index: token1Index, token2Index = token2Index, token1Index linearPreTag = "linrv_" self.tokenFeatureBuilder.buildLinearOrderFeatures(token1Index, sentenceGraph, 2, 2, preTag="linTok1") self.tokenFeatureBuilder.buildLinearOrderFeatures(token2Index, sentenceGraph, 2, 2, preTag="linTok2") # Before, middle, after # self.tokenFeatureBuilder.buildTokenGrams(0, token1Index-1, sentenceGraph, "bf") # self.tokenFeatureBuilder.buildTokenGrams(token1Index+1, token2Index-1, sentenceGraph, "bw") # self.tokenFeatureBuilder.buildTokenGrams(token2Index+1, len(sentenceGraph.tokens)-1, sentenceGraph, "af") # before-middle, middle, middle-after # self.tokenFeatureBuilder.buildTokenGrams(0, token2Index-1, sentenceGraph, linearPreTag+"bf", max=2) # self.tokenFeatureBuilder.buildTokenGrams(token1Index+1, token2Index-1, sentenceGraph, linearPreTag+"bw", max=2) # self.tokenFeatureBuilder.buildTokenGrams(token1Index+1, len(sentenceGraph.tokens)-1, sentenceGraph, linearPreTag+"af", max=2) self.tokenFeatureBuilder.setFeatureVector(None) if self.styles["random"]: self.randomFeatureBuilder.setFeatureVector(features) self.randomFeatureBuilder.buildRandomFeatures(100, 0.01) self.randomFeatureBuilder.setFeatureVector(None) if self.styles["genia_features"] and not self.styles["no_task"]: e1Type = entity1.get("type") e2Type = entity2.get("type") assert(entity1.get("given") in (None, "False")) if entity2.get("given") == "True": features[self.featureSet.getId("GENIA_target_protein")] = 1 else: features[self.featureSet.getId("GENIA_nested_event")] = 1 if e1Type.find("egulation") != -1: # leave r out to avoid problems with capitalization if entity2.get("given") == "True": features[self.featureSet.getId("GENIA_regulation_of_protein")] = 1 else: features[self.featureSet.getId("GENIA_regulation_of_event")] = 1 if self.styles["bi_features"]: # Make features based on entity types e1Type = entity1.get("type") e2Type = entity2.get("type") e1SuperType = str(self.getBISuperType(e1Type)) e2SuperType = str(self.getBISuperType(e2Type)) features[self.featureSet.getId("BI_e1_"+e1Type)] = 1 features[self.featureSet.getId("BI_e2_"+e2Type)] = 1 features[self.featureSet.getId("BI_e1sup_"+e1SuperType)] = 1 features[self.featureSet.getId("BI_e2sup_"+e2SuperType)] = 1 features[self.featureSet.getId("BI_e1e2_"+e1Type+"_"+e2Type)] = 1 features[self.featureSet.getId("BI_e1e2sup_"+e1SuperType+"_"+e2SuperType)] = 1 if self.styles["sdb_features"]: e1Type = entity1.get("type") e2Type = entity2.get("type") features[self.featureSet.getId("SDB_e1_"+e1Type)] = 1 features[self.featureSet.getId("SDB_e2_"+e2Type)] = 1 features[self.featureSet.getId("SDB_e1e2_"+e1Type+"_"+e2Type)] = 1 if e1Type == e2Type: features[self.featureSet.getId("SDB_e1e2_equal")] = 1 features[self.featureSet.getId("SDB_e1e2_equal_" + e1Type)] = 1 e1SuperTypes = str(self.getSeeDevSuperTypes(e1Type)) e2SuperTypes = str(self.getSeeDevSuperTypes(e2Type)) for e1SuperType in e1SuperTypes: for e2SuperType in e2SuperTypes: features[self.featureSet.getId("SDB_e1sup_"+e1SuperType)] = 1 features[self.featureSet.getId("SDB_e2sup_"+e2SuperType)] = 1 features[self.featureSet.getId("SDB_e1e2sup_"+e1SuperType+"_"+e2SuperType)] = 1 if e1SuperType == e2SuperType: features[self.featureSet.getId("SDB_e1e2sup_equal")] = 1 features[self.featureSet.getId("SDB_e1e2sup_equal_" + e1SuperType)] = 1 if self.styles["ontobiotope_features"]: self.ontobiotopeFeatureBuilder.setFeatureVector(features) self.ontobiotopeFeatureBuilder.buildOBOFeaturesForEntityPair(entity1, entity2) self.ontobiotopeFeatureBuilder.setFeatureVector(None) if self.styles["full_entities"]: e1Text = entity1.get("text").lower() e2Text = entity2.get("text").lower() features[self.featureSet.getId("FULL_e1_"+e1Text)] = 1 features[self.featureSet.getId("FULL_e2_"+e2Text)] = 1 for ep1 in e1Text.split(): for ep2 in e2Text.split(): features[self.featureSet.getId("FULL_e1_"+ep1)] = 1 features[self.featureSet.getId("FULL_e2_"+ep2)] = 1 features[self.featureSet.getId("FULL_e1e2_"+ep1+"_"+ep2)] = 1 if self.styles["evex"]: self.evexFeatureBuilder.setFeatureVector(features, entity1, entity2) self.evexFeatureBuilder.buildEdgeFeatures(entity1, entity2, token1, token2, path, sentenceGraph) self.evexFeatureBuilder.setFeatureVector(None) if self.styles["wordnet"]: self.wordNetFeatureBuilder.setFeatureVector(features, entity1, entity2) self.wordNetFeatureBuilder.buildFeaturesForEntityPair(token1, token2) self.wordNetFeatureBuilder.buildLinearFeatures(token1, sentenceGraph.tokens, tag="t1_") self.wordNetFeatureBuilder.buildLinearFeatures(token2, sentenceGraph.tokens, tag="t2_") self.wordNetFeatureBuilder.buildPathFeatures(path) self.wordNetFeatureBuilder.setFeatureVector(None) if self.styles["wordvector"]: self.wordVectorFeatureBuilder.setFeatureVector(features, entity1, entity2) self.wordVectorFeatureBuilder.buildFeatures(token1, "t1_") self.wordVectorFeatureBuilder.buildFeatures(token2, "t2_") self.wordVectorFeatureBuilder.buildLinearFeatures(token1, sentenceGraph.tokens, tag="t1_") self.wordVectorFeatureBuilder.buildLinearFeatures(token2, sentenceGraph.tokens, tag="t2_") self.wordVectorFeatureBuilder.buildPathFeatures(path) self.wordVectorFeatureBuilder.buildFBAFeatures(sentenceGraph.tokens, sentenceGraph.tokens.index(token1), sentenceGraph.tokens.index(token2)) self.wordVectorFeatureBuilder.setFeatureVector(None) if self.styles["giuliano"]: self.giulianoFeatureBuilder.setFeatureVector(features, entity1, entity2) self.giulianoFeatureBuilder.buildEdgeFeatures(entity1, entity2, token1, token2, path, sentenceGraph) self.giulianoFeatureBuilder.setFeatureVector(None) return features