def __init__(self,
                 style=None,
                 classSet=None,
                 featureSet=None,
                 gazetteerFileName=None,
                 skiplist=None):
        if classSet == None:
            classSet = IdSet(1)
        if featureSet == None:
            featureSet = IdSet()

        ExampleBuilder.__init__(self, classSet, featureSet)
        assert (classSet.getId("neg") == 1)
        #gazetteerFileName="/usr/share/biotext/GeniaChallenge/SharedTaskTriggerTest/gazetteer-train"
        if gazetteerFileName != None:
            self.gazetteer = Gazetteer.loadGztr(gazetteerFileName)
            print >> sys.stderr, "Loaded gazetteer from", gazetteerFileName
        else:
            print >> sys.stderr, "No gazetteer loaded"
            self.gazetteer = None
        self._setDefaultParameters([
            "rel_features", "wordnet", "bb_features", "giuliano",
            "epi_merge_negated", "limit_merged_types", "genia_task1",
            "build_for_nameless", "pos_only", "all_tokens", "names",
            "pos_pairs", "linear_ngrams", "phospho"
        ])
        self.styles = self.getParameters(style)
        #        if "selftrain_group" in self.styles:
        #            self.selfTrainGroups = set()
        #            if "selftrain_group-1" in self.styles:
        #                self.selfTrainGroups.add("-1")
        #            if "selftrain_group0" in self.styles:
        #                self.selfTrainGroups.add("0")
        #            if "selftrain_group1" in self.styles:
        #                self.selfTrainGroups.add("1")
        #            if "selftrain_group2" in self.styles:
        #                self.selfTrainGroups.add("2")
        #            if "selftrain_group3" in self.styles:
        #                self.selfTrainGroups.add("3")
        #            print >> sys.stderr, "Self-train-groups:", self.selfTrainGroups

        self.skiplist = set()
        if skiplist != None:
            f = open(skiplist, "rt")
            for line in f.readlines():
                self.skiplist.add(line.strip())
            f.close()

        if self.styles["rel_features"]:
            self.relFeatureBuilder = RELFeatureBuilder(featureSet)
        if self.styles["wordnet"]:
            self.wordNetFeatureBuilder = WordNetFeatureBuilder(featureSet)
        if self.styles["bb_features"]:
            self.bacteriaTokens = PhraseTriggerExampleBuilder.getBacteriaTokens(
            )
            #self.bacteriaTokens = PhraseTriggerExampleBuilder.getBacteriaTokens(PhraseTriggerExampleBuilder.getBacteriaNames())
        if self.styles["giuliano"]:
            self.giulianoFeatureBuilder = GiulianoFeatureBuilder(featureSet)
示例#2
0
 def __init__(self, style=None, types=[], featureSet=None, classSet=None):
     if featureSet == None:
         featureSet = IdSet()
     if classSet == None:
         classSet = IdSet(1)
     else:
         classSet = classSet
     
     ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
     assert( classSet.getId("neg") == 1 or (len(classSet.Ids)== 2 and classSet.getId("neg") == -1) )
     
     # Basic style = trigger_features:typed:directed:no_linear:entities:auto_limits:noMasking:maxFeatures
     self._setDefaultParameters([
         "directed", "undirected", "headsOnly", "graph_kernel", "noAnnType", "mask_nodes", "limit_features",
         "no_auto_limits", "co_features", "genia_features", "bi_features", #"genia_limits", "epi_limits", "id_limits", "rel_limits", "bb_limits", "bi_limits", "co_limits",
         "genia_task1", "ontology", "nodalida", "bacteria_renaming", "no_trigger_features", "rel_features",
         "drugbank_features", "ddi_mtmx", "evex", "giuliano", "random", "themeOnly", "causeOnly", "no_path", "token_nodes", 
         "skip_extra_triggers", "headsOnly", "graph_kernel", "no_task", "no_dependency", 
         "disable_entity_features", "disable_terminus_features", "disable_single_element_features", 
         "disable_ngram_features", "disable_path_edge_features", "linear_features", "subset", "binary", "pos_only",
         "entity_type", "filter_shortest_path", "maskTypeAsProtein", "keep_neg", "metamap", 
         "sdb_merge", "sdb_features", "ontobiotope_features", "no_self_loops", "full_entities",
         "no_features", "wordnet", "wordvector", "se10t8_undirected", "filter_types", "doc_extra",
         "entity_extra"])
     self.styles = self.getParameters(style)
     #if style == None: # no parameters given
     #    style["typed"] = style["directed"] = style["headsOnly"] = True
     
     self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet, self.styles)
     # NOTE Temporarily re-enabling predicted range
     #self.multiEdgeFeatureBuilder.definePredictedValueRange([], None)
     if self.styles["graph_kernel"]:
         from FeatureBuilders.GraphKernelFeatureBuilder import GraphKernelFeatureBuilder
         self.graphKernelFeatureBuilder = GraphKernelFeatureBuilder(self.featureSet)
     if self.styles["noAnnType"]:
         self.multiEdgeFeatureBuilder.noAnnType = True
     if self.styles["mask_nodes"]:
         self.multiEdgeFeatureBuilder.maskNamedEntities = True
     else:
         self.multiEdgeFeatureBuilder.maskNamedEntities = False
     if not self.styles["limit_features"]:
         self.multiEdgeFeatureBuilder.maximum = True
     if self.styles["genia_task1"]:
         self.multiEdgeFeatureBuilder.filterAnnTypes.add("Entity")
     self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet)
     if self.styles["ontology"]:
         self.multiEdgeFeatureBuilder.ontologyFeatureBuilder = BioInferOntologyFeatureBuilder(self.featureSet)
     if self.styles["ontobiotope_features"]:
         self.ontobiotopeFeatureBuilder = OntoBiotopeFeatureBuilder(self.featureSet)
     if self.styles["nodalida"]:
         self.nodalidaFeatureBuilder = NodalidaFeatureBuilder(self.featureSet)
     if self.styles["bacteria_renaming"]:
         self.bacteriaRenamingFeatureBuilder = BacteriaRenamingFeatureBuilder(self.featureSet)
     if not self.styles["no_trigger_features"]:
         self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet, self.styles)
         self.triggerFeatureBuilder.useNonNameEntities = True
         if self.styles["noAnnType"]:
             self.triggerFeatureBuilder.noAnnType = True
         if self.styles["genia_task1"]:
             self.triggerFeatureBuilder.filterAnnTypes.add("Entity")
         #self.bioinferOntologies = OntologyUtils.loadOntologies(OntologyUtils.g_bioInferFileName)
     if self.styles["rel_features"]:
         self.relFeatureBuilder = RELFeatureBuilder(featureSet)
     if self.styles["drugbank_features"]:
         self.drugFeatureBuilder = DrugFeatureBuilder(featureSet)
     if self.styles["evex"]:
         self.evexFeatureBuilder = EVEXFeatureBuilder(featureSet)
     if self.styles["wordnet"]:
         self.wordNetFeatureBuilder = WordNetFeatureBuilder(featureSet)
     if self.styles["wordvector"]:
         self.wordVectorFeatureBuilder = WordVectorFeatureBuilder(featureSet, self.styles)
     if self.styles["giuliano"]:
         self.giulianoFeatureBuilder = GiulianoFeatureBuilder(featureSet)
     self.types = types
     if self.styles["random"]:
         from FeatureBuilders.RandomFeatureBuilder import RandomFeatureBuilder
         self.randomFeatureBuilder = RandomFeatureBuilder(self.featureSet)