Exemplo n.º 1
0
 def __init__(self, style):
     """Create the builder with an edge feature builder and a style.

     style is stored unchanged as self.style. Unless it contains "binary",
     self.classSet is replaced by IdSet(1), whose "neg" entry must resolve
     to id 1.
     """
     ExampleBuilder.__init__(self)
     self.featureBuilder = EdgeFeatureBuilder(self.featureSet)
     self.style = style
     if "binary" in style:
         # Binary mode: self.classSet is left untouched.
         return
     self.classSet = IdSet(1)
     negativeId = self.classSet.getId("neg")
     assert negativeId == 1
Exemplo n.º 2
0
    def __init__(self,
                 style=["typed", "directed", "headsOnly"],
                 length=None,
                 types=[],
                 featureSet=None,
                 classSet=None):
        """Set up the example builder and its multi-edge feature builder.

        Args:
            style: Collection of style flags. "noAnnType", "noMasking" and
                "maxFeatures" adjust the MultiEdgeFeatureBuilder.
            length: Must be None (asserted); stored as self.pathLengths.
            types: Stored as self.types.
            featureSet: Feature id set; a fresh IdSet() is created if None.
            classSet: Class id set; IdSet(1) is created if None. Its "neg"
                entry must resolve to id 1.

        NOTE: the list defaults of style and types are stored, not copied,
        so they are shared between instances and should not be mutated.
        """
        # Identity checks: "== None" would dispatch to any custom __eq__.
        if featureSet is None:
            featureSet = IdSet()
        if classSet is None:
            classSet = IdSet(1)
        assert classSet.getId("neg") == 1

        ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
        self.styles = style

        self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
        if "noAnnType" in self.styles:
            self.multiEdgeFeatureBuilder.noAnnType = True
        if "noMasking" in self.styles:
            self.multiEdgeFeatureBuilder.maskNamedEntities = False
        if "maxFeatures" in self.styles:
            self.multiEdgeFeatureBuilder.maximum = True
        # The token feature builder and the "ontology" style are disabled
        # in this builder (their set-up was commented out).
        self.pathLengths = length
        assert self.pathLengths is None
        self.types = types
Exemplo n.º 3
0
    def __init__(self, style="trigger_features:typed:directed:no_linear:entities:genia_limits:noMasking:maxFeatures", length=None, types=[], featureSet=None, classSet=None):
        """Set up the builder with a fixed style configuration.

        Args:
            style: Ignored; the value is overwritten with the built-in
                default string before it is parsed.
            length: Must be None (asserted); stored as self.pathLengths.
            types: Stored as self.types (the default list is shared
                between instances and should not be mutated).
            featureSet: Feature id set; a fresh IdSet() is created if None.
            classSet: Class id set; IdSet(1) is created if None. Its "neg"
                entry must resolve to id 1.
        """
        # reset style regardless of input
        style = "trigger_features:typed:directed:no_linear:entities:genia_limits:noMasking:maxFeatures"
        # Identity checks: "== None" would dispatch to any custom __eq__.
        if featureSet is None:
            featureSet = IdSet()
        if classSet is None:
            classSet = IdSet(1)
        assert classSet.getId("neg") == 1

        ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)

        # self.styles is indexed by flag name below.
        self.styles = self.getParameters(style, [
            "trigger_features", "typed", "directed", "no_linear", "entities", "genia_limits",
            "noAnnType", "noMasking", "maxFeatures", "no_merge", "disable_entity_features",
            "disable_single_element_features", "disable_ngram_features", "disable_path_edge_features"])
        self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
        self.multiEdgeFeatureBuilder.noAnnType = self.styles["noAnnType"]
        self.multiEdgeFeatureBuilder.maskNamedEntities = not self.styles["noMasking"]
        self.multiEdgeFeatureBuilder.maximum = self.styles["maxFeatures"]
        self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet)
        self.pathLengths = length
        assert self.pathLengths is None
        self.types = types

        self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
        self.triggerFeatureBuilder.useNonNameEntities = True
Exemplo n.º 4
0
 def __init__(self, style):
     """Initialise the base builder, the edge feature builder and the style.

     When "binary" is absent from style, self.classSet becomes IdSet(1)
     and its "neg" entry is asserted to have id 1.
     """
     ExampleBuilder.__init__(self)
     self.featureBuilder = EdgeFeatureBuilder(self.featureSet)
     self.style = style
     isBinary = "binary" in style
     if not isBinary:
         freshClassSet = IdSet(1)
         self.classSet = freshClassSet
         assert self.classSet.getId("neg") == 1
Exemplo n.º 5
0
    def __init__(self, style=["typed", "directed", "headsOnly"], length=None, types=[], featureSet=None, classSet=None):
        """Set up the example builder and its multi-edge feature builder.

        Args:
            style: Collection of style flags. "noAnnType", "noMasking" and
                "maxFeatures" adjust the MultiEdgeFeatureBuilder.
            length: Must be None (asserted); stored as self.pathLengths.
            types: Stored as self.types.
            featureSet: Feature id set; a fresh IdSet() is created if None.
            classSet: Class id set; IdSet(1) is created if None. Its "neg"
                entry must resolve to id 1.

        NOTE: the list defaults of style and types are stored, not copied,
        so they are shared between instances and should not be mutated.
        """
        # Identity checks: "== None" would dispatch to any custom __eq__.
        if featureSet is None:
            featureSet = IdSet()
        if classSet is None:
            classSet = IdSet(1)
        assert classSet.getId("neg") == 1

        ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
        self.styles = style

        self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
        if "noAnnType" in self.styles:
            self.multiEdgeFeatureBuilder.noAnnType = True
        if "noMasking" in self.styles:
            self.multiEdgeFeatureBuilder.maskNamedEntities = False
        if "maxFeatures" in self.styles:
            self.multiEdgeFeatureBuilder.maximum = True
        # The token feature builder and the "ontology" style are disabled
        # in this builder (their set-up was commented out).
        self.pathLengths = length
        assert self.pathLengths is None
        self.types = types
Exemplo n.º 6
0
    def __init__(self,
                 style=["typed", "directed"],
                 length=None,
                 types=[],
                 featureSet=None,
                 classSet=None):
        """Set up the example builder and the feature builders it may use.

        Args:
            style: Style flags, either a collection of strings or a single
                comma-separated string. Flags containing "negFrac" or
                "posPairGaz" carry a value after an underscore.
            length: Must be None (asserted); stored as self.pathLengths.
            types: Stored as self.types (the default list is shared
                between instances and should not be mutated).
            featureSet: Feature id set; a fresh IdSet() is created if None.
            classSet: Class id set; IdSet(1) is created if None. Its "neg"
                entry must resolve to id 1.
        """
        # Identity checks: "== None" would dispatch to any custom __eq__.
        if featureSet is None:
            featureSet = IdSet()
        if classSet is None:
            classSet = IdSet(1)
        assert classSet.getId("neg") == 1

        ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
        # Accept a comma-separated string as well as a ready-made collection.
        # Calling style.find() unconditionally raised AttributeError for the
        # default list, and a comma-less string was left unsplit so the loop
        # below walked it character by character.
        if hasattr(style, "split"):
            style = style.split(",")
        self.styles = style

        self.negFrac = None
        self.posPairGaz = POSPairGazetteer()
        for s in style:
            if s.find("negFrac") != -1:
                # The fraction is the text after the last underscore.
                self.negFrac = float(s.split("_")[-1])
                print >> sys.stderr, "Downsampling negatives to", self.negFrac
                # Fixed seed, so the same sequence is drawn on every run.
                self.negRand = random.Random(15)
            elif s.find("posPairGaz") != -1:
                # Everything after the first underscore is the load source.
                self.posPairGaz = POSPairGazetteer(
                    loadFrom=s.split("_", 1)[-1])

        self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
        self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
        if "graph_kernel" in self.styles:
            from FeatureBuilders.GraphKernelFeatureBuilder import GraphKernelFeatureBuilder
            self.graphKernelFeatureBuilder = GraphKernelFeatureBuilder(
                self.featureSet)
        if "noAnnType" in self.styles:
            self.multiEdgeFeatureBuilder.noAnnType = True
        if "noMasking" in self.styles:
            self.multiEdgeFeatureBuilder.maskNamedEntities = False
        if "maxFeatures" in self.styles:
            self.multiEdgeFeatureBuilder.maximum = True
        self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet)
        if "ontology" in self.styles:
            self.multiEdgeFeatureBuilder.ontologyFeatureBuilder = BioInferOntologyFeatureBuilder(
                self.featureSet)
        if "nodalida" in self.styles:
            self.nodalidaFeatureBuilder = NodalidaFeatureBuilder(
                self.featureSet)
        #IF LOCAL
        if "bioinfer_limits" in self.styles:
            self.bioinferOntologies = OntologyUtils.getBioInferTempOntology()
            #self.bioinferOntologies = OntologyUtils.loadOntologies(OntologyUtils.g_bioInferFileName)
        #ENDIF
        self.pathLengths = length
        assert self.pathLengths is None
        self.types = types
        if "random" in self.styles:
            from FeatureBuilders.RandomFeatureBuilder import RandomFeatureBuilder
            self.randomFeatureBuilder = RandomFeatureBuilder(self.featureSet)
Exemplo n.º 7
0
 def __init__(self, style=["typed","directed","headsOnly"], length=None, types=[], featureSet=None, classSet=None):
     """Set up the example builder with multi-edge and token feature builders.

     Args:
         style: Collection of style flags. "noAnnType", "noMasking" and
             "maxFeatures" adjust the MultiEdgeFeatureBuilder; "random"
             adds a RandomFeatureBuilder.
         length: Must be None (asserted); stored as self.pathLengths.
         types: Stored as self.types.
         featureSet: Feature id set; a fresh IdSet() is created if None.
         classSet: Class id set; IdSet(1) is created if None. Its "neg"
             entry must resolve to id 1.

     NOTE: the list defaults of style and types are stored, not copied,
     so they are shared between instances and should not be mutated.
     """
     # Identity checks: "== None" would dispatch to any custom __eq__.
     if featureSet is None:
         featureSet = IdSet()
     if classSet is None:
         classSet = IdSet(1)
     assert classSet.getId("neg") == 1

     ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
     self.styles = style

     self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
     if "noAnnType" in self.styles:
         self.multiEdgeFeatureBuilder.noAnnType = True
     if "noMasking" in self.styles:
         self.multiEdgeFeatureBuilder.maskNamedEntities = False
     if "maxFeatures" in self.styles:
         self.multiEdgeFeatureBuilder.maximum = True
     self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet)
     self.pathLengths = length
     assert self.pathLengths is None
     self.types = types
     if "random" in self.styles:
         # Imported lazily, only when the "random" style is requested.
         from FeatureBuilders.RandomFeatureBuilder import RandomFeatureBuilder
         self.randomFeatureBuilder = RandomFeatureBuilder(self.featureSet)
Exemplo n.º 8
0
 def __init__(self, style=["typed","directed","headsOnly"], length=None, types=[], featureSet=None, classSet=None, gazetteer=None, pathGazetteer=None, negFrac=None):
     """Set up the builder, optional gazetteers and bookkeeping containers.

     Args:
         style: Collection of style flags. "noMasking" and "maxFeatures"
             adjust the MultiEdgeFeatureBuilder; noAnnType is always on.
         length: Must be None (asserted); stored as self.pathLengths.
         types: Stored as self.types.
         featureSet: Feature id set; a fresh IdSet() is created if None.
         classSet: Class id set; IdSet(1) is created if None. Its "neg"
             entry must resolve to id 1.
         gazetteer: Passed to Gazetteer.loadGztr when not None.
         pathGazetteer: Passed to PathGazetteer.load when not None.
         negFrac: Stored as self.negFrac.

     NOTE: the list defaults of style and types are stored, not copied,
     so they are shared between instances and should not be mutated.
     """
     # Identity checks: "== None" would dispatch to any custom __eq__.
     if featureSet is None:
         featureSet = IdSet()
     if classSet is None:
         classSet = IdSet(1)
     assert classSet.getId("neg") == 1

     if gazetteer is not None:
         print >> sys.stderr, "Loading gazetteer from", gazetteer
         self.gazetteer = Gazetteer.loadGztr(gazetteer)
     else:
         print >> sys.stderr, "No gazetteer loaded"
         self.gazetteer = None

     self.pathGazetteer = None
     self.pathGazetteerDependencies = None
     self.pathGazetteerPairs = None
     if pathGazetteer is not None:
         print >> sys.stderr, "Loading path gazetteer from", pathGazetteer
         self.pathGazetteer = PathGazetteer.load(pathGazetteer)
         self.pathGazetteerDependencies = PathGazetteer.getDependencies(self.pathGazetteer)
         self.pathGazetteerPairs = PathGazetteer.getPairs(self.pathGazetteer)
     else:
         print >> sys.stderr, "No path gazetteer loaded"

     ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
     self.styles = style
     self.negFrac = negFrac
     # Printed unconditionally, i.e. also when negFrac is None.
     print >> sys.stderr, "Downsampling negatives to", negFrac
     self.negRand = random.Random()

     self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
     # noAnnType is forced on; the '"noAnnType" in self.styles' check
     # had been replaced by a constant True.
     self.multiEdgeFeatureBuilder.noAnnType = True
     if "noMasking" in self.styles:
         self.multiEdgeFeatureBuilder.maskNamedEntities = False
     if "maxFeatures" in self.styles:
         self.multiEdgeFeatureBuilder.maximum = True

     self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
     # The token feature builder and the "ontology" style are disabled
     # in this builder (their set-up was commented out).
     self.pathLengths = length
     assert self.pathLengths is None
     self.types = types

     self.eventsByOrigId = {}
     self.headTokensByOrigId = {}
     self.interSentenceEvents = set()

     self.examplesByEventOrigId = {}
     self.skippedByType = {}
     self.skippedByTypeAndReason = {}
     self.builtByType = {}

     self.gazMatchCache = {}
 def __init__(self, style=["typed","directed"], length=None, types=[], featureSet=None, classSet=None):
     """Set up the example builder and the feature builders it may use.

     Args:
         style: Style flags, either a collection of strings or a single
             comma-separated string. Flags containing "negFrac" or
             "posPairGaz" carry a value after an underscore.
         length: Must be None (asserted); stored as self.pathLengths.
         types: Stored as self.types (the default list is shared
             between instances and should not be mutated).
         featureSet: Feature id set; a fresh IdSet() is created if None.
         classSet: Class id set; IdSet(1) is created if None. Its "neg"
             entry must resolve to id 1.
     """
     # Identity checks: "== None" would dispatch to any custom __eq__.
     if featureSet is None:
         featureSet = IdSet()
     if classSet is None:
         classSet = IdSet(1)
     assert classSet.getId("neg") == 1

     ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
     # Accept a comma-separated string as well as a ready-made collection.
     # Calling style.find() unconditionally raised AttributeError for the
     # default list, and a comma-less string was left unsplit so the loop
     # below walked it character by character.
     if hasattr(style, "split"):
         style = style.split(",")
     self.styles = style

     self.negFrac = None
     self.posPairGaz = POSPairGazetteer()
     for s in style:
         if s.find("negFrac") != -1:
             # The fraction is the text after the last underscore.
             self.negFrac = float(s.split("_")[-1])
             print >> sys.stderr, "Downsampling negatives to", self.negFrac
             # Fixed seed, so the same sequence is drawn on every run.
             self.negRand = random.Random(15)
         elif s.find("posPairGaz") != -1:
             # Everything after the first underscore is the load source.
             self.posPairGaz = POSPairGazetteer(loadFrom=s.split("_", 1)[-1])

     self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
     self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
     if "graph_kernel" in self.styles:
         from FeatureBuilders.GraphKernelFeatureBuilder import GraphKernelFeatureBuilder
         self.graphKernelFeatureBuilder = GraphKernelFeatureBuilder(self.featureSet)
     if "noAnnType" in self.styles:
         self.multiEdgeFeatureBuilder.noAnnType = True
     if "noMasking" in self.styles:
         self.multiEdgeFeatureBuilder.maskNamedEntities = False
     if "maxFeatures" in self.styles:
         self.multiEdgeFeatureBuilder.maximum = True
     self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet)
     if "ontology" in self.styles:
         self.multiEdgeFeatureBuilder.ontologyFeatureBuilder = BioInferOntologyFeatureBuilder(self.featureSet)
     if "nodalida" in self.styles:
         self.nodalidaFeatureBuilder = NodalidaFeatureBuilder(self.featureSet)
     #IF LOCAL
     if "bioinfer_limits" in self.styles:
         self.bioinferOntologies = OntologyUtils.getBioInferTempOntology()
         #self.bioinferOntologies = OntologyUtils.loadOntologies(OntologyUtils.g_bioInferFileName)
     #ENDIF
     self.pathLengths = length
     assert self.pathLengths is None
     self.types = types
     if "random" in self.styles:
         from FeatureBuilders.RandomFeatureBuilder import RandomFeatureBuilder
         self.randomFeatureBuilder = RandomFeatureBuilder(self.featureSet)
Exemplo n.º 10
0
 def __init__(self, style=["typed","directed","headsOnly"], length=None, types=[], featureSet=None, classSet=None):
     """Set up the builder with a multi-edge feature builder and empty counters.

     Args:
         style: Stored as self.styles (the default list is shared between
             instances and should not be mutated).
         length: Accepted but not used by this constructor.
         types: Accepted but not used by this constructor.
         featureSet: Feature id set; a fresh IdSet() is created if None.
         classSet: Class id set; IdSet(1) is created if None. Its "neg"
             entry must resolve to id 1.
     """
     # Identity checks: "== None" would dispatch to any custom __eq__.
     if featureSet is None:
         featureSet = IdSet()
     if classSet is None:
         classSet = IdSet(1)
     assert classSet.getId("neg") == 1

     ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
     self.styles = style

     self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)

     # Counting dictionaries, all starting empty.
     self.counts = {}
     self.countsPerType = {}
     self.untypedCounts = {}
     self.tokenCounts = {}
Exemplo n.º 11
0
 def __init__(self, style=["typed","directed","headsOnly"], featureSet=None, classSet=None):
     """Set up the builder with fixed multi-edge and trigger feature settings.

     Args:
         style: Stored as self.styles; it does not influence the feature
             builder settings below, which are hard-wired. The default
             list is shared between instances and should not be mutated.
         featureSet: Feature id set; a fresh IdSet() is created if None.
         classSet: Class id set; IdSet(1) is created if None. Its "neg"
             entry must resolve to id 1.
     """
     # Identity checks: "== None" would dispatch to any custom __eq__.
     if featureSet is None:
         featureSet = IdSet()
     if classSet is None:
         classSet = IdSet(1)
     assert classSet.getId("neg") == 1

     ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
     self.styles = style

     self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
     # The three settings below are applied unconditionally; the style
     # checks that used to guard them were commented out.
     self.multiEdgeFeatureBuilder.noAnnType = True
     self.multiEdgeFeatureBuilder.maskNamedEntities = False
     self.multiEdgeFeatureBuilder.maximum = True
     self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
     self.triggerFeatureBuilder.useNonNameEntities = False
Exemplo n.º 12
0
    def __init__(
            self,
            style="trigger_features:typed:directed:no_linear:entities:genia_limits:noMasking:maxFeatures",
            length=None,
            types=[],
            featureSet=None,
            classSet=None):
        """Set up the builder with a fixed style configuration.

        Args:
            style: Ignored; the value is overwritten with the built-in
                default string before it is parsed.
            length: Must be None (asserted); stored as self.pathLengths.
            types: Stored as self.types (the default list is shared
                between instances and should not be mutated).
            featureSet: Feature id set; a fresh IdSet() is created if None.
            classSet: Class id set; IdSet(1) is created if None. Its "neg"
                entry must resolve to id 1.
        """
        # reset style regardless of input
        style = "trigger_features:typed:directed:no_linear:entities:genia_limits:noMasking:maxFeatures"
        # Identity checks: "== None" would dispatch to any custom __eq__.
        if featureSet is None:
            featureSet = IdSet()
        if classSet is None:
            classSet = IdSet(1)
        assert classSet.getId("neg") == 1

        ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)

        # self.styles is indexed by flag name below.
        self.styles = self.getParameters(style, [
            "trigger_features", "typed", "directed", "no_linear", "entities",
            "genia_limits", "noAnnType", "noMasking", "maxFeatures",
            "no_merge", "disable_entity_features",
            "disable_single_element_features", "disable_ngram_features",
            "disable_path_edge_features"
        ])
        self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
        self.multiEdgeFeatureBuilder.noAnnType = self.styles["noAnnType"]
        self.multiEdgeFeatureBuilder.maskNamedEntities = not self.styles[
            "noMasking"]
        self.multiEdgeFeatureBuilder.maximum = self.styles["maxFeatures"]
        self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet)
        self.pathLengths = length
        assert self.pathLengths is None
        self.types = types

        self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
        self.triggerFeatureBuilder.useNonNameEntities = True
Exemplo n.º 13
0
    def __init__(self,
                 style=["typed", "directed", "headsOnly"],
                 length=None,
                 types=[],
                 featureSet=None,
                 classSet=None):
        """Set up the builder with a multi-edge feature builder and empty counters.

        Args:
            style: Stored as self.styles (the default list is shared
                between instances and should not be mutated).
            length: Accepted but not used by this constructor.
            types: Accepted but not used by this constructor.
            featureSet: Feature id set; a fresh IdSet() is created if None.
            classSet: Class id set; IdSet(1) is created if None. Its "neg"
                entry must resolve to id 1.
        """
        # Identity checks: "== None" would dispatch to any custom __eq__.
        if featureSet is None:
            featureSet = IdSet()
        if classSet is None:
            classSet = IdSet(1)
        assert classSet.getId("neg") == 1

        ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
        self.styles = style

        self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)

        # Counting dictionaries, all starting empty.
        self.counts = {}
        self.countsPerType = {}
        self.untypedCounts = {}
        self.tokenCounts = {}
Exemplo n.º 14
0
    def __init__(self, style=None, length=None, types=[], featureSet=None, classSet=None):
        """Set up the example builder and every feature builder the style enables.

        Args:
            style: Style specification handed to self.getParameters. When
                None, "typed", "directed" and "headsOnly" are switched on.
            length: Must be None (asserted); stored as self.pathLengths.
            types: Stored as self.types (the default list is shared
                between instances and should not be mutated).
            featureSet: Feature id set; a fresh IdSet() is created if None.
            classSet: Class id set; IdSet(1) is created if None. Its "neg"
                entry must resolve to id 1, or to -1 when the set holds
                exactly two ids.
        """
        # Identity checks: "== None" would dispatch to any custom __eq__.
        if featureSet is None:
            featureSet = IdSet()
        if classSet is None:
            classSet = IdSet(1)
        assert classSet.getId("neg") == 1 or (len(classSet.Ids) == 2 and classSet.getId("neg") == -1)

        ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)

        # NOTE(review): "headsOnly", "graph_kernel" and "trigger_features"
        # appear twice in this list; kept as in the original.
        self.styles = self.getParameters(style, [
            "typed", "directed", "headsOnly", "graph_kernel", "noAnnType", "noMasking", "maxFeatures",
            "genia_limits", "epi_limits", "id_limits", "rel_limits", "bb_limits", "bi_limits", "co_limits",
            "genia_task1", "ontology", "nodalida", "bacteria_renaming", "trigger_features", "rel_features",
            "ddi_features", "evex", "giuliano", "random", "themeOnly", "causeOnly", "no_path", "entities",
            "skip_extra_triggers", "headsOnly", "graph_kernel", "trigger_features", "no_task", "no_dependency",
            "disable_entity_features", "disable_terminus_features", "disable_single_element_features",
            "disable_ngram_features", "disable_path_edge_features", "no_linear", "subset", "binary", "pos_only",
            "entity_type"
        ])
        if style is None:  # no parameters given
            # The defaults belong on the parsed flags. The original indexed
            # the None argument itself, which raised TypeError.
            # Assumes getParameters returns a mutable mapping for None -- TODO confirm.
            self.styles["typed"] = self.styles["directed"] = self.styles["headsOnly"] = True

        self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
        # NOTE Temporarily re-enabling predicted range
        #self.multiEdgeFeatureBuilder.definePredictedValueRange([], None)
        if self.styles["graph_kernel"]:
            from FeatureBuilders.GraphKernelFeatureBuilder import GraphKernelFeatureBuilder
            self.graphKernelFeatureBuilder = GraphKernelFeatureBuilder(self.featureSet)
        if self.styles["noAnnType"]:
            self.multiEdgeFeatureBuilder.noAnnType = True
        if self.styles["noMasking"]:
            self.multiEdgeFeatureBuilder.maskNamedEntities = False
        if self.styles["maxFeatures"]:
            # This line was tab-indented; spaces keep the block consistent.
            self.multiEdgeFeatureBuilder.maximum = True
        if self.styles["genia_task1"]:
            self.multiEdgeFeatureBuilder.filterAnnTypes.add("Entity")
        self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet)
        if self.styles["ontology"]:
            self.multiEdgeFeatureBuilder.ontologyFeatureBuilder = BioInferOntologyFeatureBuilder(self.featureSet)
        if self.styles["nodalida"]:
            self.nodalidaFeatureBuilder = NodalidaFeatureBuilder(self.featureSet)
        if self.styles["bacteria_renaming"]:
            self.bacteriaRenamingFeatureBuilder = BacteriaRenamingFeatureBuilder(self.featureSet)
        if self.styles["trigger_features"]:
            self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
            self.triggerFeatureBuilder.useNonNameEntities = True
            if self.styles["genia_task1"]:
                self.triggerFeatureBuilder.filterAnnTypes.add("Entity")
        # The builders below receive the local featureSet argument rather
        # than self.featureSet, as in the original.
        if self.styles["rel_features"]:
            self.relFeatureBuilder = RELFeatureBuilder(featureSet)
        if self.styles["ddi_features"]:
            self.drugFeatureBuilder = DrugFeatureBuilder(featureSet)
        if self.styles["evex"]:
            self.evexFeatureBuilder = EVEXFeatureBuilder(featureSet)
        if self.styles["giuliano"]:
            self.giulianoFeatureBuilder = GiulianoFeatureBuilder(featureSet)
        self.pathLengths = length
        assert self.pathLengths is None
        self.types = types
        if self.styles["random"]:
            from FeatureBuilders.RandomFeatureBuilder import RandomFeatureBuilder
            self.randomFeatureBuilder = RandomFeatureBuilder(self.featureSet)
 def __init__(self):
     """Initialise the base builder and attach an EdgeFeatureBuilder."""
     ExampleBuilder.__init__(self)
     # The edge feature builder is given this builder's feature set.
     edgeFeatures = EdgeFeatureBuilder(self.featureSet)
     self.featureBuilder = edgeFeatures
 def __init__(self):
     """Initialise the base builder, a fresh class set and an edge feature builder.

     self.classSet is replaced by IdSet(1); its "neg" entry must resolve
     to id 1.
     """
     ExampleBuilder.__init__(self)
     self.classSet = IdSet(1)
     negativeId = self.classSet.getId("neg")
     assert negativeId == 1
     self.featureBuilder = EdgeFeatureBuilder(self.featureSet)
Exemplo n.º 17
0
 def __init__(self):
     """Initialise the base builder plus edge and entity feature builders.

     Both feature builders are constructed from self.featureSet.
     """
     ExampleBuilder.__init__(self)
     edgeFeatures = EdgeFeatureBuilder(self.featureSet)
     self.edgeFeatureBuilder = edgeFeatures
     # The entity feature builder is a TokenFeatureBuilder.
     entityFeatures = TokenFeatureBuilder(self.featureSet)
     self.entityFeatureBuilder = entityFeatures
 def __init__(self):
     """Initialise the base builder and install a fresh IdSet(1) class set.

     The "neg" entry of the new class set must resolve to id 1.
     """
     ExampleBuilder.__init__(self)
     self.classSet = IdSet(1)
     negativeId = self.classSet.getId("neg")
     assert negativeId == 1
Exemplo n.º 19
0
 def __init__(self):
     """Initialise the base builder and create the two nested example builders.

     Entities use a GeneralEntityRecognizer and edges use a
     SimpleDependencyExampleBuilder2, both built with no arguments.
     """
     ExampleBuilder.__init__(self)
     entityBuilder = GeneralEntityRecognizer()
     self.entityExampleBuilder = entityBuilder
     edgeBuilder = SimpleDependencyExampleBuilder2()
     self.edgeExampleBuilder = edgeBuilder
Exemplo n.º 20
0
    def __init__(self,
                 style=["typed", "directed", "headsOnly"],
                 length=None,
                 types=[],
                 featureSet=None,
                 classSet=None,
                 gazetteer=None,
                 pathGazetteer=None,
                 negFrac=None):
        """Set up the builder, optional gazetteers and bookkeeping containers.

        Args:
            style: Collection of style flags. "noMasking" and "maxFeatures"
                adjust the MultiEdgeFeatureBuilder; noAnnType is always on.
            length: Must be None (asserted); stored as self.pathLengths.
            types: Stored as self.types.
            featureSet: Feature id set; a fresh IdSet() is created if None.
            classSet: Class id set; IdSet(1) is created if None. Its "neg"
                entry must resolve to id 1.
            gazetteer: Passed to Gazetteer.loadGztr when not None.
            pathGazetteer: Passed to PathGazetteer.load when not None.
            negFrac: Stored as self.negFrac.

        NOTE: the list defaults of style and types are stored, not copied,
        so they are shared between instances and should not be mutated.
        """
        # Identity checks: "== None" would dispatch to any custom __eq__.
        if featureSet is None:
            featureSet = IdSet()
        if classSet is None:
            classSet = IdSet(1)
        assert classSet.getId("neg") == 1

        if gazetteer is not None:
            print >> sys.stderr, "Loading gazetteer from", gazetteer
            self.gazetteer = Gazetteer.loadGztr(gazetteer)
        else:
            print >> sys.stderr, "No gazetteer loaded"
            self.gazetteer = None

        self.pathGazetteer = None
        self.pathGazetteerDependencies = None
        self.pathGazetteerPairs = None
        if pathGazetteer is not None:
            print >> sys.stderr, "Loading path gazetteer from", pathGazetteer
            self.pathGazetteer = PathGazetteer.load(pathGazetteer)
            self.pathGazetteerDependencies = PathGazetteer.getDependencies(
                self.pathGazetteer)
            self.pathGazetteerPairs = PathGazetteer.getPairs(
                self.pathGazetteer)
        else:
            print >> sys.stderr, "No path gazetteer loaded"

        ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
        self.styles = style
        self.negFrac = negFrac
        # Printed unconditionally, i.e. also when negFrac is None.
        print >> sys.stderr, "Downsampling negatives to", negFrac
        self.negRand = random.Random()

        self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
        # noAnnType is forced on; the '"noAnnType" in self.styles' check
        # had been replaced by a constant True.
        self.multiEdgeFeatureBuilder.noAnnType = True
        if "noMasking" in self.styles:
            self.multiEdgeFeatureBuilder.maskNamedEntities = False
        if "maxFeatures" in self.styles:
            self.multiEdgeFeatureBuilder.maximum = True

        self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
        # The token feature builder and the "ontology" style are disabled
        # in this builder (their set-up was commented out).
        self.pathLengths = length
        assert self.pathLengths is None
        self.types = types

        self.eventsByOrigId = {}
        self.headTokensByOrigId = {}
        self.interSentenceEvents = set()

        self.examplesByEventOrigId = {}
        self.skippedByType = {}
        self.skippedByTypeAndReason = {}
        self.builtByType = {}

        self.gazMatchCache = {}
 def __init__(self):
     """Initialise the base builder and attach an EdgeFeatureBuilder."""
     ExampleBuilder.__init__(self)
     # Built from the feature set available on self after base initialisation.
     sharedFeatures = self.featureSet
     self.featureBuilder = EdgeFeatureBuilder(sharedFeatures)
 def __init__(self):
     """Initialise the base builder and install a fresh IdSet(1) class set.

     The "neg" entry of the new class set must resolve to id 1.
     """
     ExampleBuilder.__init__(self)
     freshClassSet = IdSet(1)
     self.classSet = freshClassSet
     assert self.classSet.getId("neg") == 1
Exemplo n.º 23
0
    def __init__(self,
                 style=None,
                 length=None,
                 types=[],
                 featureSet=None,
                 classSet=None):
        """Set up the example builder and every feature builder the style enables.

        Args:
            style: Style specification handed to self.getParameters. When
                None, "typed", "directed" and "headsOnly" are switched on.
            length: Must be None (asserted); stored as self.pathLengths.
            types: Stored as self.types (the default list is shared
                between instances and should not be mutated).
            featureSet: Feature id set; a fresh IdSet() is created if None.
            classSet: Class id set; IdSet(1) is created if None. Its "neg"
                entry must resolve to id 1, or to -1 when the set holds
                exactly two ids.
        """
        # Identity checks: "== None" would dispatch to any custom __eq__.
        if featureSet is None:
            featureSet = IdSet()
        if classSet is None:
            classSet = IdSet(1)
        assert (classSet.getId("neg") == 1
                or (len(classSet.Ids) == 2 and classSet.getId("neg") == -1))

        ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)

        # NOTE(review): "headsOnly", "graph_kernel" and "trigger_features"
        # appear twice in this list; kept as in the original.
        self.styles = self.getParameters(style, [
            "typed", "directed", "headsOnly", "graph_kernel", "noAnnType",
            "noMasking", "maxFeatures", "genia_limits", "epi_limits",
            "id_limits", "rel_limits", "bb_limits", "bi_limits", "co_limits",
            "genia_task1", "ontology", "nodalida", "bacteria_renaming",
            "trigger_features", "rel_features", "ddi_features", "evex",
            "giuliano", "random", "themeOnly", "causeOnly", "no_path",
            "entities", "skip_extra_triggers", "headsOnly", "graph_kernel",
            "trigger_features", "no_task", "no_dependency",
            "disable_entity_features", "disable_terminus_features",
            "disable_single_element_features", "disable_ngram_features",
            "disable_path_edge_features", "no_linear", "subset", "binary",
            "pos_only", "entity_type"
        ])
        if style is None:  # no parameters given
            # The defaults belong on the parsed flags. The original indexed
            # the None argument itself, which raised TypeError.
            # Assumes getParameters returns a mutable mapping for None -- TODO confirm.
            self.styles["typed"] = True
            self.styles["directed"] = True
            self.styles["headsOnly"] = True

        self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
        # NOTE Temporarily re-enabling predicted range
        #self.multiEdgeFeatureBuilder.definePredictedValueRange([], None)
        if self.styles["graph_kernel"]:
            from FeatureBuilders.GraphKernelFeatureBuilder import GraphKernelFeatureBuilder
            self.graphKernelFeatureBuilder = GraphKernelFeatureBuilder(
                self.featureSet)
        if self.styles["noAnnType"]:
            self.multiEdgeFeatureBuilder.noAnnType = True
        if self.styles["noMasking"]:
            self.multiEdgeFeatureBuilder.maskNamedEntities = False
        if self.styles["maxFeatures"]:
            self.multiEdgeFeatureBuilder.maximum = True
        if self.styles["genia_task1"]:
            self.multiEdgeFeatureBuilder.filterAnnTypes.add("Entity")
        self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet)
        if self.styles["ontology"]:
            self.multiEdgeFeatureBuilder.ontologyFeatureBuilder = BioInferOntologyFeatureBuilder(
                self.featureSet)
        if self.styles["nodalida"]:
            self.nodalidaFeatureBuilder = NodalidaFeatureBuilder(
                self.featureSet)
        if self.styles["bacteria_renaming"]:
            self.bacteriaRenamingFeatureBuilder = BacteriaRenamingFeatureBuilder(
                self.featureSet)
        if self.styles["trigger_features"]:
            self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
            self.triggerFeatureBuilder.useNonNameEntities = True
            if self.styles["genia_task1"]:
                self.triggerFeatureBuilder.filterAnnTypes.add("Entity")
        # The builders below receive the local featureSet argument rather
        # than self.featureSet, as in the original.
        if self.styles["rel_features"]:
            self.relFeatureBuilder = RELFeatureBuilder(featureSet)
        if self.styles["ddi_features"]:
            self.drugFeatureBuilder = DrugFeatureBuilder(featureSet)
        if self.styles["evex"]:
            self.evexFeatureBuilder = EVEXFeatureBuilder(featureSet)
        if self.styles["giuliano"]:
            self.giulianoFeatureBuilder = GiulianoFeatureBuilder(featureSet)
        self.pathLengths = length
        assert self.pathLengths is None
        self.types = types
        if self.styles["random"]:
            from FeatureBuilders.RandomFeatureBuilder import RandomFeatureBuilder
            self.randomFeatureBuilder = RandomFeatureBuilder(self.featureSet)