def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None):
    """
    Set up id sets, speculation word lists, an optional gazetteer and
    the style parameters for this example builder.

    style             -- style definition handed to self.getParameters
    classSet          -- class id set; IdSet(1) is created when None.
                         Its id for "neg" must be 1.
    featureSet        -- feature id set; IdSet() is created when None
    gazetteerFileName -- file loaded with Gazetteer.loadGztr, or None to
                         leave self.gazetteer unset (None)
    """
    if classSet is None:
        classSet = IdSet(1)
    # Call getId outside the assert so the lookup still happens when
    # asserts are stripped with -O (getId may register the name as a side
    # effect -- not visible here, TODO confirm against IdSet).
    negId = classSet.getId("neg")
    assert negId == 1
    if featureSet is None:
        featureSet = IdSet()

    # speculationWords is a module-level name that is only read here, so
    # no "global" declaration is required.
    self.specWords, self.specWordStems = readWords(speculationWords)

    ExampleBuilder.__init__(self, classSet, featureSet)
    #gazetteerFileName="/usr/share/biotext/GeniaChallenge/SharedTaskTriggerTest/gazetteer-train"
    if gazetteerFileName is not None:
        self.gazetteer = Gazetteer.loadGztr(gazetteerFileName)
        print >> sys.stderr, "Loaded gazetteer from", gazetteerFileName
    else:
        self.gazetteer = None

    # Second argument: option names with their values (presumably the
    # defaults); third: the values accepted for "classification"
    # (presumably a validity check -- confirm against getParameters).
    self.styles = self.getParameters(
        style,
        {"classification": "multiclass", "speculation_words": True},
        {"classification": ("multiclass", "speculation", "negation")})
def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None, skiplist=None):
    """
    Set up id sets, an optional gazetteer, the style parameters, the
    skiplist and the style-dependent feature builders.

    style             -- style definition handed to self.getParameters
    classSet          -- class id set; IdSet(1) is created when None.
                         Its id for "neg" must be 1.
    featureSet        -- feature id set; IdSet() is created when None
    gazetteerFileName -- file loaded with Gazetteer.loadGztr, or None
    skiplist          -- path of a text file whose stripped lines are
                         collected into self.skiplist, or None for an
                         empty skiplist
    """
    if classSet is None:
        classSet = IdSet(1)
    # Call getId outside the assert so the lookup still happens when
    # asserts are stripped with -O (getId may register the name as a side
    # effect -- not visible here, TODO confirm against IdSet).
    negId = classSet.getId("neg")
    assert negId == 1
    if featureSet is None:
        featureSet = IdSet()

    ExampleBuilder.__init__(self, classSet, featureSet)
    #gazetteerFileName="/usr/share/biotext/GeniaChallenge/SharedTaskTriggerTest/gazetteer-train"
    if gazetteerFileName is not None:
        self.gazetteer = Gazetteer.loadGztr(gazetteerFileName)
        print >> sys.stderr, "Loaded gazetteer from", gazetteerFileName
    else:
        print >> sys.stderr, "No gazetteer loaded"
        self.gazetteer = None

    self.styles = self.getParameters(style, [
        "rel_features", "wordnet", "bb_features", "giuliano",
        "epi_merge_negated", "limit_merged_types", "genia_task1",
        "build_for_nameless", "pos_only", "all_tokens", "names",
        "pos_pairs", "linear_ngrams", "phospho"
    ])
    # if "selftrain_group" in self.styles:
    #     self.selfTrainGroups = set()
    #     if "selftrain_group-1" in self.styles:
    #         self.selfTrainGroups.add("-1")
    #     if "selftrain_group0" in self.styles:
    #         self.selfTrainGroups.add("0")
    #     if "selftrain_group1" in self.styles:
    #         self.selfTrainGroups.add("1")
    #     if "selftrain_group2" in self.styles:
    #         self.selfTrainGroups.add("2")
    #     if "selftrain_group3" in self.styles:
    #         self.selfTrainGroups.add("3")
    #     print >> sys.stderr, "Self-train-groups:", self.selfTrainGroups

    self.skiplist = set()
    if skiplist is not None:
        # "with" guarantees the file is closed even if reading fails.
        with open(skiplist, "rt") as f:
            self.skiplist.update(line.strip() for line in f)

    # Optional feature builders are only constructed for enabled styles.
    if self.styles["rel_features"]:
        self.relFeatureBuilder = RELFeatureBuilder(featureSet)
    if self.styles["wordnet"]:
        self.wordNetFeatureBuilder = WordNetFeatureBuilder(featureSet)
    if self.styles["bb_features"]:
        self.bacteriaTokens = PhraseTriggerExampleBuilder.getBacteriaTokens(
            PhraseTriggerExampleBuilder.getBacteriaNames())
    if self.styles["giuliano"]:
        self.giulianoFeatureBuilder = GiulianoFeatureBuilder(featureSet)
def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None):
    """
    Set up id sets, store the style and create the trigger feature
    builder.

    style             -- stored unchanged in self.styles
    classSet          -- class id set; IdSet(1) is created when None.
                         Its id for "neg" must be 1.
    featureSet        -- feature id set; IdSet() is created when None
    gazetteerFileName -- accepted but not used by this constructor
    """
    if classSet is None:
        classSet = IdSet(1)
    # Call getId outside the assert so the lookup still happens when
    # asserts are stripped with -O (getId may register the name as a side
    # effect -- not visible here, TODO confirm against IdSet).
    negId = classSet.getId("neg")
    assert negId == 1
    if featureSet is None:
        featureSet = IdSet()

    ExampleBuilder.__init__(self, classSet, featureSet)
    self.styles = style
    # Built from self.featureSet (presumably set by ExampleBuilder.__init__
    # -- confirm) rather than from the local argument.
    self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
    self.triggerFeatureBuilder.useNonNameEntities = False
def __init__(self, style=None, classSet=None, featureSet=None):
    """
    Set up id sets, store the style and create the timers used by this
    example builder.

    style      -- stored unchanged in self.styles
    classSet   -- class id set; IdSet(1) is created when None.
                  Its id for "neg" must be 1.
    featureSet -- feature id set; IdSet() is created when None
    """
    if classSet is None:
        classSet = IdSet(1)
    # Call getId outside the assert so the lookup still happens when
    # asserts are stripped with -O (getId may register the name as a side
    # effect -- not visible here, TODO confirm against IdSet).
    negId = classSet.getId("neg")
    assert negId == 1
    if featureSet is None:
        featureSet = IdSet()

    ExampleBuilder.__init__(self, classSet, featureSet)
    self.styles = style

    # One timer per measured phase; all are created with Timer(False)
    # (presumably "do not start yet" -- confirm against Timer).
    self.timerBuildExamples = Timer(False)
    self.timerCrawl = Timer(False)
    self.timerCrawlPrecalc = Timer(False)
    self.timerMatrix = Timer(False)
    self.timerMatrixPrecalc = Timer(False)
def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None):
    """
    Set up id sets, store the style and create the trigger feature
    builder.

    style             -- stored unchanged in self.styles
    classSet          -- class id set; IdSet(1) is created when None.
                         Its id for "neg" must be 1.
    featureSet        -- feature id set; IdSet() is created when None
    gazetteerFileName -- accepted but not used by this constructor
    """
    if classSet is None:
        classSet = IdSet(1)
    # Call getId outside the assert so the lookup still happens when
    # asserts are stripped with -O (getId may register the name as a side
    # effect -- not visible here, TODO confirm against IdSet).
    negId = classSet.getId("neg")
    assert negId == 1
    if featureSet is None:
        featureSet = IdSet()

    ExampleBuilder.__init__(self, classSet, featureSet)
    self.styles = style
    # Built from self.featureSet (presumably set by ExampleBuilder.__init__
    # -- confirm) rather than from the local argument.
    self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
    self.triggerFeatureBuilder.useNonNameEntities = False
def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None):
    """
    Set up id sets, load an optional gazetteer and store the style.

    style             -- stored unchanged in self.styles
    classSet          -- class id set; IdSet(1) is created when None.
                         Its id for "neg" must be 1.
    featureSet        -- feature id set; IdSet() is created when None
    gazetteerFileName -- file loaded with Gazetteer.loadGztr, or None to
                         run without a gazetteer (self.gazetteer = None)
    """
    if classSet is None:
        classSet = IdSet(1)
    # Call getId outside the assert so the lookup still happens when
    # asserts are stripped with -O (getId may register the name as a side
    # effect -- not visible here, TODO confirm against IdSet).
    negId = classSet.getId("neg")
    assert negId == 1
    if featureSet is None:
        featureSet = IdSet()

    ExampleBuilder.__init__(self, classSet, featureSet)
    if gazetteerFileName is not None:
        self.gazetteer = Gazetteer.loadGztr(gazetteerFileName)
        print >> sys.stderr, "Loaded gazetteer from", gazetteerFileName
    else:
        print >> sys.stderr, "No gazetteer loaded"
        self.gazetteer = None
    self.styles = style
def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None, skiplist=None):
    """
    Set up id sets, an optional gazetteer, the style parameters, the
    skiplist and the style-dependent feature builders.

    style             -- style definition handed to self.getParameters
    classSet          -- class id set; IdSet(1) is created when None.
                         Its id for "neg" must be 1.
    featureSet        -- feature id set; IdSet() is created when None
    gazetteerFileName -- file loaded with Gazetteer.loadGztr, or None
    skiplist          -- path of a text file whose stripped lines are
                         collected into self.skiplist, or None for an
                         empty skiplist
    """
    if classSet is None:
        classSet = IdSet(1)
    # Call getId outside the assert so the lookup still happens when
    # asserts are stripped with -O (getId may register the name as a side
    # effect -- not visible here, TODO confirm against IdSet).
    negId = classSet.getId("neg")
    assert negId == 1
    if featureSet is None:
        featureSet = IdSet()

    ExampleBuilder.__init__(self, classSet, featureSet)
    #gazetteerFileName="/usr/share/biotext/GeniaChallenge/SharedTaskTriggerTest/gazetteer-train"
    if gazetteerFileName is not None:
        self.gazetteer = Gazetteer.loadGztr(gazetteerFileName)
        print >> sys.stderr, "Loaded gazetteer from", gazetteerFileName
    else:
        print >> sys.stderr, "No gazetteer loaded"
        self.gazetteer = None

    self.styles = self.getParameters(style, [
        "rel_features", "wordnet", "bb_features", "giuliano",
        "epi_merge_negated", "limit_merged_types", "genia_task1",
        "build_for_nameless", "pos_only", "all_tokens", "names",
        "pos_pairs", "linear_ngrams", "phospho"])
    # if "selftrain_group" in self.styles:
    #     self.selfTrainGroups = set()
    #     if "selftrain_group-1" in self.styles:
    #         self.selfTrainGroups.add("-1")
    #     if "selftrain_group0" in self.styles:
    #         self.selfTrainGroups.add("0")
    #     if "selftrain_group1" in self.styles:
    #         self.selfTrainGroups.add("1")
    #     if "selftrain_group2" in self.styles:
    #         self.selfTrainGroups.add("2")
    #     if "selftrain_group3" in self.styles:
    #         self.selfTrainGroups.add("3")
    #     print >> sys.stderr, "Self-train-groups:", self.selfTrainGroups

    self.skiplist = set()
    if skiplist is not None:
        # "with" guarantees the file is closed even if reading fails.
        with open(skiplist, "rt") as f:
            self.skiplist.update(line.strip() for line in f)

    # Optional feature builders are only constructed for enabled styles.
    if self.styles["rel_features"]:
        self.relFeatureBuilder = RELFeatureBuilder(featureSet)
    if self.styles["wordnet"]:
        self.wordNetFeatureBuilder = WordNetFeatureBuilder(featureSet)
    if self.styles["bb_features"]:
        self.bacteriaTokens = PhraseTriggerExampleBuilder.getBacteriaTokens(
            PhraseTriggerExampleBuilder.getBacteriaNames())
    if self.styles["giuliano"]:
        self.giulianoFeatureBuilder = GiulianoFeatureBuilder(featureSet)
def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None, skiplist=None):
    """
    Set up id sets, an optional gazetteer, the skiplist and the feature
    builders.

    NOTE(review): the style argument currently has no effect; self.styles
    is always set to the hard-coded list below.

    style             -- accepted for interface compatibility, ignored
    classSet          -- class id set; IdSet(1) is created when None.
                         Its id for "neg" must be 1.
    featureSet        -- feature id set; IdSet() is created when None
    gazetteerFileName -- file loaded with Gazetteer.loadGztr, or None
    skiplist          -- path of a text file whose stripped lines are
                         collected into self.skiplist, or None for an
                         empty skiplist
    """
    if classSet is None:
        classSet = IdSet(1)
    # Call getId outside the assert so the lookup still happens when
    # asserts are stripped with -O (getId may register the name as a side
    # effect -- not visible here, TODO confirm against IdSet).
    negId = classSet.getId("neg")
    assert negId == 1
    if featureSet is None:
        featureSet = IdSet()

    ExampleBuilder.__init__(self, classSet, featureSet)
    # gazetteerFileName="/usr/share/biotext/GeniaChallenge/SharedTaskTriggerTest/gazetteer-train"
    if gazetteerFileName is not None:
        self.gazetteer = Gazetteer.loadGztr(gazetteerFileName)
        print >> sys.stderr, "Loaded gazetteer from", gazetteerFileName
    else:
        print >> sys.stderr, "No gazetteer loaded"
        self.gazetteer = None

    self.skiplist = set()
    if skiplist is not None:
        # "with" guarantees the file is closed even if reading fails.
        with open(skiplist, "rt") as f:
            self.skiplist.update(line.strip() for line in f)

    # Fixed style set; this replaces whatever was passed in as style.
    self.styles = [
        "trigger_features",
        "typed",
        "directed",
        "no_linear",
        "entities",
        "genia_limits",
        "noMasking",
        "maxFeatures",
    ]

    self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
    # "graph_kernel" and "noAnnType" are not in the fixed list above, so
    # those two branches only become active if the list is edited.
    if "graph_kernel" in self.styles:
        from FeatureBuilders.GraphKernelFeatureBuilder import GraphKernelFeatureBuilder
        self.graphKernelFeatureBuilder = GraphKernelFeatureBuilder(self.featureSet)
    if "noAnnType" in self.styles:
        self.multiEdgeFeatureBuilder.noAnnType = True
    if "noMasking" in self.styles:
        self.multiEdgeFeatureBuilder.maskNamedEntities = False
    if "maxFeatures" in self.styles:
        self.multiEdgeFeatureBuilder.maximum = True

    self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None, skiplist=None):
    """
    Set up id sets, an optional gazetteer, the skiplist and the feature
    builders.

    NOTE(review): the style argument currently has no effect; self.styles
    is always set to the hard-coded list below.

    style             -- accepted for interface compatibility, ignored
    classSet          -- class id set; IdSet(1) is created when None.
                         Its id for "neg" must be 1.
    featureSet        -- feature id set; IdSet() is created when None
    gazetteerFileName -- file loaded with Gazetteer.loadGztr, or None
    skiplist          -- path of a text file whose stripped lines are
                         collected into self.skiplist, or None for an
                         empty skiplist
    """
    if classSet is None:
        classSet = IdSet(1)
    # Call getId outside the assert so the lookup still happens when
    # asserts are stripped with -O (getId may register the name as a side
    # effect -- not visible here, TODO confirm against IdSet).
    negId = classSet.getId("neg")
    assert negId == 1
    if featureSet is None:
        featureSet = IdSet()

    ExampleBuilder.__init__(self, classSet, featureSet)
    #gazetteerFileName="/usr/share/biotext/GeniaChallenge/SharedTaskTriggerTest/gazetteer-train"
    if gazetteerFileName is not None:
        self.gazetteer = Gazetteer.loadGztr(gazetteerFileName)
        print >> sys.stderr, "Loaded gazetteer from", gazetteerFileName
    else:
        print >> sys.stderr, "No gazetteer loaded"
        self.gazetteer = None

    self.skiplist = set()
    if skiplist is not None:
        # "with" guarantees the file is closed even if reading fails.
        with open(skiplist, "rt") as f:
            self.skiplist.update(line.strip() for line in f)

    # Fixed style set; this replaces whatever was passed in as style.
    self.styles = [
        "trigger_features", "typed", "directed", "no_linear", "entities",
        "genia_limits", "noMasking", "maxFeatures"
    ]

    self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
    # "graph_kernel" and "noAnnType" are not in the fixed list above, so
    # those two branches only become active if the list is edited.
    if "graph_kernel" in self.styles:
        from FeatureBuilders.GraphKernelFeatureBuilder import GraphKernelFeatureBuilder
        self.graphKernelFeatureBuilder = GraphKernelFeatureBuilder(
            self.featureSet)
    if "noAnnType" in self.styles:
        self.multiEdgeFeatureBuilder.noAnnType = True
    if "noMasking" in self.styles:
        self.multiEdgeFeatureBuilder.maskNamedEntities = False
    if "maxFeatures" in self.styles:
        self.multiEdgeFeatureBuilder.maximum = True

    self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None):
    """
    Set up id sets, load an optional gazetteer, store the style and
    define the excluded part-of-speech tags.

    style             -- stored unchanged in self.styles
    classSet          -- class id set; IdSet(1) is created when None.
                         Its id for "neg" must be 1.
    featureSet        -- feature id set; IdSet() is created when None
    gazetteerFileName -- file loaded with Gazetteer.loadGztr, or None to
                         run without a gazetteer (self.gazetteer = None)
    """
    if classSet is None:
        classSet = IdSet(1)
    # Call getId outside the assert so the lookup still happens when
    # asserts are stripped with -O (getId may register the name as a side
    # effect -- not visible here, TODO confirm against IdSet).
    negId = classSet.getId("neg")
    assert negId == 1
    if featureSet is None:
        featureSet = IdSet()

    ExampleBuilder.__init__(self, classSet, featureSet)
    #gazetteerFileName="/usr/share/biotext/GeniaChallenge/SharedTaskTriggerTest/gazetteer-train"
    if gazetteerFileName is not None:
        self.gazetteer = Gazetteer.loadGztr(gazetteerFileName)
        print >> sys.stderr, "Loaded gazetteer from", gazetteerFileName
    else:
        print >> sys.stderr, "No gazetteer loaded"
        self.gazetteer = None
    self.styles = style

    # POS tags (including the empty tag) kept as a list; presumably
    # tokens carrying one of these are skipped elsewhere in the class
    # -- the use is not visible here.
    self.excludedPOS = [
        "", "(", ")", ",", ".", "CC", "EX", "FW", "LS", "MD", "PDT",
        "POS", "PRP", "PRP$", "RBR", "RBS", "RP", "WDT", "WP", "WP$", "``"]
def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None):
    """
    Set up id sets, speculation word lists, an optional gazetteer and
    the style parameters for this example builder.

    style             -- style definition handed to self.getParameters
    classSet          -- class id set; IdSet(1) is created when None.
                         Its id for "neg" must be 1.
    featureSet        -- feature id set; IdSet() is created when None
    gazetteerFileName -- file loaded with Gazetteer.loadGztr, or None to
                         leave self.gazetteer unset (None)
    """
    if classSet is None:
        classSet = IdSet(1)
    # Call getId outside the assert so the lookup still happens when
    # asserts are stripped with -O (getId may register the name as a side
    # effect -- not visible here, TODO confirm against IdSet).
    negId = classSet.getId("neg")
    assert negId == 1
    if featureSet is None:
        featureSet = IdSet()

    # speculationWords is a module-level name that is only read here, so
    # no "global" declaration is required.
    self.specWords, self.specWordStems = readWords(speculationWords)

    ExampleBuilder.__init__(self, classSet, featureSet)
    #gazetteerFileName="/usr/share/biotext/GeniaChallenge/SharedTaskTriggerTest/gazetteer-train"
    if gazetteerFileName is not None:
        self.gazetteer = Gazetteer.loadGztr(gazetteerFileName)
        print >> sys.stderr, "Loaded gazetteer from", gazetteerFileName
    else:
        self.gazetteer = None

    # Second argument: option names with their values (presumably the
    # defaults); third: the values accepted for "classification"
    # (presumably a validity check -- confirm against getParameters).
    self.styles = self.getParameters(
        style,
        {"classification":"multiclass", "speculation_words":True},
        {"classification":("multiclass", "speculation", "negation")})