def classify(self, data, model, output, parse=None, task=None, goldData=None, workDir=None, fromStep=None, omitSteps=None, validate=False):
    """
    Classify data with a trained model, save the predictions and evaluate them.

    Predictions are written to output + "-pred.xml.gz". If the shared task
    parameters request conversion, an archive named output + "-events.zip" or
    output + "-events.tar.gz" is written as well, and optionally evaluated.

    @param data: the input passed to classifyToXML and to EvaluateInteractionXML.run
    @param model: the model, passed through self.openModel in read mode
    @param output: prefix for all output file names
    @param parse: parse name; read from the model (key self.tag + "parse") when None
    @param task: task for the shared task evaluator; read from the model (key self.tag + "task") when None and evaluation is requested
    @param goldData: forwarded to classifyToXML
    @param workDir: directory for intermediate files; a temporary one is set up when None
    @param fromStep: not used by this method
    @param omitSteps: not used by this method
    @param validate: if True, the predictions are validated with self.structureAnalyzer before being written
    """
    model = self.openModel(model, "r")
    self.enterState(self.STATE_CLASSIFY)
    self.setWorkDir(workDir)
    if workDir is None:
        self.setTempWorkDir()
    try:
        # NOTE(review): the model is passed through openModel a second time, as in the
        # original code; presumably a no-op for an already opened model -- confirm before removing.
        model = self.openModel(model, "r")
        if parse is None:
            parse = self.getStr(self.tag+"parse", model)
        workOutputTag = os.path.join(self.workDir, os.path.basename(output) + "-")
        xml = self.classifyToXML(data, model, None, workOutputTag,
                                 model.get(self.tag+"classifier-model", defaultIfNotExist=None),
                                 goldData, parse,
                                 float(model.getStr("recallAdjustParameter", defaultIfNotExist=1.0)))
        if validate:
            self.structureAnalyzer.load(model)
            self.structureAnalyzer.validate(xml)
            ETUtils.write(xml, output+"-pred.xml.gz")
        else:
            # Without validation, the prediction file found in the work directory
            # (presumably written by classifyToXML) is copied unchanged.
            shutil.copy2(workOutputTag+self.tag+"pred.xml.gz", output+"-pred.xml.gz")
        EvaluateInteractionXML.run(self.evaluator, xml, data, parse)
        stParams = self.getBioNLPSharedTaskParams(self.bioNLPSTParams, model)
        if stParams["convert"]:
            extension = ".zip" if (stParams["convert"] == "zip") else ".tar.gz"
            Utils.STFormat.ConvertXML.toSTFormat(xml, output+"-events" + extension,
                                                 outputTag=stParams["a2Tag"],
                                                 writeExtra=(stParams["scores"] == True))
            # Shared task evaluation needs the converted archive, so it is only attempted after conversion
            if stParams["evaluate"]:
                if task is None:
                    task = self.getStr(self.tag+"task", model)
                self.stEvaluator.evaluate(output+"-events" + extension, task)
    finally:
        # Clean up the temporary work directory even when classification fails
        self.deleteTempWorkDir()
    self.exitState()
def evaluateChemProt(xml, gold):
    """
    Evaluate predictions with the internal evaluator and with ChemProtEvaluator.

    The predictions are first evaluated against gold with EvaluateIXML, then
    exported with the "EXPORT_CHEMPROT" preprocessor step into a temporary
    directory, where ChemProtEvaluator is run on the resulting TSV file.
    The temporary directory is always removed, also when the export or the
    evaluation raises an exception.

    @param xml: the predictions, passed to EvaluateIXML.run and to the preprocessor
    @param gold: the reference data, passed to EvaluateIXML.run
    """
    EvaluateIXML.run(AveragingMultiClassEvaluator, xml, gold, "McCC")
    preprocessor = Preprocessor(steps=["EXPORT_CHEMPROT"])
    tempDir = tempfile.mkdtemp()
    print >> sys.stderr, "Using temporary evaluation directory", tempDir
    try:
        tsvPath = os.path.join(tempDir, "predictions.tsv")
        preprocessor.process(xml, tsvPath)
        ChemProtEvaluator().evaluateTSV(tsvPath, tempDir)
    finally:
        # Do not leave the temporary directory behind if the evaluation fails
        print >> sys.stderr, "Removing temporary evaluation directory", tempDir
        shutil.rmtree(tempDir)
def evaluateChemProt(xml, gold):
    """
    Run the internal and the ChemProt evaluation for a set of predictions.

    EvaluateIXML compares the predictions with gold. The predictions are then
    written as a TSV file (preprocessor step "EXPORT_CHEMPROT") into a fresh
    temporary directory and passed to ChemProtEvaluator. The directory is
    deleted afterwards, whether or not the export and evaluation succeed.

    @param xml: the predictions, passed to EvaluateIXML.run and to the preprocessor
    @param gold: the reference data, passed to EvaluateIXML.run
    """
    EvaluateIXML.run(AveragingMultiClassEvaluator, xml, gold, "McCC")
    preprocessor = Preprocessor(steps=["EXPORT_CHEMPROT"])
    tempDir = tempfile.mkdtemp()
    print >> sys.stderr, "Using temporary evaluation directory", tempDir
    try:
        tsvPath = os.path.join(tempDir, "predictions.tsv")
        preprocessor.process(xml, tsvPath)
        ChemProtEvaluator().evaluateTSV(tsvPath, tempDir)
    finally:
        # Guarantee cleanup so that a failed evaluation does not leak the directory
        print >> sys.stderr, "Removing temporary evaluation directory", tempDir
        shutil.rmtree(tempDir)
def classify(self, data, model, output, parse=None, task=None):
    """
    Classify data with a trained model, evaluate it and convert it to ST format.

    The predictions are evaluated with EvaluateInteractionXML, converted into
    the archive output + ".tar.gz" and, if self.stEvaluator is defined,
    evaluated with it.

    @param data: the input passed to classifyToXML and to EvaluateInteractionXML.run
    @param model: the model, opened in read mode with self.openModel
    @param output: prefix for the output file names
    @param parse: parse name; read from the model (key self.tag + "parse") when None
    @param task: task name; read from the model (key self.tag + "task") when None
    """
    self.enterState(self.STATE_CLASSIFY)
    model = self.openModel(model, "r")
    # Settings not given by the caller are looked up from the model
    if parse == None:
        parse = self.getStr(self.tag+"parse", model)
    if task == None:
        task = self.getStr(self.tag+"task", model)
    outputTag = output + "-"
    classifierModel = model.get(self.tag+"classifier-model")
    recallAdjust = float(model.get("recallAdjustParameter"))
    predictions = self.classifyToXML(data, model, None, outputTag, classifierModel, None, parse, recallAdjust)
    EvaluateInteractionXML.run(self.evaluator, predictions, data, parse)
    archivePath = output + ".tar.gz"
    STFormat.ConvertXML.toSTFormat(predictions, archivePath, outputTag="a2")
    if self.stEvaluator != None:
        self.stEvaluator.evaluate(archivePath, task)
    self.exitState()
def trainUnmergingDetector(self):
    """
    Run the unmerging training steps that are selected by self.checkStep.

    The steps are "SELF-TRAIN-EXAMPLES-FOR-UNMERGING", "UNMERGING-EXAMPLES",
    "BEGIN-UNMERGING-MODEL" and "END-UNMERGING-MODEL". Each step is performed
    only if checkStep accepts it and self.unmerging is set.
    """
    predictions = None
    if not self.unmerging:
        print >> sys.stderr, "No unmerging"

    if self.checkStep("SELF-TRAIN-EXAMPLES-FOR-UNMERGING", self.unmerging) and self.unmerging:
        # Classify the training data with the trigger and edge detectors
        if not self.doUnmergingSelfTraining:
            print >> sys.stderr, "No self-training for unmerging"
        else:
            # Copy the sentence limit of the unmerging style so that the same subcorpus is used
            triggerStyle = copy.copy(Parameters.get(self.triggerExampleStyle))
            edgeStyle = copy.copy(Parameters.get(self.edgeExampleStyle))
            unmergingStyle = Parameters.get(self.unmergingExampleStyle)
            if "sentenceLimit" in unmergingStyle and unmergingStyle["sentenceLimit"]:
                for style in (triggerStyle, edgeStyle):
                    style["sentenceLimit"] = unmergingStyle["sentenceLimit"]
            extraTag = self.workDir+"unmerging-extra-"
            predictions = self.triggerDetector.classifyToXML(self.trainData, self.model, None, extraTag, exampleStyle=triggerStyle)
            predictions = self.edgeDetector.classifyToXML(predictions, self.model, None, extraTag, exampleStyle=edgeStyle)
            assert predictions != None
            EvaluateInteractionXML.run(self.edgeDetector.evaluator, predictions, self.trainData, self.parse)

    if self.checkStep("UNMERGING-EXAMPLES", self.unmerging) and self.unmerging:
        # Build the unmerging examples from the data sets with "-nodup" removed from their names
        goldOpt = self.optData.replace("-nodup", "")
        goldTrain = self.trainData.replace("-nodup", "")
        exampleFiles = [self.workDir+"unmerging-opt-examples.gz", self.workDir+"unmerging-train-examples.gz"]
        if self.doUnmergingSelfTraining:
            # Without in-memory predictions, fall back to the edge prediction file in the work directory
            if predictions == None:
                predictions = self.workDir+"unmerging-extra-edge-pred.xml.gz"
            inputs = [goldOpt, [goldTrain, predictions]]
            golds = [goldOpt, [goldTrain, goldTrain]]
        else:
            inputs = [goldOpt, goldTrain]
            golds = [goldOpt, goldTrain]
        self.unmergingDetector.buildExamples(self.model, inputs, exampleFiles, golds,
                                             exampleStyle=self.unmergingExampleStyle, saveIdsToModel=True)
        predictions = None # the predictions are not needed after this step

    if self.checkStep("BEGIN-UNMERGING-MODEL", self.unmerging) and self.unmerging:
        self.unmergingDetector.beginModel(None, self.model,
                                          self.workDir+"unmerging-train-examples.gz",
                                          self.workDir+"unmerging-opt-examples.gz")

    if self.checkStep("END-UNMERGING-MODEL", self.unmerging) and self.unmerging:
        self.unmergingDetector.endModel(None, self.model, self.workDir+"unmerging-opt-examples.gz")
        print >> sys.stderr, "Adding unmerging classifier model to test-set event model"
        combined = self.combinedModel
        if combined != None:
            # Copy the unmerging style, ids and classifier model into the combined model
            combined.addStr("unmerging-example-style", self.model.getStr("unmerging-example-style"))
            for member in ("unmerging-ids.classes", "unmerging-ids.features"):
                combined.insert(self.model.get(member), member)
            self.unmergingDetector.addClassifierModel(combined,
                                                      self.model.get("unmerging-classifier-model", True),
                                                      self.model.getStr("unmerging-classifier-parameter"))
            combined.save()
def classify(self, data, model, output, parse=None, task=None):
    """
    Predict with a trained model, then evaluate and export the result.

    After classification the predictions are evaluated with
    EvaluateInteractionXML and converted into output + ".tar.gz". That
    archive is passed to self.stEvaluator when one is defined.

    @param data: the input passed to classifyToXML and to EvaluateInteractionXML.run
    @param model: the model, opened in read mode with self.openModel
    @param output: prefix for the output file names
    @param parse: parse name; read from the model (key self.tag + "parse") when None
    @param task: task name; read from the model (key self.tag + "task") when None
    """
    self.enterState(self.STATE_CLASSIFY)
    model = self.openModel(model, "r")
    # Fill in the values the caller left undefined
    if parse == None:
        parse = self.getStr(self.tag + "parse", model)
    if task == None:
        task = self.getStr(self.tag + "task", model)
    workTag = output + "-"
    classifierModel = model.get(self.tag + "classifier-model")
    recallAdjust = float(model.get("recallAdjustParameter"))
    predicted = self.classifyToXML(data, model, None, workTag, classifierModel, None, parse, recallAdjust)
    EvaluateInteractionXML.run(self.evaluator, predicted, data, parse)
    stArchive = output + ".tar.gz"
    STFormat.ConvertXML.toSTFormat(predicted, stArchive, outputTag="a2")
    if self.stEvaluator != None:
        self.stEvaluator.evaluate(stArchive, task)
    self.exitState()
def classify(self, data, model, output, parse=None, task=None, goldData=None, workDir=None, fromStep=None, omitSteps=None, validate=False):
    """
    Classify data with a trained model, save the predictions and evaluate them.

    Predictions are written to output + "-pred.xml.gz". If the shared task
    parameters request conversion, an archive named output + "-events.zip" or
    output + "-events.tar.gz" is written as well, and optionally evaluated.

    @param data: the input passed to classifyToXML and to EvaluateInteractionXML.run
    @param model: the model, passed through self.openModel in read mode
    @param output: prefix for all output file names
    @param parse: parse name; read from the model (key self.tag + "parse") when None
    @param task: task for the shared task evaluator; read from the model (key self.tag + "task") when None and evaluation is requested
    @param goldData: forwarded to classifyToXML
    @param workDir: directory for intermediate files; a temporary one is set up when None
    @param fromStep: not used by this method
    @param omitSteps: not used by this method
    @param validate: if True, the predictions are validated with self.structureAnalyzer before being written
    """
    model = self.openModel(model, "r")
    self.enterState(self.STATE_CLASSIFY)
    self.setWorkDir(workDir)
    if workDir is None:
        self.setTempWorkDir()
    try:
        # NOTE(review): the model is passed through openModel a second time, as in the
        # original code; presumably a no-op for an already opened model -- confirm before removing.
        model = self.openModel(model, "r")
        if parse is None:
            parse = self.getStr(self.tag + "parse", model)
        workOutputTag = os.path.join(self.workDir, os.path.basename(output) + "-")
        xml = self.classifyToXML(
            data, model, None, workOutputTag,
            model.get(self.tag + "classifier-model", defaultIfNotExist=None),
            goldData, parse,
            float(model.getStr("recallAdjustParameter", defaultIfNotExist=1.0)))
        if validate:
            self.structureAnalyzer.load(model)
            self.structureAnalyzer.validate(xml)
            ETUtils.write(xml, output + "-pred.xml.gz")
        else:
            # Without validation, the prediction file found in the work directory
            # (presumably written by classifyToXML) is copied unchanged.
            shutil.copy2(workOutputTag + self.tag + "pred.xml.gz", output + "-pred.xml.gz")
        EvaluateInteractionXML.run(self.evaluator, xml, data, parse)
        stParams = self.getBioNLPSharedTaskParams(self.bioNLPSTParams, model)
        if stParams["convert"]:
            extension = ".zip" if (stParams["convert"] == "zip") else ".tar.gz"
            Utils.STFormat.ConvertXML.toSTFormat(
                xml, output + "-events" + extension,
                outputTag=stParams["a2Tag"],
                writeExtra=(stParams["scores"] == True))
            # Shared task evaluation needs the converted archive, so it is only attempted after conversion
            if stParams["evaluate"]:
                if task is None:
                    task = self.getStr(self.tag + "task", model)
                self.stEvaluator.evaluate(output + "-events" + extension, task)
    finally:
        # Clean up the temporary work directory even when classification fails
        self.deleteTempWorkDir()
    self.exitState()
def evaluateGrid(self, xml, params, bestResults):
    """
    Evaluate one parameter combination and update the best result so far.

    The predictions are always evaluated with EvaluateInteractionXML. If
    self.bioNLPSTParams["evaluate"] is set, they are also converted to ST
    format and evaluated with self.stEvaluator; that result is preferred
    when it is not None.

    @param xml: the predictions; None means there is nothing to evaluate
    @param params: the parameters stored with the result
    @param bestResults: None or a (params, result, score) tuple
    @return: the previous bestResults or a new tuple for a better result
    """
    if xml != None:
        # TODO: Where should the EvaluateInteractionXML evaluator come from?
        internalResult = EvaluateInteractionXML.run(self.edgeDetector.evaluator, xml, self.optData, self.parse)
        flatDir = self.workDir+"grid-flat-geniaformat"
        unmergingDir = self.workDir+"grid-unmerging-geniaformat"
        # With unmerging, the unmerged predictions are the ones converted to ST format
        if self.unmerging:
            xml = self.unmergingDetector.classifyToXML(xml, self.model, None, self.workDir+"grid-", goldData=self.optData)
            stFormatDir = unmergingDir
        else:
            stFormatDir = flatDir
        # Attempt shared task evaluation
        stEvaluation = None
        if self.bioNLPSTParams["evaluate"]:
            Utils.STFormat.ConvertXML.toSTFormat(xml, stFormatDir, "a2")
            stEvaluation = self.stEvaluator.evaluate(stFormatDir, self.task)
        if stEvaluation != None:
            score = stEvaluation[0]
            if bestResults == None or score > bestResults[1][0]:
                bestResults = (params, stEvaluation, score)
        else:
            # Shared task evaluation was not done (failed or not requested), use the internal evaluation
            score = internalResult.getData().fscore
            if bestResults == None or score > bestResults[1].getData().fscore:
                bestResults = (params, internalResult, score)
        # Remove ST-format files
        for stDir in (flatDir, unmergingDir):
            if os.path.exists(stDir):
                shutil.rmtree(stDir)
    else:
        print >> sys.stderr, "No predicted edges"
    return bestResults
def evaluateGrid(self, xml, params, bestResults):
    """
    Evaluate one parameter combination and update the best result so far.

    The predictions are evaluated with EvaluateInteractionXML and, after
    conversion to ST format, with self.stEvaluator. The shared task result
    is used for the comparison unless it is None.

    @param xml: the predictions; None means there is nothing to evaluate
    @param params: the parameters stored with the result
    @param bestResults: None or a (params, result, score) tuple
    @return: the previous bestResults or a new tuple for a better result
    """
    if xml != None:
        # TODO: Where should the EvaluateInteractionXML evaluator come from?
        internalResult = EvaluateInteractionXML.run(self.edgeDetector.evaluator, xml, self.optData, self.parse)
        flatDir = self.workDir+"grid-flat-geniaformat"
        unmergingDir = self.workDir+"grid-unmerging-geniaformat"
        # The flat predictions are always converted; unmerged ones replace them for evaluation
        STFormat.ConvertXML.toSTFormat(xml, flatDir, "a2")
        stFormatDir = flatDir
        if self.unmerging:
            goldData = self.optData.replace("-nodup", "")
            xml = self.unmergingDetector.classifyToXML(xml, self.model, None, self.workDir+"grid-", goldData=goldData)
            STFormat.ConvertXML.toSTFormat(xml, unmergingDir, "a2")
            stFormatDir = unmergingDir
        stEvaluation = self.stEvaluator.evaluate(stFormatDir, self.task)
        if stEvaluation != None:
            score = stEvaluation[0]
            if bestResults == None or score > bestResults[1][0]:
                bestResults = (params, stEvaluation, score)
        else:
            # No shared task result, compare the internal F-score instead
            score = internalResult.getData().fscore
            if bestResults == None or score > bestResults[1].getData().fscore:
                bestResults = (params, internalResult, score)
        # Remove the ST-format files; the unmerging directory exists only if unmerging was done
        shutil.rmtree(flatDir)
        if os.path.exists(unmergingDir):
            shutil.rmtree(unmergingDir)
    else:
        print >> sys.stderr, "No predicted edges"
    return bestResults
def classify(self, data, model, output, parse=None, task=None, fromStep=None, toStep=None):
    """
    Classify data in steps: triggers, edges, unmerging, modifiers and ST conversion.

    Each step is run only if self.checkStep accepts it. Steps read the result
    of the previous step from memory or, through self.getWorkFile, from the
    intermediate files named with the output prefix.

    @param data: the data to classify, stored as self.classifyData by initVariables
    @param model: the model, opened in read mode with self.openModel
    @param output: prefix for all output and intermediate file names
    @param parse: parse name; for edge evaluation it is read from the model when None
    @param task: task for the shared task evaluator; read from the model when None
    @param fromStep: passed to self.enterState
    @param toStep: passed to self.enterState
    """
    xml = None
    self.initVariables(classifyData=data, model=model, xml=None, task=task, parse=parse)
    self.enterState(self.STATE_CLASSIFY, ["TRIGGERS", "EDGES", "UNMERGING", "MODIFIERS", "ST-CONVERT"], fromStep, toStep)
    self.model = self.openModel(self.model, "r")
    outputTag = output + "-"
    if self.checkStep("TRIGGERS"):
        xml = self.triggerDetector.classifyToXML(self.classifyData, self.model, None, outputTag, parse=self.parse,
                                                 recallAdjust=float(self.getStr("recallAdjustParameter", self.model)))
    if self.checkStep("EDGES"):
        xml = self.getWorkFile(xml, output + "-recall-adjusted.xml.gz")
        xml = self.edgeDetector.classifyToXML(xml, self.model, None, outputTag, parse=self.parse)
        assert xml != None
        if self.parse is None:
            edgeParse = self.getStr(self.edgeDetector.tag+"parse", self.model)
        else:
            edgeParse = self.parse
        # The evaluator is called with None in place of self.classifyData
        EvaluateInteractionXML.run(self.edgeDetector.evaluator, xml, None, edgeParse)
    if self.checkStep("UNMERGING"):
        if self.model.hasMember("unmerging-classifier-model"):
            # To avoid running out of memory, always use file on disk
            xml = self.getWorkFile(None, output + "-edge-pred.xml.gz")
            goldData = None
            # Gold data is only available when the input is a file name and the
            # file with "-nodup" removed from its name exists
            if isinstance(self.classifyData, types.StringTypes):
                goldPath = self.classifyData.replace("-nodup", "")
                if os.path.exists(goldPath):
                    goldData = goldPath
            xml = self.unmergingDetector.classifyToXML(xml, self.model, None, outputTag, goldData=goldData, parse=self.parse)
        else:
            print >> sys.stderr, "No model for unmerging"
    if self.checkStep("MODIFIERS"):
        if self.model.hasMember("modifier-classifier-model"):
            xml = self.getWorkFile(xml, [output + "-unmerging-pred.xml.gz", output + "-edge-pred.xml.gz"])
            xml = self.modifierDetector.classifyToXML(xml, self.model, None, outputTag, parse=self.parse)
        else:
            print >> sys.stderr, "No model for modifier detection"
    if self.checkStep("ST-CONVERT"):
        xml = self.getWorkFile(xml, [output + "-modifier-pred.xml.gz", output + "-unmerging-pred.xml.gz", output + "-edge-pred.xml.gz"])
        STFormat.ConvertXML.toSTFormat(xml, output+"-events.tar.gz", outputTag="a2", writeScores=self.stWriteScores)
        if self.stEvaluator is not None:
            task = self.task
            if task is None:
                task = self.getStr(self.edgeDetector.tag+"task", self.model)
            self.stEvaluator.evaluate(output + "-events.tar.gz", task)
    self.exitState()
def classify(self, data, model, output, parse=None, task=None, goldData=None, workDir=None, fromStep=None, omitSteps=None):
    """
    Classify data with a trained model, save the predictions and evaluate them.

    Predictions are copied to output + "-pred.xml.gz". If the shared task
    parameters request conversion, the archive output + "-events.tar.gz" is
    written as well, and optionally evaluated.

    @param data: the input passed to classifyToXML and to EvaluateInteractionXML.run
    @param model: the model, passed through self.openModel in read mode
    @param output: prefix for all output file names
    @param parse: parse name; read from the model (key self.tag + "parse") when None
    @param task: task name; read from the model (key self.tag + "task") when None
    @param goldData: forwarded to classifyToXML
    @param workDir: directory for intermediate files; a temporary one is set up when None
    @param fromStep: not used by this method
    @param omitSteps: not used by this method
    """
    model = self.openModel(model, "r")
    self.enterState(self.STATE_CLASSIFY)
    self.setWorkDir(workDir)
    if workDir is None:
        self.setTempWorkDir()
    try:
        # NOTE(review): the model is passed through openModel a second time, as in the
        # original code; presumably a no-op for an already opened model -- confirm before removing.
        model = self.openModel(model, "r")
        if parse is None:
            parse = self.getStr(self.tag+"parse", model)
        if task is None:
            task = self.getStr(self.tag+"task", model)
        workOutputTag = os.path.join(self.workDir, os.path.basename(output) + "-")
        xml = self.classifyToXML(data, model, None, workOutputTag,
                                 model.get(self.tag+"classifier-model"), goldData, parse,
                                 float(model.getStr("recallAdjustParameter", defaultIfNotExist=1.0)))
        # The prediction file in the work directory (presumably written by classifyToXML) is the final output
        shutil.copy2(workOutputTag+self.tag+"pred.xml.gz", output+"-pred.xml.gz")
        EvaluateInteractionXML.run(self.evaluator, xml, data, parse)
        stParams = self.getBioNLPSharedTaskParams(self.bioNLPSTParams, model)
        if stParams["convert"]:
            Utils.STFormat.ConvertXML.toSTFormat(xml, output+"-events.tar.gz", outputTag="a2")
            # Shared task evaluation needs the converted archive, so it is only attempted after conversion
            if stParams["evaluate"]:
                self.stEvaluator.evaluate(output+"-events.tar.gz", task)
    finally:
        # Clean up the temporary work directory even when classification fails
        self.deleteTempWorkDir()
    self.exitState()
def evaluateGrid(self, xml, params, bestResults):
    """
    Score the predictions of one grid point and keep the best result.

    EvaluateInteractionXML is always run. When self.bioNLPSTParams["evaluate"]
    is set, the predictions are additionally converted to ST format and
    scored with self.stEvaluator, and that score is used if it is not None.

    @param xml: the predictions; None means there is nothing to evaluate
    @param params: the parameters stored with the result
    @param bestResults: None or a (params, result, score) tuple
    @return: the previous bestResults or a new tuple for a better result
    """
    if xml != None:
        # TODO: Where should the EvaluateInteractionXML evaluator come from?
        eixmlResult = EvaluateInteractionXML.run(
            self.edgeDetector.evaluator, xml, self.optData, self.parse)
        flatPath = self.workDir + "grid-flat-geniaformat"
        unmergingPath = self.workDir + "grid-unmerging-geniaformat"
        # Select the ST-format target; unmerging replaces the flat predictions
        if self.unmerging:
            xml = self.unmergingDetector.classifyToXML(
                xml, self.model, None, self.workDir + "grid-", goldData=self.optData)
            stFormatDir = unmergingPath
        else:
            stFormatDir = flatPath
        # Attempt shared task evaluation
        stEvaluation = None
        if self.bioNLPSTParams["evaluate"]:
            Utils.STFormat.ConvertXML.toSTFormat(xml, stFormatDir, "a2")
            stEvaluation = self.stEvaluator.evaluate(stFormatDir, self.task)
        if stEvaluation != None:
            stScore = stEvaluation[0]
            if bestResults == None or stScore > bestResults[1][0]:
                bestResults = (params, stEvaluation, stScore)
        else:
            # If shared task evaluation was not done (failed or not requested) fall back to internal evaluation
            fscore = eixmlResult.getData().fscore
            if bestResults == None or fscore > bestResults[1].getData().fscore:
                bestResults = (params, eixmlResult, fscore)
        # Remove ST-format files
        for path in (flatPath, unmergingPath):
            if os.path.exists(path):
                shutil.rmtree(path)
    else:
        print >> sys.stderr, "No predicted edges"
    return bestResults
def evaluateGrid(self, xml, params, bestResults):
    """
    Score the predictions of one grid point and keep the best result.

    The predictions are scored with EvaluateInteractionXML and, in ST format,
    with self.stEvaluator. The shared task score decides unless the shared
    task evaluation returns None.

    @param xml: the predictions; None means there is nothing to evaluate
    @param params: the parameters stored with the result
    @param bestResults: None or a (params, result, score) tuple
    @return: the previous bestResults or a new tuple for a better result
    """
    if xml != None:
        # TODO: Where should the EvaluateInteractionXML evaluator come from?
        eixmlResult = EvaluateInteractionXML.run(
            self.edgeDetector.evaluator, xml, self.optData, self.parse)
        flatPath = self.workDir + "grid-flat-geniaformat"
        unmergingPath = self.workDir + "grid-unmerging-geniaformat"
        # Convert to ST-format; with unmerging, the unmerged predictions are evaluated instead
        STFormat.ConvertXML.toSTFormat(xml, flatPath, "a2")
        stFormatDir = flatPath
        if self.unmerging:
            goldPath = self.optData.replace("-nodup", "")
            xml = self.unmergingDetector.classifyToXML(
                xml, self.model, None, self.workDir + "grid-", goldData=goldPath)
            STFormat.ConvertXML.toSTFormat(xml, unmergingPath, "a2")
            stFormatDir = unmergingPath
        stEvaluation = self.stEvaluator.evaluate(stFormatDir, self.task)
        if stEvaluation != None:
            stScore = stEvaluation[0]
            if bestResults == None or stScore > bestResults[1][0]:
                bestResults = (params, stEvaluation, stScore)
        else:
            # Without a shared task result the internal F-score is compared
            fscore = eixmlResult.getData().fscore
            if bestResults == None or fscore > bestResults[1].getData().fscore:
                bestResults = (params, eixmlResult, fscore)
        # The flat directory always exists here, the unmerging one only after unmerging
        shutil.rmtree(flatPath)
        if os.path.exists(unmergingPath):
            shutil.rmtree(unmergingPath)
    else:
        print >> sys.stderr, "No predicted edges"
    return bestResults
# Script fragment: classify the triggers of TEST_FILE with an existing model and
# evaluate the result against GOLD_TEST_FILE. TEST_FILE, GOLD_TEST_FILE and the
# called modules are defined outside this fragment.

# Only referenced by the commented-out parameter optimization below
CLASSIFIER_PARAMS="c:25000,50000,87500"
WORKDIR="/usr/share/biotext/GeniaChallenge/SharedTaskTriggerTest"
PARSE_TOK="split-Charniak-Lease"

# NOTE(review): presumably switches to the working directory and starts logging -- confirm against the helpers
workdir(WORKDIR, False)
log()

# Trigger detection
#Gazetteer.run(TRAIN_FILE, "gazetteer-train")
#GeneralEntityTypeRecognizer.run(TRAIN_FILE, "trigger-train-examples", PARSE_TOK, PARSE_TOK, "style:typed", "trigger-ids")
# Build the test examples and classify them with the model "trigger-param-opt/model-c_75000"
GeneralEntityTypeRecognizer.run(TEST_FILE, "trigger-test-examples", PARSE_TOK, PARSE_TOK, "style:typed", "trigger-ids")
Cls.test("trigger-test-examples", "trigger-param-opt/model-c_75000", "trigger-test-classifications")
evaluator = Ev.evaluate("trigger-test-examples", "trigger-test-classifications", "trigger-ids.class_names")
#evaluator = optimize(Cls, Ev, "trigger-train-examples", "trigger-test-examples",\
#    "trigger-ids.class_names", CLASSIFIER_PARAMS, "trigger-param-opt")[0]
# Write the classifications into an interaction XML file
ExampleUtils.writeToInteractionXML(evaluator.classifications, TEST_FILE, "test-predicted-triggers.xml", "trigger-ids.class_names", PARSE_TOK, PARSE_TOK)
# RecallAdjust.run("test-predicted-triggers.xml",1.0,"test-predicted-triggers-adj.xml")
# ix.splitMergedElements("test-predicted-triggers-adj.xml", "test-predicted-triggers-adj-split.xml")
# ix.recalculateIds("test-predicted-triggers-adj-split.xml", "test-predicted-triggers-adj-split-recids.xml", True)
# EvaluateInteractionXML.run(Ev, "test-predicted-triggers-adj-split-recids.xml", GOLD_TEST_FILE, PARSE_TOK, PARSE_TOK)
# Split merged elements and recalculate ids before evaluating against the gold file
ix.splitMergedElements("test-predicted-triggers.xml", "test-predicted-triggers-split.xml")
ix.recalculateIds("test-predicted-triggers-split.xml", "test-predicted-triggers-split-recids.xml", True)
EvaluateInteractionXML.run(Ev, "test-predicted-triggers-split-recids.xml", GOLD_TEST_FILE, PARSE_TOK, PARSE_TOK)
def trainUnmergingDetector(self):
    """
    Train the unmerging detector, one self.checkStep-controlled step at a time.

    The steps "SELF-TRAIN-EXAMPLES-FOR-UNMERGING", "UNMERGING-EXAMPLES",
    "BEGIN-UNMERGING-MODEL" and "END-UNMERGING-MODEL" are each run only when
    checkStep accepts them and self.unmerging is set.
    """
    selfClassified = None
    if not self.unmerging:
        print >> sys.stderr, "No unmerging"

    if self.checkStep("SELF-TRAIN-EXAMPLES-FOR-UNMERGING", self.unmerging) and self.unmerging:
        # Self-classified train data for unmerging
        if not self.doUnmergingSelfTraining:
            print >> sys.stderr, "No self-training for unmerging"
        else:
            # This allows limiting to a subcorpus
            triggerStyle = copy.copy(Parameters.get(self.triggerExampleStyle))
            edgeStyle = copy.copy(Parameters.get(self.edgeExampleStyle))
            unmergingStyle = Parameters.get(self.unmergingExampleStyle)
            if "sentenceLimit" in unmergingStyle and unmergingStyle["sentenceLimit"]:
                for limitedStyle in (triggerStyle, edgeStyle):
                    limitedStyle["sentenceLimit"] = unmergingStyle["sentenceLimit"]
            # Classify the training data with the trigger detector, then the edge detector
            workTag = self.workDir + "unmerging-extra-"
            selfClassified = self.triggerDetector.classifyToXML(
                self.trainData, self.model, None, workTag, exampleStyle=triggerStyle)
            selfClassified = self.edgeDetector.classifyToXML(
                selfClassified, self.model, None, workTag, exampleStyle=edgeStyle)
            assert selfClassified != None
            EvaluateInteractionXML.run(self.edgeDetector.evaluator, selfClassified,
                                       self.trainData, self.parse)

    if self.checkStep("UNMERGING-EXAMPLES", self.unmerging) and self.unmerging:
        # Unmerging example generation, using the data sets with "-nodup" removed from their names
        goldOptFile = self.optData.replace("-nodup", "")
        goldTrainFile = self.trainData.replace("-nodup", "")
        outputFiles = [
            self.workDir + "unmerging-opt-examples.gz",
            self.workDir + "unmerging-train-examples.gz"
        ]
        if self.doUnmergingSelfTraining:
            # If the predictions are not in memory, use the edge prediction file in the work directory
            if selfClassified == None:
                selfClassified = self.workDir + "unmerging-extra-edge-pred.xml.gz"
            inputData = [goldOptFile, [goldTrainFile, selfClassified]]
            goldData = [goldOptFile, [goldTrainFile, goldTrainFile]]
        else:
            inputData = [goldOptFile, goldTrainFile]
            goldData = [goldOptFile, goldTrainFile]
        self.unmergingDetector.buildExamples(
            self.model, inputData, outputFiles, goldData,
            exampleStyle=self.unmergingExampleStyle,
            saveIdsToModel=True)
        selfClassified = None # no longer needed once the examples are built

    if self.checkStep("BEGIN-UNMERGING-MODEL", self.unmerging) and self.unmerging:
        self.unmergingDetector.beginModel(
            None, self.model,
            self.workDir + "unmerging-train-examples.gz",
            self.workDir + "unmerging-opt-examples.gz")

    if self.checkStep("END-UNMERGING-MODEL", self.unmerging) and self.unmerging:
        self.unmergingDetector.endModel(
            None, self.model, self.workDir + "unmerging-opt-examples.gz")
        print >> sys.stderr, "Adding unmerging classifier model to test-set event model"
        target = self.combinedModel
        if target != None:
            # Transfer the unmerging style, ids and classifier model to the combined model
            target.addStr("unmerging-example-style",
                          self.model.getStr("unmerging-example-style"))
            for memberName in ("unmerging-ids.classes", "unmerging-ids.features"):
                target.insert(self.model.get(memberName), memberName)
            self.unmergingDetector.addClassifierModel(
                target,
                self.model.get("unmerging-classifier-model", True),
                self.model.getStr("unmerging-classifier-parameter"))
            target.save()
def train(output, task=None, detector=None, inputFiles=None, models=None, parse=None, processUnmerging=None, processModifiers=None,
          bioNLPSTParams=None, preprocessorParams=None, exampleStyles=None,
          classifierParams=None, doFullGrid=False, deleteOutput=False, copyFrom=None,
          log="log.txt", step=None, omitSteps=None, debug=False, connection=None, subset=None,
          folds=None, corpusDir=None, corpusPreprocessing=None, evaluator=None):
    """
    Train a new model for event or relation detection.

    @param output: A directory where output files will appear.
    @param task: If defined, overridable default settings are used for many of the training parameters. Must be one of the supported TEES tasks.
    @param detector: a Detector object, or a string defining one to be imported
    @param inputFiles: A dictionary of file names, with keys "train", "devel" and, "test"
    @param models: A dictionary of file names defining the place for the new models, with keys "devel" and, "test"
    @param parse: The parse element name in the training interaction XML
    @param processUnmerging: Use the unmerging step of EventDetector. True, False or None for task default.
    @param processModifiers: Use the modifier detection step of EventDetector. True, False or None for task default.
    @param bioNLPSTParams: Parameters controlling BioNLP ST format output.
    @param preprocessorParams: Parameters controlling the preprocessor. Not used for training, but saved to the model for use when classifying.
    @param exampleStyles: A parameter set for controlling example builders.
    @param classifierParams: A parameter set for controlling classifiers.
    @param doFullGrid: Whether all parameters, as opposed to just recall adjustment, are tested in the EventDetector grid search.
    @param deleteOutput: Remove an existing output directory
    @param copyFrom: Copy an existing output directory for use as a template
    @param log: An optional alternative name for the log file. None is for no logging.
    @param step: A step=substep pair, where the steps are "TRAIN", "DEVEL", "EMPTY" and "TEST"
    @param omitSteps: step=substep parameters, where multiple substeps can be defined.
    @param debug: In debug mode, more output is shown, and some temporary intermediate files are saved
    @param connection: A parameter set defining a local or remote connection for training the classifier
    @param subset: A parameter set for making subsets of input files
    @param folds: A dictionary with keys "train", "devel" and "test", passed to getTaskSettings and getFolds
    @param corpusDir: Passed to getTaskSettings
    @param corpusPreprocessing: Preprocessor steps. If defined, the input files are preprocessed into the "corpus" subdirectory of the output directory. The first step must be "MERGE_SETS" and the last "DIVIDE_SETS".
    @param evaluator: Passed to getDetector and importClass. If importClass returns an evaluator, it replaces the evaluator of the detector.
    """
    # Insert default arguments where needed
    inputFiles = setDictDefaults(inputFiles, {"train":None, "devel":None, "test":None})
    models = setDictDefaults(models, {"devel":"model-devel", "test":"model-test"})
    exampleStyles = setDictDefaults(exampleStyles, {"examples":None, "trigger":None, "edge":None, "unmerging":None, "modifiers":None})
    classifierParams = setDictDefaults(classifierParams, {"examples":None, "trigger":None, "recall":None, "edge":None, "unmerging":None, "modifiers":None})
    subset = setDictDefaults(Parameters.get(subset), {"train":None, "devel":None, "test":None, "seed":0, "all":None})
    folds = setDictDefaults(folds, {"train":None, "devel":None, "test":None})
    processUnmerging = getDefinedBool(processUnmerging)
    processModifiers = getDefinedBool(processModifiers)
    # Initialize working directory
    workdir(output, deleteOutput, copyFrom, log)
    # Get task specific parameters
    useKerasDetector = False
    # The detector can also be an object, so only a string is inspected for the "keras" keyword
    if isinstance(detector, basestring) and "keras" in detector.lower():
        print >> sys.stderr, "Using a Keras Detector"
        useKerasDetector = True
        if detector.lower() == "keras":
            detector = None
    detector, bioNLPSTParams, preprocessorParams, folds = getTaskSettings(task, detector,
        bioNLPSTParams, preprocessorParams, inputFiles, exampleStyles, classifierParams, folds,
        corpusDir=corpusDir, useKerasDetector=useKerasDetector)
    # Learn training settings from input files
    detector = learnSettings(inputFiles, detector, classifierParams, task, exampleStyles, useKerasDetector=useKerasDetector)
    # Get corpus subsets
    getFolds(inputFiles, folds)
    getSubsets(inputFiles, subset)
    if task != None:
        task = task.replace("-FULL", "")
        # A subtask other than 3 disables modifier detection
        if "." in task:
            _, subTask = getSubTask(task)
            if subTask != 3:
                processModifiers = False
    # Preprocess the corpus if required
    if corpusPreprocessing != None:
        preprocessor = Preprocessor(steps=corpusPreprocessing)
        assert preprocessor.steps[0].name == "MERGE_SETS"
        assert preprocessor.steps[-1].name == "DIVIDE_SETS"
        preprocessedCorpusDir = os.path.join(output, "corpus")
        preprocessor.process(inputFiles, os.path.join(preprocessedCorpusDir, task))
        # Continue with the preprocessed files in place of the original input files
        for setName in inputFiles.keys():
            if inputFiles[setName] != None:
                inputFiles[setName] = os.path.join(preprocessedCorpusDir, task + "-" + setName + ".xml")
    # Define processing steps
    selector, detectorSteps, omitDetectorSteps = getSteps(step, omitSteps, ["TRAIN", "DEVEL", "EMPTY", "TEST"])

    # Initialize the detector
    detector, detectorName = getDetector(detector, evaluator=evaluator)
    evaluator, evaluatorName = importClass(evaluator, "evaluator")
    detector = detector() # initialize object
    if evaluator != None:
        print >> sys.stderr, "Using evaluator", evaluator.__name__
        detector.evaluator = evaluator
    detector.debug = debug
    detector.bioNLPSTParams = detector.getBioNLPSharedTaskParams(bioNLPSTParams)
    connection = getConnection(connection)
    detector.setConnection(connection)
    connection.debug = debug
    if deleteOutput:
        connection.clearWorkDir()

    # Train
    if selector.check("TRAIN"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------------ Train Detector ------------------"
        print >> sys.stderr, "----------------------------------------------------"
        # An EventDetector takes separate styles and parameters for each of its sub-detectors
        if not isinstance(detector, EventDetector):
            detector.train(inputFiles["train"], inputFiles["devel"], models["devel"], models["test"],
                           exampleStyles["examples"], classifierParams["examples"], parse, None, task,
                           fromStep=detectorSteps["TRAIN"], workDir="training", testData=inputFiles["test"])
        else:
            detector.train(inputFiles["train"], inputFiles["devel"], models["devel"], models["test"],
                           exampleStyles["trigger"], exampleStyles["edge"], exampleStyles["unmerging"], exampleStyles["modifiers"],
                           classifierParams["trigger"], classifierParams["edge"], classifierParams["unmerging"], classifierParams["modifiers"],
                           classifierParams["recall"], processUnmerging, processModifiers,
                           doFullGrid, task, parse, None,
                           fromStep=detectorSteps["TRAIN"], workDir="training", testData=inputFiles["test"])
        # Save the detector type
        for model in [models["devel"], models["test"]]:
            if model != None and os.path.exists(model):
                model = Model(model, "a")
                model.addStr("detector", detectorName)
                if evaluatorName != None:
                    # Stored under its own key, so that the detector name saved above is not overwritten.
                    # NOTE(review): confirm that the code reading the model expects the key "evaluator".
                    model.addStr("evaluator", evaluatorName)
                if preprocessorParams != None:
                    preprocessor = Preprocessor()
                    model.addStr("preprocessorParams", Parameters.toString(preprocessor.getParameters(preprocessorParams)))
                model.save()
                model.close()
    if selector.check("DEVEL"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------ Check devel classification ------------"
        print >> sys.stderr, "----------------------------------------------------"
        detector.classify(inputFiles["devel"], models["devel"], "classification-devel/devel", goldData=inputFiles["devel"],
                          fromStep=detectorSteps["DEVEL"], workDir="classification-devel")
    if selector.check("EMPTY"):
        # By passing an emptied devel set through the prediction system, we can check that we get the same predictions
        # as in the DEVEL step, ensuring the model does not use leaked information.
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------ Empty devel classification ------------"
        print >> sys.stderr, "----------------------------------------------------"
        removalScope = "non-given"
        if "names" in str(exampleStyles["examples"]) or "names" in str(exampleStyles["trigger"]):
            removalScope = "all"
        elif "Edge" in detector.__class__.__name__:
            removalScope = "interactions"
        detector.classify(getEmptyCorpus(inputFiles["devel"], scope=removalScope), models["devel"], "classification-empty/devel-empty",
                          fromStep=detectorSteps["EMPTY"], workDir="classification-empty")
        print >> sys.stderr, "*** Evaluate empty devel classification ***"
        if os.path.exists("classification-empty/devel-empty-pred.xml.gz"):
            EvaluateInteractionXML.run(detector.evaluator, "classification-empty/devel-empty-pred.xml.gz", inputFiles["devel"], parse)
        else:
            print >> sys.stderr, "No output file for evaluation"
    if selector.check("TEST"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------- Test set classification --------------"
        print >> sys.stderr, "----------------------------------------------------"
        if inputFiles["test"] == None or not os.path.exists(inputFiles["test"]):
            print >> sys.stderr, "Skipping, test file", inputFiles["test"], "does not exist"
        else:
            # The devel model is used if no test model is defined
            detector.classify(inputFiles["test"], models["test"] if models["test"] != None else models["devel"], "classification-test/test",
                              fromStep=detectorSteps["TEST"], workDir="classification-test")
            if detector.bioNLPSTParams["convert"]:
                extension = ".zip" if (detector.bioNLPSTParams["convert"] == "zip") else ".tar.gz"
                Utils.STFormat.Compare.compare("classification-test/test-events" + extension, "classification-devel/devel-events" + extension, "a2")
    # Stop logging
    if log != None:
        Stream.closeLog(log)
def classify(self, data, model, output, parse=None, task=None, goldData=None, fromStep=None, toStep=None, omitSteps=None, workDir=None):
    """
    Classify data with the trigger, edge, unmerging and modifier detectors
    and optionally convert the result to the BioNLP Shared Task format.

    The steps TRIGGERS, EDGES, UNMERGING, MODIFIERS and ST-CONVERT are
    registered with enterState; each one runs only when checkStep allows it.

    @param data: the data to classify, passed to initVariables as classifyData
    @param model: the model, opened for reading with openModel
    @param output: output path stem; the final predictions are copied to
                   output + "-pred.xml.gz" and the Shared Task archive is
                   written to output + "-events" + extension
    @param parse: parse name; if None, the edge detector's parse name is read
                  from the model when predictions are evaluated
    @param task: passed to initVariables; if self.task is None at ST
                 evaluation time, the task name is read from the model
    @param goldData: passed to the detectors and, when not None, used instead
                     of the classified data as the evaluation reference
    @param fromStep: passed to enterState
    @param toStep: passed to enterState
    @param omitSteps: passed to enterState
    @param workDir: directory for intermediate files; if None,
                    setTempWorkDir is called to provide one
    """
    xml = None
    model = self.openModel(model, "r")
    self.initVariables(classifyData=data, model=model, xml=None, task=task, parse=parse)
    self.enterState(self.STATE_CLASSIFY, ["TRIGGERS", "EDGES", "UNMERGING", "MODIFIERS", "ST-CONVERT"], fromStep, toStep, omitSteps)
    self.setWorkDir(workDir)
    if workDir is None:
        self.setTempWorkDir()
    # All intermediate files share this prefix inside the work directory
    workOutputTag = os.path.join(self.workDir, os.path.basename(output) + "-")
    self.model = self.openModel(self.model, "r")
    stParams = self.getBioNLPSharedTaskParams(self.bioNLPSTParams, model)

    triggerFile = workOutputTag + "trigger-pred.xml.gz"
    edgeFile = workOutputTag + "edge-pred.xml.gz"
    unmergingFile = workOutputTag + "unmerging-pred.xml.gz"
    modifierFile = workOutputTag + "modifier-pred.xml.gz"

    def evaluatePredictions(predictions):
        # Score predictions with the edge detector's evaluator, against the
        # gold data when given and against the classified data otherwise.
        if self.parse is None:
            edgeParse = self.getStr(self.edgeDetector.tag + "parse", self.model)
        else:
            edgeParse = self.parse
        if goldData is not None:
            reference = goldData
        else:
            reference = self.classifyData
        EvaluateInteractionXML.run(self.edgeDetector.evaluator, predictions, reference, edgeParse)

    if self.checkStep("TRIGGERS"):
        xml = self.triggerDetector.classifyToXML(self.classifyData, self.model, None, workOutputTag, goldData=goldData, parse=self.parse, recallAdjust=float(self.getStr("recallAdjustParameter", self.model)))
    if self.checkStep("EDGES"):
        xml = self.getWorkFile(xml, triggerFile)
        xml = self.edgeDetector.classifyToXML(xml, self.model, None, workOutputTag, goldData=goldData, parse=self.parse)
        assert xml is not None
        evaluatePredictions(xml)
    if self.checkStep("UNMERGING"):
        if self.model.getStr("unmerging-classifier-parameter", None) is not None:
            # To avoid running out of memory, always use file on disk
            xml = self.getWorkFile(None, edgeFile)
            xml = self.unmergingDetector.classifyToXML(xml, self.model, None, workOutputTag, goldData=goldData, parse=self.parse)
            # Evaluate after unmerging
            evaluatePredictions(xml)
        else:
            print >> sys.stderr, "No model for unmerging"
    if self.checkStep("MODIFIERS"):
        if self.model.hasMember("modifier-classifier-model"):
            xml = self.getWorkFile(xml, [unmergingFile, edgeFile])
            xml = self.modifierDetector.classifyToXML(xml, self.model, None, workOutputTag, goldData=goldData, parse=self.parse)
        else:
            print >> sys.stderr, "No model for modifier detection"
    if self.checkStep("ST-CONVERT"):
        if stParams["convert"]:
            xml = self.getWorkFile(xml, [modifierFile, unmergingFile, edgeFile])
            # The archive name follows the requested package format
            # ("zip", or tar.gz for any other true value) instead of
            # always being "-events.tar.gz"
            extension = ".zip" if (stParams["convert"] == "zip") else ".tar.gz"
            eventsFile = output + "-events" + extension
            Utils.STFormat.ConvertXML.toSTFormat(xml, eventsFile, outputTag=stParams["a2Tag"], writeExtra=(stParams["scores"] == True))
            if stParams["evaluate"]:
                task = self.task
                if task is None:
                    task = self.getStr(self.edgeDetector.tag + "task", self.model)
                self.stEvaluator.evaluate(eventsFile, task)
        else:
            print >> sys.stderr, "No BioNLP shared task format conversion"
    # Publish the prediction file of the latest stage that produced one
    finalXMLFile = self.getWorkFile(None, [modifierFile, unmergingFile, edgeFile])
    if finalXMLFile is not None:
        shutil.copy2(finalXMLFile, output + "-pred.xml.gz")
    self.deleteTempWorkDir()
    self.exitState()
def classify(self, data, model, output, parse=None, task=None, fromStep=None, toStep=None):
    """
    Classify data with the trigger, edge, unmerging and modifier detectors
    and convert the result to the BioNLP Shared Task format.

    The steps TRIGGERS, EDGES, UNMERGING, MODIFIERS and ST-CONVERT are
    registered with enterState; each one runs only when checkStep allows it.
    Detector calls receive output + "-" as their output prefix.

    @param data: the data to classify, passed to initVariables as classifyData
    @param model: the model, passed to initVariables and then opened for
                  reading with openModel
    @param output: output path stem; the Shared Task archive is written to
                   output + "-events.tar.gz"
    @param parse: parse name; if None, the edge detector's parse name is read
                  from the model for the edge evaluation
    @param task: passed to initVariables; if self.task is None at ST
                 evaluation time, the task name is read from the model
    @param fromStep: passed to enterState
    @param toStep: passed to enterState
    """
    xml = None
    self.initVariables(classifyData=data, model=model, xml=None, task=task, parse=parse)
    self.enterState(self.STATE_CLASSIFY, ["TRIGGERS", "EDGES", "UNMERGING", "MODIFIERS", "ST-CONVERT"], fromStep, toStep)
    self.model = self.openModel(self.model, "r")
    outputTag = output + "-"
    edgeFile = output + "-edge-pred.xml.gz"
    unmergingFile = output + "-unmerging-pred.xml.gz"
    modifierFile = output + "-modifier-pred.xml.gz"

    if self.checkStep("TRIGGERS"):
        xml = self.triggerDetector.classifyToXML(self.classifyData, self.model, None, outputTag, parse=self.parse, recallAdjust=float(self.getStr("recallAdjustParameter", self.model)))
    if self.checkStep("EDGES"):
        xml = self.getWorkFile(xml, output + "-recall-adjusted.xml.gz")
        xml = self.edgeDetector.classifyToXML(xml, self.model, None, outputTag, parse=self.parse)
        assert xml is not None
        edgeParse = self.parse
        if edgeParse is None:
            edgeParse = self.getStr(self.edgeDetector.tag + "parse", self.model)
        # No gold data is passed to the evaluation at this stage
        EvaluateInteractionXML.run(self.edgeDetector.evaluator, xml, None, edgeParse)
    if self.checkStep("UNMERGING"):
        if self.model.hasMember("unmerging-classifier-model"):
            # To avoid running out of memory, always use file on disk
            xml = self.getWorkFile(None, edgeFile)
            # When the input is a file name, the same name without "-nodup"
            # is used as gold data if such a file exists
            goldData = None
            if isinstance(self.classifyData, types.StringTypes):
                goldCandidate = self.classifyData.replace("-nodup", "")
                if os.path.exists(goldCandidate):
                    goldData = goldCandidate
            xml = self.unmergingDetector.classifyToXML(xml, self.model, None, outputTag, goldData=goldData, parse=self.parse)
        else:
            print >> sys.stderr, "No model for unmerging"
    if self.checkStep("MODIFIERS"):
        if self.model.hasMember("modifier-classifier-model"):
            xml = self.getWorkFile(xml, [unmergingFile, edgeFile])
            xml = self.modifierDetector.classifyToXML(xml, self.model, None, outputTag, parse=self.parse)
        else:
            print >> sys.stderr, "No model for modifier detection"
    if self.checkStep("ST-CONVERT"):
        xml = self.getWorkFile(xml, [modifierFile, unmergingFile, edgeFile])
        eventsFile = output + "-events.tar.gz"
        STFormat.ConvertXML.toSTFormat(xml, eventsFile, outputTag="a2", writeScores=self.stWriteScores)
        if self.stEvaluator is not None:
            task = self.task
            if task is None:
                task = self.getStr(self.edgeDetector.tag + "task", self.model)
            self.stEvaluator.evaluate(eventsFile, task)
    self.exitState()
def classify(self, data, model, output, parse=None, task=None, goldData=None, fromStep=None, toStep=None, omitSteps=None, workDir=None):
    """
    Run the multi-stage classification (triggers, edges, unmerging,
    modifiers) on data and optionally export BioNLP Shared Task files.

    The steps TRIGGERS, EDGES, UNMERGING, MODIFIERS and ST-CONVERT are
    registered with enterState; each one runs only when checkStep allows it.

    @param data: the data to classify, passed to initVariables as classifyData
    @param model: the model, opened for reading with openModel
    @param output: output path stem; the final predictions are copied to
                   output + "-pred.xml.gz" and the Shared Task archive is
                   written to output + "-events" + extension
    @param parse: parse name; if None, the edge detector's parse name is read
                  from the model when predictions are evaluated
    @param task: passed to initVariables; if self.task is None at ST
                 evaluation time, the task name is read from the model
    @param goldData: passed to the detectors and, when not None, used instead
                     of the classified data as the evaluation reference
    @param fromStep: passed to enterState
    @param toStep: passed to enterState
    @param omitSteps: passed to enterState
    @param workDir: directory for intermediate files; if None,
                    setTempWorkDir is called to provide one
    """
    xml = None
    model = self.openModel(model, "r")
    self.initVariables(classifyData=data, model=model, xml=None, task=task, parse=parse)
    self.enterState(
        self.STATE_CLASSIFY,
        ["TRIGGERS", "EDGES", "UNMERGING", "MODIFIERS", "ST-CONVERT"],
        fromStep, toStep, omitSteps)
    self.setWorkDir(workDir)
    if workDir is None:
        self.setTempWorkDir()
    # All intermediate files share this prefix inside the work directory
    workOutputTag = os.path.join(self.workDir, os.path.basename(output) + "-")
    self.model = self.openModel(self.model, "r")
    stParams = self.getBioNLPSharedTaskParams(self.bioNLPSTParams, model)

    triggerFile = workOutputTag + "trigger-pred.xml.gz"
    edgeFile = workOutputTag + "edge-pred.xml.gz"
    unmergingFile = workOutputTag + "unmerging-pred.xml.gz"
    modifierFile = workOutputTag + "modifier-pred.xml.gz"

    def evaluatePredictions(predictions):
        # Score predictions with the edge detector's evaluator, against the
        # gold data when given and against the classified data otherwise.
        if self.parse is None:
            edgeParse = self.getStr(self.edgeDetector.tag + "parse", self.model)
        else:
            edgeParse = self.parse
        if goldData is not None:
            reference = goldData
        else:
            reference = self.classifyData
        EvaluateInteractionXML.run(self.edgeDetector.evaluator, predictions, reference, edgeParse)

    if self.checkStep("TRIGGERS"):
        xml = self.triggerDetector.classifyToXML(
            self.classifyData, self.model, None, workOutputTag,
            goldData=goldData, parse=self.parse,
            recallAdjust=float(self.getStr("recallAdjustParameter", self.model)))
    if self.checkStep("EDGES"):
        xml = self.getWorkFile(xml, triggerFile)
        xml = self.edgeDetector.classifyToXML(
            xml, self.model, None, workOutputTag,
            goldData=goldData, parse=self.parse)
        assert xml is not None
        evaluatePredictions(xml)
    if self.checkStep("UNMERGING"):
        if self.model.getStr("unmerging-classifier-parameter", None) is not None:
            # To avoid running out of memory, always use file on disk
            xml = self.getWorkFile(None, edgeFile)
            xml = self.unmergingDetector.classifyToXML(
                xml, self.model, None, workOutputTag,
                goldData=goldData, parse=self.parse)
            # Evaluate after unmerging
            evaluatePredictions(xml)
        else:
            print >> sys.stderr, "No model for unmerging"
    if self.checkStep("MODIFIERS"):
        if self.model.hasMember("modifier-classifier-model"):
            xml = self.getWorkFile(xml, [unmergingFile, edgeFile])
            xml = self.modifierDetector.classifyToXML(
                xml, self.model, None, workOutputTag,
                goldData=goldData, parse=self.parse)
        else:
            print >> sys.stderr, "No model for modifier detection"
    if self.checkStep("ST-CONVERT"):
        if stParams["convert"]:
            xml = self.getWorkFile(xml, [modifierFile, unmergingFile, edgeFile])
            # The archive name follows the requested package format
            # ("zip", or tar.gz for any other true value) instead of
            # always being "-events.tar.gz"
            extension = ".zip" if (stParams["convert"] == "zip") else ".tar.gz"
            eventsFile = output + "-events" + extension
            Utils.STFormat.ConvertXML.toSTFormat(
                xml, eventsFile,
                outputTag=stParams["a2Tag"],
                writeExtra=(stParams["scores"] == True))
            if stParams["evaluate"]:
                task = self.task
                if task is None:
                    task = self.getStr(self.edgeDetector.tag + "task", self.model)
                self.stEvaluator.evaluate(eventsFile, task)
        else:
            print >> sys.stderr, "No BioNLP shared task format conversion"
    # Publish the prediction file of the latest stage that produced one
    finalXMLFile = self.getWorkFile(None, [modifierFile, unmergingFile, edgeFile])
    if finalXMLFile is not None:
        shutil.copy2(finalXMLFile, output + "-pred.xml.gz")
    self.deleteTempWorkDir()
    self.exitState()
# Trigger detection on the test set with an already trained model.
# The training-side steps are disabled and kept only for reference:
#Gazetteer.run(TRAIN_FILE, "gazetteer-train")
#GeneralEntityTypeRecognizer.run(TRAIN_FILE, "trigger-train-examples", PARSE_TOK, PARSE_TOK, "style:typed", "trigger-ids")

# File names shared by the steps below
testExamples = "trigger-test-examples"
testClassifications = "trigger-test-classifications"
classNames = "trigger-ids.class_names"
predictedXML = "test-predicted-triggers.xml"
splitXML = "test-predicted-triggers-split.xml"
recidsXML = "test-predicted-triggers-split-recids.xml"

# Build the test examples and classify them with the stored model
GeneralEntityTypeRecognizer.run(
    TEST_FILE, testExamples, PARSE_TOK, PARSE_TOK, "style:typed", "trigger-ids")
Cls.test(testExamples, "trigger-param-opt/model-c_75000", testClassifications)
evaluator = Ev.evaluate(testExamples, testClassifications, classNames)
# Disabled alternative: parameter optimization instead of the fixed model
#evaluator = optimize(Cls, Ev, "trigger-train-examples", "trigger-test-examples",\
#    "trigger-ids.class_names", CLASSIFIER_PARAMS, "trigger-param-opt")[0]

# Write the classifications into interaction XML
ExampleUtils.writeToInteractionXML(
    evaluator.classifications, TEST_FILE, predictedXML, classNames,
    PARSE_TOK, PARSE_TOK)

# Disabled alternative: recall adjustment before splitting and evaluation
# RecallAdjust.run("test-predicted-triggers.xml",1.0,"test-predicted-triggers-adj.xml")
# ix.splitMergedElements("test-predicted-triggers-adj.xml", "test-predicted-triggers-adj-split.xml")
# ix.recalculateIds("test-predicted-triggers-adj-split.xml", "test-predicted-triggers-adj-split-recids.xml", True)
# EvaluateInteractionXML.run(Ev, "test-predicted-triggers-adj-split-recids.xml", GOLD_TEST_FILE, PARSE_TOK, PARSE_TOK)

# Split merged elements, recalculate ids, then evaluate against the gold file
ix.splitMergedElements(predictedXML, splitXML)
ix.recalculateIds(splitXML, recidsXML, True)
EvaluateInteractionXML.run(Ev, recidsXML, GOLD_TEST_FILE, PARSE_TOK, PARSE_TOK)