def __init__(self):
    """Initialise the base ToolChain and register the default steps.

    Every tuple in the table below is forwarded unchanged to
    ``self.addStep``. The positional values look like (step name,
    callable, default-argument dict, intermediate file name) --
    TODO confirm against ToolChain.addStep.
    """
    ToolChain.__init__(self)
    # Steps, listed in registration order. DIVIDE-SETS is registered
    # without a file name, so addStep receives only three arguments for it.
    stepDefinitions = (
        ("CONVERT", self.convert,
         {"dataSetNames": None, "corpusName": None},
         "documents.xml"),
        ("SPLIT-SENTENCES", Tools.GeniaSentenceSplitter.makeSentences,
         {"debug": False, "postProcess": True},
         "sentences.xml"),
        ("NER", Tools.BANNER.run,
         {"elementName": "entity", "processElement": "sentence",
          "debug": False, "splitNewlines": True},
         "ner.xml"),
        ("PARSE", Tools.CharniakJohnsonParser.parse,
         {"parseName": "McCC", "requireEntities": False, "debug": False},
         "parse.xml"),
        ("CONVERT-PARSE", Tools.StanfordParser.convertXML,
         {"parser": "McCC", "debug": False},
         "converted-parse.xml"),
        ("SPLIT-NAMES", ProteinNameSplitter.mainFunc,
         {"parseName": "McCC"},
         "split-names.xml"),
        ("FIND-HEADS", FindHeads.findHeads,
         {"parse": "McCC", "removeExisting": True},
         "heads.xml"),
        ("DIVIDE-SETS", self.divideSets,
         {"outputStem": None, "saveCombined": True}),
    )
    for definition in stepDefinitions:
        self.addStep(*definition)
def __init__(self, steps, parseName="McCC", requireEntities=False):
    """Record the parse settings, then initialise the base ToolChain.

    steps           -- passed straight through to ToolChain.__init__
    parseName       -- stored as self.parseName (default "McCC")
    requireEntities -- stored as self.requireEntities (default False)
    """
    # NOTE: an earlier revision also took constParser / depParser options
    # (validated with asserts and stored on self); that code was disabled
    # and is not part of this constructor.

    # Both attributes are set before the base initialiser runs. The order
    # is kept as-is; presumably ToolChain.__init__ may read them while
    # handling ``steps`` -- verify before reordering.
    self.requireEntities, self.parseName = requireEntities, parseName
    ToolChain.__init__(self, steps)
    self.modelParameterStringName = "preprocessorParams"
def __init__(self):
    """Initialise the base ToolChain, then register eight steps on it.

    Each ``addStep`` call passes a step name, a callable, a dict of
    default arguments and (for all but the last step) a file name. The
    exact meaning of these positions is defined by ToolChain.addStep,
    which is not visible here -- TODO confirm.
    """
    ToolChain.__init__(self)

    # The same parse identifier is handed to the four parse-related steps.
    parseId = "McCC"
    register = self.addStep

    # Steps
    register("CONVERT", self.convert,
             {"dataSetNames": None, "corpusName": None},
             "documents.xml")
    register("SPLIT-SENTENCES", Tools.GeniaSentenceSplitter.makeSentences,
             {"debug": False, "postProcess": True},
             "sentences.xml")
    register("NER", Tools.BANNER.run,
             {"elementName": "entity", "processElement": "sentence",
              "debug": False, "splitNewlines": True},
             "ner.xml")
    register("PARSE", Tools.CharniakJohnsonParser.parse,
             {"parseName": parseId, "requireEntities": False,
              "debug": False},
             "parse.xml")
    register("CONVERT-PARSE", Tools.StanfordParser.convertXML,
             {"parser": parseId, "debug": False},
             "converted-parse.xml")
    register("SPLIT-NAMES", ProteinNameSplitter.mainFunc,
             {"parseName": parseId},
             "split-names.xml")
    register("FIND-HEADS", FindHeads.findHeads,
             {"parse": parseId, "removeExisting": True},
             "heads.xml")
    # No file name is supplied for the final step.
    register("DIVIDE-SETS", self.divideSets,
             {"outputStem": None, "saveCombined": True})
def __init__(self):
    """Initialise the base ToolChain and set the parameter-string name.

    After the base initialiser returns, ``modelParameterStringName`` is
    set to "preprocessorParams". How that name is consumed is decided by
    code outside this method.
    """
    ToolChain.__init__(self)
    # Assigned after the base initialiser, so this value is the one left
    # on the instance regardless of what ToolChain.__init__ sets.
    self.modelParameterStringName = "preprocessorParams"