def __init__(self, rawTextFileName, intermediateXMLFileName):
    """Run the preprocessing steps and initialise the instance state.

    A Preprocessor is built from the two file names and its
    posTaggedText, getParseTree and getMetaMapConcepts methods are
    called in that order.

    Args:
        rawTextFileName: name of the raw text file handed to the
            Preprocessor.
        intermediateXMLFileName: name of the intermediate XML file
            handed to the Preprocessor; also stored on the instance as
            ``self.intermediate``.
    """
    pipeline = Preprocessor(rawTextFileName, intermediateXMLFileName)
    pipeline.posTaggedText()
    pipeline.getParseTree()
    pipeline.getMetaMapConcepts()

    # Starts out empty; nothing in this constructor fills it.
    self.vectors = []
    self.intermediate = intermediateXMLFileName
def main(aRawTextFileName=None, aIntermediateXMLFileName=None, aConfigFile=None):
    """Run the configured entity assemblers over a raw text file.

    When both ``aRawTextFileName`` and ``aIntermediateXMLFileName`` are
    None, the raw text file, the intermediate XML file and the config
    file are taken from the first three command-line arguments, in that
    order. Otherwise the three parameters are used as given.

    The config file is read as JSON. Each of its keys must name an entry
    in the assembler table below; for every key whose value is truthy,
    that value is passed to the assembler's ``setExtractorList`` and the
    assembler is then run, written out and tested.

    Args:
        aRawTextFileName: name of the raw text file, or None to read all
            three names from the command line.
        aIntermediateXMLFileName: name of the intermediate XML file.
        aConfigFile: name of the JSON config file.

    Returns:
        None. If fewer than three command-line arguments are available
        when they are needed, a message is printed and the function
        returns without doing any work.

    Raises:
        KeyError: if the config file contains a key that is not in the
            assembler table.
    """
    if aRawTextFileName is None and aIntermediateXMLFileName is None:
        # When calling ProjectAeris from the command line, it should be
        # done with a raw text file and an output xml file location as
        # the first and second arguments respectively; the config file
        # is the third.
        sysArgs = sys.argv[1:]
        if len(sysArgs) < 3:
            print("Missing some command-line arguments")
            return
        rawTextFileName, intermediateXMLFileName, configFileName = sysArgs[:3]
    else:
        rawTextFileName = aRawTextFileName
        intermediateXMLFileName = aIntermediateXMLFileName
        configFileName = aConfigFile

    # Single-argument print(...) gives the same output on Python 2 and 3.
    print('initial preprocess done!')
    preprocessOne = Preprocessor(
        rawTextFileName=rawTextFileName,
        intermediateXMLFileName=intermediateXMLFileName)

    # NOTE(review): every assembler is constructed up front, even those the
    # config never enables. If the constructors are expensive, consider
    # building them lazily -- confirm that nothing relies on their
    # construction side effects first.
    allAssemblerDict = {
        'Event Date': EventDateAssembler(rawTextFileName, intermediateXMLFileName),
        'Age': AgeAssembler(rawTextFileName, intermediateXMLFileName),
        'Dosage': DosageAssembler(rawTextFileName, intermediateXMLFileName),
        'Drugname': DrugnameAssembler(rawTextFileName, intermediateXMLFileName),
        'Weight': WeightAssembler(rawTextFileName, intermediateXMLFileName),
        'Gender': GenderAssembler(rawTextFileName, intermediateXMLFileName),
        'Reaction': ReactionAssembler(rawTextFileName, intermediateXMLFileName)
    }

    # Place to test new preprocess methods.
    preprocessOne.getMetaMapConcepts()

    # The following is to actually run the extractors.
    # `with` guarantees the config file handle is closed after parsing.
    with open(configFileName) as configHandle:
        config = json.load(configHandle)

    # Validate every configured entity before running any extractor, so an
    # unknown name aborts the run before partial output is produced.
    assemblerList = []
    for entity in config:
        if entity not in allAssemblerDict:
            raise KeyError("An entity you entered doesn't exist")
        assemblerList.append((entity, allAssemblerDict[entity]))

    for name, assembler in assemblerList:
        # Entities with an empty/falsy extractor setting are skipped.
        if config[name]:
            assembler.setExtractorList(config[name])
            assembler.runExtractors()
            assembler.writeToSemiFinalXML()
            assembler.launchTestSuite()