def __init__(self, rawTextFileName, intermediateXMLFileName):
    """Run the preprocessing steps, then set up the instance attributes.

    A Preprocessor is built from the two file names and its
    posTaggedText(), getParseTree() and getMetaMapConcepts() methods are
    called in that order. Afterwards the intermediate XML file name is kept
    on the instance and ``vectors`` starts out as an empty list.
    """
    pipeline = Preprocessor(rawTextFileName, intermediateXMLFileName)
    # Each method is looked up only once the previous step has finished.
    for stepName in ("posTaggedText", "getParseTree", "getMetaMapConcepts"):
        getattr(pipeline, stepName)()
    self.intermediate, self.vectors = intermediateXMLFileName, []
示例#2
0
def main(aRawTextFileName=None,
         aIntermediateXMLFileName=None,
         aConfigFile=None):
    """Run the configured entity assemblers over one raw text file.

    When both ``aRawTextFileName`` and ``aIntermediateXMLFileName`` are None,
    the raw text file, the intermediate XML file and the config file are read
    from the first three command-line arguments. If fewer than three are
    present a message is printed and the function returns without doing
    anything else. Otherwise the three parameters are used as given.

    The config file is JSON whose keys name the entities to extract
    ('Event Date', 'Age', 'Dosage', 'Drugname', 'Weight', 'Gender',
    'Reaction'). For every key with a truthy value, that value is handed to
    the matching assembler via setExtractorList(), after which the assembler
    runs its extractors, writes the semi-final XML and launches its test
    suite.

    Args:
        aRawTextFileName: Raw text file name, or None to use sys.argv.
        aIntermediateXMLFileName: Intermediate XML file name, or None to use
            sys.argv.
        aConfigFile: JSON config file name; only used when the file names
            are not taken from sys.argv.

    Raises:
        KeyError: If the config names an entity with no known assembler.
    """
    if aRawTextFileName is None and aIntermediateXMLFileName is None:
        # When calling ProjectAeris from the command line, the raw text file,
        # the output XML file location and the config file must be given as
        # the first, second and third arguments respectively.
        sysArgs = sys.argv[1:]
        if len(sysArgs) < 3:
            print("Missing some command-line arguments")
            return
        rawTextFileName, intermediateXMLFileName, configFileName = sysArgs[:3]
    else:
        rawTextFileName = aRawTextFileName
        intermediateXMLFileName = aIntermediateXMLFileName
        configFileName = aConfigFile

    # NOTE(review): this message is emitted before the Preprocessor below is
    # built; kept where it was so the output order stays the same.
    print('initial preprocess done!')

    preprocessOne = Preprocessor(
        rawTextFileName=rawTextFileName,
        intermediateXMLFileName=intermediateXMLFileName)

    # Every assembler is instantiated up front, whether or not the config
    # ends up selecting it.
    allAssemblerDict = {
        'Event Date': EventDateAssembler(rawTextFileName,
                                         intermediateXMLFileName),
        'Age': AgeAssembler(rawTextFileName, intermediateXMLFileName),
        'Dosage': DosageAssembler(rawTextFileName, intermediateXMLFileName),
        'Drugname': DrugnameAssembler(rawTextFileName,
                                      intermediateXMLFileName),
        'Weight': WeightAssembler(rawTextFileName, intermediateXMLFileName),
        'Gender': GenderAssembler(rawTextFileName, intermediateXMLFileName),
        'Reaction': ReactionAssembler(rawTextFileName, intermediateXMLFileName)
    }

    # Place to test new preprocess methods
    preprocessOne.getMetaMapConcepts()
    #    preprocessOne.posTaggedText()
    #    preprocessOne.getParseTree()
    #    print preprocessOne.rawText()
    # Place to test new preprocess methods

    # The following is to actually run the extractors

    # Close the config file as soon as it has been parsed instead of leaking
    # the handle.
    with open(configFileName) as configHandle:
        config = json.load(configHandle)

    # Validate every requested entity before running any extractor, so an
    # unknown name aborts the run before any output is written.
    assemblerList = []
    for entity in config:
        if entity not in allAssemblerDict:
            raise KeyError("An entity you entered doesn't exist")
        assemblerList.append((entity, allAssemblerDict[entity]))

    for name, assembler in assemblerList:
        # Entities whose config value is falsy are skipped.
        if config[name]:
            assembler.setExtractorList(config[name])
            assembler.runExtractors()
            assembler.writeToSemiFinalXML()
            assembler.launchTestSuite()