def setupLogging(logLevel, fileName=None, logToStd=True):
    """Configure and return the "Asrt" logger.

    Args:
        logLevel: level set on the logger and on every handler added here.
        fileName: optional path of a rotating log file (1024000 bytes per
            file, 5 backups). No file handler is added when it is None.
        logToStd: when true, records are also written to sys.stdout.

    Returns:
        The "Asrt" logger.

    Note:
        Handlers are appended on every call, so calling this function
        several times attaches several handlers to the same logger.
    """
    taskLogger = logging.getLogger("Asrt")
    taskLogger.setLevel(logLevel)

    #One formatter shared by all handlers
    mediaparlFormatter = MultiLineFormatter("%(lineno)-4d : %(levelname)-10s %(name)-30s %(asctime)-25s %(message)s")

    if fileName is not None:
        #Check the log directory (the original comment says it is also
        #created - TODO confirm against MyFile.checkDirExists)
        MyFile.checkDirExists(MyFile(fileName).getFileDir())

        fileHandler = logging.handlers.RotatingFileHandler(filename=fileName,
                                                           maxBytes=1024000,
                                                           backupCount=5)
        fileHandler.setLevel(logLevel)
        fileHandler.setFormatter(mediaparlFormatter)
        taskLogger.addHandler(fileHandler)

    if logToStd:
        streamHandler = logging.StreamHandler(sys.stdout)
        streamHandler.setLevel(logLevel)
        streamHandler.setFormatter(mediaparlFormatter)
        taskLogger.addHandler(streamHandler)

    return taskLogger
def setupLogging(logLevel, fileName=None, logToStd=True):
    """Set up the "Asrt" logger and hand it back.

    Args:
        logLevel: level applied to the logger and to each handler created
            here.
        fileName: path of a rotating log file (1024000 bytes per file,
            5 backups), or None to log to no file.
        logToStd: whether a sys.stdout handler is attached as well.

    Returns:
        The "Asrt" logger.

    Note:
        Every call appends new handlers; nothing removes handlers added
        by an earlier call.
    """
    taskLogger = logging.getLogger("Asrt")
    taskLogger.setLevel(logLevel)

    #Same layout for file and console output
    mediaparlFormatter = MultiLineFormatter(
        "%(lineno)-4d : %(levelname)-10s %(name)-30s %(asctime)-25s %(message)s"
    )

    if fileName is not None:
        #Check the log directory (said to create it as well by the
        #original comment - TODO confirm against MyFile.checkDirExists)
        MyFile.checkDirExists(MyFile(fileName).getFileDir())

        fileHandler = logging.handlers.RotatingFileHandler(filename=fileName,
                                                           maxBytes=1024000,
                                                           backupCount=5)
        fileHandler.setLevel(logLevel)
        fileHandler.setFormatter(mediaparlFormatter)
        taskLogger.addHandler(fileHandler)

    if logToStd:
        streamHandler = logging.StreamHandler(sys.stdout)
        streamHandler.setLevel(logLevel)
        streamHandler.setFormatter(mediaparlFormatter)
        taskLogger.addHandler(streamHandler)

    return taskLogger
def execute(commandList, logPath, outFileName=None, errFileName=None):
    """Wrapper to execute a sub process.

    Args:
        commandList: command and arguments, handed as is to
            subprocess.Popen.
        logPath: log directory, passed to MyFile.checkDirExists before
            the command is started.
        outFileName: not used in the portion of the function visible here.
        errFileName: when None, stderr is merged into stdout; otherwise
            stderr is captured through its own pipe.

    Note:
        The portion of the function visible here ends after the error
        handling and does not return a value.
    """
    #Make sure the directory exists
    MyFile.checkDirExists(logPath)

    stdout, stderr, retCode = None, None, 0
    try:
        #Pick the stderr destination first so the command is started
        #exactly once (default is one merged log)
        stderrTarget = subprocess.STDOUT
        if errFileName is not None:
            stderrTarget = subprocess.PIPE

        p = subprocess.Popen(commandList, stdout=subprocess.PIPE,
                             stderr=stderrTarget)

        #Run the subprocess
        stdout, stderr = p.communicate()
        retCode = p.poll()
    except Exception as e:
        AsrtSubprocess.logger.critical("Subprocess error: %s" % str(e))
        errorMessage = str(commandList) + "\n" + \
                       "------------ Begin stack ------------\n" + \
                       traceback.format_exc().rstrip() + "\n" + \
                       "------------ End stack --------------"
        #Call form prints the same text on Python 2 and Python 3
        print(errorMessage)

        #Make sure the trace is logged
        if stderr is None:
            stderr = errorMessage
        else:
            stderr += errorMessage
        retCode = 1
def _readDataList(self):
    """Copy the single data list of the target directory and load it.

    The list found in the target directory is copied to the input
    directory with the input list extension, then read into
    self.inputList.

    Raises:
        Exception: when the target directory holds no data list, or
            more than one.
    """
    self._log(logging.INFO,
              "Gather input lists from %s" % self.getTargetDirectory())

    #Candidate lists in the target directory
    candidates = MyFile.dirContent(self.getTargetDirectory(),
                                   "*" + Task.OUTPUTLISTEXTENSION)

    #Exactly one list is accepted
    if len(candidates) == 0:
        raise Exception("No data list found in %s!" %
                        self.getTargetDirectory())
    if len(candidates) > 1:
        raise Exception("One input list max, %d found!" % len(candidates))

    self._log(logging.INFO, "Found data list: %s!" % candidates[0])

    #Copy from target directory, swapping the extension on the way
    sourcePath = self.getTargetDirectory() + os.sep + candidates[0]
    destinationPath = (self.getInputDirectory() + os.sep
                       + MyFile.removeExtension(candidates[0])[0]
                       + Task.INPUTLISTEXTENSION)
    MyFile.copyFile(sourcePath, destinationPath)

    #Read content
    self.inputList = DataList()
    self.inputList.readFile(destinationPath)
def testConvertToText(self):
    """Convert a pdf and a text source and check the temporary files.

    The boolean returned by MyFile.checkFileExists is asserted, so the
    test fails when a conversion does not produce its temporary file.
    """
    #Pdf source
    rep = TextRepresentation(TestTextRepresentation.pdfFile,
                             TEMPDIRUNITTEST, LOGDIR)
    rep.convertToText()
    self.assertTrue(
        MyFile.checkFileExists(TestTextRepresentation.tmpPdfFile))

    #Text source
    rep = TextRepresentation(TestTextRepresentation.textFile,
                             TEMPDIRUNITTEST, LOGDIR)
    rep.convertToText()
    self.assertTrue(
        MyFile.checkFileExists(TestTextRepresentation.tmpTextFile))
def execute(commandList, logPath, outFileName=None, errFileName=None):
    """Wrapper to execute a sub process and save its output.

    Args:
        commandList: command and arguments, handed as is to
            subprocess.Popen.
        logPath: directory receiving the output files; passed to
            MyFile.checkDirExists before the command is started.
        outFileName: file name inside logPath receiving stdout, or None
            to skip writing it.
        errFileName: file name inside logPath receiving stderr. When
            None, stderr is merged into stdout and no error file is
            written.

    Returns:
        Tuple (retCode, stdout, stderr). stdout and stderr are what
        communicate() returned (None for a stream that was not piped).
        When running the command raises, retCode is 1 and stderr carries
        the command and the stack trace as text.
    """
    def _asText(data):
        #communicate() yields bytes whereas the error trace is already
        #text; only bytes need decoding
        if isinstance(data, bytes):
            return data.decode('utf-8')
        return data

    #Make sure the directory exists
    MyFile.checkDirExists(logPath)

    stdout, stderr, retCode = None, None, 0
    try:
        #Pick the stderr destination first so the command is started
        #exactly once (default is one merged log)
        stderrTarget = subprocess.STDOUT
        if errFileName is not None:
            stderrTarget = subprocess.PIPE

        p = subprocess.Popen(commandList, stdout=subprocess.PIPE,
                             stderr=stderrTarget)

        #Run the subprocess
        stdout, stderr = p.communicate()
        retCode = p.poll()
    except Exception as e:
        AsrtSubprocess.logger.critical("Subprocess error: %s" % str(e))
        errorMessage = str(commandList) + "\n" + \
                       "------------ Begin stack ------------\n" + \
                       traceback.format_exc().rstrip() + "\n" + \
                       "------------ End stack --------------"
        print(errorMessage)

        #Make sure the trace is logged
        if stderr is None:
            stderr = errorMessage
        else:
            stderr = _asText(stderr) + errorMessage
        retCode = 1

    #Now log results
    #It is important to be outside exception management as we
    #still want to log what happened
    io = Ioread()
    if stdout is not None and len(stdout) > 0 and outFileName is not None:
        io.writeFileContent("%s/%s" % (logPath, outFileName),
                            _asText(stdout))
    if stderr is not None and len(stderr) > 0 and errFileName is not None:
        io.writeFileContent("%s/%s" % (logPath, errFileName),
                            _asText(stderr))

    return retCode, stdout, stderr
def loadTextFile(self):
    """Read the converted text file into self.sentencesList.

    Raises:
        Exception: when no temporary file path is set or
            MyFile.checkFileExists reports the file as missing.
    """
    #The path is only checked on disk when it is set
    fileAvailable = (self.tempFilePath is not None
                     and MyFile.checkFileExists(self.tempFilePath))
    if not fileAvailable:
        raise Exception("Temporary text file does not exist!")

    reader = Ioread()
    self.sentencesList = reader.readFileContentList(self.tempFilePath)
def loadTextFile(self):
    """Load the lines of the converted text file.

    The content list returned by Ioread.readFileContentList is stored
    in self.sentencesList.

    Raises:
        Exception: when the temporary file path is None or the file is
            reported missing by MyFile.checkFileExists.
    """
    #No path at all: nothing to look for on disk
    if self.tempFilePath is None:
        raise Exception("Temporary text file does not exist!")

    #Path set but file missing
    if not MyFile.checkFileExists(self.tempFilePath):
        raise Exception("Temporary text file does not exist!")

    self.sentencesList = Ioread().readFileContentList(self.tempFilePath)
def prepareOutputData(self):
    """Copy the sentence files of the temp directory to the output one.

    Every entry of the temp directory matching "*sentences_*.txt" is
    copied with shutil.copy.
    """
    self._log(
        logging.INFO,
        "Copy results files to output folder:%s" % self.getOutputDirectory())

    # Result files produced in the temp directory
    sentenceFiles = MyFile.dirContent(self.getTempDirectory(),
                                      "*sentences_*.txt")

    for fileName in sentenceFiles:
        sourcePath = self.getTempDirectory() + os.sep + fileName
        shutil.copy(sourcePath, self.getOutputDirectory())
def prepareOutputData(self):
    """Publish result files: temp directory -> output directory.

    Only the temp directory entries matching "*sentences_*.txt" are
    copied.
    """
    logMessage = "Copy results files to output folder:%s" % \
                 self.getOutputDirectory()
    self._log(logging.INFO, logMessage)

    #Files to publish
    resultFiles = MyFile.dirContent(self.getTempDirectory(),
                                    "*sentences_*.txt")

    for resultFile in resultFiles:
        shutil.copy(self.getTempDirectory() + os.sep + resultFile,
                    self.getOutputDirectory())
def loadDocumentAsSentences(self, tempDir):
    """Convert the source document to text and load it as sentences.

    The source file is converted into a temporary text file, which is
    handed to self._loadTextDocumentAsSentences (per the original notes,
    segmentation relies on the NLTK toolkit) and then deleted.

    Args:
        tempDir: directory passed to self.convertToText for the
            temporary text file.

    Raises:
        Whatever the conversion or the segmentation raises; the
        temporary file is removed in both cases once it has been
        produced.
    """
    #Pdf to text
    tempFileName = self.convertToText(self.sourceFileName, tempDir,
                                      self.logDir)

    try:
        #Segment into sentences using NLTK toolkit
        self._loadTextDocumentAsSentences(tempFileName)
    finally:
        #Delete temporary file, also when segmentation failed, so
        #that no stale file is left behind
        MyFile(tempFileName).removeFile(tempFileName)
def gatherInputData(self):
    """Rebuild the task directory tree, then copy the lists.

    The task directory (working directory + task instance name) is
    removed and created again together with its input, temp and output
    sub folders; self._copyLists() is called last.
    """
    workingDirectory = self.getWorkingDirectory()
    #Value is not used below; the call is kept as in the original
    self.getTargetDirectory()

    self.taskDirectory = "%s%s%s" % (workingDirectory, os.sep,
                                     self.taskInstanceName)

    #Don't want to keep old results
    MyFile.forceRemoveDir(self.taskDirectory)

    #Make task working directory
    MyFile.makeDir(self.taskDirectory)

    #Sub folders, each getter being called right before its makeDir
    subFolderGetters = (self.getInputDirectory,
                        self.getTempDirectory,
                        self.getOutputDirectory)
    for getSubFolder in subFolderGetters:
        MyFile.makeDir(getSubFolder())

    #Read data list and data maps
    self._copyLists()
def _readMapLists(self):
    """Copy the data maps of the target directory and load them.

    Each map found in the target directory is copied to the input
    directory with the input map extension, read into a DataMap and
    appended to self.mapLists.

    Raises:
        Exception: when the target directory holds no data map.
    """
    #Data maps available in the target directory
    mapFileNames = MyFile.dirContent(self.getTargetDirectory(),
                                     "*" + Task.OUTPUTMAPEXTENSION)

    #At least one map is required
    if len(mapFileNames) == 0:
        raise Exception("No data map found in %s!" %
                        self.getTargetDirectory())

    self._log(logging.INFO,
              "Found %d input map list(s)!" % len(mapFileNames))

    for mapFileName in mapFileNames:
        self._log(logging.INFO, "Found map list: %s!" % mapFileName)

        #Copy from target directory, swapping the extension on the way
        sourcePath = self.getTargetDirectory() + os.sep + mapFileName
        destinationPath = (self.getInputDirectory() + os.sep
                           + MyFile.removeExtension(mapFileName)[0]
                           + Task.INPUTMAPEXTENSION)
        MyFile.copyFile(sourcePath, destinationPath)

        #Read content
        loadedMap = DataMap()
        loadedMap.readFile(destinationPath)
        self.mapLists.append(loadedMap)

    self._log(logging.INFO,
              "Lists have been copied to %s" % self.getInputDirectory())

    #Debug information
    inputDirectoryContent = MyFile.dirContent(self.getInputDirectory(), "*")
    self._log(logging.INFO, "Files in input directory: '%s'." %
              ", ".join(inputDirectoryContent))
def execute(commandList, logPath, outFileName=None, errFileName=None):
    """Wrapper to execute a sub process.

    Args:
        commandList: command and arguments, handed as is to
            subprocess.Popen.
        logPath: log directory, passed to MyFile.checkDirExists before
            the command is started.
        outFileName: not used in the portion of the function visible here.
        errFileName: when None, stderr is merged into stdout; otherwise
            stderr is captured through its own pipe.

    Note:
        The portion of the function visible here ends after the error
        handling and does not return a value.
    """
    #Make sure the directory exists
    MyFile.checkDirExists(logPath)

    stdout, stderr, retCode = None, None, 0
    try:
        #Single launch: the stderr destination is decided beforehand
        #instead of starting the command a second time
        if errFileName is None:
            #Default to one log
            stderrTarget = subprocess.STDOUT
        else:
            stderrTarget = subprocess.PIPE

        p = subprocess.Popen(commandList, stdout=subprocess.PIPE,
                             stderr=stderrTarget)

        #Run the subprocess
        stdout, stderr = p.communicate()
        retCode = p.poll()
    except Exception as e:
        AsrtSubprocess.logger.critical("Subprocess error: %s" % str(e))
        errorMessage = str(commandList) + "\n" + \
                       "------------ Begin stack ------------\n" + \
                       traceback.format_exc().rstrip() + "\n" + \
                       "------------ End stack --------------"
        #Call form prints the same text on Python 2 and Python 3
        print(errorMessage)

        #Make sure the trace is logged
        if stderr is None:
            stderr = errorMessage
        else:
            stderr += errorMessage
        retCode = 1
def testGatherInputData(self):
    """Check gatherInputData on three invalid and one valid target folder."""

    def buildTask(targetFolder):
        #Tasks only differ by their target folder
        return Task(TaskInfo("", TestTask.workingDirectory, targetFolder))

    #No data olist
    task1 = buildTask(TestTask.targetFolderErr1)
    with self.assertRaises(Exception):
        task1.gatherInputData()

    #No map .omap
    task1 = buildTask(TestTask.targetFolderErr2)
    with self.assertRaises(Exception):
        task1.gatherInputData()

    #Valid target folder
    task1 = buildTask(TestTask.targetFolder1)
    task1.gatherInputData()

    #Task directory and its sub folders
    for getDirectory in (task1.getTaskDirectory, task1.getInputDirectory,
                         task1.getTempDirectory, task1.getOutputDirectory):
        self.assertTrue(MyFile.checkFileExists(getDirectory()))

    #Copied list and maps
    expectedPaths = ["%s%s%s" % (task1.getInputDirectory(), os.sep, name)
                     for name in ('data.ilist', 'audio.imap', 'model.imap')]
    for expectedPath in expectedPaths:
        self.assertTrue(MyFile.checkFileExists(expectedPath))

    #Loaded content
    self.assertEqual(15, task1.inputList.getCount())
    self.assertEqual(2, len(task1.mapLists))

    for dataMap in task1.mapLists:
        self.assertTrue(dataMap.getCount() in [2, 1])

    #Two input lists
    task1 = buildTask(TestTask.targetFolderErr)
    with self.assertRaises(Exception):
        task1.gatherInputData()
def testGatherInputData(self):
    """Check gatherInputData on three invalid and one valid target folder.

    Uses assertEqual / assertIn rather than the deprecated assertEquals
    alias, which newer unittest versions no longer provide.
    """
    #No data olist
    task1 = Task(TaskInfo("", TestTask.workingDirectory,
                          TestTask.targetFolderErr1))
    with self.assertRaises(Exception):
        task1.gatherInputData()

    #No map .omap
    task1 = Task(TaskInfo("", TestTask.workingDirectory,
                          TestTask.targetFolderErr2))
    with self.assertRaises(Exception):
        task1.gatherInputData()

    #Valid target folder
    task1 = Task(TaskInfo("", TestTask.workingDirectory,
                          TestTask.targetFolder1))
    task1.gatherInputData()

    #Task directory and its sub folders
    self.assertTrue(MyFile.checkFileExists(task1.getTaskDirectory()))
    self.assertTrue(MyFile.checkFileExists(task1.getInputDirectory()))
    self.assertTrue(MyFile.checkFileExists(task1.getTempDirectory()))
    self.assertTrue(MyFile.checkFileExists(task1.getOutputDirectory()))

    #Copied list and maps
    dataListPath = "%s%s%s" % (task1.getInputDirectory(), os.sep, 'data.ilist')
    dataMap1Path = "%s%s%s" % (task1.getInputDirectory(), os.sep, 'audio.imap')
    dataMap2Path = "%s%s%s" % (task1.getInputDirectory(), os.sep, 'model.imap')

    self.assertTrue(MyFile.checkFileExists(dataListPath))
    self.assertTrue(MyFile.checkFileExists(dataMap1Path))
    self.assertTrue(MyFile.checkFileExists(dataMap2Path))

    #Loaded content
    self.assertEqual(15, task1.inputList.getCount())
    self.assertEqual(2, len(task1.mapLists))

    for dataMap in task1.mapLists:
        self.assertIn(dataMap.getCount(), [2, 1])

    #Two input lists
    task1 = Task(TaskInfo("", TestTask.workingDirectory,
                          TestTask.targetFolderErr))
    with self.assertRaises(Exception):
        task1.gatherInputData()
def getTempFilePath(self):
    """Return the path of the temporary version of the source file.

    The path is self.tempDir, the path separator, the current file name
    of the source (as given by MyFile.getCurrentFileName) and ".tmp".
    """
    directoryPrefix = self.tempDir + os.sep
    currentFileName = MyFile(self.sourceFileName).getCurrentFileName()
    return directoryPrefix + currentFileName + ".tmp"
setupLogging(logging.INFO, outputDir + "/data_preparation_log.txt") # Api setup api = DataPreparationAPI(None, outputDir) api.setRegexFile(regexFile) api.setFilterSentences(filterSentences) api.setFilterSentences2ndStage(filterSentences2ndStage) api.setLMModeling(lmModeling) api.setRemovePunctuation(removePunctuation) api.setVerbalizePunctuation(verbalizePunctuation) api.setSegmentWithNLTK(not rawSeg) api.setExpandNumberInWords(expandNumberInWords) if language == 0: api.trainClassifier() # Main processing MyFile.checkDirExists(outputDir) io = Ioread() inputList = io.readFileContentList(inputList) for i, f in enumerate(inputList): api.setInputFile(f) api.prepareDocument(language) strUnformatted = api.getCleanedText() outputFile = "%s/%s.lab" % (outputDir, os.path.splitext(os.path.basename(f))[0]) io.writeFileContent(outputFile, strUnformatted + "\n")
setupLogging(logging.INFO, outputDir + "/data_preparation_log.txt") #Api setup api = DataPreparationAPI(None, outputDir) api.setRegexFile(regexFile) api.setFilterSentences(filterSentences) api.setFilterSentences2ndStage(filterSentences2ndStage) api.setLMModeling(lmModeling) api.setRemovePunctuation(removePunctuation) api.setVerbalizePunctuation(verbalizePunctuation) api.setSegmentWithNLTK(not rawSeg) api.setKeepNewWords(keepNewWords) if language == 0: api.trainClassifier() #Main processing MyFile.checkDirExists(outputDir) io = Ioread() inputList = io.readFileContentList(inputList) for i, f in enumerate(inputList): api.setInputFile(f) api.prepareDocument(language) strUnformatted = api.getCleanedText() outputFile = "%s/%s.lab" % (outputDir, os.path.splitext(os.path.basename(f))[0]) io.writeFileContent(outputFile, strUnformatted + u"\n")
commonTestSuite = CommonTestSuite.getCommonTestSuite(unitTestList) frenchTestSuite = FrenchTestSuite.getFrenchTestSuite(unitTestList) germanTestSuite = GermanTestSuite.getGermanTestSuite(unitTestList) formulaTestSuite = FormulaTestSuite.getFormulaTestSuite(unitTestList) allTestSuite = [] if commonTestSuite is not None: allTestSuite.extend(commonTestSuite) if formulaTestSuite is not None: allTestSuite.extend(formulaTestSuite) if frenchTestSuite is not None: allTestSuite.extend(frenchTestSuite) if germanTestSuite is not None: allTestSuite.extend(germanTestSuite) allTests = unittest.TestSuite(allTestSuite) return allTests if __name__ == "__main__": if len(sys.argv) < 2: print getUsage() print " usage: %s 'unit test name 1 or all' 'unit test name 2' " % sys.argv[0] print "" sys.exit(0) MyFile.checkDirExists(TEMPDIRUNITTEST) runner = unittest.TextTestRunner(verbosity = 2) runner.run(asrtTestSuite(sys.argv[1:]))
allTestSuite = [] if commonTestSuite is not None: allTestSuite.extend(commonTestSuite) if formulaTestSuite is not None: allTestSuite.extend(formulaTestSuite) if frenchTestSuite is not None: allTestSuite.extend(frenchTestSuite) if germanTestSuite is not None: allTestSuite.extend(germanTestSuite) if englishTestSuite is not None: allTestSuite.extend(englishTestSuite) allTests = unittest.TestSuite(allTestSuite) return allTests if __name__ == "__main__": if len(sys.argv) < 2: print((getUsage())) print((" usage: %s 'unit test name 1 or all' 'unit test name 2' " % sys.argv[0])) print("") sys.exit(0) MyFile.checkDirExists(TEMPDIRUNITTEST) runner = unittest.TextTestRunner(verbosity=2) runner.run(asrtTestSuite(sys.argv[1:]))