示例#1
0
def setupLogging(logLevel, fileName=None, logToStd=True):
    """Configure and return the "Asrt" logger.

       logLevel -- level set on the logger and on every handler
                   added by this call
       fileName -- when not None, also log to this file through a
                   rotating file handler (1024000 bytes, 5 backups)
       logToStd -- when true, also log to sys.stdout

       Each call adds new handlers to the same "Asrt" logger, so
       calling this function several times repeats the output.
    """
    taskLogger = logging.getLogger("Asrt")
    taskLogger.setLevel(logLevel)

    #One formatter shared by all handlers
    mediaparlFormatter = MultiLineFormatter("%(lineno)-4d : %(levelname)-10s %(name)-30s %(asctime)-25s %(message)s")

    handlers = []

    if fileName is not None:
        #Check and make directory
        MyFile.checkDirExists(MyFile(fileName).getFileDir())

        handlers.append(logging.handlers.RotatingFileHandler(
            filename=fileName, maxBytes=1024000, backupCount=5))

    if logToStd:
        handlers.append(logging.StreamHandler(sys.stdout))

    #Same level and format for every destination
    for handler in handlers:
        handler.setLevel(logLevel)
        handler.setFormatter(mediaparlFormatter)
        taskLogger.addHandler(handler)

    return taskLogger
示例#2
0
def setupLogging(logLevel, fileName=None, logToStd=True):
    """Configure and return the "Asrt" logger.

       logLevel -- level set on the logger and on every handler
                   added by this call
       fileName -- when not None, also log to this file through a
                   rotating file handler (1024000 bytes, 5 backups)
       logToStd -- when true, also log to sys.stdout

       Each call adds new handlers to the same "Asrt" logger, so
       calling this function several times repeats the output.
    """
    taskLogger = logging.getLogger("Asrt")
    taskLogger.setLevel(logLevel)

    mediaparlFormatter = MultiLineFormatter(
        "%(lineno)-4d : %(levelname)-10s %(name)-30s %(asctime)-25s %(message)s"
    )

    def attach(handler):
        #Same level and format for every destination
        handler.setLevel(logLevel)
        handler.setFormatter(mediaparlFormatter)
        taskLogger.addHandler(handler)

    if fileName is not None:
        #Check and make directory
        MyFile.checkDirExists(MyFile(fileName).getFileDir())

        attach(logging.handlers.RotatingFileHandler(filename=fileName,
                                                    maxBytes=1024000,
                                                    backupCount=5))

    if logToStd:
        attach(logging.StreamHandler(sys.stdout))

    return taskLogger
    def execute(commandList, logPath, outFileName = None, errFileName = None):
        """Wrapper to execute a sub process.

        commandList -- program and arguments handed to subprocess.Popen
        logPath     -- log directory, passed to MyFile.checkDirExists
        outFileName -- presumably the file name of the stdout log; it
                       does not influence how the command is run
        errFileName -- when not None, stderr is captured on its own
                       pipe instead of being merged into stdout

        A failure to run the command is logged as critical, printed
        and stored in stderr with the stack trace; retCode is then 1.
        """
        #Make sure the directory exists
        MyFile.checkDirExists(logPath)

        stdout, stderr, retCode = None, None, 0

        #Default to one log. The stderr target is selected before
        #spawning so that the command is started exactly once.
        if errFileName is not None:
            stderrTarget = subprocess.PIPE
        else:
            stderrTarget = subprocess.STDOUT

        try:
            p = subprocess.Popen(commandList, stdout=subprocess.PIPE,
                                 stderr=stderrTarget)

            #Run the subprocess
            stdout, stderr = p.communicate()
            retCode = p.poll()
        except Exception as e:
            AsrtSubprocess.logger.critical("Subprocess error: %s" % str(e))
            errorMessage = str(commandList) + "\n" + \
                           "------------ Begin stack ------------\n" + \
                           traceback.format_exc().rstrip() + "\n" + \
                           "------------ End stack --------------"
            print(errorMessage)

            #Make sure the trace is logged
            if stderr is None:
                stderr = errorMessage
            else:
                stderr += errorMessage

            retCode = 1
示例#4
0
    def _readDataList(self):
        """Copy the single data list of the 'target directory' into
           the input directory and load it into 'inputList'.

           Raises an Exception when the target directory holds no
           data list or more than one.
        """
        self._log(logging.INFO,
                  "Gather input lists from %s" % self.getTargetDirectory())

        candidates = MyFile.dirContent(self.getTargetDirectory(),
                                       "*" + Task.OUTPUTLISTEXTENSION)
        found = len(candidates)

        #Exactly one list is expected
        if found == 0:
            raise Exception("No data list found in %s!" %
                            self.getTargetDirectory())
        if found > 1:
            raise Exception("One input list max, %d found!" % found)

        listName = candidates[0]
        self._log(logging.INFO, "Found data list: %s!" % listName)

        #Same base name, input extension
        sourcePath = os.sep.join((self.getTargetDirectory(), listName))
        destinationPath = os.sep.join(
            (self.getInputDirectory(),
             MyFile.removeExtension(listName)[0] + Task.INPUTLISTEXTENSION))
        MyFile.copyFile(sourcePath, destinationPath)

        #Load the copy
        self.inputList = DataList()
        self.inputList.readFile(destinationPath)
	def testConvertToText(self):
		#Source file and the temporary file its conversion must create
		cases = (
			(TestTextRepresentation.pdfFile, TestTextRepresentation.tmpPdfFile),
			(TestTextRepresentation.textFile, TestTextRepresentation.tmpTextFile),
		)

		for sourceFile, expectedTmpFile in cases:
			rep = TextRepresentation(sourceFile, TEMPDIRUNITTEST, LOGDIR)
			rep.convertToText()

			#The result has to be asserted, otherwise a missing
			#file goes unnoticed
			self.assertTrue(MyFile.checkFileExists(expectedTmpFile))
    def testConvertToText(self):
        #Pdf source
        rep = TextRepresentation(TestTextRepresentation.pdfFile,
                                 TEMPDIRUNITTEST, LOGDIR)
        rep.convertToText()

        #The result has to be asserted, otherwise a missing file
        #goes unnoticed
        self.assertTrue(
            MyFile.checkFileExists(TestTextRepresentation.tmpPdfFile))

        #Plain text source
        rep = TextRepresentation(TestTextRepresentation.textFile,
                                 TEMPDIRUNITTEST, LOGDIR)
        rep.convertToText()
        self.assertTrue(
            MyFile.checkFileExists(TestTextRepresentation.tmpTextFile))
示例#7
0
    def execute(commandList, logPath, outFileName=None, errFileName=None):
        """Run a sub process and optionally write its output to files.

        commandList -- program and arguments handed to subprocess.Popen
        logPath     -- log directory, passed to MyFile.checkDirExists
        outFileName -- file name, inside logPath, receiving the
                       captured stdout (nothing written when None)
        errFileName -- file name, inside logPath, receiving the
                       captured stderr; when None, stderr is merged
                       into stdout

        Returns the tuple (retCode, stdout, stderr). When running the
        command raised, retCode is 1 and stderr holds the command
        followed by the stack trace.
        """
        def asText(data):
            #Pipes deliver bytes, the error trace is already text
            if isinstance(data, bytes):
                return str(data, 'utf-8')
            return data

        #Make sure the directory exists
        MyFile.checkDirExists(logPath)

        stdout, stderr, retCode = None, None, 0

        #Default to one log. The stderr target is selected before
        #spawning so that the command is started exactly once.
        stderrTarget = subprocess.STDOUT
        if errFileName is not None:
            stderrTarget = subprocess.PIPE

        try:
            p = subprocess.Popen(commandList,
                                 stdout=subprocess.PIPE,
                                 stderr=stderrTarget)

            #Run the subprocess
            stdout, stderr = p.communicate()
            retCode = p.poll()
        except Exception as e:
            AsrtSubprocess.logger.critical("Subprocess error: %s" % str(e))
            errorMessage = str(commandList) + "\n" + \
                           "------------ Begin stack ------------\n" + \
                           traceback.format_exc().rstrip() + "\n" + \
                           "------------ End stack --------------"
            print(errorMessage)

            #Make sure the trace is logged
            if stderr is None:
                stderr = errorMessage
            else:
                stderr = asText(stderr) + errorMessage

            retCode = 1

        #Now log results
        #It is important to be outside exception management as we
        #still want to log what happened
        io = Ioread()

        if stdout is not None and len(stdout) > 0 and outFileName is not None:
            io.writeFileContent("%s/%s" % (logPath, outFileName),
                                asText(stdout))

        if stderr is not None and len(stderr) > 0 and errFileName is not None:
            io.writeFileContent("%s/%s" % (logPath, errFileName),
                                asText(stderr))

        return retCode, stdout, stderr
    def loadTextFile(self):
        """Read the converted text file into 'sentencesList'.

           Raises an Exception when no temporary file path is set or
           when the file check on that path fails.
        """
        tempFileAvailable = (self.tempFilePath is not None
                             and MyFile.checkFileExists(self.tempFilePath))
        if not tempFileAvailable:
            raise Exception("Temporary text file does not exist!")

        self.sentencesList = Ioread().readFileContentList(self.tempFilePath)
示例#9
0
    def loadTextFile(self):
        """Read the converted text file into 'sentencesList'.

           Raises an Exception when no temporary file path is set or
           when the file check on that path fails.
        """
        textPath = self.tempFilePath

        #Guard clauses: a path must be set and must pass the check
        if textPath is None:
            raise Exception("Temporary text file does not exist!")
        if not MyFile.checkFileExists(textPath):
            raise Exception("Temporary text file does not exist!")

        reader = Ioread()
        self.sentencesList = reader.readFileContentList(textPath)
示例#10
0
    def prepareOutputData(self):
        """Copy the sentence files ('*sentences_*.txt') from the
           temporary directory to the output directory.
        """
        self._log(logging.INFO,
                  "Copy results files to output folder:%s" % self.getOutputDirectory())

        #Sentence files produced in the temporary directory
        for sentenceName in MyFile.dirContent(self.getTempDirectory(),
                                              "*sentences_*.txt"):
            shutil.copy(os.sep.join((self.getTempDirectory(), sentenceName)),
                        self.getOutputDirectory())
示例#11
0
    def prepareOutputData(self):
        """Copy the sentence files ('*sentences_*.txt') from the
           temporary directory to the output directory.
        """
        startMessage = "Copy results files to output folder:%s" % \
                       self.getOutputDirectory()
        self._log(logging.INFO, startMessage)

        #Sentence files produced in the temporary directory
        sentenceFiles = MyFile.dirContent(self.getTempDirectory(),
                                          "*sentences_*.txt")
        for fileName in sentenceFiles:
            sourcePath = os.sep.join([self.getTempDirectory(), fileName])
            destinationDir = self.getOutputDirectory()
            shutil.copy(sourcePath, destinationDir)
示例#12
0
    def loadDocumentAsSentences(self, tempDir):
        """Convert the source document to a temporary text file and
           load that file as sentences.

           tempDir -- directory handed to 'convertToText' for the
                      temporary file

           The temporary file is removed afterwards, also when the
           sentence loading raises.
        """
        #Pdf to text
        tempFileName = self.convertToText(self.sourceFileName, tempDir, self.logDir)

        try:
            #Segment into sentences using NLTK toolkit
            self._loadTextDocumentAsSentences(tempFileName)
        finally:
            #Delete temporary file, even when the loading failed
            MyFile(tempFileName).removeFile(tempFileName)
示例#13
0
    def gatherInputData(self):
        """Prepare the task directories, then copy and load the lists.

           The task directory (working directory and task instance
           name joined with os.sep) is removed and created again, the
           input, temp and output sub folders are created and
           '_copyLists' is called.
        """
        self.taskDirectory = "%s%s%s" % (self.getWorkingDirectory(), os.sep,
                                         self.taskInstanceName)

        #Don't want to keep old results
        MyFile.forceRemoveDir(self.taskDirectory)

        #Make task working directory
        MyFile.makeDir(self.taskDirectory)

        #Sub folders
        MyFile.makeDir(self.getInputDirectory())
        MyFile.makeDir(self.getTempDirectory())
        MyFile.makeDir(self.getOutputDirectory())

        #Read data list and data maps
        self._copyLists()
示例#14
0
    def _readMapLists(self):
        """Copy every data map of the 'target directory' into the
           input directory and load it into 'mapLists'.

           Raises an Exception when the target directory holds no
           data map.
        """
        targetMaps = MyFile.dirContent(self.getTargetDirectory(),
                                       "*" + Task.OUTPUTMAPEXTENSION)
        mapCount = len(targetMaps)

        #At least one map is required
        if mapCount == 0:
            raise Exception("No data map found in %s!" %
                            self.getTargetDirectory())

        self._log(logging.INFO, "Found %d input map list(s)!" % mapCount)

        for mapName in targetMaps:
            self._log(logging.INFO, "Found map list: %s!" % mapName)

            #Same base name, input extension
            sourcePath = os.sep.join((self.getTargetDirectory(), mapName))
            destinationPath = os.sep.join(
                (self.getInputDirectory(),
                 MyFile.removeExtension(mapName)[0] + Task.INPUTMAPEXTENSION))
            MyFile.copyFile(sourcePath, destinationPath)

            #Load the copy
            loadedMap = DataMap()
            loadedMap.readFile(destinationPath)
            self.mapLists.append(loadedMap)

        self._log(logging.INFO,
                  "Lists have been copied to %s" % self.getInputDirectory())

        #Debug information
        inputContent = MyFile.dirContent(self.getInputDirectory(), "*")
        self._log(logging.INFO,
                  "Files in input directory: '%s'." % ", ".join(inputContent))
示例#15
0
    def execute(commandList, logPath, outFileName=None, errFileName=None):
        """Wrapper to execute a sub process.

        commandList -- program and arguments handed to subprocess.Popen
        logPath     -- log directory, passed to MyFile.checkDirExists
        outFileName -- presumably the file name of the stdout log; it
                       does not influence how the command is run
        errFileName -- when not None, stderr is captured on its own
                       pipe instead of being merged into stdout

        A failure to run the command is logged as critical, printed
        and stored in stderr with the stack trace; retCode is then 1.
        """
        #Make sure the directory exists
        MyFile.checkDirExists(logPath)

        stdout, stderr, retCode = None, None, 0

        #Default to one log. The stderr target is selected before
        #spawning so that the command is started exactly once.
        stderrTarget = subprocess.PIPE if errFileName is not None \
                       else subprocess.STDOUT

        try:
            p = subprocess.Popen(commandList,
                                 stdout=subprocess.PIPE,
                                 stderr=stderrTarget)

            #Run the subprocess
            stdout, stderr = p.communicate()
            retCode = p.poll()
        except Exception as e:
            AsrtSubprocess.logger.critical("Subprocess error: %s" % str(e))
            errorMessage = str(commandList) + "\n" + \
                           "------------ Begin stack ------------\n" + \
                           traceback.format_exc().rstrip() + "\n" + \
                           "------------ End stack --------------"
            print(errorMessage)

            #Make sure the trace is logged
            if stderr is None:
                stderr = errorMessage
            else:
                stderr += errorMessage

            retCode = 1
示例#16
0
    def testGatherInputData(self):
        def buildTask(targetFolder):
            #Every task of this test shares name and working directory
            return Task(TaskInfo("", TestTask.workingDirectory, targetFolder))

        #No data olist, then no map .omap
        for brokenFolder in (TestTask.targetFolderErr1,
                             TestTask.targetFolderErr2):
            task1 = buildTask(brokenFolder)

            with self.assertRaises(Exception):
                task1.gatherInputData()

        #Valid target folder
        task1 = buildTask(TestTask.targetFolder1)
        task1.gatherInputData()

        for getDirectory in (task1.getTaskDirectory, task1.getInputDirectory,
                             task1.getTempDirectory, task1.getOutputDirectory):
            self.assertTrue(MyFile.checkFileExists(getDirectory()))

        expectedPaths = ["%s%s%s" % (task1.getInputDirectory(), os.sep, name)
                         for name in ('data.ilist', 'audio.imap', 'model.imap')]

        for expectedPath in expectedPaths:
            self.assertTrue(MyFile.checkFileExists(expectedPath))

        self.assertEqual(15, task1.inputList.getCount())
        self.assertEqual(2, len(task1.mapLists))

        for dataMap in task1.mapLists:
            self.assertTrue(dataMap.getCount() in [2, 1])

        #Two input lists
        task1 = buildTask(TestTask.targetFolderErr)

        with self.assertRaises(Exception):
            task1.gatherInputData()
示例#17
0
	def testGatherInputData(self):
		#No data olist
		task1 = Task(TaskInfo("", TestTask.workingDirectory,
			TestTask.targetFolderErr1))

		with self.assertRaises(Exception):
			task1.gatherInputData()

		#No map .omap
		task1 = Task(TaskInfo("", TestTask.workingDirectory,
			TestTask.targetFolderErr2))

		with self.assertRaises(Exception):
			task1.gatherInputData()

		#Valid target folder
		task1 = Task(TaskInfo("", TestTask.workingDirectory,
			TestTask.targetFolder1))

		task1.gatherInputData()

		self.assertTrue(MyFile.checkFileExists(task1.getTaskDirectory()))
		self.assertTrue(MyFile.checkFileExists(task1.getInputDirectory()))
		self.assertTrue(MyFile.checkFileExists(task1.getTempDirectory()))
		self.assertTrue(MyFile.checkFileExists(task1.getOutputDirectory()))

		dataListPath = "%s%s%s" % (task1.getInputDirectory(), os.sep, 'data.ilist')
		dataMap1Path = "%s%s%s" % (task1.getInputDirectory(), os.sep, 'audio.imap')
		dataMap2Path = "%s%s%s" % (task1.getInputDirectory(), os.sep, 'model.imap')

		self.assertTrue(MyFile.checkFileExists(dataListPath))
		self.assertTrue(MyFile.checkFileExists(dataMap1Path))
		self.assertTrue(MyFile.checkFileExists(dataMap2Path))

		#assertEqual instead of the deprecated assertEquals alias
		self.assertEqual(15, task1.inputList.getCount())
		self.assertEqual(2, len(task1.mapLists))

		for dataMap in task1.mapLists:
			self.assertIn(dataMap.getCount(), [2, 1])

		task1 = Task(TaskInfo("", TestTask.workingDirectory,
			TestTask.targetFolderErr))

		#Two input lists
		with self.assertRaises(Exception):
			task1.gatherInputData()
示例#18
0
 def getTempFilePath(self):
     """Build the path of the temporary version of the source file:
        the temporary directory, the current file name of the source
        and a '.tmp' suffix.
     """
     directory = self.tempDir
     currentName = MyFile(self.sourceFileName).getCurrentFileName()
     return os.sep.join((directory, currentName + ".tmp"))
    #Log at INFO level to a file inside the output directory
    setupLogging(logging.INFO, outputDir + "/data_preparation_log.txt")

    # Api setup: forward the processing options to the API
    api = DataPreparationAPI(None, outputDir)
    api.setRegexFile(regexFile)
    api.setFilterSentences(filterSentences)
    api.setFilterSentences2ndStage(filterSentences2ndStage)
    api.setLMModeling(lmModeling)
    api.setRemovePunctuation(removePunctuation)
    api.setVerbalizePunctuation(verbalizePunctuation)
    #The API option is the negation of 'rawSeg'
    api.setSegmentWithNLTK(not rawSeg)
    api.setExpandNumberInWords(expandNumberInWords)

    #The classifier is only trained for language code 0
    #NOTE(review): the meaning of code 0 is not visible here - confirm
    if language == 0:
        api.trainClassifier()

    # Main processing
    MyFile.checkDirExists(outputDir)

    io = Ioread()
    #'inputList' is rebound to the content read from its former value
    #(presumably the path of a list file - confirm with the caller)
    inputList = io.readFileContentList(inputList)

    #Each entry is prepared on its own and its cleaned text is written
    #to '<outputDir>/<base name of the entry without extension>.lab'
    for i, f in enumerate(inputList):
        api.setInputFile(f)
        api.prepareDocument(language)
        strUnformatted = api.getCleanedText()

        outputFile = "%s/%s.lab" % (outputDir,
                                    os.path.splitext(os.path.basename(f))[0])
        io.writeFileContent(outputFile, strUnformatted + "\n")
    #Log at INFO level to a file inside the output directory
    setupLogging(logging.INFO, outputDir + "/data_preparation_log.txt")

    #Api setup: forward the processing options to the API
    api = DataPreparationAPI(None, outputDir)
    api.setRegexFile(regexFile)
    api.setFilterSentences(filterSentences)
    api.setFilterSentences2ndStage(filterSentences2ndStage)
    api.setLMModeling(lmModeling)
    api.setRemovePunctuation(removePunctuation)
    api.setVerbalizePunctuation(verbalizePunctuation)
    #The API option is the negation of 'rawSeg'
    api.setSegmentWithNLTK(not rawSeg)
    api.setKeepNewWords(keepNewWords)

    #The classifier is only trained for language code 0
    #NOTE(review): the meaning of code 0 is not visible here - confirm
    if language == 0:
        api.trainClassifier()

    #Main processing
    MyFile.checkDirExists(outputDir)

    io = Ioread()
    #'inputList' is rebound to the content read from its former value
    #(presumably the path of a list file - confirm with the caller)
    inputList = io.readFileContentList(inputList)

    #Each entry is prepared on its own and its cleaned text is written
    #to '<outputDir>/<base name of the entry without extension>.lab'
    for i, f in enumerate(inputList):
        api.setInputFile(f)
        api.prepareDocument(language)
        strUnformatted = api.getCleanedText()

        outputFile = "%s/%s.lab" % (outputDir, os.path.splitext(os.path.basename(f))[0])
        io.writeFileContent(outputFile, strUnformatted + u"\n")
示例#21
0
    #Ask every suite provider for the tests selected by 'unitTestList'
    commonTestSuite = CommonTestSuite.getCommonTestSuite(unitTestList)
    frenchTestSuite = FrenchTestSuite.getFrenchTestSuite(unitTestList)
    germanTestSuite = GermanTestSuite.getGermanTestSuite(unitTestList)
    formulaTestSuite = FormulaTestSuite.getFormulaTestSuite(unitTestList)

    #Merge in the order common, formula, french, german; a provider
    #that returned None contributes nothing
    allTestSuite = []
    if commonTestSuite is not None:
        allTestSuite.extend(commonTestSuite)
    if formulaTestSuite is not None:
        allTestSuite.extend(formulaTestSuite)
    if frenchTestSuite is not None:
        allTestSuite.extend(frenchTestSuite)
    if germanTestSuite is not None:
        allTestSuite.extend(germanTestSuite)

    #Wrap the merged list into a single suite
    allTests = unittest.TestSuite(allTestSuite)

    return allTests

if __name__ == "__main__":
    #At least one unit test name (or 'all') is required.
    #print is called with one parenthesised argument, which prints the
    #same text under Python 2 and Python 3.
    if len(sys.argv) < 2:
        print(getUsage())
        print("    usage: %s 'unit test name 1 or all' 'unit test name 2' " % sys.argv[0])
        print("")
        sys.exit(0)

    MyFile.checkDirExists(TEMPDIRUNITTEST)

    #Run the tests named on the command line
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(asrtTestSuite(sys.argv[1:]))
示例#22
0
    #Merge in the order common, formula, french, german, english; a
    #suite that is None contributes nothing
    allTestSuite = []
    if commonTestSuite is not None:
        allTestSuite.extend(commonTestSuite)
    if formulaTestSuite is not None:
        allTestSuite.extend(formulaTestSuite)
    if frenchTestSuite is not None:
        allTestSuite.extend(frenchTestSuite)
    if germanTestSuite is not None:
        allTestSuite.extend(germanTestSuite)
    if englishTestSuite is not None:
        allTestSuite.extend(englishTestSuite)

    #Wrap the merged list into a single suite
    allTests = unittest.TestSuite(allTestSuite)

    return allTests


if __name__ == "__main__":
    #At least one unit test name (or 'all') is required
    if len(sys.argv) < 2:
        print(getUsage())
        usageLine = "    usage: %s 'unit test name 1 or all' 'unit test name 2' " % sys.argv[0]
        for outputLine in (usageLine, ""):
            print(outputLine)
        sys.exit(0)

    MyFile.checkDirExists(TEMPDIRUNITTEST)

    #Run the tests named on the command line
    testRunner = unittest.TextTestRunner(verbosity=2)
    selectedTests = asrtTestSuite(sys.argv[1:])
    testRunner.run(selectedTests)