Python setupLogging示例，asrt.common.LoggingSetup.setupLogging Python示例

示例#1

0

显示文件

文件： run_data_preparation.py 项目： idiap/asrt

    # Unwrap the parsed options: each value is read from index 0 of its
    # attribute (presumably the options are declared with nargs=1 -- confirm
    # against the parser setup, which is not visible here).
    inputFile = args.inputFile[0]
    outputDir = args.outputDir[0]
    # The language option is converted to an integer code; only the value 0
    # is treated specially further down.
    language = int(args.language[0])
    regexFile = args.regexFile[0]

    # Flags: every switch is coerced to a plain bool.
    # NOTE(review): 'debug' is not used in the lines visible here.
    debug = bool(args.debug)
    filterSentences = bool(args.filter)
    filterSentences2ndStage  = bool( args.filter2ndStage )
    removePunctuation = bool(args.rmpunct)
    verbalizePunctuation = bool(args.vbpunct)
    rawSeg = bool(args.rawseg)
    lmModeling = bool(args.lm)
    # Inverted switch: new words are kept unless args.trim is set.
    keepNewWords = bool(not args.trim)

    # Set up logging at INFO level, passing a task log path located inside
    # the output directory.
    setupLogging(logging.INFO, outputDir + "/task_log.txt")

    # API setup: forward every option gathered above to the API object.
    api = DataPreparationAPI(inputFile, outputDir)
    api.setRegexFile(regexFile)
    api.setFilterSentences(filterSentences)
    api.setFilterSentences2ndStage(filterSentences2ndStage)
    api.setLMModeling(lmModeling)
    api.setRemovePunctuation(removePunctuation)
    api.setVerbalizePunctuation(verbalizePunctuation)
    # rawSeg is the opposite of NLTK segmentation, hence the negation.
    api.setSegmentWithNLTK(not rawSeg)
    api.setKeepNewWords(keepNewWords)

    # Language code 0 triggers classifier training
    # (presumably 0 means "language not specified" -- TODO confirm).
    if language == 0:
        api.trainClassifier()

示例#2

0

显示文件

文件： run_apply_regex.py 项目： d-unknown-processor/asrt

        count += 1
        if count % 50000 == 0:
            print "Processed %d values" % count

        #Read next line
        l = fd.readline()

    io.closeFile(fd)

    strContent = u"\n".join(linesList)
    io.writeFileContent(outputFile, strContent)

################
# main
#
if __name__ == "__main__":
    # Command line: three mandatory options, each taking exactly one value.
    parser = argparse.ArgumentParser(description=usage)
    for shortOpt, longOpt, helpText, destName in (
            ("-i", "--input", "input file", "inputFile"),
            ("-o", "--output", "output file", "outputFile"),
            ("-r", "--regex", "regular expression file", "regexFile")):
        parser.add_argument(shortOpt, longOpt, help=helpText, nargs=1,
                            dest=destName, required=True)

    args = parser.parse_args()

    # Every path is made absolute before it is handed on.
    inputFile, outputFile, regexFile = (
        os.path.abspath(values[0])
        for values in (args.inputFile, args.outputFile, args.regexFile))

    setupLogging(logging.INFO)

    applyRegexes(inputFile, outputFile, regexFile)

示例#3

0

显示文件

# along with asrt. If not, see <http://opensource.org/licenses/>.

__author__ = "Alexandre Nanchen"
__version__ = "Revision: 1.0 "
__date__ = "Date: 2015/09"
__copyright__ = "Copyright (c) 2015 Idiap Research Institute"
__license__ = "BSD 3-Clause"

import unittest, re, string, logging

from asrt.common.formula.FormulaLMPreparation import LMPreparationFormula
from asrt.common.AsrtConstants import UTF8MAP, SPACEPATTERN, DOTCOMMAEXCLUDE, PUNCTUATIONEXCLUDE
from asrt.common.AsrtConstants import ABBREVIATIONS
from asrt.common.LoggingSetup import setupLogging

setupLogging(logging.INFO, "./output.log")


class TestFormulaLMPreparation(unittest.TestCase):
    allPunctList = DOTCOMMAEXCLUDE + PUNCTUATIONEXCLUDE

    def verifyEqual(self, testList, f, callback):
        """Apply callback to each input text and check the resulting text.

        testList -- iterable of (input text, expected text) pairs
        f        -- object whose strText attribute receives the input text
                    before the callback runs and is read back afterwards
        callback -- zero-argument callable invoked once per pair

        Expected and actual texts are compared after UTF-8 encoding.
        """
        for t, gt in testList:
            f.strText = t
            callback()
            # assertEqual replaces the deprecated assertEquals alias, which
            # was removed from unittest in Python 3.12.
            self.assertEqual(gt.encode('utf-8'), f.strText.encode('utf-8'))

    ############
    #Tests
    #
    def testNormalizeUtf8(self):

示例#4

0

显示文件

#
if __name__ == "__main__":
    # Build the command line parser
    parser = argparse.ArgumentParser(description=usage)

    # Mandatory options, each taking exactly one value
    parser.add_argument("-t", "--target", dest="targetDir", nargs=1, required=True,
                        help="target directory containing the data.olist and data.omap")
    parser.add_argument("-o", "--output", dest="outputDir", nargs=1, required=True,
                        help="output directory")
    parser.add_argument("-r", "--regex", dest="regexFile", nargs=1, required=True,
                        help="regex file")

    # Optional on/off switches
    parser.add_argument("-f", "--filter", dest="filter", action="store_true",
                        help="filter sentences")
    parser.add_argument("-d", "--debug", action="store_true",
                        help="enable debug output")
    parser.add_argument("-n", "--rmpunctuation", action="store_true",
                        help="remove punctuation")
    parser.add_argument("-p", "--vbpunctuation", action="store_true",
                        help="verbalize punctuation")
    parser.add_argument("-s", "--rawseg", dest="rawseg", action="store_true",
                        help="do not segment sentences with NLTK")
    parser.add_argument("-m", "--lm", dest="lm", action="store_true",
                        help="prepare for lm modeling")

    # Read the command line
    args = parser.parse_args()
    targetDir, outputDir, regexFile = args.targetDir[0], args.outputDir[0], args.regexFile[0]

    # Kept as text because it is substituted into the parameter string below
    segmentWithNLTK = "False" if args.rawseg else "True"

    setupLogging(logging.INFO, outputDir + "/task_log.txt")

    # Render the task parameters, then build and run the import task
    taskParameters = STRPARAMETERS % (regexFile, str(args.debug),
                                      args.rmpunctuation, args.vbpunctuation,
                                      segmentWithNLTK, args.filter, args.lm)
    task = ImportDocumentTask(TaskInfo(taskParameters, outputDir, targetDir))
    task.execute()

示例#5

0

显示文件

文件： run_data_preparation_individual_files.py 项目： idiap/asrt

    # Unwrap the parsed options: each value is read from index 0 of its
    # attribute (presumably the options are declared with nargs=1 -- confirm
    # against the parser setup, which is not visible here).
    # NOTE(review): 'inputList' is not used in the lines visible here.
    inputList = args.inputList[0]
    outputDir = args.outputDir[0]
    # The language option is converted to an integer code; only the value 0
    # is treated specially further down.
    language = int(args.language[0])
    regexFile = args.regexFile[0]

    # Flags: every switch is coerced to a plain bool.
    # NOTE(review): 'debug' is not used in the lines visible here.
    debug = bool(args.debug)
    filterSentences = bool(args.filter)
    filterSentences2ndStage  = bool( args.filter2ndStage )
    removePunctuation = bool(args.rmpunct)
    verbalizePunctuation = bool(args.vbpunct)
    rawSeg = bool(args.rawseg)
    lmModeling = bool(args.lm)
    # Inverted switch: new words are kept unless args.trim is set.
    keepNewWords = bool(not args.trim)

    # Set up logging at INFO level, passing a log path located inside the
    # output directory.
    setupLogging(logging.INFO, outputDir + "/data_preparation_log.txt")

    # API setup: the API is created without an input file (None is passed as
    # the first argument); every option gathered above is then forwarded.
    api = DataPreparationAPI(None, outputDir)
    api.setRegexFile(regexFile)
    api.setFilterSentences(filterSentences)
    api.setFilterSentences2ndStage(filterSentences2ndStage)
    api.setLMModeling(lmModeling)
    api.setRemovePunctuation(removePunctuation)
    api.setVerbalizePunctuation(verbalizePunctuation)
    # rawSeg is the opposite of NLTK segmentation, hence the negation.
    api.setSegmentWithNLTK(not rawSeg)
    api.setKeepNewWords(keepNewWords)

    # Language code 0 triggers classifier training
    # (presumably 0 means "language not specified" -- TODO confirm).
    if language == 0:
        api.trainClassifier()

示例#6

0

显示文件

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=usage)

    # All three options are mandatory and take exactly one value, so the
    # shared keyword arguments are spelled out once.
    singleValue = dict(nargs=1, required=True)
    parser.add_argument("-i", "--input", help="input file",
                        dest="inputFile", **singleValue)
    parser.add_argument("-o", "--output", help="output file",
                        dest="outputFile", **singleValue)
    parser.add_argument("-r", "--regex", help="regular expression file",
                        dest="regexFile", **singleValue)

    args = parser.parse_args()

    # Unwrap the single-element option lists and make the paths absolute.
    rawInput, rawOutput, rawRegex = args.inputFile[0], args.outputFile[0], args.regexFile[0]
    inputFile = os.path.abspath(rawInput)
    outputFile = os.path.abspath(rawOutput)
    regexFile = os.path.abspath(rawRegex)

    setupLogging(logging.INFO)

    applyRegexes(inputFile, outputFile, regexFile)

示例#7

0

显示文件

文件： FormulaLMPreparationUnitTest.py 项目： idiap/asrt

# along with asrt. If not, see <http://opensource.org/licenses/>.

__author__ = "Alexandre Nanchen"
__version__ = "Revision: 1.0 "
__date__ = "Date: 2015/09"
__copyright__ = "Copyright (c) 2015 Idiap Research Institute"
__license__ = "BSD 3-Clause"

import unittest, re, string, logging

from asrt.common.formula.FormulaLMPreparation import LMPreparationFormula
from asrt.common.AsrtConstants import UTF8MAP, SPACEPATTERN, DOTCOMMAEXCLUDE, PUNCTUATIONEXCLUDE
from asrt.common.AsrtConstants import ABBREVIATIONS
from asrt.common.LoggingSetup import setupLogging

setupLogging(logging.INFO, "./output.log")

class TestFormulaLMPreparation(unittest.TestCase):
    allPunctList = DOTCOMMAEXCLUDE + PUNCTUATIONEXCLUDE

    def verifyEqual(self, testList, f, callback):
        """Check that callback turns every input text into its expected text.

        testList -- iterable of (input text, expected text) pairs
        f        -- object whose strText attribute is assigned the input
                    text, then read back once the callback has run
        callback -- zero-argument callable invoked once per pair

        Both sides of the comparison are UTF-8 encoded first.
        """
        for t, gt in testList:
            f.strText = t
            callback()
            # Use assertEqual: the assertEquals alias is deprecated and no
            # longer exists in unittest from Python 3.12 onwards.
            self.assertEqual(gt.encode('utf-8'), f.strText.encode('utf-8'))

    ############
    #Tests
    #
    def testNormalizeUtf8(self):
        languages = ['0', '1', '2']

示例#8

0

显示文件

__date__ = "Date: 2015/09"
__copyright__ = "Copyright (c) 2015 Idiap Research Institute"
__license__ = "BSD 3-Clause"

import unittest
import re
import string
import logging

from asrt.common.formula.FormulaLMPreparation import LMPreparationFormula
from asrt.common.AsrtConstants import UTF8MAP, SPACEPATTERN, DOTCOMMAEXCLUDE, PUNCTUATIONEXCLUDE
from asrt.common.AsrtConstants import ABBREVIATIONS
from asrt.common.LoggingSetup import setupLogging
from asrt.config.AsrtConfig import TEMPDIRUNITTEST

setupLogging(logging.INFO, TEMPDIRUNITTEST + "/output.log")


class TestFormulaLMPreparation(unittest.TestCase):
    allPunctList = DOTCOMMAEXCLUDE + PUNCTUATIONEXCLUDE

    def verifyEqual(self, testList, f, callback):
        """Feed each input text to f, run callback, and compare the outcome.

        testList holds (input text, expected text) pairs. For every pair the
        input is stored in f.strText, callback is invoked without arguments,
        and f.strText is then compared with the expected text, both encoded
        as UTF-8.
        """
        for sourceText, expectedText in testList:
            f.strText = sourceText
            callback()
            expectedBytes = expectedText.encode('utf-8')
            actualBytes = f.strText.encode('utf-8')
            self.assertEqual(expectedBytes, actualBytes)

    ############
    # Tests
    #
    def testNormalizeUtf8(self):

示例#9

0

显示文件

文件： run_data_preparation_individual_files.py 项目： colincwilson/asrt

    # Unwrap the parsed options: each value is read from index 0 of its
    # attribute (presumably the options are declared with nargs=1 -- confirm
    # against the parser setup, which is not visible here).
    # NOTE(review): 'inputList' is not used in the lines visible here.
    inputList = args.inputList[0]
    outputDir = args.outputDir[0]
    # The language option is converted to an integer code; only the value 0
    # is treated specially further down.
    language = int(args.language[0])
    regexFile = args.regexFile[0]

    # Flags: every switch is coerced to a plain bool.
    # NOTE(review): 'debug' is not used in the lines visible here.
    debug = bool(args.debug)
    filterSentences = bool(args.filter)
    filterSentences2ndStage = bool(args.filter2ndStage)
    removePunctuation = bool(args.rmpunct)
    verbalizePunctuation = bool(args.vbpunct)
    rawSeg = bool(args.rawseg)
    lmModeling = bool(args.lm)
    # Inverted switch: number expansion is enabled unless args.trim is set.
    # NOTE(review): a 'trim' option driving number expansion looks
    # surprising -- confirm this mapping is intended.
    expandNumberInWords = bool(not args.trim)

    # Set up logging at INFO level, passing a log path located inside the
    # output directory.
    setupLogging(logging.INFO, outputDir + "/data_preparation_log.txt")

    # Api setup: the API is created without an input file (None is passed as
    # the first argument); every option gathered above is then forwarded.
    api = DataPreparationAPI(None, outputDir)
    api.setRegexFile(regexFile)
    api.setFilterSentences(filterSentences)
    api.setFilterSentences2ndStage(filterSentences2ndStage)
    api.setLMModeling(lmModeling)
    api.setRemovePunctuation(removePunctuation)
    api.setVerbalizePunctuation(verbalizePunctuation)
    # rawSeg is the opposite of NLTK segmentation, hence the negation.
    api.setSegmentWithNLTK(not rawSeg)
    api.setExpandNumberInWords(expandNumberInWords)

    # Language code 0 triggers classifier training
    # (presumably 0 means "language not specified" -- TODO confirm).
    if language == 0:
        api.trainClassifier()