def parse(self, path, encoding):
    """Feed every line of the file at `path` to `self.parseLine_`.

    Parsing starts in header mode (`self.parseContent_` is reset to
    `self.parseHeader_`).  While lines are being processed,
    `self.currentLine_` holds the 1-based number of the line currently
    handed to `self.parseLine_`; it is removed again after a successful
    run.  An error stream is opened on 'stderr' and kept in `self.err`
    for the duration of the call.

    path     -- name of the input file, opened through `uopen`
    encoding -- encoding used for both the input file and the error stream

    Both streams are released through `uclose` even when opening the
    input or processing a line raises; the exception itself propagates.
    """
    self.parseContent_ = self.parseHeader_
    self.err = uopen('stderr', encoding, 'w')
    try:
        fd = uopen(path, encoding, 'r')
        try:
            self.currentLine_ = 0
            for line in fd:
                self.currentLine_ += 1
                self.parseLine_(line)
            # Only dropped on success: after a failure the attribute still
            # tells the caller which line was being processed.
            del self.currentLine_
        finally:
            uclose(fd)
    finally:
        uclose(self.err)
示例#2
0
    def endMain(self):
        """Write 'default.config' into `self.latticeArchiveDir`.

        The file is opened as UTF-8 through `uopen`, filled by
        `self.writeDefaultConfig` and closed through `uclose` -- also when
        writing raises, so the handle is never leaked.  When `self.isLog`
        is set, the path of the written file is reported on stderr.
        """
        defaultConfigPath = os.path.join(self.latticeArchiveDir, 'default.config')
        fd = uopen(defaultConfigPath, 'utf-8', 'w')
        try:
            self.writeDefaultConfig(fd)
        finally:
            uclose(fd)
        if self.isLog:
            print >> sys.stderr, '-->', defaultConfigPath
示例#3
0
 def countWords(self, which):
     """Count the words of the text selected by `which` and report on them.

     The counts are built from `self.readText(which)`.  Statistics go to
     stdout, a coverage report is written to
     '<name>-<which>.cov.gz' and the counts are exported to
     '<name>-<which>.counts.gz', both inside `self.target_`.
     """
     wordCounts = counts.countWords(self.readText(which))
     wordCounts.printStat(sys.stdout)

     # Both output files share the same '<name>-<which>' stem.
     stem = self.name + '-' + which

     coveragePath = os.path.join(self.target_, stem + '.cov.gz')
     coverageOut = uopen(coveragePath, self.encoding, 'w')
     wordCounts.reportCoverage(coverageOut)

     countsPath = os.path.join(self.target_, stem + '.counts.gz')
     wordCounts.exportText(zopen(countsPath, 'w'))
示例#4
0
    def parse(self, pathIn, pathOut, encoding):
        """Read `pathIn` line by line and write the result as XML to `pathOut`.

        pathIn   -- input file, opened through `uopen`
        pathOut  -- output file, opened through `openXml`
        encoding -- encoding used for both files

        `self.category_` is reset to an empty list and line handling starts
        with `self.processHeaderLine_`.  After all lines are consumed the
        output is produced by `self.writeBliss_`.  The input handle and the
        XML writer are released even when a line handler or the writer
        raises; the exception itself propagates.  When `self.isLog` is set,
        the conversion is reported on stderr.
        """
        self.category_ = []
        self.processLine_ = self.processHeaderLine_
        fd = uopen(pathIn, encoding, 'r')
        try:
            for line in fd:
                # Looked up on every iteration, so a handler that rebinds
                # self.processLine_ takes effect from the next line on.
                self.processLine_(line)
        finally:
            uclose(fd)
        xml = openXml(pathOut, encoding)
        try:
            self.writeBliss_(xml)
        finally:
            closeXml(xml)
        if self.isLog:
            print >> sys.stderr, pathIn, '-->', pathOut
 def startSegment(self, attr):
     """Run the HTK extractor on the lattice file belonging to a segment.

     attr -- mapping of segment attributes; its 'name' entry selects the
             lattice file, falling back to the current value of
             `self.segmentCounter` when no name is given

     The lattice path is `self.recordingDir` joined with the segment name
     plus `self.htkLatticeSuffix`.  `self.segmentCounter` is incremented on
     every call, whether or not the file exists.  A missing file only
     produces a warning on stderr.  An existing file is handed to
     `self.htkExtractor.extract` and is closed afterwards even when the
     extraction raises.
     """
     segmentName = attr.get('name', str(self.segmentCounter))
     htkLatticePath = os.path.join(
         self.recordingDir, segmentName + self.htkLatticeSuffix)
     self.segmentCounter += 1

     if not os.path.exists(htkLatticePath):
         print >> sys.stderr, 'Warning:', htkLatticePath, 'does not exist'
         return

     fd = uopen(htkLatticePath, self.htkLatticeEncoding, 'r')
     try:
         self.htkExtractor.extract(fd)
     finally:
         uclose(fd)
     if self.isLog:
         print >> sys.stderr, htkLatticePath, '-->'
示例#6
0
def main(options, args):
    """Collect word counts and emit the requested reports.

    options -- parsed command line options; the attributes read here are
               `external`, `importFiles`, `encoding`, `coverage`,
               `vocabulary` and `out`
    args    -- names of text files whose whitespace-separated words are
               added to the counts

    Counts start from `ExternalCounts` when `options.external` is set and
    are merged with one `InternalCounts` per imported file; if neither
    source is given an empty `InternalCounts` is used.  Statistics are
    always printed to stdout.  A coverage report (optionally restricted by
    a vocabulary file) and a text export are written only when the
    corresponding option is set.
    """
    counts = None
    if options.external:
        counts = ExternalCounts(options.external, True)

    for filename in options.importFiles or ():
        imported = InternalCounts()
        imported.importText(uopen(filename))
        if counts is not None:
            counts.addCounts(imported)
        else:
            # The first imported file becomes the accumulator itself.
            counts = imported

    if counts is None:
        counts = InternalCounts()

    for filename in args:
        for line in uopen(filename, options.encoding):
            for word in line.split():
                counts.add(word)

    counts.printStat(sys.stdout)

    if options.coverage:
        vocabulary = None
        if options.vocabulary:
            # One vocabulary entry per line, surrounding whitespace removed.
            vocabulary = set()
            for line in uopen(options.vocabulary, options.encoding):
                vocabulary.add(line.strip())
        coverageOut = uopen(options.coverage, options.encoding, mode='w')
        counts.reportCoverage(coverageOut, vocabulary)

    if options.out:
        exportOut = uopen(options.out, mode='w')
        counts.exportText(exportOut)
示例#7
0
                         default="iso-8859-1",
                         help="default is 'iso-8859-1'",
                         metavar="ENCODING")
    optparser.add_option("",
                         "--force",
                         dest="force",
                         action="store_true",
                         default=False,
                         help="force re-creation of files and directories")

    if len(sys.argv) == 1:
        optparser.print_help()
        sys.exit(0)
    options, args = optparser.parse_args()

    stderr = uopen('stderr', 'utf-8', 'w')
    print >> stderr

    # create bliss corpus
    stmPath = getNormalizedPath(options.stmPath)
    if not valid(stmPath):
        corpusPath = getNormalizedPath(options.corpusPath)
        if not valid(corpusPath):
            print >> stderr, 'Error: Need either a valid stm- or bliss-corpus-file; see --help'
            sys.exit(1)
        else:
            print >> stderr, 'Use existing bliss corpus \"' + corpusPath + '\"'
    else:
        corpusPath = getNormalizedPath(
            getValue(
                options.corpusPath,
示例#8
0
 def writeText(self, which):
     """Open '<name>-<which>.text.gz' inside `self.target_` for writing.

     Returns the handle produced by `uopen` with `self.encoding`; closing
     it is left to the caller.
     """
     textFileName = self.name + '-' + which + '.text.gz'
     textPath = os.path.join(self.target_, textFileName)
     return uopen(textPath, self.encoding, 'w')