def parse(self, path, encoding):
    """Feed the file at `path` to the parser, one line at a time.

    The content handler is reset to ``self.parseHeader_``, an error stream
    is opened as ``self.err`` via ``uopen('stderr', encoding, 'w')``, and
    every line of the input is handed to ``self.parseLine_``.  While
    parsing, ``self.currentLine_`` holds the 1-based number of the line
    being processed.

    Both streams are released through ``uclose`` even when opening the
    input or handling a line raises, so a failure does not leak handles.

    path     -- location of the input, passed to ``uopen``
    encoding -- encoding used for the input and for the error stream
    """
    self.parseContent_ = self.parseHeader_
    self.err = uopen('stderr', encoding, 'w')
    try:
        fd = uopen(path, encoding, 'r')
        try:
            self.currentLine_ = 0
            for line in fd:
                self.currentLine_ += 1
                self.parseLine_(line)
            # Removed on the success path only: after a failure the
            # attribute still holds the number of the offending line.
            del self.currentLine_
        finally:
            uclose(fd)
    finally:
        # Closed last, matching the original order (input first, then err).
        uclose(self.err)
def endMain(self):
    """Write ``default.config`` into the lattice archive directory.

    The file is created inside ``self.latticeArchiveDir``, opened as UTF-8
    for writing, and filled by ``self.writeDefaultConfig``.  When
    ``self.isLog`` is set, the written path is reported on stderr.
    """
    defaultConfigPath = os.path.join(self.latticeArchiveDir, 'default.config')
    fd = uopen(defaultConfigPath, 'utf-8', 'w')
    try:
        self.writeDefaultConfig(fd)
    finally:
        # Release the handle even if writing the configuration fails.
        uclose(fd)
    if self.isLog:
        print >> sys.stderr, '-->', defaultConfigPath
def countWords(self, which):
    """Count the words of text `which` and write the resulting reports.

    The text returned by ``self.readText(which)`` is counted with
    ``counts.countWords``; statistics are printed to stdout, a coverage
    report goes to ``<name>-<which>.cov.gz`` and the counts themselves to
    ``<name>-<which>.counts.gz``, both inside ``self.target_``.
    """
    wordCounts = counts.countWords(self.readText(which))
    wordCounts.printStat(sys.stdout)

    # Both output files share the same directory and "<name>-<which>" stem.
    stem = os.path.join(self.target_, self.name + '-' + which)

    # NOTE(review): the handles are handed over inline and never closed
    # here; presumably the callee or garbage collection does it -- confirm.
    wordCounts.reportCoverage(uopen(stem + '.cov.gz', self.encoding, 'w'))
    wordCounts.exportText(zopen(stem + '.counts.gz', 'w'))
def parse(self, pathIn, pathOut, encoding):
    """Read `pathIn` line by line and write the result as XML to `pathOut`.

    ``self.category_`` is reset to an empty list and line handling starts
    with ``self.processHeaderLine_``.  After all input lines have been
    processed, ``self.writeBliss_`` writes to a writer obtained from
    ``openXml(pathOut, encoding)``.  When ``self.isLog`` is set, the
    conversion is reported on stderr.

    pathIn   -- input file, opened through ``uopen``
    pathOut  -- output file, opened through ``openXml``
    encoding -- encoding used for both files
    """
    self.category_ = []
    self.processLine_ = self.processHeaderLine_
    fd = uopen(pathIn, encoding, 'r')
    try:
        for line in fd:
            # Looked up on every iteration; presumably the handlers swap
            # self.processLine_ once the header is done -- confirm.
            self.processLine_(line)
    finally:
        # Release the input even if a line handler raises.
        uclose(fd)
    xml = openXml(pathOut, encoding)
    self.writeBliss_(xml)
    # Kept on the success path: it is not visible here whether closeXml is
    # safe to call on a partially written document.
    closeXml(xml)
    if self.isLog:
        print >> sys.stderr, pathIn, '-->', pathOut
def startSegment(self, attr):
    """Extract the HTK lattice that belongs to the segment being started.

    The lattice file name is the segment's ``name`` attribute (falling back
    to the current value of ``self.segmentCounter``) plus
    ``self.htkLatticeSuffix``, looked up inside ``self.recordingDir``.  The
    counter is advanced for every segment, whether or not a file is found.
    A missing file only produces a warning on stderr.

    attr -- mapping of the segment's attributes; only ``name`` is read
    """
    segmentName = attr.get('name', str(self.segmentCounter))
    htkLatticePath = os.path.join(
        self.recordingDir, segmentName + self.htkLatticeSuffix)
    self.segmentCounter += 1
    if os.path.exists(htkLatticePath):
        fd = uopen(htkLatticePath, self.htkLatticeEncoding, 'r')
        try:
            self.htkExtractor.extract(fd)
        finally:
            # Release the handle even if the extraction fails.
            uclose(fd)
        if self.isLog:
            print >> sys.stderr, htkLatticePath, '-->'
    else:
        print >> sys.stderr, 'Warning:', htkLatticePath, 'does not exist'
def main(options, args):
    """Accumulate word counts from the given sources and report them.

    Sources, in order: an external count store (``options.external``),
    previously exported count files (``options.importFiles``), and the
    whitespace-separated words of every text file in `args`.  Statistics
    are always printed to stdout; a coverage report (``options.coverage``,
    optionally restricted by ``options.vocabulary``) and an export of the
    counts (``options.out``) are written on request.

    NOTE(review): none of the opened files is closed explicitly here.
    """
    # Start from the external store when one is configured.
    counts = None
    if options.external:
        counts = ExternalCounts(options.external, True)

    # Fold in previously exported counts; the first import becomes the
    # accumulator itself when there is no external store.
    for importPath in (options.importFiles or ()):
        imported = InternalCounts()
        imported.importText(uopen(importPath))
        if counts is not None:
            counts.addCounts(imported)
        else:
            counts = imported

    if counts is None:
        counts = InternalCounts()

    # Count every whitespace-separated token of the plain-text inputs.
    for textPath in args:
        for textLine in uopen(textPath, options.encoding):
            for token in textLine.split():
                counts.add(token)

    counts.printStat(sys.stdout)

    if options.coverage:
        vocabulary = None
        if options.vocabulary:
            # One vocabulary entry per line, surrounding whitespace removed.
            vocabulary = set(
                entry.strip()
                for entry in uopen(options.vocabulary, options.encoding))
        counts.reportCoverage(
            uopen(options.coverage, options.encoding, mode='w'), vocabulary)

    if options.out:
        counts.exportText(uopen(options.out, mode='w'))
default="iso-8859-1", help="default is 'iso-8859-1'", metavar="ENCODING") optparser.add_option("", "--force", dest="force", action="store_true", default=False, help="force re-creation of files and directories") if len(sys.argv) == 1: optparser.print_help() sys.exit(0) options, args = optparser.parse_args() stderr = uopen('stderr', 'utf-8', 'w') print >> stderr # create bliss corpus stmPath = getNormalizedPath(options.stmPath) if not valid(stmPath): corpusPath = getNormalizedPath(options.corpusPath) if not valid(corpusPath): print >> stderr, 'Error: Need either a valid stm- or bliss-corpus-file; see --help' sys.exit(1) else: print >> stderr, 'Use existing bliss corpus \"' + corpusPath + '\"' else: corpusPath = getNormalizedPath( getValue( options.corpusPath,
def writeText(self, which):
    """Open ``<name>-<which>.text.gz`` inside ``self.target_`` for writing.

    Returns the handle produced by ``uopen`` with ``self.encoding``; the
    caller is responsible for it from then on.
    """
    targetDir = self.target_
    basename = self.name + '-' + which + '.text.gz'
    textPath = os.path.join(targetDir, basename)
    return uopen(textPath, self.encoding, 'w')