# Command-line driver: build one ".idx" index file per input document.
#
# Options:
#   -s / --swift    read the documents from a swift container instead of
#                   taking file names from the positional arguments
#   -o / --outdir   directory that receives the index files (default "index")
#
# Without --swift every positional argument is treated as a document to index.
import errno
import optparse
import os
import sys


def _index_path(outdir, name):
    """Return the index-file path for *name* inside *outdir*.

    The extension of *name* (if any) is replaced by ".idx".
    """
    base = os.path.splitext(name)[0]
    return os.path.join(outdir, base + ".idx")


parser = optparse.OptionParser()
parser.add_option("-s", "--swift", dest="swift", action="store_true",
                  default=False, help="read data from a swift container")
parser.add_option("-o", "--outdir", dest="outdir", action="store",
                  default='index', help="output directory for index files")
(options, args) = parser.parse_args()

# Create the output directory. Attempting the creation and tolerating
# "already exists" avoids the window between an exists() check and makedirs()
# in which another process could create the directory first.
try:
    os.makedirs(options.outdir)
except OSError as exc:
    # Only an already-present *directory* is acceptable; anything else
    # (permission problem, a regular file in the way, ...) is a real error.
    if exc.errno != errno.EEXIST or not os.path.isdir(options.outdir):
        raise

if options.swift:
    container = SwiftTextContainer()
    for doc in container.documents():
        # print(x) with a single argument behaves the same on Python 2 and 3.
        print(doc)
        # NOTE(review): the object name is used without basename(), unlike the
        # local-file branch below; a name containing "/" would point into a
        # sub-directory of outdir that is not created here -- confirm intended.
        out = _index_path(options.outdir, doc['name'])
        # The builder is instantiated and discarded; presumably its
        # constructor performs the indexing -- verify against the class.
        TroveSwiftIndexBuilder(doc['name'], out=out)
else:
    for doc in args:
        print(doc)
        # Index files are named after the document's base name only, so the
        # source directory of the document does not appear in the output path.
        out = _index_path(options.outdir, os.path.basename(doc))
        TroveIndexBuilder(doc, out=out)
# NOTE(review): this line starts in the middle of a definition whose header is
# not visible here (presumably `countwords`, the function mapped over the pool
# below -- confirm). Only what the visible statements show is described.
#
# Visible tail of that definition:
#   * keeps per-year tallies in `count` and `wordcount` (the latter grows by `wc`);
#   * increments `n`, and every INTERVAL-th value of `n` calls
#     write(count, wordcount, outfile) and prints "<n>|" to stdout, flushing
#     so the progress marker appears immediately;
#   * ends with a bare `except: pass`, which silently discards every error
#     raised in the corresponding try body.
# NOTE(review): a year that is not yet in `count` is stored as 0 while later
#   sightings add 1, so `count` looks like it ends up one lower than the number
#   of items per year -- confirm whether that is intended.
#
# Script entry point (`if __name__=='__main__'`):
#   * reads WC_INTERVAL and PROCESSES from the 'default' section of the config
#     returned by readconfig() and converts both to int;
#   * creates a Pool with `processes` workers and maps `countwords` over
#     SwiftTextContainer().documents().
# NOTE(review): INTERVAL is only assigned inside the __main__ guard; the worker
#   code above presumably relies on inheriting that module global -- verify
#   this holds for the process start method in use.
count[year] += 1 else: count[year] = 0 if year in wordcount: wordcount[year] += wc else: wordcount[year] = wc n += 1 if n % INTERVAL == 0: write(count, wordcount, outfile) sys.stdout.write("%s|" % n) sys.stdout.flush() except: pass if __name__=='__main__': config = readconfig() INTERVAL = int(config.get('default', 'WC_INTERVAL')) processes = int(config.get('default', 'PROCESSES')) sw = SwiftTextContainer() pool = Pool(processes) pool.map(countwords, sw.documents())