Пример #1
0
    import optparse
    import sys

    # Command-line interface: with --swift, index every document listed by a
    # SwiftTextContainer; otherwise index the files given as positional
    # arguments. One "<base>.idx" output path is computed per input.
    parser = optparse.OptionParser()
    parser.add_option("-s", "--swift", dest="swift", action="store_true", default=False,
                      help="read data from a swift container")
    parser.add_option("-o", "--outdir", dest="outdir", action="store", default='index',
                      help="output directory for index files")

    (options, args) = parser.parse_args()

    # Create the output directory by attempting the creation rather than
    # checking first: a separate exists() test can race with another process
    # creating the same directory. Any failure other than "it is already a
    # directory" is still raised.
    try:
        os.makedirs(options.outdir)
    except OSError:
        if not os.path.isdir(options.outdir):
            raise

    if options.swift:
        container = SwiftTextContainer()

        for doc in container.documents():
            # Single-argument print(...) produces the same output under the
            # Python 2 print statement and the Python 3 print function.
            print(doc)
            # Each listed document is indexed by its 'name' entry.
            # NOTE(review): unlike the local-file branch, no basename() is
            # applied here, so a name containing '/' yields a nested path
            # under outdir -- confirm that is intended.
            base = os.path.splitext(doc['name'])[0]
            out = os.path.join(options.outdir, base + ".idx")
            # The builder's return value is discarded; presumably the index
            # is written as a side effect of construction -- TODO confirm.
            TroveSwiftIndexBuilder(doc['name'], out=out)
    else:
        for doc in args:
            print(doc)
            # Strip the directory and extension so the index file lands
            # directly inside outdir.
            base = os.path.splitext(os.path.basename(doc))[0]
            out = os.path.join(options.outdir, base + ".idx")
            TroveIndexBuilder(doc, out=out)
Пример #2
0
                count[year] += 1
            else:
                count[year] = 0

            if year in wordcount:
                wordcount[year] += wc
            else:
                wordcount[year] = wc

            n += 1

            if n % INTERVAL == 0:
                write(count, wordcount, outfile)
                sys.stdout.write("%s|" % n)
                sys.stdout.flush()
        except:
            pass


if __name__ == '__main__':
    # Script entry point: read settings, then map countwords over every
    # document yielded by the Swift container using a pool of workers.

    config = readconfig()
    # INTERVAL is read as a module-level name by the progress check
    # (`n % INTERVAL`) elsewhere in this file, so it must keep this name.
    INTERVAL = int(config.get('default', 'WC_INTERVAL'))
    # Number of worker processes handed to Pool().
    processes = int(config.get('default', 'PROCESSES'))

    sw = SwiftTextContainer()

    pool = Pool(processes)
    try:
        pool.map(countwords, sw.documents())
    except BaseException:
        # Stop outstanding work immediately on any failure (including
        # Ctrl-C), then let the original exception propagate.
        pool.terminate()
        raise
    else:
        # Normal completion: no more tasks will be submitted, so let the
        # workers exit on their own.
        pool.close()
    finally:
        # Wait for the worker processes to exit instead of leaving their
        # clean-up to interpreter shutdown.
        pool.join()