def main():
    """Count term occurrences per volume and write them to a bz2 TSV.

    Reads the first four Extracted-Features volumes matching
    data/*.json.bz2, maps get_term_volume_counts over them with
    FeatureReader.multiprocessing, and writes one
    "<vol[0]>\\t<vol[1]>\\t<term>\\t<count>" row per term to
    term_volume_counts.bz2 (UTF-8 encoded).
    """
    # Get a list of json.bz2 files to read; truncate for the example.
    paths = glob.glob('data/*.json.bz2')[0:4]

    # Start a feature reader with the paths and pass the mapping function.
    # NOTE(review): results presumably yields (vol_metadata, term_counts)
    # pairs — confirm against get_term_volume_counts.
    feature_reader = FeatureReader(paths)
    results = feature_reader.multiprocessing(get_term_volume_counts)

    # Context manager guarantees the output file is closed even if a batch
    # raises mid-loop (the original leaked the handle on error).
    with bz2.BZ2File('term_volume_counts.bz2', "w") as f:
        for vol, result in results:
            # .items() works on both Python 2 and 3; the original
            # .iteritems() raises AttributeError on Python 3.
            for term, count in result.items():
                row = "{0}\t{1}\t{2}\t{3}\n".format(vol[0], vol[1], term, count)
                f.write(row.encode('UTF-8'))
def old():
    """Legacy duplicate of the term-count export routine (kept for reference).

    Reads the first four volumes matching data/*.json.bz2, maps
    get_term_volume_counts over them in parallel, and writes one
    tab-separated "<vol[0]>\\t<vol[1]>\\t<term>\\t<count>" row per term
    to term_volume_counts.bz2 (UTF-8 encoded).
    """
    # Get a list of json.bz2 files to read; truncate for the example.
    paths = glob.glob('data/*.json.bz2')[0:4]

    # Start a feature reader with the paths and pass the mapping function.
    feature_reader = FeatureReader(paths)
    results = feature_reader.multiprocessing(get_term_volume_counts)

    # Context manager closes the output file even on error; the original
    # only closed it on the happy path.
    with bz2.BZ2File('term_volume_counts.bz2', "w") as f:
        for vol, result in results:
            # .items() is valid on Python 2 and 3; .iteritems() is
            # Python-2-only and breaks on Python 3.
            for term, count in result.items():
                row = "{0}\t{1}\t{2}\t{3}\n".format(vol[0], vol[1], term, count)
                f.write(row.encode('UTF-8'))
def generic_processor(map_func, result_func, paths, outpath=None, batch_size=1000):
    """Process volumes in batches, handing each batch's results to result_func.

    Args:
        map_func: callable passed to FeatureReader.multiprocessing for
            each batch of paths.
        result_func: callable(results, csv_writer) that persists one
            batch's mapped results.
        paths: list of input file paths; consumed batch_size at a time.
        outpath: optional path for a bz2-compressed output file. When
            None, rows are written to sys.stdout.
        batch_size: number of volumes per batch.
    """
    if outpath:
        f = bz2.BZ2File(outpath, "w")
    else:
        f = sys.stdout
    # NOTE(review): on Python 3, csv.writer over a binary BZ2File will fail
    # at write time (csv emits str); this matches the original behavior —
    # confirm the intended interpreter version before changing stream modes.
    csvf = csv.writer(f)
    n = 0
    m = math.ceil(float(len(paths)) / batch_size)
    logging.info("Script started")
    try:
        # `while paths` replaces `while True` + trailing break: identical
        # batching, but an empty input list no longer spawns one empty batch.
        while paths:
            start = time.time()
            batch, paths = paths[:batch_size], paths[batch_size:]
            n += 1
            logging.info("Starting batch {0}/{1}".format(n, m))
            feature_reader = FeatureReader(batch)
            results = feature_reader.multiprocessing(map_func)
            result_func(results, csvf)
            # Fixed doubled word ("finished in in") in the log message.
            logging.info("Batch of {0} volumes finished in {1}s".format(
                len(batch), time.time() - start))
            if outpath:
                logging.debug("Output filesize is currently: {0}Gb".format(
                    os.stat(outpath).st_size / (1024**3)))
        logging.info("Script done")
    finally:
        # Close only the file we opened: the original unconditionally called
        # f.close(), which closed sys.stdout when no outpath was given. The
        # try/finally also ensures the file is closed if a batch raises.
        if outpath:
            f.close()