Python hadoop_results示例

编程语言: Python

命名空间/包名称: hadoop_utils

方法/功能: hadoop_results

hotexamples.com的示例: 3

Python hadoop_results - 已找到3个示例。以下是从开源项目中提取的、评分最高的 hadoop_utils.hadoop_results 真实 Python 示例。您可以为示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： TrendsetterRank.py 项目： huxiaoqian/case

def ts_rank(job_id, iter_count, input_path, top_n):
    """Run the iterative TsRank pipeline for a job and return its results.

    The input file is staged into a per-job working directory through
    ``HadoopFS``, ``TsRankIter`` is run ``iter_count`` times in total (one
    initial pass plus ``iter_count - 1`` follow-up passes), the output of
    the last pass is handed to ``TsRankSorter`` and the outcome is read back
    with ``hadoop_results``.

    Args:
        job_id: Job identifier; also used as the working directory name.
        iter_count: Total number of ``TsRankIter`` passes to run.
        input_path: Local path of the input file; it must exist.
        top_n: Forwarded unchanged to ``hadoop_results``.

    Returns:
        ``[]`` when ``job_id``, ``iter_count`` or ``input_path`` is falsy or
        ``input_path`` does not exist. If ``monitor(job_id)`` reports
        ``'finished'``, whatever ``hadoop_results(job_id, top_n)`` returns.
        Otherwise the ``(sorted_uids, all_uid_tr)`` pair unpacked from
        ``hadoop_results(job_id, top_n)``.
    """
    # NOTE(review): the failure value is a bare list while the success value
    # is a two-item tuple; callers that unpack the result will fail on the
    # error path. Kept as-is for backward compatibility.
    if not (job_id and iter_count and input_path
            and os.path.exists(input_path)):
        print('error')
        return []

    # Query the job state once so the value that is logged is the same value
    # that is tested (previously monitor() was called twice).
    status = monitor(job_id)
    print('job_id: %s' % (status,))
    if status == 'finished':
        print('hadoop_results start')
        return hadoop_results(job_id, top_n)

    # Recreate the working directory and upload the input file into it.
    fs = HadoopFS()
    fs.rmr('%s' % job_id)
    fs.mkdir('%s' % job_id)
    fs.put(input_path, '%s/hat_init' % job_id)  # path of the input file

    # Initial pass: hat_init -> hat_tmp1.
    ts_rank_iter = TsRankIter(input_path='%s/hat_init' % job_id,
                              output_path='%s/hat_tmp1' % job_id)
    ts_rank_iter.run()

    # Follow-up passes: hat_tmp<step> -> hat_tmp<step + 1>.
    for step in range(1, iter_count):
        ts_rank_iter = TsRankIter(
            input_path='%s/hat_tmp%s' % (job_id, step),
            output_path='%s/hat_tmp%s' % (job_id, step + 1))
        ts_rank_iter.run()

    # Sort. The original comment asked whether this input_path is wrong; it
    # is not: the initial pass writes hat_tmp1 and every follow-up pass
    # writes hat_tmp<step + 1>, so the last output is hat_tmp<iter_count>.
    ts_rank_sorter = TsRankSorter(
        input_path='%s/hat_tmp%s' % (job_id, iter_count),
        output_path='%s/hat_results' % job_id)
    ts_rank_sorter.run()

    # Clean the init and temp files; hat_results is left in place.
    fs.rmr('%s/hat_tmp*' % job_id)
    fs.rmr('%s/hat_init' % job_id)

    sorted_uids, all_uid_tr = hadoop_results(job_id, top_n)
    return sorted_uids, all_uid_tr

示例#2

显示文件

def ts_rank(job_id, iter_count, input_path, top_n):
    """Compute TsRank results for ``job_id`` by running the iteration jobs.

    Steps performed when the job is not already finished: the working
    directory named after ``job_id`` is removed and recreated through
    ``HadoopFS``, ``input_path`` is uploaded as ``hat_init``, ``TsRankIter``
    is run once on ``hat_init`` and then ``iter_count - 1`` more times on
    the previous output, ``TsRankSorter`` turns the last output into
    ``hat_results``, and the init/temp files are removed.

    Args:
        job_id: Job identifier; doubles as the working directory name.
        iter_count: Total number of ``TsRankIter`` passes.
        input_path: Local path of the input file; it must exist.
        top_n: Passed through to ``hadoop_results``.

    Returns:
        ``[]`` if any of ``job_id``, ``iter_count``, ``input_path`` is falsy
        or ``input_path`` does not exist; the return value of
        ``hadoop_results(job_id, top_n)`` if ``monitor(job_id)`` equals
        ``'finished'``; otherwise the ``(sorted_uids, all_uid_tr)`` pair
        unpacked from ``hadoop_results(job_id, top_n)``.
    """
    # NOTE(review): the error path returns a list but the success path
    # returns a 2-tuple, so unpacking callers break on invalid input. The
    # shape is preserved here so existing callers keep working.
    arguments_ok = (job_id and iter_count and input_path
                    and os.path.exists(input_path))
    if not arguments_ok:
        print('error')
        return []

    # Ask for the job state a single time: the logged state and the tested
    # state are then guaranteed to be the same value.
    job_state = monitor(job_id)
    print('job_id: %s' % (job_state,))
    if job_state == 'finished':
        print('hadoop_results start')
        return hadoop_results(job_id, top_n)

    fs = HadoopFS()
    fs.rmr('%s' % job_id)
    fs.mkdir('%s' % job_id)
    fs.put(input_path, '%s/hat_init' % job_id)  # path of the input file

    # init: hat_init -> hat_tmp1
    ts_rank_iter = TsRankIter(input_path='%s/hat_init' % job_id,
                              output_path='%s/hat_tmp1' % job_id)
    ts_rank_iter.run()

    # iter: hat_tmp<n> -> hat_tmp<n + 1> for n = 1 .. iter_count - 1
    for n in range(1, iter_count):
        ts_rank_iter = TsRankIter(
            input_path='%s/hat_tmp%s' % (job_id, n),
            output_path='%s/hat_tmp%s' % (job_id, n + 1))
        ts_rank_iter.run()

    # sort: the original comment questioned this input_path. It is the
    # correct one, because the last pass above (or the init pass when
    # iter_count is 1) writes hat_tmp<iter_count>.
    ts_rank_sorter = TsRankSorter(
        input_path='%s/hat_tmp%s' % (job_id, iter_count),
        output_path='%s/hat_results' % job_id)
    ts_rank_sorter.run()

    # clean init and temp files (hat_results is kept)
    fs.rmr('%s/hat_tmp*' % job_id)
    fs.rmr('%s/hat_init' % job_id)

    sorted_uids, all_uid_tr = hadoop_results(job_id, top_n)
    return sorted_uids, all_uid_tr

示例#3

显示文件

文件： pagerank.py 项目： huxiaoqian/project

def pagerank(job_id, iter_count, input_path, top_n):
    """Run the iterative PageRank jobs for ``job_id`` and return the results.

    Args:
        job_id: Job identifier; also the name of the working directory.
        iter_count: Total number of ``PageRankIter`` passes (one initial
            pass plus ``iter_count - 1`` follow-up passes).
        input_path: Local path of the input file; it must exist.
        top_n: Passed through to ``hadoop_results``.

    Returns:
        ``[]`` when ``job_id``, ``iter_count`` or ``input_path`` is falsy or
        ``input_path`` does not exist; otherwise whatever
        ``hadoop_results(job_id, top_n)`` returns.
    """
    # Guard clauses, evaluated in the same order as a single combined check.
    if not job_id or not iter_count or not input_path:
        return []
    if not os.path.exists(input_path):
        return []

    # A job already reported as finished only needs its results read back.
    job_state = monitor(job_id)
    if job_state == 'finished':
        return hadoop_results(job_id, top_n)

    # Set up a fresh working directory and put the input file into it.
    work_dir = '%s' % job_id
    init_path = '%s/hat_init' % job_id
    hdfs = HadoopFS()
    hdfs.rmr(work_dir)
    hdfs.mkdir(work_dir)
    hdfs.put(input_path, init_path)

    # Initial pass: hat_init -> hat_tmp1.
    PageRankIter(input_path=init_path,
                 output_path='%s/hat_tmp1' % job_id).run()

    # Follow-up passes: hat_tmp<step> -> hat_tmp<step + 1>.
    for step in range(1, iter_count):
        source = '%s/hat_tmp%s' % (job_id, step)
        target = '%s/hat_tmp%s' % (job_id, step + 1)
        PageRankIter(input_path=source, output_path=target).run()

    # Sort the output of the last pass into hat_results.
    last_output = '%s/hat_tmp%s' % (job_id, iter_count)
    results_path = '%s/hat_results' % job_id
    PageRankSorter(input_path=last_output, output_path=results_path).run()

    # Remove the init and temp files; hat_results is kept.
    hdfs.rmr('%s/hat_tmp*' % job_id)
    hdfs.rmr(init_path)

    return hadoop_results(job_id, top_n)