def main(argv):
    """Run the pipes job on ``opt.input`` and write the resulting counts.

    Parses ``argv`` with the parser from ``make_parser()``, runs the script
    stored in ``LOCAL_MR_SCRIPT`` through a ``hadut.PipesRunner``, and writes
    one ``"<key>\\t<count>"`` line per entry to ``opt.output``, sorted by
    count in descending order.  When ``opt.n_top`` is truthy, only the first
    ``opt.n_top`` entries are written.

    If ``opt.output`` is not ``sys.stdout`` it is treated as a path, opened
    for writing, and always closed before returning, even if the job fails.
    """
    parser = make_parser()
    opt, _ = parser.parse_args(argv)
    if opt.output is not sys.stdout:
        opt.output = open(opt.output, 'w')
    try:
        logger = logging.getLogger("main")
        logger.setLevel(logging.DEBUG)
        runner = hadut.PipesRunner(prefix=PREFIX, logger=logger)
        with open(LOCAL_MR_SCRIPT) as f:
            pipes_code = pts.add_sys_path(f.read())
        runner.set_input(opt.input, put=True)
        runner.set_exe(pipes_code)
        mr_options = get_mr_options(opt, runner.wd)
        runner.run(properties=mr_options, hadoop_conf_dir=HADOOP_CONF_DIR,
                   logger=logger)
        mr_output = runner.collect_output()
        # NOTE(review): clean() is only reached on success, as in the
        # original; confirm whether the runner's working data should also be
        # removed when the job fails.
        runner.clean()
        d = pts.parse_mr_output(mr_output, vtype=int)
        # items() instead of iteritems(): sorted() accepts either, and
        # items() exists on both Python 2 and Python 3 mappings.
        ip_list = sorted(d.items(), key=operator.itemgetter(1), reverse=True)
        if opt.n_top:
            ip_list = ip_list[:opt.n_top]
        for ip, count in ip_list:
            opt.output.write("%s\t%d\n" % (ip, count))
    finally:
        # Never close the interpreter's stdout; only the file we opened.
        if opt.output is not sys.stdout:
            opt.output.close()
def main(argv):
    """Run the pipes job and dump its counts, largest first.

    Steps, in order: parse ``argv`` via ``make_parser()``; open
    ``opt.output`` for writing unless it already is ``sys.stdout``; run the
    code read from ``LOCAL_MR_SCRIPT`` with ``hadut.PipesRunner`` on
    ``opt.input``; parse the collected output into integer counts; write
    ``"<key>\\t<count>"`` lines in descending count order, truncated to
    ``opt.n_top`` entries when that option is truthy.

    The output file, if one was opened here, is closed on every exit path.
    """
    parser = make_parser()
    opt, _ = parser.parse_args(argv)
    if opt.output is not sys.stdout:
        opt.output = open(opt.output, 'w')
    try:
        logger = logging.getLogger("main")
        logger.setLevel(logging.DEBUG)
        runner = hadut.PipesRunner(prefix=PREFIX, logger=logger)
        with open(LOCAL_MR_SCRIPT) as f:
            pipes_code = pts.add_sys_path(f.read())
        runner.set_input(opt.input, put=True)
        runner.set_exe(pipes_code)
        mr_options = get_mr_options(opt, runner.wd)
        runner.run(
            properties=mr_options,
            hadoop_conf_dir=HADOOP_CONF_DIR,
            logger=logger
        )
        mr_output = runner.collect_output()
        # NOTE(review): as in the original, clean() runs only after a
        # successful job; confirm whether failures should clean up too.
        runner.clean()
        d = pts.parse_mr_output(mr_output, vtype=int)
        # dict.iteritems() does not exist on Python 3; items() sorts the
        # same pairs on both major versions.
        ip_list = sorted(d.items(), key=operator.itemgetter(1), reverse=True)
        if opt.n_top:
            ip_list = ip_list[:opt.n_top]
        for ip, count in ip_list:
            opt.output.write("%s\t%d\n" % (ip, count))
    finally:
        # Close only a file opened above, never sys.stdout itself.
        if opt.output is not sys.stdout:
            opt.output.close()
def get_res(output_dir):
    """Read every ``part-*`` file under ``output_dir`` and parse the counts.

    Lists ``output_dir`` through an ``hdfs()`` handle, reads each entry whose
    base name starts with ``'part-'`` in text mode, concatenates the contents
    in listing order and returns
    ``pts.parse_mr_output(<joined text>, vtype=int)``.

    The filesystem handle is closed before returning, including when listing
    or reading raises.
    """
    fs = hdfs()
    try:
        data = []
        for x in fs.list_directory(output_dir):
            # Only the job's output shards; skip any other entries in the
            # directory.
            if os.path.basename(x['path']).startswith('part-'):
                with fs.open_file(x['path'], 'rt') as f:
                    data.append(f.read())
    finally:
        # The original never released the handle obtained from hdfs().
        fs.close()
    all_data = ''.join(data)
    return pts.parse_mr_output(all_data, vtype=int)
def get_res(output_dir):
    """Collect the ``part-*`` files of ``output_dir`` and parse them.

    Every directory entry whose base name starts with ``'part-'`` is opened
    with the filesystem's default mode and read in full; the pieces are
    joined in listing order and handed to
    ``pts.parse_mr_output(..., vtype=int)``, whose result is returned.

    The handle obtained from ``hdfs()`` is closed on all exit paths.
    """
    fs = hdfs()
    try:
        data = []
        for x in fs.list_directory(output_dir):
            if os.path.basename(x['path']).startswith('part-'):
                # NOTE(review): relies on the default open mode yielding
                # str so that ''.join() works; confirm for the pydoop
                # version in use.
                with fs.open_file(x['path']) as f:
                    data.append(f.read())
    finally:
        # Release the filesystem handle; the original leaked it.
        fs.close()
    all_data = ''.join(data)
    return pts.parse_mr_output(all_data, vtype=int)
def check(res, expected_res):
    """Compare raw MapReduce output against the expected counts.

    ``res`` is parsed with ``pts.parse_mr_output(res, vtype=int)`` and the
    result is passed, together with ``expected_res``, to
    ``pts.compare_counts``.  Returns ``"OK."`` when that comparison result is
    falsy, otherwise ``"ERROR: "`` followed by the comparison result.
    """
    parsed_counts = pts.parse_mr_output(res, vtype=int)
    mismatch = pts.compare_counts(parsed_counts, expected_res)
    if not mismatch:
        return "OK."
    return "ERROR: %s" % mismatch
def get_res(output_dir):
    """Return the parsed integer counts found under ``output_dir``.

    The output is gathered with ``hadut.collect_output(output_dir)`` and
    parsed by ``pts.parse_mr_output`` with ``vtype=int``.
    """
    raw_output = hadut.collect_output(output_dir)
    return pts.parse_mr_output(raw_output, vtype=int)