Example #1
def get_CrossCatClient(client_type, **kwargs):
    """Helper which instantiates the appropriate Engine and returns a Client

    """

    client = None
    if client_type == 'local':
        import crosscat.LocalEngine as LocalEngine
        le = LocalEngine.LocalEngine(**kwargs)
        client = CrossCatClient(le)
    elif client_type == 'hadoop':
        import crosscat.HadoopEngine as HadoopEngine
        he = HadoopEngine.HadoopEngine(**kwargs)
        client = CrossCatClient(he)
    elif client_type == 'jsonrpc':
        import crosscat.JSONRPCEngine as JSONRPCEngine
        je = JSONRPCEngine.JSONRPCEngine(**kwargs)
        client = CrossCatClient(je)
    elif client_type == 'multiprocessing':
        import crosscat.MultiprocessingEngine as MultiprocessingEngine
        me = MultiprocessingEngine.MultiprocessingEngine(**kwargs)
        client = CrossCatClient(me)
    else:
        raise ValueError('unknown client_type: %s' % client_type)
    return client
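
A minimal usage sketch; the engine keyword argument here is an assumption (LocalEngine's actual constructor signature may differ):

client = get_CrossCatClient('local', seed=0)  # 'seed' is an assumed kwarg

The factory hides which engine backs the client, so calling code stays the same across the local, hadoop, jsonrpc, and multiprocessing backends.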
Example #2
    # generate a synthetic table and an initial latent state
    T, M_c, M_r, X_L, X_D = generate_clean_state(gen_seed,
                                                 num_clusters,
                                                 num_cols, num_rows,
                                                 num_splits,
                                                 max_mean=10, max_std=1)

    # write table_data
    table_data = dict(M_c=M_c, M_r=M_r, T=T)
    fu.pickle(table_data, table_data_filename)
    # write hadoop input
    n_tasks = write_hadoop_input(input_filename, X_L, X_D, n_steps, SEED=gen_seed)

    # actually run
    if do_local:
        xu.run_script_local(input_filename, script_filename, output_filename, table_data_filename)
    elif do_remote:
        hadoop_engine = HE.HadoopEngine(output_path=output_path,
                                        input_filename=input_filename,
                                        table_data_filename=table_data_filename,
                                        )
        hadoop_engine.send_hadoop_command(n_tasks)
        was_successful = hadoop_engine.get_hadoop_results()
        if was_successful:
            hu.copy_hadoop_output(output_path, output_filename)
        else:
            print('remote hadoop job NOT successful')
    else:
        hadoop_engine = HE.HadoopEngine()
        # print what the command would be
        print(HE.create_hadoop_cmd_str(hadoop_engine, n_tasks=n_tasks))
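
write_hadoop_input is called above but not shown; a rough sketch of what such a writer might look like, assuming one tab-separated key/JSON task line per chain (the line format and the n_chains parameter are assumptions, not crosscat's actual interface):

import json

def write_hadoop_input_sketch(path, X_L, X_D, n_steps, SEED=0, n_chains=1):
    # hypothetical stand-in: one task line per chain, keyed by chain index
    with open(path, 'w') as outfile:
        for i in range(n_chains):
            payload = dict(X_L=X_L, X_D=X_D, n_steps=n_steps, SEED=SEED + i)
            outfile.write('%d\t%s\n' % (i, json.dumps(payload)))
    return n_chains  # the n_tasks value used by the callers above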
Example #3
            for line in infile:
                key, test_dict = xu.parse_hadoop_line(line)
                ret_dict = run_mi_test_local.run_mi_test_local(test_dict)
                xu.write_hadoop_line(output_file_object, key, ret_dict)
                print("%s\n\t%s" % (str(test_dict), str(ret_dict)))

        output_file_object.close()
        # generate the csv
        parse_mi.parse_data_to_csv(input_filename, params_dict, test_idx,
                                   output_filename)
        print("Done.")
    elif do_remote:
        # generate the massive hadoop files
        hadoop_engine = HE.HadoopEngine(output_path=output_path,
                                        input_filename=input_filename,
                                        table_data_filename=table_data_filename,
                                        which_engine_binary=which_engine_binary,
                                        hdfs_uri=hdfs_uri,
                                        jobtracker_uri=jobtracker_uri,
                                        )

        xu.write_support_files(table_data, hadoop_engine.table_data_filename,
                               dict(command='mi_analyze'),
                               hadoop_engine.command_dict_filename)
        t_start = time.time()
        hadoop_engine.send_hadoop_command(n_tasks=len(testlist))
        was_successful = hadoop_engine.get_hadoop_results()
        if was_successful:
            t_end = time.time()
            t_total = t_end - t_start
            print("That took %i seconds." % t_total)
            hu.copy_hadoop_output(hadoop_engine.output_path, output_filename)
            parse_mi.parse_data_to_csv(input_filename, params_dict, test_idx,
                                       output_filename)
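
xu.parse_hadoop_line and xu.write_hadoop_line above are crosscat utilities whose wire format is not shown here. A minimal round-trip sketch, assuming tab-separated key/JSON lines (an assumption, not necessarily crosscat's actual format):

import json

def parse_hadoop_line_sketch(line):
    # hypothetical stand-in for xu.parse_hadoop_line
    key, payload = line.rstrip('\n').split('\t', 1)
    return key, json.loads(payload)

def write_hadoop_line_sketch(fileobj, key, a_dict):
    # hypothetical stand-in for xu.write_hadoop_line
    fileobj.write('%s\t%s\n' % (key, json.dumps(a_dict)))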
Example #4
    n_tasks = (len(num_rows_list) * len(num_cols_list) *
               len(num_clusters_list) * len(num_splits_list) * 5)
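    # Equivalently, the full parameter grid can be enumerated explicitly
    # (five repetitions per combination); illustrative check only, not part
    # of the original script:
    import itertools
    _grid = list(itertools.product(num_rows_list, num_cols_list,
                                   num_clusters_list, num_splits_list,
                                   range(5)))
    assert len(_grid) == n_tasks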
    # Create a dummy table data file
    table_data = dict(T=[], M_c=[], X_L=[], X_D=[])
    fu.pickle(table_data, table_data_filename)

    if do_local:
        xu.run_script_local(input_filename, script_filename, output_filename,
                            table_data_filename)
        print('Local Engine for automated timing runs has not been '
              'completely implemented/tested')
    elif do_remote:
        hadoop_engine = HE.HadoopEngine(
            which_engine_binary=which_engine_binary,
            output_path=output_path,
            input_filename=input_filename,
            table_data_filename=table_data_filename)
        xu.write_support_files(table_data, hadoop_engine.table_data_filename,
                               dict(command='time_analyze'),
                               hadoop_engine.command_dict_filename)
        hadoop_engine.send_hadoop_command(n_tasks=n_tasks)
        was_successful = hadoop_engine.get_hadoop_results()
        if was_successful:
            hu.copy_hadoop_output(hadoop_engine.output_path, output_filename)
            parse_timing.parse_timing_to_csv(output_filename,
                                             outfile=parsed_out_file)
            coeff_list = find_regression_coeff(parsed_out_file, parameter_list)

        else:
            print('remote hadoop job NOT successful')
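
find_regression_coeff is not shown above; a minimal sketch of one way to fit timing against the parameters with ordinary least squares (the function name, arguments, and column layout are assumptions):

import numpy as np

def find_regression_coeff_sketch(times, features):
    # hypothetical stand-in: OLS of run time against the parameter matrix
    # (rows are runs; columns are e.g. num_rows, num_cols, num_clusters,
    # num_splits), with an intercept column appended
    A = np.column_stack([np.asarray(features), np.ones(len(times))])
    coeffs, _, _, _ = np.linalg.lstsq(A, np.asarray(times), rcond=None)
    return coeffs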
Example #5
    # write table_data
    table_data = dict(M_c=M_c, M_r=M_r, T=T)
    fu.pickle(table_data, table_data_filename)
    # write hadoop input
    n_tasks = write_hadoop_input(input_filename,
                                 X_L,
                                 X_D,
                                 n_steps,
                                 SEED=gen_seed)

    # actually run
    if do_local:
        xu.run_script_local(input_filename, script_filename, output_filename,
                            table_data_filename)
    elif do_remote:
        hadoop_engine = HE.HadoopEngine(
            output_path=output_path,
            input_filename=input_filename,
            table_data_filename=table_data_filename,
        )
        hadoop_engine.send_hadoop_command(n_tasks)
        was_successful = hadoop_engine.get_hadoop_results()
        if was_successful:
            hu.copy_hadoop_output(output_path, output_filename)
        else:
            print('remote hadoop job NOT successful')
    else:
        hadoop_engine = HE.HadoopEngine()
        # print what the command would be
        print(HE.create_hadoop_cmd_str(hadoop_engine, n_tasks=n_tasks))
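
fu.pickle, used throughout these examples, is crosscat's file_utils helper; a minimal stand-in, assuming it simply pickles an object to a path:

import pickle

def fu_pickle_sketch(obj, filename):
    # hypothetical stand-in for fu.pickle
    with open(filename, 'wb') as outfile:
        pickle.dump(obj, outfile)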