def random_gaussian(size, miu=0, sigma=8, epsilon=0, seed=None):
    """Generate a random array of shape ``size`` and add ``epsilon`` to it.

    Two data sources are supported:

    * Disk pool: when the environment variable ``RANDOM_DATA_DISK_PATH`` is
      set and ``<RANDOM_DATA_DISK_PATH>/random_data_<miu>_<sigma>.bin`` exists,
      the file is read with ``np.fromfile`` and its values are shuffled. If
      more values are requested than the pool holds, the pool is repeated and
      every repetition is shuffled independently. ``seed`` is not used on this
      path (the global ``np.random`` state drives the shuffle).
    * Generated: otherwise ``func(size_c, miu, sigma, seed)`` is called once
      per seed (``RANDOM_SEED_NUM`` seeds) and the first ``size_c`` values of
      the flattened result are used.
      NOTE(review): ``func`` is defined elsewhere; presumably it returns
      Gaussian samples -- confirm against its definition.

    Args:
        size: Iterable of dimension lengths; also passed to ``reshape``.
        miu: Forwarded to ``func`` and used in the pool file name.
        sigma: Forwarded to ``func`` and used in the pool file name. Must be
            positive on the generated path.
        epsilon: Value added to every element of the result.
        seed: Optional seed used for all generator calls; when ``None`` the
            seeds are derived from the current time and ``random.randint``.

    Returns:
        Array of shape ``size`` with ``epsilon`` added.

    Raises:
        SystemExit: On the generated path when ``sigma <= 0``.
        ValueError: On the disk path when the pool file holds no data.
    """
    # Total number of requested elements. The initial value 1 makes an empty
    # shape (a scalar) work identically on both paths.
    size_c = functools.reduce(lambda x, y: x * y, size, 1)

    random_data_disk_path = None
    disk_dir = os.environ.get("RANDOM_DATA_DISK_PATH")
    if disk_dir is not None:
        random_data_disk_path = disk_dir + "/random_data_%s_%s.bin" % (str(miu), str(sigma))

    if random_data_disk_path is None or not os.path.exists(random_data_disk_path):
        if sigma <= 0:
            sys.stderr.write(
                "Error: Expect positive sigmal for gaussian distribution. but get %f\n" % sigma)
            sys.exit(1)

        if seed is None:
            seed_ = [
                int(time.time() % 10000 * 10000) + random.randint(i, 100)
                for i in range(RANDOM_SEED_NUM)
            ]
        else:
            seed_ = [seed] * RANDOM_SEED_NUM
        logging.debug("random_gaussian seeds: {}".format(seed_))

        # In the profiling scenario, when a new process is used to run test
        # cases, data generation by multiple processes stops responding. To
        # locate the fault, set gen_data_multi_process to False.
        gen_data_multi_process = not bool(get_profiling_mode())
        if gen_data_multi_process:
            with Pool(processes=8) as pool:
                ret = np.array(
                    pool.starmap(
                        func,
                        zip(repeat(size_c), repeat(miu), repeat(sigma), seed_)))
        else:
            numbers = []
            for s in seed_:
                numbers.extend(func(size_c, miu, sigma, s))
            ret = np.array(numbers)
        ret = ret.flatten()
        return ret[:size_c].reshape(size) + epsilon

    data_pool = np.fromfile(random_data_disk_path)
    pool_len = len(data_pool)
    if pool_len == 0:
        raise ValueError(
            "random data file %s contains no data" % random_data_disk_path)

    # Ceiling division; at least one copy so a zero-element request still has
    # something to concatenate and slice.
    copy_num = max(1, -(-size_c // pool_len))
    data_copy = np.copy(data_pool)
    data_copy_list = []
    for _ in range(copy_num):
        np.random.shuffle(data_copy)
        # shuffle() works in place, so store a snapshot; appending data_copy
        # itself would make every repetition alias the same (last) ordering.
        data_copy_list.append(data_copy.copy())
    data_pool = np.concatenate(data_copy_list, axis=0)
    return data_pool[0:size_c].reshape(size) + epsilon
def launch_json(debug_mode: bool = True, save_res: bool = False, json_input_dir=""):
    """Run tuning for every composite-op JSON file in a directory.

    The directory is ``json_load.format(json_input_dir)``. For each file the
    JSON description is loaded, empty input shapes are replaced by ``[1]``,
    the tiling space is queried from ``composite.get_tiling_space``, reference
    data is produced by ``gen_data`` and a ``ModelBasedTuner`` is run. Tuning
    output goes to ``json.log``.

    Args:
        debug_mode: When true, every space-size tier uses 3 tuning trials;
            otherwise 80 / 160 / 320 trials are used for spaces smaller than
            10**4, smaller than 10**5, and larger, respectively.
        save_res: When true, the result is stored via ``save_tuning_result``.
        json_input_dir: Value substituted into the ``json_load`` template.

    Raises:
        RuntimeError: If a file yields an empty tiling space.
    """
    if debug_mode:
        iter_times = [3, 3, 3]
    else:
        iter_times = [80, 160, 320]

    json_dir = json_load.format(json_input_dir)
    for input_file in os.listdir(json_dir):
        with open(json_dir + '/' + input_file, 'r') as f:
            raw_text = f.read()
        json_content = json.loads(raw_text)

        # An empty shape is rewritten to a one-element shape.
        for input_desc in json_content["input_desc"]:
            first_desc = input_desc[0]
            if first_desc["shape"] == []:
                first_desc["shape"] = [1]
        json_input = json.dumps(json_content)

        space_res = composite.get_tiling_space(json_input, 2)
        index_table = space_res['index']
        tiling_spaces = space_res['tuning_space']
        if not tiling_spaces:
            raise RuntimeError('empty tiling spaces')

        # One namedtuple field per tiling dimension.
        dim_names = ['tiling_' + str(idx) for idx, _ in enumerate(tiling_spaces[0])]
        input_type = namedtuple("json", dim_names)
        space = ListConfigSpace(input_type)
        for tiling_space in tiling_spaces:
            space.add(input_type(*tiling_space))

        key = json_content["op"]
        input_for_mod, expect = gen_data(op_type="json", op_desc=json_input)
        print('space size:', space.length)
        print('index table:', index_table)

        # For multi-output ops the outputs are addressed by negative indices
        # -n .. -1; single-output ops pass None.
        n_outputs = len(json_content["output_desc"])
        output_para = list(range(-n_outputs, 0)) if n_outputs > 1 else None

        runner = KernelRunner(op_type="json",
                              op_desc=json_input,
                              index_table=index_table,
                              input_data=input_for_mod,
                              expect=expect,
                              mod_output_param=output_para,
                              timeout=180,
                              repeat_times=1)

        # we can only get a valid tiling, or accurate get cycles
        is_truly_profiling = utils.get_profiling_mode()
        # available device numbers, normally is 8 or 1
        available_device_numbers = utils.get_available_devices_num()
        if is_truly_profiling:
            n_parallel = available_device_numbers
        else:
            n_parallel = 1
        tuner = ModelBasedTuner(runner, index_table, space,
                                n_parallel=n_parallel,
                                plan_size=64, pre_model=None)

        # Pick the trial budget from the size tier of the space.
        if space.length < 10 ** 4:
            tier = 0
        elif space.length < 10 ** 5:
            tier = 1
        else:
            tier = 2
        least_try_times = iter_times[tier]

        tuner.tune(least_try_times, output_file="json.log")
        print_tuning_result("json", space, index_table, tuner, key)
        if save_res:
            save_tuning_result(key, "json", None, index_table, tuner)
def jobs(op_type: str = 'add', desc=None, debug_mode: bool = True, save_res: bool = False, insert_key='',
         conf_of_set_dim=""):
    """Run one auto-tuning job for a single operator.

    The tuning space, key, expected output and inputs come from
    ``get_space(op_type, desc)``. The job returns early, without tuning, when
    ``conf_of_set_dim`` is a dict that already holds a dict or a non-empty
    list/tuple under the key. Tuning output goes to ``<op_type>.log``.

    Args:
        op_type: Operator type passed to ``get_space`` and ``KernelRunner``.
        desc: Operator description passed to ``get_space`` and ``KernelRunner``.
        debug_mode: When true, every space-size tier uses 3 tuning trials;
            otherwise 80 / 160 / 320 trials are used for spaces smaller than
            10**4, smaller than 10**5, and larger, respectively.
        save_res: When true, the result is stored via ``save_tuning_result``.
        insert_key: When not empty, replaces the key returned by ``get_space``.
        conf_of_set_dim: Already-tuned configurations, consulted only when it
            is a dict.
    """
    iter_times = [80, 160, 320]
    if debug_mode:
        iter_times = [3, 3, 3]

    index_table, space, key, expect, input_for_mod = get_space(op_type, desc)
    print('space size:', space.length)
    print('index table:', index_table)
    if insert_key != '':
        key = insert_key

    # filter already tuned shape
    if isinstance(conf_of_set_dim, dict) and key in conf_of_set_dim:
        tuned = conf_of_set_dim[key]
        has_sequence_conf = isinstance(tuned, (list, tuple)) and tuned
        if has_sequence_conf or isinstance(tuned, dict):
            return

    # A dict input carries both the module arguments and the output indices
    # (multi-output case); anything else is used as the arguments directly.
    output_para = None
    if isinstance(input_for_mod, dict):
        mod_args = input_for_mod['args']
        output_para = input_for_mod['outputs']
        input_for_mod = mod_args

    runner = KernelRunner(op_type, desc, index_table,
                          input_data=input_for_mod,
                          expect=expect,
                          mod_output_param=output_para,
                          timeout=180,
                          repeat_times=1)

    # we can only get a valid tiling, or accurate get cycles
    is_truly_profiling = utils.get_profiling_mode()
    # available device numbers, normally is 8 or 1
    available_device_numbers = utils.get_available_devices_num()
    if is_truly_profiling:
        n_parallel = available_device_numbers
    else:
        n_parallel = 1
    tuner = ModelBasedTuner(runner, index_table, space,
                            n_parallel=n_parallel,
                            plan_size=64, pre_model=None)

    # Pick the trial budget from the size tier of the space.
    if space.length < 10 ** 4:
        tier = 0
    elif space.length < 10 ** 5:
        tier = 1
    else:
        tier = 2
    least_try_times = iter_times[tier]

    tuner.tune(least_try_times, output_file=op_type + ".log")
    print_tuning_result(op_type, space, index_table, tuner, key)
    if save_res:
        save_tuning_result(key, op_type, desc, index_table, tuner)
def _append_line(path, text):
    """Append ``text`` followed by a newline to the record file at ``path``."""
    with open(path, 'a') as fe:
        fe.write(text)
        fe.write("\n")


def launch_json(debug_mode: bool = True, save_res: bool = False, json_dir="", repo_path="", all_space=False,
                skip_exist=True, extra_tune=False, self_attrs=[], tuning_attrs=[]):
    """Run tuning for every composite-op JSON file in ``json_dir``.

    For each file the JSON description is loaded and empty input shapes are
    replaced by ``[1]``. A config space is then built, either from the tiling
    space computed by ``get_json_space`` in a subprocess (optionally extended
    with boolean ``tuning_attrs``) or, with ``extra_tune``, from boolean
    combinations of ``self_attrs``. Finally a tuner is run, writing its output
    to ``json.log``.

    Files that cannot be tuned are skipped and recorded under ``res/``:
    ``skip_file.txt`` (already in the repo), ``error_space_list.txt`` (no
    tiling space produced, including a timed-out subprocess),
    ``empty_space_list.txt`` (tiling space is empty or not a list) and
    ``error_gen_data_list.txt`` (``gen_data`` raised).

    Args:
        debug_mode: When true, every space-size tier uses 3 tuning trials;
            otherwise 80 / 160 / 320 trials are used for spaces smaller than
            10**4, smaller than 10**5, and larger, respectively. Ignored when
            ``all_space`` is set.
        save_res: When true, the result is stored via ``save_tuning_result``.
        json_dir: Directory containing the JSON descriptions.
        repo_path: Path of the JSON repo file; read up front and forwarded to
            ``save_tuning_result``.
        all_space: When true, use ``Tuner`` and try every config in the space
            instead of ``ModelBasedTuner``.
        skip_exist: When true, skip files whose trait is already in the repo.
        extra_tune: When true, tune ``self_attrs`` instead of the tiling space.
        self_attrs: Attribute names for ``extra_tune``; also passed to
            ``KernelRunner``.
        tuning_attrs: Attribute names appended to the tiling dimensions when
            the tiling space has fewer than 10**5 entries.

    NOTE: the list defaults are kept for interface compatibility; this
    function itself does not mutate them.
    """
    # Equivalent of `mkdir -p res/` without spawning a shell.
    os.makedirs("res", exist_ok=True)
    iter_times = [3, 3, 3] if debug_mode else [80, 160, 320]
    files = os.listdir(json_dir)
    with open(repo_path, 'r') as f:
        repo = json.loads(f.read())

    for input_file in files:
        print("----Start tuning for ", input_file)
        with open(json_dir + '/' + input_file, 'r') as f:
            json_input = f.read()
        json_content = json.loads(json_input)
        # An empty shape is rewritten to a one-element shape.
        for input_desc in json_content["input_desc"]:
            if input_desc[0]["shape"] == []:
                input_desc[0]["shape"] = [1]
        json_input = json.dumps(json_content)

        # skip tuning for info in repo
        if skip_exist:
            compute, shape, dtype = generate_trait(json_content)
            if get_repo(repo, [compute, shape, dtype]):
                print("Info for %s already exists" % input_file)
                print("ops are ", str(compute))
                print("shape is ", str(shape))
                print("dtype is ", str(dtype))
                _append_line('res/skip_file.txt', input_file)
                continue

        # generate tuning space
        if not extra_tune:
            time_start_get_space = time.time()
            with Manager() as manager:
                space_dict = manager.dict()
                p = Process(target=get_json_space,
                            args=(json_input, space_dict))
                p.start()
                p.join(600)
                if p.is_alive():
                    # join() timed out: stop the worker so it does not keep
                    # running in the background after this file is skipped.
                    p.terminate()
                    p.join()
                if 'res' not in space_dict:
                    _append_line('res/error_space_list.txt', input_file)
                    continue
                space_res = space_dict['res']
            time_end_get_space = time.time()
            print("get space time: ",
                  time_end_get_space - time_start_get_space)

            index_table = space_res['index']
            tiling_spaces = space_res['tuning_space']
            # An empty list must be rejected as well: tiling_spaces[0] below
            # would raise IndexError.
            if not isinstance(tiling_spaces, list) or not tiling_spaces:
                _append_line('res/empty_space_list.txt', input_file)
                continue

            dim_names = [
                'tiling_' + str(i) for i in range(len(tiling_spaces[0]))
            ]
            # Attribute tuning multiplies the space size, so it is only
            # enabled for tiling spaces below 10**5 entries.
            use_tuning_attrs = bool(tuning_attrs) and len(tiling_spaces) < 10**5
            if use_tuning_attrs:
                dim_names.extend(tuning_attrs)
            input_type = namedtuple("json", dim_names)
            space = ListConfigSpace(input_type)
            if use_tuning_attrs:
                attr_options = gen_bool_list(tuning_attrs)
                for tiling_space in tiling_spaces:
                    for attr_option in attr_options:
                        space.add(input_type(*tiling_space, *attr_option))
            else:
                for tiling_space in tiling_spaces:
                    space.add(input_type(*tiling_space))
        else:
            index_table = []
            pre_lists = gen_bool_list(self_attrs)
            pre_input_type = namedtuple("extra_tune", self_attrs)
            space = ListConfigSpace(pre_input_type)
            for item in pre_lists:
                space.add(pre_input_type(*item))

        key = json_content["op"]
        try:
            input_for_mod, expect = gen_data(op_type="json",
                                             op_desc=json_input)
        except KeyboardInterrupt:
            # Let Ctrl-C abort the whole run instead of skipping one file.
            raise
        except BaseException as e:
            # Deliberately broad: any other data-generation failure (including
            # SystemExit) only skips this file.
            logger.debug("gen numpy data from [%s] failed: %s", input_file,
                         str(e))
            _append_line('res/error_gen_data_list.txt',
                         input_file + ": " + str(e))
            continue
        print('space size:', space.length)
        print('index table:', index_table)

        # For multi-output ops the outputs are addressed by negative indices
        # -n .. -1; single-output ops pass None.
        output_para = None
        n_outputs = len(json_content["output_desc"])
        if n_outputs > 1:
            output_para = [i - n_outputs for i in range(n_outputs)]

        runner = KernelRunner(op_type="json",
                              op_desc=json_input,
                              index_table=index_table,
                              self_attrs=self_attrs,
                              input_data=input_for_mod,
                              expect=expect,
                              mod_output_param=output_para,
                              timeout=180,
                              repeat_times=1)

        # we can only get a valid tiling, or accurate get cycles
        # .get() avoids a KeyError when RUNTIME_MODE is not set.
        is_truly_profiling = (utils.get_profiling_mode()
                              or os.environ.get('RUNTIME_MODE') == "gpu")

        # available device numbers, normally is 8 or 1
        available_device_numbers = utils.get_available_devices_num()

        if all_space:
            tuner = Tuner(runner,
                          index_table,
                          space,
                          n_parallel=available_device_numbers)
            least_try_times = space.length
        else:
            tuner = ModelBasedTuner(
                runner,
                index_table,
                space,
                n_parallel=available_device_numbers
                if is_truly_profiling else 1,
                plan_size=64,
                pre_model=None)
            least_try_times = iter_times[
                0 if space.length < 10**4 else 1 if space.length < 10**5 else 2]
        tuner.tune(least_try_times, output_file="json.log")

        print_tuning_result("json", space, index_table, tuner, key)

        if save_res:
            result_type = "extra_tune" if extra_tune else "json"
            save_tuning_result(key, result_type, json_content, index_table,
                               tuner, repo_path)