# Common imports assumed by the snippets below; each function originally lived
# in a separate project, so names not imported here (e.g. RFTuner, RoundTuner,
# RankModelRunner, rank_progress, GADQNTuner, GADQNTunerDebug, KernelDB,
# TVMCException, prune_old_tasks, config_tasks, layers, create_workload,
# runtime, and module-level globals such as `target`, `logger`, `args`,
# `log_file`, and `tuning_option`) come from their surrounding codebases.
import logging
import os
import sys
import tempfile
import time
from pathlib import Path
from time import sleep
from typing import List, Optional

import numpy as np

import tvm
from tvm import autotvm, relay
from tvm.autotvm import LocalBuilder
from tvm.autotvm.measure import MeasureInput, create_measure_batch
from tvm.autotvm.tuner import GATuner, GridSearchTuner, RandomTuner, XGBTuner


def tune_tasks(tasks,
               measure_option,
               tuner='xgb',
               n_trial=1000,
               early_stopping=None,
               log_filename='tuning.log',
               use_transfer_learning=True):
    # create tmp log file
    tmp_log_file = log_filename + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'xgb_knob':
            tuner_obj = XGBTuner(tsk, loss_type='rank', feature_type='knob')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=50)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # do tuning
        tsk_trial = min(n_trial, len(tsk.config_space))
        tuner_obj.tune(n_trial=tsk_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)
                       ])

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_filename)
    os.remove(tmp_log_file)
def tune_kernels(tasks,
                 measure_option,
                 tuner='gridsearch',
                 early_stopping=None,
                 log_filename='tuning.log'):
    for i, tsk in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # converting conv2d tasks to conv2d_NCHWc tasks
        op_name = tsk.workload[0]
        if op_name == 'conv2d':
            func_create = 'topi_x86_conv2d_NCHWc'
        elif op_name == 'depthwise_conv2d_nchw':
            func_create = 'topi_x86_depthwise_conv2d_NCHWc_from_nchw'
        else:
            raise ValueError("Tuning {} is not supported on x86".format(op_name))

        task = autotvm.task.create(func_create, args=tsk.args,
                                   target=target, template_key='direct')
        task.workload = tsk.workload

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(task, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(task, pop_size=50)
        elif tuner == 'random':
            tuner_obj = RandomTuner(task)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(task)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        # do tuning
        n_trial = len(task.config_space)
        tuner_obj.tune(n_trial=n_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(n_trial, prefix=prefix),
                           autotvm.callback.log_to_file(log_filename)
                       ])
def tune_tasks(
    tasks,
    measure_option,
    tuner="xgb",
    n_trial=2048,
    early_stopping=None,
    log_filename="tuning.log",
    use_transfer_learning=True,
):
    """Tune tasks with different tuners"""
    tmp_log_file = log_filename + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        if tuner in ("xgb", "xgb-rank"):
            tuner_obj = XGBTuner(tsk, loss_type="rank")
        elif tuner == "xgb_knob":
            tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="knob")
        elif tuner == "ga":
            tuner_obj = GATuner(tsk, pop_size=50)
        elif tuner == "random":
            tuner_obj = RandomTuner(tsk)
        elif tuner == "gridsearch":
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        tsk_trial = min(n_trial, len(tsk.config_space))
        tuner_obj.tune(
            n_trial=tsk_trial,
            early_stopping=early_stopping,
            measure_option=measure_option,
            callbacks=[
                autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
                autotvm.callback.log_to_file(tmp_log_file),
            ],
        )

    autotvm.record.pick_best(tmp_log_file, log_filename)
    os.remove(tmp_log_file)
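# A minimal driver sketch for the variant above (an illustration, not part of
# the original snippets): extract tasks from a Relay module, build a local
# measure option, tune, then compile with the best records. The names `mod`,
# `params`, and `example_target` are hypothetical placeholders; the autotvm
# calls themselves are standard TVM APIs.
def tune_and_build_example(mod, params, example_target="llvm"):
    # extract tunable tasks from the model
    tasks = autotvm.task.extract_from_program(
        mod["main"], target=example_target, params=params)

    # build and run each candidate locally; the numbers here are arbitrary
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(timeout=10),
        runner=autotvm.LocalRunner(number=10, repeat=1, min_repeat_ms=100))

    tune_tasks(tasks, measure_option, tuner="xgb", n_trial=2048,
               log_filename="tuning.log")

    # compile with the best records picked by tune_tasks
    with autotvm.apply_history_best("tuning.log"):
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build(mod, target=example_target, params=params)
    return lib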
def tune_tasks(target):
    target = tvm.target.create(target)
    tasks = config_tasks(target)

    tuner = tuning_option['tuner']
    use_transfer_learning = tuning_option['use_transfer_learning']
    n_trial = tuning_option['n_trial']
    early_stopping = tuning_option['early_stopping']
    measure_option = tuning_option['measure_option']

    tmp_log_file = log_file + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, tsk in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=50)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # Do tuning; cap trials at the config-space size so the progress-bar
        # total matches the number of trials actually run
        tsk_trial = min(n_trial, len(tsk.config_space))
        tuner_obj.tune(n_trial=tsk_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)
                       ])

    # Pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_file)
    os.remove(tmp_log_file)
def tune_tasks(tasks,
               measure_option,
               tuner='gridsearch',
               early_stopping=None,
               log_filename='tuning.log'):
    tmp_log_file = log_filename + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, task in enumerate(tasks):
        print(task)
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(task, loss_type='rank')
        elif tuner == 'xgb_knob':
            tuner_obj = XGBTuner(task, loss_type='rank', feature_type='knob')
        elif tuner == 'ga':
            tuner_obj = GATuner(task, pop_size=50)
        elif tuner == 'random':
            tuner_obj = RandomTuner(task)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(task)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        # do tuning
        # n_trial = len(task.config_space)
        n_trial = 2  # deliberately tiny trial budget, presumably for smoke testing
        tuner_obj.tune(n_trial=n_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(n_trial, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)])

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_filename)
    os.remove(tmp_log_file)
def tune(self):
    # create tuner
    if self.tuner == "xgb" or self.tuner == "xgb-rank":
        tuner_obj = XGBTuner(self.task, loss_type="rank")
    elif self.tuner == "xgb_knob":
        tuner_obj = XGBTuner(self.task, loss_type="rank", feature_type="knob")
    elif self.tuner == "ga":
        tuner_obj = GATuner(self.task, pop_size=50)
    elif self.tuner == "random":
        tuner_obj = RandomTuner(self.task)
    elif self.tuner == "gridsearch":
        tuner_obj = GridSearchTuner(self.task)
    else:
        raise ValueError("Invalid tuner: " + self.tuner)

    def _search_best_config():
        # track the best (lowest-latency) record seen so far
        def _callback(_, inputs, results):
            for inp, result in zip(inputs, results):
                new_latency = result.costs[0] if result.error_no == 0 else 1e9
                if self.record is None or self.best_latency > new_latency:
                    self.record = autotvm.record.encode(inp, result)
                    self.best_config = inp.config.to_json_dict()['entity']
                    self.best_latency = new_latency
        return _callback

    # do tuning
    task_trial = min(self.n_trial, len(self.task.config_space))
    tuner_obj.tune(
        n_trial=task_trial,
        early_stopping=self.early_stopping,
        measure_option=self.measure_option,
        callbacks=[
            autotvm.callback.progress_bar(task_trial),
            _search_best_config(),
        ],
    )
    print(self.record)

    kernel_db = KernelDB()
    kernel_db.write_task(self)
    del kernel_db
def tune_kernels(
    tasks,
    measure_option,
    tuner,
    n_trial,
    early_stopping,
    log_filename,
    use_transfer_learning,
):
    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner; fall back to random search when the trial budget
        # already covers the whole config space
        if tuner == "random" or n_trial >= len(tsk.config_space):
            tuner_obj = RandomTuner(tsk)
        elif tuner == "xgb" or tuner == "xgb-rank":
            tuner_obj = XGBTuner(tsk, loss_type="rank")
            # use history data to pre-train the cost model
            if use_transfer_learning:
                if os.path.isfile(log_filename):
                    tuner_obj.load_history(
                        autotvm.record.load_from_file(log_filename))
        elif tuner == "ga":
            tuner_obj = GATuner(tsk, pop_size=100)
        elif tuner == "gridsearch":
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        # do tuning
        tsk_trial = min(n_trial, len(tsk.config_space))
        tuner_obj.tune(
            n_trial=tsk_trial,
            early_stopping=early_stopping,
            measure_option=measure_option,
            callbacks=[
                autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
                autotvm.callback.log_to_file(log_filename),
            ],
        )
def tune_kernels(tasks,
                 measure_option,
                 tuner="gridsearch",
                 early_stopping=None,
                 log_filename="tuning.log",
                 mode=0):
    for i, tsk in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        name = "topi_nn_conv2d" if mode == 0 else "topi_x86_conv2d_NCHWc"
        print(name)
        task = autotvm.task.create(name, args=tsk.args,
                                   target=target, template_key="direct")
        task.workload = tsk.workload

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(task, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(task, pop_size=50)
        elif tuner == 'random':
            tuner_obj = RandomTuner(task)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(task)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        # do tuning
        n_trial = min(len(task.config_space), 1000)
        print("trials=", n_trial)
        tuner_obj.tune(n_trial=n_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(n_trial, prefix=prefix),
                           autotvm.callback.log_to_file(log_filename)
                       ])
def tune_kernels(tasks,
                 measure_option,
                 tuner='rf',
                 early_stopping=None,
                 log_filename='tuning.log'):
    for i, task in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(task, loss_type='rank', plan_size=32)
        elif tuner == 'ga':
            tuner_obj = GATuner(task, pop_size=50)
        elif tuner == 'random':
            tuner_obj = RandomTuner(task)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(task)
        elif tuner in ('rf', 'ada'):
            # accept 'rf' here as well, since it is the default argument
            tuner_obj = RFTuner(task, feature_type="itervar", plan_size=32,
                                dynamic_ep=True)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        # do tuning
        n_trial = len(task.config_space)
        # n_trial = 6
        tuner_obj.tune(n_trial=n_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(n_trial, prefix=prefix),
                           autotvm.callback.log_to_file(log_filename)
                       ])
def tune_tasks(
    tasks: List[autotvm.task.Task],
    log_file: str,
    measure_option: autotvm.measure_option,
    tuner: str,
    trials: int,
    early_stopping: Optional[int] = None,
    tuning_records: Optional[str] = None,
):
    """Tune a list of tasks and output the history to a log file.

    Parameters
    ----------
    tasks : list
        A list of autotvm.Tasks to tune.
    log_file : str
        A file to output the tuning history, in JSON.
    measure_option : autotvm.measure_option
        Options to build and run a tuning task.
    tuner : str
        Which tuner to use.
    trials : int
        The maximum number of tuning trials to perform.
    early_stopping : int, optional
        The minimum number of tuning trials to perform.
        This will be equal to 'trials' if not specified.
    tuning_records: str, optional
        Path to the file produced by the tuning, to be used during tuning.
    """
    if not tasks:
        logger.warning("there were no tasks found to be tuned")
        return

    if not early_stopping:
        early_stopping = trials

    for i, tsk in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # Create a tuner
        if tuner in ("xgb", "xgb-rank"):
            tuner_obj = XGBTuner(tsk, loss_type="rank")
        elif tuner == "xgb_knob":
            tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="knob")
        elif tuner == "ga":
            tuner_obj = GATuner(tsk, pop_size=50)
        elif tuner == "random":
            tuner_obj = RandomTuner(tsk)
        elif tuner == "gridsearch":
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise TVMCException("invalid tuner: %s " % tuner)

        # If transfer learning is being used, load the existing results
        if tuning_records and os.path.exists(tuning_records):
            logger.info("loading tuning records from %s", tuning_records)
            start_time = time.time()
            tuner_obj.load_history(autotvm.record.load_from_file(tuning_records))
            logger.info("loaded history in %.2f sec(s)", time.time() - start_time)

        tuner_obj.tune(
            n_trial=min(trials, len(tsk.config_space)),
            early_stopping=early_stopping,
            measure_option=measure_option,
            callbacks=[
                autotvm.callback.progress_bar(trials, prefix=prefix),
                autotvm.callback.log_to_file(log_file),
            ],
        )
def tune_kernels(tasks,
                 gen_graph_tuner_candidates,
                 measure_top_n,
                 measure_option,
                 tuner='random',
                 early_stopping=None,
                 n_trial=5000,
                 log_filename='tuning.log'):
    """Tune kernels with the ranking model."""
    remeasure_option = None
    if tuner == 'round':
        # Set up another measure option for the final remeasurement.
        remeasure_option = autotvm.measure_option(
            builder=LocalBuilder(),
            runner=measure_option['runner'].local_runner,
        )
        assert isinstance(measure_option['runner'], RankModelRunner)

    best_results = []
    for i, task in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
        callbacks = []
        if task.name in [
                'dense_small_batch.cuda', 'conv2d_cudnn.cuda',
                'dense_cublas.cuda', 'dense_large_batch.cuda',
                'conv2d_transpose_nchw.cuda', 'dense_tensorcore.cuda'
        ]:
            # Skip these tasks
            continue
        if task.name not in measure_option['runner'].models:
            print('not covered by cost models')
            continue

        # create tuner
        if tuner == 'round':
            tuner_obj = RoundTuner(task, n_cfg=measure_top_n)
            # The round tuner uses a different progress callback.
            callbacks = [rank_progress(n_trial, prefix=prefix)]
        else:
            if tuner in ('xgb', 'xgb-rank'):
                tuner_obj = XGBTuner(task, loss_type='rank')
            elif tuner == 'ga':
                tuner_obj = GATuner(task, pop_size=50)
            elif tuner == 'random':
                tuner_obj = RandomTuner(task)
            elif tuner == 'gridsearch':
                tuner_obj = GridSearchTuner(task)
            else:
                raise ValueError("Invalid tuner: " + tuner)
            callbacks = [
                autotvm.callback.progress_bar(n_trial, prefix=prefix),
                autotvm.callback.log_to_file(log_filename)
            ]

        tic = time.time()

        # do tuning
        tuner_obj.tune(n_trial=n_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=callbacks)

        # The round tuner needs an extra measurement step to get the real
        # throughputs.
        if tuner == 'round':
            max_n_layout = 20 if gen_graph_tuner_candidates else 1
            top_cfgs = tuner_obj.get_top_rank_cfgs(max_n_layout)
            measure_batch = create_measure_batch(task, remeasure_option)
            inputs = [MeasureInput(task.target, task, config)
                      for config in top_cfgs]
            sys.stderr.write('{} Measure Top {} Configs'.format(
                prefix, len(inputs)))
            results = measure_batch(inputs)

            best_idx, best_flops = max(
                [(idx, i.task.flop / np.mean(r.costs) / 1e9
                  if r.error_no == 0 else 0)
                 for idx, (i, r) in enumerate(zip(inputs, results))],
                key=lambda x: x[1])

            best_results.append((task.workload, best_idx, best_flops))
            sys.stderr.write(' | Best %.2f GFLOPS at Top %d | %.2fs\n' %
                             (best_flops, best_idx, time.time() - tic))
            autotvm.callback.log_to_file(log_filename)(None, inputs, results)
    return best_results
def tune_model(mod, params, tune_settings, target, model_name):
    """
    Tune a model for a specified number of trials along with other tune
    settings. Tune settings are specified using a json configuration, as per
    the TVM tools readme.
    """
    early_stopping = tune_settings['early_stopping']
    number = tune_settings["number"]
    save_path = tune_settings["save_path"]
    save_name = tune_settings["save_name"]
    repeat = tune_settings["repeat"]
    debug = tune_settings.get("debug_gadqn") or False
    trials = tune_settings["trials"]
    tuner = tune_settings["tuner"]
    target = tvm.target.Target(target)

    tasks = autotvm.task.extract_from_program(
        mod["main"], target=target, target_host="llvm", params=params)

    runner = autotvm.LocalRunner(number=number, repeat=repeat)
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="default"), runner=runner)

    for i, tsk in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # Create a tuner
        if tuner in ("xgb", "xgb-rank"):
            tuner_obj = XGBTuner(tsk, loss_type="rank")
        elif tuner == "xgb_knob":
            tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="knob")
        elif tuner == "ga":
            tuner_obj = GATuner(tsk, pop_size=50)
        elif tuner == "random":
            tuner_obj = RandomTuner(tsk)
        elif tuner == "gridsearch":
            tuner_obj = GridSearchTuner(tsk)
        elif tuner == "ga-dqn" and debug:
            tuner_obj = GADQNTunerDebug(tsk)
        elif tuner == "ga-dqn":
            tuner_obj = GADQNTuner(tsk)
        else:
            raise ValueError("invalid tuner: %s " % tuner)

        abs_path = Path(save_path + save_name).resolve()
        abs_path.mkdir(exist_ok=True, parents=True)
        abs_path_str = str(abs_path)

        tuner_obj.tune(
            n_trial=min(trials, len(tsk.config_space)),
            early_stopping=early_stopping,
            measure_option=measure_option,
            callbacks=[
                autotvm.callback.progress_bar(trials, prefix=prefix),
                autotvm.callback.log_to_file(
                    abs_path_str + f"/tuning_record_model={model_name}.json"),
            ],
        )

        # Save debug info for the RL tuner only
        if tuner == "ga-dqn" and debug:
            tuner_obj.save_model(save_path,
                                 save_name + f"_model={model_name}_layer={i}")
        del tuner_obj
def tune_tasks(tasks,
               measure_option,
               tuner='xgb',
               n_trial=500,
               early_stopping=None,
               log_filename='tuning.log',
               use_transfer_learning=True,
               try_winograd=True):
    # rewrite conv2d tasks whose channels are divisible by the block factor
    # to use the fp16 template
    for i in range(len(tasks)):
        print(tasks[i].args)
        data, kernel, padding, stride, temp, layout, dtype = tasks[i].args
        new_args = (data, kernel, padding, stride, temp, layout, dtype)
        block_factor = 4
        CO, CI, KH, KW = kernel[1]
        if CO % block_factor == 0 and CI % block_factor == 0:
            new_task = autotvm.task.create(tasks[i].name, new_args,
                                           tasks[i].target,
                                           tasks[i].target_host, 'fp16')
            tasks[i] = new_task

    # skip tasks already covered by the pre-tuned log
    if args.pretuned is not None:
        pretuned_ctx = autotvm.apply_history_best(args.pretuned)
        _tasks = []
        for task in tasks:
            if pretuned_ctx._query_inside(target, task.workload) is None:
                _tasks.append(task)
            else:
                print('Ignoring {}'.format(task))
        tasks = _tasks
    print(tasks)

    # create tmp log file
    tmp_log_file = log_filename + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=100)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # do tuning
        tuner_obj.tune(n_trial=n_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(n_trial, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)
                       ])

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_filename)
    os.remove(tmp_log_file)
def tune_tasks(tasks,
               measure_option,
               tuner='xgb',
               n_trial=1000,
               early_stopping=None,
               log_filename='tuning.log',
               use_transfer_learning=True,
               try_winograd=True):
    if try_winograd:
        for i in range(len(tasks)):
            try:  # try winograd template
                tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
                                          tasks[i].target, tasks[i].target_host,
                                          'winograd')
                input_channel = tsk.workload[1][1]
                if input_channel >= 64:
                    tasks[i] = tsk
            except Exception:
                pass

    # create tmp log file
    tmp_log_file = log_filename + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=100)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # do tuning, retrying after transient measurement failures
        tsk_trial = min(n_trial, len(tsk.config_space))
        done_flag = False
        while not done_flag:
            try:
                tuner_obj.tune(n_trial=tsk_trial,
                               early_stopping=early_stopping,
                               measure_option=measure_option,
                               callbacks=[
                                   autotvm.callback.progress_bar(tsk_trial,
                                                                 prefix=prefix),
                                   autotvm.callback.log_to_file(tmp_log_file)
                               ])
                done_flag = True
            except Exception:
                print("\nexception happened... wait 20 seconds.")
                sleep(20)

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_filename)
    os.remove(tmp_log_file)
def tune_tasks(tasks,
               measure_option,
               tuner='xgb',
               n_trial=1000,
               early_stopping=None,
               log_filename='tuning.log',
               use_transfer_learning=True):
    # create tmp log file; kept across runs so tuning can resume
    tmp_log_file = log_filename + ".tmp"
    # if os.path.exists(tmp_log_file):
    #     os.remove(tmp_log_file)

    # NOTE: a disabled block here used to scan tmp_log_file record by record
    # and skip tasks whose workloads were already fully tuned, advancing an
    # index through the old records and loading the remainder as history.
    # It is superseded by prune_old_tasks below.
    tasks = prune_old_tasks(tasks, log_filename)

    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=100)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # do tuning; log each task to its own temporary file, then append the
        # new records to the shared tmp log
        tsk_trial = min(n_trial, len(tsk.config_space))
        with tempfile.NamedTemporaryFile() as tmp_task_log_file:
            tuner_obj.tune(n_trial=tsk_trial,
                           early_stopping=early_stopping,
                           measure_option=measure_option,
                           callbacks=[
                               autotvm.callback.progress_bar(tsk_trial,
                                                             prefix=prefix),
                               autotvm.callback.log_to_file(
                                   tmp_task_log_file.name)
                           ])
            with open(tmp_log_file, 'a') as tmp_log:
                tmp_log.write(tmp_task_log_file.read().decode('utf8'))

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_filename)
    os.remove(tmp_log_file)
def tune_tasks(tasks,
               measure_option,
               tuner='xgb',
               n_trial=1000,
               early_stopping=500,
               log_filename='tuning.log',
               use_transfer_learning=True,
               try_winograd=True):
    if try_winograd:
        for i in range(len(tasks)):
            try:  # try winograd template
                tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
                                          tasks[i].target, tasks[i].target_host,
                                          'winograd')
                input_channel = tsk.workload[1][1]
                if input_channel >= 64:
                    tasks[i] = tsk
            except Exception:
                pass

    # create tmp log file; kept so earlier results can be reused
    tmp_log_file = log_filename + ".tmp"
    # if os.path.exists(tmp_log_file):
    #     os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=100)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # do tuning
        n_trial = 1000
        n_trial = min(n_trial, len(tsk.config_space))

        # dump the whole config space for inspection
        res = []
        for j in range(len(tsk.config_space)):
            res.append(tuner_obj.space.get(j + 1))
        with open("res.txt", "w", encoding='utf-8') as f:
            for line in res:
                f.write(str(line) + '\n')
        # save the dump per task instead:
        # import shutil
        # shutil.copyfile('res.txt', 'res' + prefix + '.txt')

        early_stopping = len(tsk.config_space) // 2
        print("n_trial=", n_trial)
        print("early_stopping=", early_stopping)
        tuner_obj.tune(n_trial=n_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(n_trial, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)
                       ])
        np.savetxt("feas" + prefix + ".txt", tuner_obj.cost_model.feas,
                   fmt='%s', delimiter=' ')
        np.savetxt("x_train" + prefix + ".txt", tuner_obj.cost_model.x_train,
                   fmt='%s', delimiter=' ')
        np.savetxt("y_train" + prefix + ".txt", tuner_obj.cost_model.y_train,
                   fmt='%s', delimiter=' ')
        # This achieves transfer learning for the same operator with an
        # identical config-space layout; would resnet18 -> resnet50 actually
        # work in practice?

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_filename)
def tune_tasks(tasks,
               only_parse,
               measure_option,
               tuner='xgb',
               n_trial=1000,
               early_stopping=None,
               log_filename=None,
               use_transfer_learning=True,
               try_nchwc=True,
               try_winograd=False,
               try_spatial_pack_depthwise=False):
    # create tmp log file
    tmp_log_file = log_filename + ".tmp"
    if not use_transfer_learning:
        if os.path.exists(tmp_log_file):
            os.remove(tmp_log_file)
    else:
        # select actual best logs
        if not os.path.exists(tmp_log_file):
            os.mknod(tmp_log_file)
        autotvm.record.pick_best(tmp_log_file, log_filename)

    if os.path.exists(tmp_log_file):
        # sort out best historic entries
        print("Load historic training logs...")
        best_context = autotvm.task.ApplyHistoryBest(
            autotvm.record.load_from_file(tmp_log_file))
        best_tgtkeys = best_context.best_by_targetkey

    print("Total tasks: %s" % len(tasks))

    if try_nchwc:
        for i in range(len(tasks)):
            # converting conv2d tasks to conv2d_NCHWc tasks
            op_name = tasks[i].workload[0]
            if op_name == 'conv2d':
                func_create = 'topi_x86_conv2d_NCHWc'
            elif op_name == 'depthwise_conv2d_nchw':
                func_create = 'topi_x86_depthwise_conv2d_NCHWc_from_nchw'
            else:
                continue
            try:  # try NCHWc template
                tsk = autotvm.task.create(func_create, tasks[i].args,
                                          tasks[i].target,
                                          tasks[i].target_host, 'direct')
                tsk.workload = tasks[i].workload
                print("[Override Task %2d/%2d (%s) with NCHWc] "
                      "{cfg.space: %i -> %i}"
                      % (i + 1, len(tasks), tsk.workload[0],
                         len(tasks[i].config_space), len(tsk.config_space)))
                tasks[i] = tsk
            except Exception:
                pass

    if try_winograd:
        for i in range(0, len(tasks)):
            try:  # try winograd template
                tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
                                          tasks[i].target,
                                          tasks[i].target_host, 'winograd')
                tasks.append(tsk)
                print("[New Task %2d->%2d (%s) winograd] {cfg.space: %i -> %i}"
                      % (i + 1, len(tasks), tsk.workload[0],
                         len(tasks[i].config_space), len(tsk.config_space)))
            except Exception:
                pass

    # if we want to use spatial pack for depthwise convolution
    if try_spatial_pack_depthwise:
        for i in range(len(tasks)):
            if tasks[i].name == 'topi_nn_depthwise_conv2d_nchw':
                tuner = 'xgb_knob'
                tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
                                          tasks[i].target,
                                          tasks[i].target_host,
                                          'contrib_spatial_pack')
                tasks.append(tsk)
                print("[New Task %2d->%2d (%s) contrib_spatial_pack] "
                      "{cfg.space: %i -> %i}"
                      % (i + 1, len(tasks), tsk.workload[0],
                         len(tasks[i].config_space), len(tsk.config_space)))

    for i, tsk in enumerate(tasks):
        tsk_template = ''
        tsk_org_flop = -1
        device_name = target.device_name if target.device_name else "cpu"
        try:
            # compute best historic entry GFLOPS
            tsk_template = \
                best_tgtkeys[(device_name, tsk.workload)][0][2].template_key
            tsk_org_cost = np.mean(
                best_tgtkeys[(device_name, tsk.workload)][1].costs)
            tsk_org_flop = tsk.flop / tsk_org_cost / 1e9
        except Exception:
            pass

        if tsk_org_flop == -1:
            org_flop_str = "no history"
        else:
            org_flop_str = "%.2f GFLOPS /%s" % (tsk_org_flop, tsk_template)
        prefix = "[Task %2d/%2d %s|%s] (%s) {%s}" % (
            i + 1, len(tasks), tsk.workload[1][:4], tsk.workload[2][:4],
            tsk.workload[0], org_flop_str)

        if only_parse:
            print("%s SKIP tuning" % prefix)
            continue

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'xgb_knob':
            tuner_obj = XGBTuner(tsk, loss_type='rank', feature_type='knob')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=50)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # do tuning
        tsk_trial = min(n_trial, len(tsk.config_space))
        tuner_obj.tune(n_trial=tsk_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)
                       ])

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_filename)
    if not use_transfer_learning:
        os.remove(tmp_log_file)
def tvm_generic(N, H, W, C, kernel_size, K,
                stride=1, padding=0, dilation=1, groups=1,
                number=100, dev=0, timeout=4, target="llvm", trials=100):
    data_shape = (N, C, H, W)
    data = relay.var("data", shape=data_shape, dtype="float32")
    kernel_size = (kernel_size, kernel_size)
    stride = (stride, stride)
    padding = (padding, padding)
    body = layers.conv2d(data=data, channels=K, kernel_size=kernel_size,
                         strides=stride, padding=padding, name="conv2d")
    op = relay.Function(relay.ir_pass.free_vars(body), body)
    sym, params = create_workload(op)

    tasks = autotvm.task.extract_from_program(op, target=target, params=params,
                                              ops=(relay.op.nn.conv2d,))
    tuning_option = {
        "log_filename": "tvm_baseline_{}.log".format(
            (N, C, H, W, K, kernel_size, stride, padding, dilation, groups)),
        "tuner": "xgb",
        "early_stopping": 30,
        "measure_option": autotvm.measure_option(
            builder=autotvm.LocalBuilder(timeout=timeout),
            runner=autotvm.LocalRunner(number=number, repeat=1,
                                       timeout=timeout, min_repeat_ms=150),
            # runner=autotvm.RPCRunner(
            #     '1080ti',  # change the device key to your key
            #     '0.0.0.0', 9190,
            #     number=20, repeat=3, timeout=4, min_repeat_ms=150)
        ),
    }

    log_filename = tuning_option["log_filename"]
    tuner = tuning_option["tuner"]
    early_stopping = tuning_option["early_stopping"]
    measure_option = tuning_option["measure_option"]

    # only support one task
    assert len(tasks) == 1

    for i, task in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(task, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(task, pop_size=100)
        elif tuner == 'random':
            tuner_obj = RandomTuner(task)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(task)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        # do tuning (disabled here; a pre-tuned log file is required instead)
        n_trial = trials
        length = len(task.config_space)
        print("config space length=", length)
        # tuner_obj.tune(n_trial=min(n_trial, length),
        #                early_stopping=early_stopping,
        #                measure_option=measure_option,
        #                callbacks=[
        #                    autotvm.callback.progress_bar(n_trial, prefix=prefix),
        #                    autotvm.callback.log_to_file(log_filename)])

    if not os.path.exists(log_filename):
        raise RuntimeError("the log file {} doesn't exist".format(log_filename))

    with autotvm.apply_history_best(log_filename):
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build_module.build(
                op, target=target, params=params)

    ctx = tvm.device(str(target), 0)
    data_tvm = tvm.nd.array(
        (np.random.uniform(size=data_shape)).astype("float32"))
    module = runtime.create(graph, lib, ctx)
    module.set_input("data", data_tvm)
    module.set_input(**params)

    # evaluate
    ftimer = module.module.time_evaluator("run", ctx, number=number, repeat=1)
    prof_res = np.array(ftimer().results) * 1e3
    return prof_res
def tune_kernels(
    tasks,
    measure_top_n,
    measure_option,
    tuner="random",
    early_stopping=None,
    n_trial=5000,
    log_filename="tuning.log",
):
    """Tune kernels with the ranking model."""
    remeasure_option = None
    if tuner == "round":
        # Set up another measure option for the final remeasurement.
        remeasure_option = autotvm.measure_option(
            builder=LocalBuilder(),
            runner=measure_option["runner"].local_runner,
        )
        assert isinstance(measure_option["runner"], RankModelRunner)

    for i, task in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
        callbacks = []
        if task.name not in measure_option["runner"].models:
            print("%s %s not covered by cost models" % (prefix, task.name))
            continue

        # create tuner
        if tuner == "round":
            tuner_obj = RoundTuner(task, n_cfg=measure_top_n)
            # The round tuner uses a different progress callback.
            callbacks = [rank_progress(n_trial, prefix=prefix)]
        else:
            if tuner in ("xgb", "xgb-rank"):
                tuner_obj = XGBTuner(task, loss_type="rank")
            elif tuner == "ga":
                tuner_obj = GATuner(task, pop_size=50)
            elif tuner == "random":
                tuner_obj = RandomTuner(task)
            elif tuner == "gridsearch":
                tuner_obj = GridSearchTuner(task)
            else:
                raise ValueError("Invalid tuner: " + tuner)
            callbacks = [
                autotvm.callback.progress_bar(n_trial, prefix=prefix),
                autotvm.callback.log_to_file(log_filename),
            ]

        tic = time.time()

        # do tuning
        tuner_obj.tune(
            n_trial=n_trial,
            early_stopping=early_stopping,
            measure_option=measure_option,
            callbacks=callbacks,
        )

        # The round tuner needs an extra measurement step to get the real
        # throughputs.
        if tuner == "round":
            top_cfgs = tuner_obj.get_top_rank_cfgs(1)
            measure_batch = create_measure_batch(task, remeasure_option)
            inputs = [MeasureInput(task.target, task, config)
                      for config in top_cfgs]
            sys.stderr.write("{} Measure Top {} Configs".format(
                prefix, len(inputs)))
            results = measure_batch(inputs)

            best_idx, best_flops = max(
                [(idx, i.task.flop / np.mean(r.costs) / 1e9
                  if r.error_no == 0 else 0)
                 for idx, (i, r) in enumerate(zip(inputs, results))],
                key=lambda x: x[1],
            )

            sys.stderr.write(" | Best %.2f GFLOPS at Top %d | %.2fs\n" %
                             (best_flops, best_idx, time.time() - tic))
            autotvm.callback.log_to_file(log_filename)(None, inputs, results)
def tune_tasks(mod, params, option):
    print('Creating tasks ...')
    tasks = autotvm.task.extract_from_program(mod["main"],
                                              target=option['target'],
                                              params=params,
                                              ops=(relay.op.nn.conv2d,
                                                   relay.op.nn.dense))
    if option['try_winograd']:
        for i in range(len(tasks)):
            try:  # try winograd template
                tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
                                          tasks[i].target,
                                          tasks[i].target_host, 'winograd')
                input_channel = tsk.workload[1][1]
                if input_channel >= 64:
                    tasks[i] = tsk
            except Exception as err:
                print(err)

    # create tmp log file
    tmp_log_file = option['log_file']
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        # converting conv2d tasks to conv2d_NCHWc tasks
        # op_name = tsk.workload[0]
        # if op_name == 'conv2d':
        #     func_create = 'topi_x86_conv2d_NCHWc'
        # elif op_name == 'depthwise_conv2d_nchw':
        #     func_create = 'topi_x86_depthwise_conv2d_NCHWc_from_nchw'
        # else:
        #     func_create = tasks[i].name
        # task = autotvm.task.create(func_create, args=tsk.args,
        #                            target=target, template_key='direct')
        # task.workload = tsk.workload
        # tsk = task

        prefix = "[Task %2d/%2d] (%s)" % (i + 1, len(tasks), tsk.name)

        # create tuner
        tuner = option['tuner']
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=100)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if option['use_transfer_learning']:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # do tuning
        n_trial = min(option['n_trial'], len(tsk.config_space))
        tuner_obj.tune(n_trial=n_trial,
                       early_stopping=option['early_stopping'],
                       measure_option=option['measure_option'],
                       callbacks=[
                           autotvm.callback.progress_bar(n_trial, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)
                       ])

    if os.path.exists(option['log_best_file']):
        os.remove(option['log_best_file'])
    autotvm.record.pick_best(option['log_file'], option['log_best_file'])
def tune_tasks(tasks,
               measure_option,
               tuner='xgb',
               n_trial=1000,
               early_stopping=None,
               log_filename='tuning.log',
               use_transfer_learning=False,
               try_winograd=True,
               try_spatial_pack_depthwise=False):
    if try_winograd:
        for i in range(len(tasks)):
            try:  # try winograd template
                tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
                                          tasks[i].target, tasks[i].target_host,
                                          'winograd')
                input_channel = tsk.workload[1][1]
                if input_channel >= 64:
                    tasks[i] = tsk
            except Exception:
                pass

    # if we want to use spatial pack for depthwise convolution
    if try_spatial_pack_depthwise:
        tuner = 'xgb_knob'
        for i in range(len(tasks)):
            if tasks[i].name == 'topi_nn_depthwise_conv2d_nchw':
                tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
                                          tasks[i].target,
                                          tasks[i].target_host,
                                          'contrib_spatial_pack')
                tasks[i] = tsk

    # create tmp log file
    tmp_log_file = log_filename + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'xgb_knob':
            tuner_obj = XGBTuner(tsk, loss_type='rank', feature_type='knob')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=50)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # do tuning; cap trials so the progress-bar total matches
        tsk_trial = min(n_trial, len(tsk.config_space))
        tuner_obj.tune(n_trial=tsk_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)])

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_filename)
    os.remove(tmp_log_file)
def autotvm_tune(func, params, target):
    """
    Parameters
    ----------
    func : relay.expr.Function
    params : dict of str to numpy array
    target : tvm.target.Target
    """
    # Array of autotvm.task.Task
    tasks = autotvm.task.extract_from_program(func, target=target,
                                              params=params,
                                              ops=(relay.op.nn.conv2d,))

    # Check tasks and convert them to their x86 NCHWc counterparts.
    for i in range(len(tasks)):
        op_name = tasks[i].workload[0]
        if op_name == 'conv2d':
            func_create = 'topi_x86_conv2d_NCHWc'
        elif op_name == 'depthwise_conv2d_nchw':
            func_create = 'topi_x86_depthwise_conv2d_NCHWc_from_nchw'
        else:
            raise ValueError("Tuning {} is not supported on x86".format(op_name))

        print("[Create Task %2d/%2d (%s, %s) ] "
              % (i + 1, len(tasks), tasks[i].name, tasks[i].workload[0]))

        tsk = autotvm.task.create(func_create, args=tasks[i].args,
                                  target=tasks[i].target,
                                  template_key='direct')
        tsk.workload = tasks[i].workload
        tasks[i] = tsk

    # tuning options
    tuner = 'xgb'
    n_trial = 100
    early_stopping = None
    log_filename = 'tuning.log'
    use_transfer_learning = True
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(timeout=10),
        runner=autotvm.LocalRunner(number=10, repeat=1, min_repeat_ms=1000))

    # create tmp log file
    tmp_log_file = log_filename + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=100)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # do tuning
        tsk_trial = min(n_trial, len(tsk.config_space))
        tuner_obj.tune(n_trial=tsk_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)
                       ])

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_filename)
    os.remove(tmp_log_file)
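# A small inspection helper (an illustration, not part of the original
# snippets): read a tuning log produced by any of the functions above and
# print the best throughput per workload. It uses only documented autotvm
# record APIs; the function name is hypothetical, and the GFLOPS formula
# mirrors the one used by the round-tuner snippets above.
def summarize_log_example(log_filename="tuning.log"):
    best = {}  # workload key -> (GFLOPS, config)
    for inp, res in autotvm.record.load_from_file(log_filename):
        if res.error_no != 0:
            continue  # skip failed measurements
        gflops = inp.task.flop / np.mean(res.costs) / 1e9
        key = str(inp.task.workload)
        if key not in best or gflops > best[key][0]:
            best[key] = (gflops, inp.config)
    for key, (gflops, config) in best.items():
        print("%-60s %8.2f GFLOPS  %s" % (key[:60], gflops, config))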