def autotvm_tuning_opt(target, log_file, dtype="float32"):
    """Build an AutoTVM tuning-option dict for *target*.

    CPU targets get a cache-flushing local runner; anything else gets the
    GPU-style local runner.  ``dtype`` is accepted for interface
    compatibility but not used here.
    """
    # NOTE: `target.keys` is assumed to be an iterable of target key strings.
    if "cpu" in target.keys:
        print("enable cpu tuning options")
        # Flush the CPU cache between measurements for stable timings.
        opt = autotvm.measure_option(
            builder=autotvm.LocalBuilder(),
            runner=autotvm.LocalRunner(
                number=1, repeat=10, min_repeat_ms=0,
                enable_cpu_cache_flush=True),
        )
    else:
        print("enable gpu tuning options")
        opt = autotvm.measure_option(
            builder=autotvm.LocalBuilder(timeout=10),
            runner=autotvm.LocalRunner(
                number=20, repeat=3, timeout=4, min_repeat_ms=150),
        )
    return {
        "log_filename": log_file,
        "tuner": "xgb",
        "early_stopping": None,
        "measure_option": opt,
    }
def create_measure(device, flag="t4"):
    """Create an AutoTVM ``measure_option`` for the given device class.

    Parameters
    ----------
    device : str
        'arm'/'aarch64' (RPC to key "pi"), any string containing 'x86'
        (local runner), or 'gpu' (RPC to key *flag*).
    flag : str
        RPC device key used for the GPU runner (default "t4").

    Raises
    ------
    ValueError
        If *device* matches none of the supported device classes.
    """
    if device == 'arm' or device == 'aarch64':
        # NOTE(review): `use_android` is read from enclosing/global scope —
        # confirm it is defined before this branch runs.
        measure_option = autotvm.measure_option(
            builder=autotvm.LocalBuilder(
                build_func='ndk' if use_android else 'default'),
            runner=autotvm.RPCRunner(
                "pi",
                host='0.0.0.0',
                port=9190,
                number=5,
                timeout=10,
            ))
    elif 'x86' in device:
        measure_option = autotvm.measure_option(
            builder=autotvm.LocalBuilder(),
            runner=autotvm.LocalRunner(number=5, repeat=1,
                                       min_repeat_ms=1000),
        )
    elif device == 'gpu':
        measure_option = autotvm.measure_option(
            builder=autotvm.LocalBuilder(timeout=1000),
            runner=autotvm.RPCRunner(
                flag,  # change the device key to your key
                '0.0.0.0',
                9190,
                number=20,
                repeat=3,
                timeout=1000,
                min_repeat_ms=150))
    else:
        # Fix: previously an unknown device fell through and raised
        # UnboundLocalError at the return below.
        raise ValueError("unsupported device: %r" % (device,))
    return measure_option
def _tune_topi_cuda(self, name, args, te_tensors, tune_kwargs):
    """Tune a TOPI CUDA task, build with the best config, and clean up.

    Parameters
    ----------
    name : str
        Human-readable kernel name; also used (slugified) for the temp log.
    args : tuple
        Arguments forwarded to ``autotvm.task.create`` and the build step.
    te_tensors :
        Tensors forwarded to ``self._build_topi_cuda``.
    tune_kwargs : dict
        Optional overrides: n_trial, preserve_log, tuner, builder, runner,
        callbacks.

    Returns
    -------
    The result of ``self._build_topi_cuda`` under the best tuning record.
    """
    n_trial = tune_kwargs.get('n_trial', 40)
    preserve_log = tune_kwargs.get('preserve_log', False)
    tmp_file_name = slugify(name) + '.topi_cuda.log'
    if n_trial > 0:
        task = autotvm.task.create(self.topi_cuda_task_name, args=args,
                                   target='cuda')
        tuner = tune_kwargs.get('tuner', autotvm.tuner.XGBTuner(task))
        tuner.tune(
            n_trial=n_trial,
            measure_option={
                'builder': tune_kwargs.get('builder', autotvm.LocalBuilder()),
                'runner': tune_kwargs.get(
                    'runner',
                    autotvm.LocalRunner(timeout=20,
                                        **default_tune_eval_settings)),
            },
            callbacks=[
                autotvm.callback.progress_bar(n_trial, prefix=f'TOPI {name}'),
                autotvm.callback.log_to_file(tmp_file_name),
                *tune_kwargs.get('callbacks', [])
            ])
    # NOTE(review): when n_trial <= 0 (or tuning wrote no records) the log
    # file may not exist; apply_history_best behaviour then depends on the
    # TVM version — confirm intended fallback.
    with autotvm.apply_history_best(tmp_file_name):
        result = self._build_topi_cuda(name, args, te_tensors)
    if not preserve_log and os.path.exists(tmp_file_name):
        # Fix: os.remove previously raised FileNotFoundError when no
        # tuning log had been written (e.g. n_trial == 0).
        os.remove(tmp_file_name)
    return result
def test_autotvm(hexagon_session):
    """Top level test function for testing autotvm"""
    logfilename = "./hexagon.autotvm.log"
    # Remote runner drives measurement on the Hexagon device session.
    hexagon_runner = autotvm.RPCRunner(
        module_loader=HexagonModuleLoader(hexagon_session),
        key=hexagon_session._remote_kw["key"],
        host=hexagon_session._remote_kw["host"],
        port=hexagon_session._remote_kw["port"],
        number=3,
        timeout=15,
        min_repeat_ms=150,
        # cooldown_interval=150
    )
    options = {
        "log_filename": logfilename,
        "early_stopping": None,
        "measure_option": autotvm.measure_option(
            builder=autotvm.LocalBuilder(timeout=15),
            runner=hexagon_runner,
        ),
    }
    target_hexagon = tvm.target.hexagon("v68")
    task = autotvm.task.create(
        "demo_template", args=[], target=target_hexagon,
        target_host=target_hexagon)
    tune_tasks([task], **options)
def test_tuning_cpu():
    """Extract the single conv2d task from a small Relay program and run
    20 random-tuner trials locally, expecting at least one clean record."""
    ir_mod = tvm.parser.fromtext(
        textwrap.dedent("""
        #[version = "0.0.5"]
        def @main(%a : Tensor[(1, 3, 32, 32), float32], %b : Tensor[(3, 3, 5, 5), float32]) {
            nn.conv2d(%a, %b, data_layout="NCHW", kernel_layout="OIHW")
        }
        """))
    tasks = autotvm.task.relay_integration.extract_from_program(
        ir_mod, {}, tvm.target.create("llvm"))
    assert len(tasks) == 1, f"Extracted != 1 task from program: {tasks!r}"

    task = tasks[0]
    measure_option = autotvm.measure_option(autotvm.LocalBuilder(),
                                            autotvm.LocalRunner())
    results = []

    def _collect(_tuner, _inputs, rs):
        results.extend(rs)

    tuner = RandomTuner(task)
    tuner.tune(n_trial=20,
               measure_option=measure_option,
               callbacks=(_collect, ))
    assert len(results) == 20
    successful_results = [
        r for r in results
        if r.error_no == autotvm.MeasureErrorNo.NO_ERROR
    ]
    assert successful_results, f"No successful tuning runs: {results!r}"
def test_task_tuner_without_measurement():
    """Every tuner class should reach non-trivial best_flops when fed
    synthetic measurement results from a dummy runner."""
    task, target = get_sample_task()

    class DummyRunner(Runner):
        """Runner that fabricates one random-cost result per input."""

        def __init__(self):
            super().__init__(1, 1)

        def run(self, measure_inputs, build_results):
            return [
                MeasureResult((np.random.random(), ), 0, 0.2, time.time())
                for _ in measure_inputs
            ]

        def get_build_kwargs(self):
            return {}

    measure_option = autotvm.measure_option(builder=autotvm.LocalBuilder(),
                                            runner=DummyRunner())
    logging.info("%s", task.config_space)

    tuner_classes = (
        autotvm.tuner.RandomTuner,
        autotvm.tuner.GridSearchTuner,
        autotvm.tuner.GATuner,
        autotvm.tuner.XGBTuner,
    )
    for make_tuner in tuner_classes:
        tuner = make_tuner(task)
        tuner.tune(n_trial=10, measure_option=measure_option)
        assert tuner.best_flops > 1
def test_check_correctness():
    """check_correctness should accept a good template and flag a bad one."""
    task, target = get_sample_task()
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.LocalRunner(check_correctness=True))

    def _expect_error_no(expected):
        # Build a tune() callback asserting every result has `expected`.
        def _cb(_tuner, measure_inputs, measure_results):
            for _inp, res in zip(measure_inputs, measure_results):
                assert res.error_no == expected
        return _cb

    tuner = autotvm.tuner.RandomTuner(task)
    tuner.tune(n_trial=2,
               measure_option=measure_option,
               callbacks=[_expect_error_no(0)])

    # a bad template
    n = 128
    target = tvm.target.Target("llvm -device=bad_device")
    task = autotvm.task.create("testing/bad_matmul",
                               args=(n, n, n, "float32"),
                               target=target)

    tuner = autotvm.tuner.RandomTuner(task)
    tuner.tune(n_trial=2,
               measure_option=measure_option,
               callbacks=[_expect_error_no(MeasureErrorNo.WRONG_ANSWER)])
def test_convolution(n_trial=2000, early_stopping=400, learn_start=50,
                     memory_capacity=1000, update_frequency=50,
                     discount=0.99, epsilon=(1.0, 0.01, 0.99)):
    """Test simple convolution with RLTuner."""
    mod, params = _get_relay_convolution()
    tasks = autotvm.task.extract_from_program(
        mod["main"], target=target,
        target_host=tvm.target.Target("llvm"), params=params)

    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="default"),
        runner=autotvm.LocalRunner(number=1, repeat=4))

    tuner_obj = GADQNTuner(tasks[0],
                           learn_start=learn_start,
                           memory_capacity=memory_capacity,
                           update_frequency=update_frequency,
                           discount=discount,
                           epsilon=epsilon,
                           debug=True)
    progress = autotvm.callback.progress_bar(n_trial, prefix="[Task 1/1]")
    tuner_obj.tune(n_trial=n_trial,
                   early_stopping=early_stopping,
                   measure_option=measure_option,
                   callbacks=[progress])
def runner(target):
    """Run 20 random-tuner trials on a sample task and require at least
    one record that counts as a success."""
    # init task
    task, target = get_sample_task(target, None)
    logging.info("task config space: %s", task.config_space)
    measure_option = autotvm.measure_option(autotvm.LocalBuilder(),
                                            autotvm.LocalRunner())
    results = []

    def _gather(_tuner, _inputs, rs):
        results.extend(rs)

    RandomTuner(task).tune(n_trial=20,
                           measure_option=measure_option,
                           callbacks=(_gather, ))
    assert len(results) == 20

    # We filter records before building if we know they won't work ahead of
    # time, so we can't guarantee a good record — instantiation errors
    # therefore count as success too.
    acceptable = (autotvm.MeasureErrorNo.NO_ERROR,
                  autotvm.MeasureErrorNo.INSTANTIATION_ERROR)
    successful_results = [r for r in results if r.error_no in acceptable]
    assert successful_results, f"No successful tuning runs: {results!r}"
def main():
    """Evaluate a quantized MobileNet v1 on an ARM CPU target (tuning
    disabled; existing log file is reused)."""
    target = tvm.target.arm_cpu()
    batch_size = 1
    dtype = 'uint8'
    quant_model_url = ("http://download.tensorflow.org/models/"
                       "mobilenet_v1_2018_08_02/"
                       "mobilenet_v1_1.0_224_quant.tgz")
    model_name = "mobilenet_v1_1.0_224_quant"
    log_file = "%s.log" % model_name
    input_tensor = "input"

    local_measure = autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.LocalRunner(number=10, repeat=1, min_repeat_ms=1000),
    )
    tuning_option = {
        'log_filename': log_file,
        'tuner': 'random',
        'early_stopping': 800,
        'measure_option': local_measure,
    }

    mod, params, data_shape = tune(tuning_option, target, quant_model_url,
                                   model_name, batch_size, input_tensor,
                                   need_tune=False)
    evaluate(log_file, mod, params, target, input_tensor, data_shape,
             input_dtype=dtype)
def _test_op_with_ga(save_path, save_name, workload_name, n_trial,
                     early_stopping):
    """Test a specified single workload with GA tuner."""
    print(f"Running experiment with settings: n trial: {n_trial}, "
          f"early stopping: {early_stopping}")

    mod, params = _get_relay_workload(workload_name)
    tasks = autotvm.task.extract_from_program(
        mod["main"], target=target,
        target_host=tvm.target.Target("llvm"), params=params)

    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="default"),
        runner=autotvm.LocalRunner(number=1, repeat=4))

    tuner_obj = GATuner(tasks[0], debug=True)
    progress = autotvm.callback.progress_bar(n_trial, prefix="[Task 1/1]")
    tuner_obj.tune(n_trial=n_trial,
                   early_stopping=early_stopping,
                   measure_option=measure_option,
                   callbacks=[progress])
    tuner_obj.save_model(save_path, save_name)
def test_tuning_gpu(target, ctx):
    """Run 20 random-tuner trials on a GPU sample task and require at
    least one error-free record."""
    # init task
    task, target = get_sample_task(target, None)
    logging.info("task config space: %s", task.config_space)
    measure_option = autotvm.measure_option(autotvm.LocalBuilder(),
                                            autotvm.LocalRunner())
    results = []

    def _gather(_tuner, _inputs, rs):
        results.extend(rs)

    RandomTuner(task).tune(n_trial=20,
                           measure_option=measure_option,
                           callbacks=(_gather, ))
    assert len(results) == 20
    successful_results = [
        r for r in results
        if r.error_no == autotvm.MeasureErrorNo.NO_ERROR
    ]
    assert successful_results, f"No successful tuning runs: {results!r}"
def tuning_model(model_path):
    """Tune the model at *model_path* for the CLI-selected target and
    return the compiled ``(graph, lib, params)`` triple.

    Reads ``args.target`` from module scope: 'x86'/'cpu' uses a local
    runner, 'gpu' uses an RPC runner with device key 'V100'.

    Raises
    ------
    ValueError
        If ``args.target`` is none of 'x86', 'cpu', 'gpu'.
    """
    dtype = 'float32'
    ox, shape_dict = get_model(model_path)
    input_name = list(shape_dict.keys())[0]
    device_key = None
    if args.target == 'gpu':
        device_key = 'V100'
    use_android = False
    log_file = get_logfile()
    other_option = {
        'model_path': model_path,
        'dtype': dtype,
        'input_name': input_name,
        'device_key': device_key,
        'use_android': use_android
    }
    if args.target == 'x86' or args.target == 'cpu':
        measure_option = autotvm.measure_option(
            builder=autotvm.LocalBuilder(),
            runner=autotvm.LocalRunner(
                number=10,
                repeat=1,
                min_repeat_ms=1000
            )
        )
    elif args.target == 'gpu':
        measure_option = autotvm.measure_option(
            builder=autotvm.LocalBuilder(timeout=10),
            runner=autotvm.RPCRunner(
                device_key, '0.0.0.0', 9190,
                number=20, repeat=3, timeout=4,
                min_repeat_ms=150)
        )
    else:
        # Fix: an unrecognised target previously fell through and raised
        # NameError on `measure_option` below.
        raise ValueError("unsupported target: %r" % (args.target,))
    n_trial = 200
    tuning_option = {
        'log_filename': log_file,
        'tuner': 'xgb',
        'n_trial': n_trial,
        'early_stopping': 80,
        'measure_option': measure_option
    }
    graph, lib, params = tuning(tuning_option, **other_option)
    return graph, lib, params
def run(name, N, H, W, CO, CI, KH, KW, stride, pad, dilation):
    """Tune a CUDA conv2d (NCHW) workload, then build the best config and
    time it on the local GPU.

    Parameters
    ----------
    name : str
        Label used to derive the log file name ("conv2d_<name>.log").
    N, H, W, CO, CI, KH, KW : int
        Batch, input height/width, output channels, input channels,
        kernel height/width.
    stride, pad : int
        Scalar stride and padding, expanded to (h, w) tuples below.
    dilation :
        Passed through to the conv2d template unchanged.
    """
    # Expand scalar stride/pad into the (h, w) tuples the template expects.
    N, H, W, CO, CI, KH, KW, strides, padding = N, H, W, CO, CI, KH, KW, (stride, stride), (pad, pad)
    task = autotvm.task.create(conv2d_nchw, args=(N, H, W, CO, CI, KH, KW, strides, padding, dilation), target='cuda')
    print(task.config_space)
    logfile = "conv2d_" + name + ".log"

    # Use local gpu, measure 10 times for every config to reduce variance
    # The timeout of compiling a program is 10 seconds, the timeout for running is 4 seconds
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.LocalRunner(repeat=3, min_repeat_ms=100, timeout=10)
    )

    # Begin tuning, log records to file `conv2d.log`
    # During tuning we will also try many invalid configs, so you are expected to
    # see many error reports. As long as you can see non-zero GFLOPS, it is okay.
    tuner = autotvm.tuner.XGBTuner(task)
    tuner.tune(n_trial=1000,
               measure_option=measure_option,
               callbacks=[autotvm.callback.log_to_file(logfile)])

    # Finally we can inspect the best config from log file, check correctness,
    # and measure running time.
    # inspect the best config
    dispatch_context = autotvm.apply_history_best(logfile)
    best_config = dispatch_context.query(task.target, task.workload)
    print("\nBest config:")
    print(best_config)

    # apply history best from log file
    with autotvm.apply_history_best(logfile):
        with tvm.target.create("cuda"):
            s, arg_bufs = conv2d_nchw(N, H, W, CO, CI, KH, KW, strides, padding, dilation)
            func = tvm.build(s, arg_bufs)

    # check correctness (reference computation currently commented out)
    a_np = np.random.uniform(size=(N, CI, H, W)).astype(np.float32)
    w_np = np.random.uniform(size=(CO, CI, KH, KW)).astype(np.float32)
    # c_np = conv2d_nchw_python(a_np, w_np, strides, padding)
    ctx = tvm.gpu()
    a_tvm = tvm.nd.array(a_np, ctx=ctx)
    w_tvm = tvm.nd.array(w_np, ctx=ctx)
    # Output shape follows the standard conv arithmetic for scalar stride/pad.
    c_tvm = tvm.nd.empty((N, CO, (H + 2 * pad - KH) // stride + 1, (W + 2 * pad - KW) // stride + 1), ctx=ctx)
    # func(a_tvm, w_tvm, c_tvm)
    # tvm.testing.assert_allclose(c_np, c_tvm.asnumpy(), rtol=1e-2)

    # Evaluate running time. Here we choose a large repeat number (400) to reduce the noise
    # and the overhead of kernel launch. You can also use nvprof to validate the result.
    evaluator = func.time_evaluator(func.entry_name, ctx, number=10)
    cost = evaluator(a_tvm, w_tvm, c_tvm).mean * 1e3
    print('Time cost of this operator: %f' % cost)
    # NOTE(review): this writes the literal text "name," for every workload —
    # it probably should interpolate the `name` variable; confirm intent.
    with open("autotvm_conv_nchw.txt", "a") as f:
        f.write("name, {}\n".format(cost))
def test_tuning(target, ctx):
    """Smoke-test tuning: 20 random trials on a sample task, local measure."""
    # init task
    task, target = get_sample_task(target, None)
    logging.info("%s", task.config_space)

    opts = autotvm.measure_option(autotvm.LocalBuilder(),
                                  autotvm.LocalRunner())
    RandomTuner(task).tune(n_trial=20, measure_option=opts)
def tune_cuda_tile(name, tree, kernel_args, parser, n_trial=40, tuner=None,
                   measure_option=None, callbacks=None, preserve_log=False):
    """Tune CUDA tile configurations for a kernel and return the best one.

    Parameters
    ----------
    name : str
        Kernel name; also used (slugified) for the temporary log file.
    tree, kernel_args, parser :
        Inputs to ``CUDATileTask`` (tree is copied before use).
    n_trial : int
        Number of tuning trials; <= 0 skips tuning and only loads the log.
    tuner :
        Optional tuner class/factory; defaults to an XGBTuner with 'knob'
        features.
    measure_option : dict, optional
        Entries here override the default builder/runner.
    callbacks : list, optional
        Extra tune() callbacks appended after progress bar and log writer.
    preserve_log : bool
        Keep the temporary log file instead of deleting it.

    Returns
    -------
    (best, best_cost) : (CUDATileConfigEntity, float)

    Raises
    ------
    Exception
        If no usable config was found in the log.
    """
    tmp_file_name = slugify(name) + '.cuda_tile.log'
    task = CUDATileTask(name, tree.copy(), kernel_args, parser)
    from random import randint
    # Sanity-build one randomly chosen config up front so template errors
    # surface before any tuning time is spent.
    stmt, args = task.instantiate(
        task.config_space.get(randint(0, len(task.config_space) - 1)))
    kernel = tvm.build(stmt, name=name, target='cuda')
    if n_trial > 0:
        if tuner is None:
            tuner = autotvm.tuner.XGBTuner(task, feature_type='knob')
        else:
            # Caller passed a tuner class/factory; instantiate it on the task.
            tuner = tuner(task)
        tuner.tune(
            n_trial=n_trial,
            measure_option={
                'builder': autotvm.LocalBuilder(),
                'runner': autotvm.LocalRunner(timeout=20,
                                              **default_tune_eval_settings),
                # Caller-supplied options override the defaults above.
                **(measure_option or {}),
            },
            callbacks=[
                autotvm.callback.progress_bar(n_trial,
                                              prefix=f'CUDATile {name}'),
                autotvm.callback.log_to_file(tmp_file_name),
                *(callbacks or [])
            ])
    best, best_cost = load_best(tmp_file_name, task)
    # Release tuner/measurement machinery before possibly raising below.
    import gc
    gc.collect()
    if not best:
        raise Exception('failed to build kernel')
    best = CUDATileConfigEntity.from_json_dict(best)
    print('CUDATile %s: best %s, best cost %.12f' %
          (name, repr(best), best_cost))
    if not preserve_log:
        os.remove(tmp_file_name)
    return best, best_cost
def tune_kernels(tasks,
                 builder=autotvm.LocalBuilder(),
                 runner=autotvm.LocalRunner(number=10, repeat=1,
                                            min_repeat_ms=1000),
                 tuner='ga',
                 early_stopping=None,
                 log_filename=log_file):
    """Tune every extracted task, converting x86 conv2d tasks to NCHWc form.

    Parameters
    ----------
    tasks : list
        AutoTVM tasks extracted from a model.
    builder, runner :
        Measurement builder/runner.  NOTE(review): these defaults (and
        ``log_filename=log_file``) are evaluated once at import time and
        shared across calls — confirm that is intended.
    tuner : str
        One of 'xgb', 'xgb-rank', 'ga', 'random', 'gridsearch'.
    early_stopping : int or None
        Passed through to ``tuner_obj.tune``.
    log_filename : str
        File that tuning records are appended to.

    Raises
    ------
    ValueError
        For an unsupported conv2d-like op or an unknown tuner name.
    """
    # NOTE(review): `target` is read from module scope below — confirm.
    measure_option = autotvm.measure_option(builder, runner)
    for i, tsk in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # converting conv2d tasks to conv2d_NCHWc tasks
        if tsk.workload:
            op_name = tsk.workload[0]
            if op_name == 'conv2d':
                func_create = 'topi_x86_conv2d_NCHWc'
            elif op_name == 'depthwise_conv2d_nchw':
                func_create = 'topi_x86_depthwise_conv2d_NCHWc_from_nchw'
            else:
                raise ValueError(
                    "Tuning {} is not supported on x86".format(op_name))
            task = autotvm.task.create(func_create,
                                       args=tsk.args,
                                       target=target,
                                       template_key='direct')
            # Preserve the original workload so records map back correctly.
            task.workload = tsk.workload
        else:
            task = tsk

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(task, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(task, pop_size=1000)
        elif tuner == 'random':
            tuner_obj = RandomTuner(task)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(task)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        # do tuning: exhaust the whole config space for this task.
        n_trial = len(task.config_space)
        print("n_trial", n_trial)
        tuner_obj.tune(n_trial=n_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(n_trial,
                                                         prefix=prefix),
                           autotvm.callback.log_to_file(log_filename)
                       ])
def test_gemm(mm, nn, ll):
    """Tune a CUDA GEMM template, build the best config, and validate the
    output against numpy before timing it.

    Parameters
    ----------
    mm, nn, ll : int
        GEMM dimensions m, n, l.  The correctness check computes
        np.dot(b_np.T, a_np) with a_np (l, n) and b_np (l, m), producing
        an (m, n) result.
    """
    # correctness
    m, n, l = mm, nn, ll
    dtype = 'float32'
    # Stream autotvm's tuning log to stdout at DEBUG level.
    logging.getLogger('autotvm').setLevel(logging.DEBUG)
    logging.getLogger('autotvm').addHandler(logging.StreamHandler(sys.stdout))
    log_file = 'gemm.log'
    task = autotvm.task.create('gemm-autotune/gemm_v2', args = (m, n, l), target='cuda')
    print(task.config_space)
    measure_option = autotvm.measure_option(
        builder = autotvm.LocalBuilder(),
        runner = autotvm.LocalRunner(repeat=3, min_repeat_ms=100, timeout=4)
    )
    # 'knob' features keep the XGBoost model input compact for this template.
    tuner = autotvm.tuner.XGBTuner(task, feature_type='knob')
    tuner.tune(n_trial=1000,
               measure_option = measure_option,
               callbacks = [autotvm.callback.log_to_file(log_file)])
    # inspect the best config found during tuning
    dispatch_context = autotvm.apply_history_best(log_file)
    best_config = dispatch_context.query(task.target, task.workload)
    print('\nBest config:')
    print(best_config)
    # Build the kernel under the best-found schedule.
    with autotvm.apply_history_best(log_file):
        with tvm.target.create('cuda'):
            s, arg_bufs = gemm_autotune(m, n, l)
            f = tvm.build(s, arg_bufs)
    # launch the kernel.
    # a_np = np.random.uniform(size=(n, l)).astype(A.dtype)
    # b_np = np.random.uniform(size=(m, l)).astype(B.dtype)
    ctx = tvm.gpu(0)
    a_np = np.random.uniform(size=(l, n)).astype(dtype)
    b_np = np.random.uniform(size=(l, m)).astype(dtype)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(b_np, ctx)
    c = tvm.nd.array(np.zeros((m, n), dtype=dtype), ctx)
    # Warm up / exercise the kernel twice before checking the result.
    for i in range(2):
        f(a, b, c)
    print('function called')
    tvm.testing.assert_allclose(
        c.asnumpy(), np.dot(b_np.T, a_np), rtol=1e-5)
    num_flops = 2 * nn * mm * ll
    num_runs = 10
    timer_f = f.time_evaluator(f.entry_name, ctx, number=num_runs)
    t = timer_f(a, b, c).mean
    # NOTE(review): the divisor mixes 1e3 and 1e9 while the message says
    # "TFLOPS" and the variable says GFLOPS — units look inconsistent;
    # confirm the intended scaling.
    GFLOPS = num_flops / (t * 1e3) / 1e9
    print("average time cost of %d runs = %g ms, %g TFLOPS." %
          (num_runs, t * 1e3, GFLOPS))
def test_sparse_dense_bsr_autotune(M, N, K, BS_R, BS_C, density):
    """Benchmark sparse-dense matrix multiplication with auto tuning enabled"""
    print("testing param", M, N, K, BS_R, BS_C, density)
    # Dense reference: Y = X @ W.T with W stored as an (N, K) BSR matrix
    # of (BS_R, BS_C) blocks at the given density.
    X_np = np.random.randn(M, K).astype("float32")
    W_sp_np = random_bsr_matrix(N, K, BS_R, BS_C, density=density, dtype="float32")
    W_np = W_sp_np.todense()
    Y_np = X_np.dot(W_np.T)

    # logging config (for printing tuning log to screen)
    logging.getLogger('autotvm').setLevel(logging.DEBUG)
    logging.getLogger('autotvm').addHandler(logging.StreamHandler(sys.stdout))

    # The template is parameterised purely by the BSR component shapes.
    W_sp_np_data_shape, W_sp_np_indices_shape, W_sp_np_indptr_shape, X_np_shape = W_sp_np.data.shape, W_sp_np.indices.shape, W_sp_np.indptr.shape, X_np.shape
    task = autotvm.task.create("benchmark/block_sparse",
                               args=(W_sp_np_data_shape, W_sp_np_indices_shape, W_sp_np_indptr_shape, X_np_shape),
                               target='cuda')

    # Use local gpu, measure multiple times for every config to reduce variance
    # The timeout for running is 4 seconds
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.LocalRunner(repeat=args.repeat, min_repeat_ms=100, timeout=4)
    )

    # Begin tuning, log records to file `conv2d.log`
    # During tuning we will also try many invalid configs, so you are expected to
    # see many error reports. As long as you can see non-zero GFLOPS, it is okay.
    tuner = autotvm.tuner.XGBTuner(task)
    if args.tune:
        tuner.tune(n_trial=args.n_trial,
                   measure_option=measure_option,
                   callbacks=[autotvm.callback.log_to_file(args.autotvm_log)])

    # apply history best from log file
    with autotvm.apply_history_best(args.autotvm_log):
        with tvm.target.create("cuda"):
            s, arg_bufs = block_sparse_template(W_sp_np_data_shape, W_sp_np_indices_shape, W_sp_np_indptr_shape, X_np_shape)
            func = tvm.build(s, arg_bufs)

    # NOTE(review): `context` is read from module scope — presumably a CUDA
    # device context created elsewhere; confirm.
    timer = func.time_evaluator(func.entry_name, context, number=20)
    Y_tvm = tvm.nd.array(np.zeros(Y_np.shape, dtype=Y_np.dtype), ctx=context)
    mean_time = timer(tvm.nd.array(X_np, ctx=context),
                      tvm.nd.array(W_sp_np.data, ctx=context),
                      tvm.nd.array(W_sp_np.indices, ctx=context),
                      tvm.nd.array(W_sp_np.indptr, ctx=context),
                      Y_tvm).mean
    print('%g ms' % (mean_time * 1e3))
    print("------------------------")
    # Validate the tuned kernel against the dense numpy reference.
    tvm.testing.assert_allclose(Y_tvm.asnumpy(), Y_np, atol=1e-4, rtol=1e-4)
def _test_op_with_dqnga(save_path, save_name, workload_name, n_trial,
                        early_stopping, learn_start, update_frequency,
                        train_frequency, discount, epsilon_decay,
                        agent_batch_size, hidden_sizes, learning_rate,
                        reward_function=RewardFunction.R3):
    """Run one workload through the DQN-GA tuner and save the trained model."""
    print(
        f"Running experiment with settings: n trial: {n_trial}, "
        f"early stopping: {early_stopping}, learn start: {learn_start}, "
        f"update frequency: {update_frequency}, discount: {discount}, "
        f"ep decay: {epsilon_decay}, hidden sizes: {hidden_sizes},"
        f"agent batch size: {agent_batch_size}, learning rate: {learning_rate}"
    )

    mod, params = _get_relay_workload(workload_name)
    extracted = autotvm.task.extract_from_program(
        mod["main"], target=target,
        target_host=tvm.target.Target("llvm"), params=params)

    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="default"),
        runner=autotvm.LocalRunner(number=1, repeat=4))

    tuner_obj = GADQNTuner(extracted[0],
                           learn_start=learn_start,
                           target_update_frequency=update_frequency,
                           train_frequency=train_frequency,
                           discount=discount,
                           epsilon_decay=epsilon_decay,
                           agent_batch_size=agent_batch_size,
                           hidden_sizes=hidden_sizes,
                           learning_rate=learning_rate,
                           reward_function=reward_function)
    progress = autotvm.callback.progress_bar(n_trial, prefix="[Task 1/1]")
    tuner_obj.tune(n_trial=n_trial,
                   early_stopping=early_stopping,
                   measure_option=measure_option,
                   callbacks=[progress])
    tuner_obj.save_model(save_path, save_name)
def run_one_wkl(wkl, new_log_path, inputs):
    """Re-measure the given inputs for one workload and append the
    results to *new_log_path*."""
    task = wkl.to_task()

    # Re-tune the best configs.
    write_log = log_to_file(new_log_path)
    opt = autotvm.measure_option(
        builder=autotvm.LocalBuilder(timeout=10),
        runner=autotvm.LocalRunner(number=5, repeat=1, min_repeat_ms=1000))
    measure_batch = create_measure_batch(task, opt)

    results = measure_batch(inputs)
    write_log(None, inputs, results)
    # Release measurement resources eagerly.
    del measure_batch
def check(target, target_host):
    """Tune a sample task on *target*; skip silently if the device is absent."""
    ctx = tvm.context(target, 0)
    if not ctx.exist:
        logging.info("Skip test because %s is not available" % target)
        return

    # init task
    task, target = get_sample_task(target, target_host)
    logging.info("%s", task.config_space)

    opts = autotvm.measure_option(autotvm.LocalBuilder(),
                                  autotvm.LocalRunner())
    RandomTuner(task).tune(n_trial=20, measure_option=opts)
def test_task_tuner_without_measurement():
    """Each tuner class should reach non-trivial best_flops when driven
    by the shared DummyRunner (no real measurements)."""
    task, _ = get_sample_task()
    opts = autotvm.measure_option(builder=autotvm.LocalBuilder(),
                                  runner=DummyRunner())
    logging.info("%s", task.config_space)

    for make_tuner in (autotvm.tuner.RandomTuner,
                       autotvm.tuner.GridSearchTuner,
                       autotvm.tuner.GATuner,
                       autotvm.tuner.XGBTuner):
        tuner = make_tuner(task)
        tuner.tune(n_trial=10, measure_option=opts)
        assert tuner.best_flops > 1
def main():
    """CLI entry point: read op tasks, drop already-tuned ones when a base
    config is given, then tune the remainder."""
    parser = argparse.ArgumentParser(description='Tune ops')
    parser.add_argument('output', type=str)
    parser.add_argument('ops', type=str, nargs='+')
    parser.add_argument('--batchsize', type=int)
    parser.add_argument('--base', type=str)
    parser.add_argument('--target', type=str, default='cuda')
    args = parser.parse_args()

    tasks = read_tasks(args.ops, args.batchsize)
    print('Read %d tasks from %d files' % (len(tasks), len(args.ops)))

    if args.base:
        # Skip any task already present in the base config.
        base_config = TVMConfig(args.base)
        discard_keys = []
        for task_key, (filename, task) in tasks.items():
            if base_config.contains(autotvm_key_from_task(task)):
                print('%s is already tuned' % filename)
                discard_keys.append(task_key)
        for task_key in discard_keys:
            tasks.pop(task_key)
        print('Removed %d tasks. Will tune for %d tasks.' %
              (len(discard_keys), len(tasks)))

    tuning_opt = {
        'log_filename': args.output,
        'tuner': 'xgb',
        'n_trial': 2000,
        'early_stopping': 600,
        'measure_option': autotvm.measure_option(
            builder=autotvm.LocalBuilder(timeout=10),
            runner=autotvm.LocalRunner(number=20, repeat=3, timeout=4),
        ),
    }

    tvm_tasks = []
    for task_key, (filename, task) in tasks.items():
        print('Tuning for %s' % filename)
        tvm_tasks.append(autotvm_task(task, args.target))
    tune_tasks(tvm_tasks, **tuning_opt)
def tune_and_evaluate():
    """Tune the conv2d workload whose filename matches ``args.layer``.

    Reads layer geometry from the CSV at ``args.layer_info`` and uses the
    module-scope ``target``/``target_host`` plus CLI arguments for RPC
    measurement.
    """
    df = pd.read_csv(args.layer_info)
    # Keep only the row(s) for the requested layer file.
    df = df[df['filename'] == args.layer]
    filenames = df.filename
    for net_fname in filenames:
        print('Tuning: ', net_fname)

        #### TUNING OPTION ####
        log_file = "models/%s/logs/%s.log" % (args.model, args.log_file)
        tuning_opt = {
            'log_filename': log_file,
            'n_trial': args.n_trials,
            'measure_option': autotvm.measure_option(
                builder=autotvm.LocalBuilder(timeout=10),
                runner=autotvm.RPCRunner(
                    args.device_key, '0.0.0.0', 9190,
                    number=20, repeat=3, timeout=4,
                    min_repeat_ms=150)),
        }
        # Layer geometry from the CSV row for this file.
        in_c = int(df.loc[df.filename == net_fname, 'in_channels'])
        in_x = int(df.loc[df.filename == net_fname, 'input_spatial_x'])
        # NOTE(review): out_c is read but never used below — confirm.
        out_c = int(df.loc[df.filename == net_fname, 'out_channels'])
        input_shape = (1, in_c, in_x, in_x)
        print(input_shape)

        # extract workloads from relay program
        print("\tExtract tasks...")
        net, params = get_network(net_fname, input_shape)
        tasks = autotvm.task.extract_from_program(net['main'],
                                                  target=target,
                                                  target_host=target_host,
                                                  params=params,
                                                  ops=(relay.op.nn.conv2d, ))

        # run tuning tasks
        print("\tTuning...")
        tune_tasks(tasks, **tuning_opt)
def test_min_repeat_ms():
    """min_repeat_ms=100 should force each successful measurement to run
    for at least 100 ms in total."""
    task, target = get_sample_task()
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.LocalRunner(number=1, min_repeat_ms=100))

    def _callback(tuner, measure_inputs, measure_results):
        for inp, res in zip(measure_inputs, measure_results):
            if res.error_no != 0:
                continue
            # mean cost (s) * adaptive repeat count, converted to ms.
            total_ms = (1000 * np.mean(res.costs)
                        * measure_option['runner'].cur_number)
            assert total_ms >= 100

    tuner = autotvm.tuner.RandomTuner(task)
    tuner.tune(n_trial=5,
               measure_option=measure_option,
               callbacks=[_callback])
def test_random_tuner():
    """Test RandomTuner

    Checks that a range-restricted RandomTuner visits exactly the indices
    in [8, 15], each at most once.
    """
    task, _ = get_sample_task()
    measure_option = autotvm.measure_option(builder=autotvm.LocalBuilder(),
                                            runner=DummyRunner())

    tuner = autotvm.tuner.RandomTuner(task, range_idx=(8, 15))
    assert tuner.range_length == 8
    assert tuner.index_offset == 8

    # Tuner should only focus on the specified range and should visit all indices
    tuner.tune(n_trial=8, measure_option=measure_option)
    assert tuner.counter == 8
    assert not tuner.has_next()
    visited = set()
    for idx in tuner.visited:
        assert idx not in visited
        # Fix: record each index — previously the set was never populated,
        # making the duplicate check above vacuous.
        visited.add(idx)
        assert 8 <= idx <= 15
def test_gridsearch_tuner():
    """GridSearchTuner range handling: full space without range_idx,
    restricted and exhaustible with range_idx=(8, 15)."""
    task, _ = get_sample_task()
    opts = autotvm.measure_option(builder=autotvm.LocalBuilder(),
                                  runner=DummyRunner())

    # When no range index, range_length should be the length of config space
    full_tuner = autotvm.tuner.GridSearchTuner(task)
    assert full_tuner.range_length == len(task.config_space)
    assert full_tuner.index_offset == 0

    # With range index, range_length should be the length of the specified range
    ranged_tuner = autotvm.tuner.GridSearchTuner(task, range_idx=(8, 15))
    assert ranged_tuner.range_length == 8
    assert ranged_tuner.index_offset == 8

    # Tuner should only focus on the specified range
    ranged_tuner.tune(n_trial=8, measure_option=opts)
    assert ranged_tuner.counter == 8
    assert not ranged_tuner.has_next()
def get_tuning_opt(log_file="tuning.log", n_trial=200):
    """Returns tuning options"""
    measure = autotvm.measure_option(
        builder=autotvm.LocalBuilder(timeout=10),
        runner=autotvm.LocalRunner(number=20, repeat=3, timeout=4,
                                   min_repeat_ms=150),
    )
    return {
        "log_filename": log_file,
        "tuner": "random",
        "n_trial": n_trial,
        "early_stopping": 60,
        "measure_option": measure,
    }
def __init__(self, task, target, device_key):
    """Initialise tuning state and a remote RPC measure option.

    Parameters
    ----------
    task :
        AutoTVM task to tune.
    target :
        Compilation target.
    device_key : str
        RPC device key registered with the tracker.
    """
    self.task = task
    self.target = target
    self.device_key = device_key
    # Best result found so far; filled in during tuning.
    self.best_config = None
    self.best_latency = None
    self.early_stopping = None
    self.record = None
    self.tuner = 'xgb'  # tuner algorithm name
    self.n_trial = 30
    # NOTE(review): RPC tracker address is hard-coded — confirm the host
    # 115.145.179.79:9090 is reachable from the tuning machine.
    self.measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="default"),
        runner=autotvm.RPCRunner(
            device_key,
            host="115.145.179.79",
            port=9090,
            number=5,
            timeout=10,
        ),
    )