示例#1
0
def autotvm_tuning_opt(target, log_file, dtype="float32"):
    """Build an AutoTVM tuning-option dict for ``target``.

    Uses CPU-oriented measurement settings (single run, 10 repeats, cache
    flush between runs) when 'cpu' is among the target keys, otherwise
    GPU-oriented settings (more runs, build/run timeouts).
    """
    if "cpu" in target.keys:
        print("enable cpu tuning options")
        builder = autotvm.LocalBuilder()
        runner = autotvm.LocalRunner(number=1,
                                     repeat=10,
                                     min_repeat_ms=0,
                                     enable_cpu_cache_flush=True)
    else:
        print("enable gpu tuning options")
        builder = autotvm.LocalBuilder(timeout=10)
        runner = autotvm.LocalRunner(number=20,
                                     repeat=3,
                                     timeout=4,
                                     min_repeat_ms=150)

    measure_option = autotvm.measure_option(builder=builder, runner=runner)

    return {
        "log_filename": log_file,
        "tuner": "xgb",
        "early_stopping": None,
        "measure_option": measure_option,
    }
示例#2
0
def test_tuning_gpu(target, ctx):
    """Tune a sample GPU task for 20 random trials and verify that every
    trial reported a result and at least one ran without error."""
    # init task
    task, target = get_sample_task(target, None)
    logging.info("task config space: %s", task.config_space)

    measure_option = autotvm.measure_option(autotvm.LocalBuilder(),
                                            autotvm.LocalRunner())

    results = []

    def _collect(_tuner, _inputs, measure_results):
        # Accumulate every MeasureResult produced during tuning.
        results.extend(measure_results)

    tuner = RandomTuner(task)
    tuner.tune(n_trial=20,
               measure_option=measure_option,
               callbacks=[_collect])

    assert len(results) == 20

    successful_results = [
        r for r in results
        if r.error_no == autotvm.MeasureErrorNo.NO_ERROR
    ]
    assert len(successful_results) > 0, f"No successful tuning runs: {results!r}"
示例#3
0
 def _tune_topi_cuda(self, name, args, te_tensors, tune_kwargs):
     """Tune the TOPI CUDA task for this op, then build using the best record.

     Runs up to ``tune_kwargs['n_trial']`` trials (default 40; 0 skips
     tuning entirely) and logs records to a per-op temp file, which is
     removed afterwards unless ``tune_kwargs['preserve_log']`` is set.
     Returns whatever ``self._build_topi_cuda`` returns.
     """
     n_trial = tune_kwargs.get('n_trial', 40)
     preserve_log = tune_kwargs.get('preserve_log', False)
     # slugify keeps the op name filesystem-safe for the log path.
     tmp_file_name = slugify(name) + '.topi_cuda.log'
     if n_trial > 0:
         task = autotvm.task.create(self.topi_cuda_task_name,
                                    args=args,
                                    target='cuda')
         # Caller may override tuner, builder, runner, and callbacks.
         tuner = tune_kwargs.get('tuner', autotvm.tuner.XGBTuner(task))
         tuner.tune(
             n_trial=n_trial,
             measure_option={
                 'builder':
                 tune_kwargs.get('builder', autotvm.LocalBuilder()),
                 'runner':
                 tune_kwargs.get(
                     'runner',
                     autotvm.LocalRunner(timeout=20,
                                         **default_tune_eval_settings)),
             },
             callbacks=[
                 autotvm.callback.progress_bar(n_trial,
                                               prefix=f'TOPI {name}'),
                 autotvm.callback.log_to_file(tmp_file_name),
                 *tune_kwargs.get('callbacks', [])
             ])
     # Build with the best record from the log (NOTE(review): if
     # n_trial == 0 and no log exists this may fail — confirm upstream).
     with autotvm.apply_history_best(tmp_file_name):
         result = self._build_topi_cuda(name, args, te_tensors)
     if not preserve_log:
         os.remove(tmp_file_name)
     return result
示例#4
0
def test_tuning_cpu():
    """Tune the single conv2d task of a tiny Relay module on LLVM for 20
    trials and verify all trials reported and at least one succeeded."""
    # Minimal Relay module with one conv2d — should yield exactly one task.
    ir_mod = tvm.parser.fromtext(
        textwrap.dedent("""
        #[version = "0.0.5"]
        def @main(%a : Tensor[(1, 3, 32, 32), float32], %b : Tensor[(3, 3, 5, 5), float32]) {
               nn.conv2d(%a, %b, data_layout="NCHW", kernel_layout="OIHW")
        }
        """))
    tasks = autotvm.task.relay_integration.extract_from_program(
        ir_mod, {}, tvm.target.create("llvm"))
    assert len(tasks) == 1, f"Extracted != 1 task from program: {tasks!r}"

    task = tasks[0]

    measure_option = autotvm.measure_option(autotvm.LocalBuilder(),
                                            autotvm.LocalRunner())

    results = []

    tuner = RandomTuner(task)
    # The 1-tuple callback collects every MeasureResult produced.
    tuner.tune(
        n_trial=20,
        measure_option=measure_option,
        callbacks=(lambda _tuner, _inputs, rs: results.extend(rs),
                   ),
    )

    assert len(results) == 20

    # At least one configuration must have measured without error.
    successful_results = [
        r for r in results if r.error_no == autotvm.MeasureErrorNo.NO_ERROR
    ]
    assert len(
        successful_results) > 0, f"No successful tuning runs: {results!r}"
示例#5
0
def test_convolution(n_trial=2000,
                     early_stopping=400,
                     learn_start=50,
                     memory_capacity=1000,
                     update_frequency=50,
                     discount=0.99,
                     epsilon=(1.0, 0.01, 0.99)):
    """
    Test simple convolution with RLTuner.

    Extracts the task(s) from a Relay convolution workload and tunes the
    first one with the DQN-based GA tuner using the given RL
    hyperparameters (epsilon presumably (start, end, decay) — confirm
    against GADQNTuner).

    NOTE(review): relies on a module-level ``target`` — confirm it is
    defined where this test runs.
    """
    mod, params = _get_relay_convolution()
    tasks = autotvm.task.extract_from_program(
        mod["main"],
        target=target,
        target_host=tvm.target.Target("llvm"),
        params=params)
    # Local build; each config measured once with 4 repeats.
    runner = autotvm.LocalRunner(number=1, repeat=4)
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="default"), runner=runner)
    prefix = f"[Task 1/1]"
    tuner_obj = GADQNTuner(tasks[0],
                           learn_start=learn_start,
                           memory_capacity=memory_capacity,
                           update_frequency=update_frequency,
                           discount=discount,
                           epsilon=epsilon,
                           debug=True)
    tuner_obj.tune(
        n_trial=n_trial,
        early_stopping=early_stopping,
        measure_option=measure_option,
        callbacks=[autotvm.callback.progress_bar(n_trial, prefix=prefix)])
def main():
    """Tune (optionally) and benchmark the quantized MobileNet V1 TFLite
    model on an ARM CPU target."""
    target = tvm.target.arm_cpu()

    batch_size = 1
    dtype = 'uint8'

    quant_model_url = "http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz"
    model_name = "mobilenet_v1_1.0_224_quant"
    log_file = "%s.log" % model_name

    input_tensor = "input"

    # Local CPU measurement: 10 runs per config, repeated at least 1s.
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.LocalRunner(number=10, repeat=1, min_repeat_ms=1000),
    )
    tuning_option = {
        'log_filename': log_file,
        'tuner': 'random',
        'early_stopping': 800,
        'measure_option': measure_option,
    }

    mod, params, data_shape = tune(tuning_option, target, quant_model_url,
                                   model_name, batch_size, input_tensor,
                                   need_tune=False)

    evaluate(log_file, mod, params, target, input_tensor, data_shape,
             input_dtype=dtype)
示例#7
0
def _test_op_with_ga(save_path, save_name, workload_name, n_trial,
                     early_stopping):
    """
    Test a specified single workload with GA tuner.

    Extracts the task(s) for ``workload_name``, tunes the first with a
    GATuner for up to ``n_trial`` trials (stopping early after
    ``early_stopping`` trials without improvement), then saves the tuner
    model under ``save_path``/``save_name``.

    NOTE(review): relies on a module-level ``target`` — confirm it is
    defined where this runs.
    """
    print(f"Running experiment with settings: n trial: {n_trial}, "
          f"early stopping: {early_stopping}")

    mod, params = _get_relay_workload(workload_name)
    tasks = autotvm.task.extract_from_program(
        mod["main"],
        target=target,
        target_host=tvm.target.Target("llvm"),
        params=params)
    # Local build; each config measured once with 4 repeats.
    runner = autotvm.LocalRunner(number=1, repeat=4)
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="default"), runner=runner)
    prefix = f"[Task 1/1]"
    tuner_obj = GATuner(tasks[0], debug=True)
    tuner_obj.tune(
        n_trial=n_trial,
        early_stopping=early_stopping,
        measure_option=measure_option,
        callbacks=[autotvm.callback.progress_bar(n_trial, prefix=prefix)])
    tuner_obj.save_model(save_path, save_name)
示例#8
0
    def runner(target):
        """Tune a sample task for 20 random trials and assert that every
        trial reported and at least one config was usable."""
        # init task
        task, target = get_sample_task(target, None)
        logging.info("task config space: %s", task.config_space)

        measure_option = autotvm.measure_option(autotvm.LocalBuilder(),
                                                autotvm.LocalRunner())

        results = []

        tuner = RandomTuner(task)
        # The 1-tuple callback collects every MeasureResult produced.
        tuner.tune(
            n_trial=20,
            measure_option=measure_option,
            callbacks=(lambda _tuner, _inputs, rs: results.extend(rs),
                       ),
        )

        assert len(results) == 20

        successful_results = [
            r for r in results if r.error_no == autotvm.MeasureErrorNo.NO_ERROR
            # We filter records before building if we know they won't work ahead of time.
            # We can't guarantee we get one good record so we count these as success too
            or r.error_no == autotvm.MeasureErrorNo.INSTANTIATION_ERROR
        ]
        assert len(
            successful_results) > 0, f"No successful tuning runs: {results!r}"
示例#9
0
def auto_schedule(func, args):
    """AutoTVM-tune ``func`` for ``args`` on LLVM and return the best schedule.

    Publishes the template via the module-level ``function`` global, picks a
    search budget and template by argument count (4 args → GEMM, 100 trials;
    otherwise CONV, 200 trials), tunes with a GA tuner logging to
    ``matmul.log``, then rebuilds with the best recorded config.

    Returns:
        (schedule, arg_bufs) produced by the tuned template.
    """
    global function
    function = func

    # Start from an empty log so stale records cannot win apply_history_best.
    # (Opening with 'w' already truncates; the old explicit truncate()
    # call was redundant, and `with` guarantees the handle is closed.)
    with open("matmul.log", 'w', encoding="utf-8"):
        pass

    # Choose the template and search budget from the argument count.
    if len(args) == 4:
        config_sp_size = 100
        autotvm_func = GEMMAutoTVM
    else:
        config_sp_size = 200
        autotvm_func = CONVAutoTVM

    task = autotvm.task.create(autotvm_func, args=args, target='llvm')
    print(task.config_space)
    measure_option = autotvm.measure_option(
        builder='local', runner=autotvm.LocalRunner(number=3))

    # begin tuning, log records to file `matmul.log`
    tuner = autotvm.tuner.GATuner(task)
    tuner.tune(n_trial=config_sp_size,
               measure_option=measure_option,
               callbacks=[autotvm.callback.log_to_file('matmul.log')])

    with autotvm.apply_history_best('matmul.log'):
        with tvm.target.create("llvm"):
            s, arg_bufs = autotvm_func(*args)
            print(tvm.lower(s, arg_bufs, simple_mode=True))
            return s, arg_bufs
def test_check_correctness():
    """Verify LocalRunner(check_correctness=True) flags wrong results.

    First tunes a good sample template, where every measurement must
    succeed; then a deliberately bad matmul template, where every result
    must be reported as WRONG_ANSWER.
    """
    task, target = get_sample_task()

    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.LocalRunner(check_correctness=True))

    def _callback_correct(tuner, measure_inputs, measure_results):
        # Good template: every measurement must pass the correctness check.
        for _, res in zip(measure_inputs, measure_results):
            assert res.error_no == 0

    tuner = autotvm.tuner.RandomTuner(task)
    tuner.tune(n_trial=2,
               measure_option=measure_option,
               callbacks=[_callback_correct])

    # a bad template
    n = 128
    target = tvm.target.Target("llvm -device=bad_device")
    task = autotvm.task.create("testing/bad_matmul",
                               args=(n, n, n, "float32"),
                               target=target)

    def _callback_wrong(tuner, measure_inputs, measure_results):
        # Bad template: the correctness check must flag every result.
        for _, res in zip(measure_inputs, measure_results):
            assert res.error_no == MeasureErrorNo.WRONG_ANSWER

    tuner = autotvm.tuner.RandomTuner(task)
    tuner.tune(n_trial=2,
               measure_option=measure_option,
               callbacks=[_callback_wrong])
示例#11
0
def create_measure(device, flag="t4"):
    """Build an AutoTVM measure_option for the given device class.

    Args:
        device: 'arm'/'aarch64' (RPC board "pi"), any string containing
            'x86' (local CPU), or 'gpu' (RPC GPU tracker).
        flag: RPC device key used for the GPU tracker (default "t4").

    Returns:
        The autotvm measure_option dict.

    Raises:
        ValueError: if ``device`` matches none of the known classes.
            (Previously an unknown device fell through and raised
            UnboundLocalError at the return statement.)

    NOTE(review): relies on a module-level ``use_android`` flag for the
    ARM branch — confirm it is defined.
    """
    if device == 'arm' or device == 'aarch64':
        # Cross-compile with the NDK when targeting Android; measure on
        # the remote board through the RPC tracker.
        measure_option = autotvm.measure_option(
            builder=autotvm.LocalBuilder(
                build_func='ndk' if use_android else 'default'),
            runner=autotvm.RPCRunner(
                "pi",
                host='0.0.0.0',
                port=9190,
                number=5,
                timeout=10,
            ))
    elif 'x86' in device:
        measure_option = autotvm.measure_option(
            builder=autotvm.LocalBuilder(),
            runner=autotvm.LocalRunner(number=5, repeat=1, min_repeat_ms=1000),
        )
    elif device == 'gpu':
        measure_option = autotvm.measure_option(
            builder=autotvm.LocalBuilder(timeout=1000),
            runner=autotvm.RPCRunner(
                flag,  # change the device key to your key
                '0.0.0.0',
                9190,
                number=20,
                repeat=3,
                timeout=1000,
                min_repeat_ms=150))
    else:
        raise ValueError("unsupported device: %r" % device)
    return measure_option
示例#12
0
def run(name, N, H, W, CO, CI, KH, KW, stride, pad, dilation):
    """Tune a CUDA conv2d_nchw workload, build the best config, and time it.

    Args:
        name: label used for the log file and the results line.
        N, H, W: batch size, input height, input width.
        CO, CI: output / input channel counts; KH, KW: kernel size.
        stride, pad: applied symmetrically in both spatial dims.
        dilation: conv dilation.

    Side effects: writes tuning records to ``conv2d_<name>.log`` and
    appends "<name>, <ms>" to ``autotvm_conv_nchw.txt``.
    """
    N, H, W, CO, CI, KH, KW, strides, padding = N, H, W, CO, CI, KH, KW, (stride, stride), (pad, pad)
    task = autotvm.task.create(conv2d_nchw,
                               args=(N, H, W, CO, CI, KH, KW, strides, padding, dilation),
                               target='cuda')
    print(task.config_space)
    logfile = "conv2d_" + name + ".log"

    # Use local gpu, measure 10 times for every config to reduce variance
    # The timeout of compiling a program is 10 seconds, the timeout for running is 4 seconds
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.LocalRunner(repeat=3, min_repeat_ms=100, timeout=10)
    )

    # Begin tuning, log records to file `conv2d.log`
    # During tuning we will also try many invalid configs, so you are expected to
    # see many error reports. As long as you can see non-zero GFLOPS, it is okay.
    tuner = autotvm.tuner.XGBTuner(task)
    tuner.tune(n_trial=1000,
               measure_option=measure_option,
               callbacks=[autotvm.callback.log_to_file(logfile)])

    # inspect the best config
    dispatch_context = autotvm.apply_history_best(logfile)
    best_config = dispatch_context.query(task.target, task.workload)
    print("\nBest config:")
    print(best_config)

    # apply history best from log file
    with autotvm.apply_history_best(logfile):
        with tvm.target.create("cuda"):
            s, arg_bufs = conv2d_nchw(N, H, W, CO, CI, KH, KW, strides, padding, dilation)
            func = tvm.build(s, arg_bufs)

    # random data for the timing run (correctness check left disabled)
    a_np = np.random.uniform(size=(N, CI, H, W)).astype(np.float32)
    w_np = np.random.uniform(size=(CO, CI, KH, KW)).astype(np.float32)
    # c_np = conv2d_nchw_python(a_np, w_np, strides, padding)

    ctx = tvm.gpu()
    a_tvm = tvm.nd.array(a_np, ctx=ctx)
    w_tvm = tvm.nd.array(w_np, ctx=ctx)
    c_tvm = tvm.nd.empty((N, CO, (H + 2 * pad - KH) // stride + 1, (W + 2 * pad - KW) // stride + 1), ctx=ctx)
    # func(a_tvm, w_tvm, c_tvm)

    # tvm.testing.assert_allclose(c_np, c_tvm.asnumpy(), rtol=1e-2)

    # Evaluate running time, averaging several runs to reduce the noise
    # and the overhead of kernel launch.
    evaluator = func.time_evaluator(func.entry_name, ctx, number=10)
    cost = evaluator(a_tvm, w_tvm, c_tvm).mean * 1e3
    print('Time cost of this operator: %f' % cost)
    with open("autotvm_conv_nchw.txt", "a") as f:
        # BUG FIX: previously wrote the literal string "name" instead of
        # the workload's actual name.
        f.write("{}, {}\n".format(name, cost))
示例#13
0
def test_tuning(target, ctx):
    """Smoke-test RandomTuner on a sample task: 20 trials, local measure."""
    # init task
    task, target = get_sample_task(target, None)
    logging.info("%s", task.config_space)

    opts = autotvm.measure_option(autotvm.LocalBuilder(),
                                  autotvm.LocalRunner())

    RandomTuner(task).tune(n_trial=20, measure_option=opts)
示例#14
0
def tune_cuda_tile(name,
                   tree,
                   kernel_args,
                   parser,
                   n_trial=40,
                   tuner=None,
                   measure_option=None,
                   callbacks=None,
                   preserve_log=False):
    """Tune the CUDA tiling of a kernel and return the best config found.

    If ``n_trial`` > 0, tunes with an XGBoost tuner (or ``tuner(task)``
    when a tuner factory is supplied), logging records to a temp file;
    then loads the best record and removes the log unless
    ``preserve_log`` is set.

    Returns:
        (best CUDATileConfigEntity, best cost).

    Raises:
        Exception: if no usable config was recorded.
    """
    tmp_file_name = slugify(name) + '.cuda_tile.log'
    task = CUDATileTask(name, tree.copy(), kernel_args, parser)
    from random import randint
    # Build once from a random config; the result is unused afterwards —
    # presumably a buildability sanity check. TODO confirm.
    stmt, args = task.instantiate(
        task.config_space.get(randint(0,
                                      len(task.config_space) - 1)))
    kernel = tvm.build(stmt, name=name, target='cuda')

    if n_trial > 0:
        if tuner is None:
            tuner = autotvm.tuner.XGBTuner(task, feature_type='knob')
        else:
            # ``tuner`` is treated as a factory/class taking the task.
            tuner = tuner(task)

        tuner.tune(
            n_trial=n_trial,
            measure_option={
                'builder':
                autotvm.LocalBuilder(),
                'runner':
                autotvm.LocalRunner(timeout=20, **default_tune_eval_settings),
                # Caller-supplied options override the defaults above.
                **(measure_option or {}),
            },
            callbacks=[
                autotvm.callback.progress_bar(n_trial,
                                              prefix=f'CUDATile {name}'),
                autotvm.callback.log_to_file(tmp_file_name), *(callbacks or [])
            ])

    best, best_cost = load_best(tmp_file_name, task)

    # Reclaim tuning memory before continuing.
    import gc
    gc.collect()

    if not best:
        raise Exception('failed to build kernel')

    best = CUDATileConfigEntity.from_json_dict(best)

    print('CUDATile %s: best %s, best cost %.12f' %
          (name, repr(best), best_cost))

    if not preserve_log:
        os.remove(tmp_file_name)

    return best, best_cost
示例#15
0
def tune_kernels(tasks,
                 builder=autotvm.LocalBuilder(),
                 runner=autotvm.LocalRunner(number=10,
                                            repeat=1,
                                            min_repeat_ms=1000),
                 tuner='ga',
                 early_stopping=None,
                 log_filename=log_file):
    """Tune every task in ``tasks`` for x86, logging to ``log_filename``.

    conv2d / depthwise_conv2d_nchw tasks are re-created as their NCHWc
    variants before tuning.  ``tuner`` selects the search strategy:
    'xgb'/'xgb-rank', 'ga', 'random', or 'gridsearch'.

    NOTE(review): the builder/runner defaults are constructed once at
    import time and shared across calls; ``target`` and ``log_file`` are
    module-level globals — confirm they are defined.
    """
    measure_option = autotvm.measure_option(builder, runner)

    for i, tsk in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # converting conv2d tasks to conv2d_NCHWc tasks
        if tsk.workload:
            op_name = tsk.workload[0]
            if op_name == 'conv2d':
                func_create = 'topi_x86_conv2d_NCHWc'
            elif op_name == 'depthwise_conv2d_nchw':
                func_create = 'topi_x86_depthwise_conv2d_NCHWc_from_nchw'
            else:
                raise ValueError(
                    "Tuning {} is not supported on x86".format(op_name))

            task = autotvm.task.create(func_create,
                                       args=tsk.args,
                                       target=target,
                                       template_key='direct')
            # Keep the original workload so records match the graph's tasks.
            task.workload = tsk.workload
        else:
            task = tsk

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(task, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(task, pop_size=1000)
        elif tuner == 'random':
            tuner_obj = RandomTuner(task)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(task)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        # do tuning over the entire config space (no fixed trial budget)
        n_trial = len(task.config_space)
        print("n_trial", n_trial)
        tuner_obj.tune(n_trial=n_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(n_trial,
                                                         prefix=prefix),
                           autotvm.callback.log_to_file(log_filename)
                       ])
示例#16
0
def test_gemm(mm, nn, ll):
    """Tune a GEMM template on CUDA, check correctness, report throughput.

    The result is validated as c == b_np.T @ a_np with a:(l, n) and
    b:(l, m), i.e. an (m, n) output.
    """
    # correctness
    m, n, l = mm, nn, ll
    dtype = 'float32'

    # Echo tuning progress to stdout.
    logging.getLogger('autotvm').setLevel(logging.DEBUG)
    logging.getLogger('autotvm').addHandler(logging.StreamHandler(sys.stdout))
    log_file = 'gemm.log'

    task = autotvm.task.create('gemm-autotune/gemm_v2',
                               args = (m, n, l), target='cuda')
    print(task.config_space)

    measure_option = autotvm.measure_option(
        builder = autotvm.LocalBuilder(),
        runner = autotvm.LocalRunner(repeat=3, min_repeat_ms=100, timeout=4)
    )
    tuner = autotvm.tuner.XGBTuner(task, feature_type='knob')
    tuner.tune(n_trial=1000,
               measure_option = measure_option,
               callbacks = [autotvm.callback.log_to_file(log_file)])

    dispatch_context = autotvm.apply_history_best(log_file)
    best_config = dispatch_context.query(task.target, task.workload)
    print('\nBest config:')
    print(best_config)

    # Rebuild the kernel with the best found configuration.
    with autotvm.apply_history_best(log_file):
        with tvm.target.create('cuda'):
            s, arg_bufs = gemm_autotune(m, n, l)
            f = tvm.build(s, arg_bufs)
    # launch the kernel.
    # a_np = np.random.uniform(size=(n, l)).astype(A.dtype)
    # b_np = np.random.uniform(size=(m, l)).astype(B.dtype)
    ctx = tvm.gpu(0)
    a_np = np.random.uniform(size=(l, n)).astype(dtype)
    b_np = np.random.uniform(size=(l, m)).astype(dtype)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(b_np, ctx)
    c = tvm.nd.array(np.zeros((m, n), dtype=dtype), ctx)
    # Warm-up runs before the correctness check and timing.
    for i in range(2):
        f(a, b, c)
    print('function called')
    tvm.testing.assert_allclose(
        c.asnumpy(), np.dot(b_np.T, a_np), rtol=1e-5)

    num_flops = 2 * nn * mm * ll
    num_runs = 10
    timer_f = f.time_evaluator(f.entry_name, ctx, number=num_runs)
    t = timer_f(a, b, c).mean
    # num_flops / t is FLOP/s; the extra / 1e3 makes this TFLOPS, which
    # matches the printed label despite the misleading variable name.
    GFLOPS = num_flops / (t * 1e3) / 1e9
    print("average time cost of %d runs = %g ms, %g TFLOPS." % (num_runs, t * 1e3, GFLOPS))
def test_sparse_dense_bsr_autotune(M, N, K, BS_R, BS_C, density):
    """Benchmark sparse-dense matrix multiplication with auto tuning enabled.

    Generates a random BSR weight matrix, optionally tunes the
    "benchmark/block_sparse" template (when ``args.tune`` is set), builds
    with the best logged config, times it, and checks the output against
    the dense numpy reference Y = X @ W.T.

    NOTE(review): relies on module-level ``args`` (CLI options) and
    ``context`` (TVM device) — confirm they are defined.
    """
    print("testing param", M, N, K, BS_R, BS_C, density)
    X_np = np.random.randn(M, K).astype("float32")
    W_sp_np = random_bsr_matrix(N, K, BS_R, BS_C, density=density, dtype="float32")
    W_np = W_sp_np.todense()
    Y_np = X_np.dot(W_np.T)

    # logging config (for printing tuning log to screen)
    logging.getLogger('autotvm').setLevel(logging.DEBUG)
    logging.getLogger('autotvm').addHandler(logging.StreamHandler(sys.stdout))

    W_sp_np_data_shape, W_sp_np_indices_shape, W_sp_np_indptr_shape, X_np_shape = W_sp_np.data.shape, W_sp_np.indices.shape, W_sp_np.indptr.shape, X_np.shape
    
    task = autotvm.task.create("benchmark/block_sparse",
                            args=(W_sp_np_data_shape, W_sp_np_indices_shape, W_sp_np_indptr_shape, X_np_shape),
                            target='cuda')
    
    # Use local gpu, measure multiple times for every config to reduce variance
    # The timeout for running is 4 seconds
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.LocalRunner(repeat=args.repeat, min_repeat_ms=100, timeout=4)
    )

    # Begin tuning, log records to file `conv2d.log`
    # During tuning we will also try many invalid configs, so you are expected to
    # see many error reports. As long as you can see non-zero GFLOPS, it is okay.
    tuner = autotvm.tuner.XGBTuner(task)
    if args.tune:
        tuner.tune(n_trial=args.n_trial,
                measure_option=measure_option,
                callbacks=[autotvm.callback.log_to_file(args.autotvm_log)])

    # apply history best from log file
    with autotvm.apply_history_best(args.autotvm_log):
        with tvm.target.create("cuda"):
            s, arg_bufs = block_sparse_template(W_sp_np_data_shape, W_sp_np_indices_shape, W_sp_np_indptr_shape, X_np_shape)
            func = tvm.build(s, arg_bufs)

    timer = func.time_evaluator(func.entry_name, context, number=20)
    Y_tvm = tvm.nd.array(np.zeros(Y_np.shape, dtype=Y_np.dtype), ctx=context)

    mean_time = timer(tvm.nd.array(X_np, ctx=context),
                      tvm.nd.array(W_sp_np.data, ctx=context),
                      tvm.nd.array(W_sp_np.indices, ctx=context),
                      tvm.nd.array(W_sp_np.indptr, ctx=context),
                      Y_tvm).mean
    
    print('%g ms' % (mean_time * 1e3))
    print("------------------------")
    tvm.testing.assert_allclose(Y_tvm.asnumpy(), Y_np, atol=1e-4, rtol=1e-4)
示例#18
0
def _test_op_with_dqnga(save_path,
                        save_name,
                        workload_name,
                        n_trial,
                        early_stopping,
                        learn_start,
                        update_frequency,
                        train_frequency,
                        discount,
                        epsilon_decay,
                        agent_batch_size,
                        hidden_sizes,
                        learning_rate,
                        reward_function=RewardFunction.R3):
    """
    Test a specified single workload using RLTuner.

    Extracts the task(s) for ``workload_name``, tunes the first with the
    DQN-based GA tuner configured by the given RL hyperparameters, then
    saves the tuner model under ``save_path``/``save_name``.

    NOTE(review): relies on a module-level ``target`` — confirm it is
    defined where this runs.
    """
    print(
        f"Running experiment with settings: n trial: {n_trial}, "
        f"early stopping: {early_stopping}, learn start: {learn_start}, "
        f"update frequency: {update_frequency}, discount: {discount}, "
        f"ep decay: {epsilon_decay}, hidden sizes: {hidden_sizes},"
        f"agent batch size: {agent_batch_size}, learning rate: {learning_rate}"
    )

    mod, params = _get_relay_workload(workload_name)
    tasks = autotvm.task.extract_from_program(
        mod["main"],
        target=target,
        target_host=tvm.target.Target("llvm"),
        params=params)
    # Local build; each config measured once with 4 repeats.
    runner = autotvm.LocalRunner(number=1, repeat=4)
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="default"), runner=runner)
    prefix = f"[Task 1/1]"
    tuner_obj = GADQNTuner(tasks[0],
                           learn_start=learn_start,
                           target_update_frequency=update_frequency,
                           train_frequency=train_frequency,
                           discount=discount,
                           epsilon_decay=epsilon_decay,
                           agent_batch_size=agent_batch_size,
                           hidden_sizes=hidden_sizes,
                           learning_rate=learning_rate,
                           reward_function=reward_function)
    tuner_obj.tune(
        n_trial=n_trial,
        early_stopping=early_stopping,
        measure_option=measure_option,
        callbacks=[autotvm.callback.progress_bar(n_trial, prefix=prefix)])
    tuner_obj.save_model(save_path, save_name)
示例#19
0
def run_one_wkl(wkl, new_log_path, inputs):
    """Re-measure the given config inputs for one workload and append the
    results to ``new_log_path``."""
    task = wkl.to_task()

    # Re-tune the best configs.
    writer = log_to_file(new_log_path)
    option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(timeout=10),
        runner=autotvm.LocalRunner(number=5, repeat=1, min_repeat_ms=1000))
    batch_fn = create_measure_batch(task, option)
    measured = batch_fn(inputs)
    writer(None, inputs, measured)

    # Drop the measure batch to release its resources before returning.
    del batch_fn
    return
    def check(target, target_host):
        """Tune a sample task on ``target`` for 20 random trials, skipping
        gracefully when the device is not available."""
        ctx = tvm.context(target, 0)
        if not ctx.exist:
            logging.info("Skip test because %s is not available" % target)
            return

        # init task
        task, target = get_sample_task(target, target_host)
        logging.info("%s", task.config_space)

        measure_option = autotvm.measure_option(autotvm.LocalBuilder(),
                                                autotvm.LocalRunner())

        tuner = RandomTuner(task)
        tuner.tune(n_trial=20, measure_option=measure_option)
示例#21
0
def execute_autotune_task(template_name, dtype, N, L, M, target, n_trial,
                          number, log_path):
    """Create a matmul-style AutoTVM task from ``template_name`` and tune
    it with RandomTuner, appending records to ``log_path``."""
    task = autotvm.task.create(template_name,
                               args=(N, L, M, dtype),
                               target=target)
    print(task.config_space)

    opts = autotvm.measure_option(
        builder="local", runner=autotvm.LocalRunner(number=number))

    # Random search; swap in e.g. XGBTuner for model-guided search.
    tuner = autotvm.tuner.RandomTuner(task)
    logger = autotvm.callback.log_to_file(log_path)
    tuner.tune(n_trial=n_trial, measure_option=opts, callbacks=[logger])
示例#22
0
def main():
    """Parse CLI options, drop tasks already covered by the base config,
    and tune the remaining tasks with XGBoost."""
    parser = argparse.ArgumentParser(description='Tune ops')
    parser.add_argument('output', type=str)
    parser.add_argument('ops', type=str, nargs='+')
    parser.add_argument('--batchsize', type=int)
    parser.add_argument('--base', type=str)
    parser.add_argument('--target', type=str, default='cuda')
    args = parser.parse_args()

    tasks = read_tasks(args.ops, args.batchsize)

    print('Read %d tasks from %d files' % (len(tasks), len(args.ops)))

    if args.base:
        # Skip tasks whose config already exists in the base log.
        base_config = TVMConfig(args.base)
        already_tuned = []
        for key, (fname, task) in tasks.items():
            if base_config.contains(autotvm_key_from_task(task)):
                print('%s is already tuned' % fname)
                already_tuned.append(key)
        for key in already_tuned:
            tasks.pop(key)
        print('Removed %d tasks. Will tune for %d tasks.' %
              (len(already_tuned), len(tasks)))

    tuning_opt = {
        'log_filename': args.output,
        'tuner': 'xgb',
        'n_trial': 2000,
        'early_stopping': 600,
        'measure_option': autotvm.measure_option(
            builder=autotvm.LocalBuilder(timeout=10),
            runner=autotvm.LocalRunner(number=20, repeat=3, timeout=4),
        ),
    }

    tvm_tasks = []
    for key, (fname, task) in tasks.items():
        print('Tuning for %s' % fname)
        tvm_tasks.append(autotvm_task(task, args.target))
    tune_tasks(tvm_tasks, **tuning_opt)
示例#23
0
def tuning_model(model_path):
    """Tune the model at ``model_path`` for the CLI-selected target and
    return the built (graph, lib, params).

    Raises:
        ValueError: if ``args.target`` is not 'x86', 'cpu', or 'gpu'.
            (Previously an unknown target left ``measure_option`` unbound
            and crashed later with an UnboundLocalError.)

    NOTE(review): relies on module-level ``args`` (CLI options) and the
    ``get_model`` / ``get_logfile`` / ``tuning`` helpers.
    """
    dtype = 'float32'
    ox, shape_dict = get_model(model_path)
    # First key of the shape dict is taken as the graph input name.
    input_name = list(shape_dict.keys())[0]
    device_key = None
    if args.target == 'gpu':
        device_key = 'V100'
    use_android = False

    log_file = get_logfile()

    other_option = {
        'model_path': model_path,
        'dtype': dtype,
        'input_name': input_name,
        'device_key': device_key,
        'use_android': use_android
    }

    if args.target == 'x86' or args.target == 'cpu':
        # Local CPU measurement.
        measure_option = autotvm.measure_option(
                builder=autotvm.LocalBuilder(),
                runner=autotvm.LocalRunner(
                    number=10, repeat=1,
                    min_repeat_ms=1000
                )
        )
    elif args.target == 'gpu':
        # Remote GPU measurement via the RPC tracker.
        measure_option = autotvm.measure_option(
                builder=autotvm.LocalBuilder(timeout=10),
                runner=autotvm.RPCRunner(
                    device_key,
                    '0.0.0.0', 9190,
                    number=20, repeat=3, timeout=4, min_repeat_ms=150)
        )
    else:
        raise ValueError("unsupported target: %r" % args.target)
    n_trial = 200

    tuning_option = {
        'log_filename': log_file,
        'tuner': 'xgb',
        'n_trial': n_trial,
        'early_stopping': 80,
        'measure_option': measure_option
    }

    graph, lib, params = tuning(tuning_option, **other_option)
    return graph, lib, params
示例#24
0
def test_min_repeat_ms():
    """Check that min_repeat_ms makes the runner repeat each measurement
    long enough: adaptive run count * mean cost must reach ~100 ms."""
    task, target = get_sample_task()

    measure_option = autotvm.measure_option(builder=autotvm.LocalBuilder(),
                                            runner=autotvm.LocalRunner(
                                                number=1, min_repeat_ms=100))

    def _callback(tuner, measure_inputs, measure_results):
        for inp, res in zip(measure_inputs, measure_results):
            if res.error_no != 0:
                continue

            # mean cost (s) * 1000 * adaptive run count must cover 100 ms
            assert 1000 * np.mean(res.costs) * \
                   measure_option['runner'].cur_number >= 100

    tuner = autotvm.tuner.RandomTuner(task)
    tuner.tune(n_trial=5, measure_option=measure_option, callbacks=[_callback])
示例#25
0
def test_graph_tuner_layout_transform_runner():
    """Benchmark layout transforms through an explicit LocalRunner and check
    that inferred transform times scale linearly with tensor size.

    With ``infer_layout=True`` the DPTuner derives each workload's cost as
    (elements in the input tensor) * (average per-element cost computed from
    the measured records); this test recomputes that expectation and requires
    an exact match against the tuner's inferred records.
    """
    log_file = "%s/test_tuner.log" % (os.getcwd())
    target = "llvm"
    dshape = (1, 3, 8, 8)
    dtype = "float32"
    layout = "NCHW"
    conv2d = relay.op.get("nn.conv2d")
    target_ops = [conv2d]

    g, records, ltf_records, ltf_keys, _ = _create_data(
        target, dshape, dtype, layout)
    executor = DPTuner(g, {"data": dshape},
                       records,
                       target_ops,
                       target=target,
                       log_file=log_file)
    runner = autotvm.LocalRunner(number=100, repeat=1, timeout=10)
    executor.benchmark_layout_transform(layout_records=ltf_records,
                                        infer_layout=True,
                                        runner=runner)
    out = executor._layout_transform_perf_records

    # Average measured cost per tensor element across all recorded transforms.
    num_flops = 0
    total_time = 0
    for record in ltf_records:
        ltf_wkl = record[0].task.workload
        input_shape = ltf_wkl[1][1]
        num_flops += np.prod(input_shape)
        total_time += record[1].costs[0]
    avg_time = total_time / num_flops

    for ltf_workload in out:
        input_shape = ltf_workload[1][1]
        # Use np.prod for the element count, consistent with the accumulation
        # loop above (previously re-implemented the product by hand).
        flops = np.prod(input_shape)
        expected_time = flops * avg_time
        out_time = out[ltf_workload][1].costs[0]
        assert (
            expected_time == out_time
        ), "Inferred layout transformation time mismatch for %s: " "expecting %f but got %f" % (
            str(ltf_workload),
            expected_time,
            out_time,
        )
示例#26
0
    def runner(target, dev):
        """Tune a sample task for a single trial using the mocked builder and
        confirm exactly one measurement result is collected."""
        task, target = get_sample_task(target, None)
        logging.info("task config space: %s", task.config_space)

        # MockedLocalBuilder stands in for autotvm.LocalBuilder() so no real
        # compilation is performed during the test.
        options = autotvm.measure_option(MockedLocalBuilder(),
                                         autotvm.LocalRunner())

        collected = []
        RandomTuner(task).tune(
            n_trial=1,
            measure_option=options,
            callbacks=[lambda _t, _ins, res: collected.extend(res)],
        )

        assert len(collected) == 1
示例#27
0
def get_tuning_opt(log_file="tuning.log", n_trial=200):
    """Build the dictionary of tuning options used by the tuning driver.

    Parameters
    ----------
    log_file : str
        Path of the file tuning records are written to.
    n_trial : int
        Maximum number of configurations to try per task.

    Returns
    -------
    dict
        Options consumed by the tuning loop (random tuner, early stopping
        after 60 unimproved trials, local builds and measurements).
    """
    measure = autotvm.measure_option(
        builder=autotvm.LocalBuilder(timeout=10),
        runner=autotvm.LocalRunner(
            number=20, repeat=3, timeout=4, min_repeat_ms=150),
    )
    return {
        "log_filename": log_file,
        "tuner": "random",
        "n_trial": n_trial,
        "early_stopping": 60,
        "measure_option": measure,
    }
示例#28
0
    def _autotune_kernel(self, autokernel):
        """Run XGBoost-driven AutoTVM tuning for one kernel and log results.

        Creates the tuning task for *autokernel* on the llvm target, records
        its config-space summary to the info file, then tunes for the number
        of trials configured in ``self.test_parameters``, appending each
        measurement to the kernel's log file.
        """
        params = self.test_parameters
        task = autotvm.task.create(autokernel,
                                   args=self._kernel_args(autokernel),
                                   target='llvm')

        TestSuite._write_to_infofile(
            autokernel, TestSuite._config_space_info(task.config_space), 'w')
        print(task.config_space)

        # Repeat each measurement several times to dampen run-to-run variance.
        options = autotvm.measure_option(
            builder='local',
            runner=autotvm.LocalRunner(number=params.variance_resistance_runs))

        log_cb = autotvm.callback.log_to_file(
            TestSuite._logfile_path(autokernel))
        autotvm.tuner.XGBTuner(task).tune(n_trial=params.trial_runs,
                                          measure_option=options,
                                          callbacks=[log_cb])
示例#29
0
def tune_and_evaluate():
    """Parse command-line options and run AutoTVM tuning for the selected
    tensor-contraction benchmark at square sizes 1000 and 4000."""
    parser = argparse.ArgumentParser(description='Run conv2d benchmarks in TVM')
    parser.add_argument('-b', '--benchmark', help="Int. Number of Tensor Contraction benchmark (1-4)", default=1, type=int)
    parser.add_argument('-f', '--feature', help="Type of feature to use, one of 'datavol', 'itervar', 'datavol_itervar', 'itervar_silent_dv'", default='itervar', type=str)
    parser.add_argument('-n', '--num_iters', help="Int. number of times to run training", default=1, type=int)
    parser.add_argument('-t', '--trials', help="Int. Number of trials to sample", default=2000, type=int)
    parser.add_argument('-l', '--likwid_event', help='Likwid event to capture during training', default=None)
    parser.add_argument('-r', '--random', help="Use XGB+SA to select samples, or randomly select", default=False, action='store_true')
    parser.add_argument('-k', '--key_id', help="Key ID for RPC server.", default=None, type=str)
    parser.add_argument('--sa_num_iters', help="Number of iterations of simulated annealing", default=500, type=int)
    parser.add_argument('--no_save_features', help="Should save features", default=False, action='store_true')

    args = parser.parse_args()
    trials = args.trials
    benchmark_id = args.benchmark

    global matmul_index
    for size in (1000, 4000):
        # Historical sweeps over benchmark indices kept for reference:
        # for ind in range(1,37):
        # for ind in [1,16,18,22,24,28,30,33,34,35,36,2]:
        matmul_index = benchmark_id
        print("Tuning TC %i..." % matmul_index)

        M = N = K = size

        tuning_option = {
            'tuner': 'xgb',
            'early_stopping': None,
            'measure_option': autotvm.measure_option(
                builder=autotvm.LocalBuilder(timeout=10, n_parallel=80),
                runner=autotvm.LocalRunner(repeat=10, number=4),
            ),
        }

        print("M,N,K", M, N, K)
        tune_kernels(args, M, N, K, trials, **tuning_option)
示例#30
0
# Tuning configuration: XGBoost tuner, 500 trials per task with early
# stopping after 100 unimproved trials. Builds happen locally (via NDK when
# cross-compiling for Android) and measurements run on the local machine.
tuning_option = {
    'log_filename': log_file,
    'tuner': 'xgb',
    'n_trial': 500,
    'early_stopping': 100,
    'measure_option': autotvm.measure_option(
        builder=autotvm.LocalBuilder(
            build_func='ndk' if use_android else 'default'),
        runner=autotvm.LocalRunner(number=10,
                                   repeat=1,
                                   timeout=4,
                                   min_repeat_ms=150),
        # Alternative: measure on a remote device through an RPC tracker.
        # runner=autotvm.RPCRunner(
        #     device_key, host='localhost', port=9190,
        #     number=10,
        #     timeout=5,
        # ),
    ),
}

####################################################################
#
# .. note:: How to set tuning options
#
#   In general, the default values provided here work well.
#   If you have enough time budget, you can set :code:`n_trial`, :code:`early_stopping` larger,