Example #1
def autotvm_tuning_opt(target, log_file, dtype="float32"):
    if "cpu" in target.keys:
        print("enable cpu tuning options")
        measure_option = autotvm.measure_option(
            builder=autotvm.LocalBuilder(),
            runner=autotvm.LocalRunner(number=1,
                                       repeat=10,
                                       min_repeat_ms=0,
                                       enable_cpu_cache_flush=True),
        )
    else:
        print("enable gpu tuning options")
        measure_option = autotvm.measure_option(
            builder=autotvm.LocalBuilder(timeout=10),
            runner=autotvm.LocalRunner(number=20,
                                       repeat=3,
                                       timeout=4,
                                       min_repeat_ms=150),
        )

    tuning_option = {
        "log_filename": log_file,
        "tuner": "xgb",
        "early_stopping": None,
        "measure_option": measure_option
    }
    return tuning_option
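A minimal sketch of how the returned dictionary might be consumed, assuming a task has already been created with autotvm.task.create; the run_tuning helper name and the trial budget are illustrative, not part of the original example:

from tvm import autotvm

def run_tuning(task, tuning_option):
    # Only the "xgb" tuner named in the options is handled in this sketch.
    tuner = autotvm.tuner.XGBTuner(task)
    tuner.tune(
        n_trial=min(1000, len(task.config_space)),
        early_stopping=tuning_option["early_stopping"],
        measure_option=tuning_option["measure_option"],
        callbacks=[autotvm.callback.log_to_file(tuning_option["log_filename"])],
    )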
Example #2
def create_measure(device, flag="t4"):
    if device in ('arm', 'aarch64'):
        # `use_android` is expected to be defined at module scope.
        measure_option = autotvm.measure_option(
            builder=autotvm.LocalBuilder(
                build_func='ndk' if use_android else 'default'),
            runner=autotvm.RPCRunner(
                "pi",  # device key registered with the RPC tracker
                host='0.0.0.0',
                port=9190,
                number=5,
                timeout=10,
            ))
    elif 'x86' in device:
        measure_option = autotvm.measure_option(
            builder=autotvm.LocalBuilder(),
            runner=autotvm.LocalRunner(number=5, repeat=1, min_repeat_ms=1000),
        )
    elif device == 'gpu':
        measure_option = autotvm.measure_option(
            builder=autotvm.LocalBuilder(timeout=1000),
            runner=autotvm.RPCRunner(
                flag,  # change the device key to your key
                '0.0.0.0',
                9190,
                number=20,
                repeat=3,
                timeout=1000,
                min_repeat_ms=150))
    else:
        raise ValueError("Unsupported device: %s" % device)
    return measure_option
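A brief usage sketch, assuming a task created with autotvm.task.create; for the 'arm'/'gpu' branches an RPC tracker must already be running with devices registered under the keys "pi" / flag (the tuner choice, trial count, and log file below are illustrative):

# For the RPC branches, start a tracker first, e.g.:
#   python -m tvm.exec.rpc_tracker --host=0.0.0.0 --port=9190
measure_option = create_measure('x86')
tuner = autotvm.tuner.XGBTuner(task)
tuner.tune(n_trial=100,
           measure_option=measure_option,
           callbacks=[autotvm.callback.log_to_file('tune.log')])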
Example #3
 def _tune_topi_cuda(self, name, args, te_tensors, tune_kwargs):
     n_trial = tune_kwargs.get('n_trial', 40)
     preserve_log = tune_kwargs.get('preserve_log', False)
     tmp_file_name = slugify(name) + '.topi_cuda.log'
     if n_trial > 0:
         task = autotvm.task.create(self.topi_cuda_task_name,
                                    args=args,
                                    target='cuda')
         tuner = tune_kwargs.get('tuner', autotvm.tuner.XGBTuner(task))
         tuner.tune(
             n_trial=n_trial,
             measure_option={
                 'builder':
                 tune_kwargs.get('builder', autotvm.LocalBuilder()),
                 'runner':
                 tune_kwargs.get(
                     'runner',
                     autotvm.LocalRunner(timeout=20,
                                         **default_tune_eval_settings)),
             },
             callbacks=[
                 autotvm.callback.progress_bar(n_trial,
                                               prefix=f'TOPI {name}'),
                 autotvm.callback.log_to_file(tmp_file_name),
                 *tune_kwargs.get('callbacks', [])
             ])
     with autotvm.apply_history_best(tmp_file_name):
         result = self._build_topi_cuda(name, args, te_tensors)
     if not preserve_log:
         os.remove(tmp_file_name)
     return result
Example #4
def test_autotvm(hexagon_session):
    """Top level test function for testing autotvm"""
    logfilename = "./hexagon.autotvm.log"

    options = {
        "log_filename": logfilename,
        "early_stopping": None,
        "measure_option": autotvm.measure_option(
            builder=autotvm.LocalBuilder(timeout=15),
            runner=autotvm.RPCRunner(
                module_loader=HexagonModuleLoader(hexagon_session),
                key=hexagon_session._remote_kw["key"],
                host=hexagon_session._remote_kw["host"],
                port=hexagon_session._remote_kw["port"],
                number=3,
                timeout=15,
                min_repeat_ms=150,
                # cooldown_interval=150
            ),
        ),
    }
    target_hexagon = tvm.target.hexagon("v68")
    task = autotvm.task.create(
        "demo_template", args=[], target=target_hexagon, target_host=target_hexagon
    )
    tune_tasks([task], **options)
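autotvm.task.create("demo_template", ...) assumes a template with that name has already been registered; the real template is not shown here, so the matmul body below is only an illustration of the registration pattern:

from tvm import autotvm, te

@autotvm.template("demo_template")
def demo_template():
    # Illustrative matmul workload with a single tunable split.
    M, K, N = 1024, 1024, 1024
    A = te.placeholder((M, K), name="A")
    B = te.placeholder((K, N), name="B")
    k = te.reduce_axis((0, K), name="k")
    C = te.compute((M, N), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="C")
    s = te.create_schedule(C.op)
    cfg = autotvm.get_config()
    x, y = s[C].op.axis
    cfg.define_split("tile_x", x, num_outputs=2)
    xo, xi = cfg["tile_x"].apply(s, C, x)
    return s, [A, B, C]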
Example #5
def test_tuning_cpu():
    ir_mod = tvm.parser.fromtext(
        textwrap.dedent("""
        #[version = "0.0.5"]
        def @main(%a : Tensor[(1, 3, 32, 32), float32], %b : Tensor[(3, 3, 5, 5), float32]) {
               nn.conv2d(%a, %b, data_layout="NCHW", kernel_layout="OIHW")
        }
        """))
    tasks = autotvm.task.relay_integration.extract_from_program(
        ir_mod, {}, tvm.target.create("llvm"))
    assert len(tasks) == 1, f"Extracted != 1 task from program: {tasks!r}"

    task = tasks[0]

    measure_option = autotvm.measure_option(autotvm.LocalBuilder(),
                                            autotvm.LocalRunner())

    results = []

    tuner = RandomTuner(task)
    tuner.tune(
        n_trial=20,
        measure_option=measure_option,
        callbacks=(lambda _tuner, _inputs, rs: results.extend(rs),
                   ),
    )

    assert len(results) == 20

    successful_results = [
        r for r in results if r.error_no == autotvm.MeasureErrorNo.NO_ERROR
    ]
    assert len(
        successful_results) > 0, f"No successful tuning runs: {results!r}"
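The test above only collects results in memory; to reuse the tuned configurations, a log-file callback and apply_history_best could be added. A sketch, assuming the same ir_mod, task, and measure_option (the file name is arbitrary):

from tvm import relay

tuner.tune(
    n_trial=20,
    measure_option=measure_option,
    callbacks=[autotvm.callback.log_to_file("cpu_tuning.log")],
)
with autotvm.apply_history_best("cpu_tuning.log"):
    lib = relay.build(ir_mod, target="llvm")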
Example #6
def test_task_tuner_without_measurement():
    """test task and tuner without measurement"""
    task, target = get_sample_task()

    class DummyRunner(Runner):
        def __init__(self):
            super(DummyRunner, self).__init__(1, 1)

        def run(self, measure_inputs, build_results):
            return [
                MeasureResult((np.random.random(), ), 0, 0.2, time.time())
                for _ in range(len(measure_inputs))
            ]

        def get_build_kwargs(self):
            return {}

    measure_option = autotvm.measure_option(builder=autotvm.LocalBuilder(),
                                            runner=DummyRunner())

    logging.info("%s", task.config_space)

    for tuner_class in [
            autotvm.tuner.RandomTuner, autotvm.tuner.GridSearchTuner,
            autotvm.tuner.GATuner, autotvm.tuner.XGBTuner
    ]:
        tuner = tuner_class(task)
        tuner.tune(n_trial=10, measure_option=measure_option)
        assert tuner.best_flops > 1
Example #7
def test_check_correctness():
    task, target = get_sample_task()

    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.LocalRunner(check_correctness=True))

    def _callback_correct(tuner, measure_inputs, measure_results):
        for _, res in zip(measure_inputs, measure_results):
            assert res.error_no == 0

    tuner = autotvm.tuner.RandomTuner(task)
    tuner.tune(n_trial=2,
               measure_option=measure_option,
               callbacks=[_callback_correct])

    # a bad template
    n = 128
    target = tvm.target.Target("llvm -device=bad_device")
    task = autotvm.task.create("testing/bad_matmul",
                               args=(n, n, n, "float32"),
                               target=target)

    def _callback_wrong(tuner, measure_inputs, measure_results):
        for _, res in zip(measure_inputs, measure_results):
            assert res.error_no == MeasureErrorNo.WRONG_ANSWER

    tuner = autotvm.tuner.RandomTuner(task)
    tuner.tune(n_trial=2,
               measure_option=measure_option,
               callbacks=[_callback_wrong])
Example #8
def test_convolution(n_trial=2000,
                     early_stopping=400,
                     learn_start=50,
                     memory_capacity=1000,
                     update_frequency=50,
                     discount=0.99,
                     epsilon=(1.0, 0.01, 0.99)):
    """
    Test simple convolution with RLTuner.
    """
    mod, params = _get_relay_convolution()
    tasks = autotvm.task.extract_from_program(
        mod["main"],
        target=target,
        target_host=tvm.target.Target("llvm"),
        params=params)
    runner = autotvm.LocalRunner(number=1, repeat=4)
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="default"), runner=runner)
    prefix = f"[Task 1/1]"
    tuner_obj = GADQNTuner(tasks[0],
                           learn_start=learn_start,
                           memory_capacity=memory_capacity,
                           update_frequency=update_frequency,
                           discount=discount,
                           epsilon=epsilon,
                           debug=True)
    tuner_obj.tune(
        n_trial=n_trial,
        early_stopping=early_stopping,
        measure_option=measure_option,
        callbacks=[autotvm.callback.progress_bar(n_trial, prefix=prefix)])
Example #9
    def runner(target):
        # init task
        task, target = get_sample_task(target, None)
        logging.info("task config space: %s", task.config_space)

        measure_option = autotvm.measure_option(autotvm.LocalBuilder(),
                                                autotvm.LocalRunner())

        results = []

        tuner = RandomTuner(task)
        tuner.tune(
            n_trial=20,
            measure_option=measure_option,
            callbacks=(lambda _tuner, _inputs, rs: results.extend(rs),
                       ),
        )

        assert len(results) == 20

        successful_results = [
            r for r in results if r.error_no == autotvm.MeasureErrorNo.NO_ERROR
            # We filter records before building if we know they won't work ahead of time.
            # We can't guarantee we get one good record so we count these as success too
            or r.error_no == autotvm.MeasureErrorNo.INSTANTIATION_ERROR
        ]
        assert len(
            successful_results) > 0, f"No successful tuning runs: {results!r}"
Example #10
def main():

    target = tvm.target.arm_cpu()
    
    batch_size = 1
    dtype = 'uint8'
    
    quant_model_url = "http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz"
    model_name = "mobilenet_v1_1.0_224_quant"
    log_file = "%s.log" % model_name
    
    
    input_tensor = "input"

    tuning_option = {
        'log_filename': log_file,
        'tuner': 'random',
        'early_stopping': 800,
        'measure_option': autotvm.measure_option(
            builder=autotvm.LocalBuilder(),
            runner=autotvm.LocalRunner(number=10, repeat=1,
                                       min_repeat_ms=1000),
        ),
    }

    mod, params, data_shape = tune(tuning_option, target, quant_model_url, model_name, batch_size, 
        input_tensor, need_tune=False)
    
    evaluate(log_file, mod, params, target, input_tensor, data_shape, input_dtype=dtype)
Example #11
def _test_op_with_ga(save_path, save_name, workload_name, n_trial,
                     early_stopping):
    """
    Test a specified single workload with GA tuner.
    """
    print(f"Running experiment with settings: n trial: {n_trial}, "
          f"early stopping: {early_stopping}")

    mod, params = _get_relay_workload(workload_name)
    tasks = autotvm.task.extract_from_program(
        mod["main"],
        target=target,
        target_host=tvm.target.Target("llvm"),
        params=params)
    runner = autotvm.LocalRunner(number=1, repeat=4)
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="default"), runner=runner)
    prefix = f"[Task 1/1]"
    tuner_obj = GATuner(tasks[0], debug=True)
    tuner_obj.tune(
        n_trial=n_trial,
        early_stopping=early_stopping,
        measure_option=measure_option,
        callbacks=[autotvm.callback.progress_bar(n_trial, prefix=prefix)])
    tuner_obj.save_model(save_path, save_name)
Example #12
def test_tuning_gpu(target, ctx):
    # init task
    task, target = get_sample_task(target, None)
    logging.info("task config space: %s", task.config_space)

    measure_option = autotvm.measure_option(autotvm.LocalBuilder(),
                                            autotvm.LocalRunner())

    results = []

    tuner = RandomTuner(task)
    tuner.tune(
        n_trial=20,
        measure_option=measure_option,
        callbacks=(lambda _tuner, _inputs, rs: results.extend(rs),
                   ),
    )

    assert len(results) == 20

    successful_results = [
        r for r in results if r.error_no == autotvm.MeasureErrorNo.NO_ERROR
    ]
    assert len(
        successful_results) > 0, f"No successful tuning runs: {results!r}"
Example #13
def tuning_model(model_path):
    dtype='float32'
    ox, shape_dict = get_model(model_path)
    input_name = list(shape_dict.keys())[0]
    device_key = None
    if args.target == 'gpu':
        device_key = 'V100'
    use_android = False

    log_file = get_logfile()

    other_option = {
        'model_path': model_path,
        'dtype': dtype,
        'input_name': input_name,
        'device_key': device_key,
        'use_android': use_android
    }

    if args.target == 'x86' or args.target == 'cpu':
        measure_option = autotvm.measure_option(
                builder=autotvm.LocalBuilder(),
                runner=autotvm.LocalRunner(
                    number=10, repeat=1,
                    min_repeat_ms=1000
                )
        )
    elif args.target == 'gpu':
        measure_option = autotvm.measure_option(
                builder=autotvm.LocalBuilder(timeout=10),
                runner=autotvm.RPCRunner(
                    device_key,
                    '0.0.0.0', 9190,
                    number=20, repeat=3, timeout=4, min_repeat_ms=150)
        )
    else:
        raise ValueError("Unsupported target: %s" % args.target)
    n_trial = 200

    tuning_option = {
        'log_filename': log_file,
        'tuner': 'xgb',
        'n_trial': n_trial,
        'early_stopping': 80,
        'measure_option': measure_option
    }

    graph, lib, params = tuning(tuning_option, **other_option)
    return graph, lib, params
Example #14
def run(name, N, H, W, CO, CI, KH, KW, stride, pad, dilation):
    strides, padding = (stride, stride), (pad, pad)
    task = autotvm.task.create(conv2d_nchw,
                               args=(N, H, W, CO, CI, KH, KW, strides, padding, dilation),
                               target='cuda')
    print(task.config_space)
    logfile = "conv2d_" + name + ".log"

    # Use the local GPU; each config is measured several times (repeat=3, min_repeat_ms=100) to reduce variance.
    # The run timeout is 10 seconds; compilation uses the LocalBuilder default timeout.
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.LocalRunner(repeat=3, min_repeat_ms=100, timeout=10)
    )

    # Begin tuning, log records to the per-workload log file (conv2d_<name>.log)
    # During tuning we will also try many invalid configs, so you are expected to
    # see many error reports. As long as you can see non-zero GFLOPS, it is okay.
    tuner = autotvm.tuner.XGBTuner(task)
    tuner.tune(n_trial=1000,
               measure_option=measure_option,
               callbacks=[autotvm.callback.log_to_file(logfile)])

    #########################################################################
    # Finally we can inspect the best config from log file, check correctness,
    # and measure running time.

    # inspect the best config
    dispatch_context = autotvm.apply_history_best(logfile)
    best_config = dispatch_context.query(task.target, task.workload)
    print("\nBest config:")
    print(best_config)

    # apply history best from log file
    with autotvm.apply_history_best(logfile):
        with tvm.target.create("cuda"):
            s, arg_bufs = conv2d_nchw(N, H, W, CO, CI, KH, KW, strides, padding, dilation)
            func = tvm.build(s, arg_bufs)

    # check correctness
    a_np = np.random.uniform(size=(N, CI, H, W)).astype(np.float32)
    w_np = np.random.uniform(size=(CO, CI, KH, KW)).astype(np.float32)
    # c_np = conv2d_nchw_python(a_np, w_np, strides, padding)

    ctx = tvm.gpu()
    a_tvm = tvm.nd.array(a_np, ctx=ctx)
    w_tvm = tvm.nd.array(w_np, ctx=ctx)
    c_tvm = tvm.nd.empty((N, CO, (H + 2 * pad - KH) // stride + 1, (W + 2 * pad - KW) // stride + 1), ctx=ctx)
    # func(a_tvm, w_tvm, c_tvm)

    # tvm.testing.assert_allclose(c_np, c_tvm.asnumpy(), rtol=1e-2)

    # Evaluate running time. Here we average over 10 runs to reduce the noise
    # and the overhead of kernel launch. You can also use nvprof to validate the result.
    evaluator = func.time_evaluator(func.entry_name, ctx, number=10)
    cost = evaluator(a_tvm, w_tvm, c_tvm).mean * 1e3
    print('Time cost of this operator: %f' % cost)
    with open("autotvm_conv_nchw.txt", "a") as f:
        f.write("name, {}\n".format(cost))
Example #15
def test_tuning(target, ctx):
    # init task
    task, target = get_sample_task(target, None)
    logging.info("%s", task.config_space)

    measure_option = autotvm.measure_option(autotvm.LocalBuilder(),
                                            autotvm.LocalRunner())

    tuner = RandomTuner(task)
    tuner.tune(n_trial=20, measure_option=measure_option)
Example #16
def tune_cuda_tile(name,
                   tree,
                   kernel_args,
                   parser,
                   n_trial=40,
                   tuner=None,
                   measure_option=None,
                   callbacks=None,
                   preserve_log=False):
    tmp_file_name = slugify(name) + '.cuda_tile.log'
    task = CUDATileTask(name, tree.copy(), kernel_args, parser)
    from random import randint
    stmt, args = task.instantiate(
        task.config_space.get(randint(0,
                                      len(task.config_space) - 1)))
    kernel = tvm.build(stmt, name=name, target='cuda')

    if n_trial > 0:
        if tuner is None:
            tuner = autotvm.tuner.XGBTuner(task, feature_type='knob')
        else:
            tuner = tuner(task)

        tuner.tune(
            n_trial=n_trial,
            measure_option={
                'builder':
                autotvm.LocalBuilder(),
                'runner':
                autotvm.LocalRunner(timeout=20, **default_tune_eval_settings),
                **(measure_option or {}),
            },
            callbacks=[
                autotvm.callback.progress_bar(n_trial,
                                              prefix=f'CUDATile {name}'),
                autotvm.callback.log_to_file(tmp_file_name), *(callbacks or [])
            ])

    best, best_cost = load_best(tmp_file_name, task)

    import gc
    gc.collect()

    if not best:
        raise Exception('failed to build kernel')

    best = CUDATileConfigEntity.from_json_dict(best)

    print('CUDATile %s: best %s, best cost %.12f' %
          (name, repr(best), best_cost))

    if not preserve_log:
        os.remove(tmp_file_name)

    return best, best_cost
Example #17
def tune_kernels(tasks,
                 builder=autotvm.LocalBuilder(),
                 runner=autotvm.LocalRunner(number=10,
                                            repeat=1,
                                            min_repeat_ms=1000),
                 tuner='ga',
                 early_stopping=None,
                 log_filename=log_file):
    measure_option = autotvm.measure_option(builder, runner)

    for i, tsk in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # converting conv2d tasks to conv2d_NCHWc tasks
        if tsk.workload:
            op_name = tsk.workload[0]
            if op_name == 'conv2d':
                func_create = 'topi_x86_conv2d_NCHWc'
            elif op_name == 'depthwise_conv2d_nchw':
                func_create = 'topi_x86_depthwise_conv2d_NCHWc_from_nchw'
            else:
                raise ValueError(
                    "Tuning {} is not supported on x86".format(op_name))

            task = autotvm.task.create(func_create,
                                       args=tsk.args,
                                       target=target,
                                       template_key='direct')
            task.workload = tsk.workload
        else:
            task = tsk

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(task, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(task, pop_size=1000)
        elif tuner == 'random':
            tuner_obj = RandomTuner(task)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(task)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        # do tuning
        n_trial = len(task.config_space)
        print("n_trial", n_trial)
        tuner_obj.tune(n_trial=n_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(n_trial,
                                                         prefix=prefix),
                           autotvm.callback.log_to_file(log_filename)
                       ])
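Once all tasks are tuned, the accumulated log can be distilled and applied at build time. A sketch using autotvm.record.pick_best; the output file name is arbitrary, and mod, params, and target are assumed to come from the earlier task-extraction step (they are not shown in this example):

from tvm import autotvm, relay

# Keep only the best record per workload, then build with those configs applied.
autotvm.record.pick_best(log_file, "best_records.log")
with autotvm.apply_history_best("best_records.log"):
    lib = relay.build(mod, target=target, params=params)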
Example #18
def test_gemm(mm, nn, ll):
    # correctness
    m, n, l = mm, nn, ll
    dtype = 'float32'

    logging.getLogger('autotvm').setLevel(logging.DEBUG)
    logging.getLogger('autotvm').addHandler(logging.StreamHandler(sys.stdout))
    log_file = 'gemm.log'

    task = autotvm.task.create('gemm-autotune/gemm_v2',
                               args = (m, n, l), target='cuda')
    print(task.config_space)

    measure_option = autotvm.measure_option(
        builder = autotvm.LocalBuilder(),
        runner = autotvm.LocalRunner(repeat=3, min_repeat_ms=100, timeout=4)
    )
    tuner = autotvm.tuner.XGBTuner(task, feature_type='knob')
    tuner.tune(n_trial=1000,
               measure_option = measure_option,
               callbacks = [autotvm.callback.log_to_file(log_file)])

    dispatch_context = autotvm.apply_history_best(log_file)
    best_config = dispatch_context.query(task.target, task.workload)
    print('\nBest config:')
    print(best_config)

    with autotvm.apply_history_best(log_file):
        with tvm.target.create('cuda'):
            s, arg_bufs = gemm_autotune(m, n, l)
            f = tvm.build(s, arg_bufs)
    # launch the kernel.
    # a_np = np.random.uniform(size=(n, l)).astype(A.dtype)
    # b_np = np.random.uniform(size=(m, l)).astype(B.dtype)
    ctx = tvm.gpu(0)
    a_np = np.random.uniform(size=(l, n)).astype(dtype)
    b_np = np.random.uniform(size=(l, m)).astype(dtype)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(b_np, ctx)
    c = tvm.nd.array(np.zeros((m, n), dtype=dtype), ctx)
    for i in range(2):
        f(a, b, c)
    print('function called')
    tvm.testing.assert_allclose(
        c.asnumpy(), np.dot(b_np.T, a_np), rtol=1e-5)

    num_flops = 2 * nn * mm * ll
    num_runs = 10
    timer_f = f.time_evaluator(f.entry_name, ctx, number=num_runs)
    t = timer_f(a, b, c).mean
    tflops = num_flops / t / 1e12
    print("average time cost of %d runs = %g ms, %g TFLOPS." % (num_runs, t * 1e3, tflops))
Example #19
def test_sparse_dense_bsr_autotune(M, N, K, BS_R, BS_C, density):
    """Benchmark sparse-dense matrix multiplication with auto tuning enabled"""
    print("testing param", M, N, K, BS_R, BS_C, density)
    X_np = np.random.randn(M, K).astype("float32")
    W_sp_np = random_bsr_matrix(N, K, BS_R, BS_C, density=density, dtype="float32")
    W_np = W_sp_np.todense()
    Y_np = X_np.dot(W_np.T)

    # logging config (for printing tuning log to screen)
    logging.getLogger('autotvm').setLevel(logging.DEBUG)
    logging.getLogger('autotvm').addHandler(logging.StreamHandler(sys.stdout))

    W_sp_np_data_shape, W_sp_np_indices_shape, W_sp_np_indptr_shape, X_np_shape = W_sp_np.data.shape, W_sp_np.indices.shape, W_sp_np.indptr.shape, X_np.shape
    
    task = autotvm.task.create("benchmark/block_sparse",
                            args=(W_sp_np_data_shape, W_sp_np_indices_shape, W_sp_np_indptr_shape, X_np_shape),
                            target='cuda')
    
    # Use local gpu, measure multiple times for every config to reduce variance
    # The timeout for running is 4 seconds
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.LocalRunner(repeat=args.repeat, min_repeat_ms=100, timeout=4)
    )

    # Begin tuning, log records to file `conv2d.log`
    # During tuning we will also try many invalid configs, so you are expected to
    # see many error reports. As long as you can see non-zero GFLOPS, it is okay.
    tuner = autotvm.tuner.XGBTuner(task)
    if args.tune:
        tuner.tune(n_trial=args.n_trial,
                measure_option=measure_option,
                callbacks=[autotvm.callback.log_to_file(args.autotvm_log)])

    # apply history best from log file
    with autotvm.apply_history_best(args.autotvm_log):
        with tvm.target.create("cuda"):
            s, arg_bufs = block_sparse_template(W_sp_np_data_shape, W_sp_np_indices_shape, W_sp_np_indptr_shape, X_np_shape)
            func = tvm.build(s, arg_bufs)

    timer = func.time_evaluator(func.entry_name, context, number=20)
    Y_tvm = tvm.nd.array(np.zeros(Y_np.shape, dtype=Y_np.dtype), ctx=context)

    mean_time = timer(tvm.nd.array(X_np, ctx=context),
                      tvm.nd.array(W_sp_np.data, ctx=context),
                      tvm.nd.array(W_sp_np.indices, ctx=context),
                      tvm.nd.array(W_sp_np.indptr, ctx=context),
                      Y_tvm).mean
    
    print('%g ms' % (mean_time * 1e3))
    print("------------------------")
    tvm.testing.assert_allclose(Y_tvm.asnumpy(), Y_np, atol=1e-4, rtol=1e-4)
Example #20
def _test_op_with_dqnga(save_path,
                        save_name,
                        workload_name,
                        n_trial,
                        early_stopping,
                        learn_start,
                        update_frequency,
                        train_frequency,
                        discount,
                        epsilon_decay,
                        agent_batch_size,
                        hidden_sizes,
                        learning_rate,
                        reward_function=RewardFunction.R3):
    """
    Test a specified single workload using RLTuner.
    """
    print(
        f"Running experiment with settings: n trial: {n_trial}, "
        f"early stopping: {early_stopping}, learn start: {learn_start}, "
        f"update frequency: {update_frequency}, discount: {discount}, "
        f"ep decay: {epsilon_decay}, hidden sizes: {hidden_sizes},"
        f"agent batch size: {agent_batch_size}, learning rate: {learning_rate}"
    )

    mod, params = _get_relay_workload(workload_name)
    tasks = autotvm.task.extract_from_program(
        mod["main"],
        target=target,
        target_host=tvm.target.Target("llvm"),
        params=params)
    runner = autotvm.LocalRunner(number=1, repeat=4)
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="default"), runner=runner)
    prefix = f"[Task 1/1]"
    tuner_obj = GADQNTuner(tasks[0],
                           learn_start=learn_start,
                           target_update_frequency=update_frequency,
                           train_frequency=train_frequency,
                           discount=discount,
                           epsilon_decay=epsilon_decay,
                           agent_batch_size=agent_batch_size,
                           hidden_sizes=hidden_sizes,
                           learning_rate=learning_rate,
                           reward_function=reward_function)
    tuner_obj.tune(
        n_trial=n_trial,
        early_stopping=early_stopping,
        measure_option=measure_option,
        callbacks=[autotvm.callback.progress_bar(n_trial, prefix=prefix)])
    tuner_obj.save_model(save_path, save_name)
Example #21
def run_one_wkl(wkl, new_log_path, inputs):
    task = wkl.to_task()

    # Re-tune the best configs.
    log_writer = log_to_file(new_log_path)
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(timeout=10),
        runner=autotvm.LocalRunner(number=5, repeat=1, min_repeat_ms=1000))
    measure_batch = create_measure_batch(task, measure_option)
    results = measure_batch(inputs)
    log_writer(None, inputs, results)

    del measure_batch
    return
Example #22
    def check(target, target_host):
        ctx = tvm.context(target, 0)
        if not ctx.exist:
            logging.info("Skip test because %s is not available" % target)
            return

        # init task
        task, target = get_sample_task(target, target_host)
        logging.info("%s", task.config_space)

        measure_option = autotvm.measure_option(autotvm.LocalBuilder(),
                                                autotvm.LocalRunner())

        tuner = RandomTuner(task)
        tuner.tune(n_trial=20, measure_option=measure_option)
Example #23
def test_task_tuner_without_measurement():
    """test task and tuner without measurement"""
    task, _ = get_sample_task()

    measure_option = autotvm.measure_option(builder=autotvm.LocalBuilder(),
                                            runner=DummyRunner())

    logging.info("%s", task.config_space)

    for tuner_class in [
            autotvm.tuner.RandomTuner, autotvm.tuner.GridSearchTuner,
            autotvm.tuner.GATuner, autotvm.tuner.XGBTuner
    ]:
        tuner = tuner_class(task)
        tuner.tune(n_trial=10, measure_option=measure_option)
        assert tuner.best_flops > 1
Example #24
def main():
    parser = argparse.ArgumentParser(description='Tune ops')
    parser.add_argument('output', type=str)
    parser.add_argument('ops', type=str, nargs='+')
    parser.add_argument('--batchsize', type=int)
    parser.add_argument('--base', type=str)
    parser.add_argument('--target', type=str, default='cuda')
    args = parser.parse_args()

    tasks = read_tasks(args.ops, args.batchsize)

    print('Read %d tasks from %d files' % (len(tasks), len(args.ops)))

    if args.base:
        base_config = TVMConfig(args.base)
        discard_keys = []
        for task_key, (filename, task) in tasks.items():
            query = autotvm_key_from_task(task)
            if base_config.contains(query):
                print('%s is already tuned' % filename)
                discard_keys.append(task_key)
        for task_key in discard_keys:
            tasks.pop(task_key)
        print('Removed %d tasks. Will tune for %d tasks.' %
              (len(discard_keys), len(tasks)))

    tuning_opt = {
        'log_filename': args.output,
        'tuner': 'xgb',
        'n_trial': 2000,
        'early_stopping': 600,
        'measure_option': autotvm.measure_option(
            builder=autotvm.LocalBuilder(timeout=10),
            runner=autotvm.LocalRunner(number=20, repeat=3, timeout=4),
        ),
    }

    tvm_tasks = []
    for task_key, (filename, task) in tasks.items():
        print('Tuning for %s' % filename)
        tvm_tasks.append(autotvm_task(task, args.target))
    tune_tasks(tvm_tasks, **tuning_opt)
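The tune_tasks helper consumed by these options is not shown. A sketch of the conventional shape of such a helper, following the pattern used in the TVM tuning tutorials rather than the author's actual implementation:

from tvm import autotvm
from tvm.autotvm.tuner import XGBTuner, RandomTuner

def tune_tasks(tasks, measure_option, tuner='xgb', n_trial=1000,
               early_stopping=None, log_filename='tuning.log'):
    for i, tsk in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
        # Pick a tuner for this task.
        tuner_obj = XGBTuner(tsk, loss_type='rank') if tuner == 'xgb' else RandomTuner(tsk)
        tsk_trial = min(n_trial, len(tsk.config_space))
        tuner_obj.tune(n_trial=tsk_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
                                  autotvm.callback.log_to_file(log_filename)])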
Example #25
def tune_and_evaluate():
    df = pd.read_csv(args.layer_info)
    df = df[df['filename'] == args.layer]

    filenames = df.filename

    for net_fname in filenames:
        print('Tuning: ', net_fname)

        #### TUNING OPTION ####
        log_file = "models/%s/logs/%s.log" % (args.model, args.log_file)

        tuning_opt = {
            'log_filename': log_file,
            'n_trial': args.n_trials,
            'measure_option': autotvm.measure_option(
                builder=autotvm.LocalBuilder(timeout=10),
                runner=autotvm.RPCRunner(args.device_key,
                                         '0.0.0.0',
                                         9190,
                                         number=20,
                                         repeat=3,
                                         timeout=4,
                                         min_repeat_ms=150)),
        }

        in_c = int(df.loc[df.filename == net_fname, 'in_channels'])
        in_x = int(df.loc[df.filename == net_fname, 'input_spatial_x'])
        out_c = int(df.loc[df.filename == net_fname, 'out_channels'])

        input_shape = (1, in_c, in_x, in_x)
        print(input_shape)
        # extract workloads from relay program
        print("\tExtract tasks...")
        net, params = get_network(net_fname, input_shape)
        tasks = autotvm.task.extract_from_program(net['main'],
                                                  target=target,
                                                  target_host=target_host,
                                                  params=params,
                                                  ops=(relay.op.nn.conv2d, ))

        # run tuning tasks
        print("\tTuning...")
        tune_tasks(tasks, **tuning_opt)
Example #26
def test_min_repeat_ms():
    task, target = get_sample_task()

    measure_option = autotvm.measure_option(builder=autotvm.LocalBuilder(),
                                            runner=autotvm.LocalRunner(
                                                number=1, min_repeat_ms=100))

    def _callback(tuner, measure_inputs, measure_results):
        for inp, res in zip(measure_inputs, measure_results):
            if res.error_no != 0:
                continue

            assert 1000 * np.mean(res.costs) * \
                   measure_option['runner'].cur_number >= 100

    tuner = autotvm.tuner.RandomTuner(task)
    tuner.tune(n_trial=5, measure_option=measure_option, callbacks=[_callback])
Example #27
def test_random_tuner():
    """Test RandomTuner"""

    task, _ = get_sample_task()
    measure_option = autotvm.measure_option(builder=autotvm.LocalBuilder(), runner=DummyRunner())

    tuner = autotvm.tuner.RandomTuner(task, range_idx=(8, 15))
    assert tuner.range_length == 8
    assert tuner.index_offset == 8

    # Tuner should only focus on the specified range and should visit all indices
    tuner.tune(n_trial=8, measure_option=measure_option)
    assert tuner.counter == 8
    assert not tuner.has_next()
    visited = set()
    for idx in tuner.visited:
        assert idx not in visited
        assert 8 <= idx <= 15
        visited.add(idx)
Example #28
def test_gridsearch_tuner():
    """Test GridSearchTuner"""

    task, _ = get_sample_task()
    measure_option = autotvm.measure_option(builder=autotvm.LocalBuilder(), runner=DummyRunner())

    # When no range index, range_length should be the length of config space
    tuner = autotvm.tuner.GridSearchTuner(task)
    assert tuner.range_length == len(task.config_space)
    assert tuner.index_offset == 0

    # With range index, range_length should be the length of the specified range
    tuner = autotvm.tuner.GridSearchTuner(task, range_idx=(8, 15))
    assert tuner.range_length == 8
    assert tuner.index_offset == 8

    # Tuner should only focus on the specified range
    tuner.tune(n_trial=8, measure_option=measure_option)
    assert tuner.counter == 8
    assert not tuner.has_next()
Example #29
def get_tuning_opt(log_file="tuning.log", n_trial=200):
    """Returns tuning options"""
    tuning_opt = {
        "log_filename": log_file,
        "tuner": "random",
        "n_trial": n_trial,
        "early_stopping": 60,
        "measure_option": autotvm.measure_option(
            builder=autotvm.LocalBuilder(timeout=10),
            runner=autotvm.LocalRunner(number=20,
                                       repeat=3,
                                       timeout=4,
                                       min_repeat_ms=150),
        ),
    }
    return tuning_opt
Example #30
    def __init__(self, task, target, device_key):
        self.task = task
        self.target = target
        self.device_key = device_key
        self.best_config = None
        self.best_latency = None

        self.early_stopping = None
        self.record = None
        self.tuner = 'xgb'
        self.n_trial = 30

        self.measure_option = autotvm.measure_option(
                builder=autotvm.LocalBuilder(build_func="default"),
                runner=autotvm.RPCRunner(
                    device_key,
                    host="115.145.179.79",
                    port=9090,
                    number=5,
                    timeout=10,
                ),
            )
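A sketch of a companion tune method this initializer appears to prepare for; the body below is an assumption, not part of the original class:

    def tune(self, log_file="tune.log"):
        # Assumed helper: run the tuner configured in __init__ and log records to file.
        tuner_obj = autotvm.tuner.XGBTuner(self.task) if self.tuner == 'xgb' \
            else autotvm.tuner.RandomTuner(self.task)
        tuner_obj.tune(n_trial=min(self.n_trial, len(self.task.config_space)),
                       early_stopping=self.early_stopping,
                       measure_option=self.measure_option,
                       callbacks=[autotvm.callback.log_to_file(log_file)])
        self.record = log_file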