Example #1
from tvm import auto_scheduler


def run_tuning():
    print("Begin tuning...")
    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=200,  # change this to 20000 to achieve the best performance
        runner=auto_scheduler.LocalRunner(repeat=10,
                                          enable_cpu_cache_flush=True),
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )

    if use_sparse:
        from tvm.topi.sparse.utils import sparse_sketch_rules

        search_policy = [
            auto_scheduler.SketchPolicy(
                task,
                program_cost_model=auto_scheduler.XGBModel(),
                init_search_callbacks=sparse_sketch_rules(),
            ) for task in tasks
        ]

        tuner.tune(tune_option, search_policy=search_policy)
    else:
        tuner.tune(tune_option)
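
The function above relies on module-level names (tasks, task_weights, log_file, use_sparse) that the tutorial it comes from defines earlier. A minimal setup sketch, with a hypothetical toy conv2d standing in for a real model import:

import numpy as np

import tvm
from tvm import relay, auto_scheduler

# Hypothetical toy network; a real script would import a model instead.
data = relay.var("data", shape=(1, 3, 224, 224), dtype="float32")
weight = relay.var("weight", shape=(16, 3, 3, 3), dtype="float32")
out = relay.nn.conv2d(data, weight, kernel_size=(3, 3), padding=(1, 1))
mod = tvm.IRModule.from_expr(relay.Function([data, weight], out))
params = {"weight": tvm.nd.array(np.zeros((16, 3, 3, 3), dtype="float32"))}

target = tvm.target.Target("llvm")
log_file = "tuning.json"  # hypothetical log path
use_sparse = False

# extract_tasks produces the tasks and task_weights that run_tuning consumes.
tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)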
Example #2
import numpy as np

import tvm
from tvm import te, auto_scheduler


def test_measure_special_inputs_map_by_name_local_runner():
    @auto_scheduler.register_workload
    def foo():
        X = te.placeholder(shape=[10], dtype="int32")
        Index = te.placeholder(shape=[1], dtype="int32", name="Index")
        Y = te.compute((1,), lambda i: X[Index[i]])
        return [X, Index, Y]

    # This workload cannot use random data for the `Index` input: a random
    # value could index outside X's bounds, so we pin it via `task_inputs`,
    # which the measurer matches to the placeholder by its name.
    task = auto_scheduler.SearchTask(
        func=foo,
        target="llvm",
        task_inputs={
            "Index": tvm.nd.array(np.array([5], dtype="int32")),
        },
    )

    minp = auto_scheduler.MeasureInput(task, task.compute_dag.init_state)
    local_builder = auto_scheduler.LocalBuilder()
    local_runner = auto_scheduler.LocalRunner(timeout=10)

    bress = local_builder.build([minp])
    assert bress[0].error_no == 0
    mress = local_runner.run([minp], bress)
    assert mress[0].error_no == 0
Example #3
import os

import tvm
from tvm import relay, auto_scheduler

# get_network is a helper from the surrounding script that returns a Relay
# module, its parameters, and the input/output shapes for a named model.


def auto_scheduler_tune(network, target, input_name, log_file):
    if os.path.exists(log_file):
        os.remove(log_file)
    mod, net_params, input_shape, output_shape = get_network(network)
    if network not in ["bert"]:
        # convert to NHWC layout
        desired_layouts = {"nn.conv2d": ["NHWC", "default"]}
        seq = tvm.transform.Sequential([relay.transform.RemoveUnusedFunctions(),
                                        relay.transform.ConvertLayout(desired_layouts)])
        with tvm.transform.PassContext(opt_level=3):
            mod = seq(mod)

    if "cpu" in target.keys:
        tuning_opt = auto_scheduler.TuningOptions(
            num_measure_trials=20000,  # 20000 trials are enough to reach the best performance
            runner=auto_scheduler.LocalRunner(repeat=10, enable_cpu_cache_flush=True),
            measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
        )
    else:
        measure_ctx = auto_scheduler.LocalRPCMeasureContext(repeat=1, min_repeat_ms=300, timeout=10)
        tuning_opt = auto_scheduler.TuningOptions(
            num_measure_trials=20000,  # 20000 trials are enough to reach the best performance
            runner=measure_ctx.runner,
            measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
        )

    tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], net_params, target)
    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tuner.tune(tuning_opt)
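
Once tuning has populated log_file, the records are applied at compile time via ApplyHistoryBest; a short usage sketch in the standard tutorial pattern, reusing mod, net_params, target, and log_file from the function above:

with auto_scheduler.ApplyHistoryBest(log_file):
    with tvm.transform.PassContext(
            opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
        lib = relay.build(mod, target=target, params=net_params)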
Example #4
from tvm import auto_scheduler


def run_tuning():
    print("Begin tuning...")
    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=200,  # change this to 20000 to achieve the best performance
        runner=auto_scheduler.LocalRunner(repeat=10,
                                          enable_cpu_cache_flush=True),
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )

    tuner.tune(tune_option)
Example #5
import tvm
from tvm import auto_scheduler

# get_tiled_matmul comes from TVM's unit-test helpers (test_auto_scheduler_common).


def test_measure_local_builder_runner():
    if not tvm.runtime.enabled("llvm"):
        return

    dag, s0 = get_tiled_matmul()
    tgt = tvm.target.create("llvm")  # older spelling of tvm.target.Target("llvm")
    task = auto_scheduler.SearchTask(dag, "test", tgt)

    minp = auto_scheduler.MeasureInput(task, s0)
    local_builder = auto_scheduler.LocalBuilder()
    local_runner = auto_scheduler.LocalRunner(timeout=60)

    bress = local_builder.build([minp])
    assert bress[0].error_no == 0
    mress = local_runner.run([minp], bress)
    assert mress[0].error_no == 0
Example #6
import tvm
import tvm.testing
from tvm import auto_scheduler

# matmul_auto_scheduler_test is a workload registered in TVM's unit-test helpers.


def test_measure_local_builder_runner():
    if not tvm.testing.device_enabled("llvm"):
        return

    task = auto_scheduler.create_task(matmul_auto_scheduler_test, [512, 512, 512], "llvm")

    # Exercise both modes: flushing the CPU cache between repeats gives more
    # stable timings at the cost of slower measurement.
    for enable_cpu_cache_flush in [True, False]:
        minp = auto_scheduler.MeasureInput(task, task.compute_dag.init_state)
        local_builder = auto_scheduler.LocalBuilder()
        local_runner = auto_scheduler.LocalRunner(
            timeout=60, enable_cpu_cache_flush=enable_cpu_cache_flush
        )

        bress = local_builder.build([minp])
        assert bress[0].error_no == 0
        mress = local_runner.run([minp], bress)
        assert mress[0].error_no == 0
Example #7
import tvm
import tvm.testing
from tvm import auto_scheduler

# get_tiled_matmul comes from TVM's unit-test helpers.


def test_measure_local_builder_runner(enable_cpu_cache_flush=False):
    if not tvm.testing.device_enabled("llvm"):
        return

    dag, s0 = get_tiled_matmul()
    tgt = tvm.target.Target("llvm")
    task = auto_scheduler.SearchTask(dag, "test", tgt)

    minp = auto_scheduler.MeasureInput(task, s0)
    local_builder = auto_scheduler.LocalBuilder()
    local_runner = auto_scheduler.LocalRunner(
        timeout=60, enable_cpu_cache_flush=enable_cpu_cache_flush)

    bress = local_builder.build([minp])
    assert bress[0].error_no == 0
    mress = local_runner.run([minp], bress)
    assert mress[0].error_no == 0
Example #8
import os

from tvm import auto_scheduler

# get_network, network_to_n_trials, and update_file are helpers defined in the
# surrounding benchmark script.


def auto_scheduler_tune(network, batch_size, dtype, target, log_file):
    os.makedirs(os.path.dirname(log_file), exist_ok=True)
    #if os.path.exists(log_file):
    #    os.remove(log_file)

    layout = "NHWC"
    mod, params, input_name, input_shape, output_shape = get_network(
        network, batch_size, dtype, layout)

    n_trials = network_to_n_trials[(network, batch_size, dtype,
                                    str(target.kind))]

    if "cpu" in target.keys:
        tuning_opt = auto_scheduler.TuningOptions(
            num_measure_trials=n_trials,
            runner=auto_scheduler.LocalRunner(repeat=10,
                                              enable_cpu_cache_flush=True),
            measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
        )
    else:
        min_repeat_ms = 450 if network in ["bert"] else 300
        measure_ctx = auto_scheduler.LocalRPCMeasureContext(
            repeat=1, min_repeat_ms=min_repeat_ms, timeout=10)
        tuning_opt = auto_scheduler.TuningOptions(
            num_measure_trials=n_trials,
            runner=measure_ctx.runner,
            measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
        )

    tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params,
                                                       target)
    print(log_file)
    update_file(log_file, tasks)

    for idx, task in enumerate(tasks):
        print("========== Task %d  (workload key: %s) ==========" %
              (idx, task.workload_key))
        print(task.compute_dag)

    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tuner.tune(tuning_opt)
Example #9
import pickle

import tvm
import tvm.testing
from tvm import te, topi, auto_scheduler
from tvm.auto_scheduler import workload_registry


def test_dag_measure_local_builder_runner():
    if not tvm.testing.device_enabled("llvm"):
        return

    A = te.placeholder((512, 512), name="A")
    B = te.placeholder((512, 512), name="B")
    k = te.reduce_axis((0, 512), name="k")
    C = te.compute((512, 512),
                   lambda i, j: te.sum(A[i][k] * B[k][j], axis=[k]),
                   name="C")
    D = topi.nn.relu(C)
    E = topi.nn.relu(D)

    tensors = [A, B, E]
    dag = auto_scheduler.ComputeDAG(tensors)
    key = workload_registry.register_workload_tensors(dag.workload_key(),
                                                      tensors)
    transfer_data = workload_registry.serialize_workload_registry_entry(key)
    f_data = pickle.dumps(transfer_data)
    f_new = pickle.loads(f_data)
    del workload_registry.WORKLOAD_FUNC_REGISTRY[key]
    workload_registry.deserialize_workload_registry_entry(f_new)

    target = tvm.target.Target("llvm")
    task = auto_scheduler.SearchTask(compute_dag=dag,
                                     workload_key=key,
                                     target=target)

    for enable_cpu_cache_flush in [True, False]:
        minp = auto_scheduler.MeasureInput(task, task.compute_dag.init_state)
        local_builder = auto_scheduler.LocalBuilder()
        local_runner = auto_scheduler.LocalRunner(
            timeout=60, enable_cpu_cache_flush=enable_cpu_cache_flush)

        bress = local_builder.build([minp])
        assert bress[0].error_no == 0
        mress = local_runner.run([minp], bress)
        assert mress[0].error_no == 0
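
For reference, a measured state can also be lowered and built directly. A minimal sketch reusing task and target from the test above (init_state here, since no search was run):

sch, args = task.compute_dag.apply_steps_from_state(task.compute_dag.init_state)
func = tvm.build(sch, args, target)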