예제 #1
0
def test_meta_schedule_error_handle_test_builder():
    """Test the error handing during building"""
    class TestBuilder(PyBuilder):
        def build(  # pylint: disable=no-self-use
            self,
            build_inputs: List[BuilderInput],
        ) -> List[BuilderResult]:
            return [BuilderResult(None, "error") for w in build_inputs]

    builder = TestBuilder()
    builder_inputs = [
        BuilderInput(MatmulModule, Target("llvm")),
        BuilderInput(MatmulReluModule, Target("llvm")),
        BuilderInput(BatchMatmulModule, Target("llvm")),
    ]
    builder_results = builder.build(builder_inputs)
    assert len(builder_results) == len(builder_inputs)
    for result in builder_results:
        artifact_path = result.artifact_path
        error_msg = result.error_msg
        assert artifact_path is None
        assert error_msg == "error"
예제 #2
0
def test_meta_schedule_local_runner_time_out():
    """Test meta schedule Local Runner time out"""
    mod = MatmulModule
    builder = LocalBuilder()
    (builder_result,) = builder.build([BuilderInput(mod, Target("llvm"))])
    assert builder_result.artifact_path is not None
    assert builder_result.error_msg is None

    runner_input = RunnerInput(
        builder_result.artifact_path,
        "llvm",
        [
            TensorInfo("float32", (MATMUL_N, MATMUL_N)),
            TensorInfo("float32", (MATMUL_N, MATMUL_N)),
            TensorInfo("float32", (MATMUL_N, MATMUL_N)),
        ],
    )

    def initializer():
        @register_func("meta_schedule.runner.test_time_out")
        def timeout_session_creator(  # pylint: disable=unused-variable
            device: Device,  # pylint: disable=unused-argument
            args_info: T_ARG_INFO_JSON_OBJ_LIST,  # pylint: disable=unused-argument
            alloc_repeat: int,  # pylint: disable=unused-argument
        ) -> RPCSession:
            time.sleep(2)

    evaluator_config = EvaluatorConfig(
        number=1,
        repeat=1,
        min_repeat_ms=0,
        enable_cpu_cache_flush=False,
    )

    runner = LocalRunner(
        timeout_sec=1,
        evaluator_config=evaluator_config,
        initializer=initializer,
        f_alloc_argument="meta_schedule.runner.test_time_out",
    )

    # Run the module
    (runner_future,) = runner.run([runner_input])
    runner_result = runner_future.result()

    assert runner_result.error_msg is not None and runner_result.error_msg.startswith(
        "LocalRunner: Timeout, killed after"
    )
    assert runner_result.run_secs is None
    _clean_build(builder_result.artifact_path)
예제 #3
0
def test_meta_schedule_error_handle_export_func():
    """Test the error handing during building"""
    def initializer():
        @register_func("meta_schedule.builder.test_export")
        def test_build(mod: Module) -> str:  # pylint: disable=unused-variable
            raise ValueError("Builder intended Test Error (export func).")

    builder = LocalBuilder(f_export="meta_schedule.builder.test_export",
                           initializer=initializer)
    builder_inputs = [BuilderInput(MatmulModule(), Target("llvm"))]
    builder_results = builder.build(builder_inputs)
    assert len(builder_results) == len(builder_inputs)
    for result in builder_results:
        artifact_path = result.artifact_path
        error_msg = result.error_msg
        assert artifact_path is None
        assert error_msg.startswith("LocalBuilder: An exception occurred")
def verify_meta_schedule_with_tensorrt(
    mod,
    params,
    data_shape,
    use_trt: bool = True,
):
    # Build
    builder = LocalBuilder(
        f_build=build_relay_with_tensorrt if use_trt else build_relay,
        timeout_sec=1000,
    )
    builder_input = BuilderInput(mod, Target("cuda"), params)
    builder_result = builder.build([builder_input])[0]
    assert builder_result.error_msg is None, builder_result.error_msg
    assert builder_result.artifact_path is not None

    # Run
    runner_input = RunnerInput(
        builder_result.artifact_path,
        device_type="cuda",
        args_info=[TensorInfo("float32", data_shape)],
    )
    runner = LocalRunner(
        evaluator_config=EvaluatorConfig(
            number=5,
            repeat=2,
            min_repeat_ms=0,
            enable_cpu_cache_flush=False,
        ),
        f_run_evaluator=run_with_graph_executor,
    )

    # Run the module
    runner_future = runner.run([runner_input])[0]
    runner_result = runner_future.result()
    assert runner_result is not None
    assert runner_result.error_msg is None, runner_result.error_msg
    assert runner_result.run_secs is not None

    for result in runner_result.run_secs:
        if isinstance(result, FloatImm):
            result = result.value
        assert isinstance(result, float)
        assert result >= 0.0
def test_meta_schedule_rpc_single_run():
    """Test meta schedule rpc runner for a single run"""
    # Build the module
    mod = MatmulModule
    builder = LocalBuilder()
    (builder_result, ) = builder.build([BuilderInput(mod, Target("llvm"))])
    assert builder_result.artifact_path is not None
    assert builder_result.error_msg is None

    runner_input = RunnerInput(
        builder_result.artifact_path,
        "llvm",
        [
            TensorInfo("float32", (MATMUL_N, MATMUL_N)),
            TensorInfo("float32", (MATMUL_N, MATMUL_N)),
            TensorInfo("float32", (MATMUL_N, MATMUL_N)),
        ],
    )

    with LocalRPC() as rpc:
        rpc_config = RPCConfig(
            tracker_host=rpc.tracker_host,
            tracker_port=rpc.tracker_port,
            tracker_key=rpc.tracker_key,
            session_priority=1,
            session_timeout_sec=100,
        )
        evaluator_config = EvaluatorConfig(
            number=1,
            repeat=1,
            min_repeat_ms=0,
            enable_cpu_cache_flush=False,
        )
        runner = RPCRunner(rpc_config, evaluator_config)
        # Run the module
        (runner_future, ) = runner.run([runner_input])
        runner_result = runner_future.result()
    assert runner_result.error_msg is None
    for result in runner_result.run_secs:
        if isinstance(result, FloatImm):
            result = result.value
        assert isinstance(result, float)
        assert result >= 0.0
    _clean_build(builder_result.artifact_path)
예제 #6
0
def test_meta_schedule_error_handle_time_out():
    """Test the error handing time out during building"""
    def initializer():
        @register_func("meta_schedule.builder.test_time_out")
        def timeout_build(mod, target):  # pylint: disable=unused-argument, unused-variable
            time.sleep(2)

    builder = LocalBuilder(
        timeout_sec=1,
        f_build="meta_schedule.builder.test_time_out",
        initializer=initializer,
    )
    builder_inputs = [BuilderInput(MatmulModule(), Target("llvm"))]
    builder_results = builder.build(builder_inputs)
    assert len(builder_results) == len(builder_inputs)
    for result in builder_results:
        artifact_path = result.artifact_path
        error_msg = result.error_msg
        assert artifact_path is None
        assert error_msg.startswith("LocalBuilder: Timeout")
def test_meta_schedule_local_runner_add_test():
    """Test meta schedule local runner with add module"""
    def _check_correct_add(args_before: List[np.array],
                           args_after: List[np.array]) -> None:
        a_before, b_before, c_before = args_before
        a_after, b_after, c_after = args_after
        c_before = a_before + b_before
        assert (a_before == a_after).all()
        assert (b_before == b_after).all()
        assert (c_before == c_after).all()

    def test_alloc_argument(
        device: Device,
        args_info: T_ARG_INFO_JSON_OBJ_LIST,  # pylint: disable=unused-argument
        alloc_repeat: int,
    ) -> List[T_ARGUMENT_LIST]:
        global repeated_args_before  # pylint: disable=global-variable-undefined, invalid-name
        repeated_args_before = []
        repeated_args = local_default_alloc_argument(device, args_info,
                                                     alloc_repeat)
        for args in repeated_args:
            repeated_args_before.append([arg.asnumpy() for arg in args])
        return repeated_args

    def test_run_evaluator(
        rt_mod: Module,
        device: Device,
        evaluator_config: EvaluatorConfig,
        repeated_args: List[Any],
    ) -> List[float]:
        global repeated_args_before  # pylint: disable=global-variable-undefined, invalid-name
        repeated_args_after = []
        evaluator = rt_mod.time_evaluator(
            func_name=rt_mod.entry_name,
            dev=device,
            number=evaluator_config.number,
            repeat=evaluator_config.repeat,
            min_repeat_ms=evaluator_config.min_repeat_ms,
            f_preproc="cache_flush_cpu_non_first_arg"
            if evaluator_config.enable_cpu_cache_flush else "",
        )
        repeated_costs: List[List[float]] = []
        for args in repeated_args:
            device.sync()
            profile_result = evaluator(*args)
            repeated_costs.append(profile_result.results)
            repeated_args_after.append([arg.asnumpy() for arg in args])
        costs = [
            float(cost)
            for cost in itertools.chain.from_iterable(repeated_costs)
        ]
        for args_before, args_after in zip(repeated_args_before,
                                           repeated_args_after):
            _check_correct_add(args_before, args_after)
        del repeated_args_before
        return costs

    # Build the module
    mod = AddModule
    builder = LocalBuilder()
    (builder_result, ) = builder.build([BuilderInput(mod, Target("llvm"))])
    assert builder_result.artifact_path is not None
    assert builder_result.error_msg is None

    runner_input = RunnerInput(
        builder_result.artifact_path,
        "llvm",
        [
            TensorInfo("float32", [MATMUL_M]),
            TensorInfo("float32", [MATMUL_M]),
            TensorInfo("float32", [MATMUL_M]),
        ],
    )

    evaluator_config = EvaluatorConfig(
        number=1,
        repeat=1,
        min_repeat_ms=0,
        enable_cpu_cache_flush=False,
    )
    runner = LocalRunner(
        timeout_sec=100,
        evaluator_config=evaluator_config,
        f_alloc_argument=test_alloc_argument,
        f_run_evaluator=test_run_evaluator,
    )
    # Run the module
    (runner_future, ) = runner.run([runner_input])
    runner_result = runner_future.result()
    assert runner_result.error_msg is None
    for result in runner_result.run_secs:
        if isinstance(result, FloatImm):
            result = result.value
        assert isinstance(result, float)
        assert result >= 0.0
    _clean_build(builder_result.artifact_path)
def test_meta_schedule_runner_matmul_test():
    """Test meta schedule runner with add module"""
    def _check_correct_matmul(
        args_before: List[np.ndarray],
        args_after: List[np.ndarray],
    ) -> None:
        a_before, b_before, c_before = args_before
        a_after, b_after, c_after = args_after
        c_before = np.matmul(a_before, b_before)
        assert (a_before == a_after).all()
        assert (b_before == b_after).all()
        tvm.testing.assert_allclose(c_before, c_after, rtol=1e-5)

    def test_alloc_argument(
        session: RPCSession,
        device: Device,
        args_info: Any,
        alloc_repeat: int,
    ) -> List[Any]:
        global repeated_args_before  # pylint: disable=global-variable-undefined, invalid-name
        repeated_args_before = []  # type: ignore
        repeated_args = rpc_default_alloc_argument(session, device, args_info,
                                                   alloc_repeat)
        for args in repeated_args:
            repeated_args_before.append([arg.numpy()
                                         for arg in args])  # type: ignore
        return repeated_args

    def test_run_evaluator(
        session: RPCSession,  # pylint: disable=unused-argument
        rt_mod: Module,
        device: Device,
        evaluator_config: EvaluatorConfig,
        repeated_args: List[Any],
    ) -> List[float]:
        global repeated_args_before  # pylint: disable=global-variable-undefined, invalid-name
        repeated_args_after = []
        evaluator = rt_mod.time_evaluator(
            func_name=rt_mod.entry_name,
            dev=device,
            number=evaluator_config.number,
            repeat=evaluator_config.repeat,
            min_repeat_ms=evaluator_config.min_repeat_ms,
            f_preproc="cache_flush_cpu_non_first_arg"
            if evaluator_config.enable_cpu_cache_flush else "",
        )
        repeated_costs: List[List[float]] = []
        for args in repeated_args:
            device.sync()
            profile_result = evaluator(*args)
            repeated_costs.append(profile_result.results)
            repeated_args_after.append([arg.numpy() for arg in args])
        costs = [
            float(cost)
            for cost in itertools.chain.from_iterable(repeated_costs)
        ]
        for args_before, args_after in zip(
                repeated_args_before,  # type: ignore
                repeated_args_after,
        ):
            _check_correct_matmul(args_before, args_after)
        del repeated_args_before  # type: ignore
        return costs

    # Build the module
    mod = MatmulModule
    builder = LocalBuilder()
    (builder_result, ) = builder.build([BuilderInput(mod, Target("llvm"))])
    assert builder_result.artifact_path is not None
    assert builder_result.error_msg is None

    runner_input = RunnerInput(
        builder_result.artifact_path,
        "llvm",
        [
            TensorInfo("float32", (MATMUL_N, MATMUL_N)),
            TensorInfo("float32", (MATMUL_N, MATMUL_N)),
            TensorInfo("float32", (MATMUL_N, MATMUL_N)),
        ],
    )

    with LocalRPC() as rpc:
        rpc_config = RPCConfig(
            tracker_host=rpc.tracker_host,
            tracker_port=rpc.tracker_port,
            tracker_key=rpc.tracker_key,
            session_priority=1,
            session_timeout_sec=100,
        )
        evaluator_config = EvaluatorConfig(
            number=1,
            repeat=1,
            min_repeat_ms=0,
            enable_cpu_cache_flush=False,
        )
        runner = RPCRunner(
            rpc_config,
            evaluator_config,
            f_alloc_argument=test_alloc_argument,
            f_run_evaluator=test_run_evaluator,
        )
        # Run the module
        (runner_future, ) = runner.run([runner_input])
        runner_result = runner_future.result()
    assert runner_result.error_msg is None
    for result in runner_result.run_secs:
        if isinstance(result, FloatImm):
            result = result.value
        assert isinstance(result, float)
        assert result >= 0.0
    _clean_build(builder_result.artifact_path)
def test_meta_schedule_local_multiple_runs():
    """Test meta schedule local runner for multiple runs"""
    # Build the module
    mods = [
        MatmulModule,
        MatmulReluModule,
        BatchMatmulModule,
    ]
    builder = LocalBuilder()
    builder_inputs = [BuilderInput(mod, Target("llvm")) for mod in mods]
    builder_results = builder.build(builder_inputs)
    for builder_result in builder_results:
        assert builder_result.artifact_path is not None
        assert builder_result.error_msg is None

    args_infos = [
        [
            TensorInfo("float32", (MATMUL_N, MATMUL_N)),
            TensorInfo("float32", (MATMUL_N, MATMUL_N)),
            TensorInfo("float32", (MATMUL_N, MATMUL_N)),
        ],
        [
            TensorInfo("float32", (MATMUL_N, MATMUL_N)),
            TensorInfo("float32", (MATMUL_N, MATMUL_N)),
            TensorInfo("float32", (MATMUL_N, MATMUL_N)),
        ],
        [
            TensorInfo("float32", [16, MATMUL_M, MATMUL_M]),
            TensorInfo("float32", [16, MATMUL_M, MATMUL_M]),
            TensorInfo("float32", [16, MATMUL_M, MATMUL_M]),
        ],
    ]

    runner_inputs = [
        RunnerInput(builder_results[i].artifact_path, "llvm", args_infos[i])
        for i in range(len(mods))
    ]

    evaluator_config = EvaluatorConfig(
        number=1,
        repeat=1,
        min_repeat_ms=0,
        enable_cpu_cache_flush=False,
    )

    runner = LocalRunner(timeout_sec=100, evaluator_config=evaluator_config)

    # Run the module
    runner_futures = runner.run(runner_inputs)
    runner_results = [
        runner_future.result() for runner_future in runner_futures
    ]

    for runner_result in runner_results:
        assert runner_result.error_msg is None
        for result in runner_result.run_secs:
            if isinstance(result, FloatImm):
                result = result.value
            assert isinstance(result, float)
            assert result >= 0.0

    for builder_result in builder_results:
        _clean_build(builder_result.artifact_path)
예제 #10
0
def verify_meta_schedule_with_tensorrt(
    mod, params, data_shape, use_meta_sched: bool = True, use_trt: bool = True, mode: str = "vm"
):
    if use_meta_sched:
        # With meta_schedule
        dev = "cuda"

        # Build
        if use_trt:
            from tvm.meta_schedule.testing import relay_build_with_tensorrt

            builder = LocalBuilder(f_build=relay_build_with_tensorrt)
        else:

            def relay_build_without_tensorrt(
                mod: Module,
                target: Target,
                params: dict,
            ) -> List[BuilderResult]:
                return tvm.relay.build_module._build_module_no_factory(mod, "cuda", "llvm", params)

            builder = LocalBuilder(f_build=relay_build_without_tensorrt)

        builder_input = BuilderInput(mod, Target(dev, host="llvm"), params)

        (builder_result,) = builder.build([builder_input])
        assert builder_result.error_msg is None
        assert builder_result.artifact_path is not None

        # Run
        evaluator_config = EvaluatorConfig(
            number=5,
            repeat=2,
            min_repeat_ms=0,
            enable_cpu_cache_flush=False,
        )

        runner_input = RunnerInput(
            builder_result.artifact_path, "cuda", [TensorInfo("float32", data_shape)]
        )

        def eval_func(rt_mod, device, evaluator_config, repeated_args):
            rt_mod = tvm.contrib.graph_executor.GraphModule(rt_mod["default"](device))

            eval = rt_mod.module.time_evaluator(
                func_name="run",
                dev=device,
                number=evaluator_config.number,
                repeat=evaluator_config.repeat,
                min_repeat_ms=evaluator_config.min_repeat_ms,
                f_preproc="cache_flush_cpu_non_first_arg"
                if evaluator_config.enable_cpu_cache_flush
                else "",
            )
            repeated_costs: List[List[float]] = []
            for args in repeated_args:
                profile_result = eval(*args)
                repeated_costs.append(profile_result.results)

            costs = [float(cost) for cost in itertools.chain.from_iterable(repeated_costs)]
            return costs

        runner = LocalRunner(
            evaluator_config=evaluator_config,
            f_run_evaluator=eval_func,
        )

        # Run the module
        (runner_future,) = runner.run([runner_input])
        runner_result = runner_future.result()
        assert runner_result is not None
        assert runner_result.run_secs is not None
        assert runner_result.error_msg is None

        for result in runner_result.run_secs:
            if isinstance(result, FloatImm):
                result = result.value
            assert isinstance(result, float)
            assert result >= 0.0

    else:
        # Without meta_schedule
        if use_trt:
            mod, config = tensorrt.partition_for_tensorrt(mod)
            with tvm.transform.PassContext(
                opt_level=3, config={"relay.ext.tensorrt.options": config}
            ):
                func = relay.create_executor(
                    mode, mod=mod, device=tvm.cuda(0), target="cuda"
                ).evaluate()
        else:
            with tvm.transform.PassContext(opt_level=3):
                func = relay.create_executor(
                    mode, mod=mod, device=tvm.cuda(0), target="cuda", params=params
                ).evaluate()