예제 #1
0
    def codegen(self, func):
        """Lower one function and gather the artifacts produced for it.

        Parameters
        ----------
        func: tvm.relay.Expr
            The function to compile.

        Returns
        -------
        graph_json : str
            The graph json that can be consumed by runtime.
        mod : IRModule or Dict[str, IRModule]
            The lowered functions.
        params : Dict[str, tvm.nd.NDArray]
            Additional constant parameters, keyed by parameter name.
        """
        self._codegen(func)
        graph_json = self._get_graph_json()
        lowered_func = self._get_irmodule()

        def _duplicate(source):
            # Allocate a new array with the same shape/dtype/device and copy
            # the parameter into it, so the returned value is a separate array.
            target = empty(source.shape, dtype=source.dtype, device=source.device)
            source.copyto(target)
            return target

        params = {
            name: _duplicate(self._get_param_by_name(name))
            for name in self._list_params_name()
        }
        return graph_json, lowered_func, params
예제 #2
0
    def timed_func():
        """Measure the built module once on a remote RPC device.

        All inputs (``key``, ``host``, ``port``, ``priority``, ``timeout``,
        ``build_res``, ``inp``, ``number``, ``repeat``, ``min_repeat_ms``,
        ``enable_cpu_cache_flush``, ``cooldown_interval``, ``verbose`` and
        ``max_float``) are read from the enclosing scope.

        Returns
        -------
        costs
            ``time_f(*args).results`` on success, ``(max_float,)`` on failure.
        error_no
            ``0`` on success, ``MeasureErrorNo.COMPILE_DEVICE`` when uploading,
            loading or creating the time evaluator fails, and
            ``MeasureErrorNo.RUNTIME_DEVICE`` when preparing the inputs, running
            the evaluator or removing the remote files fails.
        error_msg
            ``None`` on success, otherwise the value of ``make_error_msg()``.
        all_cost
            Time elapsed in this function up to the removal of the local build
            directory (the cooldown sleep is not included) plus
            ``build_res.time_cost``.
        timestamp
            ``time.time()`` taken right after the local build directory is removed.
        """
        tic = time.time()
        error_no = 0
        error_msg = None
        try:
            # upload built module
            remote = request_remote(key, host, port, priority, timeout)
            remote.upload(build_res.filename)
            func = remote.load_module(os.path.split(build_res.filename)[1])
            ctx = remote.context(str(inp.task.target), 0)
            # Limitation:
            # We can not get PackFunction directly in the remote mode as it is wrapped
            # under the std::function. We could lift the restriction later once we fold
            # the PackedFunc as an object. Currently, we pass function name to work
            # around it.
            f_prepare = 'cache_flush_cpu_non_first_arg' if enable_cpu_cache_flush else ''
            time_f = func.time_evaluator(
                func.entry_name, ctx, number=number, repeat=repeat, min_repeat_ms=min_repeat_ms,
                f_preproc=f_prepare)
        # pylint: disable=broad-except
        except Exception:
            costs = (max_float,)
            error_no = MeasureErrorNo.COMPILE_DEVICE
            error_msg = make_error_msg()

        # The second phase needs ``remote``, ``ctx`` and ``time_f`` from the first one.
        if error_no == 0:
            try:
                args = [ndarray.empty(get_const_tuple(x.shape), x.dtype, ctx) for x in
                        build_res.args]
                try:
                    random_fill = remote.get_function("tvm.contrib.random.random_fill")
                except AttributeError as err:
                    # Chain explicitly so the original lookup failure is kept as
                    # the cause of the more descriptive error.
                    raise AttributeError("Please make sure USE_RANDOM is ON in the config.cmake "
                                         "on the remote devices") from err
                for arg in args:
                    random_fill(arg)
                ctx.sync()

                costs = time_f(*args).results
                # clean up remote files
                remote.remove(build_res.filename)
                remote.remove(os.path.splitext(build_res.filename)[0] + '.so')
                remote.remove('')
            # pylint: disable=broad-except
            except Exception:
                costs = (max_float,)
                error_no = MeasureErrorNo.RUNTIME_DEVICE
                error_msg = make_error_msg()

        # The local build directory is removed whether or not the run succeeded.
        shutil.rmtree(os.path.dirname(build_res.filename))
        toc = time.time()

        time.sleep(cooldown_interval)
        if verbose >= 1:
            if error_no == MeasureErrorNo.NO_ERROR:
                print("*", end="")
            else:
                print("*E", end="")  # Run error

        return costs, error_no, error_msg, toc - tic + build_res.time_cost, toc
예제 #3
0
    def codegen(self, func):
        """Lower one function and gather the artifacts produced for it.

        Parameters
        ----------
        func: tvm.relay.Expr
            The function to compile.

        Returns
        -------
        graph_json : str
            The graph json that can be consumed by runtime.
        lowered_funcs : List[tvm.LoweredFunc] or Dict[str, List[tvm.LoweredFunc]]
            The lowered functions.
        params : Dict[str, tvm.nd.NDArray]
            Additional constant parameters, keyed by parameter name.
        """
        self._codegen(func)
        graph_json = self._get_graph_json()
        lowered_func = self._get_lowered_funcs()

        params = {}
        # Each listed entry exposes the plain parameter name through ``.value``.
        for key in (entry.value for entry in self._list_params_name()):
            source = self._get_param_by_name(key)
            # Copy the parameter into a newly allocated array with the same
            # shape, dtype and context.
            target = empty(source.shape, dtype=source.dtype, ctx=source.ctx)
            source.copyto(target)
            params[key] = target
        return graph_json, lowered_func, params
예제 #4
0
    def timed_func(inp, build_res):
        """Measure a locally built module once and report the outcome.

        ``number``, ``repeat``, ``min_repeat_ms``, ``enable_cpu_cache_flush``,
        ``cooldown_interval``, ``verbose`` and ``max_float`` are read from the
        enclosing scope.

        Parameters
        ----------
        inp
            Measurement input; only ``inp.task.target`` is read here.
        build_res
            Build result; ``filename``, ``args`` and ``time_cost`` are read here.

        Returns
        -------
        costs
            ``time_f(*args).results`` on success, ``(max_float,)`` on failure.
        error_no
            ``0`` on success, ``MeasureErrorNo.COMPILE_DEVICE`` when loading the
            module or creating the time evaluator fails, and
            ``MeasureErrorNo.RUNTIME_DEVICE`` when preparing the inputs or
            running the evaluator fails.
        error_msg
            ``None`` on success, otherwise the value of ``make_error_msg()``.
        all_cost
            Time elapsed in this function up to the removal of the build
            directory (the cooldown sleep is not included) plus
            ``build_res.time_cost``.
        timestamp
            ``time.time()`` taken right after the build directory is removed.
        """
        tic = time.time()
        error_no = 0
        error_msg = None
        try:
            func = module.load_module(build_res.filename)
            ctx = ndarray.context(str(inp.task.target), 0)
            # Limitation:
            # We can not get PackFunction directly in the remote mode as it is wrapped
            # under the std::function. We could lift the restriction later once we fold
            # the PackedFunc as an object. Currently, we pass function name to work
            # around it.
            f_prepare = "cache_flush_cpu_non_first_arg" if enable_cpu_cache_flush else ""
            time_f = func.time_evaluator(
                func.entry_name,
                ctx,
                number=number,
                repeat=repeat,
                min_repeat_ms=min_repeat_ms,
                f_preproc=f_prepare,
            )
        # pylint: disable=broad-except
        except Exception:
            costs = (max_float, )
            error_no = MeasureErrorNo.COMPILE_DEVICE
            error_msg = make_error_msg()

        # The second phase needs ``ctx`` and ``time_f`` from the first one.
        if error_no == 0:
            try:
                args = [
                    ndarray.empty(get_const_tuple(x.shape), x.dtype, ctx)
                    for x in build_res.args
                ]
                # NOTE(review): the second argument presumably makes a missing
                # function yield a falsy value instead of raising -- confirm.
                random_fill = tvm.get_global_func(
                    "tvm.contrib.random.random_fill", True)
                # Explicit check rather than ``assert`` so the helpful message
                # is still produced when Python runs with ``-O``.
                if not random_fill:
                    raise RuntimeError(
                        "Please make sure USE_RANDOM is ON in the config.cmake")
                for arg in args:
                    random_fill(arg)
                ctx.sync()
                costs = time_f(*args).results
            # pylint: disable=broad-except
            except Exception:
                costs = (max_float, )
                error_no = MeasureErrorNo.RUNTIME_DEVICE
                error_msg = make_error_msg()

        # The build directory is removed whether or not the run succeeded.
        shutil.rmtree(os.path.dirname(build_res.filename))
        toc = time.time()
        time.sleep(cooldown_interval)

        if verbose >= 1:
            if error_no == MeasureErrorNo.NO_ERROR:
                print("*", end="")
            else:
                print("*E", end="")  # Run error
        return costs, error_no, error_msg, toc - tic + build_res.time_cost, toc
예제 #5
0
    def timed_func():
        """Measure the built module once on a remote RPC device.

        Every input is taken from the enclosing scope.  The result is the
        tuple ``(costs, error_no, error_msg, all_cost, timestamp)``: ``costs``
        is ``(max_float,)`` on failure, ``error_msg`` comes from
        ``make_error_msg()`` on failure, ``all_cost`` is the time spent here
        before the cooldown sleep plus ``build_res.time_cost``, and
        ``timestamp`` is taken right after the local build directory is removed.
        """
        started = time.time()
        error_no = 0
        error_msg = None
        try:
            # upload built module
            remote = request_remote(key, host, port, priority, timeout)
            remote.upload(build_res.filename)
            func = remote.load_module(os.path.basename(build_res.filename))
            ctx = remote.context(str(inp.task.target), 0)
            # TODO(FrozenGene): Add cpu cache flush to this function.
            evaluator_options = {
                "number": number,
                "repeat": repeat,
                "min_repeat_ms": min_repeat_ms,
            }
            time_f = func.time_evaluator(func.entry_name, ctx, **evaluator_options)
        # pylint: disable=broad-except
        except Exception:
            costs = (max_float, )
            error_no = MeasureErrorNo.COMPILE_DEVICE
            error_msg = make_error_msg()

        # Only run when the setup above produced ``remote``, ``ctx`` and ``time_f``.
        if error_no == 0:
            try:
                # TODO(FrozenGene): Update to ndarray.non-empty.
                args = []
                for x in build_res.args:
                    shape = get_const_tuple(x.shape)
                    args.append(ndarray.empty(shape, x.dtype, ctx))
                ctx.sync()

                costs = time_f(*args).results
                # clean up remote files
                remote.remove(build_res.filename)
                shared_lib = os.path.splitext(build_res.filename)[0] + '.so'
                remote.remove(shared_lib)
                remote.remove('')
            # pylint: disable=broad-except
            except Exception:
                costs = (max_float, )
                error_no = MeasureErrorNo.RUNTIME_DEVICE
                error_msg = make_error_msg()

        # The local build directory is removed whether or not the run succeeded.
        build_dir = os.path.dirname(build_res.filename)
        shutil.rmtree(build_dir)
        finished = time.time()

        time.sleep(cooldown_interval)
        if verbose >= 1:
            # "*E" marks a run error.
            marker = "*" if error_no == MeasureErrorNo.NO_ERROR else "*E"
            print(marker, end="")

        return costs, error_no, error_msg, finished - started + build_res.time_cost, finished
예제 #6
0
    def timed_func(inp, build_res):
        """Measure a locally built module once and report the outcome.

        ``number``, ``repeat``, ``min_repeat_ms``, ``cooldown_interval``,
        ``verbose`` and ``max_float`` are taken from the enclosing scope.

        Parameters
        ----------
        inp
            Measurement input; only ``inp.task.target`` is read here.
        build_res
            Build result; ``filename``, ``args`` and ``time_cost`` are read here.

        Returns
        -------
        tuple
            ``(costs, error_no, error_msg, all_cost, timestamp)``.  ``costs`` is
            ``(max_float,)`` on failure; ``error_msg`` comes from
            ``make_error_msg()`` on failure; ``all_cost`` is the time spent here
            before the cooldown sleep plus ``build_res.time_cost``.
        """
        started = time.time()
        error_no = 0
        error_msg = None
        try:
            func = module.load_module(build_res.filename)
            ctx = ndarray.context(str(inp.task.target), 0)
            evaluator_options = {
                "number": number,
                "repeat": repeat,
                "min_repeat_ms": min_repeat_ms,
            }
            time_f = func.time_evaluator(func.entry_name, ctx, **evaluator_options)
        # pylint: disable=broad-except
        except Exception:
            costs = (max_float, )
            error_no = MeasureErrorNo.COMPILE_DEVICE
            error_msg = make_error_msg()

        # Only run when the setup above produced ``ctx`` and ``time_f``.
        if error_no == 0:
            try:
                args = []
                for x in build_res.args:
                    shape = get_const_tuple(x.shape)
                    args.append(ndarray.empty(shape, x.dtype, ctx))
                ctx.sync()
                costs = time_f(*args).results
            # pylint: disable=broad-except
            except Exception:
                costs = (max_float, )
                error_no = MeasureErrorNo.RUNTIME_DEVICE
                error_msg = make_error_msg()

        # The build directory is removed whether or not the run succeeded.
        build_dir = os.path.dirname(build_res.filename)
        shutil.rmtree(build_dir)
        finished = time.time()
        time.sleep(cooldown_interval)

        if verbose >= 1:
            # "*E" marks a run error.
            marker = "*" if error_no == MeasureErrorNo.NO_ERROR else "*E"
            print(marker, end="")
        return costs, error_no, error_msg, finished - started + build_res.time_cost, finished
예제 #7
0
파일: measure.py 프로젝트: zlin888/tvm
def _timed_eval_func(
    inp_serialized,
    build_res,
    number,
    repeat,
    min_repeat_ms,
    cooldown_interval,
    enable_cpu_cache_flush,
    verbose,
):
    """Measure a locally built module once, feeding registered task inputs.

    Parameters
    ----------
    inp_serialized
        Serialized measure input, restored with ``MeasureInput.deserialize``.
    build_res
        Build result; ``filename``, ``args`` and ``time_cost`` are read here.
    number, repeat, min_repeat_ms
        Forwarded unchanged to ``func.time_evaluator``.
    cooldown_interval
        Seconds to sleep after the measurement.
    enable_cpu_cache_flush
        When truthy, ``"cache_flush_cpu_non_first_arg"`` is passed as the
        evaluator's ``f_preproc``; otherwise an empty string is passed.
    verbose
        When ``>= 1`` a progress marker (``*`` or ``*E``) is printed.

    Returns
    -------
    tuple
        ``(costs, error_no, error_msg, all_cost, timestamp)``.  ``costs`` is
        ``(MAX_FLOAT,)`` on failure; ``error_no`` is ``0`` on success,
        ``MeasureErrorNo.COMPILE_DEVICE`` when loading the module or creating
        the evaluator fails and ``MeasureErrorNo.RUNTIME_DEVICE`` when
        preparing inputs or running fails; ``error_msg`` comes from
        ``make_traceback_info()`` on failure; ``all_cost`` is the time spent
        here before the cooldown sleep plus ``build_res.time_cost``.
    """
    # pylint: disable=import-outside-toplevel
    from .search_task import get_task_input_buffer  # lazily import to avoid recursive dependency

    inp = MeasureInput.deserialize(inp_serialized)
    task_input_names = inp.task.task_input_names
    tic = time.time()
    error_no = 0
    error_msg = None
    try:
        func = module.load_module(build_res.filename)
        dev = ndarray.device(str(inp.task.target), 0)
        # Limitation:
        # We can not get PackFunction directly in the remote mode as it is wrapped
        # under the std::function. We could lift the restriction later once we fold
        # the PackedFunc as an object. Currently, we pass function name to work
        # around it.
        f_prepare = "cache_flush_cpu_non_first_arg" if enable_cpu_cache_flush else ""
        time_f = func.time_evaluator(
            func.entry_name,
            dev,
            number=number,
            repeat=repeat,
            min_repeat_ms=min_repeat_ms,
            f_preproc=f_prepare,
        )
    # pylint: disable=broad-except
    except Exception:
        costs = (MAX_FLOAT, )
        error_no = MeasureErrorNo.COMPILE_DEVICE
        error_msg = make_traceback_info()

    # The second phase needs ``dev`` and ``time_f`` from the first one.
    if error_no == 0:
        try:
            # NOTE(review): the second argument presumably makes a missing
            # function yield a falsy value instead of raising -- confirm.
            random_fill = tvm.get_global_func("tvm.contrib.random.random_fill", True)
            # Explicit check rather than ``assert`` so the helpful message is
            # still produced when Python runs with ``-O``.
            if not random_fill:
                raise RuntimeError("Please make sure USE_RANDOM is ON in the config.cmake")

            # The tensor -> name map is only built when the task declares inputs.
            tensor_input_map = prepare_input_map(build_res.args) if task_input_names else {}
            args = []
            task_inputs_count = 0
            for arg in build_res.args:
                if arg in tensor_input_map:
                    tensor_name = tensor_input_map[arg]
                    if tensor_name in task_input_names:
                        # Use the buffer registered for this workload/tensor name.
                        args.append(
                            ndarray.array(
                                get_task_input_buffer(inp.task.workload_key, tensor_name),
                                dev,
                            )
                        )
                        task_inputs_count += 1
                    else:
                        raise ValueError(
                            "%s not found in task_inputs, " % (tensor_name) +
                            "should provide with `SearchTask(..., task_inputs={...})`"
                        )
                else:
                    # Tensors without a mapped name get a randomly filled buffer.
                    empty_array = ndarray.empty(get_const_tuple(arg.shape), arg.dtype, dev)
                    random_fill(empty_array)
                    args.append(empty_array)
            if task_inputs_count != len(task_input_names):
                logger.warning(
                    "task_inputs not fully matched, check if there's any unexpected error"
                )
            dev.sync()
            costs = time_f(*args).results
        # pylint: disable=broad-except
        except Exception:
            costs = (MAX_FLOAT, )
            error_no = MeasureErrorNo.RUNTIME_DEVICE
            error_msg = make_traceback_info()

    # The build directory is removed whether or not the run succeeded.
    shutil.rmtree(os.path.dirname(build_res.filename))
    toc = time.time()
    time.sleep(cooldown_interval)

    if verbose >= 1:
        if error_no == MeasureErrorNo.NO_ERROR:
            print("*", end="", flush=True)
        else:
            print("*E", end="", flush=True)  # Run error
    return costs, error_no, error_msg, toc - tic + build_res.time_cost, toc
예제 #8
0
def _timed_eval_func(
    inp_serialized,
    build_res,
    args,
    number,
    repeat,
    min_repeat_ms,
    cooldown_interval,
    enable_cpu_cache_flush,
    verbose,
):
    """Measure a locally built module once using caller-supplied arguments.

    Parameters
    ----------
    inp_serialized
        Serialized measure input, restored with ``MeasureInput.deserialize``.
    build_res
        Build result; ``filename``, ``args`` and ``time_cost`` are read here.
    args
        One entry per element of ``build_res.args``.  A ``None`` entry is
        replaced by a randomly filled buffer of the matching shape and dtype;
        any other entry is copied to the device with ``ndarray.array``.  The
        sequence itself is not modified.
    number, repeat, min_repeat_ms
        Forwarded unchanged to ``func.time_evaluator``.
    cooldown_interval
        Seconds to sleep after the measurement.
    enable_cpu_cache_flush
        When truthy, ``"cache_flush_cpu_non_first_arg"`` is passed as the
        evaluator's ``f_preproc``; otherwise an empty string is passed.
    verbose
        When ``>= 1`` a progress marker (``*`` or ``*E``) is printed.

    Returns
    -------
    tuple
        ``(costs, error_no, error_msg, all_cost, timestamp)``.  ``costs`` is
        ``(MAX_FLOAT,)`` on failure; ``error_no`` is ``0`` on success,
        ``MeasureErrorNo.COMPILE_DEVICE`` when loading the module or creating
        the evaluator fails and ``MeasureErrorNo.RUNTIME_DEVICE`` when
        preparing inputs or running fails; ``error_msg`` comes from
        ``make_traceback_info()`` on failure; ``all_cost`` is the time spent
        here before the cooldown sleep plus ``build_res.time_cost``.
    """
    inp = MeasureInput.deserialize(inp_serialized)
    tic = time.time()
    error_no = 0
    error_msg = None
    try:
        func = module.load_module(build_res.filename)
        dev = ndarray.device(str(inp.task.target), 0)
        # Limitation:
        # We can not get PackFunction directly in the remote mode as it is wrapped
        # under the std::function. We could lift the restriction later once we fold
        # the PackedFunc as an object. Currently, we pass function name to work
        # around it.
        f_prepare = "cache_flush_cpu_non_first_arg" if enable_cpu_cache_flush else ""
        time_f = func.time_evaluator(
            func.entry_name,
            dev,
            number=number,
            repeat=repeat,
            min_repeat_ms=min_repeat_ms,
            f_preproc=f_prepare,
        )
    # pylint: disable=broad-except
    except Exception:
        costs = (MAX_FLOAT, )
        error_no = MeasureErrorNo.COMPILE_DEVICE
        error_msg = make_traceback_info()

    # The second phase needs ``dev`` and ``time_f`` from the first one.
    if error_no == 0:
        try:
            # NOTE(review): the second argument presumably makes a missing
            # function yield a falsy value instead of raising -- confirm.
            random_fill = tvm.get_global_func("tvm.contrib.random.random_fill", True)
            # Explicit checks rather than ``assert`` so they still run (and
            # still produce a useful message) when Python runs with ``-O``.
            if not random_fill:
                raise RuntimeError("Please make sure USE_RANDOM is ON in the config.cmake")
            if len(args) != len(build_res.args):
                raise ValueError(
                    "Expected %d arguments but got %d" % (len(build_res.args), len(args))
                )

            # Build a separate list so the caller's ``args`` is left untouched.
            loc_args = []
            for arg, build_res_arg in zip(args, build_res.args):
                if arg is None:
                    empty_array = ndarray.empty(
                        get_const_tuple(build_res_arg.shape), build_res_arg.dtype, dev
                    )
                    random_fill(empty_array)
                    loc_args.append(empty_array)
                else:
                    loc_args.append(ndarray.array(arg, dev))
            dev.sync()
            costs = time_f(*loc_args).results
        # pylint: disable=broad-except
        except Exception:
            costs = (MAX_FLOAT, )
            error_no = MeasureErrorNo.RUNTIME_DEVICE
            error_msg = make_traceback_info()

    # The build directory is removed whether or not the run succeeded.
    shutil.rmtree(os.path.dirname(build_res.filename))
    toc = time.time()
    time.sleep(cooldown_interval)

    if verbose >= 1:
        if error_no == MeasureErrorNo.NO_ERROR:
            print("*", end="", flush=True)
        else:
            print("*E", end="", flush=True)  # Run error
    return costs, error_no, error_msg, toc - tic + build_res.time_cost, toc
예제 #9
0
def _rpc_run(
    inp_serialized,
    build_res,
    args,
    key,
    host,
    port,
    priority,
    timeout,
    number,
    repeat,
    min_repeat_ms,
    cooldown_interval,
    enable_cpu_cache_flush,
    verbose,
):
    """Measure the built module once on a remote RPC device.

    Parameters
    ----------
    inp_serialized
        Serialized measure input, restored with ``MeasureInput.deserialize``.
    build_res
        Build result; ``filename``, ``args`` and ``time_cost`` are read here.
    args
        One entry per element of ``build_res.args``.  A ``None`` entry is
        replaced by a randomly filled buffer of the matching shape and dtype;
        any other entry is copied to the device with ``ndarray.array``.  The
        sequence itself is not modified.
    key, host, port, priority, timeout
        Forwarded unchanged to ``request_remote``.
    number, repeat, min_repeat_ms
        Forwarded unchanged to ``func.time_evaluator``.
    cooldown_interval
        Seconds to sleep after the measurement.
    enable_cpu_cache_flush
        When truthy, ``"cache_flush_cpu_non_first_arg"`` is passed as the
        evaluator's ``f_preproc``; otherwise an empty string is passed.
    verbose
        When ``>= 1`` a progress marker (``*`` or ``*E``) is printed.

    Returns
    -------
    tuple
        ``(costs, error_no, error_msg, all_cost, timestamp)``.  ``costs`` is
        ``(MAX_FLOAT,)`` on failure; ``error_no`` is ``0`` on success,
        ``MeasureErrorNo.COMPILE_DEVICE`` when the upload/load/evaluator setup
        fails and ``MeasureErrorNo.RUNTIME_DEVICE`` when anything in the run
        phase fails; ``error_msg`` comes from ``make_traceback_info()`` on
        failure; ``all_cost`` is the time spent here before the cooldown sleep
        plus ``build_res.time_cost``.
    """
    inp = MeasureInput.deserialize(inp_serialized)
    tic = time.time()
    error_no = 0
    error_msg = None
    try:
        # upload built module
        remote = request_remote(key, host, port, priority, timeout)
        remote.upload(build_res.filename)
        func = remote.load_module(os.path.split(build_res.filename)[1])
        dev = remote.device(str(inp.task.target), 0)
        # Limitation:
        # We can not get PackFunction directly in the remote mode as it is wrapped
        # under the std::function. We could lift the restriction later once we fold
        # the PackedFunc as an object. Currently, we pass function name to work
        # around it.
        f_prepare = "cache_flush_cpu_non_first_arg" if enable_cpu_cache_flush else ""
        time_f = func.time_evaluator(
            func.entry_name,
            dev,
            number=number,
            repeat=repeat,
            min_repeat_ms=min_repeat_ms,
            f_preproc=f_prepare,
        )
    # pylint: disable=broad-except
    except Exception:
        costs = (MAX_FLOAT, )
        error_no = MeasureErrorNo.COMPILE_DEVICE
        error_msg = make_traceback_info()

    # The second phase needs ``remote``, ``func``, ``dev`` and ``time_f``.
    if error_no == 0:
        # Tracks whether a stream exists, so cleanup never touches an unbound
        # ``stream`` when ``create_raw_stream`` itself fails.
        stream_created = False
        try:
            stream = dev.create_raw_stream()
            stream_created = True
            dev.set_raw_stream(stream)
            random_fill = remote.get_function("tvm.contrib.random.random_fill")
            # Explicit checks rather than ``assert`` so they still run (and
            # still produce a useful message) when Python runs with ``-O``.
            if not random_fill:
                raise RuntimeError(
                    "Please make sure USE_RANDOM is ON in the config.cmake on the remote devices"
                )
            if len(args) != len(build_res.args):
                raise ValueError(
                    "Expected %d arguments but got %d" % (len(build_res.args), len(args))
                )

            # Build a separate list so the caller's ``args`` is left untouched.
            loc_args = []
            for arg, build_res_arg in zip(args, build_res.args):
                if arg is None:
                    empty_array = ndarray.empty(
                        get_const_tuple(build_res_arg.shape), build_res_arg.dtype, dev
                    )
                    random_fill(empty_array)
                    loc_args.append(empty_array)
                else:
                    loc_args.append(ndarray.array(arg, dev))
            dev.sync()

            # First run for check that the kernel is correct
            func.entry_func(*loc_args)
            dev.sync()

            costs = time_f(*loc_args).results

            # clean up remote files
            remote.remove(build_res.filename)
            remote.remove(os.path.splitext(build_res.filename)[0] + ".so")
            remote.remove("")
        # pylint: disable=broad-except
        except Exception:
            costs = (MAX_FLOAT, )
            error_no = MeasureErrorNo.RUNTIME_DEVICE
            error_msg = make_traceback_info()
        finally:
            # Free the stream exactly once, on both the success and failure paths.
            if stream_created:
                dev.free_raw_stream(stream)

    # The local build directory is removed whether or not the run succeeded.
    shutil.rmtree(os.path.dirname(build_res.filename))
    toc = time.time()

    time.sleep(cooldown_interval)
    if verbose >= 1:
        if error_no == MeasureErrorNo.NO_ERROR:
            print("*", end="")
        else:
            print("*E", end="")  # Run error

    return costs, error_no, error_msg, toc - tic + build_res.time_cost, toc