Example #1
0
    def __call__(self, *inputs, **kwargs):
        r"""Runs the defined TC language on given inputs.

        Args:
            *inputs (required):
                PyTorch Tensors or Variables that TC should
                execute on. The inputs should be passed in the order they
                are also passed in the definition of TC language.

            options (optional):
                Kernel mapping options of type :attr:`tc.Options`. These options
                provide mapping for kernel like grid, blocks, memory etc. It
                is recommended to always pass kernel options. The options can be
                obtained by:

                * Autotuning, (recommended) OR

                * You can create `Options` object by choosing the closely matching "type" of kernel. For example:

                 .. code::

                     import tensor_comprehensions as tc
                     options = tc.Options(type)

                where :attr:`type` is a string with value one of below:

                * :attr:`pointwise`:  if kernel resembles a pointwise operation

                * :attr:`mlp`: if kernel resembles a Linear layer operation

                * :attr:`conv`: if kernel resembles a convolution operation

                * :attr:`group_conv`: if kernel resembles a group convolution operation

                * :attr:`naive`: if none of the above, then choose naive *Default*

                If no :attr:`Options` are passed, the naive options will be used which
                might not yield great performance.

            outputs (optional):
                List of PyTorch tensors/Variables. The number of outputs is
                the same as defined in the TC language and are in the same
                order as in TC language. You can choose to allocate the outputs
                tensors/Variables beforehand. Most common use case is to
                reuse output from a previous operation.

            cache (string, optional):
                A string denoting the absolute filepath which
                contains the mapping options for the kernel. Such file can be created by running
                autotuning.

                 If :attr:`training` = True, then the backward options will be obtained
                 from file cache + '_backward'. For the backward, separate filename
                 is not accepted for now.

            grid (int, 3D list):
                If :attr:`inject_kernel` is `True`, then user
                needs to specify the kernel grid options for running it. TC
                will simply use those options and will not add any optimizations

            block (int, 3D list):
                If :attr:`inject_kernel` is `True`, then user
                needs to specify the kernel `block` options for running it. TC
                will simply use those options and will not add any optimizations

            reorder_function (optional):
                If :attr:`training` is set to true in :attr:`define` call,
                then TC infers the inputs for backward layer for compilation
                (1st time the layer is run). The backward layer should typically
                contain the grad_outputs of the forward layer. The backward
                layer should take TC forward inputs + grad_outputs in the same
                order as the forward TC takes inputs and emits outputs. If
                the order of the outputs is changed, or some output grad are
                not required in backwards, then you can pass a function which
                can reorder/drop the layer grad_outputs according to backwards
                layer inputs your TC needs. The function should return a :attr:`list`.

        Returns:
            List of PyTorch tensors/Variables which is the output of running
            TC layer. The number of outputs is the same as defined in the TC
            language and are in the same order as in TC language. Returns
            ``None`` if an exception was raised while compiling/running (the
            exception is logged with its traceback).

        Example:
            >>> LANG = MATMUL_LANG
            >>> matmul = tc.define(lang, name="matmul")
            >>> mat1, mat2 = torch.randn(3, 4).cuda(), torch.randn(4, 5).cuda()
            >>> out = matmul(mat1, mat2, options=Options("mlp"))
        """
        try:
            validate_input(*inputs)
            kwargs.update(self.kwargs_define)
            name, backward_name = get_tc_names_from_kwargs(**kwargs)
            kwargs.pop("name", None)
            # A backward TC name is only present when training was requested.
            backward = backward_name is not None

            hash_key = get_tc_hash_key(name, *inputs)

            # Seed mapping options from the tuner cache when this exact
            # (name, inputs) combination has been autotuned before.
            if self.tuner and self.tuner.tuner_cache and hash_key in self.tuner.tuner_cache:
                options_cache = self.tuner.tuner_cache[hash_key]
            else:
                options_cache = {}

            kwargs["options_cache"] = options_cache
            if hash_key in self.cu.compilation_cache:
                # Already compiled for these inputs: reuse the cached handles.
                tc_info = self.cu.compilation_cache[hash_key]
            else:
                # First run for these inputs: compile the forward kernel.
                tc_info = {}
                kwargs["type"] = "forward"
                input_tensors = unpack_variables(list(inputs))

                if "inject_kernel" in kwargs and "cuda_code" in kwargs:
                    # Manually injected CUDA bypasses TC's mapper, so the
                    # launch configuration must be supplied by the user.
                    assert "grid" in kwargs and "block" in kwargs, \
                        "For manual cuda injection, please specify the grid and block settings"
                    self.cu.manual_cuda_injection(
                        name, kwargs["inject_kernel"], kwargs["cuda_code"],
                        input_tensors, kwargs["grid"], kwargs["block"])
                handle_forward = self.cu.compile(name, input_tensors, **kwargs)
                tc_info["forward_name"] = name
                tc_info["handle_forward"] = handle_forward

                if backward:
                    tc_info["backward_name"] = backward_name
                self.cu.compilation_cache[hash_key] = tc_info

            # Normalize user-supplied output tensors to a list.
            outputs = kwargs.get("outputs")
            if outputs is not None:
                tc_info["outputs"] = outputs if isinstance(outputs, list) else [outputs]
            out = TCFunction.apply(self.cu, tc_info, kwargs, *inputs)
            return list(out) if len(out) > 1 else out[0]
        except Exception:
            # logger.exception records the full traceback (logger.error with
            # str(e) loses it); keep returning None so existing callers that
            # check for None on failure continue to work.
            logger.exception("Caught Exception while running the TC layer")
            return None
Example #2
0
    def autotune(self, *inputs, **kwargs):
        r"""Autotunes the defined TC kernel(s) on the given inputs.

        Tunes the forward kernel first; when a backward TC name was defined
        (training mode), the forward kernel is then compiled and run once to
        obtain the backward layer's inputs, and the backward kernel is tuned
        as well.

        Args:
            *inputs: PyTorch tensors/Variables to tune on, in the order the
                TC definition expects them.
            **kwargs: tuning options; notably ``cache`` (bool or filepath for
                persisting the tuned options), ``options`` (seed mapping
                options) and ``reorder_function`` (see ``__call__``).

        Returns:
            The best forward options, or a ``[forward, backward]`` pair of
            best options when a backward kernel was tuned.
        """
        input_tensors = get_tensors(list(inputs))
        kwargs.update(self.kwargs)
        name, backward_name = get_tc_names_from_kwargs(**kwargs)
        kwargs.pop("name", None)
        # A backward TC name is only present when training was requested.
        backward = backward_name is not None
        hash_key = get_tc_hash_key(name, *input_tensors)
        # Look up options in the cache. Whenever we call autotune, tuning must
        # happen; but if the kernel has been tuned earlier we can use the
        # previous options to seed the tuning.
        options_cache = self.tuner_cache.get(hash_key, {})

        # We give priority to the options the user might have passed via file,
        # or an Options object.
        cache_file = ""
        cache = kwargs.get("cache")
        if cache:
            if isinstance(cache, bool):
                # cache=True: generate a unique temp path from the hash_key
                # computed above (no need to recompute it).
                cache_file = "/tmp/{}_{}".format(hash_key, str(uuid.uuid4()))
            elif isinstance(cache, str):
                cache_file = cache
            logger.info(
                'Autotuning cache will be saved to: {}.cuda/options'.format(
                    cache_file))
        else:
            logger.warning(
                "Autotuning results won't be cached. 'cache' option is not set"
            )

        # First tune the forward layer: its inputs are given directly.
        kwargs["type"] = "forward"
        # Pass the tuner object so options can be loaded from file without
        # creating a special object.
        kwargs["tuner"] = self.autotuner
        options = get_options_from_kwargs_and_tuner_cache(
            name, cache_file, options_cache, *input_tensors, **kwargs)
        forward_best_options = self.tune_and_store(name,
                                                   input_tensors,
                                                   mapping_options=options,
                                                   cache_file=cache_file)
        # Update the cache with the tuned forward options.
        options_cache["forward"] = forward_best_options
        if not backward:
            self.tuner_cache[hash_key] = options_cache
            return forward_best_options

        # Now tune the backward layer. For that, run the forward layer first
        # to get its outputs, which form part of the backward inputs.
        logger.info('Autotuning the backward layer now')
        cu = TcCompilationUnit()
        cu.define(self.tc_lang)

        if "options" in kwargs:
            # Temporarily swap in the freshly tuned forward options, then
            # restore whatever the user originally passed.
            orig_options = kwargs["options"]
            kwargs["options"] = forward_best_options
            outputs = cu.compile_and_run(name, input_tensors, **kwargs)
            kwargs["options"] = orig_options
        else:
            outputs = cu.compile_and_run(name,
                                         input_tensors,
                                         options=forward_best_options,
                                         **kwargs)
        # With the forward outputs available we have the backward layer's
        # inputs; optionally reorder/drop them per the user's function.
        reorder_function = kwargs.get("reorder_function")
        rearranged_outputs = list(outputs)
        if reorder_function is not None:
            rearranged_outputs = reorder_function(list(outputs))
        inputs = make_contiguous(
            unpack_variables(input_tensors + list(rearranged_outputs)))

        if cache_file:
            # Backward options are persisted next to the forward cache file.
            cache_file = cache_file + "_backward"
            logger.info(
                'Backwards autotuning cache will be saved to: {}.cuda/options'.
                format(cache_file))
        kwargs["type"] = "backward"
        options = get_options_from_kwargs_and_tuner_cache(
            backward_name, cache_file, options_cache, *inputs, **kwargs)
        backward_best_options = self.tune_and_store(backward_name,
                                                    inputs,
                                                    mapping_options=options,
                                                    cache_file=cache_file)
        # Update the cache with the tuned backward options.
        options_cache["backward"] = backward_best_options
        self.tuner_cache[hash_key] = options_cache
        return [forward_best_options, backward_best_options]