示例#1
0
文件: data.py 项目: inducer/loopy
    def __init__(self, **kwargs):
        """
        Normalize ``name`` and ``dtype`` and forward everything to
        :class:`ImmutableRecord`.

        ``name`` (required) is interned. ``target`` and ``dtype`` are removed
        from *kwargs*; ``dtype`` is converted with
        :func:`loopy.types.to_loopy_type` and stored back. ``for_atomic`` is
        only read (default *False*) and stays in *kwargs*, so it is forwarded
        to the record as well.
        """
        import loopy as lp
        from loopy.types import to_loopy_type

        arg_name = intern(kwargs.pop("name"))
        kwargs["name"] = arg_name

        target = kwargs.pop("target", None)
        raw_dtype = kwargs.pop("dtype", None)

        resolved = to_loopy_type(
                raw_dtype, allow_auto=True, allow_none=True,
                for_atomic=kwargs.get("for_atomic", False),
                target=target)

        if resolved is lp.auto:
            # 'auto' is still accepted, but is mapped to None with a warning
            warn("Argument/temporary data type for '%s' should be None if "
                   "unspecified, not auto. This usage will be disallowed in 2018."
                    % arg_name,
                    DeprecationWarning, stacklevel=2)
            resolved = None

        kwargs["dtype"] = resolved

        ImmutableRecord.__init__(self, **kwargs)
示例#2
0
文件: testlib.py 项目: inducer/loopy
    def __call__(self, preamble_info):
        """
        Generate the preamble for the custom indirect-lookup function.

        Looks for a function named ``self.func_name`` among
        ``preamble_info.seen_functions``. If one is found and its
        ``arg_dtypes`` equal the converted ``self.func_arg_dtypes``, a single
        ``(description, code)`` pair is yielded. The code consists of the
        declaration of the ``lookup`` array (initialized from ``self.arr``)
        followed by the C source of the function.

        If no function matches, or the argument dtypes differ, nothing is
        yielded.
        """
        from loopy.kernel.data import temp_var_scope as scopes

        # find a function matching our name
        func_match = next(
            (x for x in preamble_info.seen_functions
             if x.name == self.func_name), None)
        if func_match is None:
            # the temporary and the C code only exist for a matching
            # function, so there is nothing to emit
            return

        from loopy.types import to_loopy_type
        # check types
        if tuple(to_loopy_type(x) for x in self.func_arg_dtypes) != \
                func_match.arg_dtypes:
            return

        desc = 'custom_funcs_indirect'

        # create our temporary
        var = lp.TemporaryVariable(
            'lookup', initializer=self.arr, dtype=self.arr.dtype,
            shape=self.arr.shape,
            scope=scopes.GLOBAL, read_only=True)
        # and code
        code = """
        int {name}(int start, int end, int match)
        {{
            int result = start;
            for (int i = start + 1; i < end; ++i)
            {{
                if (lookup[i] == match)
                    result = i;
            }}
            return result;
        }}
        """.format(name=self.func_name)

        # generate temporary variable code
        from cgen import Initializer
        from loopy.target.c import generate_array_literal
        codegen_state = preamble_info.codegen_state.copy(
            is_generating_device_code=True)
        kernel = preamble_info.kernel
        ast_builder = codegen_state.ast_builder
        target = kernel.target
        # exactly one declaration is expected for the temporary
        decl_info, = var.decl_info(target, index_dtype=kernel.index_dtype)
        decl = ast_builder.wrap_global_constant(
                ast_builder.get_temporary_decl(
                    codegen_state, None, var,
                    decl_info))
        if var.initializer is not None:
            decl = Initializer(decl, generate_array_literal(
                codegen_state, var, var.initializer))
        # return generated code
        yield (desc, '\n'.join([str(decl), code]))
示例#3
0
    def __call__(self, preamble_info):
        """
        Generate the preamble for the custom indirect-lookup function.

        Looks for a function named ``self.func_name`` among
        ``preamble_info.seen_functions``. If one is found and its
        ``arg_dtypes`` equal the converted ``self.func_arg_dtypes``, a single
        ``(description, code)`` pair is yielded. The code consists of the
        declaration of the ``lookup`` array (initialized from ``self.arr``)
        followed by the C source of the function.

        If no function matches, or the argument dtypes differ, nothing is
        yielded.
        """

        # find a function matching our name
        func_match = next(
            (x for x in preamble_info.seen_functions
             if x.name == self.func_name), None)
        if func_match is None:
            # the temporary and the C code only exist for a matching
            # function, so there is nothing to emit
            return

        from loopy.types import to_loopy_type
        # check types
        if tuple(to_loopy_type(x) for x in self.func_arg_dtypes) != \
                func_match.arg_dtypes:
            return

        desc = 'custom_funcs_indirect'

        # create our temporary
        var = lp.TemporaryVariable(
            'lookup', initializer=self.arr, dtype=self.arr.dtype,
            shape=self.arr.shape,
            address_space=lp.AddressSpace.GLOBAL, read_only=True)
        # and code
        code = """
        int {name}(int start, int end, int match)
        {{
            int result = start;
            for (int i = start + 1; i < end; ++i)
            {{
                if (lookup[i] == match)
                    result = i;
            }}
            return result;
        }}
        """.format(name=self.func_name)

        # generate temporary variable code
        from cgen import Initializer
        from loopy.target.c import generate_array_literal
        codegen_state = preamble_info.codegen_state.copy(
            is_generating_device_code=True)
        kernel = preamble_info.kernel
        ast_builder = codegen_state.ast_builder
        target = kernel.target
        # exactly one declaration is expected for the temporary
        decl_info, = var.decl_info(target, index_dtype=kernel.index_dtype)
        decl = ast_builder.wrap_global_constant(
                ast_builder.get_temporary_decl(
                    codegen_state, None, var,
                    decl_info))
        if var.initializer is not None:
            decl = Initializer(decl, generate_array_literal(
                codegen_state, var, var.initializer))
        # return generated code
        yield (desc, '\n'.join([str(decl), code]))
示例#4
0
 def _check_atomic_data(insn):
     """
     Make sure the first variable written by *insn* has an atomic dtype.

     The variable is looked up by name in ``data`` first and in ``temps``
     otherwise; a non-atomic entry is replaced by an atomic copy. Returns
     the name of the written variable.
     """
     # the variable written by this insn
     written = insn.assignee_var_names()[0]

     # locate the first entry of 'data' carrying that name, if any
     for pos, entry in enumerate(data):
         if entry.name == written:
             break
     else:
         pos = None

     if pos is None:
         # not in 'data', so it has to be a temporary
         assert written in temps, (
             'Cannot find written atomic variable: {}'.format(written))
         temp = temps[written]
         if not isinstance(temp.dtype, AtomicType):
             temps[written] = temp.copy(dtype=to_loopy_type(
                 temp.dtype, for_atomic=True))
         return written

     if not isinstance(entry.dtype, AtomicType):
         # switch the dtype to its atomic counterpart
         assert entry.dtype is not None, (
             "Change of dtype to atomic doesn't work if base dype is not"
             " populated")
         data[pos] = entry.copy(for_atomic=True)

     return written
示例#5
0
def test_target_record():
    """
    Check that ``TargetCheckingRecord.__getstate__`` raises
    :class:`AssertionError` for kernel data built without a target,
    whatever container the offending value is wrapped in.
    """
    import numpy as np
    from loopy.types import to_loopy_type

    # a bad argument, i.e. one created without the target set
    bad = lp.GlobalArg('bad', dtype=np.int32, shape=(1, ), order='C')

    def expect_rejected(kernel_data):
        record = TargetCheckingRecord(kernel_data=kernel_data)
        with assert_raises(AssertionError):
            record.__getstate__()

    containers = (
        [bad],          # list
        {'a': bad},     # dictionary
        {'a': [bad]},   # dictionary of lists
        bad,            # plain value
    )
    for kernel_data in containers:
        expect_rejected(kernel_data)

    # dtype converted from numpy used as a dictionary key
    expect_rejected({to_loopy_type(np.int32): 'bad'})
示例#6
0
    def __call__(self, kernel, name, arg_dtypes):
        """
        A function that will return a :class:`loopy.kernel.data.CallMangleInfo`
        to interface with the calling :class:`loopy.LoopKernel`

        :arg kernel: the calling kernel (not inspected here)
        :arg name: name of the function being looked up
        :arg arg_dtypes: dtypes of the arguments at the call site
        :returns: *None* if *name* is not ``self.func_name``, otherwise a
            :class:`loopy.kernel.data.CallMangleInfo` for the call
        :raises Exception: if the number of arguments or the dtype of any
            argument does not match ``self.func_arg_dtypes``
        """
        if name != self.func_name:
            return None

        from loopy.types import to_loopy_type
        from loopy.kernel.data import CallMangleInfo

        def __compare(d1, d2):
            # compare dtypes ignoring atomic
            return to_loopy_type(d1, for_atomic=True) == \
                to_loopy_type(d2, for_atomic=True)

        # check the argument count against the expected dtypes; zip() below
        # stops at the shorter sequence, so a mismatch has to be caught here
        if len(self.func_arg_dtypes) != len(arg_dtypes):
            raise Exception(
                'Unexpected number of arguments provided to mangler '
                '{}, expected {}, got {}'.format(self.func_name,
                                                 len(self.func_arg_dtypes),
                                                 len(arg_dtypes)))

        # check types
        for i, (d1, d2) in enumerate(zip(self.func_arg_dtypes, arg_dtypes)):
            if not __compare(d1, d2):
                raise Exception(
                    'Argument at index {} for mangler {} does not '
                    'match expected dtype.  Expected {}, got {}'.format(
                        i, self.func_name, str(d1), str(d2)))

        # get target for creation
        target = arg_dtypes[0].target
        return CallMangleInfo(target_name=self.func_name,
                              result_dtypes=tuple(
                                  to_loopy_type(x, target=target)
                                  for x in self.func_result_dtypes),
                              arg_dtypes=arg_dtypes)
示例#7
0
文件: tools.py 项目: arghdos/loopy
def _add_dtypes(knl, dtype_dict):
    """
    Apply the dtypes in *dtype_dict* to the arguments and temporaries of
    *knl*.

    :arg knl: kernel providing ``args``, ``temporary_variables`` and
        ``target``
    :arg dtype_dict: mapping from variable name to the requested dtype;
        it is copied, not modified
    :returns: a tuple ``(remaining, new_args, new_temp_vars)`` where
        *remaining* holds the entries of *dtype_dict* that matched neither an
        argument nor a temporary
    :raises RuntimeError: if a variable already has a different dtype
    """
    remaining = dtype_dict.copy()

    from loopy.types import to_loopy_type

    def _retyped(arg):
        # arguments without a requested dtype pass through untouched
        requested = remaining.pop(arg.name, None)
        if requested is None:
            return arg

        requested = to_loopy_type(requested, target=knl.target)
        if arg.dtype is not None and arg.dtype != requested:
            raise RuntimeError(
                    "argument '%s' already has a different dtype "
                    "(existing: %s, new: %s)"
                    % (arg.name, arg.dtype, requested))
        return arg.copy(dtype=requested)

    new_args = [_retyped(arg) for arg in knl.args]

    new_temp_vars = knl.temporary_variables.copy()

    import loopy as lp
    for tv_name in knl.temporary_variables:
        requested = remaining.pop(tv_name, None)
        if requested is None:
            continue

        requested = np.dtype(requested)
        tv = new_temp_vars[tv_name]
        already_typed = tv.dtype is not None and tv.dtype is not lp.auto
        if already_typed and tv.dtype != requested:
            raise RuntimeError(
                    "temporary variable '%s' already has a different dtype "
                    "(existing: %s, new: %s)"
                    % (tv_name, tv.dtype, requested))

        new_temp_vars[tv_name] = tv.copy(dtype=requested)

    return remaining, new_args, new_temp_vars
示例#8
0
文件: testlib.py 项目: inducer/loopy
    def __call__(self, kernel, name, arg_dtypes):
        """
        A function that will return a :class:`loopy.kernel.data.CallMangleInfo`
        to interface with the calling :class:`loopy.LoopKernel`

        :arg kernel: the calling kernel (not inspected here)
        :arg name: name of the function being looked up
        :arg arg_dtypes: dtypes of the arguments at the call site
        :returns: *None* if *name* is not ``self.func_name``, otherwise a
            :class:`loopy.kernel.data.CallMangleInfo` for the call
        :raises Exception: if the number of arguments or the dtype of any
            argument does not match ``self.func_arg_dtypes``
        """
        if name != self.func_name:
            return None

        from loopy.types import to_loopy_type
        from loopy.kernel.data import CallMangleInfo

        def __compare(d1, d2):
            # compare dtypes ignoring atomic
            return to_loopy_type(d1, for_atomic=True) == \
                to_loopy_type(d2, for_atomic=True)

        # check the argument count against the expected dtypes; zip() below
        # stops at the shorter sequence, so a mismatch has to be caught here
        if len(self.func_arg_dtypes) != len(arg_dtypes):
            raise Exception('Unexpected number of arguments provided to mangler '
                            '{}, expected {}, got {}'.format(
                                self.func_name, len(self.func_arg_dtypes),
                                len(arg_dtypes)))

        # check types
        for i, (d1, d2) in enumerate(zip(self.func_arg_dtypes, arg_dtypes)):
            if not __compare(d1, d2):
                raise Exception('Argument at index {} for mangler {} does not '
                                'match expected dtype.  Expected {}, got {}'.
                                format(i, self.func_name, str(d1), str(d2)))

        # get target for creation
        target = arg_dtypes[0].target
        return CallMangleInfo(
            target_name=self.func_name,
            result_dtypes=tuple(to_loopy_type(x, target=target) for x in
                                self.func_result_dtypes),
            arg_dtypes=arg_dtypes)
示例#9
0
文件: data.py 项目: mmmika/loopy
    def __init__(self, name, dtype=None, shape=(), address_space=None,
            dim_tags=None, offset=0, dim_names=None, strides=None, order=None,
            base_indices=None, storage_shape=None,
            base_storage=None, initializer=None, read_only=False,
            _base_storage_access_may_be_aliasing=False, **kwargs):
        """
        Validate the description of a temporary variable and hand it to
        :class:`ArrayBase`.

        :arg dtype: :class:`loopy.auto` or a :class:`numpy.dtype`
        :arg shape: :class:`loopy.auto` or a shape tuple
        :arg base_indices: :class:`loopy.auto` or a tuple of base indices
        :arg address_space: defaults to :class:`loopy.auto` if not given.
            The deprecated keyword ``scope`` is accepted as an alias.
        :arg initializer: *None* or a :class:`numpy.ndarray`. When given, it
            supplies *dtype* and *shape* if those are left unspecified.

        :raises ValueError: if both ``scope`` and *address_space* are given
        :raises LoopyError: if the initializer is not *None* or a numpy
            array, is combined with a non-zero *offset*, a mismatching
            *dtype*, ``read_only=False`` or *base_storage*, or if
            *_base_storage_access_may_be_aliasing* is set without
            *base_storage*
        """

        # {{{ deprecated 'scope' alias for 'address_space'

        scope = kwargs.pop("scope", None)
        if scope is not None:
            warn("Passing 'scope' is deprecated. Use 'address_space' instead.",
                    DeprecationWarning, stacklevel=2)

            if address_space is not None:
                raise ValueError("only one of 'scope' and 'address_space' "
                        "may be specified")
            else:
                address_space = scope

        del scope

        # }}}

        # An unspecified address space means 'auto', so it is never None
        # past this point.
        if address_space is None:
            address_space = auto

        if initializer is None:
            pass
        elif isinstance(initializer, np.ndarray):
            if offset != 0:
                raise LoopyError(
                        "temporary variable '%s': "
                        "offset must be 0 if initializer specified"
                        % name)

            from loopy.types import NumpyType, to_loopy_type
            if dtype is auto or dtype is None:
                dtype = NumpyType(initializer.dtype)
            elif to_loopy_type(dtype) != to_loopy_type(initializer.dtype):
                raise LoopyError(
                        "temporary variable '%s': "
                        "dtype of initializer does not match "
                        "dtype of array."
                        % name)

            if shape is auto:
                shape = initializer.shape

        else:
            raise LoopyError(
                    "temporary variable '%s': "
                    "initializer must be None or a numpy array"
                    % name)

        if order is None:
            order = "C"

        # The default base indices need the number of axes, which is not
        # available while the shape is still 'auto' (len(auto) would raise
        # a TypeError); leave them unset in that case.
        if base_indices is None and shape is not auto:
            base_indices = (0,) * len(shape)

        if not read_only and initializer is not None:
            raise LoopyError(
                    "temporary variable '%s': "
                    "read-write variables with initializer "
                    "are not currently supported "
                    "(did you mean to set read_only=True?)"
                    % name)

        if base_storage is not None and initializer is not None:
            raise LoopyError(
                    "temporary variable '%s': "
                    "base_storage and initializer are "
                    "mutually exclusive"
                    % name)

        if base_storage is None and _base_storage_access_may_be_aliasing:
            raise LoopyError(
                    "temporary variable '%s': "
                    "_base_storage_access_may_be_aliasing option, but no "
                    "base_storage given!"
                    % name)

        ArrayBase.__init__(self, name=intern(name),
                dtype=dtype, shape=shape, strides=strides,
                dim_tags=dim_tags, offset=offset, dim_names=dim_names,
                order=order,
                base_indices=base_indices,
                address_space=address_space,
                storage_shape=storage_shape,
                base_storage=base_storage,
                initializer=initializer,
                read_only=read_only,
                _base_storage_access_may_be_aliasing=(
                    _base_storage_access_may_be_aliasing),
                **kwargs)
示例#10
0
文件: array.py 项目: inducer/loopy
    def __init__(self, name, dtype=None, shape=None, dim_tags=None, offset=0,
            dim_names=None, strides=None, order=None, for_atomic=False,
            target=None, alignment=None,
            **kwargs):
        """
        Normalize the array description and store it on the record.

        All of the following (except *name*) are optional.
        Specify either strides or shape.

        :arg name: When passed to :class:`loopy.make_kernel`, this may contain
            multiple names separated by commas, in which case multiple arguments,
            each with identical properties, are created for each name.

        :arg shape: May be any of the things specified under :attr:`shape`,
            or a string which can be parsed into the previous form.

        :arg dim_tags: A comma-separated list of tags as understood by
            :func:`parse_array_dim_tag`.

        :arg strides: May be one of the following:

            * None

            * :class:`loopy.auto`. The strides will be determined by *order*
              and the access footprint.

            * a tuple like :attr:`numpy.ndarray.shape`.

              Each entry of the tuple is also allowed to be a :mod:`pymbolic`
              expression involving kernel parameters, or a (potentially
              comma-separated) string that can be parsed to such an
              expression.

            * A string which can be parsed into the previous form.

        :arg order: "F" or "C" for C (row major) or Fortran
            (column major). Defaults to the *default_order* argument
            passed to :func:`loopy.make_kernel`.
        :arg for_atomic:
            Whether the array is declared for atomic access, and, if necessary,
            using atomic-capable data types.
        :arg offset: (See :attr:`offset`)
        :arg alignment: memory alignment in bytes

        :raises TypeError: if an entry of *kwargs* is not listed in
            ``allowed_extra_kwargs``, or if both *strides* and *dim_tags*
            are given
        :raises LoopyError: if *dim_names* is not unique or contains
            non-strings, if the arguments imply contradictory numbers of
            axes, or if the target axes are non-contiguous or outside the
            supported range
        """

        for kwarg_name in kwargs:
            if kwarg_name not in self.allowed_extra_kwargs:
                raise TypeError("invalid kwarg: %s" % kwarg_name)

        import loopy as lp

        from loopy.types import to_loopy_type
        dtype = to_loopy_type(dtype, allow_auto=True, allow_none=True,
                for_atomic=for_atomic, target=target)

        if dtype is lp.auto:
            # 'auto' is still accepted, but is mapped to None with a warning
            from warnings import warn
            warn("Argument/temporary data type for '%s' should be None if "
                    "unspecified, not auto. This usage will be disallowed in 2018."
                    % name,
                    DeprecationWarning, stacklevel=2)

            dtype = None

        strides_known = strides is not None and strides is not lp.auto
        shape_known = shape is not None and shape is not lp.auto

        if strides_known:
            strides = _parse_shape_or_strides(strides)

        if shape_known:
            shape = _parse_shape_or_strides(shape)

        # {{{ check dim_names

        if dim_names is not None:
            if len(dim_names) != len(set(dim_names)):
                raise LoopyError("dim_names are not unique")

            for n in dim_names:
                if not isinstance(n, str):
                    raise LoopyError("found non-string '%s' in dim_names"
                            % type(n).__name__)

        # }}}

        # {{{ convert strides to dim_tags (Note: strides override order)

        if dim_tags is not None and strides_known:
            raise TypeError("may not specify both strides and dim_tags")

        if dim_tags is None and strides_known:
            dim_tags = [FixedStrideArrayDimTag(s) for s in strides]
            strides = None

        # }}}

        if dim_tags is not None:
            dim_tags = parse_array_dim_tags(dim_tags,
                    n_axes=(len(shape) if shape_known else None),
                    use_increasing_target_axes=self.max_target_axes > 1,
                    dim_names=dim_names)

        # {{{ determine number of user axes

        num_user_axes = None
        if shape_known:
            num_user_axes = len(shape)
        for dim_iterable in [dim_tags, dim_names]:
            if dim_iterable is not None:
                new_num_user_axes = len(dim_iterable)

                if num_user_axes is None:
                    num_user_axes = new_num_user_axes
                else:
                    if new_num_user_axes != num_user_axes:
                        raise LoopyError("contradictory values for number of "
                                "dimensions of array '%s' from shape, strides, "
                                "dim_tags, or dim_names"
                                % name)

                del new_num_user_axes

        # }}}

        # {{{ convert order to dim_tags

        if order is None and self.max_target_axes > 1:
            # FIXME: Hackety hack. ImageArgs need to generate dim_tags even
            # if no order is specified. Plus they don't care that much.
            order = "C"

        if dim_tags is None and num_user_axes is not None and order is not None:
            dim_tags = parse_array_dim_tags(num_user_axes*[order],
                    n_axes=num_user_axes,
                    use_increasing_target_axes=self.max_target_axes > 1,
                    dim_names=dim_names)
            order = None

        # }}}

        if dim_tags is not None:
            # {{{ find number of target axes

            target_axes = set()
            for dim_tag in dim_tags:
                if isinstance(dim_tag, _StrideArrayDimTagBase):
                    target_axes.add(dim_tag.target_axis)

            # Error messages below report the *name* argument: the 'name'
            # field is only handed to the record constructor at the very
            # end of this method.
            if target_axes != set(range(len(target_axes))):
                raise LoopyError("target axes for variable '%s' are non-"
                        "contiguous" % name)

            num_target_axes = len(target_axes)
            del target_axes

            # }}}

            if not (self.min_target_axes <= num_target_axes <= self.max_target_axes):
                raise LoopyError("%s only supports between %d and %d target axes "
                        "('%s' has %d)" % (type(self).__name__, self.min_target_axes,
                            self.max_target_axes, name, num_target_axes))

            new_dim_tags = convert_computed_to_fixed_dim_tags(
                    name, num_user_axes, num_target_axes,
                    shape, dim_tags)

            if new_dim_tags is not None:
                # successfully normalized
                dim_tags = new_dim_tags
                del new_dim_tags

        if dim_tags is not None:
            # for hashability
            dim_tags = tuple(dim_tags)
            order = None

        if strides is not None:
            # Preserve strides if we weren't able to process them yet.
            # That only happens if they're set to loopy.auto (and 'guessed'
            # in loopy.kernel.creation).

            kwargs["strides"] = strides

        if dim_names is not None and not isinstance(dim_names, tuple):
            from warnings import warn
            warn("dim_names is not a tuple when calling ArrayBase constructor",
                    DeprecationWarning, stacklevel=2)

        ImmutableRecord.__init__(self,
                name=name,
                dtype=dtype,
                shape=shape,
                dim_tags=dim_tags,
                offset=offset,
                dim_names=dim_names,
                order=order,
                alignment=alignment,
                for_atomic=for_atomic,
                **kwargs)
示例#11
0
文件: testlib.py 项目: inducer/loopy
 def __compare(d1, d2):
     """Return whether *d1* and *d2* are equal once both have been
     converted with ``to_loopy_type(..., for_atomic=True)``."""
     # converting both sides the same way makes the comparison ignore
     # atomic-ness
     lhs = to_loopy_type(d1, for_atomic=True)
     rhs = to_loopy_type(d2, for_atomic=True)
     return lhs == rhs
示例#12
0
    def __init__(self, name, dtype=None, shape=None, dim_tags=None, offset=0,
            dim_names=None, strides=None, order=None, for_atomic=False,
            **kwargs):
        """
        Normalize the array description and store it on the record.

        All of the following are optional. Specify either strides or shape.

        :arg name: May contain multiple names separated by
            commas, in which case multiple arguments,
            each with identical properties, are created
            for each name.
        :arg dtype: the :class:`numpy.dtype` of the array.
            If this is *None*, :mod:`loopy` will try to continue without
            knowing the type of this array, where the idea is that precise
            knowledge of the type will become available at invocation time.
            :class:`loopy.CompiledKernel` (and thereby
            :meth:`loopy.LoopKernel.__call__`) automatically add this type
            information based on invocation arguments.

            Note that some transformations, such as :func:`loopy.add_padding`
            cannot be performed without knowledge of the exact *dtype*.

        :arg shape: May be one of the following:

            * *None*. In this case, no shape is intended to be specified,
              only the strides will be used to access the array. Bounds checking
              will not be performed.

            * :class:`loopy.auto`. The shape will be determined by finding the
              access footprint.

            * a tuple like :attr:`numpy.ndarray.shape`.

              Each entry of the tuple is also allowed to be a :mod:`pymbolic`
              expression involving kernel parameters, or a (potentially
              comma-separated) string that can be parsed to such an
              expression.

              Any element of the shape tuple not used to compute strides
              may be *None*.

            * A string which can be parsed into the previous form.

        :arg dim_tags: A comma-separated list of tags as understood by
            :func:`parse_array_dim_tag`.

        :arg strides: May be one of the following:

            * None

            * :class:`loopy.auto`. The strides will be determined by *order*
              and the access footprint.

            * a tuple like :attr:`numpy.ndarray.shape`.

              Each entry of the tuple is also allowed to be a :mod:`pymbolic`
              expression involving kernel parameters, or a (potentially
              comma-separated) string that can be parsed to such an
              expression.

            * A string which can be parsed into the previous form.

        :arg order: "F" or "C" for C (row major) or Fortran
            (column major). Defaults to the *default_order* argument
            passed to :func:`loopy.make_kernel`.
        :arg for_atomic:
            Whether the array is declared for atomic access, and, if necessary,
            using atomic-capable data types.
        :arg offset: Offset from the beginning of the buffer to the point from
            which the strides are counted. May be one of

            * 0
            * a string (that is interpreted as an argument name).
            * :class:`loopy.auto`, in which case an offset argument
              is added automatically, immediately following this argument.
              :class:`loopy.CompiledKernel` is even smarter in its treatment of
              this case and will compile custom versions of the kernel based on
              whether the passed arrays have offsets or not.

        :raises TypeError: if an entry of *kwargs* is not listed in
            ``allowed_extra_kwargs``, or if both *strides* and *dim_tags*
            are given
        :raises LoopyError: if *dim_names* is not unique or contains
            non-strings, if the arguments imply contradictory numbers of
            axes, or if the target axes are non-contiguous or outside the
            supported range
        """

        for kwarg_name in kwargs:
            if kwarg_name not in self.allowed_extra_kwargs:
                raise TypeError("invalid kwarg: %s" % kwarg_name)

        import loopy as lp

        from loopy.types import to_loopy_type
        dtype = to_loopy_type(dtype, allow_auto=True, allow_none=True,
                for_atomic=for_atomic)

        strides_known = strides is not None and strides is not lp.auto
        shape_known = shape is not None and shape is not lp.auto

        if strides_known:
            strides = _parse_shape_or_strides(strides)

        if shape_known:
            shape = _parse_shape_or_strides(shape)

        # {{{ check dim_names

        if dim_names is not None:
            if len(dim_names) != len(set(dim_names)):
                raise LoopyError("dim_names are not unique")

            for n in dim_names:
                if not isinstance(n, str):
                    raise LoopyError("found non-string '%s' in dim_names"
                            % type(n).__name__)

        # }}}

        # {{{ convert strides to dim_tags (Note: strides override order)

        if dim_tags is not None and strides_known:
            raise TypeError("may not specify both strides and dim_tags")

        if dim_tags is None and strides_known:
            dim_tags = [FixedStrideArrayDimTag(s) for s in strides]
            strides = None

        # }}}

        if dim_tags is not None:
            dim_tags = parse_array_dim_tags(dim_tags,
                    n_axes=(len(shape) if shape_known else None),
                    use_increasing_target_axes=self.max_target_axes > 1,
                    dim_names=dim_names)

        # {{{ determine number of user axes

        num_user_axes = None
        if shape_known:
            num_user_axes = len(shape)
        for dim_iterable in [dim_tags, dim_names]:
            if dim_iterable is not None:
                new_num_user_axes = len(dim_iterable)

                if num_user_axes is None:
                    num_user_axes = new_num_user_axes
                else:
                    if new_num_user_axes != num_user_axes:
                        raise LoopyError("contradictory values for number of "
                                "dimensions of array '%s' from shape, strides, "
                                "dim_tags, or dim_names"
                                % name)

                del new_num_user_axes

        # }}}

        # {{{ convert order to dim_tags

        if order is None and self.max_target_axes > 1:
            # FIXME: Hackety hack. ImageArgs need to generate dim_tags even
            # if no order is specified. Plus they don't care that much.
            order = "C"

        if dim_tags is None and num_user_axes is not None and order is not None:
            dim_tags = parse_array_dim_tags(num_user_axes*[order],
                    n_axes=num_user_axes,
                    use_increasing_target_axes=self.max_target_axes > 1,
                    dim_names=dim_names)
            order = None

        # }}}

        if dim_tags is not None:
            # {{{ find number of target axes

            target_axes = set()
            for dim_tag in dim_tags:
                if isinstance(dim_tag, _StrideArrayDimTagBase):
                    target_axes.add(dim_tag.target_axis)

            # Error messages below report the *name* argument: the 'name'
            # field is only handed to the record constructor at the very
            # end of this method.
            if target_axes != set(range(len(target_axes))):
                raise LoopyError("target axes for variable '%s' are non-"
                        "contiguous" % name)

            num_target_axes = len(target_axes)
            del target_axes

            # }}}

            if not (self.min_target_axes <= num_target_axes <= self.max_target_axes):
                raise LoopyError("%s only supports between %d and %d target axes "
                        "('%s' has %d)" % (type(self).__name__, self.min_target_axes,
                            self.max_target_axes, name, num_target_axes))

            new_dim_tags = convert_computed_to_fixed_dim_tags(
                    name, num_user_axes, num_target_axes,
                    shape, dim_tags)

            if new_dim_tags is not None:
                # successfully normalized
                dim_tags = new_dim_tags
                del new_dim_tags

        if dim_tags is not None:
            # for hashability
            dim_tags = tuple(dim_tags)
            order = None

        if strides is not None:
            # Preserve strides if we weren't able to process them yet.
            # That only happens if they're set to loopy.auto (and 'guessed'
            # in loopy.kernel.creation).

            kwargs["strides"] = strides

        if dim_names is not None and not isinstance(dim_names, tuple):
            from warnings import warn
            warn("dim_names is not a tuple when calling ArrayBase constructor",
                    DeprecationWarning, stacklevel=2)

        Record.__init__(self,
                name=name,
                dtype=dtype,
                shape=shape,
                dim_tags=dim_tags,
                offset=offset,
                dim_names=dim_names,
                order=order,
                **kwargs)
示例#13
0
 def func_filter(key):
     """Select keys with empty ``lid_strides``, a dtype equal to
     ``to_loopy_type(np.float64)`` and direction ``'load'``."""
     # conditions are tested in the same order as a chained 'and', so the
     # dtype conversion only happens for keys without lid strides
     if not key.lid_strides == {}:
         return False
     if not key.dtype == to_loopy_type(np.float64):
         return False
     return key.direction == 'load'
示例#14
0
def symbol_x(knl, name):
    """Return ``(float32 loopy type, "X")`` for the name ``"X"``.

    For every other *name* the function returns *None*. *knl* is accepted
    but not used.
    """
    if name != "X":
        return None

    # imported lazily so that other names never touch loopy.types
    from loopy.types import to_loopy_type
    x_dtype = to_loopy_type(np.float32)
    return x_dtype, "X"
示例#15
0
文件: array.py 项目: cmsquared/loopy
    def __init__(self, name, dtype=None, shape=None, dim_tags=None, offset=0,
            dim_names=None, strides=None, order=None, for_atomic=False,
            **kwargs):
        """
        All of the following are optional. Specify either strides or shape.

        :arg name: May contain multiple names separated by
            commas, in which case multiple arguments,
            each with identical properties, are created
            for each name.
        :arg dtype: the :class:`numpy.dtype` of the array.
            If this is *None*, :mod:`loopy` will try to continue without
            knowing the type of this array, where the idea is that precise
            knowledge of the type will become available at invocation time.
            :class:`loopy.CompiledKernel` (and thereby
            :meth:`loopy.LoopKernel.__call__`) automatically add this type
            information based on invocation arguments.

            Note that some transformations, such as :func:`loopy.add_padding`
            cannot be performed without knowledge of the exact *dtype*.

        :arg shape: May be one of the following:

            * *None*. In this case, no shape is intended to be specified,
              only the strides will be used to access the array. Bounds checking
              will not be performed.

            * :class:`loopy.auto`. The shape will be determined by finding the
              access footprint.

            * a tuple like :attr:`numpy.ndarray.shape`.

              Each entry of the tuple is also allowed to be a :mod:`pymbolic`
              expression involving kernel parameters, or a string that can
              be parsed to such an expression.

              Any element of the shape tuple not used to compute strides
              may be *None*.

            * A string which can be parsed into the previous form.

        :arg dim_tags: A comma-separated list of tags as understood by
            :func:`parse_array_dim_tag`.

        :arg strides: May be one of the following:

            * None

            * :class:`loopy.auto`. The strides will be determined by *order*
              and the access footprint.

            * a tuple like :attr:`numpy.ndarray.shape`.

              Each entry of the tuple is also allowed to be a :mod:`pymbolic`
              expression involving kernel parameters, or a string that can
              be parsed to such an expression.

            * A string which can be parsed into the previous form.

        :arg order: "F" or "C" for C (row major) or Fortran
            (column major). Defaults to the *default_order* argument
            passed to :func:`loopy.make_kernel`.
        :arg for_atomic:
            Whether the array is declared for atomic access, and, if necessary,
            using atomic-capable data types.
        :arg offset: Offset from the beginning of the buffer to the point from
            which the strides are counted. May be one of

            * 0
            * a string (that is interpreted as an argument name).
            * :class:`loopy.auto`, in which case an offset argument
              is added automatically, immediately following this argument.
              :class:`loopy.CompiledKernel` is even smarter in its treatment of
              this case and will compile custom versions of the kernel based on
              whether the passed arrays have offsets or not.

        :raises TypeError: if an extra keyword argument is not listed in
            ``allowed_extra_kwargs``, or if both known *strides* and
            *dim_tags* are given.
        :raises LoopyError: if *dim_names* are not unique strings, if the
            number of axes implied by shape/dim_tags/dim_names disagree, or
            if the target axes are non-contiguous or outside the range
            ``min_target_axes``..``max_target_axes``.
        """

        for kwarg_name in kwargs:
            if kwarg_name not in self.allowed_extra_kwargs:
                raise TypeError("invalid kwarg: %s" % kwarg_name)

        import loopy as lp

        from loopy.types import to_loopy_type
        dtype = to_loopy_type(dtype, allow_auto=True, allow_none=True,
                for_atomic=for_atomic)

        strides_known = strides is not None and strides is not lp.auto
        shape_known = shape is not None and shape is not lp.auto

        if strides_known:
            strides = _parse_shape_or_strides(strides)

        if shape_known:
            shape = _parse_shape_or_strides(shape)

        # {{{ check dim_names

        if dim_names is not None:
            if len(dim_names) != len(set(dim_names)):
                raise LoopyError("dim_names are not unique")

            for n in dim_names:
                if not isinstance(n, str):
                    raise LoopyError("found non-string '%s' in dim_names"
                            % type(n).__name__)

        # }}}

        # {{{ convert strides to dim_tags (Note: strides override order)

        if dim_tags is not None and strides_known:
            raise TypeError("may not specify both strides and dim_tags")

        if dim_tags is None and strides_known:
            dim_tags = [FixedStrideArrayDimTag(s) for s in strides]
            strides = None

        # }}}

        if dim_tags is not None:
            dim_tags = parse_array_dim_tags(dim_tags,
                    n_axes=(len(shape) if shape_known else None),
                    use_increasing_target_axes=self.max_target_axes > 1,
                    dim_names=dim_names)

        # {{{ determine number of user axes

        num_user_axes = None
        if shape_known:
            num_user_axes = len(shape)
        for dim_iterable in [dim_tags, dim_names]:
            if dim_iterable is not None:
                new_num_user_axes = len(dim_iterable)

                if num_user_axes is None:
                    num_user_axes = new_num_user_axes
                else:
                    if new_num_user_axes != num_user_axes:
                        raise LoopyError("contradictory values for number of "
                                "dimensions of array '%s' from shape, strides, "
                                "dim_tags, or dim_names"
                                % name)

                del new_num_user_axes

        # }}}

        # {{{ convert order to dim_tags

        if order is None and self.max_target_axes > 1:
            # FIXME: Hackety hack. ImageArgs need to generate dim_tags even
            # if no order is specified. Plus they don't care that much.
            order = "C"

        if dim_tags is None and num_user_axes is not None and order is not None:
            dim_tags = parse_array_dim_tags(num_user_axes*[order],
                    n_axes=num_user_axes,
                    use_increasing_target_axes=self.max_target_axes > 1,
                    dim_names=dim_names)
            order = None

        # }}}

        if dim_tags is not None:
            # {{{ find number of target axes

            target_axes = set()
            for dim_tag in dim_tags:
                if isinstance(dim_tag, _StrideArrayDimTagBase):
                    target_axes.add(dim_tag.target_axis)

            # NOTE: error messages use the 'name' argument, since the 'name'
            # attribute is only assigned by Record.__init__ further down.
            if target_axes != set(range(len(target_axes))):
                raise LoopyError("target axes for variable '%s' are non-"
                        "contiguous" % name)

            num_target_axes = len(target_axes)
            del target_axes

            # }}}

            if not (self.min_target_axes <= num_target_axes <= self.max_target_axes):
                raise LoopyError("%s only supports between %d and %d target axes "
                        "('%s' has %d)" % (type(self).__name__, self.min_target_axes,
                            self.max_target_axes, name, num_target_axes))

            new_dim_tags = convert_computed_to_fixed_dim_tags(
                    name, num_user_axes, num_target_axes,
                    shape, dim_tags)

            if new_dim_tags is not None:
                # successfully normalized
                dim_tags = new_dim_tags
                del new_dim_tags

        if dim_tags is not None:
            # for hashability
            dim_tags = tuple(dim_tags)
            order = None

        if strides is not None:
            # Preserve strides if we weren't able to process them yet.
            # That only happens if they're set to loopy.auto (and 'guessed'
            # in loopy.kernel.creation).

            kwargs["strides"] = strides

        if dim_names is not None and not isinstance(dim_names, tuple):
            from warnings import warn
            warn("dim_names is not a tuple when calling ArrayBase constructor",
                    DeprecationWarning, stacklevel=2)

        Record.__init__(self,
                name=name,
                dtype=dtype,
                shape=shape,
                dim_tags=dim_tags,
                offset=offset,
                dim_names=dim_names,
                order=order,
                **kwargs)
示例#16
0
文件: __init__.py 项目: inducer/loopy
    def __init__(self, domains, instructions, args=None, schedule=None,
            name="loopy_kernel",
            preambles=None,
            preamble_generators=None,
            assumptions=None,
            local_sizes=None,
            temporary_variables=None,
            iname_to_tags=None,
            substitutions=None,
            function_manglers=None,
            symbol_manglers=None,

            iname_slab_increments=None,
            loop_priority=frozenset(),
            silenced_warnings=None,

            applied_iname_rewrites=None,
            cache_manager=None,
            index_dtype=np.int32,
            options=None,

            state=KernelState.INITIAL,
            target=None,

            overridden_get_grid_sizes_for_insn_ids=None,
            _cached_written_variables=None):
        """
        Container arguments left at *None* are replaced by a fresh empty
        list/dict (or, for *function_manglers*, by the two default manglers),
        so no default container is shared between kernels.

        :arg assumptions: *None* (a universe parameter set is built from the
            parameter space of ``domains[0]``), a string (parsed as the
            constraint part of an isl parameter set over the outer parameters
            of *domains*), or an object providing ``is_params()`` and
            ``get_ctx()``.
        :arg index_dtype: converted via :func:`loopy.types.to_loopy_type`;
            must be a signed integer type.
        :arg state: one of ``KernelState.INITIAL``,
            ``KernelState.PREPROCESSED``, ``KernelState.SCHEDULED``.
        :arg overridden_get_grid_sizes_for_insn_ids: A callable. When kernels get
            intersected in slab decomposition, their grid sizes shouldn't
            change. This provides a way to forward sub-kernel grid size requests.

        :raises TypeError: if *index_dtype* is not an integer type or is
            unsigned.
        :raises ValueError: if *state* is not one of the values listed above.
        """

        # {{{ process constructor arguments

        if args is None:
            args = []
        if preambles is None:
            preambles = []
        if preamble_generators is None:
            preamble_generators = []
        if local_sizes is None:
            local_sizes = {}
        if temporary_variables is None:
            temporary_variables = {}
        if iname_to_tags is None:
            iname_to_tags = {}
        if substitutions is None:
            substitutions = {}
        if function_manglers is None:
            function_manglers = [
                default_function_mangler,
                single_arg_function_mangler,
                ]
        if symbol_manglers is None:
            # A fresh list per kernel. (This branch used to overwrite
            # function_manglers and leave symbol_manglers as None.)
            symbol_manglers = []
        if iname_slab_increments is None:
            iname_slab_increments = {}

        if silenced_warnings is None:
            silenced_warnings = []
        if applied_iname_rewrites is None:
            applied_iname_rewrites = []

        if cache_manager is None:
            from loopy.kernel.tools import SetOperationCacheManager
            cache_manager = SetOperationCacheManager()

        # }}}

        # {{{ process assumptions

        if assumptions is None:
            # no assumptions given: use the universe over the parameters
            # of the first domain
            dom0_space = domains[0].get_space()
            assumptions_space = isl.Space.params_alloc(
                    dom0_space.get_ctx(), dom0_space.dim(dim_type.param))
            for i in range(dom0_space.dim(dim_type.param)):
                assumptions_space = assumptions_space.set_dim_name(
                        dim_type.param, i,
                        dom0_space.get_dim_name(dim_type.param, i))
            assumptions = isl.BasicSet.universe(assumptions_space)

        elif isinstance(assumptions, str):
            assumptions_set_str = "[%s] -> { : %s}" \
                    % (",".join(self.outer_params(domains)),
                        assumptions)
            assumptions = isl.BasicSet.read_from_str(domains[0].get_ctx(),
                    assumptions_set_str)

        assert assumptions.is_params()

        # }}}

        from loopy.types import to_loopy_type
        index_dtype = to_loopy_type(index_dtype, target=target)
        if not index_dtype.is_integral():
            raise TypeError("index_dtype must be an integer")
        if np.iinfo(index_dtype.numpy_dtype).min >= 0:
            raise TypeError("index_dtype must be signed")

        if state not in [
                KernelState.INITIAL,
                KernelState.PREPROCESSED,
                KernelState.SCHEDULED,
                ]:
            raise ValueError("invalid value for 'state'")

        from collections import defaultdict
        assert not isinstance(iname_to_tags, defaultdict)

        for iname, tags in six.iteritems(iname_to_tags):
            # don't tolerate empty sets
            assert tags
            assert isinstance(tags, frozenset)

        assert all(dom.get_ctx() == isl.DEFAULT_CONTEXT for dom in domains)
        assert assumptions.get_ctx() == isl.DEFAULT_CONTEXT

        ImmutableRecordWithoutPickling.__init__(self,
                domains=domains,
                instructions=instructions,
                args=args,
                schedule=schedule,
                name=name,
                preambles=preambles,
                preamble_generators=preamble_generators,
                assumptions=assumptions,
                iname_slab_increments=iname_slab_increments,
                loop_priority=loop_priority,
                silenced_warnings=silenced_warnings,
                temporary_variables=temporary_variables,
                local_sizes=local_sizes,
                iname_to_tags=iname_to_tags,
                substitutions=substitutions,
                cache_manager=cache_manager,
                applied_iname_rewrites=applied_iname_rewrites,
                function_manglers=function_manglers,
                symbol_manglers=symbol_manglers,
                index_dtype=index_dtype,
                options=options,
                state=state,
                target=target,
                overridden_get_grid_sizes_for_insn_ids=(
                    overridden_get_grid_sizes_for_insn_ids),
                _cached_written_variables=_cached_written_variables)

        self._kernel_executor_cache = {}
示例#17
0
    def test_working_buffers(self):
        """Exercise the working-buffer compression for the vectorized option
        loop, first with the default memory limits and then with the constant
        memory limit forced to zero via a temporary YAML file.
        """

        def verify_unpacks(kgen, unpacks, offsets, args):
            # every argument must show up in a pointer unpack of the right
            # type (and scope), and must have an offset entry
            for arg in args:
                matched = next(
                    (text for text in unpacks
                     if re.search(r'\b' + arg.name + r'\b', text)),
                    None)
                assert matched
                # next check the type
                assert kgen.type_map[arg.dtype] in matched
                # and scope, if needed
                if arg.address_space == scopes.LOCAL:
                    assert 'local' in matched
                    assert local_work_name in matched
                    assert 'volatile' in matched
                else:
                    assert rhs_work_name in matched
                # and in offset
                assert arg.name in offsets

        def verify_local_unpacks(kgen, buffers, args):
            for arg in args:
                # get offset
                offset_str = buffers.pointer_offsets[arg.name][2]
                local = kgen._get_local_unpacks(buffers, [arg])
                if local.pointer_unpacks:
                    # and check
                    assert re.search(r'\b' + re.escape(offset_str) + r'\b',
                                     local.pointer_unpacks[0])
                else:
                    assert isinstance(arg, lp.TemporaryVariable)

        # test vector to ensure the various working buffer configurations work
        # (i.e., locals)
        option_loop = OptionLoopWrapper.from_get_oploop(
            self, do_conp=False, do_vector=True, do_sparse=False)

        for opts in option_loop:
            # get the dummy generator and build its kernels
            kgen = self._kernel_gen(opts, include_jac_lookup=True)
            kgen._make_kernels()

            # process the arguments, then the memory
            record, _ = kgen._process_args()
            record, _ = kgen._process_memory(record)

            # and generate working buffers
            recordnew, result = kgen._compress_to_working_buffer(record)

            if opts.depth:
                # check for local
                first_local = next(
                    (data for data in recordnew.kernel_data
                     if data.address_space == scopes.LOCAL),
                    None)
                assert first_local

            # check that all args are in the pointer unpacks
            verify_unpacks(
                kgen, result.pointer_unpacks, result.pointer_offsets,
                recordnew.args + recordnew.local + recordnew.host_constants)
            # check unpacks for driver function (note: this isn't the 'local'
            # scope, rather local copies out of the working buffer)
            verify_local_unpacks(kgen, result, recordnew.args)

            # next, write a dummy input file, such that we can force the
            # constant memory allocation to zero
            with NamedTemporaryFile(suffix='.yaml', mode='w') as limits_file:
                limits_file.write("""
                    memory-limits:
                        constant: 0 B
                    """)
                limits_file.seek(0)

                # set file
                kgen.mem_limits = limits_file.name

                # reprocess
                noconst, _ = kgen._process_memory(record)
                noconst, result = kgen._compress_to_working_buffer(noconst)

                # check that we have an integer workspace
                int_type = to_loopy_type(arc.kint_type, target=kgen.target)
                first_int = next(
                    (data for data in noconst.kernel_data
                     if data.dtype == int_type),
                    None)
                assert first_int

                # and recheck pointer unpacks (including host constants)
                all_args = recordnew.args + recordnew.local + record.constants
                verify_unpacks(
                    kgen, result.pointer_unpacks, result.pointer_offsets,
                    all_args)
                verify_local_unpacks(
                    kgen, result,
                    recordnew.args + recordnew.local + record.constants)
示例#18
0
    def __init__(self, domains, instructions, args=None, schedule=None,
            name="loopy_kernel",
            preambles=None,
            preamble_generators=None,
            assumptions=None,
            local_sizes=None,
            temporary_variables=None,
            iname_to_tag=None,
            substitutions=None,
            function_manglers=None,
            symbol_manglers=None,

            iname_slab_increments=None,
            loop_priority=None,
            silenced_warnings=None,

            applied_iname_rewrites=None,
            cache_manager=None,
            index_dtype=np.int32,
            options=None,

            state=kernel_state.INITIAL,
            target=None,

            # When kernels get intersected in slab decomposition,
            # their grid sizes shouldn't change. This provides
            # a way to forward sub-kernel grid size requests.
            get_grid_sizes_for_insn_ids=None):
        """
        Container arguments left at *None* are replaced by a fresh empty
        list/dict (or, for *function_manglers*, by the two default manglers),
        so that no default container is shared between kernel instances.

        Instructions whose ``id`` is *None* or a ``UniqueName`` receive a
        freshly generated unique id; explicit ids must not repeat.

        :arg assumptions: *None* (a universe parameter set is built from the
            parameter space of ``domains[0]``), a string (parsed as the
            constraint part of an isl parameter set over the outer parameters
            of *domains*), or an object providing ``is_params()`` and
            ``get_ctx()``.
        :arg index_dtype: converted via :func:`loopy.types.to_loopy_type`;
            must be a signed integer type.
        :arg state: one of ``kernel_state.INITIAL``,
            ``kernel_state.PREPROCESSED``, ``kernel_state.SCHEDULED``.
        :arg get_grid_sizes_for_insn_ids: if not *None*, stored on the
            instance under the same name, overriding the method.

        :raises RuntimeError: on a duplicate explicit instruction id.
        :raises TypeError: if *index_dtype* is not an integer type or is
            unsigned.
        :raises ValueError: if *state* is not one of the values listed above.
        """

        # {{{ replace None defaults by fresh containers

        if args is None:
            args = []
        if preambles is None:
            preambles = []
        if preamble_generators is None:
            preamble_generators = []
        if local_sizes is None:
            local_sizes = {}
        if temporary_variables is None:
            temporary_variables = {}
        if iname_to_tag is None:
            iname_to_tag = {}
        if substitutions is None:
            substitutions = {}
        if function_manglers is None:
            function_manglers = [
                default_function_mangler,
                single_arg_function_mangler,
                ]
        if symbol_manglers is None:
            symbol_manglers = []
        if iname_slab_increments is None:
            iname_slab_increments = {}
        if loop_priority is None:
            loop_priority = []
        if silenced_warnings is None:
            silenced_warnings = []
        if applied_iname_rewrites is None:
            applied_iname_rewrites = []

        if cache_manager is None:
            from loopy.kernel.tools import SetOperationCacheManager
            cache_manager = SetOperationCacheManager()

        # }}}

        # {{{ make instruction ids unique

        from loopy.kernel.creation import UniqueName

        insn_ids = set()
        for insn in instructions:
            if insn.id is not None and not isinstance(insn.id, UniqueName):
                if insn.id in insn_ids:
                    raise RuntimeError("duplicate instruction id: %s" % insn.id)
                insn_ids.add(insn.id)

        insn_id_gen = UniqueNameGenerator(insn_ids)

        new_instructions = []

        for insn in instructions:
            if insn.id is None:
                new_instructions.append(
                        insn.copy(id=insn_id_gen("insn")))
            elif isinstance(insn.id, UniqueName):
                new_instructions.append(
                        insn.copy(id=insn_id_gen(insn.id.name)))
            else:
                new_instructions.append(insn)

        instructions = new_instructions
        del new_instructions

        # }}}

        # {{{ process assumptions

        if assumptions is None:
            # no assumptions given: use the universe over the parameters
            # of the first domain
            dom0_space = domains[0].get_space()
            assumptions_space = isl.Space.params_alloc(
                    dom0_space.get_ctx(), dom0_space.dim(dim_type.param))
            for i in range(dom0_space.dim(dim_type.param)):
                assumptions_space = assumptions_space.set_dim_name(
                        dim_type.param, i,
                        dom0_space.get_dim_name(dim_type.param, i))
            assumptions = isl.BasicSet.universe(assumptions_space)

        elif isinstance(assumptions, str):
            assumptions_set_str = "[%s] -> { : %s}" \
                    % (",".join(self.outer_params(domains)),
                        assumptions)
            assumptions = isl.BasicSet.read_from_str(domains[0].get_ctx(),
                    assumptions_set_str)

        assert assumptions.is_params()

        # }}}

        from loopy.types import to_loopy_type
        index_dtype = to_loopy_type(index_dtype).with_target(target)
        if not index_dtype.is_integral():
            raise TypeError("index_dtype must be an integer")
        if np.iinfo(index_dtype.numpy_dtype).min >= 0:
            raise TypeError("index_dtype must be signed")

        if get_grid_sizes_for_insn_ids is not None:
            # overwrites method down below
            self.get_grid_sizes_for_insn_ids = get_grid_sizes_for_insn_ids

        if state not in [
                kernel_state.INITIAL,
                kernel_state.PREPROCESSED,
                kernel_state.SCHEDULED,
                ]:
            raise ValueError("invalid value for 'state'")

        assert all(dom.get_ctx() == isl.DEFAULT_CONTEXT for dom in domains)
        assert assumptions.get_ctx() == isl.DEFAULT_CONTEXT

        RecordWithoutPickling.__init__(self,
                domains=domains,
                instructions=instructions,
                args=args,
                schedule=schedule,
                name=name,
                preambles=preambles,
                preamble_generators=preamble_generators,
                assumptions=assumptions,
                iname_slab_increments=iname_slab_increments,
                loop_priority=loop_priority,
                silenced_warnings=silenced_warnings,
                temporary_variables=temporary_variables,
                local_sizes=local_sizes,
                iname_to_tag=iname_to_tag,
                substitutions=substitutions,
                cache_manager=cache_manager,
                applied_iname_rewrites=applied_iname_rewrites,
                function_manglers=function_manglers,
                symbol_manglers=symbol_manglers,
                index_dtype=index_dtype,
                options=options,
                state=state,
                target=target)
示例#19
0
文件: data.py 项目: inducer/loopy
    def __init__(self, name, dtype=None, shape=(), address_space=None,
            dim_tags=None, offset=0, dim_names=None, strides=None, order=None,
            base_indices=None, storage_shape=None,
            base_storage=None, initializer=None, read_only=False,
            _base_storage_access_may_be_aliasing=False, **kwargs):
        """
        :arg dtype: :class:`loopy.auto` or a :class:`numpy.dtype`
        :arg shape: :class:`loopy.auto` or a shape tuple
        :arg address_space: defaults to :class:`loopy.auto` if *None*.
            The deprecated keyword ``scope`` is accepted as an alias, but
            only one of the two may be given.
        :arg base_indices: :class:`loopy.auto` or a tuple of base indices.
            If *None*, a tuple of zeros with one entry per entry of *shape*
            is used.
        :arg initializer: *None* or a :class:`numpy.ndarray`. If an array is
            given, *offset* must be 0, *read_only* must be true, and
            *base_storage* must be *None*. An unspecified *dtype* is taken
            from the array, and so is a *shape* of :class:`loopy.auto`.
        :arg order: defaults to ``"C"`` if *None*.

        :raises ValueError: if both ``scope`` and *address_space* are given.
        :raises LoopyError: on an inconsistent combination of *initializer*,
            *offset*, *dtype*, *read_only*, *base_storage* and
            *_base_storage_access_may_be_aliasing*.
        """

        # {{{ deprecated 'scope' alias for 'address_space'

        scope = kwargs.pop("scope", None)
        if scope is not None:
            warn("Passing 'scope' is deprecated. Use 'address_space' instead.",
                    DeprecationWarning, stacklevel=2)

            if address_space is not None:
                raise ValueError("only one of 'scope' and 'address_space' "
                        "may be specified")

            address_space = scope

        del scope

        # }}}

        # An unspecified address space means "let loopy decide". (A later
        # "must not be None" check could never fire and has been dropped.)
        if address_space is None:
            address_space = auto

        # {{{ validate initializer

        if initializer is None:
            pass
        elif isinstance(initializer, np.ndarray):
            if offset != 0:
                raise LoopyError(
                        "temporary variable '%s': "
                        "offset must be 0 if initializer specified"
                        % name)

            from loopy.types import NumpyType, to_loopy_type
            if dtype is auto or dtype is None:
                # take the element type from the initializer data
                dtype = NumpyType(initializer.dtype)
            elif to_loopy_type(dtype) != to_loopy_type(initializer.dtype):
                raise LoopyError(
                        "temporary variable '%s': "
                        "dtype of initializer does not match "
                        "dtype of array."
                        % name)

            if shape is auto:
                shape = initializer.shape

        else:
            raise LoopyError(
                    "temporary variable '%s': "
                    "initializer must be None or a numpy array"
                    % name)

        # }}}

        if order is None:
            order = "C"

        if base_indices is None:
            base_indices = (0,) * len(shape)

        if not read_only and initializer is not None:
            raise LoopyError(
                    "temporary variable '%s': "
                    "read-write variables with initializer "
                    "are not currently supported "
                    "(did you mean to set read_only=True?)"
                    % name)

        if base_storage is not None and initializer is not None:
            raise LoopyError(
                    "temporary variable '%s': "
                    "base_storage and initializer are "
                    "mutually exclusive"
                    % name)

        if base_storage is None and _base_storage_access_may_be_aliasing:
            raise LoopyError(
                    "temporary variable '%s': "
                    "_base_storage_access_may_be_aliasing option, but no "
                    "base_storage given!"
                    % name)

        ArrayBase.__init__(self, name=intern(name),
                dtype=dtype, shape=shape, strides=strides,
                dim_tags=dim_tags, offset=offset, dim_names=dim_names,
                order=order,
                base_indices=base_indices,
                address_space=address_space,
                storage_shape=storage_shape,
                base_storage=base_storage,
                initializer=initializer,
                read_only=read_only,
                _base_storage_access_may_be_aliasing=(
                    _base_storage_access_may_be_aliasing),
                **kwargs)
示例#20
0
    def __init__(self, domains, instructions, args=None, schedule=None,
            name="loopy_kernel",
            preambles=None,
            preamble_generators=None,
            assumptions=None,
            local_sizes=None,
            temporary_variables=None,
            iname_to_tags=None,
            substitutions=None,
            function_manglers=None,
            symbol_manglers=None,

            iname_slab_increments=None,
            loop_priority=frozenset(),
            silenced_warnings=None,

            applied_iname_rewrites=None,
            cache_manager=None,
            index_dtype=np.int32,
            options=None,

            state=KernelState.INITIAL,
            target=None,

            overridden_get_grid_sizes_for_insn_ids=None,
            _cached_written_variables=None):
        """
        Container arguments left at *None* are replaced by a fresh empty
        list/dict (or, for *function_manglers*, by the two default manglers),
        so no default container is shared between kernels.

        :arg assumptions: *None* (a universe parameter set is built from the
            parameter space of ``domains[0]``), a string (parsed as the
            constraint part of an isl parameter set over the outer parameters
            of *domains*), or an object providing ``is_params()`` and
            ``get_ctx()``.
        :arg index_dtype: converted via :func:`loopy.types.to_loopy_type`;
            must be a signed integer type.
        :arg state: one of ``KernelState.INITIAL``,
            ``KernelState.PREPROCESSED``, ``KernelState.SCHEDULED``.
        :arg overridden_get_grid_sizes_for_insn_ids: A callable. When kernels get
            intersected in slab decomposition, their grid sizes shouldn't
            change. This provides a way to forward sub-kernel grid size requests.

        :raises TypeError: if *index_dtype* is not an integer type or is
            unsigned.
        :raises ValueError: if *state* is not one of the values listed above.
        """

        # {{{ process constructor arguments

        if args is None:
            args = []
        if preambles is None:
            preambles = []
        if preamble_generators is None:
            preamble_generators = []
        if local_sizes is None:
            local_sizes = {}
        if temporary_variables is None:
            temporary_variables = {}
        if iname_to_tags is None:
            iname_to_tags = {}
        if substitutions is None:
            substitutions = {}
        if function_manglers is None:
            function_manglers = [
                default_function_mangler,
                single_arg_function_mangler,
                ]
        if symbol_manglers is None:
            # A fresh list per kernel. (This branch used to overwrite
            # function_manglers and leave symbol_manglers as None.)
            symbol_manglers = []
        if iname_slab_increments is None:
            iname_slab_increments = {}

        if silenced_warnings is None:
            silenced_warnings = []
        if applied_iname_rewrites is None:
            applied_iname_rewrites = []

        if cache_manager is None:
            from loopy.kernel.tools import SetOperationCacheManager
            cache_manager = SetOperationCacheManager()

        # }}}

        # {{{ process assumptions

        if assumptions is None:
            # no assumptions given: use the universe over the parameters
            # of the first domain
            dom0_space = domains[0].get_space()
            assumptions_space = isl.Space.params_alloc(
                    dom0_space.get_ctx(), dom0_space.dim(dim_type.param))
            for i in range(dom0_space.dim(dim_type.param)):
                assumptions_space = assumptions_space.set_dim_name(
                        dim_type.param, i,
                        dom0_space.get_dim_name(dim_type.param, i))
            assumptions = isl.BasicSet.universe(assumptions_space)

        elif isinstance(assumptions, str):
            assumptions_set_str = "[%s] -> { : %s}" \
                    % (",".join(self.outer_params(domains)),
                        assumptions)
            assumptions = isl.BasicSet.read_from_str(domains[0].get_ctx(),
                    assumptions_set_str)

        assert assumptions.is_params()

        # }}}

        from loopy.types import to_loopy_type
        index_dtype = to_loopy_type(index_dtype, target=target)
        if not index_dtype.is_integral():
            raise TypeError("index_dtype must be an integer")
        if np.iinfo(index_dtype.numpy_dtype).min >= 0:
            raise TypeError("index_dtype must be signed")

        if state not in [
                KernelState.INITIAL,
                KernelState.PREPROCESSED,
                KernelState.SCHEDULED,
                ]:
            raise ValueError("invalid value for 'state'")

        from collections import defaultdict
        assert not isinstance(iname_to_tags, defaultdict)

        for iname, tags in six.iteritems(iname_to_tags):
            # don't tolerate empty sets
            assert tags
            assert isinstance(tags, frozenset)

        assert all(dom.get_ctx() == isl.DEFAULT_CONTEXT for dom in domains)
        assert assumptions.get_ctx() == isl.DEFAULT_CONTEXT

        ImmutableRecordWithoutPickling.__init__(self,
                domains=domains,
                instructions=instructions,
                args=args,
                schedule=schedule,
                name=name,
                preambles=preambles,
                preamble_generators=preamble_generators,
                assumptions=assumptions,
                iname_slab_increments=iname_slab_increments,
                loop_priority=loop_priority,
                silenced_warnings=silenced_warnings,
                temporary_variables=temporary_variables,
                local_sizes=local_sizes,
                iname_to_tags=iname_to_tags,
                substitutions=substitutions,
                cache_manager=cache_manager,
                applied_iname_rewrites=applied_iname_rewrites,
                function_manglers=function_manglers,
                symbol_manglers=symbol_manglers,
                index_dtype=index_dtype,
                options=options,
                state=state,
                target=target,
                overridden_get_grid_sizes_for_insn_ids=(
                    overridden_get_grid_sizes_for_insn_ids),
                _cached_written_variables=_cached_written_variables)

        self._kernel_executor_cache = {}
示例#21
0
文件: __init__.py 项目: arghdos/loopy
    def __init__(
        self,
        domains,
        instructions,
        args=None,
        schedule=None,
        name="loopy_kernel",
        preambles=None,
        preamble_generators=None,
        assumptions=None,
        local_sizes=None,
        temporary_variables=None,
        iname_to_tag=None,
        substitutions=None,
        function_manglers=None,
        symbol_manglers=None,
        iname_slab_increments=None,
        loop_priority=frozenset(),
        silenced_warnings=None,
        applied_iname_rewrites=None,
        cache_manager=None,
        index_dtype=np.int32,
        options=None,
        state=kernel_state.INITIAL,
        target=None,

        # When kernels get intersected in slab decomposition,
        # their grid sizes shouldn't change. This provides
        # a way to forward sub-kernel grid size requests.
        get_grid_sizes_for_insn_ids=None):
        """Normalize the constructor arguments and store them on the record.

        Container-valued arguments (*args*, *preambles*,
        *preamble_generators*, *local_sizes*, *temporary_variables*,
        *iname_to_tag*, *substitutions*, *symbol_manglers*,
        *iname_slab_increments*, *silenced_warnings*,
        *applied_iname_rewrites*) default to a fresh empty list/dict per call.
        *function_manglers* defaults to a fresh list holding
        ``default_function_mangler`` and ``single_arg_function_mangler``.

        :arg domains: a sequence of isl sets; ``domains[0]`` supplies the
            parameter space and context used when *assumptions* has to be
            built or parsed.
        :arg instructions: an iterable of instructions. Instructions whose
            ``id`` is *None* or a ``UniqueName`` are copied with a freshly
            generated id that does not clash with the explicitly given ones.
        :arg assumptions: *None* (a universe set over the parameters of
            ``domains[0]`` is used), a string holding isl constraints, or
            an isl parameter set.
        :arg cache_manager: if *None*, a new ``SetOperationCacheManager`` is
            created.
        :arg index_dtype: converted with ``to_loopy_type``; must be a signed
            integer type.
        :arg state: one of ``kernel_state.INITIAL``,
            ``kernel_state.PREPROCESSED`` or ``kernel_state.SCHEDULED``.
        :arg get_grid_sizes_for_insn_ids: if not *None*, assigned as an
            instance attribute of the same name.

        :raises RuntimeError: if two instructions carry the same explicit id.
        :raises TypeError: if *index_dtype* is not integral or is unsigned.
        :raises ValueError: if *state* is not one of the values listed above.
        """

        # {{{ replace None placeholders by fresh containers

        # The signature deliberately avoids mutable default values: a default
        # list/dict is created once at function definition time and would be
        # stored, and therefore shared, by every kernel built with defaults.

        if args is None:
            args = []
        if preambles is None:
            preambles = []
        if preamble_generators is None:
            preamble_generators = []
        if local_sizes is None:
            local_sizes = {}
        if temporary_variables is None:
            temporary_variables = {}
        if iname_to_tag is None:
            iname_to_tag = {}
        if substitutions is None:
            substitutions = {}
        if function_manglers is None:
            function_manglers = [
                default_function_mangler,
                single_arg_function_mangler,
            ]
        if symbol_manglers is None:
            symbol_manglers = []
        if iname_slab_increments is None:
            iname_slab_increments = {}
        if silenced_warnings is None:
            silenced_warnings = []
        if applied_iname_rewrites is None:
            applied_iname_rewrites = []

        # }}}

        if cache_manager is None:
            from loopy.kernel.tools import SetOperationCacheManager
            cache_manager = SetOperationCacheManager()

        # {{{ make instruction ids unique

        from loopy.kernel.creation import UniqueName

        # First pass: collect (and check) all ids that were given explicitly,
        # so that generated ids can steer clear of them.
        insn_ids = set()
        for insn in instructions:
            if insn.id is not None and not isinstance(insn.id, UniqueName):
                if insn.id in insn_ids:
                    raise RuntimeError("duplicate instruction id: %s" %
                                       insn.id)
                insn_ids.add(insn.id)

        insn_id_gen = UniqueNameGenerator(insn_ids)

        # Second pass: hand out ids to instructions that lack a final one.
        new_instructions = []

        for insn in instructions:
            if insn.id is None:
                new_instructions.append(insn.copy(id=insn_id_gen("insn")))
            elif isinstance(insn.id, UniqueName):
                new_instructions.append(
                    insn.copy(id=insn_id_gen(insn.id.name)))
            else:
                new_instructions.append(insn)

        instructions = new_instructions
        del new_instructions

        # }}}

        # {{{ process assumptions

        if assumptions is None:
            # No assumptions given: use the universe set over a parameter
            # space carrying the same parameter names as the first domain.
            dom0_space = domains[0].get_space()
            assumptions_space = isl.Space.params_alloc(
                dom0_space.get_ctx(), dom0_space.dim(dim_type.param))
            for i in range(dom0_space.dim(dim_type.param)):
                assumptions_space = assumptions_space.set_dim_name(
                    dim_type.param, i,
                    dom0_space.get_dim_name(dim_type.param, i))
            assumptions = isl.BasicSet.universe(assumptions_space)

        elif isinstance(assumptions, str):
            # Wrap the bare constraint string into a full isl parameter set
            # over the outer parameters of the domains.
            assumptions_set_str = "[%s] -> { : %s}" \
                    % (",".join(s for s in self.outer_params(domains)),
                        assumptions)
            assumptions = isl.BasicSet.read_from_str(domains[0].get_ctx(),
                                                     assumptions_set_str)

        assert assumptions.is_params()

        # }}}

        from loopy.types import to_loopy_type
        index_dtype = to_loopy_type(index_dtype, target=target)
        if not index_dtype.is_integral():
            raise TypeError("index_dtype must be an integer")
        if np.iinfo(index_dtype.numpy_dtype).min >= 0:
            raise TypeError("index_dtype must be signed")

        if get_grid_sizes_for_insn_ids is not None:
            # overwrites method down below
            self.get_grid_sizes_for_insn_ids = get_grid_sizes_for_insn_ids

        if state not in [
                kernel_state.INITIAL,
                kernel_state.PREPROCESSED,
                kernel_state.SCHEDULED,
        ]:
            raise ValueError("invalid value for 'state'")

        assert all(dom.get_ctx() == isl.DEFAULT_CONTEXT for dom in domains)
        assert assumptions.get_ctx() == isl.DEFAULT_CONTEXT

        ImmutableRecordWithoutPickling.__init__(
            self,
            domains=domains,
            instructions=instructions,
            args=args,
            schedule=schedule,
            name=name,
            preambles=preambles,
            preamble_generators=preamble_generators,
            assumptions=assumptions,
            iname_slab_increments=iname_slab_increments,
            loop_priority=loop_priority,
            silenced_warnings=silenced_warnings,
            temporary_variables=temporary_variables,
            local_sizes=local_sizes,
            iname_to_tag=iname_to_tag,
            substitutions=substitutions,
            cache_manager=cache_manager,
            applied_iname_rewrites=applied_iname_rewrites,
            function_manglers=function_manglers,
            symbol_manglers=symbol_manglers,
            index_dtype=index_dtype,
            options=options,
            state=state,
            target=target)

        self._kernel_executor_cache = {}
示例#22
0
文件: data.py 项目: spillai/loopy
    def __init__(self,
                 name,
                 dtype=None,
                 shape=(),
                 scope=auto,
                 dim_tags=None,
                 offset=0,
                 dim_names=None,
                 strides=None,
                 order=None,
                 base_indices=None,
                 storage_shape=None,
                 base_storage=None,
                 initializer=None,
                 read_only=False,
                 **kwargs):
        """
        :arg dtype: :class:`loopy.auto` or a :class:`numpy.dtype`
        :arg shape: :class:`loopy.auto` or a shape tuple
        :arg strides: forwarded unchanged to the :class:`ArrayBase`
            constructor
        :arg order: defaults to ``"C"`` if *None*
        :arg base_indices: :class:`loopy.auto` or a tuple of base indices.
            If *None*, a tuple of zeros with one entry per axis of *shape*
            is used.
        :arg initializer: *None* or a :class:`numpy.ndarray`. If an array is
            given, *offset* must be 0, *read_only* must be *True*,
            *base_storage* must be *None*, and an unspecified *dtype* /
            *shape* is taken from the array.

        :raises LoopyError: if *initializer* is neither *None* nor a numpy
            array, if its dtype contradicts *dtype*, or if it is combined
            with a nonzero *offset*, with ``read_only=False`` or with
            *base_storage*.
        """

        if initializer is None:
            pass
        elif isinstance(initializer, np.ndarray):
            if offset != 0:
                raise LoopyError("temporary variable '%s': "
                                 "offset must be 0 if initializer specified" %
                                 name)

            from loopy.types import NumpyType, to_loopy_type
            if dtype is auto or dtype is None:
                # No dtype requested: adopt the initializer's dtype.
                dtype = NumpyType(initializer.dtype)
            elif to_loopy_type(dtype) != to_loopy_type(initializer.dtype):
                raise LoopyError("temporary variable '%s': "
                                 "dtype of initializer does not match "
                                 "dtype of array." % name)

            if shape is auto:
                shape = initializer.shape

        else:
            raise LoopyError("temporary variable '%s': "
                             "initializer must be None or a numpy array" %
                             name)

        if order is None:
            order = "C"

        if base_indices is None:
            base_indices = (0, ) * len(shape)

        if not read_only and initializer is not None:
            raise LoopyError("temporary variable '%s': "
                             "read-write variables with initializer "
                             "are not currently supported "
                             "(did you mean to set read_only=True?)" % name)

        if base_storage is not None and initializer is not None:
            raise LoopyError("temporary variable '%s': "
                             "base_storage and initializer are "
                             "mutually exclusive" % name)

        # 'strides' must be handed on explicitly: it is a named parameter of
        # this constructor and therefore never shows up in **kwargs. Leaving
        # it out would silently discard user-specified strides.
        ArrayBase.__init__(self,
                           name=intern(name),
                           dtype=dtype,
                           shape=shape,
                           strides=strides,
                           dim_tags=dim_tags,
                           offset=offset,
                           dim_names=dim_names,
                           order=order,
                           base_indices=base_indices,
                           scope=scope,
                           storage_shape=storage_shape,
                           base_storage=base_storage,
                           initializer=initializer,
                           read_only=read_only,
                           **kwargs)
示例#23
0
    def __init__(self,
                 name,
                 dtype=None,
                 shape=None,
                 dim_tags=None,
                 offset=0,
                 dim_names=None,
                 strides=None,
                 order=None,
                 for_atomic=False,
                 target=None,
                 alignment=None,
                 **kwargs):
        """
        All of the following (except *name*) are optional.
        Specify either strides or shape.

        :arg name: When passed to :class:`loopy.make_kernel`, this may contain
            multiple names separated by commas, in which case multiple arguments,
            each with identical properties, are created for each name.

        :arg dtype: anything accepted by :func:`loopy.types.to_loopy_type`,
            or *None* if unspecified. Passing :class:`loopy.auto` is
            deprecated and treated like *None*.

        :arg shape: May be any of the things specified under :attr:`shape`,
            or a string which can be parsed into the previous form.

        :arg dim_tags: A comma-separated list of tags as understood by
            :func:`loopy.kernel.array.parse_array_dim_tags`.
            May not be combined with explicitly given *strides*.

        :arg strides: May be one of the following:

            * None

            * :class:`loopy.auto`. The strides will be determined by *order*
              and the access footprint.

            * a tuple like :attr:`numpy.ndarray.shape`.

              Each entry of the tuple is also allowed to be a :mod:`pymbolic`
              expression involving kernel parameters, or a string that can
              be parsed to such an expression.

            * A string (potentially comma-separated) which can be parsed
              into the previous form.

        :arg order: "F" or "C" for C (row major) or Fortran
            (column major). Defaults to the *default_order* argument
            passed to :func:`loopy.make_kernel`.
        :arg for_atomic:
            Whether the array is declared for atomic access, and, if necessary,
            using atomic-capable data types.
        :arg target: passed on to :func:`loopy.types.to_loopy_type` when
            converting *dtype*.
        :arg offset: (See :attr:`offset`)
        :arg alignment: memory alignment in bytes

        :raises TypeError: if an entry of *kwargs* is not listed in
            ``allowed_extra_kwargs``, or if both *dim_tags* and explicit
            *strides* are given.
        :raises LoopyError: if *dim_names* contains duplicates or
            non-strings, if shape/dim_tags/dim_names disagree on the number
            of axes, or if the target axes are non-contiguous or outside
            the range supported by the class.
        """

        for kwarg_name in kwargs:
            if kwarg_name not in self.allowed_extra_kwargs:
                raise TypeError("invalid kwarg: %s" % kwarg_name)

        import loopy as lp

        from loopy.types import to_loopy_type
        dtype = to_loopy_type(dtype,
                              allow_auto=True,
                              allow_none=True,
                              for_atomic=for_atomic,
                              target=target)

        if dtype is lp.auto:
            from warnings import warn
            warn(
                "Argument/temporary data type for '%s' should be None if "
                "unspecified, not auto. This usage will be disallowed in 2018."
                % name,
                DeprecationWarning,
                stacklevel=2)

            dtype = None

        # "known" means: an actual value, neither None nor loopy.auto
        strides_known = strides is not None and strides is not lp.auto
        shape_known = shape is not None and shape is not lp.auto

        if strides_known:
            strides = _parse_shape_or_strides(strides)

        if shape_known:
            shape = _parse_shape_or_strides(shape)

        # {{{ check dim_names

        if dim_names is not None:
            if len(dim_names) != len(set(dim_names)):
                raise LoopyError("dim_names are not unique")

            for n in dim_names:
                if not isinstance(n, str):
                    raise LoopyError("found non-string '%s' in dim_names" %
                                     type(n).__name__)

        # }}}

        # {{{ convert strides to dim_tags (Note: strides override order)

        if dim_tags is not None and strides_known:
            raise TypeError("may not specify both strides and dim_tags")

        if dim_tags is None and strides_known:
            dim_tags = [FixedStrideArrayDimTag(s) for s in strides]
            strides = None

        # }}}

        if dim_tags is not None:
            dim_tags = parse_array_dim_tags(
                dim_tags,
                n_axes=(len(shape) if shape_known else None),
                use_increasing_target_axes=self.max_target_axes > 1,
                dim_names=dim_names)

        # {{{ determine number of user axes

        # Every source that is available (shape, dim_tags, dim_names) must
        # agree on the number of axes.
        num_user_axes = None
        if shape_known:
            num_user_axes = len(shape)
        for dim_iterable in [dim_tags, dim_names]:
            if dim_iterable is not None:
                new_num_user_axes = len(dim_iterable)

                if num_user_axes is None:
                    num_user_axes = new_num_user_axes
                else:
                    if new_num_user_axes != num_user_axes:
                        raise LoopyError(
                            "contradictory values for number of "
                            "dimensions of array '%s' from shape, strides, "
                            "dim_tags, or dim_names" % name)

                del new_num_user_axes

        # }}}

        # {{{ convert order to dim_tags

        if order is None and self.max_target_axes > 1:
            # FIXME: Hackety hack. ImageArgs need to generate dim_tags even
            # if no order is specified. Plus they don't care that much.
            order = "C"

        if dim_tags is None and num_user_axes is not None and order is not None:
            dim_tags = parse_array_dim_tags(
                num_user_axes * [order],
                n_axes=num_user_axes,
                use_increasing_target_axes=self.max_target_axes > 1,
                dim_names=dim_names)
            order = None

        # }}}

        # NOTE: The error messages below must use the local 'name' rather
        # than 'self.name': the 'name' field is only stored on the instance
        # by ImmutableRecord.__init__ at the very end of this constructor.

        if dim_tags is not None:
            # {{{ find number of target axes

            target_axes = set()
            for dim_tag in dim_tags:
                if isinstance(dim_tag, _StrideArrayDimTagBase):
                    target_axes.add(dim_tag.target_axis)

            if target_axes != set(range(len(target_axes))):
                raise LoopyError("target axes for variable '%s' are non-"
                                 "contiguous" % name)

            num_target_axes = len(target_axes)
            del target_axes

            # }}}

            if not (self.min_target_axes <= num_target_axes <=
                    self.max_target_axes):
                raise LoopyError(
                    "%s only supports between %d and %d target axes "
                    "('%s' has %d)" %
                    (type(self).__name__, self.min_target_axes,
                     self.max_target_axes, name, num_target_axes))

            new_dim_tags = convert_computed_to_fixed_dim_tags(
                name, num_user_axes, num_target_axes, shape, dim_tags)

            if new_dim_tags is not None:
                # successfully normalized
                dim_tags = new_dim_tags
                del new_dim_tags

        if dim_tags is not None:
            # for hashability
            dim_tags = tuple(dim_tags)
            order = None

        if strides is not None:
            # Preserve strides if we weren't able to process them yet.
            # That only happens if they're set to loopy.auto (and 'guessed'
            # in loopy.kernel.creation).

            kwargs["strides"] = strides

        if dim_names is not None and not isinstance(dim_names, tuple):
            from warnings import warn
            warn("dim_names is not a tuple when calling ArrayBase constructor",
                 DeprecationWarning,
                 stacklevel=2)

        ImmutableRecord.__init__(self,
                                 name=name,
                                 dtype=dtype,
                                 shape=shape,
                                 dim_tags=dim_tags,
                                 offset=offset,
                                 dim_names=dim_names,
                                 order=order,
                                 alignment=alignment,
                                 for_atomic=for_atomic,
                                 **kwargs)
示例#24
0
 def __compare(d1, d2):
     """Return whether *d1* and *d2* are equal once both have been
     converted with ``to_loopy_type(..., for_atomic=True)``."""
     # Convert both sides with for_atomic=True so that they are compared
     # in the same form.
     lhs = to_loopy_type(d1, for_atomic=True)
     rhs = to_loopy_type(d2, for_atomic=True)
     return lhs == rhs