Пример #1
0
def make_ref_args(kernel, impl_arg_info, queue, parameters):
    import pyopencl as cl
    import pyopencl.array as cl_array

    from loopy.kernel.data import ValueArg, ArrayArg, ImageArg, \
            TemporaryVariable, ConstantArg

    from pymbolic import evaluate

    ref_args = {}
    ref_arg_data = []

    for arg in impl_arg_info:
        kernel_arg = kernel.impl_arg_to_arg.get(arg.name)

        if arg.arg_class is ValueArg:
            if arg.offset_for_name:
                continue

            arg_value = parameters[arg.name]

            try:
                argv_dtype = arg_value.dtype
            except AttributeError:
                argv_dtype = None

            if argv_dtype != arg.dtype:
                arg_value = arg.dtype.numpy_dtype.type(arg_value)

            ref_args[arg.name] = arg_value

            ref_arg_data.append(None)

        elif arg.arg_class is ArrayArg or arg.arg_class is ImageArg \
                or arg.arg_class is ConstantArg:
            if arg.shape is None or any(saxis is None for saxis in arg.shape):
                raise LoopyError(
                    "array '%s' needs known shape to use automatic "
                    "testing" % arg.name)

            shape = evaluate_shape(arg.unvec_shape, parameters)
            dtype = kernel_arg.dtype

            is_output = arg.base_name in kernel.get_written_variables()

            if arg.arg_class is ImageArg:
                storage_array = ary = cl_array.empty(queue,
                                                     shape,
                                                     dtype,
                                                     order="C")
                numpy_strides = None
                alloc_size = None
                strides = None
            else:
                strides = evaluate(arg.unvec_strides, parameters)

                alloc_size = sum(astrd * (alen - 1) if astrd != 0 else alen - 1
                                 for alen, astrd in zip(shape, strides)) + 1

                if dtype is None:
                    raise LoopyError("dtype for argument '%s' is not yet "
                                     "known. Perhaps you want to use "
                                     "loopy.add_dtypes "
                                     "or loopy.infer_argument_dtypes?" %
                                     arg.name)

                itemsize = dtype.itemsize
                numpy_strides = [itemsize * s for s in strides]

                storage_array = cl_array.empty(queue, alloc_size, dtype)

            if is_output and arg.arg_class is ImageArg:
                raise LoopyError("write-mode images not supported in "
                                 "automatic testing")

            fill_rand(storage_array)

            if arg.arg_class is ImageArg:
                # must be contiguous
                pre_run_ary = pre_run_storage_array = storage_array.copy()

                ref_args[arg.name] = cl.image_from_array(
                    queue.context, ary.get())
            else:
                pre_run_storage_array = storage_array.copy()

                ary = cl_array.as_strided(storage_array, shape, numpy_strides)
                pre_run_ary = cl_array.as_strided(pre_run_storage_array, shape,
                                                  numpy_strides)
                ref_args[arg.name] = ary

            ref_arg_data.append(
                TestArgInfo(name=arg.name,
                            ref_array=ary,
                            ref_storage_array=storage_array,
                            ref_pre_run_array=pre_run_ary,
                            ref_pre_run_storage_array=pre_run_storage_array,
                            ref_shape=shape,
                            ref_strides=strides,
                            ref_alloc_size=alloc_size,
                            ref_numpy_strides=numpy_strides,
                            needs_checking=is_output))

        elif arg.arg_class is TemporaryVariable:
            # global temporary, handled by invocation logic
            pass

        else:
            raise LoopyError("arg type %s not understood" % type(arg))

    return ref_args, ref_arg_data
Пример #2
0
def make_ref_args(kernel, impl_arg_info, queue, parameters):
    import pyopencl as cl
    import pyopencl.array as cl_array

    from loopy.kernel.data import ValueArg, GlobalArg, ImageArg, TemporaryVariable

    from pymbolic import evaluate

    ref_args = {}
    ref_arg_data = []

    for arg in impl_arg_info:
        kernel_arg = kernel.impl_arg_to_arg.get(arg.name)

        if arg.arg_class is ValueArg:
            if arg.offset_for_name:
                continue

            arg_value = parameters[arg.name]

            try:
                argv_dtype = arg_value.dtype
            except AttributeError:
                argv_dtype = None

            if argv_dtype != arg.dtype:
                arg_value = arg.dtype.numpy_dtype.type(arg_value)

            ref_args[arg.name] = arg_value

            ref_arg_data.append(None)

        elif arg.arg_class is GlobalArg or arg.arg_class is ImageArg:
            if arg.shape is None or any(saxis is None for saxis in arg.shape):
                raise LoopyError("array '%s' needs known shape to use automatic "
                        "testing" % arg.name)

            shape = evaluate_shape(arg.unvec_shape, parameters)
            dtype = kernel_arg.dtype

            is_output = arg.base_name in kernel.get_written_variables()

            if arg.arg_class is ImageArg:
                storage_array = ary = cl_array.empty(
                        queue, shape, dtype, order="C")
                numpy_strides = None
                alloc_size = None
                strides = None
            else:
                strides = evaluate(arg.unvec_strides, parameters)

                from pytools import all
                assert all(s > 0 for s in strides)
                alloc_size = sum(astrd*(alen-1)
                        for alen, astrd in zip(shape, strides)) + 1

                if dtype is None:
                    raise LoopyError("dtype for argument '%s' is not yet "
                            "known. Perhaps you want to use "
                            "loopy.add_dtypes "
                            "or loopy.infer_argument_dtypes?"
                            % arg.name)

                itemsize = dtype.itemsize
                numpy_strides = [itemsize*s for s in strides]

                storage_array = cl_array.empty(queue, alloc_size, dtype)

            if is_output and arg.arg_class is ImageArg:
                raise LoopyError("write-mode images not supported in "
                        "automatic testing")

            fill_rand(storage_array)

            if arg.arg_class is ImageArg:
                # must be contiguous
                pre_run_ary = pre_run_storage_array = storage_array.copy()

                ref_args[arg.name] = cl.image_from_array(
                        queue.context, ary.get())
            else:
                pre_run_storage_array = storage_array.copy()

                ary = cl_array.as_strided(storage_array, shape, numpy_strides)
                pre_run_ary = cl_array.as_strided(
                        pre_run_storage_array, shape, numpy_strides)
                ref_args[arg.name] = ary

            ref_arg_data.append(
                    TestArgInfo(
                        name=arg.name,
                        ref_array=ary,
                        ref_storage_array=storage_array,

                        ref_pre_run_array=pre_run_ary,
                        ref_pre_run_storage_array=pre_run_storage_array,

                        ref_shape=shape,
                        ref_strides=strides,
                        ref_alloc_size=alloc_size,
                        ref_numpy_strides=numpy_strides,
                        needs_checking=is_output))

        elif arg.arg_class is TemporaryVariable:
            # global temporary, handled by invocation logic
            pass

        else:
            raise LoopyError("arg type not understood")

    return ref_args, ref_arg_data
Пример #3
0
def make_args(kernel, impl_arg_info, queue, ref_arg_data, parameters):
    import pyopencl as cl
    import pyopencl.array as cl_array

    from loopy.kernel.data import ValueArg, ArrayArg, ImageArg,\
            TemporaryVariable, ConstantArg

    from pymbolic import evaluate

    args = {}
    for arg, arg_desc in zip(impl_arg_info, ref_arg_data):
        kernel_arg = kernel.impl_arg_to_arg.get(arg.name)

        if arg.arg_class is ValueArg:
            arg_value = parameters[arg.name]

            try:
                argv_dtype = arg_value.dtype
            except AttributeError:
                argv_dtype = None

            if argv_dtype != arg.dtype:
                arg_value = arg.dtype.numpy_dtype.type(arg_value)

            args[arg.name] = arg_value

        elif arg.arg_class is ImageArg:
            if arg.name in kernel.get_written_variables():
                raise NotImplementedError("write-mode images not supported in "
                                          "automatic testing")

            shape = evaluate_shape(arg.unvec_shape, parameters)
            assert shape == arg_desc.ref_shape

            # must be contiguous
            args[arg.name] = cl.image_from_array(
                queue.context, arg_desc.ref_pre_run_array.get())

        elif arg.arg_class is ArrayArg or\
                arg.arg_class is ConstantArg:
            shape = evaluate(arg.unvec_shape, parameters)
            strides = evaluate(arg.unvec_strides, parameters)

            dtype = kernel_arg.dtype
            itemsize = dtype.itemsize
            numpy_strides = [itemsize * s for s in strides]

            alloc_size = sum(astrd * (alen - 1) if astrd != 0 else alen - 1
                             for alen, astrd in zip(shape, strides)) + 1

            # use contiguous array to transfer to host
            host_ref_contig_array = arg_desc.ref_pre_run_storage_array.get()

            # use device shape/strides
            from pyopencl.compyte.array import as_strided
            host_ref_array = as_strided(host_ref_contig_array,
                                        arg_desc.ref_shape,
                                        arg_desc.ref_numpy_strides)

            # flatten the thing
            host_ref_flat_array = host_ref_array.flatten()

            # create host array with test shape (but not strides)
            host_contig_array = np.empty(shape, dtype=dtype)

            common_len = min(len(host_ref_flat_array),
                             len(host_contig_array.ravel()))
            host_contig_array.ravel()[:common_len] = \
                    host_ref_flat_array[:common_len]

            # create host array with test shape and storage layout
            host_storage_array = np.empty(alloc_size, dtype)
            host_array = as_strided(host_storage_array, shape, numpy_strides)
            host_array[...] = host_contig_array

            host_contig_array = arg_desc.ref_storage_array.get()
            storage_array = cl_array.to_device(queue, host_storage_array)
            ary = cl_array.as_strided(storage_array, shape, numpy_strides)

            args[arg.name] = ary

            arg_desc.test_storage_array = storage_array
            arg_desc.test_array = ary
            arg_desc.test_shape = shape
            arg_desc.test_strides = strides
            arg_desc.test_numpy_strides = numpy_strides
            arg_desc.test_alloc_size = alloc_size

        elif arg.arg_class is TemporaryVariable:
            # global temporary, handled by invocation logic
            pass

        else:
            raise LoopyError("arg type not understood")

    return args
Пример #4
0
def make_args(kernel, impl_arg_info, queue, ref_arg_data, parameters):
    import pyopencl as cl
    import pyopencl.array as cl_array

    from loopy.kernel.data import ValueArg, GlobalArg, ImageArg, TemporaryVariable

    from pymbolic import evaluate

    args = {}
    for arg, arg_desc in zip(impl_arg_info, ref_arg_data):
        kernel_arg = kernel.impl_arg_to_arg.get(arg.name)

        if arg.arg_class is ValueArg:
            arg_value = parameters[arg.name]

            try:
                argv_dtype = arg_value.dtype
            except AttributeError:
                argv_dtype = None

            if argv_dtype != arg.dtype:
                arg_value = arg.dtype.numpy_dtype.type(arg_value)

            args[arg.name] = arg_value

        elif arg.arg_class is ImageArg:
            if arg.name in kernel.get_written_variables():
                raise NotImplementedError("write-mode images not supported in "
                        "automatic testing")

            shape = evaluate_shape(arg.unvec_shape, parameters)
            assert shape == arg_desc.ref_shape

            # must be contiguous
            args[arg.name] = cl.image_from_array(
                    queue.context, arg_desc.ref_pre_run_array.get())

        elif arg.arg_class is GlobalArg:
            shape = evaluate(arg.unvec_shape, parameters)
            strides = evaluate(arg.unvec_strides, parameters)

            dtype = kernel_arg.dtype
            itemsize = dtype.itemsize
            numpy_strides = [itemsize*s for s in strides]

            assert all(s > 0 for s in strides)
            alloc_size = sum(astrd*(alen-1)
                    for alen, astrd in zip(shape, strides)) + 1

            # use contiguous array to transfer to host
            host_ref_contig_array = arg_desc.ref_pre_run_storage_array.get()

            # use device shape/strides
            from pyopencl.compyte.array import as_strided
            host_ref_array = as_strided(host_ref_contig_array,
                    arg_desc.ref_shape, arg_desc.ref_numpy_strides)

            # flatten the thing
            host_ref_flat_array = host_ref_array.flatten()

            # create host array with test shape (but not strides)
            host_contig_array = np.empty(shape, dtype=dtype)

            common_len = min(
                    len(host_ref_flat_array),
                    len(host_contig_array.ravel()))
            host_contig_array.ravel()[:common_len] = \
                    host_ref_flat_array[:common_len]

            # create host array with test shape and storage layout
            host_storage_array = np.empty(alloc_size, dtype)
            host_array = as_strided(
                    host_storage_array, shape, numpy_strides)
            host_array[...] = host_contig_array

            host_contig_array = arg_desc.ref_storage_array.get()
            storage_array = cl_array.to_device(queue, host_storage_array)
            ary = cl_array.as_strided(storage_array, shape, numpy_strides)

            args[arg.name] = ary

            arg_desc.test_storage_array = storage_array
            arg_desc.test_array = ary
            arg_desc.test_shape = shape
            arg_desc.test_strides = strides
            arg_desc.test_numpy_strides = numpy_strides
            arg_desc.test_alloc_size = alloc_size

        elif arg.arg_class is TemporaryVariable:
            # global temporary, handled by invocation logic
            pass

        else:
            raise LoopyError("arg type not understood")

    return args