Exemplo n.º 1
0
    def function_prepared_async_call(func, grid, block, stream, *args,
                                     **kwargs):
        """Asynchronously launch *func* over *grid* using its prepared
        argument format (``func.arg_format``).

        *block*, when a tuple, gives the block shape.  Otherwise the
        legacy (pre-2011.1) calling convention is assumed: *block* is
        really the stream and the true arguments start at *stream*.

        Recognized keyword argument: ``shared_size`` (dynamic shared
        memory size).  Any other keyword argument raises
        :exc:`TypeError` instead of being silently ignored.
        """
        if isinstance(block, tuple):
            func._set_block_shape(*block)
        else:
            from warnings import warn
            warn(
                "Not passing the block size to prepared_async_call is deprecated as of "
                "version 2011.1.",
                DeprecationWarning,
                stacklevel=2)
            # legacy convention: shift the arguments one slot to the left
            args = (stream, ) + args
            stream = block

        # Fix: **kwargs used to be accepted and silently dropped here.
        # Handle shared_size and reject anything unknown, matching the
        # sibling prepared-call helpers.
        shared_size = kwargs.pop("shared_size", None)
        if shared_size is not None:
            func._set_shared_size(shared_size)

        if kwargs:
            raise TypeError("unknown keyword arguments: " +
                            ", ".join(kwargs.keys()))

        from pycuda._pvt_struct import pack
        func._param_setv(0, pack(func.arg_format, *args))

        for texref in func.texrefs:
            func.param_set_texref(texref)

        if stream is None:
            func._launch_grid(*grid)
        else:
            grid_x, grid_y = grid
            func._launch_grid_async(grid_x, grid_y, stream)
Exemplo n.º 2
0
    def function_prepared_call_pre_v4(func, grid, block, *args, **kwargs):
        """Synchronously launch *func* on *grid* via the prepared-argument
        API (pre-CUDA-4 style).

        *block* should be the block-shape tuple; the legacy convention
        (no block size, arguments shifted) is accepted but deprecated.
        The only recognized keyword argument is ``shared_size``.
        """
        if not isinstance(block, tuple):
            from warnings import warn

            warn(
                "Not passing the block size to prepared_call is deprecated as of "
                "version 2011.1.",
                DeprecationWarning,
                stacklevel=2,
            )
            # legacy convention: *block* was actually the first argument
            args = (block, ) + args
        else:
            func._set_block_shape(*block)

        shared_size = kwargs.pop("shared_size", None)
        if shared_size is not None:
            func._set_shared_size(shared_size)

        if kwargs:
            raise TypeError(
                "unknown keyword arguments: " + ", ".join(kwargs.keys()))

        from pycuda._pvt_struct import pack

        func._param_setv(0, pack(func.arg_format, *args))

        for tex_ref in func.texrefs:
            func.param_set_texref(tex_ref)

        func._launch_grid(*grid)
Exemplo n.º 3
0
    def function_prepared_async_call(func, grid, block, stream, *args, **kwargs):
        """Asynchronously launch *func* on *grid* using the prepared
        argument format.

        *block* may be the block-shape tuple; otherwise the legacy
        convention is assumed (*block* is the stream, arguments shifted).
        ``shared_size`` is the only recognized keyword argument; any
        other raises :exc:`TypeError` rather than being silently dropped.
        """
        if isinstance(block, tuple):
            func._set_block_shape(*block)
        else:
            from warnings import warn

            warn(
                "Not passing the block size to prepared_async_call is deprecated as of " "version 2011.1.",
                DeprecationWarning,
                stacklevel=2,
            )
            # legacy convention: shift arguments left by one slot
            args = (stream,) + args
            stream = block

        # Fix: previously unknown kwargs were silently ignored; handle
        # shared_size and reject the rest, as the sibling helpers do.
        shared_size = kwargs.pop("shared_size", None)
        if shared_size is not None:
            func._set_shared_size(shared_size)

        if kwargs:
            raise TypeError("unknown keyword arguments: " + ", ".join(kwargs.keys()))

        from pycuda._pvt_struct import pack

        func._param_setv(0, pack(func.arg_format, *args))

        for texref in func.texrefs:
            func.param_set_texref(texref)

        if stream is None:
            func._launch_grid(*grid)
        else:
            grid_x, grid_y = grid
            func._launch_grid_async(grid_x, grid_y, stream)
Exemplo n.º 4
0
    def function_prepared_timed_call(func, grid, block, *args, **kwargs):
        """Launch *func* bracketed by two events and return a callable
        that, once invoked, waits for completion and reports the elapsed
        time in seconds.  ``shared_size`` is the only recognized keyword
        argument (default 0)."""
        shared_size = kwargs.pop("shared_size", 0)
        if kwargs:
            raise TypeError("unknown keyword arguments: " +
                            ", ".join(kwargs.keys()))

        from pycuda._pvt_struct import pack

        packed_args = pack(func.arg_format, *args)

        for tex_ref in func.texrefs:
            func.param_set_texref(tex_ref)

        start_evt, end_evt = Event(), Event()

        start_evt.record()
        func._launch_kernel(grid, block, packed_args, shared_size, None)
        end_evt.record()

        def get_call_time():
            # time_since reports milliseconds; convert to seconds
            end_evt.synchronize()
            return end_evt.time_since(start_evt) * 1e-3

        return get_call_time
Exemplo n.º 5
0
    def function_prepared_timed_call_pre_v4(func, grid, block, *args, **kwargs):
        """Launch *func* on *grid* (pre-CUDA-4 API) between two timing
        events, and return a callable that waits for completion and
        returns the elapsed time in seconds.

        *block* should be the block-shape tuple; the legacy convention
        (no block size) is deprecated.  ``shared_size`` is the only
        recognized keyword argument.
        """
        if isinstance(block, tuple):
            func._set_block_shape(*block)
        else:
            from warnings import warn
            warn("Not passing the block size to prepared_timed_call is deprecated as of "
                    "version 2011.1.", DeprecationWarning, stacklevel=2)
            args = (block,) + args

        shared_size = kwargs.pop("shared_size", None)
        if shared_size is not None:
            func._set_shared_size(shared_size)

        if kwargs:
            # Bug fix: dict.iterkeys() is Python 2 only and raises
            # AttributeError on Python 3; keys() works on both.
            raise TypeError("unknown keyword arguments: "
                    + ", ".join(kwargs.keys()))

        from pycuda._pvt_struct import pack
        func._param_setv(0, pack(func.arg_format, *args))

        for texref in func.texrefs:
            func.param_set_texref(texref)

        start = Event()
        end = Event()

        start.record()
        func._launch_grid(*grid)
        end.record()

        def get_call_time():
            # time_since is in milliseconds; convert to seconds
            end.synchronize()
            return end.time_since(start)*1e-3

        return get_call_time
Exemplo n.º 6
0
    def function_prepared_async_call(func, grid, block, stream, *args,
                                     **kwargs):
        """Asynchronously launch *func* on *grid*/*block* with the
        prepared argument format.  ``shared_size`` (default 0) is the
        only recognized keyword argument."""
        if not isinstance(block, tuple):
            from warnings import warn

            warn(
                "Not passing the block size to prepared_async_call is "
                "deprecated as of version 2011.1.",
                DeprecationWarning,
                stacklevel=2,
            )
            # legacy convention: shift arguments left by one slot
            args = (stream, ) + args
            stream = block
        else:
            func._set_block_shape(*block)

        shared_size = kwargs.pop("shared_size", 0)

        if kwargs:
            raise TypeError("unknown keyword arguments: " +
                            ", ".join(kwargs.keys()))

        from pycuda._pvt_struct import pack

        packed_args = pack(func.arg_format, *args)

        for tex_ref in func.texrefs:
            func.param_set_texref(tex_ref)

        func._launch_kernel(grid, block, packed_args, shared_size, stream)
Exemplo n.º 7
0
    def function_prepared_timed_call(func, grid, block, *args, **kwargs):
        """Launch *func* on *grid* between two timing events and return a
        callable that waits for completion and yields the elapsed time in
        seconds.

        *block* should be the block-shape tuple (the legacy convention is
        deprecated).  ``shared_size`` is the only recognized keyword
        argument; any other raises :exc:`TypeError`.
        """
        if isinstance(block, tuple):
            func._set_block_shape(*block)
        else:
            from warnings import warn
            warn("Not passing the block size to prepared_timed_call is deprecated as of "
                    "version 2011.1.", DeprecationWarning, stacklevel=2)
            args = (block,) + args

        # Fix: **kwargs used to be accepted but silently ignored here;
        # handle shared_size and reject anything else, like the siblings.
        shared_size = kwargs.pop("shared_size", None)
        if shared_size is not None:
            func._set_shared_size(shared_size)

        if kwargs:
            raise TypeError("unknown keyword arguments: " +
                            ", ".join(kwargs.keys()))

        from pycuda._pvt_struct import pack
        func._param_setv(0, pack(func.arg_format, *args))

        for texref in func.texrefs:
            func.param_set_texref(texref)

        start = Event()
        end = Event()

        start.record()
        func._launch_grid(*grid)
        end.record()

        def get_call_time():
            # time_since reports milliseconds; convert to seconds
            end.synchronize()
            return end.time_since(start)*1e-3

        return get_call_time
Exemplo n.º 8
0
    def _build_arg_buf(args):
        """Translate kernel arguments *args* into ``(handlers, buffer)``.

        *handlers* collects any ArgumentHandler instances encountered;
        *buffer* is the struct-packed parameter block.  Raises TypeError
        for an argument that matches none of the supported kinds.
        """
        handlers = []
        arg_data = []
        fmt = ""

        for idx, arg in enumerate(args):
            if isinstance(arg, np.number):
                arg_data.append(arg)
                fmt += arg.dtype.char
                continue

            if isinstance(arg, (DeviceAllocation, PooledDeviceAllocation)):
                arg_data.append(int(arg))
                fmt += "P"
                continue

            if isinstance(arg, ArgumentHandler):
                handlers.append(arg)
                arg_data.append(int(arg.get_device_alloc()))
                fmt += "P"
                continue

            if isinstance(arg, np.ndarray):
                # host array: embed its raw bytes directly
                arg_data.append(arg)
                fmt += "%ds" % arg.nbytes
                continue

            try:
                gpudata = np.intp(arg.gpudata)
            except AttributeError:
                raise TypeError("invalid type on parameter #%d (0-based)" % idx)
            else:
                # for gpuarrays
                arg_data.append(int(gpudata))
                fmt += "P"

        from pycuda._pvt_struct import pack
        return handlers, pack(fmt, *arg_data)
Exemplo n.º 9
0
    def function_prepared_async_call_pre_v4(func, grid, block, stream, *args, **kwargs):
        """Asynchronously launch *func* on *grid* (pre-CUDA-4 API).

        *block* should be the block-shape tuple; otherwise the legacy
        convention is assumed (*block* is the stream, arguments shifted).
        ``shared_size`` is the only recognized keyword argument.
        """
        if isinstance(block, tuple):
            func._set_block_shape(*block)
        else:
            from warnings import warn
            warn("Not passing the block size to prepared_async_call is deprecated as of "
                    "version 2011.1.", DeprecationWarning, stacklevel=2)
            args = (stream,) + args
            stream = block

        shared_size = kwargs.pop("shared_size", None)
        if shared_size is not None:
            func._set_shared_size(shared_size)

        if kwargs:
            # Bug fix: dict.iterkeys() is Python 2 only and raises
            # AttributeError on Python 3; keys() works on both.
            raise TypeError("unknown keyword arguments: "
                    + ", ".join(kwargs.keys()))

        from pycuda._pvt_struct import pack
        func._param_setv(0, pack(func.arg_format, *args))

        for texref in func.texrefs:
            func.param_set_texref(texref)

        if stream is None:
            func._launch_grid(*grid)
        else:
            grid_x, grid_y = grid
            func._launch_grid_async(grid_x, grid_y, stream)
Exemplo n.º 10
0
    def _build_arg_buf(args):
        """Build the packed kernel-parameter buffer for *args*.

        Returns a pair ``(handlers, packed_bytes)`` where *handlers* are
        the ArgumentHandler instances seen among the arguments.
        """
        collected_handlers = []
        values = []
        struct_format = ""
        for position, argument in enumerate(args):
            if isinstance(argument, np.number):
                values.append(argument)
                struct_format += argument.dtype.char
            elif isinstance(argument,
                            (DeviceAllocation, PooledDeviceAllocation)):
                values.append(int(argument))
                struct_format += "P"
            elif isinstance(argument, ArgumentHandler):
                collected_handlers.append(argument)
                values.append(int(argument.get_device_alloc()))
                struct_format += "P"
            elif isinstance(argument, np.ndarray):
                # host array: pack its raw bytes inline
                values.append(argument)
                struct_format += "%ds" % argument.nbytes
            else:
                try:
                    dev_ptr = np.intp(argument.gpudata)
                except AttributeError:
                    raise TypeError(
                        "invalid type on parameter #%d (0-based)" % position)
                else:
                    # for gpuarrays
                    values.append(int(dev_ptr))
                    struct_format += "P"

        from pycuda._pvt_struct import pack
        return collected_handlers, pack(struct_format, *values)
Exemplo n.º 11
0
    def function_prepared_timed_call(func, grid, block, *args, **kwargs):
        """Launch *func* on *grid* bracketed by two events; return a
        callable that waits for completion and reports the elapsed time
        in seconds.

        ``shared_size`` is the only recognized keyword argument; any
        other keyword raises :exc:`TypeError` instead of being silently
        ignored (previous behavior).
        """
        if isinstance(block, tuple):
            func._set_block_shape(*block)
        else:
            from warnings import warn
            warn(
                "Not passing the block size to prepared_timed_call is deprecated as of "
                "version 2011.1.",
                DeprecationWarning,
                stacklevel=2)
            args = (block, ) + args

        # Fix: **kwargs used to be silently dropped; validate like siblings.
        shared_size = kwargs.pop("shared_size", None)
        if shared_size is not None:
            func._set_shared_size(shared_size)

        if kwargs:
            raise TypeError("unknown keyword arguments: " +
                            ", ".join(kwargs.keys()))

        from pycuda._pvt_struct import pack
        func._param_setv(0, pack(func.arg_format, *args))

        for texref in func.texrefs:
            func.param_set_texref(texref)

        start = Event()
        end = Event()

        start.record()
        func._launch_grid(*grid)
        end.record()

        def get_call_time():
            # time_since is in milliseconds; convert to seconds
            end.synchronize()
            return end.time_since(start) * 1e-3

        return get_call_time
Exemplo n.º 12
0
    def function_prepared_async_call(func, grid, block, stream, *args, **kwargs):
        """Asynchronously launch *func* on *grid*/*block* using the
        prepared argument format.

        *block* may be the block-shape tuple; otherwise the legacy
        convention applies (*block* is the stream, arguments shifted).
        ``shared_size`` (default 0) is the only recognized keyword
        argument.
        """
        if isinstance(block, tuple):
            func._set_block_shape(*block)
        else:
            from warnings import warn

            warn(
                "Not passing the block size to prepared_async_call is deprecated as of " "version 2011.1.",
                DeprecationWarning,
                stacklevel=2,
            )
            args = (stream,) + args
            stream = block

        shared_size = kwargs.pop("shared_size", 0)

        if kwargs:
            # Bug fix: dict.iterkeys() is Python 2 only and raises
            # AttributeError on Python 3; keys() works on both.
            raise TypeError("unknown keyword arguments: " + ", ".join(kwargs.keys()))

        from pycuda._pvt_struct import pack

        arg_buf = pack(func.arg_format, *args)

        for texref in func.texrefs:
            func.param_set_texref(texref)

        func._launch_kernel(grid, block, arg_buf, shared_size, stream)
Exemplo n.º 13
0
    def function_prepared_call(func, grid, *args):
        """Synchronously launch *func* on *grid*, packing *args* per the
        prepared argument format ``func.arg_format``."""
        from pycuda._pvt_struct import pack

        func.param_setv(0, pack(func.arg_format, *args))

        for tex_ref in func.texrefs:
            func.param_set_texref(tex_ref)

        func.launch_grid(*grid)
Exemplo n.º 14
0
    def function_prepared_call(func, grid, *args):
        """Pack *args* according to ``func.arg_format``, set any texture
        references, and launch *func* synchronously on *grid*."""
        from pycuda._pvt_struct import pack
        packed = pack(func.arg_format, *args)
        func.param_setv(0, packed)

        for texture_ref in func.texrefs:
            func.param_set_texref(texture_ref)

        func.launch_grid(*grid)
Exemplo n.º 15
0
    def function_prepared_async_call(func, grid, stream, *args):
        """Launch *func* on *grid*; asynchronously on *stream* when one
        is given, otherwise synchronously on the default stream."""
        from pycuda._pvt_struct import pack

        func.param_setv(0, pack(func.arg_format, *args))

        for tex_ref in func.texrefs:
            func.param_set_texref(tex_ref)

        if stream is not None:
            gx, gy = grid
            func.launch_grid_async(gx, gy, stream)
        else:
            func.launch_grid(*grid)
Exemplo n.º 16
0
    def function_prepared_async_call(func, grid, stream, *args):
        """Pack *args* per ``func.arg_format`` and launch *func* on
        *grid* — asynchronously when *stream* is not None."""
        from pycuda._pvt_struct import pack
        packed = pack(func.arg_format, *args)
        func.param_setv(0, packed)

        for texture_ref in func.texrefs:
            func.param_set_texref(texture_ref)

        if stream is None:
            func.launch_grid(*grid)
            return

        grid_w, grid_h = grid
        func.launch_grid_async(grid_w, grid_h, stream)
Exemplo n.º 17
0
    def _build_arg_buf(args):
        """Convert kernel arguments *args* to ``(handlers, buffer)``.

        *handlers* collects any ArgumentHandler instances; *buffer* is
        the struct-packed parameter block.  Supports numpy scalars,
        device allocations, host ndarrays (managed or plain), structured
        scalars (``np.void``), objects exposing
        ``__cuda_array_interface__``, and gpuarray-likes with a
        ``gpudata`` attribute.  Raises TypeError otherwise.
        """
        handlers = []
        arg_data = []
        fmt = ""

        for idx, arg in enumerate(args):
            if isinstance(arg, np.number):
                arg_data.append(arg)
                fmt += arg.dtype.char
                continue

            if isinstance(arg, (DeviceAllocation, PooledDeviceAllocation)):
                arg_data.append(int(arg))
                fmt += "P"
                continue

            if isinstance(arg, ArgumentHandler):
                handlers.append(arg)
                arg_data.append(int(arg.get_device_alloc()))
                fmt += "P"
                continue

            if isinstance(arg, np.ndarray):
                if isinstance(arg.base, ManagedAllocationOrStub):
                    # managed memory: pass the base allocation's pointer
                    arg_data.append(int(arg.base))
                    fmt += "P"
                else:
                    # plain host array: embed its raw bytes
                    arg_data.append(arg)
                    fmt += "%ds" % arg.nbytes
                continue

            if isinstance(arg, np.void):
                # structured scalar: pack its raw bytes
                arg_data.append(_my_bytes(_memoryview(arg)))
                fmt += "%ds" % arg.itemsize
                continue

            cai = getattr(arg, "__cuda_array_interface__", None)
            if cai:
                arg_data.append(cai["data"][0])
                fmt += "P"
                continue

            try:
                gpudata = np.uintp(arg.gpudata)
            except AttributeError:
                raise TypeError("invalid type on parameter #%d (0-based)" %
                                idx)
            else:
                # for gpuarrays
                arg_data.append(int(gpudata))
                fmt += "P"

        from pycuda._pvt_struct import pack

        return handlers, pack(fmt, *arg_data)
Exemplo n.º 18
0
    def function_prepared_timed_call(func, grid, *args):
        """Launch *func* on *grid* between two events and return a
        callable that waits for completion and reports seconds elapsed."""
        from pycuda._pvt_struct import pack

        func.param_setv(0, pack(func.arg_format, *args))

        for tex_ref in func.texrefs:
            func.param_set_texref(tex_ref)

        start_evt, end_evt = Event(), Event()

        start_evt.record()
        func.launch_grid(*grid)
        end_evt.record()

        def get_call_time():
            # time_since reports milliseconds; convert to seconds
            end_evt.synchronize()
            return end_evt.time_since(start_evt) * 1e-3

        return get_call_time
Exemplo n.º 19
0
    def function_prepared_timed_call(func, grid, *args):
        """Time a launch of *func* on *grid*; the returned callable
        blocks until the kernel finishes and yields elapsed seconds."""
        from pycuda._pvt_struct import pack
        packed = pack(func.arg_format, *args)
        func.param_setv(0, packed)

        for texture_ref in func.texrefs:
            func.param_set_texref(texture_ref)

        begin = Event()
        finish = Event()

        begin.record()
        func.launch_grid(*grid)
        finish.record()

        def get_call_time():
            # milliseconds -> seconds
            finish.synchronize()
            return finish.time_since(begin) * 1e-3

        return get_call_time
Exemplo n.º 20
0
    def function_param_set(func, *args):
        """Pack *args* and install them as *func*'s parameter block.

        Returns the list of ArgumentHandler instances encountered so the
        caller can invoke their pre/post hooks.
        """
        try:
            import numpy
        except ImportError:
            numpy = None

        handlers = []
        values = []
        fmt = ""

        for idx, arg in enumerate(args):
            if numpy is not None and isinstance(arg, numpy.number):
                values.append(arg)
                fmt += arg.dtype.char
            elif isinstance(arg, (DeviceAllocation, PooledDeviceAllocation)):
                values.append(int(arg))
                fmt += "P"
            elif isinstance(arg, ArgumentHandler):
                handlers.append(arg)
                values.append(int(arg.get_device_alloc()))
                fmt += "P"
            elif isinstance(arg, buffer):
                # NOTE(review): `buffer` is the Python 2 builtin; this
                # branch would NameError on Python 3 — confirm interpreter.
                values.append(arg)
                fmt += "s"
            else:
                try:
                    dev_ptr = arg.gpudata
                except AttributeError:
                    raise TypeError("invalid type on parameter #%d (0-based)" %
                                    idx)
                else:
                    # for gpuarrays
                    values.append(int(dev_ptr))
                    fmt += "P"

        from pycuda._pvt_struct import pack
        packed = pack(fmt, *values)

        func.param_setv(0, packed)
        func.param_set_size(len(packed))

        return handlers
Exemplo n.º 21
0
    def function_param_set(func, *args):
        """Build *func*'s packed parameter block from *args* and set its
        size; returns any ArgumentHandler instances found."""
        try:
            import numpy
        except ImportError:
            numpy = None

        handlers = []
        arg_data = []
        fmt = ""
        for i, arg in enumerate(args):
            if numpy is not None and isinstance(arg, numpy.number):
                arg_data.append(arg)
                fmt += arg.dtype.char
                continue
            if isinstance(arg, (DeviceAllocation, PooledDeviceAllocation)):
                arg_data.append(int(arg))
                fmt += "P"
                continue
            if isinstance(arg, ArgumentHandler):
                handlers.append(arg)
                arg_data.append(int(arg.get_device_alloc()))
                fmt += "P"
                continue
            if isinstance(arg, buffer):
                # NOTE(review): `buffer` exists only on Python 2; on
                # Python 3 reaching this test raises NameError — confirm.
                arg_data.append(arg)
                fmt += "s"
                continue
            try:
                gpudata = arg.gpudata
            except AttributeError:
                raise TypeError("invalid type on parameter #%d (0-based)" % i)
            else:
                # for gpuarrays
                arg_data.append(int(gpudata))
                fmt += "P"

        from pycuda._pvt_struct import pack

        buf = pack(fmt, *arg_data)

        func.param_setv(0, buf)
        func.param_set_size(len(buf))

        return handlers
Exemplo n.º 22
0
    def function_prepared_timed_call_pre_v4(func, grid, block, *args,
                                            **kwargs):
        """Launch *func* on *grid* (pre-CUDA-4 API) between two timing
        events; return a callable that waits for completion and yields
        the elapsed time in seconds.

        *block* should be the block-shape tuple (the legacy convention
        is deprecated).  ``shared_size`` is the only recognized keyword
        argument.
        """
        if isinstance(block, tuple):
            func._set_block_shape(*block)
        else:
            from warnings import warn
            warn(
                "Not passing the block size to prepared_timed_call is "
                "deprecated as of version 2011.1.",
                DeprecationWarning,
                stacklevel=2)
            args = (block, ) + args

        shared_size = kwargs.pop("shared_size", None)
        if shared_size is not None:
            func._set_shared_size(shared_size)

        if kwargs:
            # Fix: drop the third-party `six` dependency; kwargs.keys()
            # is equivalent for join() on both Python 2 and 3.
            raise TypeError("unknown keyword arguments: " +
                            ", ".join(kwargs.keys()))

        from pycuda._pvt_struct import pack
        func._param_setv(0, pack(func.arg_format, *args))

        for texref in func.texrefs:
            func.param_set_texref(texref)

        start = Event()
        end = Event()

        start.record()
        func._launch_grid(*grid)
        end.record()

        def get_call_time():
            # time_since reports milliseconds; convert to seconds
            end.synchronize()
            return end.time_since(start) * 1e-3

        return get_call_time
Exemplo n.º 23
0
    def function_prepared_timed_call(func, grid, block, *args, **kwargs):
        """Launch *func* via ``_launch_kernel`` bracketed by two events;
        return a callable that waits for completion and reports the
        elapsed time in seconds.  ``shared_size`` (default 0) is the
        only recognized keyword argument.
        """
        shared_size = kwargs.pop("shared_size", 0)
        if kwargs:
            # Bug fix: dict.iterkeys() is Python 2 only and raises
            # AttributeError on Python 3; keys() works on both.
            raise TypeError("unknown keyword arguments: "
                    + ", ".join(kwargs.keys()))

        from pycuda._pvt_struct import pack
        arg_buf = pack(func.arg_format, *args)

        for texref in func.texrefs:
            func.param_set_texref(texref)

        start = Event()
        end = Event()

        start.record()
        func._launch_kernel(grid, block, arg_buf, shared_size, None)
        end.record()

        def get_call_time():
            # time_since is in milliseconds; convert to seconds
            end.synchronize()
            return end.time_since(start)*1e-3

        return get_call_time