示例#1
0
def _init_devices():
    """Build the Python-side table of physical offload targets.

    Queries the number of available devices and wraps every device index
    in an OffloadDevice object.

    Returns
    -------
    out : dict
       Maps each device index (0 .. number_of_devices() - 1) to its
       OffloadDevice instance.
    """
    device_count = number_of_devices()
    debug(5, "found {0} device(s)", device_count)
    return {index: OffloadDevice(index) for index in range(device_count)}
示例#2
0
def _init_devices():
    """Build the Python-side table of physical offload targets.

    Queries the number of available devices and wraps every device index
    in an OffloadDevice object.

    Returns
    -------
    out : dict
       Maps each device index (0 .. number_of_devices() - 1) to its
       OffloadDevice instance.
    """
    device_count = number_of_devices()
    debug(5, "found {0} device(s)", device_count)
    return {index: OffloadDevice(index) for index in range(device_count)}
示例#3
0
    def __init__(self, device=None):
        """Create a new stream on `device`.

           Parameters
           ----------
           device : object
              Owning device; must not be None and must expose a
              `device_id` attribute.
        """
        # a stream always belongs to a device
        assert device is not None

        # remember the owning device and its id
        self._device = device
        self._device_id = device.device_id

        # create the stream and keep the handle that identifies it
        self._stream_id = pymic_stream_create(self._device_id, 'stream')
        message = 'created stream 0x{0:x} for device {1}'.format(
            self._stream_id, self._device_id)
        debug(1, message)
示例#4
0
    def __init__(self, device=None):
        """Create a new stream on `device`.

           Parameters
           ----------
           device : object
              Owning device; must not be None and must expose a
              `device_id` attribute.
        """
        # a stream always belongs to a device
        assert device is not None

        # remember the owning device and its id
        self._device = device
        self._device_id = device.device_id

        # create the stream and keep the handle that identifies it
        self._stream_id = pymic_stream_create(self._device_id, 'stream')
        message = 'created stream 0x{0:x} for device {1}'.format(
            self._stream_id, self._device_id)
        debug(1, message)
示例#5
0
文件: _tracing.py 项目: 01org/pyMIC
    def _trace_func(func):
        """Decorate `func` so that every call is timed and recorded.

        Each call is bracketed with timeit.default_timer(); afterwards the
        function name, both timestamps, the call arguments and the result
        of _stack_walk_func() are handed to _trace_database.register().
        Nothing is registered when `func` raises, because the exception
        propagates before the registration step.

        Parameters
        ----------
        func : callable
           Function to be traced; must provide `__name__`.

        Returns
        -------
        out : callable
           Wrapper with the same call signature and return value as
           `func`; its metadata (`__name__`, `__doc__`, ...) is copied
           from `func`.
        """
        import functools

        funcname = func.__name__
        debug(5, "collecting statistics for {0}", funcname)

        # functools.wraps keeps the traced function's name and docstring
        # visible on the wrapper (help(), introspection, nested decorators)
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            tstart = timeit.default_timer()
            rv = func(*args, **kwargs)
            tend = timeit.default_timer()
            stack_info = _stack_walk_func()
            _trace_database.register(funcname, tstart, tend,
                                     args, kwargs, stack_info)
            return rv
        return wrapper
示例#6
0
    def __init__(self, library, device=None):
        """Set up this OffloadLibrary: locate `library` on the host and load
           it on `device`.  Not meant to be called from outside pymic.

           Parameters
           ----------
           library : str
              Name or path of the shared library to load.
           device : object
              Device the library is loaded on; must not be None.

           Raises
           ------
           OffloadError
              If OffloadLibrary._find_library cannot locate the library.
        """

        # a library is always bound to a device
        assert device is not None

        # record the request and the per-instance state
        self._library = library
        self._device = device
        self._device_id = device._map_dev_id()
        self.unloader = pymic_library_unload
        self._cache = {}

        # find the library file on the host
        debug(5, "searching for {0} in {1}", library, config._search_path)
        filename = OffloadLibrary._find_library(library)
        if filename is None:
            debug(5, "no suitable library found for '{0}'", library)
            message = ("Cannot find library '{0}' in "
                       "PYMIC_LIBRARY_PATH").format(library)
            raise OffloadError(message)

        # load it on the device and keep what the loader hands back
        debug(5, "loading '{0}' on device {1}", filename, self._device_id)
        loaded = pymic_library_load(self._device_id, filename)
        self._handle, self._tempfile = loaded
        debug(5, "successfully loaded '{0}' on device {1} with handle 0x{2:x}",
              filename, self._device_id, self._handle)
示例#7
0
    def _trace_func(func):
        """Decorate `func` so that every call is timed and recorded.

        Each call is bracketed with timeit.default_timer(); afterwards the
        function name, both timestamps, the call arguments and the result
        of _stack_walk_func() are handed to _trace_database.register().
        Nothing is registered when `func` raises, because the exception
        propagates before the registration step.

        Parameters
        ----------
        func : callable
           Function to be traced; must provide `__name__`.

        Returns
        -------
        out : callable
           Wrapper with the same call signature and return value as
           `func`; its metadata (`__name__`, `__doc__`, ...) is copied
           from `func`.
        """
        import functools

        funcname = func.__name__
        debug(5, "collecting statistics for {0}", funcname)

        # functools.wraps keeps the traced function's name and docstring
        # visible on the wrapper (help(), introspection, nested decorators)
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            tstart = timeit.default_timer()
            rv = func(*args, **kwargs)
            tend = timeit.default_timer()
            stack_info = _stack_walk_func()
            _trace_database.register(funcname, tstart, tend, args, kwargs,
                                     stack_info)
            return rv

        return wrapper
示例#8
0
    def allocate_device_memory(self, nbytes, alignment=64, sticky=False):
        """Allocate device memory on device associated with the invoking
           stream object.  Though it is part of the stream interface,
           the operation is synchronous.

           Caution: this is a low-level function, do not use it unless you
                    have a very specific reason to do so.  Better use the
                    high-level interfaces of OffloadArray instead.

           Parameters
           ----------
           nbytes : int
              Number of bytes to allocate; must be greater than zero.
           alignment : int, optional, default 64
              Alignment of the data on the target device.
           sticky : bool, optional, default False
              Forwarded unchanged to the DeviceAllocation wrapper that is
              returned.

           See Also
           --------
           deallocate_device_memory, transfer_host2device,
           transfer_device2host, transfer_device2device

           Returns
           -------
           out : DeviceAllocation
              Wrapper around the fake pointer that identifies the
              allocated memory

           Raises
           ------
           ValueError
              If nbytes is zero or negative.

           Examples
           --------
           >>> ptr = stream.allocate_device_memory(4096)
           >>> print ptr
           140297169571840
        """

        device = self._device_id

        # zero is rejected as well, so the message must not claim that only
        # negative sizes are invalid
        if nbytes <= 0:
            raise ValueError('Cannot allocate non-positive amount of '
                             'memory: {0}'.format(nbytes))

        raw_ptr = pymic_stream_allocate(device, self._stream_id,
                                        nbytes, alignment)
        # wrap the fake pointer so it carries its stream, device and the
        # sticky flag along
        device_ptr = DeviceAllocation(self, device, raw_ptr, sticky)
        debug(2, 'allocated {0} bytes on device {1} at {2}'
                 ', alignment {3}',
                 nbytes, device, device_ptr, alignment)
        return device_ptr
示例#9
0
    def allocate_device_memory(self, nbytes, alignment=64, sticky=False):
        """Allocate device memory on device associated with the invoking
           stream object.  Though it is part of the stream interface,
           the operation is synchronous.

           Caution: this is a low-level function, do not use it unless you
                    have a very specific reason to do so.  Better use the
                    high-level interfaces of OffloadArray instead.

           Parameters
           ----------
           nbytes : int
              Number of bytes to allocate; must be greater than zero.
           alignment : int, optional, default 64
              Alignment of the data on the target device.
           sticky : bool, optional, default False
              Forwarded unchanged to the DeviceAllocation wrapper that is
              returned.

           See Also
           --------
           deallocate_device_memory, transfer_host2device,
           transfer_device2host, transfer_device2device

           Returns
           -------
           out : DeviceAllocation
              Wrapper around the fake pointer that identifies the
              allocated memory

           Raises
           ------
           ValueError
              If nbytes is zero or negative.

           Examples
           --------
           >>> ptr = stream.allocate_device_memory(4096)
           >>> print ptr
           140297169571840
        """

        device = self._device_id

        # zero is rejected as well, so the message must not claim that only
        # negative sizes are invalid
        if nbytes <= 0:
            raise ValueError('Cannot allocate non-positive amount of '
                             'memory: {0}'.format(nbytes))

        raw_ptr = pymic_stream_allocate(device, self._stream_id,
                                        nbytes, alignment)
        # wrap the fake pointer so it carries its stream, device and the
        # sticky flag along
        device_ptr = DeviceAllocation(self, device, raw_ptr, sticky)
        debug(2, 'allocated {0} bytes on device {1} at {2}'
                 ', alignment {3}',
                 nbytes, device, device_ptr, alignment)
        return device_ptr
示例#10
0
    def _find_library(library):
        """Resolve `library` to the path of a loadable library file.

           An absolute path accepted by OffloadLibrary._check_k1om is
           returned as is.  Otherwise pymic_dir and then every entry of
           config._search_path are probed in order, and the first existing
           file accepted by _check_k1om wins.

           Returns
           -------
           out : str or None
              Path of the library, or None if no candidate qualified.
        """
        if os.path.isabs(library) and OffloadLibrary._check_k1om(library):
            return library

        directories = [pymic_dir] + config._search_path.split(os.pathsep)
        for directory in directories:
            debug(5, "    looking for {0} in {1}", library, directory)
            candidate = os.path.join(directory, library)
            if not os.path.isfile(candidate):
                continue
            if OffloadLibrary._check_k1om(candidate):
                return candidate

        # search path exhausted without a match
        return None
示例#11
0
    def deallocate_device_memory(self, device_ptr):
        """Release device memory that was obtained through
           allocate_device_memory.  Though it is part of the stream
           interface, the operation is synchronous.

           Caution: this is a low-level function, do not use it unless you
                    have a very specific reason to do so.  Better use the
                    high-level interfaces of OffloadArray instead.

           Parameters
           ----------
           device_ptr : DeviceAllocation
              Allocation (fake pointer wrapper) of the memory to deallocate

           See Also
           --------
           allocate_device_memory, transfer_host2device,
           transfer_device2host, transfer_device2device

           Returns
           -------
           None

           Raises
           ------
           ValueError
              If device_ptr is None or not a DeviceAllocation.

           Examples
           --------
           >>> ptr = stream.allocate_device_memory(4096)
           >>> stream.deallocate_device_memory(ptr)
        """

        owner = self._device

        # only genuine allocations can be released
        if device_ptr is None:
            raise ValueError('Cannot deallocate None pointer')
        elif not isinstance(device_ptr, DeviceAllocation):
            raise ValueError('Wrong argument, no device pointer given')
        # TODO: add more safety checks here (e.g., pointer from right device
        #       and stream)

        # wait for outstanding requests of this stream before releasing
        self.sync()
        pymic_stream_deallocate(
            self._device_id, self._stream_id, device_ptr._device_ptr)
        debug(2, 'deallocated pointer {0} on device {1}', device_ptr, owner)

        return None
示例#12
0
    def deallocate_device_memory(self, device_ptr):
        """Release device memory that was obtained through
           allocate_device_memory.  Though it is part of the stream
           interface, the operation is synchronous.

           Caution: this is a low-level function, do not use it unless you
                    have a very specific reason to do so.  Better use the
                    high-level interfaces of OffloadArray instead.

           Parameters
           ----------
           device_ptr : DeviceAllocation
              Allocation (fake pointer wrapper) of the memory to deallocate

           See Also
           --------
           allocate_device_memory, transfer_host2device,
           transfer_device2host, transfer_device2device

           Returns
           -------
           None

           Raises
           ------
           ValueError
              If device_ptr is None or not a DeviceAllocation.

           Examples
           --------
           >>> ptr = stream.allocate_device_memory(4096)
           >>> stream.deallocate_device_memory(ptr)
        """

        owner = self._device

        # only genuine allocations can be released
        if device_ptr is None:
            raise ValueError('Cannot deallocate None pointer')
        elif not isinstance(device_ptr, DeviceAllocation):
            raise ValueError('Wrong argument, no device pointer given')
        # TODO: add more safety checks here (e.g., pointer from right device
        #       and stream)

        pymic_stream_deallocate(
            self._device_id, self._stream_id, device_ptr._device_ptr)
        debug(2, 'deallocated pointer {0} on device {1}', device_ptr, owner)

        return None
示例#13
0
    def sync(self):
        """Block until every request that is outstanding in this
           OffloadStream has completed.  The call returns only after all
           in-flight requests are done.

           Parameters
           ----------
           n/a

           Returns
           -------
           None

           See Also
           --------
           n/a
        """
        stream, device = self._stream_id, self._device_id
        debug(2, 'syncing stream 0x{0:x} on device {1}', stream, device)
        pymic_stream_sync(device, stream)
        return None
示例#14
0
    def sync(self):
        """Block until every request that is outstanding in this
           OffloadStream has completed.  The call returns only after all
           in-flight requests are done.

           Parameters
           ----------
           n/a

           Returns
           -------
           None

           See Also
           --------
           n/a
        """
        stream, device = self._stream_id, self._device_id
        debug(2, 'syncing stream 0x{0:x} on device {1}', stream, device)
        pymic_stream_sync(device, stream)
        return None
示例#15
0
    def invoke(self, kernel, *args):
        """Invoke a native kernel on the target device by enqueuing a request
           in the current stream.  The kernel is identified by accessing its
           library's attribute with the same name as the kernel name. The
           kernel function needs to be in a shared-object library that has
           been loaded by calling the load_library of the target device
           before invoke.

           The additional arguments of invoke can be either instances
           of OffloadArray, numpy.ndarray, scalar data, or None (passed to
           the kernel as a null pointer).  For numpy.ndarray arguments,
           invoke allocates a temporary device buffer, transfers the array
           to the device before the kernel is invoked and transfers it back
           after the kernel has been enqueued.  Scalars are wrapped in a
           numpy array and passed by their host pointer.

           All operations (copy in/copy out and invocation) are enqueued into
           the stream object.  If at least one numpy.ndarray argument was
           passed, invoke calls sync() before returning; otherwise it
           returns without waiting.

           Parameters
           ----------
           kernel : kernel
              Kernel to be invoked.  It is indexed as a sequence here:
              kernel[0] is used as the name in log messages, kernel[1] is
              the function pointer handed to the native layer, and
              kernel[2] is the device the library was loaded on.
           args : OffloadArray, numpy.ndarray, scalar type, or None
              Arguments to be passed to the kernel function.  A single
              tuple argument is unpacked and treated as the argument list.

           See Also
           --------
           load_library

           Returns
           -------
           None

           Raises
           ------
           ValueError
              If more than 16 kernel arguments are passed.
           OffloadError
              If kernel[2] is not the device of this stream.

           Examples
           --------
           >>> library = device.load_library("libdgemm")
           >>> stream.invoke(library.dgemm, A, B, C, n, m, k)
        """

        # if called from wrapper, actual arguments are wrapped in an
        # extra tuple, so we unwrap them
        if len(args):
            if type(args[0]) == tuple:
                args = args[0]

        # throw an exception if the number of kernel arguments is more than
        # 16 (that's a limitation of libxstream at the moment)
        if len(args) > 16:
            raise ValueError("Kernels with more than 16 arguments "
                             "are not supported")

        # safety check: avoid invoking a kernel if its library has been loaded
        # on a different device
        if kernel[2] is not self._device:
            raise OffloadError("Cannot invoke kernel, "
                               "library not loaded on device")

        # determine the types of the arguments (scalar vs arrays);
        # we store the device pointers as 64-bit integers in an ndarray
        # (one entry per kernel argument in each of the four arrays)
        arg_dims = numpy.empty((len(args),), dtype=numpy.int64)
        arg_type = numpy.empty((len(args),), dtype=numpy.int64)
        arg_ptrs = numpy.empty((len(args),), dtype=numpy.int64)
        arg_size = numpy.empty((len(args),), dtype=numpy.int64)
        # (host pointer, device allocation, byte count, ndarray) per
        # numpy.ndarray argument
        copy_in_out = []
        # wrapped scalar arguments; presumably kept so the temporary arrays
        # stay alive while their host pointers are in use -- TODO confirm
        scalars = []
        for i, a in enumerate(args):
            if a is None:
                # this is a None object, so we pass a nullptr to kernel
                arg_dims[i] = 1
                arg_type[i] = -1    # magic number to mark nullptrs
                arg_ptrs[i] = 0     # nullptr
                arg_size[i] = 0
                debug(3,
                      "(device {0}, stream 0x{1:x}) kernel '{2}' "
                      "arg {3} is None (device pointer 'nullptr')"
                      "".format(self._device_id, self._stream_id,
                                kernel[0], i))
            elif isinstance(a, pymic.OffloadArray):
                # get the device pointer of the OffloadArray and
                # pass it to the kernel
                arg_dims[i] = 1
                arg_type[i] = map_data_types(a.dtype)
                arg_ptrs[i] = a._device_ptr._device_ptr  # fake pointer
                arg_size[i] = a._nbytes
                debug(3,
                      "(device {0}, stream 0x{1:x}) kernel '{2}' "
                      "arg {3} is offload array (device pointer "
                      "{4})".format(self._device_id, self._stream_id,
                                    kernel[0], i, a._device_ptr))
            elif isinstance(a, numpy.ndarray):
                # allocate device buffer on the target of the invoke
                # and mark the numpy.ndarray for copyin/copyout semantics
                # NOTE(review): host pointer plus itemsize * size assumes
                # the array data is one contiguous buffer -- confirm callers
                host_ptr = a.ctypes.data  # raw C pointer to host data
                nbytes = a.dtype.itemsize * a.size
                dev_ptr = self.allocate_device_memory(nbytes)
                copy_in_out.append((host_ptr, dev_ptr, nbytes, a))
                arg_dims[i] = 1
                arg_type[i] = map_data_types(a.dtype)
                arg_ptrs[i] = dev_ptr._device_ptr    # fake pointer
                arg_size[i] = nbytes
                debug(3,
                      "(device {0}, stream 0x{1:x}) kernel '{2}' "
                      "arg {3} is copy-in/-out array (host pointer {4}, "
                      "device pointer "
                      "{5})".format(self._device_id, self._stream_id,
                                    kernel[0], i, host_ptr, dev_ptr))
            else:
                # this is a hack, but let's wrap scalars as numpy arrays
                cvtd = numpy.asarray(a)
                host_ptr = cvtd.ctypes.data  # raw C pointer to host data
                nbytes = cvtd.dtype.itemsize * cvtd.size
                scalars.append(cvtd)
                # scalars are the only arguments marked with dims 0 and
                # passed by host pointer instead of a device pointer
                arg_dims[i] = 0
                arg_type[i] = map_data_types(cvtd.dtype)
                arg_ptrs[i] = host_ptr
                arg_size[i] = nbytes
                debug(3,
                      "(device {0}, stream 0x{1:x}) kernel '{2}' "
                      "arg {3} is scalar {4} (host pointer "
                      "{5})".format(self._device_id, self._stream_id,
                                    kernel[0], i, a, host_ptr))
        debug(1, "(device {0}, stream 0x{1:x}) invoking kernel '{2}' "
                 "(pointer 0x{3:x}) with {4} "
                 "argument(s) ({5} copy-in/copy-out, {6} scalars)",
                 self._device_id, self._stream_id, kernel[0], kernel[1],
                 len(args), len(copy_in_out), len(scalars))
        # iterate over the copyin arguments and transfer them
        for c in copy_in_out:
            self.transfer_host2device(c[0], c[1], c[2])
        pymic_stream_invoke_kernel(self._device_id, self._stream_id, kernel[1],
                                   len(args), arg_dims, arg_type, arg_ptrs,
                                   arg_size)
        # iterate over the copyout arguments, transfer them back
        for c in copy_in_out:
            self.transfer_device2host(c[1], c[0], c[2])
        # wait for the stream only when host arrays were copied back;
        # presumably so they hold the results on return -- TODO confirm
        if len(copy_in_out) != 0:
            self.sync()
示例#16
0
 def __del__(self):
     """Destroy the stream when the object is garbage collected.

     Does nothing if the object never finished construction (no stream
     handle or no device id), so a failed __init__ does not trigger a
     second error from the finalizer.
     """
     # __init__ may have raised before these attributes were assigned;
     # plain attribute access would then fail inside the finalizer
     device_id = getattr(self, '_device_id', None)
     stream_id = getattr(self, '_stream_id', None)
     if device_id is None or stream_id is None:
         return
     debug(1,
           'destroying stream 0x{0:0x} for device {1}',
           stream_id, device_id)
     pymic_stream_destroy(device_id, stream_id)
示例#17
0
        stack.append(
            (traceback.function, (traceback.filename, traceback.lineno)))
        current_frame = current_frame.f_back
    return stack


def _trace_func(func):
    """Return a pass-through wrapper around `func`.

    This is the do-nothing variant of the tracing decorator: the wrapper
    forwards all positional and keyword arguments to `func` and returns
    its result unchanged.  The wrapper carries the metadata of `func`
    (`__name__`, `__doc__`, ...), so decorated functions stay
    recognizable in help() and introspection.
    """
    import functools

    # this is the do-nothing-wrapper
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)

    return wrapper


# Report whether tracing is active.  The code that follows enables tracing
# for every level >= 1, so the message uses the same threshold (previously
# level 2 and above were reported as "disabled").  An unset (None) level
# logs nothing.
if config._trace_level is not None:
    if config._trace_level >= 1:
        debug(5, "tracing is enabled", config._trace_level)
    else:
        debug(5, "tracing is disabled", config._trace_level)

if config._trace_level >= 1:
    _stack_walk_func = _stack_walk_compact
    if config._collect_stacks_str.lower() == "none":
        debug(5, "stack collection is set to '{0}'",
              config._collect_stacks_str)
        _stack_walk_func = _stack_walk_none
    elif config._collect_stacks_str.lower() == "compact":
        debug(5, "stack collection is set to '{0}'",
              config._collect_stacks_str)
        _stack_walk_func = _stack_walk_compact
    elif config._collect_stacks_str.lower() == "full":
        debug(5, "stack collection is set to '{0}'",
示例#18
0
    def invoke(self, kernel, *args):
        """Invoke a native kernel on the target device by enqueuing a request
           in the current stream.  The kernel is identified by accessing its
           library's attribute with the same name as the kernel name. The
           kernel function needs to be in a shared-object library that has
           been loaded by calling the load_library of the target device
           before invoke.

           The additional arguments of invoke can be either instances
           of OffloadArray, numpy.ndarray, scalar data, or None (passed to
           the kernel as a null pointer).  For numpy.ndarray arguments,
           invoke allocates a temporary device buffer, transfers the array
           to the device before the kernel is invoked and transfers it back
           after the kernel has been enqueued.  Scalars are wrapped in a
           numpy array and passed by their host pointer.

           All operations (copy in/copy out and invocation) are enqueued into
           the stream object.  If at least one numpy.ndarray argument was
           passed, invoke calls sync() before returning; otherwise it
           returns without waiting.

           Parameters
           ----------
           kernel : kernel
              Kernel to be invoked.  It is indexed as a sequence here:
              kernel[0] is used as the name in log messages, kernel[1] is
              the function pointer handed to the native layer, and
              kernel[2] is the device the library was loaded on.
           args : OffloadArray, numpy.ndarray, scalar type, or None
              Arguments to be passed to the kernel function.  A single
              tuple argument is unpacked and treated as the argument list.

           See Also
           --------
           load_library

           Returns
           -------
           None

           Raises
           ------
           ValueError
              If more than 16 kernel arguments are passed.
           OffloadError
              If kernel[2] is not the device of this stream.

           Examples
           --------
           >>> library = device.load_library("libdgemm")
           >>> stream.invoke(library.dgemm, A, B, C, n, m, k)
        """

        # if called from wrapper, actual arguments are wrapped in an
        # extra tuple, so we unwrap them
        if len(args):
            if type(args[0]) == tuple:
                args = args[0]

        # throw an exception if the number of kernel arguments is more than
        # 16 (that's a limitation of libxstream at the moment)
        if len(args) > 16:
            raise ValueError("Kernels with more than 16 arguments "
                             "are not supported")

        # safety check: avoid invoking a kernel if its library has been loaded
        # on a different device
        if kernel[2] is not self._device:
            raise OffloadError("Cannot invoke kernel, "
                               "library not loaded on device")

        # determine the types of the arguments (scalar vs arrays);
        # we store the device pointers as 64-bit integers in an ndarray
        # (one entry per kernel argument in each of the four arrays)
        arg_dims = numpy.empty((len(args),), dtype=numpy.int64)
        arg_type = numpy.empty((len(args),), dtype=numpy.int64)
        arg_ptrs = numpy.empty((len(args),), dtype=numpy.int64)
        arg_size = numpy.empty((len(args),), dtype=numpy.int64)
        # (host pointer, device allocation, byte count, ndarray) per
        # numpy.ndarray argument
        copy_in_out = []
        # wrapped scalar arguments; presumably kept so the temporary arrays
        # stay alive while their host pointers are in use -- TODO confirm
        scalars = []
        for i, a in enumerate(args):
            if a is None:
                # this is a None object, so we pass a nullptr to kernel
                arg_dims[i] = 1
                arg_type[i] = -1    # magic number to mark nullptrs
                arg_ptrs[i] = 0     # nullptr
                arg_size[i] = 0
                debug(3,
                      "(device {0}, stream 0x{1:x}) kernel '{2}' "
                      "arg {3} is None (device pointer 'nullptr')"
                      "".format(self._device_id, self._stream_id,
                                kernel[0], i))
            elif isinstance(a, pymic.OffloadArray):
                # get the device pointer of the OffloadArray and
                # pass it to the kernel
                arg_dims[i] = 1
                arg_type[i] = map_data_types(a.dtype)
                arg_ptrs[i] = a._device_ptr._device_ptr  # fake pointer
                arg_size[i] = a._nbytes
                debug(3,
                      "(device {0}, stream 0x{1:x}) kernel '{2}' "
                      "arg {3} is offload array (device pointer "
                      "{4})".format(self._device_id, self._stream_id,
                                    kernel[0], i, a._device_ptr))
            elif isinstance(a, numpy.ndarray):
                # allocate device buffer on the target of the invoke
                # and mark the numpy.ndarray for copyin/copyout semantics
                # NOTE(review): host pointer plus itemsize * size assumes
                # the array data is one contiguous buffer -- confirm callers
                host_ptr = a.ctypes.data  # raw C pointer to host data
                nbytes = a.dtype.itemsize * a.size
                dev_ptr = self.allocate_device_memory(nbytes)
                copy_in_out.append((host_ptr, dev_ptr, nbytes, a))
                arg_dims[i] = 1
                arg_type[i] = map_data_types(a.dtype)
                arg_ptrs[i] = dev_ptr._device_ptr    # fake pointer
                arg_size[i] = nbytes
                debug(3,
                      "(device {0}, stream 0x{1:x}) kernel '{2}' "
                      "arg {3} is copy-in/-out array (host pointer {4}, "
                      "device pointer "
                      "{5})".format(self._device_id, self._stream_id,
                                    kernel[0], i, host_ptr, dev_ptr))
            else:
                # this is a hack, but let's wrap scalars as numpy arrays
                cvtd = numpy.asarray(a)
                host_ptr = cvtd.ctypes.data  # raw C pointer to host data
                nbytes = cvtd.dtype.itemsize * cvtd.size
                scalars.append(cvtd)
                # scalars are the only arguments marked with dims 0 and
                # passed by host pointer instead of a device pointer
                arg_dims[i] = 0
                arg_type[i] = map_data_types(cvtd.dtype)
                arg_ptrs[i] = host_ptr
                arg_size[i] = nbytes
                debug(3,
                      "(device {0}, stream 0x{1:x}) kernel '{2}' "
                      "arg {3} is scalar {4} (host pointer "
                      "{5})".format(self._device_id, self._stream_id,
                                    kernel[0], i, a, host_ptr))
        debug(1, "(device {0}, stream 0x{1:x}) invoking kernel '{2}' "
                 "(pointer 0x{3:x}) with {4} "
                 "argument(s) ({5} copy-in/copy-out, {6} scalars)",
                 self._device_id, self._stream_id, kernel[0], kernel[1],
                 len(args), len(copy_in_out), len(scalars))
        # iterate over the copyin arguments and transfer them
        for c in copy_in_out:
            self.transfer_host2device(c[0], c[1], c[2])
        pymic_stream_invoke_kernel(self._device_id, self._stream_id, kernel[1],
                                   len(args), arg_dims, arg_type, arg_ptrs,
                                   arg_size)
        # iterate over the copyout arguments, transfer them back
        for c in copy_in_out:
            self.transfer_device2host(c[1], c[0], c[2])
        # wait for the stream only when host arrays were copied back;
        # presumably so they hold the results on return -- TODO confirm
        if len(copy_in_out) != 0:
            self.sync()
示例#19
0
    def transfer_device2device(self, device_ptr_src, device_ptr_dst,
                               nbytes, offset_device_src=0,
                               offset_device_dst=0):
        """Transfer data from a device memory location (identified by its
           fake pointer) to another memory region on the same device. The
           operation is executed asynchronously with stream semantics.

           Caution: this is a low-level function, do not use it unless you
                    have a very specific reason to do so.  Better use the
                    high-level interfaces of OffloadArray instead.

           Parameters
           ----------
           device_ptr_src : DeviceAllocation
              Fake pointer to the source memory location
           device_ptr_dst : DeviceAllocation
              Fake pointer to the destination memory location
           nbytes : int
              Number of bytes to copy; must be greater than zero
           offset_device_src : int, optional, default 0
              Transfer offset (bytes) to be added to the address of the device
              memory (source).
           offset_device_dst : int, optional, default 0
              Transfer offset (bytes) to be added to the address of the device
              memory (destination).

           See Also
           --------
           transfer_host2device, transfer_device2host,
           allocate_device_memory, deallocate_device_memory

           Returns
           -------
           None

           Raises
           ------
           ValueError
              If a pointer is None or not a DeviceAllocation, if an offset
              is negative, or if nbytes is not positive.

           Examples
           --------
           >>> a = numpy.arange(0.0, 16.0)
           >>> nbytes = a.dtype.itemsize * a.size
           >>> ptr_a_host = a.ctypes.data
           >>> device_ptr_1 = stream.allocate_device_memory(nbytes)
           >>> stream.transfer_host2device(ptr_a_host, device_ptr_1, nbytes)
           >>> device_ptr_2 = stream.allocate_device_memory(nbytes)
           >>> stream.transfer_device2device(device_ptr_1, device_ptr_2,
                                             nbytes)
           >>> b = numpy.empty_like(a)
           >>> print b
           [  6.95303066e-310   6.83874600e-317   3.95252517e-322
              0.00000000e+000   9.31741387e+242   0.00000000e+000
              0.00000000e+000   0.00000000e+000   4.94065646e-324
              3.30519641e-317   1.72409659e+212   1.20070123e-089
              5.05907223e-085   4.87883721e+199   0.00000000e+000
              6.78545805e-317]
           # random data
           >>> ptr_b_host = b.ctypes.data
           >>> stream.transfer_device2host(device_ptr_2, ptr_b_host, nbytes)
           >>> stream.sync()
           >>> print b
           [  0.   1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.
             12.  13.  14.  15.]
        """

        # None is checked first: it would also fail the isinstance tests
        # below, which made the dedicated None messages unreachable
        if device_ptr_src is None:
            raise ValueError('Invalid None device pointer')
        if device_ptr_dst is None:
            raise ValueError('Invalid None device pointer')
        if not isinstance(device_ptr_src, DeviceAllocation):
            raise ValueError('Wrong argument, no device pointer given')
        # TODO: add more safety checks here (e.g., pointer from right device
        #       and stream)
        if not isinstance(device_ptr_dst, DeviceAllocation):
            raise ValueError('Wrong argument, no device pointer given')
        # TODO: add more safety checks here (e.g., pointer from right device
        #       and stream)
        if offset_device_src < 0:
            raise ValueError("Negative offset passed for offset_device_src")
        if offset_device_dst < 0:
            raise ValueError("Negative offset passed for offset_device_dst")
        if nbytes <= 0:
            raise ValueError('Invalid byte count: {0}'.format(nbytes))

        # unwrap the allocations to the fake pointers used by the native call
        device_ptr_src = device_ptr_src._device_ptr
        device_ptr_dst = device_ptr_dst._device_ptr
        debug(1, '(device {0} -> device {0}) transferring {1} bytes '
                 '(source ptr {2}, destination ptr {3})',
                 self._device_id, nbytes, device_ptr_src, device_ptr_dst)
        pymic_stream_memcpy_d2d(self._device_id, self._stream_id,
                                device_ptr_src, device_ptr_dst,
                                nbytes, offset_device_src,
                                offset_device_dst)
        return None
示例#20
0
    def transfer_device2host(self, device_ptr, host_ptr,
                             nbytes, offset_device=0, offset_host=0):
        """Transfer data from a device memory location (identified by its
           fake pointer) to a host memory region (identified by its raw
           pointer, i.e., a C pointer). The operation is executed
           asynchronously with stream semantics.

           Caution: this is a low-level function, do not use it unless you
                    have a very specific reason to do so.  Better use the
                    high-level interfaces of OffloadArray instead.

           Parameters
           ----------
           device_ptr : DeviceAllocation
              Fake pointer of the source (device memory)
           host_ptr : int
              Raw pointer to the destination buffer on the host
           nbytes : int
              Number of bytes to copy
           offset_device : int, optional, default 0
              Transfer offset (bytes) to be added to the address of the device
              memory.
           offset_host : int, optional, default 0
              Transfer offset (bytes) to be added to raw host pointer

           See Also
           --------
           transfer_host2device, transfer_device2device,
           allocate_device_memory, deallocate_device_memory

           Returns
           -------
           None

           Raises
           ------
           ValueError
              If device_ptr or host_ptr is None, if device_ptr is not a
              DeviceAllocation, if one of the offsets is negative, or if
              nbytes is not positive.

           Examples
           --------
           >>> a = numpy.arange(0.0, 16.0)
           >>> nbytes = a.dtype.itemsize * a.size
           >>> ptr_a_host = a.ctypes.data
           >>> device_ptr = stream.allocate_device_memory(nbytes)
           >>> stream.transfer_host2device(ptr_a_host, device_ptr, nbytes)
           >>> b = numpy.empty_like(a)
           >>> print b
           [  6.90762927e-310   7.73120247e-317   3.60667921e-322
              0.00000000e+000   0.00000000e+000   0.00000000e+000
              0.00000000e+000   0.00000000e+000   4.94065646e-324
              9.76815212e-317   7.98912845e-317   0.00000000e+000
              5.53353523e-322   1.58101007e-322   0.00000000e+000
              7.38839996e-317]
           # random data
           >>> ptr_b_host = b.ctypes.data
           >>> stream.transfer_device2host(device_ptr, ptr_b_host, nbytes)
           >>> stream.sync()
           >>> print b
           [  0.   1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.
             12.  13.  14.  15.]
        """

        # The None test has to precede the isinstance() test; otherwise a
        # None argument is rejected by isinstance() and the dedicated
        # message below can never be produced.
        if device_ptr is None:
            raise ValueError('Invalid None device pointer')
        if not isinstance(device_ptr, DeviceAllocation):
            raise ValueError('Wrong argument, no device pointer given')
        # TODO: add more safety checks here (e.g., pointer from right device
        #       and stream)
        if offset_device < 0:
            raise ValueError("Negative offset passed for offset_device")
        if offset_host < 0:
            raise ValueError("Negative offset passed for offset_host")
        if host_ptr is None:
            raise ValueError('Invalid None host pointer')
        if nbytes <= 0:
            raise ValueError('Invalid byte count: {0}'.format(nbytes))

        # log the DeviceAllocation object itself, then unwrap it to the
        # value that is handed to the low-level copy routine
        debug(1, '(device {0} -> host) transferring {1} bytes '
                 '(device ptr {2}, host ptr 0x{3:x})',
                 self._device_id, nbytes, device_ptr, host_ptr)
        device_ptr = device_ptr._device_ptr
        pymic_stream_memcpy_d2h(self._device_id, self._stream_id,
                                device_ptr, host_ptr,
                                nbytes, offset_device, offset_host)
        return None
示例#21
0
    def transfer_device2host(self, device_ptr, host_ptr,
                             nbytes, offset_device=0, offset_host=0):
        """Transfer data from a device memory location (identified by its
           fake pointer) to a host memory region (identified by its raw
           pointer, i.e., a C pointer). The operation is executed
           asynchronously with stream semantics.

           Caution: this is a low-level function, do not use it unless you
                    have a very specific reason to do so.  Better use the
                    high-level interfaces of OffloadArray instead.

           Parameters
           ----------
           device_ptr : DeviceAllocation
              Fake pointer of the source (device memory)
           host_ptr : int
              Raw pointer to the destination buffer on the host
           nbytes : int
              Number of bytes to copy
           offset_device : int, optional, default 0
              Transfer offset (bytes) to be added to the address of the device
              memory.
           offset_host : int, optional, default 0
              Transfer offset (bytes) to be added to raw host pointer

           See Also
           --------
           transfer_host2device, transfer_device2device,
           allocate_device_memory, deallocate_device_memory

           Returns
           -------
           None

           Raises
           ------
           ValueError
              If device_ptr or host_ptr is None, if device_ptr is not a
              DeviceAllocation, if one of the offsets is negative, or if
              nbytes is not positive.

           Examples
           --------
           >>> a = numpy.arange(0.0, 16.0)
           >>> nbytes = a.dtype.itemsize * a.size
           >>> ptr_a_host = a.ctypes.data
           >>> device_ptr = stream.allocate_device_memory(nbytes)
           >>> stream.transfer_host2device(ptr_a_host, device_ptr, nbytes)
           >>> b = numpy.empty_like(a)
           >>> print b
           [  6.90762927e-310   7.73120247e-317   3.60667921e-322
              0.00000000e+000   0.00000000e+000   0.00000000e+000
              0.00000000e+000   0.00000000e+000   4.94065646e-324
              9.76815212e-317   7.98912845e-317   0.00000000e+000
              5.53353523e-322   1.58101007e-322   0.00000000e+000
              7.38839996e-317]
           # random data
           >>> ptr_b_host = b.ctypes.data
           >>> stream.transfer_device2host(device_ptr, ptr_b_host, nbytes)
           >>> stream.sync()
           >>> print b
           [  0.   1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.
             12.  13.  14.  15.]
        """

        # The None test has to precede the isinstance() test; otherwise a
        # None argument is rejected by isinstance() and the dedicated
        # message below can never be produced.
        if device_ptr is None:
            raise ValueError('Invalid None device pointer')
        if not isinstance(device_ptr, DeviceAllocation):
            raise ValueError('Wrong argument, no device pointer given')
        # TODO: add more safety checks here (e.g., pointer from right device
        #       and stream)
        if offset_device < 0:
            raise ValueError("Negative offset passed for offset_device")
        if offset_host < 0:
            raise ValueError("Negative offset passed for offset_host")
        if host_ptr is None:
            raise ValueError('Invalid None host pointer')
        if nbytes <= 0:
            raise ValueError('Invalid byte count: {0}'.format(nbytes))

        # log the DeviceAllocation object itself, then unwrap it to the
        # value that is handed to the low-level copy routine
        debug(1, '(device {0} -> host) transferring {1} bytes '
                 '(device ptr {2}, host ptr 0x{3:x})',
                 self._device_id, nbytes, device_ptr, host_ptr)
        device_ptr = device_ptr._device_ptr
        pymic_stream_memcpy_d2h(self._device_id, self._stream_id,
                                device_ptr, host_ptr,
                                nbytes, offset_device, offset_host)
        return None
示例#22
0
    def transfer_device2device(self, device_ptr_src, device_ptr_dst,
                               nbytes, offset_device_src=0,
                               offset_device_dst=0):
        """Transfer data from a device memory location (identified by its
           fake pointer) to another memory region on the same device. The
           operation is executed asynchronously with stream semantics.

           Caution: this is a low-level function, do not use it unless you
                    have a very specific reason to do so.  Better use the
                    high-level interfaces of OffloadArray instead.

           Parameters
           ----------
           device_ptr_src : DeviceAllocation
              Fake pointer to the source memory location
           device_ptr_dst : DeviceAllocation
              Fake pointer to the destination memory location
           nbytes : int
              Number of bytes to copy
           offset_device_src : int, optional, default 0
              Transfer offset (bytes) to be added to the address of the device
              memory (source).
           offset_device_dst : int, optional, default 0
              Transfer offset (bytes) to be added to the address of the device
              memory (destination).

           See Also
           --------
           transfer_host2device, allocate_device_memory,
           deallocate_device_memory

           Returns
           -------
           None

           Raises
           ------
           ValueError
              If one of the device pointers is None or is not a
              DeviceAllocation, if one of the offsets is negative, or if
              nbytes is not positive.

           Examples
           --------
           >>> a = numpy.arange(0.0, 16.0)
           >>> nbytes = a.dtype.itemsize * a.size
           >>> ptr_a_host = a.ctypes.data
           >>> device_ptr_1 = stream.allocate_device_memory(nbytes)
           >>> stream.transfer_host2device(ptr_a_host, device_ptr_1, nbytes)
           >>> device_ptr_2 = stream.allocate_device_memory(nbytes)
           >>> stream.transfer_device2device(device_ptr_1, device_ptr_2,
                                             nbytes)
           >>> b = numpy.empty_like(a)
           >>> print b
           [  6.95303066e-310   6.83874600e-317   3.95252517e-322
              0.00000000e+000   9.31741387e+242   0.00000000e+000
              0.00000000e+000   0.00000000e+000   4.94065646e-324
              3.30519641e-317   1.72409659e+212   1.20070123e-089
              5.05907223e-085   4.87883721e+199   0.00000000e+000
              6.78545805e-317]
           # random data
           >>> ptr_b_host = b.ctypes.data
           >>> stream.transfer_device2host(device_ptr_2, ptr_b_host, nbytes)
           >>> stream.sync()
           >>> print b
           [  0.   1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.
             12.  13.  14.  15.]
        """

        # Each None test has to precede the matching isinstance() test;
        # otherwise None is rejected by isinstance() and the dedicated
        # message can never be produced.
        if device_ptr_src is None:
            raise ValueError('Invalid None device pointer')
        if not isinstance(device_ptr_src, DeviceAllocation):
            raise ValueError('Wrong argument, no device pointer given')
        if device_ptr_dst is None:
            # the destination is a device pointer as well, not a host pointer
            raise ValueError('Invalid None device pointer')
        if not isinstance(device_ptr_dst, DeviceAllocation):
            raise ValueError('Wrong argument, no device pointer given')
        # TODO: add more safety checks here (e.g., pointers from right device
        #       and stream)
        if offset_device_src < 0:
            raise ValueError("Negative offset passed for offset_device_src")
        if offset_device_dst < 0:
            raise ValueError("Negative offset passed for offset_device_dst")
        if nbytes <= 0:
            raise ValueError('Invalid byte count: {0}'.format(nbytes))

        # unwrap both allocations to the values that are handed to the
        # low-level copy routine (these are also what gets logged)
        device_ptr_src = device_ptr_src._device_ptr
        device_ptr_dst = device_ptr_dst._device_ptr
        debug(1, '(device {0} -> device {0}) transferring {1} bytes '
                 '(source ptr {2}, destination ptr {3})',
                 self._device_id, nbytes, device_ptr_src, device_ptr_dst)
        pymic_stream_memcpy_d2d(self._device_id, self._stream_id,
                                device_ptr_src, device_ptr_dst,
                                nbytes, offset_device_src,
                                offset_device_dst)
        return None
示例#23
0
 def __del__(self):
     """Destroy the stream that belongs to this object.

        The finalizer also runs for objects whose __init__ raised before
        the stream attributes were assigned, so the attributes are looked
        up defensively instead of being assumed to exist.
     """
     device_id = getattr(self, '_device_id', None)
     stream_id = getattr(self, '_stream_id', None)
     if stream_id is None:
         # no stream was recorded for this object, nothing to destroy
         return
     debug(1,
           'destroying stream 0x{0:0x} for device {1}',
           stream_id, device_id)
     if device_id is not None:
         pymic_stream_destroy(device_id, stream_id)
示例#24
0
文件: _tracing.py 项目: 01org/pyMIC
        traceback = inspect.getframeinfo(current_frame)
        stack.append((traceback.function,
                     (traceback.filename, traceback.lineno)))
        current_frame = current_frame.f_back
    return stack


def _trace_func(func):
    """Return a pass-through wrapper around ``func``.

    This is the do-nothing variant of the tracing decorator: the wrapper
    forwards all positional and keyword arguments to ``func`` and returns
    its result unchanged.  The wrapper keeps the metadata of ``func``
    (``__name__``, ``__doc__``, ...) so that decorated functions remain
    recognizable in tracebacks and in ``help()``.

    Parameters
    ----------
    func : callable
       The function to wrap

    Returns
    -------
    callable
       A wrapper that behaves like ``func``
    """
    # local import: keeps this definition independent of the module's
    # import list
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)
    return wrapper


# Report the tracing state.  The threshold matches the ">= 1" test that
# selects the stack walker below, so that every level that switches tracing
# on is also reported as enabled.  Nothing is logged when no trace level
# is configured (None).
if config._trace_level is not None:
    if config._trace_level >= 1:
        debug(5, "tracing is enabled", config._trace_level)
    else:
        debug(5, "tracing is disabled", config._trace_level)

if config._trace_level >= 1:
    _stack_walk_func = _stack_walk_compact
    if config._collect_stacks_str.lower() == "none":
        debug(5, "stack collection is set to '{0}'",
              config._collect_stacks_str)
        _stack_walk_func = _stack_walk_none
    elif config._collect_stacks_str.lower() == "compact":
        debug(5, "stack collection is set to '{0}'",
              config._collect_stacks_str)
        _stack_walk_func = _stack_walk_compact
    elif config._collect_stacks_str.lower() == "full":
        debug(5, "stack collection is set to '{0}'",