Пример #1
0
    def test_cuda_driver_basic(self):
        """Launch the ``_Z10helloworldPi`` kernel and verify its output.

        A 100-element ``c_int`` buffer is copied to the device, the kernel is
        launched with a 1x1x1 grid of 100x1x1 blocks, and the buffer is copied
        back. Each element is expected to equal its own index.
        """
        module = self.context.create_module_ptx(self.ptx)
        # Unload the module even if the launch or an assertion below fails.
        try:
            function = module.get_function('_Z10helloworldPi')

            array = (c_int * 100)()
            nbytes = sizeof(array)

            memory = self.context.memalloc(nbytes)
            host_to_device(memory, array, nbytes)

            launch_kernel(
                function.handle,  # Kernel
                1, 1, 1,  # gx, gy, gz
                100, 1, 1,  # bx, by, bz
                0,  # dynamic shared mem
                0,  # stream
                [memory])  # arguments

            device_to_host(array, memory, nbytes)
            for i, v in enumerate(array):
                self.assertEqual(i, v)
        finally:
            module.unload()
Пример #2
0
    def test_cuda_driver_basic(self):
        """Launch the ``_Z10helloworldPi`` kernel and verify its output.

        A 100-element ``c_int`` buffer is copied to the device, the kernel is
        launched with a 1x1x1 grid of 100x1x1 blocks, and the buffer is copied
        back. Each element is expected to equal its own index.
        """
        module = self.context.create_module_ptx(self.ptx)
        # Unload the module even if the launch or an assertion below fails.
        try:
            function = module.get_function('_Z10helloworldPi')

            array = (c_int * 100)()
            nbytes = sizeof(array)

            memory = self.context.memalloc(nbytes)
            host_to_device(memory, array, nbytes)

            ptr = memory.device_ctypes_pointer
            stream = 0

            if _driver.USE_NV_BINDING:
                # With the NV binding enabled the launch arguments are
                # re-wrapped: the pointer as a plain ``c_void_p`` and the
                # stream as a binding ``CUstream``.
                ptr = c_void_p(int(ptr))
                stream = _driver.binding.CUstream(stream)

            launch_kernel(
                function.handle,  # Kernel
                1, 1, 1,  # gx, gy, gz
                100, 1, 1,  # bx, by, bz
                0,  # dynamic shared mem
                stream,  # stream
                [ptr])  # arguments

            device_to_host(array, memory, nbytes)
            for i, v in enumerate(array):
                self.assertEqual(i, v)
        finally:
            module.unload()
Пример #3
0
    def test_cuda_driver_stream_operations(self):
        """Run the ``_Z10helloworldPi`` kernel on a non-default stream.

        The upload, the kernel launch and the read-back are all issued on the
        same stream inside ``stream.auto_synchronize()``. Each element of the
        100-element buffer is expected to equal its own index afterwards.
        """
        module = self.context.create_module_ptx(self.ptx)
        function = module.get_function('_Z10helloworldPi')

        array = (c_int * 100)()
        nbytes = sizeof(array)

        stream = self.context.create_stream()

        with stream.auto_synchronize():
            memory = self.context.memalloc(nbytes)
            host_to_device(memory, array, nbytes, stream=stream)

            launch_kernel(
                function.handle,  # Kernel
                1, 1, 1,  # gx, gy, gz
                100, 1, 1,  # bx, by, bz
                0,  # dynamic shared mem
                stream.handle,  # stream
                [memory])  # arguments

            # Issue the stream-ordered read-back before leaving the block so
            # that the host only inspects `array` after the context manager
            # has exited (presumably synchronizing the stream on exit --
            # confirm against the Stream implementation).
            device_to_host(array, memory, nbytes, stream=stream)

        for i, v in enumerate(array):
            self.assertEqual(i, v)
Пример #4
0
    def copy_to_device(self, ary, stream=0):
        """Copy `ary` to `self`.

        If `ary` is a CUDA memory, perform a device-to-device transfer.
        Otherwise, perform a host-to-device transfer.

        A source with ``ary.size == 0`` makes this a no-op. The `stream`
        argument is passed through ``self._default_stream`` before being
        handed to the driver copy routine.
        """
        if ary.size == 0:
            # Nothing to do
            return

        sentry_contiguous(self)
        stream = self._default_stream(stream)

        self_core, ary_core = array_core(self), array_core(ary)
        if _driver.is_device_memory(ary):
            sentry_contiguous(ary)
            check_array_compatibility(self_core, ary_core)
            _driver.device_to_device(self, ary, self.alloc_size, stream=stream)
            return

        # Ensure same contiguity as the destination. Only makes a host-side
        # copy if necessary.
        order = 'C' if self_core.flags['C_CONTIGUOUS'] else 'F'
        if ary_core.flags['WRITEABLE']:
            # `asanyarray` copies only when `order` cannot be satisfied
            # otherwise. `np.array(..., copy=False)` is avoided here because
            # under NumPy 2 it raises instead of copying when a copy is
            # unavoidable.
            ary_core = np.asanyarray(ary_core, order=order)
        else:
            # Non-writeable sources are always materialized as a fresh copy.
            ary_core = np.array(ary_core, order=order, subok=True, copy=True)

        check_array_compatibility(self_core, ary_core)
        _driver.host_to_device(self,
                               ary_core,
                               self.alloc_size,
                               stream=stream)
Пример #5
0
 def test_d2d(self):
     """Round-trip an array host -> device -> device -> host and compare."""
     source = np.arange(100, dtype=np.uint32)
     result = np.empty_like(source)
     nbytes = source.size * source.dtype.itemsize

     # Two separate device allocations of the same size.
     first = self.context.memalloc(nbytes)
     second = self.context.memalloc(nbytes)

     # Upload into the first buffer, copy it to the second on the device,
     # then download from the second buffer.
     driver.host_to_device(first, source, nbytes)
     driver.device_to_device(second, first, nbytes)
     driver.device_to_host(result, second, nbytes)

     matches = source == result
     self.assertTrue(np.all(matches))
Пример #6
0
    def test_memcpy(self):
        """Round-trip an array host -> device -> host and compare.

        The destination buffer starts out filled with a sentinel value that
        does not occur in the source, so the final comparison can only
        succeed if both copies actually moved the data.
        """
        hstary = np.arange(100, dtype=np.uint32)
        # Sentinel fill: differs from every element of `hstary` (0..99).
        hstary2 = np.full_like(hstary, np.iinfo(np.uint32).max)
        sz = hstary.size * hstary.dtype.itemsize
        devary = self.context.memalloc(sz)

        driver.host_to_device(devary, hstary, sz)
        driver.device_to_host(hstary2, devary, sz)

        self.assertTrue(np.all(hstary == hstary2))
Пример #7
0
    def test_memcpy(self):
        """Check that a host -> device -> host copy reproduces the source.

        The receiving array is initialised with a value absent from the
        source data; otherwise the equality check would pass even if neither
        copy had any effect.
        """
        hstary = np.arange(100, dtype=np.uint32)
        # The source holds 0..99, so the uint32 maximum never collides.
        sentinel = np.iinfo(np.uint32).max
        hstary2 = np.full_like(hstary, sentinel)
        sz = hstary.size * hstary.dtype.itemsize
        devary = self.context.memalloc(sz)

        driver.host_to_device(devary, hstary, sz)
        driver.device_to_host(hstary2, devary, sz)

        self.assertTrue(np.all(hstary == hstary2))
Пример #8
0
 def test_d2d(self):
     """Verify a device-to-device copy by round-tripping through the host."""
     expected = np.arange(100, dtype=np.uint32)
     actual = np.empty_like(expected)
     byte_count = expected.size * expected.dtype.itemsize

     src_dev = self.context.memalloc(byte_count)
     dst_dev = self.context.memalloc(byte_count)

     # host -> src_dev -> dst_dev -> host
     driver.host_to_device(src_dev, expected, byte_count)
     driver.device_to_device(dst_dev, src_dev, byte_count)
     driver.device_to_host(actual, dst_dev, byte_count)

     all_equal = np.all(expected == actual)
     self.assertTrue(all_equal)
Пример #9
0
    def test_cuda_driver_basic(self):
        """Launch the ``_Z10helloworldPi`` kernel and verify its output.

        A 100-element ``c_int`` buffer is copied to the device, the function
        is configured with ``(1,)`` and ``(100,)`` and called on the buffer,
        and the buffer is copied back. Each element is expected to equal its
        own index.
        """
        module = self.context.create_module_ptx(self.ptx)
        # Unload the module even if the launch or an assertion below fails.
        try:
            function = module.get_function('_Z10helloworldPi')

            array = (c_int * 100)()
            nbytes = sizeof(array)

            memory = self.context.memalloc(nbytes)
            host_to_device(memory, array, nbytes)

            function = function.configure((1, ), (100, ))
            function(memory)

            device_to_host(array, memory, nbytes)
            for i, v in enumerate(array):
                self.assertEqual(i, v)
        finally:
            module.unload()
Пример #10
0
    def test_cuda_driver_basic(self):
        """Run the ``_Z10helloworldPi`` kernel through a configured function.

        Uploads a 100-element ``c_int`` buffer, calls the function configured
        with ``(1,)`` and ``(100,)`` on it, downloads the buffer and checks
        that every element equals its index.
        """
        module = self.context.create_module_ptx(self.ptx)
        try:
            function = module.get_function('_Z10helloworldPi')

            array = (c_int * 100)()
            nbytes = sizeof(array)

            memory = self.context.memalloc(nbytes)
            host_to_device(memory, array, nbytes)

            function = function.configure((1,), (100,))
            function(memory)

            device_to_host(array, memory, nbytes)
            for i, v in enumerate(array):
                self.assertEqual(i, v)
        finally:
            # Runs on success and on failure, so a failing launch or
            # assertion does not leave the module loaded.
            module.unload()
Пример #11
0
    def test_cuda_driver_stream(self):
        """Run the ``_Z10helloworldPi`` kernel on a non-default stream.

        The upload, the configured function call and the read-back are all
        issued on the same stream inside ``stream.auto_synchronize()``. Each
        element of the 100-element buffer is expected to equal its own index
        afterwards.
        """
        module = self.context.create_module_ptx(self.ptx)
        function = module.get_function('_Z10helloworldPi')

        array = (c_int * 100)()
        nbytes = sizeof(array)

        stream = self.context.create_stream()

        with stream.auto_synchronize():
            memory = self.context.memalloc(nbytes)
            host_to_device(memory, array, nbytes, stream=stream)

            function = function.configure((1, ), (100, ), stream=stream)
            function(memory)

            # Issue the stream-ordered read-back before leaving the block so
            # that the host only inspects `array` after the context manager
            # has exited (presumably synchronizing the stream on exit --
            # confirm against the Stream implementation).
            device_to_host(array, memory, nbytes, stream=stream)

        for i, v in enumerate(array):
            self.assertEqual(i, v)
Пример #12
0
    def test_cuda_driver_stream(self):
        """Exercise upload, kernel call and download on one created stream.

        All three operations are queued on `stream` within
        ``stream.auto_synchronize()``; the per-element check (element equals
        index) runs only after that block has been left.
        """
        module = self.context.create_module_ptx(self.ptx)
        function = module.get_function('_Z10helloworldPi')

        array = (c_int * 100)()
        nbytes = sizeof(array)

        stream = self.context.create_stream()

        with stream.auto_synchronize():
            memory = self.context.memalloc(nbytes)
            host_to_device(memory, array, nbytes, stream=stream)

            function = function.configure((1,), (100,), stream=stream)
            function(memory)

            # The read-back is queued on the same stream inside the block, so
            # the assertions below run after the context manager's exit
            # (assumed to wait for the stream -- TODO confirm).
            device_to_host(array, memory, nbytes, stream=stream)

        for i, v in enumerate(array):
            self.assertEqual(i, v)