示例#1
0
    def test_copy_from_device(self):
        """Copy an int through two ``memory.alloc`` buffers and read it back."""
        nbytes = 4
        expected = ctypes.c_int(100)
        first = memory.alloc(nbytes)
        first.copy_from(ctypes.byref(expected), nbytes)

        # Buffer-to-buffer copy, then fetch the result into a fresh host int.
        second = memory.alloc(nbytes)
        second.copy_from(first, nbytes)
        fetched = ctypes.c_int()
        second.copy_to_host(ctypes.byref(fetched), nbytes)
        self.assertEqual(fetched.value, expected.value)
示例#2
0
    def test_copy_from_device(self):
        # Seed one buffer from a host int, copy it into a second buffer,
        # and verify that the second buffer reads back the same value.
        size = 4
        src_buf = memory.alloc(size)
        host_in = ctypes.c_int(100)
        src_buf.copy_from(ctypes.byref(host_in), size)

        dst_buf = memory.alloc(size)
        dst_buf.copy_from(src_buf, size)

        host_out = ctypes.c_int()
        dst_buf.copy_to_host(ctypes.byref(host_out), size)
        self.assertEqual(host_out.value, host_in.value)
示例#3
0
    def test_copy_from_device(self):
        """Round-trip an int through two buffers using ``c_void_p`` pointers."""
        size = 4
        host_src = ctypes.c_int(100)
        host_src_ptr = ctypes.cast(ctypes.byref(host_src), ctypes.c_void_p)
        dev_first = memory.alloc(size)
        dev_first.copy_from(host_src_ptr, size)

        # Copy between the two allocations, then pull the value back out.
        dev_second = memory.alloc(size)
        dev_second.copy_from(dev_first, size)

        host_dst = ctypes.c_int()
        host_dst_ptr = ctypes.cast(ctypes.byref(host_dst), ctypes.c_void_p)
        dev_second.copy_to_host(host_dst_ptr, size)
        assert host_dst.value == host_src.value
示例#4
0
    def test_copy_from_device_async_using_raw_ptr(self):
        """Round-trip an int with async copies addressed by raw int pointers.

        The host addresses are passed as plain integers (``c_void_p.value``).
        All three copies are issued with ``stream=self.stream``; the stream
        is synchronized before the host value is inspected so the assertion
        cannot race with copies that are still in flight.
        """
        # Imported locally so this test does not rely on a module-level alias.
        from cupy.cuda import stream as stream_module

        a_gpu = memory.alloc(4)
        a_cpu = ctypes.c_int(100)
        a_cpu_ptr = ctypes.cast(ctypes.byref(a_cpu), ctypes.c_void_p)
        a_gpu.copy_from_async(a_cpu_ptr.value, 4, stream=self.stream)

        b_gpu = memory.alloc(4)
        b_gpu.copy_from_async(a_gpu, 4, stream=self.stream)
        b_cpu = ctypes.c_int()
        b_cpu_ptr = ctypes.cast(ctypes.byref(b_cpu), ctypes.c_void_p)
        b_gpu.copy_to_host_async(b_cpu_ptr.value, 4, stream=self.stream)

        # Wait for the queued copies before reading ``b_cpu``. With no
        # explicit stream the copies presumably run on the current stream,
        # so that is the one to wait on.
        if self.stream is not None:
            self.stream.synchronize()
        else:
            stream_module.get_current_stream().synchronize()
        assert b_cpu.value == a_cpu.value
示例#5
0
 def test_can_use_cub_oversize_input3(self):
     # full reduction over ``sys.maxsize`` elements (2^63-1 on 64-bit
     # builds); ``can_use`` is expected to return None for this input
     backing = memory.alloc(100)
     n_elems = sys.maxsize
     oversized = cupy.ndarray((n_elems, ), dtype=cupy.int8, memptr=backing)
     scalar_out = cupy.empty((), dtype=cupy.int8)
     verdict = self.can_use([oversized], [scalar_out], (0, ), ())
     assert verdict is None
示例#6
0
文件: test_memory.py 项目: keckj/cupy
    def test_copy_from_device_async_using_raw_ptr(self):
        """Round-trip an int with async copies addressed by raw int pointers.

        Host addresses are passed as integers (``c_void_p.value``). The
        stream used for the copies is synchronized before the assertion.
        """
        size = 4
        src_dev = memory.alloc(size)
        src_host = ctypes.c_int(100)
        src_addr = ctypes.cast(ctypes.byref(src_host), ctypes.c_void_p)
        src_dev.copy_from_async(src_addr.value, size, stream=self.stream)

        dst_dev = memory.alloc(size)
        dst_dev.copy_from_async(src_dev, size, stream=self.stream)
        dst_host = ctypes.c_int()
        dst_addr = ctypes.cast(ctypes.byref(dst_host), ctypes.c_void_p)
        dst_dev.copy_to_host_async(dst_addr.value, size, stream=self.stream)

        # Wait on the explicit stream when there is one, otherwise on the
        # current stream, so the host value is settled before it is read.
        to_sync = (self.stream if self.stream is not None
                   else stream_module.get_current_stream())
        to_sync.synchronize()
        assert dst_host.value == src_host.value
示例#7
0
 def test_memset(self):
     """Fill four bytes with 1 via ``memset`` and check each byte in turn."""
     n_bytes = 4
     cursor = memory.alloc(n_bytes)
     cursor.memset(1, n_bytes)
     probe = ctypes.c_ubyte()
     for _ in range(n_bytes):
         # Read the single byte at the cursor, then advance the pointer.
         cursor.copy_to_host(ctypes.byref(probe), 1)
         self.assertEqual(probe.value, 1)
         cursor += 1
示例#8
0
 def test_copy_to_and_from_host(self):
     """Write an int into an allocation and read the same value back."""
     size = 4
     buf = memory.alloc(size)
     sent = ctypes.c_int(100)
     sent_ptr = ctypes.cast(ctypes.byref(sent), ctypes.c_void_p)
     buf.copy_from(sent_ptr, size)

     received = ctypes.c_int()
     received_ptr = ctypes.cast(ctypes.byref(received), ctypes.c_void_p)
     buf.copy_to_host(received_ptr, size)
     self.assertEqual(received.value, sent.value)
示例#9
0
 def test_memset(self):
     # Set all four bytes of the allocation to 1, then walk the pointer
     # forward one byte at a time and confirm every byte reads back as 1.
     ptr = memory.alloc(4)
     ptr.memset(1, 4)
     byte = ctypes.c_ubyte()
     remaining = 4
     while remaining > 0:
         ptr.copy_to_host(ctypes.byref(byte), 1)
         self.assertEqual(byte.value, 1)
         ptr += 1
         remaining -= 1
示例#10
0
 def test_copy_to_and_from_host(self):
     # Push a host int into the allocation via a ``c_void_p`` pointer and
     # pull it back into a second host int; both must hold the same value.
     allocation = memory.alloc(4)
     original = ctypes.c_int(100)
     allocation.copy_from(
         ctypes.cast(ctypes.byref(original), ctypes.c_void_p), 4)

     readback = ctypes.c_int()
     readback_ptr = ctypes.cast(ctypes.byref(readback), ctypes.c_void_p)
     allocation.copy_to_host(readback_ptr, 4)
     self.assertEqual(readback.value, original.value)
示例#11
0
 def test_memset(self):
     """Fill four bytes with 1 and verify each one through a ``c_void_p``."""
     total = 4
     cursor = memory.alloc(total)
     cursor.memset(1, total)
     sample = ctypes.c_ubyte()
     for _ in range(total):
         # A fresh pointer to ``sample`` is built for every one-byte read.
         sample_ptr = ctypes.cast(ctypes.byref(sample), ctypes.c_void_p)
         cursor.copy_to_host(sample_ptr, 1)
         assert sample.value == 1
         cursor += 1
示例#12
0
    def test_copy_to_and_from_host_async(self):
        """Round-trip an int through one allocation using async copies.

        Both copies are issued with ``stream=self.stream``; the stream is
        synchronized before the host value is inspected so the assertion
        cannot race with copies that are still in flight.
        """
        # Imported locally so this test does not rely on a module-level alias.
        from cupy.cuda import stream as stream_module

        a_gpu = memory.alloc(4)
        a_cpu = ctypes.c_int(100)
        a_gpu.copy_from_async(
            ctypes.cast(ctypes.byref(a_cpu), ctypes.c_void_p),
            4, stream=self.stream)

        b_cpu = ctypes.c_int()
        a_gpu.copy_to_host_async(
            ctypes.cast(ctypes.byref(b_cpu), ctypes.c_void_p),
            4, stream=self.stream)

        # Wait for the queued copies before reading ``b_cpu``. With no
        # explicit stream the copies presumably run on the current stream,
        # so that is the one to wait on.
        if self.stream is not None:
            self.stream.synchronize()
        else:
            stream_module.get_current_stream().synchronize()
        assert b_cpu.value == a_cpu.value
示例#13
0
文件: test_memory.py 项目: keckj/cupy
    def test_stream3(self):
        # Check: destroying the stream does not affect memory deallocation
        s = cupy.cuda.Stream()
        with s:
            # Allocate while ``s`` is the active ``with`` context.
            memptr = memory.alloc(100)

        # Drop the only local reference to the stream and force a collection
        # pass before the allocation itself is released; the test passes if
        # releasing ``memptr`` afterwards raises nothing.
        del s
        gc.collect()
        del memptr
示例#14
0
文件: test_memory.py 项目: keckj/cupy
    def test_stream5(self):
        # Check: free on another stream
        s1 = cupy.cuda.Stream()
        with s1:
            # Allocate while ``s1`` is the active ``with`` context.
            memptr = memory.alloc(100)
        del s1

        # Release the allocation inside a different stream's context; the
        # test passes if this raises nothing.
        s2 = cupy.cuda.Stream()
        with s2:
            del memptr
示例#15
0
    def test_copy_to_and_from_host_using_raw_ptr(self):
        """Round-trip an int using host addresses passed as plain integers."""
        size = 4
        buf = memory.alloc(size)

        sent = ctypes.c_int(100)
        sent_addr = ctypes.cast(ctypes.byref(sent), ctypes.c_void_p).value
        buf.copy_from(sent_addr, size)

        received = ctypes.c_int()
        received_addr = ctypes.cast(
            ctypes.byref(received), ctypes.c_void_p).value
        buf.copy_to_host(received_addr, size)
        assert received.value == sent.value
示例#16
0
    def test_raw_pointer(self):
        """Run ``my_func`` on a raw allocation and compare with ``3*a - 8``."""
        module = cupy.RawModule(code=test_cast, backend=self.backend)
        kernel = module.get_function('my_func')

        count = 100
        ones = cupy.ones((count, ), dtype=cupy.float64)
        nbytes = count * ones.dtype.itemsize
        raw = memory.alloc(nbytes)
        raw.copy_from(ones.data, nbytes)  # one-initialize
        # Array view over the same allocation, used to inspect the result.
        view = cupy.ndarray((count, ), cupy.float64, memptr=raw)

        # The pointer object itself is handed to the kernel as an argument.
        kernel((1, ), (count, ), (raw, count))
        expected = 3. * ones - 8.
        assert (expected == view).all()
示例#17
0
    def test_copy_to_and_from_host_async(self):
        """Round-trip an int through one allocation using async copies.

        The stream used for the copies is synchronized before the assertion.
        """
        size = 4
        buf = memory.alloc(size)
        sent = ctypes.c_int(100)
        sent_ptr = ctypes.cast(ctypes.byref(sent), ctypes.c_void_p)
        buf.copy_from_async(sent_ptr, size, stream=self.stream)

        received = ctypes.c_int()
        received_ptr = ctypes.cast(ctypes.byref(received), ctypes.c_void_p)
        buf.copy_to_host_async(received_ptr, size, stream=self.stream)

        # Wait on the explicit stream when there is one, otherwise on the
        # current stream, so the host value is settled before it is read.
        to_sync = (self.stream if self.stream is not None
                   else stream_module.get_current_stream())
        to_sync.synchronize()
        assert received.value == sent.value
示例#18
0
 def test_can_use_cub_oversize_input2(self):
     # full reduction with array size = 64 GB should work!
     # (2**6 * 1024**3 one-byte elements laid over a 100-byte allocation)
     backing = memory.alloc(100)
     n_elems = 2**6 * 1024**3
     big = cupy.ndarray((n_elems, ), dtype=cupy.int8, memptr=backing)
     scalar_out = cupy.empty((), dtype=cupy.int8)
     verdict = self.can_use([big], [scalar_out], (0, ), ())
     assert verdict is not None
示例#19
0
 def __init__(self, handle, seed):
     """Allocate the dropout state buffer and build its descriptor.

     Args:
         handle: Handle passed to ``cudnn.dropoutGetStatesSize`` and to
             ``create_dropout_descriptor``.
         seed: Seed forwarded to ``create_dropout_descriptor``.
     """
     nbytes = cudnn.dropoutGetStatesSize(handle)
     # The buffer is stored on the instance, so it lives as long as it does.
     self._states = memory.alloc(nbytes)
     # NOTE(review): the literal 0. is presumably the initial dropout
     # ratio -- confirm against create_dropout_descriptor.
     self._desc = create_dropout_descriptor(
         handle, 0., self._states.ptr, nbytes, seed)
示例#20
0
文件: test_memory.py 项目: keckj/cupy
 def test_stream4(self):
     # Check: free on the same stream
     # (allocate and release inside one ``with`` block; passes if neither
     # step raises)
     stream = cupy.cuda.Stream()
     with stream:
         block = memory.alloc(100)
         del block
示例#21
0
文件: test_memory.py 项目: keckj/cupy
 def test_stream2(self):
     # Check: the memory was allocated on the right stream
     # (the stream recorded on the allocation must equal the active
     # stream's ``ptr``)
     stream = cupy.cuda.Stream()
     with stream:
         block = memory.alloc(100)
         recorded = block.mem.stream
         assert recorded == stream.ptr
示例#22
0
 def test_can_use_cub_oversize_input4(self):
     # partial reduction with too many (2^31) blocks; ``can_use`` is
     # expected to return None for this input
     backing = memory.alloc(100)
     shape = (2**31, 8)
     wide = cupy.ndarray(shape, dtype=cupy.int8, memptr=backing)
     scalar_out = cupy.empty((), dtype=cupy.int8)
     verdict = self.can_use([wide], [scalar_out], (1, ), (0, ))
     assert verdict is None