def test_consume_no_sync(self):
    """Importing a foreign array that has no stream must not synchronize."""
    # The exporting array is created without an associated stream.
    foreign = ForeignArray(cuda.device_array(10))

    stream_cls = cuda.cudadrv.driver.Stream
    with patch.object(stream_cls, 'synchronize',
                      return_value=None) as sync_spy:
        cuda.as_cuda_array(foreign)

    # Stream.synchronize must never have been invoked during the import.
    sync_spy.assert_not_called()
def test_launch_sync_disabled(self):
    """Launching on foreign arrays must not synchronize when the
    CUDA_ARRAY_INTERFACE_SYNC config is overridden to False."""
    # Two independent streams, each backing its own foreign array.
    stream_a = cuda.stream()
    stream_b = cuda.stream()
    foreign_a = ForeignArray(cuda.device_array(10, stream=stream_a))
    foreign_b = ForeignArray(cuda.device_array(10, stream=stream_b))

    stream_cls = cuda.cudadrv.driver.Stream

    # Both the kernel definition and its launch happen while the override
    # is active.
    with override_config('CUDA_ARRAY_INTERFACE_SYNC', False):
        @cuda.jit
        def f(x, y):
            pass

        with patch.object(stream_cls, 'synchronize',
                          return_value=None) as sync_spy:
            f[1, 1](foreign_a, foreign_b)

        # Neither stream may have been synchronized by the launch.
        sync_spy.assert_not_called()
def test_consume_stream(self):
    """An imported array adopts the exporter's stream as its own stream."""
    exporter_stream = cuda.stream()
    foreign = ForeignArray(cuda.device_array(10, stream=exporter_stream))

    imported = cuda.as_cuda_array(foreign)

    # The stream on the imported array is flagged as external ...
    self.assertTrue(imported.stream.external)

    # ... and its value matches that of the stream the exporter used.
    expected = self.get_stream_value(exporter_stream)
    actual = self.get_stream_value(imported.stream)
    self.assertEqual(expected, actual)
def test_consume_sync(self):
    """Importing a foreign array that carries a stream synchronizes once."""
    exporter_stream = cuda.stream()
    foreign = ForeignArray(cuda.device_array(10, stream=exporter_stream))

    stream_cls = cuda.cudadrv.driver.Stream
    with patch.object(stream_cls, 'synchronize',
                      return_value=None) as sync_spy:
        cuda.as_cuda_array(foreign)

    # Exactly one synchronize call, made without arguments.
    sync_spy.assert_called_once_with()
def test_zero_size_array(self):
    """Regression test for #4175: a zero-size foreign array can be passed
    to a kernel launch."""
    empty = cuda.device_array(0)

    # The interface of a zero-size device array reports a data pointer of 0.
    data_ptr = empty.__cuda_array_interface__['data'][0]
    self.assertEqual(data_ptr, 0)

    @cuda.jit
    def add_one(arr):
        idx = cuda.grid(1)
        if idx < arr.shape[0]:
            arr[idx] += 1

    # The launch itself is the check: it must complete without raising.
    add_one[1, 10](ForeignArray(empty))
def test_as_cuda_array(self):
    """Round-trip a device array through ForeignArray and as_cuda_array."""
    host = np.arange(10)
    # A plain host array is not a CUDA array.
    self.assertFalse(cuda.is_cuda_array(host))

    device = cuda.to_device(host)
    self.assertTrue(cuda.is_cuda_array(device))

    foreign = ForeignArray(device)
    self.assertTrue(cuda.is_cuda_array(foreign))

    wrapped = cuda.as_cuda_array(foreign)
    self.assertTrue(cuda.is_cuda_array(wrapped))

    # Both device-side views must hold the original host contents.
    np.testing.assert_array_equal(wrapped.copy_to_host(), host)
    np.testing.assert_array_equal(device.copy_to_host(), host)

    # The wrapped array and the source device array share one buffer.
    self.assertPointersEqual(wrapped, device)
def test_kernel_arg(self):
    """A kernel writing through a wrapped foreign array also updates the
    device array it was created from."""
    host = np.arange(10)
    device = cuda.to_device(host)
    wrapped = cuda.as_cuda_array(ForeignArray(device))

    @cuda.jit
    def mutate(arr, val):
        arr[cuda.grid(1)] += val

    increment = 7
    launcher = mutate.forall(wrapped.size)
    launcher(wrapped, increment)

    # The update must be visible through both views of the data.
    expected = host + increment
    np.testing.assert_array_equal(wrapped.copy_to_host(), expected)
    np.testing.assert_array_equal(device.copy_to_host(), expected)
def test_launch_no_sync(self):
    """Launching a kernel on a stream-less foreign array must not
    synchronize."""
    # The exporting array is created without an associated stream.
    foreign = ForeignArray(cuda.device_array(10))

    @cuda.jit
    def f(x):
        pass

    stream_cls = cuda.cudadrv.driver.Stream
    with patch.object(stream_cls, 'synchronize',
                      return_value=None) as sync_spy:
        f[1, 1](foreign)

    # Stream.synchronize must never have been invoked by the launch.
    sync_spy.assert_not_called()
def test_consume_sync_disabled(self):
    """Importing a foreign array with a stream must not synchronize when
    the CUDA_ARRAY_INTERFACE_SYNC config is overridden to False."""
    exporter_stream = cuda.stream()
    foreign = ForeignArray(cuda.device_array(10, stream=exporter_stream))

    stream_cls = cuda.cudadrv.driver.Stream

    # The config is overridden only for the duration of this block, so the
    # test does not rely on how the rest of the suite is configured.
    with override_config('CUDA_ARRAY_INTERFACE_SYNC', False), \
            patch.object(stream_cls, 'synchronize',
                         return_value=None) as sync_spy:
        cuda.as_cuda_array(foreign)

    # Despite the array having a stream, no synchronize call was made.
    sync_spy.assert_not_called()
def check_ipc_handle_serialization(self, index_arg=None, foreign=False):
    """Check that an IPC handle survives pickling and works in a child.

    Parameters
    ----------
    index_arg : optional
        When not None, the device array is indexed with this value before
        the IPC handle is created.
    foreign : bool
        When True, the device array is first wrapped in a ForeignArray and
        re-imported with ``cuda.as_cuda_array``.
    """
    # prepare data for IPC
    arr = np.arange(10, dtype=np.intp)
    devarr = cuda.to_device(arr)
    if index_arg is not None:
        devarr = devarr[index_arg]
    if foreign:
        devarr = cuda.as_cuda_array(ForeignArray(devarr))
    expect = devarr.copy_to_host()

    # create IPC handle
    cuda_ctx = cuda.current_context()
    ipch = cuda_ctx.get_ipc_handle(devarr.gpu_data)

    # A pickle round trip must preserve the handle's size and raw handle
    # data, and the reconstructed handle must have no base.
    buf = pickle.dumps(ipch)
    ipch_recon = pickle.loads(buf)
    self.assertIs(ipch_recon.base, None)
    self.assertEqual(ipch_recon.size, ipch.size)

    if driver.USE_NV_BINDING:
        self.assertEqual(ipch_recon.handle.reserved, ipch.handle.reserved)
    else:
        self.assertEqual(tuple(ipch_recon.handle), tuple(ipch.handle))

    # spawn new process for testing
    mp_ctx = mp.get_context('spawn')
    result_queue = mp_ctx.Queue()
    args = (ipch, result_queue)
    proc = mp_ctx.Process(target=serialize_ipc_handle_test, args=args)
    proc.start()
    try:
        # The child reports a (success, payload) pair on the queue.
        succ, out = result_queue.get()
        if not succ:
            self.fail(out)
        np.testing.assert_equal(expect, out)
    finally:
        # Join on the failure path too, so a failing test does not leave
        # the spawned child un-joined.
        proc.join(3)
def check_ipc_array(self, index_arg=None, foreign=False):
    """Check that an array's IPC handle can be used from a child process.

    Parameters
    ----------
    index_arg : optional
        When not None, the device array is indexed with this value before
        the IPC handle is requested.
    foreign : bool
        When True, the device array is first wrapped in a ForeignArray and
        re-imported with ``cuda.as_cuda_array``.
    """
    # prepare data for IPC
    arr = np.arange(10, dtype=np.intp)
    devarr = cuda.to_device(arr)

    # Slice
    if index_arg is not None:
        devarr = devarr[index_arg]
    if foreign:
        devarr = cuda.as_cuda_array(ForeignArray(devarr))
    expect = devarr.copy_to_host()
    ipch = devarr.get_ipc_handle()

    # spawn new process for testing
    mp_ctx = mp.get_context('spawn')
    result_queue = mp_ctx.Queue()
    args = (ipch, result_queue)
    proc = mp_ctx.Process(target=ipc_array_test, args=args)
    proc.start()
    try:
        # The child reports a (success, payload) pair on the queue.
        succ, out = result_queue.get()
        if not succ:
            self.fail(out)
        np.testing.assert_equal(expect, out)
    finally:
        # Join on the failure path too, so a failing test does not leave
        # the spawned child un-joined.
        proc.join(3)