示例#1
0
 def test_device_array(self):
     """Launch ``copy_kernel`` into a fresh device array and check the result.

     The destination is created with ``roc.device_array_like`` and the
     source is uploaded with ``roc.to_device``; after the launch the
     destination, copied back to the host, must equal the source data.
     """
     num_blocks, threads_per_block = 4, 128
     total = num_blocks * threads_per_block
     expected = np.arange(1, total + 1)

     logger.info('device array like')
     dst_dev = roc.device_array_like(expected)

     logger.info('pre launch')
     # Bind the launch configuration first, then upload the source, so the
     # evaluation order matches a single-expression launch.
     launcher = copy_kernel[num_blocks, threads_per_block]
     src_dev = roc.to_device(expected)
     launcher(dst_dev, src_dev)
     logger.info('post launch')

     result = dst_dev.copy_to_host()
     np.testing.assert_equal(result, expected)
示例#2
0
 def test_device_array(self):
     """Check that ``copy_kernel`` fills a device array with the source data.

     A destination shaped like the expected data is allocated on the
     device, the kernel is launched with 4 blocks of 128 work-items, and
     the destination is then compared against the expected host array.
     """
     grid = 4
     block = 128
     expected = np.arange(1, grid * block + 1)

     logger.info('device array like')
     out_dev = roc.device_array_like(expected)

     logger.info('pre launch')
     # Keep the order: configure the kernel, upload the input, then call.
     configured = copy_kernel[grid, block]
     in_dev = roc.to_device(expected)
     configured(out_dev, in_dev)
     logger.info('post launch')

     np.testing.assert_equal(out_dev.copy_to_host(), expected)
示例#3
0
 def test_async_copy_to_device_and_back(self):
     """Round-trip a host buffer through the device on a single stream.

     Each stream-bound copy is expected to add one entry to
     ``stream._signals``; ``stream.synchronize()`` is expected to leave
     that list empty.  The data that comes back must equal the data that
     was sent.
     """
     arr = np.arange(1024)
     hostarr = roc.coarsegrain_array(shape=arr.shape, dtype=arr.dtype)
     # Fill the source buffer with known values; otherwise the round trip
     # would only compare whatever the freshly allocated buffers contain.
     hostarr[:] = arr
     gotarr = roc.coarsegrain_array(shape=arr.shape, dtype=arr.dtype)
     stream = roc.stream()
     ct = len(stream._signals)
     devarr = roc.to_device(hostarr, stream=stream)
     self.assertEqual(ct + 1, len(stream._signals))
     devarr.copy_to_host(gotarr, stream=stream)
     self.assertEqual(ct + 2, len(stream._signals))
     stream.synchronize()
     self.assertEqual(0, len(stream._signals))
     np.testing.assert_equal(hostarr, gotarr)
     # Also pin the result to the reference data, not just to the source
     # buffer, so a corrupted source cannot mask a failure.
     np.testing.assert_equal(arr, gotarr)
示例#4
0
 def test_device_device_transfer(self):
     """Copy one device array into another and verify the destination.

     The destination starts as ``roc.device_array_like(expect)`` and is
     asserted to differ from ``expect`` everywhere before the copy; after
     ``copy_to_device`` from a staged device array it must equal
     ``expect``.
     """
     # This has to be run in isolation and before the above
     # TODO: investigate why?!
     # NOTE(review): the pre-copy assertion below reads device memory this
     # test never wrote; possibly related to the ordering requirement
     # above -- confirm.
     count = 1000
     expect = np.arange(1, count + 1, dtype=np.int32)

     logger.info('device array like')
     target = roc.device_array_like(expect)
     before_copy = target.copy_to_host()
     self.assertTrue(np.all(expect != before_copy))

     logger.info('to_device')
     staged = roc.to_device(expect)

     logger.info('device -> device')
     target.copy_to_device(staged)

     after_copy = target.copy_to_host()
     np.testing.assert_equal(expect, after_copy)
示例#5
0
 def test_device_device_transfer(self):
     """Verify a device-to-device copy between two ``roc`` arrays.

     Before the copy the destination is asserted to differ from the
     expected values at every index; afterwards its host copy must match
     the expected values exactly.
     """
     # This has to be run in isolation and before the above
     # TODO: investigate why?!
     # NOTE(review): the initial inequality check depends on the contents
     # of a device array that has not been written by this test -- confirm
     # whether that is the source of the ordering constraint.
     n_items = 1000
     expect = np.arange(1, n_items + 1, dtype=np.int32)

     logger.info('device array like')
     dest_dev = roc.device_array_like(expect)
     initial = dest_dev.copy_to_host()
     differs_everywhere = np.all(expect != initial)
     self.assertTrue(differs_everywhere)

     logger.info('to_device')
     source_dev = roc.to_device(expect)

     logger.info('device -> device')
     dest_dev.copy_to_device(source_dev)

     np.testing.assert_equal(expect, dest_dev.copy_to_host())
示例#6
0
 def test_async_copy_to_device_and_back(self):
     """Send a host buffer to the device and back using one stream.

     The test tracks ``len(stream._signals)``: it must grow by one per
     stream-bound copy and be zero after ``stream.synchronize()``.  The
     buffer read back must hold the same values that were uploaded.
     """
     arr = np.arange(1024)
     hostarr = roc.coarsegrain_array(shape=arr.shape, dtype=arr.dtype)
     # Give the source buffer well-defined contents so the comparison at
     # the end checks real data rather than indeterminate buffer contents.
     hostarr[:] = arr
     gotarr = roc.coarsegrain_array(shape=arr.shape, dtype=arr.dtype)
     stream = roc.stream()
     ct = len(stream._signals)
     devarr = roc.to_device(hostarr, stream=stream)
     self.assertEqual(ct + 1, len(stream._signals))
     devarr.copy_to_host(gotarr, stream=stream)
     self.assertEqual(ct + 2, len(stream._signals))
     stream.synchronize()
     self.assertEqual(0, len(stream._signals))
     np.testing.assert_equal(hostarr, gotarr)
     # Compare against the reference values as well as the source buffer.
     np.testing.assert_equal(arr, gotarr)
    def test_ctx_managed_stream(self):
        """Run many stream-bound launches inside ``stream.auto_synchronize()``.

        Inside the ``with`` block the test uploads the input on the stream,
        then repeats 500 times: launch ``add1_kernel`` on the stream and
        call ``gpu_arr.copy_to_device(gpu_res_arr, stream=stream)``.  The
        result is copied to a host buffer on the same stream, and after the
        block exits it must equal the input plus the number of iterations.
        """
        logger.info("context info: %s", roc.get_context().agent)

        @roc.jit("int32[:], int32[:]")
        def add1_kernel(dst, src):
            gid = roc.get_global_id(0)
            if gid < dst.size:
                dst[gid] = src[gid] + 1

        threads_per_block = 256
        num_blocks = 10**5
        rounds = 500

        reference = np.arange(threads_per_block * num_blocks, dtype=np.int32)
        buf_shape = reference.shape
        buf_dtype = reference.dtype

        logger.info("make coarse_arr")
        coarse_arr = roc.coarsegrain_array(shape=buf_shape, dtype=buf_dtype)
        coarse_arr[:] = reference

        logger.info("make coarse_res_arr")
        coarse_res_arr = roc.coarsegrain_array(shape=buf_shape,
                                               dtype=buf_dtype)
        coarse_res_arr[:] = 0

        logger.info("make stream")
        stream = roc.stream()

        with stream.auto_synchronize():
            logger.info("make gpu_res_arr")
            gpu_res_arr = roc.device_array_like(coarse_arr)

            logger.info("make gpu_arr")
            gpu_arr = roc.to_device(coarse_arr, stream=stream)

            for launch_no in range(rounds):
                logger.info("launch kernel: %d", launch_no)
                add1_kernel[num_blocks, threads_per_block, stream](
                    gpu_res_arr, gpu_arr)
                gpu_arr.copy_to_device(gpu_res_arr, stream=stream)

            logger.info("get kernel result")
            gpu_res_arr.copy_to_host(coarse_res_arr, stream=stream)

        logger.info("synchronize on ctx __exit__")

        logger.info("compare result")
        expected = coarse_arr + rounds
        np.testing.assert_equal(coarse_res_arr, expected)
示例#8
0
    def test_ctx_managed_stream(self):
        """Exercise a stream used through ``stream.auto_synchronize()``.

        A small ``add1_kernel`` is launched 500 times on the stream; after
        each launch ``gpu_arr.copy_to_device(gpu_res_arr, stream=stream)``
        is issued.  Once the ``with`` block has exited, the host result
        buffer is expected to hold the input values plus 500.
        """
        logger.info('context info: %s', roc.get_context().agent)

        @roc.jit("int32[:], int32[:]")
        def add1_kernel(dst, src):
            idx = roc.get_global_id(0)
            if idx < dst.size:
                dst[idx] = src[idx] + 1

        block_size = 256
        grid_size = 10**5
        repeat = 500
        item_count = block_size * grid_size

        source = np.arange(item_count, dtype=np.int32)
        alloc_kwargs = dict(shape=source.shape, dtype=source.dtype)

        logger.info('make coarse_arr')
        coarse_arr = roc.coarsegrain_array(**alloc_kwargs)
        coarse_arr[:] = source

        logger.info('make coarse_res_arr')
        coarse_res_arr = roc.coarsegrain_array(**alloc_kwargs)
        coarse_res_arr[:] = 0

        logger.info("make stream")
        stream = roc.stream()

        with stream.auto_synchronize():
            logger.info('make gpu_res_arr')
            gpu_res_arr = roc.device_array_like(coarse_arr)

            logger.info('make gpu_arr')
            gpu_arr = roc.to_device(coarse_arr, stream=stream)

            for step in range(repeat):
                logger.info('launch kernel: %d', step)
                add1_kernel[grid_size, block_size, stream](gpu_res_arr,
                                                           gpu_arr)
                gpu_arr.copy_to_device(gpu_res_arr, stream=stream)

            logger.info('get kernel result')
            gpu_res_arr.copy_to_host(coarse_res_arr, stream=stream)

        logger.info("synchronize on ctx __exit__")

        logger.info("compare result")
        np.testing.assert_equal(coarse_res_arr, coarse_arr + repeat)
示例#9
0
    def test_async_copy_to_device(self):
        """Check a stream-bound host-to-device copy and the sync that follows.

        ``copy_to_device`` with a stream must add one entry to
        ``stream._signals``; a subsequent ``copy_to_host()`` without a
        stream must leave ``stream._signals`` empty and return the data
        that was copied in.
        """
        base = np.arange(1024)
        devarr = roc.to_device(base)

        # allocate pinned array equivalent, holding values that differ from
        # what is already on the device
        pinned = roc.coarsegrain_array(shape=base.shape, dtype=base.dtype)
        pinned[:] = base + 100

        stream = roc.stream()
        pending_before = len(stream._signals)
        devarr.copy_to_device(pinned, stream=stream)
        pending_after = len(stream._signals)
        self.assertEqual(pending_before + 1, pending_after,
                         "no new async signal")

        # implicit synchronization
        got = devarr.copy_to_host()
        remaining = len(stream._signals)
        self.assertEqual(0, remaining, "missing implicit synchronization")
        np.testing.assert_equal(pinned, got)
示例#10
0
    def test_async_copy_to_device(self):
        """Verify an asynchronous upload registers a signal and is later synced.

        After a stream-bound ``copy_to_device`` the stream's ``_signals``
        list must be one longer than before.  The following stream-less
        ``copy_to_host()`` must leave it empty and yield the uploaded
        values.
        """
        original = np.arange(1024)
        on_device = roc.to_device(original)

        # allocate pinned array equivalent
        host_buf = roc.coarsegrain_array(shape=original.shape,
                                         dtype=original.dtype)
        host_buf[:] = original + 100

        stream = roc.stream()
        baseline = len(stream._signals)
        on_device.copy_to_device(host_buf, stream=stream)
        self.assertEqual(
            baseline + 1,
            len(stream._signals),
            "no new async signal",
        )

        # implicit synchronization
        fetched = on_device.copy_to_host()
        self.assertEqual(
            0,
            len(stream._signals),
            "missing implicit synchronization",
        )
        np.testing.assert_equal(host_buf, fetched)