Пример #1
0
 def setUp(self):
     """Build the per-test fixture: an async pool, a stream and its handle.

     Stores a new ``MemoryAsyncPool`` on ``self.pool``, the module's
     allocation unit size on ``self.unit``, a new stream on ``self.stream``
     and that stream's ``ptr`` on ``self.stream_ident``. Finally frees all
     blocks of CuPy's default memory pool and synchronizes the device.
     """
     self.pool = memory.MemoryAsyncPool()
     self.unit = memory._allocation_unit_size

     # Keep both the stream object and its raw pointer for the tests.
     fresh_stream = stream_module.Stream()
     self.stream = fresh_stream
     self.stream_ident = fresh_stream.ptr

     # Release blocks held by the default pool, then synchronize the device.
     default_pool = cupy.get_default_memory_pool()
     default_pool.free_all_blocks()
     device = cupy.cuda.Device()
     device.synchronize()
Пример #2
0
 def setUp(self):
     """Build the async-pool fixture, skipping when device 0 lacks support.

     Queries the ``cudaDevAttrMemoryPoolsSupported`` attribute of device 0
     and skips the test via ``pytest.skip`` when it is 0. Otherwise stores a
     new ``MemoryAsyncPool`` on ``self.pool``, the allocation unit size on
     ``self.unit``, a new stream on ``self.stream`` and its ``ptr`` on
     ``self.stream_ident``, then frees all blocks of the default memory pool
     and synchronizes the device.
     """
     runtime = cupy.cuda.runtime
     pools_supported = runtime.deviceGetAttribute(
         runtime.cudaDevAttrMemoryPoolsSupported, 0)
     if pools_supported == 0:
         pytest.skip('malloc_async is not supported on device 0')

     self.pool = memory.MemoryAsyncPool()
     self.unit = memory._allocation_unit_size

     # Keep both the stream object and its raw pointer for the tests.
     fresh_stream = stream_module.Stream()
     self.stream = fresh_stream
     self.stream_ident = fresh_stream.ptr

     # Release blocks held by the default pool, then synchronize the device.
     default_pool = cupy.get_default_memory_pool()
     default_pool.free_all_blocks()
     device = cupy.cuda.Device()
     device.synchronize()
Пример #3
0
# Output operand: random values with one axis per mode in mode_c, each axis
# sized from the extent table.
c = cupy.random.random([extent[i] for i in mode_c])
# Cast both operands to the dtype being measured.
a = a.astype(dtype)
c = c.astype(dtype)

# Descriptors that accompany each array in the reduction calls below.
desc_a = cutensor.create_tensor_descriptor(a)
desc_c = cutensor.create_tensor_descriptor(c)

# Scalars forwarded to cutensor.reduction.
# NOTE(review): presumably the result is alpha * reduce(a) + beta * c --
# confirm against the cuTENSOR reduction documentation.
alpha = 1.0
beta = 0.1

# rehearsal
# NOTE(review): untimed first call, presumably to keep one-time setup cost
# out of the measurement -- confirm.
c = cutensor.reduction(alpha, a, desc_a, mode_a, beta, c, desc_c, mode_c)

ev_start = stream.Event()
ev_end = stream.Event()
st = stream.Stream()
with st:
    # measurement
    # The two events bracket the single reduction call being timed.
    ev_start.record()
    c = cutensor.reduction(alpha, a, desc_a, mode_a, beta, c, desc_c, mode_c)
    ev_end.record()
# Synchronize on st before reading the time between the two events.
st.synchronize()

elapsed_ms = stream.get_elapsed_time(ev_start, ev_end)
# Traffic estimate in bytes: all of a plus all of c.
transfer_byte = a.size * a.itemsize + c.size * c.itemsize
# c is counted a second time when beta is non-zero.
# NOTE(review): presumably because c is then read as an input as well as
# written -- confirm.
if beta != 0.0:
    transfer_byte += c.size * c.itemsize
# bytes / ms / 1e6 equals bytes / s / 1e9, i.e. GB/s.
gbs = transfer_byte / elapsed_ms / 1e6

print('dtype: {}'.format(numpy.dtype(dtype).name))
print('time (ms): {}'.format(elapsed_ms))
Пример #4
0
 def setUp(self):
     """Build the per-test fixture around a mock-backed single-device pool.

     Stores a ``SingleDeviceMemoryPool`` that allocates through
     ``mock_alloc`` on ``self.pool``, the module's allocation unit size on
     ``self.unit``, a new stream on ``self.stream`` and that stream's
     ``ptr`` on ``self.stream_ptr``.
     """
     self.pool = memory.SingleDeviceMemoryPool(allocator=mock_alloc)
     self.unit = memory._allocation_unit_size

     # Keep both the stream object and its raw pointer for the tests.
     fresh_stream = stream_module.Stream()
     self.stream = fresh_stream
     self.stream_ptr = fresh_stream.ptr
Пример #5
0
 def setUp(self):
     self.stream = stream_module.Stream() if self.use_streams else None
Пример #6
0
def _get_stream(strm):
    """Return ``strm`` itself, or a null-stream object when it is ``None``.

    Only ``None`` triggers the fallback; any other value, including falsy
    ones, is handed back unchanged.
    """
    return strm if strm is not None else stream.Stream(null=True)