def test_LRU_cache9(self):
    # test if memsizes in the cache add up
    cache = config.get_plan_cache()
    assert cache.get_curr_size() == 0 <= cache.get_size()

    memsize = 0
    a = testing.shaped_random((10, ), cupy, cupy.float32)
    cupy.fft.fft(a)
    assert cache.get_curr_size() == 1 <= cache.get_size()
    memsize += next(iter(cache))[1].plan.work_area.mem.size

    a = testing.shaped_random((48, ), cupy, cupy.complex64)
    cupy.fft.fft(a)
    assert cache.get_curr_size() == 2 <= cache.get_size()
    memsize += next(iter(cache))[1].plan.work_area.mem.size

    assert memsize == cache.get_curr_memsize()
def test_LRU_cache10(self):
    # test if deletion works and if show_info() is consistent with data
    cache = config.get_plan_cache()
    assert cache.get_curr_size() == 0 <= cache.get_size()

    curr_size = 0
    size = 2
    curr_memsize = 0
    memsize = '(unlimited)'  # default

    a = testing.shaped_random((16, 16), cupy, cupy.float32)
    cupy.fft.fft2(a)
    assert cache.get_curr_size() == 1 <= cache.get_size()
    node1 = next(iter(cache))[1]
    curr_size += 1
    curr_memsize += node1.plan.work_area.mem.size
    stdout = intercept_stdout(cache.show_info)
    assert '{0} / {1} (counts)'.format(curr_size, size) in stdout
    assert '{0} / {1} (bytes)'.format(curr_memsize, memsize) in stdout
    assert str(node1) in stdout

    a = testing.shaped_random((1024, ), cupy, cupy.complex64)
    cupy.fft.ifft(a)
    assert cache.get_curr_size() == 2 <= cache.get_size()
    node2 = next(iter(cache))[1]
    curr_size += 1
    curr_memsize += node2.plan.work_area.mem.size
    stdout = intercept_stdout(cache.show_info)
    assert '{0} / {1} (counts)'.format(curr_size, size) in stdout
    assert '{0} / {1} (bytes)'.format(curr_memsize, memsize) in stdout
    assert str(node2) + '\n' + str(node1) in stdout

    # test deletion
    key = node2.key
    del cache[key]
    assert cache.get_curr_size() == 1 <= cache.get_size()
    curr_size -= 1
    curr_memsize -= node2.plan.work_area.mem.size
    stdout = intercept_stdout(cache.show_info)
    assert '{0} / {1} (counts)'.format(curr_size, size) in stdout
    assert '{0} / {1} (bytes)'.format(curr_memsize, memsize) in stdout
    assert str(node2) not in stdout
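# `intercept_stdout` is used by test_LRU_cache10 above but is not defined
# in this excerpt. A minimal sketch of what such a helper could look like,
# assuming it only needs to capture whatever the callable prints; this is
# an assumed implementation, not necessarily the one the suite ships:
def intercept_stdout(func):
    import contextlib
    import io
    # redirect everything printed by func() into an in-memory buffer
    with io.StringIO() as buf, contextlib.redirect_stdout(buf):
        func()
        return buf.getvalue()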
def test_LRU_cache2(self):
    # test if plan is reused
    cache = config.get_plan_cache()
    assert cache.get_curr_size() == 0 <= cache.get_size()

    # run once and fetch the cached plan
    a = testing.shaped_random((10, ), cupy, cupy.float32)
    cupy.fft.fft(a)
    assert cache.get_curr_size() == 1 <= cache.get_size()
    iterator = iter(cache)
    plan0 = next(iterator)[1].plan

    # repeat
    cupy.fft.fft(a)
    assert cache.get_curr_size() == 1 <= cache.get_size()
    iterator = iter(cache)
    plan1 = next(iterator)[1].plan

    # we should get the same plan
    assert plan0 is plan1
def test_LRU_cache8(self):
    # test if Plan1d and PlanNd can coexist in the same cache
    cache = config.get_plan_cache()
    assert cache.get_curr_size() == 0 <= cache.get_size()

    # do a 1D FFT
    a = testing.shaped_random((10, ), cupy, cupy.float32)
    cupy.fft.fft(a)
    assert cache.get_curr_size() == 1 <= cache.get_size()
    assert isinstance(next(iter(cache))[1].plan, cufft.Plan1d)

    # then a 3D FFT
    a = testing.shaped_random((8, 8, 8), cupy, cupy.complex128)
    cupy.fft.fftn(a)
    assert cache.get_curr_size() == 2 <= cache.get_size()
    iterator = iter(cache)

    # the cached order is 1. PlanNd, 2. Plan1d
    assert isinstance(next(iterator)[1].plan, cufft.PlanNd)
    assert isinstance(next(iterator)[1].plan, cufft.Plan1d)
def test_LRU_cache13(self):
    # test if plan insertion respects the memory size limit
    cache = config.get_plan_cache()
    cache.set_memsize(1024)

    # ensure a fresh state
    assert cache.get_curr_size() == 0 <= cache.get_size()

    # On CUDA 10.0 + sm75, this generates a plan of size 1024 bytes
    a = testing.shaped_random((128, ), cupy, cupy.complex64)
    cupy.fft.ifft(a)
    assert cache.get_curr_size() == 1 <= cache.get_size()
    assert cache.get_curr_memsize() == 1024 == cache.get_memsize()

    # a second plan (of the same size) is generated, but the cache is
    # full, so the first plan is evicted
    a = testing.shaped_random((64, ), cupy, cupy.complex128)
    cupy.fft.ifft(a)
    assert cache.get_curr_size() == 1 <= cache.get_size()
    assert cache.get_curr_memsize() == 1024 == cache.get_memsize()
    plan = next(iter(cache))[1].plan

    # this plan is twice as large, so it won't fit in
    a = testing.shaped_random((128, ), cupy, cupy.complex128)
    with pytest.raises(RuntimeError) as e:
        cupy.fft.ifft(a)
    assert 'memsize is too large' in str(e.value)

    # the cache remains intact
    assert cache.get_curr_size() == 1 <= cache.get_size()
    assert cache.get_curr_memsize() == 1024 == cache.get_memsize()
    plan1 = next(iter(cache))[1].plan
    assert plan1 is plan

    # doubling the cache size makes the plan just fit (and evicts the
    # existing one)
    cache.set_memsize(2048)
    cupy.fft.ifft(a)
    assert cache.get_curr_size() == 1 <= cache.get_size()
    assert cache.get_curr_memsize() == 2048 == cache.get_memsize()
    plan2 = next(iter(cache))[1].plan
    assert plan2 is not plan
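# Usage sketch (illustration only, not part of the test suite): the same
# set_memsize() API exercised in test_LRU_cache13 above can cap the cache
# by the total bytes of the cuFFT work areas. The 1 MiB figure below is an
# arbitrary example, and -1 is assumed here to restore the default that
# show_info() reports as '(unlimited)'.
def _memsize_usage_sketch():
    cache = config.get_plan_cache()
    cache.set_memsize(1024**2)  # LRU plans are evicted beyond ~1 MiB
    cache.set_memsize(-1)       # back to the unlimited default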
def test_LRU_cache4(self):
    # test if fetching the plan will reorder it to the top
    cache = config.get_plan_cache()
    assert cache.get_curr_size() == 0 <= cache.get_size()

    # this creates a Plan1d
    a = testing.shaped_random((10, ), cupy, cupy.float32)
    cupy.fft.fft(a)
    assert cache.get_curr_size() == 1 <= cache.get_size()

    # this creates a PlanNd
    a = testing.shaped_random((10, 20), cupy, cupy.float32)
    cupy.fft.fftn(a)
    assert cache.get_curr_size() == 2 <= cache.get_size()

    # The first in the cache is the most recently used one;
    # using an iterator to access the linked list guarantees that
    # we don't alter the cache order
    iterator = iter(cache)
    assert isinstance(next(iterator)[1].plan, cufft.PlanNd)
    assert isinstance(next(iterator)[1].plan, cufft.Plan1d)
    with pytest.raises(StopIteration):
        next(iterator)

    # this brings Plan1d to the top
    a = testing.shaped_random((10, ), cupy, cupy.float32)
    cupy.fft.fft(a)
    assert cache.get_curr_size() == 2 <= cache.get_size()
    iterator = iter(cache)
    assert isinstance(next(iterator)[1].plan, cufft.Plan1d)
    assert isinstance(next(iterator)[1].plan, cufft.PlanNd)
    with pytest.raises(StopIteration):
        next(iterator)

    # An LRU cache guarantees that such a silly operation never
    # raises StopIteration
    iterator = iter(cache)
    for i in range(100):
        cache[next(iterator)[0]]
def test_LRU_cache3(self):
    # test if cache size is limited
    cache = config.get_plan_cache()
    assert cache.get_curr_size() == 0 <= cache.get_size()

    # run once and fetch the cached plan
    a = testing.shaped_random((10, ), cupy, cupy.float32)
    cupy.fft.fft(a)
    assert cache.get_curr_size() == 1 <= cache.get_size()
    iterator = iter(cache)
    plan = next(iterator)[1].plan

    # run another two FFTs with different sizes so that the first
    # plan is discarded from the cache
    a = testing.shaped_random((20, ), cupy, cupy.float32)
    cupy.fft.fft(a)
    assert cache.get_curr_size() == 2 <= cache.get_size()
    a = testing.shaped_random((30, ), cupy, cupy.float32)
    cupy.fft.fft(a)
    assert cache.get_curr_size() == 2 <= cache.get_size()

    # check if the first plan is indeed not cached
    for _, node in cache:
        assert plan is not node.plan
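# NOTE: the eviction at two entries in test_LRU_cache3 above (and the
# '... / 2 (counts)' lines expected by test_LRU_cache10) rely on the cache
# size limit having been set to 2 beforehand, presumably in the suite's
# setUp, which is not shown in this excerpt. A sketch of that assumed
# setup, using the same cache API:
def _assumed_setup_sketch():
    cache = config.get_plan_cache()
    cache.set_size(2)  # keep at most two cuFFT plans per device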
def init_caches(gpus):
    for i in gpus:
        with device.Device(i):
            config.get_plan_cache()
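# Usage sketch (illustration only): since the plan cache is a per-device
# singleton, calling config.get_plan_cache() under each device context
# instantiates it there. The GPU ids below are arbitrary examples:
#
#     init_caches((0, 1))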