def test_2d_real_to_complex_double(self, ctx):
    """Compare a float64 -> complex128 2D transform with ``np.fft.rfft2``.

    A (64, 32) float64 array is transformed out of place into a
    (64, 32 // 2 + 1) complex128 array using ``axes=(1, 0)``; the result
    must match NumPy within ``rtol = atol = 1e-8``.

    Returns early without asserting anything when ``has_double(ctx)`` is
    false.

    ctx: context object passed on to ``cl.CommandQueue`` and ``FFT``.
    """
    if not has_double(ctx):  #TODO: find better way to skip test
        return
    queue = cl.CommandQueue(ctx)
    M = 64
    N = 32
    nd_data = np.arange(M * N, dtype=np.float64)
    nd_data.shape = (M, N)
    cl_data = cla.to_device(queue, nd_data)
    cl_data_transformed = cla.zeros(queue, (M, N // 2 + 1), dtype=np.complex128)
    transform = FFT(ctx, queue, cl_data, cl_data_transformed, axes=(1, 0))
    transform.enqueue()

    # Call get() so the diagnostics show the transformed data rather than
    # the repr of the bound method; reuse the same copy for the assertion.
    result = cl_data_transformed.get()
    expected = np.fft.rfft2(nd_data)
    print(result)
    print(expected)
    assert np.allclose(result, expected, rtol=1e-8, atol=1e-8)
def test_2d_real_to_complex(self, ctx):
    """Compare a float32 -> complex64 2D transform with ``np.fft.rfft2``.

    A (64, 32) float32 array is transformed out of place into a
    (64, 32 // 2 + 1) complex64 array using ``axes=(1, 0)``; the result
    must match NumPy within ``rtol = atol = 1e-3``.

    ctx: context object passed on to ``cl.CommandQueue`` and ``FFT``.
    """
    queue = cl.CommandQueue(ctx)
    M = 64
    N = 32
    nd_data = np.arange(M * N, dtype=np.float32)
    nd_data.shape = (M, N)
    cl_data = cla.to_device(queue, nd_data)
    cl_data_transformed = cla.zeros(queue, (M, N // 2 + 1), dtype=np.complex64)
    transform = FFT(ctx, queue, cl_data, cl_data_transformed, axes=(1, 0))
    transform.enqueue()

    # Call get() so the diagnostics show the transformed data rather than
    # the repr of the bound method; reuse the same copy for the assertion.
    result = cl_data_transformed.get()
    expected = np.fft.rfft2(nd_data)
    print(result)
    print(expected)
    assert np.allclose(result, expected, rtol=1e-3, atol=1e-3)
def test_2d_in_4d_out_of_place(self, ctx):
    """Compare a 2D transform inside a 4D array with ``np.fft.fft2``.

    A (4, 5, 64, 32) complex64 array is transformed out of place over
    ``axes``; the result must match ``np.fft.fft2`` over the same axes
    within ``rtol = atol = 1e-3``.

    ctx: context object passed on to ``cl.CommandQueue`` and ``FFT``.
    """
    queue = cl.CommandQueue(ctx)
    L1 = 4
    L2 = 5
    M = 64
    N = 32
    axes = (-1, -2) #ok
    #axes = (0,1) #ok
    #axes = (0,2) #cannot be collapsed
    nd_data = np.arange(L1 * L2 * M * N, dtype=np.complex64)
    nd_data.shape = (L1, L2, M, N)
    cl_data = cla.to_device(queue, nd_data)
    cl_data_transformed = cla.zeros_like(cl_data)
    transform = FFT(ctx, queue, cl_data, cl_data_transformed, axes=axes)
    transform.enqueue()

    # Call get() so the diagnostics show the transformed data rather than
    # the repr of the bound method, and print the same reference that is
    # asserted against (computed over ``axes``, not fft2's default axes).
    result = cl_data_transformed.get()
    expected = np.fft.fft2(nd_data, axes=axes)
    print(result)
    print(expected)
    assert np.allclose(result, expected, rtol=1e-3, atol=1e-3)
def test_2d_real_to_complex(self, ctx):
    """Compare a float32 -> complex64 2D transform with ``np.fft.rfft2``.

    A (64, 32) float32 array is transformed out of place into a
    (64, 32 // 2 + 1) complex64 array using ``axes=(1, 0)``; the result
    must match NumPy within ``rtol = atol = 1e-3``.

    ctx: context object passed on to ``cl.CommandQueue`` and ``FFT``.
    """
    queue = cl.CommandQueue(ctx)
    M = 64
    N = 32
    nd_data = np.arange(M * N, dtype=np.float32)
    nd_data.shape = (M, N)
    cl_data = cla.to_device(queue, nd_data)
    cl_data_transformed = cla.zeros(queue, (M, N // 2 + 1), dtype=np.complex64)
    transform = FFT(ctx, queue, cl_data, cl_data_transformed, axes=(1, 0))
    transform.enqueue()

    # Call get() so the diagnostics show the transformed data rather than
    # the repr of the bound method; reuse the same copy for the assertion.
    result = cl_data_transformed.get()
    expected = np.fft.rfft2(nd_data)
    print(result)
    print(expected)
    assert np.allclose(result, expected, rtol=1e-3, atol=1e-3)
def test_2d_real_to_complex_double(self, ctx):
    """Compare a float64 -> complex128 2D transform with ``np.fft.rfft2``.

    A (64, 32) float64 array is transformed out of place into a
    (64, 32 // 2 + 1) complex128 array using ``axes=(1, 0)``; the result
    must match NumPy within ``rtol = atol = 1e-8``.

    Returns early without asserting anything when ``has_double(ctx)`` is
    false.

    ctx: context object passed on to ``cl.CommandQueue`` and ``FFT``.
    """
    if not has_double(ctx):  #TODO: find better way to skip test
        return
    queue = cl.CommandQueue(ctx)
    M = 64
    N = 32
    nd_data = np.arange(M * N, dtype=np.float64)
    nd_data.shape = (M, N)
    cl_data = cla.to_device(queue, nd_data)
    cl_data_transformed = cla.zeros(queue, (M, N // 2 + 1), dtype=np.complex128)
    transform = FFT(ctx, queue, cl_data, cl_data_transformed, axes=(1, 0))
    transform.enqueue()

    # Call get() so the diagnostics show the transformed data rather than
    # the repr of the bound method; reuse the same copy for the assertion.
    result = cl_data_transformed.get()
    expected = np.fft.rfft2(nd_data)
    print(result)
    print(expected)
    assert np.allclose(result, expected, rtol=1e-8, atol=1e-8)
def test_2d_out_of_place(self, ctx):
    """Compare a 2D transform inside a 3D array with ``np.fft.fft2``.

    A (4, 64, 32) complex64 array is transformed out of place over
    ``axes = (-1, -2)``; the result must match ``np.fft.fft2`` over the
    same axes within ``rtol = atol = 1e-3``.

    ctx: context object passed on to ``cl.CommandQueue`` and ``FFT``.
    """
    queue = cl.CommandQueue(ctx)
    L = 4
    M = 64
    N = 32
    axes = (-1, -2)
    nd_data = np.arange(L * M * N, dtype=np.complex64)
    nd_data.shape = (L, M, N)
    cl_data = cla.to_device(queue, nd_data)
    cl_data_transformed = cla.zeros_like(cl_data)
    transform = FFT(ctx, queue, cl_data, cl_data_transformed, axes=axes)
    transform.enqueue()

    # Call get() so the diagnostics show the transformed data rather than
    # the repr of the bound method, and print the same reference that is
    # asserted against (computed over ``axes``, not fft2's default axes).
    result = cl_data_transformed.get()
    expected = np.fft.fft2(nd_data, axes=axes)
    print(result)
    print(expected)
    assert np.allclose(result, expected, rtol=1e-3, atol=1e-3)
def test_2d_in_4d_out_of_place(self, ctx):
    """Compare a 2D transform inside a 4D array with ``np.fft.fft2``.

    A (4, 5, 64, 32) complex64 array is transformed out of place over
    ``axes``; the result must match ``np.fft.fft2`` over the same axes
    within ``rtol = atol = 1e-3``.

    ctx: context object passed on to ``cl.CommandQueue`` and ``FFT``.
    """
    queue = cl.CommandQueue(ctx)
    L1 = 4
    L2 = 5
    M = 64
    N = 32
    axes = (-1, -2) #ok
    #axes = (0,1) #ok
    #axes = (0,2) #cannot be collapsed
    nd_data = np.arange(L1 * L2 * M * N, dtype=np.complex64)
    nd_data.shape = (L1, L2, M, N)
    cl_data = cla.to_device(queue, nd_data)
    cl_data_transformed = cla.zeros_like(cl_data)
    transform = FFT(ctx, queue, cl_data, cl_data_transformed, axes=axes)
    transform.enqueue()

    # Call get() so the diagnostics show the transformed data rather than
    # the repr of the bound method, and print the same reference that is
    # asserted against (computed over ``axes``, not fft2's default axes).
    result = cl_data_transformed.get()
    expected = np.fft.fft2(nd_data, axes=axes)
    print(result)
    print(expected)
    assert np.allclose(result, expected, rtol=1e-3, atol=1e-3)
def test_2d_out_of_place(self, ctx):
    """Compare a 2D transform inside a 3D array with ``np.fft.fft2``.

    A (4, 64, 32) complex64 array is transformed out of place over
    ``axes = (-1, -2)``; the result must match ``np.fft.fft2`` over the
    same axes within ``rtol = atol = 1e-3``.

    ctx: context object passed on to ``cl.CommandQueue`` and ``FFT``.
    """
    queue = cl.CommandQueue(ctx)
    L = 4
    M = 64
    N = 32
    axes = (-1, -2)
    nd_data = np.arange(L * M * N, dtype=np.complex64)
    nd_data.shape = (L, M, N)
    cl_data = cla.to_device(queue, nd_data)
    cl_data_transformed = cla.zeros_like(cl_data)
    transform = FFT(ctx, queue, cl_data, cl_data_transformed, axes=axes)
    transform.enqueue()

    # Call get() so the diagnostics show the transformed data rather than
    # the repr of the bound method, and print the same reference that is
    # asserted against (computed over ``axes``, not fft2's default axes).
    result = cl_data_transformed.get()
    expected = np.fft.fft2(nd_data, axes=axes)
    print(result)
    print(expected)
    assert np.allclose(result, expected, rtol=1e-3, atol=1e-3)
def test_1d_out_of_place(self, ctx):
    """Check a 32-point complex64 out-of-place transform against ``np.fft.fft``.

    The input is uploaded, transformed into a separate zero-initialised
    array of the same shape and dtype, and compared to NumPy using the
    default ``np.allclose`` tolerances.

    ctx: context object passed on to ``cl.CommandQueue`` and ``FFT``.
    """
    queue = cl.CommandQueue(ctx)

    host_signal = np.arange(32, dtype=np.complex64)
    device_input = cla.to_device(queue, host_signal)
    device_output = cla.zeros_like(device_input)

    # Keep the transform object referenced until the result has been read.
    plan = FFT(ctx, queue, device_input, device_output)
    plan.enqueue()

    actual = device_output.get()
    reference = np.fft.fft(host_signal)
    assert np.allclose(actual, reference)
def test_1d_inplace_double(self, ctx):
    """Check a 32-point complex128 transform against ``np.fft.fft``.

    Only one array is handed to ``FFT`` and the result is read back from
    that same array. Returns early without asserting anything when
    ``has_double(ctx)`` is false.

    ctx: context object passed on to ``cl.CommandQueue`` and ``FFT``.
    """
    if not has_double(ctx):
        #TODO: find better way to skip test
        return

    queue = cl.CommandQueue(ctx)

    host_signal = np.arange(32, dtype=np.complex128)
    device_signal = cla.to_device(queue, host_signal)

    # Keep the transform object referenced until the result has been read.
    plan = FFT(ctx, queue, device_signal)
    plan.enqueue()

    actual = device_signal.get()
    reference = np.fft.fft(host_signal)
    assert np.allclose(actual, reference)
def test_1d_out_of_place(self, ctx):
    """Check a 32-point complex64 out-of-place transform against ``np.fft.fft``.

    The input is uploaded, transformed into a separate zero-initialised
    array of the same shape and dtype, and compared to NumPy using the
    default ``np.allclose`` tolerances.

    ctx: context object passed on to ``cl.CommandQueue`` and ``FFT``.
    """
    queue = cl.CommandQueue(ctx)

    host_signal = np.arange(32, dtype=np.complex64)
    device_input = cla.to_device(queue, host_signal)
    device_output = cla.zeros_like(device_input)

    # Keep the transform object referenced until the result has been read.
    plan = FFT(ctx, queue, device_input, device_output)
    plan.enqueue()

    actual = device_output.get()
    reference = np.fft.fft(host_signal)
    assert np.allclose(actual, reference)
def test_1d_real_to_complex(self, ctx):
    """Check a 32-point float32 -> complex64 transform against ``np.fft.rfft``.

    The output array holds ``N // 2 + 1`` complex64 values; the comparison
    uses the default ``np.allclose`` tolerances.

    ctx: context object passed on to ``cl.CommandQueue`` and ``FFT``.
    """
    queue = cl.CommandQueue(ctx)

    N = 32
    host_signal = np.arange(N, dtype=np.float32)
    device_input = cla.to_device(queue, host_signal)
    output_shape = (N // 2 + 1,)
    device_output = cla.zeros(queue, output_shape, dtype=np.complex64)

    # Keep the transform object referenced until the result has been read.
    plan = FFT(ctx, queue, device_input, device_output)
    plan.enqueue()

    actual = device_output.get()
    reference = np.fft.rfft(host_signal)
    assert np.allclose(actual, reference)
def run(double_precision=False):
    """Time out-of-place and in-place transforms of a 1024x1024 complex array.

    Out-of-place: for every combination of axes, input layout (C/Fortran)
    and output layout, a transform is created, enqueued ``n_run`` times and
    timed; the result is compared against ``npfftn`` of the C-ordered host
    data. Any exception is printed and the timing line is always emitted.

    In-place: for every combination of axes and input layout, a transform
    is created on a fresh device copy and timed; results are not checked.

    Everything is reported via ``print``; nothing is returned.

    double_precision: when true use complex128 and 1e-8 tolerances,
        otherwise complex64 and 1e-3 tolerances.
    """
    def layout(array):
        # One-letter memory-order tag used in the report lines.
        return 'C' if array.flags.c_contiguous else 'F'

    context = cl.create_some_context()
    queue = cl.CommandQueue(context)
    dtype = np.complex128 if double_precision else np.complex64
    tolerance = 1e-8 if double_precision else 1e-3

    n_run = 100 #set to 1 for testing for correct result
    if n_run > 1:
        host_c = np.random.normal(size=(1024, 1024)).astype(dtype)
    else:
        host_c = np.ones((1024, 1024), dtype=dtype) #set n_run to 1
    host_f = np.asfortranarray(host_c)

    dev_in_c = cla.to_device(queue, host_c)
    dev_in_f = cla.to_device(queue, host_f)

    host_zeros = np.zeros_like(host_c, dtype=dtype)
    dev_out_c = cla.to_device(queue, host_zeros)
    dev_out_f = cla.to_device(queue, np.asfortranarray(host_zeros))

    axes_list = [(-2, -1), (-1, -2), None] #batched 2d transforms

    print('out of place transforms', dev_in_c.shape, dev_in_c.dtype)
    print('axes in out')
    out_of_place_cases = [
        (axes, data, result)
        for axes in axes_list
        for data in (dev_in_c, dev_in_f)
        for result in (dev_out_c, dev_out_f)
    ]
    for axes, data, result in out_of_place_cases:
        # Reported as zeros when the case fails before timing completes.
        t_ms, gflops = 0, 0
        try:
            transform = FFT(context, queue, data, result, axes=axes)
            #transform.plan.transpose_result = True #not implemented for some transforms (works e.g. for out of place, (2,1) C C)
            print('%-10s %3s %3s' % (axes, layout(data), layout(result)),
                  end=' ')

            start = timeit.default_timer()
            for _ in range(n_run):
                events = transform.enqueue()
                #events = transform.enqueue(False)
            # Only the events of the last enqueue are waited on.
            for event in events:
                event.wait()
            stop = timeit.default_timer()

            t_ms = 1e3 * (stop - start) / n_run
            n_points = np.prod(transform.t_shape)
            gflops = (5e-9 * np.log2(n_points) * n_points
                      * transform.batchsize / (1e-3 * t_ms))

            reference = npfftn(host_c, axes=axes)
            if transform.plan.transpose_result:
                reference = np.swapaxes(reference, axes[0], axes[1])

            max_error = np.max(abs(result.get() - reference))
            print('%8.1e' % max_error, end=' ')
            assert_allclose(result.get(), reference,
                            atol=tolerance, rtol=tolerance)
        except (GpyFFT_Error, AssertionError, Exception) as err:
            # Best effort: report the failure and continue with the next case.
            print(err)
        finally:
            print('%5.2fms %6.2f Gflops' % (t_ms, gflops))

    print('in place transforms', host_c.shape, host_c.dtype)
    in_place_cases = [
        (axes, host_data)
        for axes in axes_list
        for host_data in (host_c, host_f)
    ]
    for axes, host_data in in_place_cases:
        data = cla.to_device(queue, host_data)
        transform = FFT(context, queue, data, axes=axes)
        #transform.plan.transpose_result = True #not implemented

        start = timeit.default_timer()
        for _ in range(n_run):  # inplace transform fails for n_run > 1
            events = transform.enqueue()
        for event in events:
            event.wait()
        stop = timeit.default_timer()

        t_ms = 1e3 * (stop - start) / n_run
        n_points = np.prod(transform.t_shape)
        gflops = (5e-9 * np.log2(n_points) * n_points
                  * transform.batchsize / (1e-3 * t_ms))
        print('%-10s %3s %5.2fms %6.2f Gflops'
              % (axes, layout(data), t_ms, gflops))
def run(double_precision=False):
    """Time batched 2D transforms of a (4, 1024, 1024) complex array.

    Out-of-place: for every combination of axes, input layout (C/Fortran)
    and output layout, a transform is created, enqueued ``n_run`` times and
    timed; the result is compared against ``npfftn`` of the C-ordered host
    data. ``GpyFFT_Error`` and ``AssertionError`` are printed and the run
    continues; other exceptions propagate. The timing line is always
    emitted.

    In-place: for every combination of axes and input layout, a transform
    is created on a fresh device copy and timed; results are not checked.

    Everything is reported via ``print``; nothing is returned.

    double_precision: when true use complex128 and 1e-8 tolerances,
        otherwise complex64 and 1e-3 tolerances.
    """
    context = cl.create_some_context()
    queue = cl.CommandQueue(context)
    dtype = np.complex64 if not double_precision else np.complex128
    tolerance = 1e-8 if double_precision else 1e-3

    n_run = 100 #set to 1 for proper testing
    if n_run > 1:
        nd_dataC = np.random.normal(size=(4, 1024, 1024)).astype(dtype) #faster than 1024x1024?
    else:
        nd_dataC = np.ones((4, 1024, 1024), dtype=dtype) #set n_run to 1
    nd_dataF = np.asfortranarray(nd_dataC)

    dataC = cla.to_device(queue, nd_dataC)
    dataF = cla.to_device(queue, nd_dataF)

    nd_result = np.zeros_like(nd_dataC, dtype=dtype)
    resultC = cla.to_device(queue, nd_result)
    resultF = cla.to_device(queue, np.asfortranarray(nd_result))

    axes_list = [(1, 2), (2, 1)] #batched 2d transforms

    print('out of place transforms', dataC.shape, dataC.dtype)
    print('axes in out')
    for axes in axes_list:
        for data in (dataC, dataF):
            for result in (resultC, resultF):
                # Reset per case so the ``finally`` report never hits an
                # unbound name or shows the previous case's numbers when
                # this case fails before timing completes.
                t_ms, gflops = 0, 0
                try:
                    transform = FFT(context, queue, data, result, axes=axes)
                    #transform.plan.transpose_result = True #not implemented for some transforms (works e.g. for out of place, (2,1) C C)
                    print('%-10s %3s %3s' % (
                        axes,
                        'C' if data.flags.c_contiguous else 'F',
                        'C' if result.flags.c_contiguous else 'F',
                    ), end=' ')

                    tic = timeit.default_timer()
                    for _ in range(n_run):
                        events = transform.enqueue()
                        #events = transform.enqueue(False)
                    # Only the events of the last enqueue are waited on.
                    for e in events:
                        e.wait()
                    toc = timeit.default_timer()

                    t_ms = 1e3 * (toc - tic) / n_run
                    n_points = np.prod(transform.t_shape)
                    gflops = (5e-9 * np.log2(n_points) * n_points
                              * transform.batchsize / (1e-3 * t_ms))

                    npfft_result = npfftn(nd_dataC, axes=axes)
                    if transform.plan.transpose_result:
                        npfft_result = np.swapaxes(npfft_result, axes[0], axes[1])

                    # One device-to-host copy serves both the error report
                    # and the tolerance check.
                    host_result = result.get()
                    max_error = np.max(abs(host_result - npfft_result))
                    print('%8.1e' % max_error, end=' ')
                    assert_allclose(host_result, npfft_result,
                                    atol=tolerance, rtol=tolerance)
                except GpyFFT_Error as e:
                    print(e)
                    # A failed transform is always reported with zero timing.
                    t_ms, gflops = 0, 0
                except AssertionError as e:
                    print(e)
                finally:
                    print('%5.2fms %6.2f Gflops' % (t_ms, gflops))

    print('in place transforms', nd_dataC.shape, nd_dataC.dtype)
    for axes in axes_list:
        for nd_data in (nd_dataC, nd_dataF):
            data = cla.to_device(queue, nd_data)
            transform = FFT(context, queue, data, axes=axes)
            #transform.plan.transpose_result = True #not implemented

            tic = timeit.default_timer()
            for _ in range(n_run):  # inplace transform fails for n_run > 1
                events = transform.enqueue()
            for e in events:
                e.wait()
            toc = timeit.default_timer()

            t_ms = 1e3 * (toc - tic) / n_run
            n_points = np.prod(transform.t_shape)
            gflops = (5e-9 * np.log2(n_points) * n_points
                      * transform.batchsize / (1e-3 * t_ms))
            print('%-10s %3s %5.2fms %6.2f Gflops' % (
                axes,
                'C' if data.flags.c_contiguous else 'F',
                t_ms,
                gflops,
            ))