Пример #1
0
    def test_2d_real_to_complex_double(self, ctx):
        """Check a 2D float64 -> complex128 transform against ``np.fft.rfft2``.

        The input is an (M, N) = (64, 32) float64 ramp.  The output buffer has
        shape (M, N // 2 + 1), the shape ``np.fft.rfft2`` returns for that
        input.  The test returns early, without asserting anything, when
        ``has_double(ctx)`` is false.
        """
        if not has_double(ctx):  # TODO: find better way to skip test
            return
        queue = cl.CommandQueue(ctx)

        M = 64
        N = 32

        nd_data = np.arange(M * N, dtype=np.float64)
        nd_data.shape = (M, N)
        cl_data = cla.to_device(queue, nd_data)

        cl_data_transformed = cla.zeros(queue, (M, N // 2 + 1),
                                        dtype=np.complex128)

        transform = FFT(
            ctx,
            queue,
            cl_data,
            cl_data_transformed,
            axes=(1, 0),
        )

        transform.enqueue()

        # Call get() so the transformed array itself (not the bound method
        # object) is printed, and copy it to the host only once.
        result = cl_data_transformed.get()
        expected = np.fft.rfft2(nd_data)

        print(result)
        print(expected)

        assert np.allclose(result, expected, rtol=1e-8, atol=1e-8)
Пример #2
0
    def test_2d_real_to_complex(self, ctx):
        """Check a 2D float32 -> complex64 transform against ``np.fft.rfft2``.

        The input is an (M, N) = (64, 32) float32 ramp.  The output buffer has
        shape (M, N // 2 + 1), the shape ``np.fft.rfft2`` returns for that
        input.  Results are compared with rtol = atol = 1e-3.
        """
        queue = cl.CommandQueue(ctx)

        M = 64
        N = 32

        nd_data = np.arange(M * N, dtype=np.float32)
        nd_data.shape = (M, N)
        cl_data = cla.to_device(queue, nd_data)

        cl_data_transformed = cla.zeros(queue, (M, N // 2 + 1),
                                        dtype=np.complex64)

        transform = FFT(
            ctx,
            queue,
            cl_data,
            cl_data_transformed,
            axes=(1, 0),
        )

        transform.enqueue()

        # Call get() so the transformed array itself (not the bound method
        # object) is printed, and copy it to the host only once.
        result = cl_data_transformed.get()
        expected = np.fft.rfft2(nd_data)

        print(result)
        print(expected)

        assert np.allclose(result, expected, rtol=1e-3, atol=1e-3)
Пример #3
0
    def test_2d_in_4d_out_of_place(self, ctx):
        """Check a 2D transform inside a 4D array against ``np.fft.fft2``.

        The input is a complex64 ramp of shape (L1, L2, M, N) = (4, 5, 64, 32).
        The same ``axes`` tuple is passed to ``FFT`` and to ``np.fft.fft2``,
        and the output goes to a separate buffer of the same shape and dtype.
        """
        queue = cl.CommandQueue(ctx)

        L1 = 4
        L2 = 5

        M = 64
        N = 32
        # Original author's notes on axes choices:
        #   (-1, -2) ok, (0, 1) ok, (0, 2) cannot be collapsed.
        axes = (-1, -2)

        nd_data = np.arange(L1 * L2 * M * N, dtype=np.complex64)
        nd_data.shape = (L1, L2, M, N)
        cl_data = cla.to_device(queue, nd_data)

        cl_data_transformed = cla.zeros_like(cl_data)

        transform = FFT(
            ctx,
            queue,
            cl_data,
            cl_data_transformed,
            axes=axes,
        )

        transform.enqueue()

        # Call get() so the transformed array itself (not the bound method
        # object) is printed; print the same reference that is asserted on
        # (i.e. computed with the explicit ``axes``).
        result = cl_data_transformed.get()
        expected = np.fft.fft2(nd_data, axes=axes)

        print(result)
        print(expected)

        assert np.allclose(result, expected, rtol=1e-3, atol=1e-3)
Пример #4
0
    def test_2d_real_to_complex(self, ctx):
        """Compare a 2D real-to-complex transform with ``np.fft.rfft2``.

        A float32 ramp of shape (M, N) = (64, 32) is transformed into a
        complex64 buffer of shape (M, N // 2 + 1), which is the output shape
        of ``np.fft.rfft2`` for that input.  Tolerances are rtol = atol = 1e-3.
        """
        queue = cl.CommandQueue(ctx)

        M = 64
        N = 32

        nd_data = np.arange(M * N, dtype=np.float32)
        nd_data.shape = (M, N)
        cl_data = cla.to_device(queue, nd_data)

        cl_data_transformed = cla.zeros(queue, (M, N // 2 + 1),
                                        dtype=np.complex64)

        transform = FFT(ctx, queue,
                        cl_data,
                        cl_data_transformed,
                        axes=(1, 0),
                        )

        transform.enqueue()

        # get() must be *called*: printing the attribute alone only shows the
        # bound method object.  Fetch once and reuse for print and assert.
        result = cl_data_transformed.get()
        expected = np.fft.rfft2(nd_data)

        print(result)
        print(expected)

        assert np.allclose(result, expected, rtol=1e-3, atol=1e-3)
Пример #5
0
    def test_2d_real_to_complex_double(self, ctx):
        """Compare a 2D float64 -> complex128 transform with ``np.fft.rfft2``.

        A float64 ramp of shape (M, N) = (64, 32) is transformed into a
        complex128 buffer of shape (M, N // 2 + 1).  Tolerances are
        rtol = atol = 1e-8.  The test returns early, without asserting
        anything, when ``has_double(ctx)`` is false.
        """
        if not has_double(ctx):  # TODO: find better way to skip test
            return
        queue = cl.CommandQueue(ctx)

        M = 64
        N = 32

        nd_data = np.arange(M * N, dtype=np.float64)
        nd_data.shape = (M, N)
        cl_data = cla.to_device(queue, nd_data)

        cl_data_transformed = cla.zeros(queue, (M, N // 2 + 1),
                                        dtype=np.complex128)

        transform = FFT(ctx, queue,
                        cl_data,
                        cl_data_transformed,
                        axes=(1, 0),
                        )

        transform.enqueue()

        # get() must be *called*: printing the attribute alone only shows the
        # bound method object.  Fetch once and reuse for print and assert.
        result = cl_data_transformed.get()
        expected = np.fft.rfft2(nd_data)

        print(result)
        print(expected)

        assert np.allclose(result, expected, rtol=1e-8, atol=1e-8)
Пример #6
0
    def test_2d_out_of_place(self, ctx):
        """Check a 2D transform inside a 3D array against ``np.fft.fft2``.

        The input is a complex64 ramp of shape (L, M, N) = (4, 64, 32).  The
        same ``axes`` tuple is passed to ``FFT`` and to ``np.fft.fft2``, and
        the output goes to a separate buffer of the same shape and dtype.
        """
        queue = cl.CommandQueue(ctx)

        L = 4
        M = 64
        N = 32
        axes = (-1, -2)

        nd_data = np.arange(L * M * N, dtype=np.complex64)
        nd_data.shape = (L, M, N)
        cl_data = cla.to_device(queue, nd_data)

        cl_data_transformed = cla.zeros_like(cl_data)

        transform = FFT(
            ctx,
            queue,
            cl_data,
            cl_data_transformed,
            axes=axes,
        )

        transform.enqueue()

        # Call get() so the transformed array itself (not the bound method
        # object) is printed; print the same reference that is asserted on
        # (i.e. computed with the explicit ``axes``).
        result = cl_data_transformed.get()
        expected = np.fft.fft2(nd_data, axes=axes)

        print(result)
        print(expected)

        assert np.allclose(result, expected, rtol=1e-3, atol=1e-3)
Пример #7
0
    def test_2d_in_4d_out_of_place(self, ctx):
        """Compare a 2D transform embedded in a 4D array with ``np.fft.fft2``.

        A complex64 ramp of shape (L1, L2, M, N) = (4, 5, 64, 32) is
        transformed into a separate buffer of the same shape and dtype.  The
        same ``axes`` tuple is handed to ``FFT`` and to ``np.fft.fft2``.
        """
        queue = cl.CommandQueue(ctx)

        L1 = 4
        L2 = 5

        M = 64
        N = 32
        # Original author's notes on axes choices:
        #   (-1, -2) ok, (0, 1) ok, (0, 2) cannot be collapsed.
        axes = (-1, -2)

        nd_data = np.arange(L1 * L2 * M * N, dtype=np.complex64)
        nd_data.shape = (L1, L2, M, N)
        cl_data = cla.to_device(queue, nd_data)

        cl_data_transformed = cla.zeros_like(cl_data)

        transform = FFT(ctx, queue,
                        cl_data,
                        cl_data_transformed,
                        axes=axes,
                        )

        transform.enqueue()

        # get() must be *called*: printing the attribute alone only shows the
        # bound method object.  The printed reference uses the same ``axes``
        # as the assertion so the debug output matches what is compared.
        result = cl_data_transformed.get()
        expected = np.fft.fft2(nd_data, axes=axes)

        print(result)
        print(expected)

        assert np.allclose(result, expected, rtol=1e-3, atol=1e-3)
Пример #8
0
    def test_2d_out_of_place(self, ctx):
        """Compare a 2D transform embedded in a 3D array with ``np.fft.fft2``.

        A complex64 ramp of shape (L, M, N) = (4, 64, 32) is transformed into
        a separate buffer of the same shape and dtype.  The same ``axes``
        tuple is handed to ``FFT`` and to ``np.fft.fft2``.
        """
        queue = cl.CommandQueue(ctx)

        L = 4
        M = 64
        N = 32
        axes = (-1, -2)

        nd_data = np.arange(L * M * N, dtype=np.complex64)
        nd_data.shape = (L, M, N)
        cl_data = cla.to_device(queue, nd_data)

        cl_data_transformed = cla.zeros_like(cl_data)

        transform = FFT(ctx, queue,
                        cl_data,
                        cl_data_transformed,
                        axes=axes,
                        )

        transform.enqueue()

        # get() must be *called*: printing the attribute alone only shows the
        # bound method object.  The printed reference uses the same ``axes``
        # as the assertion so the debug output matches what is compared.
        result = cl_data_transformed.get()
        expected = np.fft.fft2(nd_data, axes=axes)

        print(result)
        print(expected)

        assert np.allclose(result, expected, rtol=1e-3, atol=1e-3)
Пример #9
0
    def test_1d_out_of_place(self, ctx):
        """Transform a 32-element complex64 ramp into a separate output buffer.

        The device result is compared with ``np.fft.fft`` of the host input
        using ``np.allclose`` default tolerances.
        """
        queue = cl.CommandQueue(ctx)

        host_input = np.arange(32, dtype=np.complex64)
        device_input = cla.to_device(queue, host_input)
        device_output = cla.zeros_like(device_input)

        # Keep the plan object bound to a local while the work is in flight.
        fft_plan = FFT(ctx, queue, device_input, device_output)
        fft_plan.enqueue()

        actual = device_output.get()
        expected = np.fft.fft(host_input)
        assert np.allclose(actual, expected)
Пример #10
0
    def test_1d_inplace_double(self, ctx):
        """Transform a 32-element complex128 ramp in place.

        Only the input array is handed to ``FFT``; afterwards that same array
        is read back and compared with ``np.fft.fft`` of the host data.
        Returns early, without asserting anything, when ``has_double(ctx)``
        is false.
        """
        if not has_double(ctx):  # TODO: find better way to skip test
            return

        queue = cl.CommandQueue(ctx)

        host_input = np.arange(32, dtype=np.complex128)
        device_buffer = cla.to_device(queue, host_input)

        # Keep the plan object bound to a local while the work is in flight.
        fft_plan = FFT(ctx, queue, device_buffer)
        fft_plan.enqueue()

        actual = device_buffer.get()
        expected = np.fft.fft(host_input)
        assert np.allclose(actual, expected)
Пример #11
0
    def test_1d_inplace_double(self, ctx):
        """In-place 1D transform of a complex128 ramp, checked against numpy.

        ``FFT`` receives a single array, which is read back after
        ``enqueue()`` and compared with ``np.fft.fft`` of the original host
        data.  When ``has_double(ctx)`` is false the test returns immediately
        without asserting anything.
        """
        if not has_double(ctx):  # TODO: find better way to skip test
            return

        queue = cl.CommandQueue(ctx)

        samples = np.arange(32, dtype=np.complex128)
        device_samples = cla.to_device(queue, samples)

        # Keep the plan object bound to a local while the work is in flight.
        plan = FFT(ctx, queue, device_samples)
        plan.enqueue()

        transformed = device_samples.get()
        reference = np.fft.fft(samples)
        assert np.allclose(transformed, reference)
Пример #12
0
    def test_1d_out_of_place(self, ctx):
        """Out-of-place 1D transform of a complex64 ramp, checked against numpy.

        A zero-filled array shaped like the input receives the result, which
        is compared with ``np.fft.fft`` using ``np.allclose`` defaults.
        """
        queue = cl.CommandQueue(ctx)

        samples = np.arange(32, dtype=np.complex64)
        device_samples = cla.to_device(queue, samples)
        device_spectrum = cla.zeros_like(device_samples)

        # Keep the plan object bound to a local while the work is in flight.
        plan = FFT(ctx, queue, device_samples, device_spectrum)
        plan.enqueue()

        transformed = device_spectrum.get()
        reference = np.fft.fft(samples)
        assert np.allclose(transformed, reference)
Пример #13
0
    def test_1d_real_to_complex(self, ctx):
        """Transform a 32-element float32 ramp into a complex64 half spectrum.

        The output buffer has N // 2 + 1 elements, the length ``np.fft.rfft``
        returns for an N-element input; the two results are compared with
        ``np.allclose`` default tolerances.
        """
        queue = cl.CommandQueue(ctx)

        N = 32
        spectrum_shape = (N // 2 + 1,)

        host_input = np.arange(N, dtype=np.float32)
        device_input = cla.to_device(queue, host_input)
        device_output = cla.zeros(queue, spectrum_shape, dtype=np.complex64)

        # Keep the plan object bound to a local while the work is in flight.
        fft_plan = FFT(ctx, queue, device_input, device_output)
        fft_plan.enqueue()

        actual = device_output.get()
        expected = np.fft.rfft(host_input)
        assert np.allclose(actual, expected)
Пример #14
0
    def test_1d_real_to_complex(self, ctx):
        """1D real-to-complex transform of a float32 ramp, checked against numpy.

        The complex64 output array holds N // 2 + 1 values (the length of
        ``np.fft.rfft`` output for N real samples) and is compared with
        ``np.fft.rfft`` using ``np.allclose`` defaults.
        """
        queue = cl.CommandQueue(ctx)

        N = 32
        n_output = N // 2 + 1

        samples = np.arange(N, dtype=np.float32)
        device_samples = cla.to_device(queue, samples)
        device_spectrum = cla.zeros(queue, (n_output,), dtype=np.complex64)

        # Keep the plan object bound to a local while the work is in flight.
        plan = FFT(ctx, queue, device_samples, device_spectrum)
        plan.enqueue()

        transformed = device_spectrum.get()
        reference = np.fft.rfft(samples)
        assert np.allclose(transformed, reference)
Пример #15
0
def run(double_precision=False):
    """Benchmark and check FFT on 1024 x 1024 complex arrays.

    First, out-of-place transforms are timed for every combination of
    ``axes`` in ``axes_list``, C- or Fortran-ordered input, and C- or
    Fortran-ordered output.  Each result is compared with ``npfftn`` of the
    host data; failures are printed and the sweep continues.  Then in-place
    transforms are timed for each ``axes`` and memory order (no result check).

    Args:
        double_precision: use complex128 with 1e-8 tolerances when true,
            otherwise complex64 with 1e-3 tolerances.

    The reported Gflops figure uses 5 * N * log2(N) operations per transform
    with N = prod(transform.t_shape), multiplied by ``transform.batchsize``.
    """
    context = cl.create_some_context()
    queue = cl.CommandQueue(context)

    dtype = np.complex128 if double_precision else np.complex64
    tolerance = 1e-8 if double_precision else 1e-3

    n_run = 100  # set to 1 for testing for correct result

    if n_run > 1:
        nd_dataC = np.random.normal(size=(1024, 1024)).astype(dtype)
    else:
        nd_dataC = np.ones((1024, 1024), dtype=dtype)  # set n_run to 1

    nd_dataF = np.asfortranarray(nd_dataC)
    dataC = cla.to_device(queue, nd_dataC)
    dataF = cla.to_device(queue, nd_dataF)

    nd_result = np.zeros_like(nd_dataC, dtype=dtype)
    resultC = cla.to_device(queue, nd_result)
    resultF = cla.to_device(queue, np.asfortranarray(nd_result))

    # Axes orders to benchmark; None is passed through unchanged to both FFT
    # and npfftn.
    axes_list = [(-2, -1), (-1, -2), None]

    print('out of place transforms', dataC.shape, dataC.dtype)
    print('axes         in out')
    for axes in axes_list:
        for data in (dataC, dataF):
            for result in (resultC, resultF):
                # Defaults reported by the ``finally`` clause when the
                # transform fails before any timing is available.
                t_ms, gflops = 0, 0
                try:
                    transform = FFT(context,
                                    queue,
                                    data,
                                    result,
                                    axes=axes)
                    # transform.plan.transpose_result = True is not
                    # implemented for some transforms (works e.g. for out of
                    # place, (2,1) C C).
                    print(
                        '%-10s %3s %3s' % (
                            axes,
                            'C' if data.flags.c_contiguous else 'F',
                            'C' if result.flags.c_contiguous else 'F',
                        ),
                        end=' ',
                    )

                    tic = timeit.default_timer()
                    for _ in range(n_run):
                        events = transform.enqueue()
                    # Only the events of the last enqueue are waited on.
                    for e in events:
                        e.wait()
                    toc = timeit.default_timer()
                    t_ms = 1e3 * (toc - tic) / n_run
                    n_points = np.prod(transform.t_shape)
                    gflops = (5e-9 * np.log2(n_points) * n_points
                              * transform.batchsize / (1e-3 * t_ms))

                    npfft_result = npfftn(nd_dataC, axes=axes)
                    if transform.plan.transpose_result:
                        npfft_result = np.swapaxes(npfft_result, axes[0],
                                                   axes[1])

                    # Copy the device result to the host once and reuse it
                    # for both the error report and the tolerance check.
                    host_result = result.get()
                    max_error = np.max(abs(host_result - npfft_result))
                    print('%8.1e' % max_error, end=' ')
                    assert_allclose(host_result,
                                    npfft_result,
                                    atol=tolerance,
                                    rtol=tolerance)

                except (GpyFFT_Error, AssertionError, Exception) as err:
                    # Best-effort sweep: report the failure for this
                    # combination and continue with the next one.
                    print(err)
                finally:
                    print('%5.2fms %6.2f Gflops' % (t_ms, gflops))

    print('in place transforms', nd_dataC.shape, nd_dataC.dtype)

    for axes in axes_list:
        for nd_data in (nd_dataC, nd_dataF):
            data = cla.to_device(queue, nd_data)
            transform = FFT(context, queue, data, axes=axes)
            # transform.plan.transpose_result = True is not implemented here.
            tic = timeit.default_timer()
            # NOTE (original author): inplace transform fails for n_run > 1
            for _ in range(n_run):
                events = transform.enqueue()
            for e in events:
                e.wait()
            toc = timeit.default_timer()
            t_ms = 1e3 * (toc - tic) / n_run
            n_points = np.prod(transform.t_shape)
            gflops = (5e-9 * np.log2(n_points) * n_points
                      * transform.batchsize / (1e-3 * t_ms))
            print(
                '%-10s %3s %5.2fms %6.2f Gflops' %
                (axes, 'C' if data.flags.c_contiguous else 'F', t_ms, gflops))
Пример #16
0
def run(double_precision=False):
    """Benchmark and check batched 2D FFTs on a 4 x 1024 x 1024 complex array.

    First, out-of-place transforms are timed for every combination of
    ``axes`` in ``axes_list``, C- or Fortran-ordered input, and C- or
    Fortran-ordered output.  Each result is compared with ``npfftn`` of the
    host data; ``GpyFFT_Error`` and ``AssertionError`` are printed and the
    sweep continues, while any other exception propagates.  Then in-place
    transforms are timed for each ``axes`` and memory order (no result check).

    Args:
        double_precision: use complex128 with 1e-8 tolerances when true,
            otherwise complex64 with 1e-3 tolerances.

    The reported Gflops figure uses 5 * N * log2(N) operations per transform
    with N = prod(transform.t_shape), multiplied by ``transform.batchsize``.
    """
    context = cl.create_some_context()
    queue = cl.CommandQueue(context)

    dtype = np.complex128 if double_precision else np.complex64
    tolerance = 1e-8 if double_precision else 1e-3

    n_run = 100  # set to 1 for proper testing

    if n_run > 1:
        # faster than 1024x1024?
        nd_dataC = np.random.normal(size=(4, 1024, 1024)).astype(dtype)
    else:
        nd_dataC = np.ones((4, 1024, 1024), dtype=dtype)  # set n_run to 1

    nd_dataF = np.asfortranarray(nd_dataC)
    dataC = cla.to_device(queue, nd_dataC)
    dataF = cla.to_device(queue, nd_dataF)

    nd_result = np.zeros_like(nd_dataC, dtype=dtype)
    resultC = cla.to_device(queue, nd_result)
    resultF = cla.to_device(queue, np.asfortranarray(nd_result))

    axes_list = [(1, 2), (2, 1)]  # batched 2d transforms

    print('out of place transforms', dataC.shape, dataC.dtype)
    print('axes         in out')
    for axes in axes_list:
        for data in (dataC, dataF):
            for result in (resultC, resultF):
                # Initialise before the ``try`` so the ``finally`` report
                # never hits an unbound local or shows numbers left over
                # from the previous combination when an unexpected exception
                # is raised before timing completes.
                t_ms, gflops = 0, 0
                try:
                    transform = FFT(context, queue, data, result, axes=axes)
                    # transform.plan.transpose_result = True is not
                    # implemented for some transforms (works e.g. for out of
                    # place, (2,1) C C).
                    print(
                        '%-10s %3s %3s' % (
                            axes,
                            'C' if data.flags.c_contiguous else 'F',
                            'C' if result.flags.c_contiguous else 'F',
                        ),
                        end=' ',
                    )

                    tic = timeit.default_timer()
                    for _ in range(n_run):
                        events = transform.enqueue()
                    # Only the events of the last enqueue are waited on.
                    for e in events:
                        e.wait()
                    toc = timeit.default_timer()
                    t_ms = 1e3 * (toc - tic) / n_run
                    n_points = np.prod(transform.t_shape)
                    gflops = (5e-9 * np.log2(n_points) * n_points
                              * transform.batchsize / (1e-3 * t_ms))

                    npfft_result = npfftn(nd_dataC, axes=axes)
                    if transform.plan.transpose_result:
                        npfft_result = np.swapaxes(npfft_result,
                                                   axes[0], axes[1])

                    # Copy the device result to the host once and reuse it
                    # for both the error report and the tolerance check.
                    host_result = result.get()
                    max_error = np.max(abs(host_result - npfft_result))
                    print('%8.1e' % max_error, end=' ')
                    assert_allclose(host_result, npfft_result,
                                    atol=tolerance,
                                    rtol=tolerance)

                except GpyFFT_Error as e:
                    print(e)
                    # A failed plan/transform is reported with zero timing.
                    t_ms, gflops = 0, 0
                except AssertionError as e:
                    # Mismatch against the numpy reference: report it but
                    # keep the measured timing.
                    print(e)
                finally:
                    print('%5.2fms %6.2f Gflops' % (t_ms, gflops))

    print('in place transforms', nd_dataC.shape, nd_dataC.dtype)

    for axes in axes_list:
        for nd_data in (nd_dataC, nd_dataF):
            data = cla.to_device(queue, nd_data)
            transform = FFT(context, queue, data, axes=axes)
            # transform.plan.transpose_result = True is not implemented here.
            tic = timeit.default_timer()
            # NOTE (original author): inplace transform fails for n_run > 1
            for _ in range(n_run):
                events = transform.enqueue()
            for e in events:
                e.wait()
            toc = timeit.default_timer()
            t_ms = 1e3 * (toc - tic) / n_run
            n_points = np.prod(transform.t_shape)
            gflops = (5e-9 * np.log2(n_points) * n_points
                      * transform.batchsize / (1e-3 * t_ms))
            print('%-10s %3s %5.2fms %6.2f Gflops' % (
                axes,
                'C' if data.flags.c_contiguous else 'F',
                t_ms, gflops
                ))