Example #1
    def test_against_fft_2d_mgpu(self):
        from pyculib.fft.binding import Plan, CUFFT_R2C
        rank = 64
        rowsize = 64
        N = rank * rowsize
        x = np.arange(N, dtype=np.float32)

        halfZ = rowsize // 2 + 1
        # Pack the real input into a half-size complex array: consecutive
        # (even, odd) samples become the real and imaginary parts, and the
        # last slot of each row is zero-padded.
        xh = np.zeros(rank * halfZ, dtype=np.complex64)
        for j in range(rank):
            for i in range(halfZ - 1):
                ii = j * rowsize + 2 * i
                r = x[ii]
                if ii + 1 < N:
                    imag = x[ii + 1]
                else:
                    imag = 0
                xh[j * halfZ + i] = complex(r, imag)
            xh[j * halfZ + halfZ - 1] = 0

        x = x.reshape(rank, rowsize)
        xh = xh.reshape(rank, halfZ)

        xf = np.fft.fft2(x)

        plan = Plan.many([rank, rowsize], CUFFT_R2C, 1, 2)
        d_x_gpu = plan.to_device(xh)
        xf_gpu = np.zeros(shape=(rank, halfZ), dtype=np.complex64)
        #d_xf_gpu = plan.to_device(xf_gpu)
        plan.forward(d_x_gpu, d_x_gpu)  # in-place transform
        d_x_gpu.copy_to_host(xf_gpu)

        self.assertTrue(np.allclose(xf[:, 0:halfZ], xf_gpu, atol=1e-6))
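
A minimal NumPy-only sketch (no GPU required; sizes mirror the test above, and every name below is local to the sketch) of what the packing loop builds: viewing consecutive float32 pairs as complex64 and appending one zero column reproduces xh, while np.fft.rfft2 gives the same reference slice the assertion compares against.

import numpy as np

rank, rowsize = 64, 64
halfZ = rowsize // 2 + 1
x = np.arange(rank * rowsize, dtype=np.float32).reshape(rank, rowsize)

# Pairs of consecutive real samples become complex numbers (even -> real,
# odd -> imaginary); one zero column pads each row out to halfZ entries.
xh = np.pad(x.view(np.complex64), ((0, 0), (0, 1)))
assert xh.shape == (rank, halfZ)

# For real input, the first halfZ columns of fft2 are exactly rfft2.
assert np.allclose(np.fft.fft2(x)[:, :halfZ], np.fft.rfft2(x))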
Example #2
    def test_against_fft_1d_mgpu(self):
        # Disabled: this early return skips the rest of the test body.
        return True

        from pyculib.fft.binding import Plan, CUFFT_R2C, CUFFT_C2R
        N = 32
        x = np.arange(N, dtype=np.float32)

        halfZ = N // 2 + 1
        # Pack the real samples into a half-size complex array, zero-padding
        # the final slot (same layout as in the 2-D multi-GPU test above).
        xh = np.zeros(halfZ, dtype=np.complex64)
        for i in range(halfZ - 1):
            r = x[2 * i]
            if 2 * i + 1 < N:
                imag = x[2 * i + 1]
            else:
                imag = 0
            xh[i] = complex(r, imag)
        xh[halfZ - 1] = 0

        print(x)
        print(xh)

        xf = np.fft.fft(x)

        plan = Plan.many([N], CUFFT_R2C, 1, 2)
        d_x_gpu = plan.to_device(xh)
        xf_gpu = np.zeros(halfZ, dtype=np.complex64)
        #d_xf_gpu = plan.to_device(xf_gpu)
        plan.forward(d_x_gpu, d_x_gpu)
        d_x_gpu.copy_to_host(xf_gpu)

        self.assertTrue(np.allclose(xf[0:halfZ], xf_gpu, atol=1e-6))
Example #3
    def test_against_fft_1d(self):
        from pyculib.fft.binding import Plan, CUFFT_R2C
        from numba import cuda
        N = 128
        x = np.asarray(np.arange(N), dtype=np.float32)
        xf = np.fft.fft(x)
        d_x_gpu = cuda.to_device(x)
        xf_gpu = np.zeros(N // 2 + 1, np.complex64)
        d_xf_gpu = cuda.to_device(xf_gpu)
        plan = Plan.many(x.shape, CUFFT_R2C)
        plan.forward(d_x_gpu, d_xf_gpu)
        d_xf_gpu.copy_to_host(xf_gpu)
        self.assertTrue(np.allclose(xf[0:N // 2 + 1], xf_gpu, atol=1e-6))
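
As a side note, the reference slice used above can also be obtained directly from the real-input transform; a small sketch, assuming only NumPy:

import numpy as np

N = 128
x = np.arange(N, dtype=np.float32)
# np.fft.rfft returns only the non-redundant half of the spectrum, i.e.
# exactly the slice fft(x)[0:N//2+1] that the test compares against.
assert np.allclose(np.fft.fft(x)[: N // 2 + 1], np.fft.rfft(x))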
Example #4
    def test_against_fft_2d(self):
        from pyculib.fft.binding import Plan, CUFFT_R2C
        from numba import cuda
        rank = 2
        rowsize = 128
        N = rowsize * rank
        x = np.arange(N, dtype=np.float32).reshape(rank, rowsize)
        xf = np.fft.fft2(x)
        d_x_gpu = cuda.to_device(x)
        xf_gpu = np.zeros(shape=(rank, rowsize // 2 + 1), dtype=np.complex64)
        d_xf_gpu = cuda.to_device(xf_gpu)
        plan = Plan.many(x.shape, CUFFT_R2C)
        plan.forward(d_x_gpu, d_xf_gpu)
        d_xf_gpu.copy_to_host(xf_gpu)
        self.assertTrue(np.allclose(xf[:, 0:rowsize // 2 + 1], xf_gpu, atol=1e-6))
Example #5
    def test_against_fft_3d(self):
        from pyculib.fft.binding import Plan, CUFFT_R2C
        depth = 2
        colsize = 2
        rowsize = 64
        N = depth * colsize * rowsize
        x = np.arange(N, dtype=np.float32).reshape(depth, colsize, rowsize)

        xf = np.fft.fftn(x)

        halfZ = rowsize // 2 + 1
        plan = Plan.many(x.shape, CUFFT_R2C)
        d_x_gpu = plan.to_device(x)
        xf_gpu = np.zeros(shape=(depth, colsize, halfZ), dtype=np.complex64)
        d_xf_gpu = plan.to_device(xf_gpu)
        plan.forward(d_x_gpu, d_xf_gpu)
        d_xf_gpu.copy_to_host(xf_gpu)

        self.assertTrue(np.allclose(xf[:, :, 0:halfZ], xf_gpu, atol=1e-6))
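
The same shortcut extends to the multi-dimensional reference: for real input, np.fft.rfftn halves only the last axis, so it equals the fftn slice the 3-D tests check against. A NumPy-only sketch with the sizes from the test above:

import numpy as np

depth, colsize, rowsize = 2, 2, 64
halfZ = rowsize // 2 + 1
x = np.arange(depth * colsize * rowsize, dtype=np.float32).reshape(
    depth, colsize, rowsize)
# rfftn keeps the leading axes intact and returns only the non-redundant
# half of the last axis, matching xf[:, :, 0:halfZ] above.
assert np.allclose(np.fft.fftn(x)[:, :, :halfZ], np.fft.rfftn(x))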
Example #6
    def test_against_fft_3d_mgpu(self):
        from pyculib.fft.binding import Plan, CUFFT_R2C
        depth = 32
        colsize = 32
        rowsize = 32
        N = depth * colsize * rowsize
        x = np.arange(N, dtype=np.float32)

        halfZ = rowsize // 2 + 1
        # Pack the real input into a half-size complex array along the last
        # axis: consecutive (even, odd) samples become the real and imaginary
        # parts, and the final slot of each row is zero-padded.
        xh = np.zeros(depth * colsize * halfZ, dtype=np.complex64)
        for k in range(depth):
            for j in range(colsize):
                for i in range(halfZ - 1):
                    ii = k * colsize * rowsize + j * rowsize + 2 * i
                    r = x[ii]
                    if ii + 1 < N:
                        imag = x[ii + 1]
                    else:
                        imag = 0
                    xh[k * colsize * halfZ + j * halfZ + i] = complex(r, imag)

                xh[k * colsize * halfZ + j * halfZ + halfZ - 1] = 0

        x = x.reshape(depth, colsize, rowsize)
        xh = xh.reshape(depth, colsize, halfZ)

        xf = np.fft.fftn(x)

        plan = Plan.many([depth, colsize, rowsize], CUFFT_R2C, 1, 2)
        d_x_gpu = plan.to_device(xh)
        xf_gpu = np.zeros(shape=(depth, colsize, halfZ), dtype=np.complex64)
        #d_xf_gpu = plan.to_device(xf_gpu)
        plan.forward(d_x_gpu, d_x_gpu)  # in-place transform
        d_x_gpu.copy_to_host(xf_gpu)

        self.assertTrue(np.allclose(xf[:, :, 0:halfZ], xf_gpu, atol=1e-6))
Example #7
# f = fft.FFTPlan(img_shape, np.complex64, np.complex64, 1, 0, fft.FFTPlan.MODE_FFTW_PADDING)

import time

import numpy as np
from numba import cuda

from pyculib.fft.binding import Plan, CUFFT_C2C
from pyculib import blas as cublas
n = (128 * 10)**2
data1 = np.arange(n, dtype=np.complex64).reshape(2, n // 2)
data = np.arange(n, dtype=np.complex64)
orig = data.copy()
d_data = cuda.to_device(data)
#s0 = cuda.stream()
# cuda.select_device(1)
# d_data1 = cuda.to_device(data)
#s1 = cuda.stream()
# fftplan = Plan.one(CUFFT_C2C, *data.shape)
# Plan.many()
fftplan1 = Plan.many(data.shape, CUFFT_C2C, 1500)
b = cublas.Blas()
rounds = 10000
start = time.perf_counter()  # time.clock() was removed in Python 3.8

for x in range(rounds):
    # fft.fft_inplace(img)
    # cuda.select_device(0)
    # fftplan1.forward(d_data, d_data)
    # fftplan1.inverse(d_data, d_data)
    # cuda.select_device(1)
    # fftplan1.forward(d_data1, d_data1)
    #fftplan1.forward(d_data1, d_data1)
    # fftplan.inverse(d_data, d_data)
    # d_data = cuda.to_device(data)
    # cublas.dot(d_data, d_data)