Exemplo n.º 1
0
    def test_against_fft_2d_mgpu(self):
        """Compare an in-place 2-D R2C transform with ``np.fft.fft2``.

        The real input is packed pairwise into a complex64 buffer with one
        zeroed trailing column per row, transformed in place on the device,
        and the result is compared with the first ``halfZ`` columns of the
        NumPy reference.
        """
        from pyculib.fft.binding import Plan, CUFFT_R2C
        rank = 64
        rowsize = 64
        x = np.arange(rank * rowsize, dtype=np.float32).reshape(rank, rowsize)

        halfZ = rowsize // 2 + 1

        # Pack consecutive real pairs (x[j, 2i], x[j, 2i + 1]) into complex
        # slot (j, i); the last slot of every row stays zero as padding.
        # rowsize is even, so every real sample has a partner.
        # (The previous element-wise loop used np.complex, which was removed
        # in NumPy 1.24.)
        xh = np.zeros((rank, halfZ), dtype=np.complex64)
        packed = xh[:, :halfZ - 1]
        packed.real = x[:, 0::2]
        packed.imag = x[:, 1::2]

        xf = np.fft.fft2(x)

        # NOTE(review): the trailing (1, 2) arguments are presumably the batch
        # count and the number of GPUs -- confirm against Plan.many.
        plan = Plan.many([rank, rowsize], CUFFT_R2C, 1, 2)
        d_x_gpu = plan.to_device(xh)
        xf_gpu = np.zeros(shape=(rank, halfZ), dtype=np.complex64)
        plan.forward(d_x_gpu, d_x_gpu)  # in place: input buffer doubles as output
        d_x_gpu.copy_to_host(xf_gpu)

        self.assertTrue(np.allclose(xf[:, 0:halfZ], xf_gpu, atol=1e-6))
Exemplo n.º 2
0
    def test_against_fft_1d_mgpu(self):
        return True

        from pyculib.fft.binding import Plan, CUFFT_R2C, CUFFT_C2R
        N = 32
        x = np.arange(N, dtype=np.float32)

        halfZ = N // 2 + 1
        xh = np.arange(halfZ, dtype=np.complex64)
        for i in range(halfZ - 1):
            r = x[2 * i]
            if 2 * i + 1 < N:
                imag = x[2 * i + 1]
            else:
                imag = 0
            xh[i] = np.complex(r, imag)
        xh[halfZ - 1] = 0

        print(x)
        print(xh)

        xf = np.fft.fft(x)

        plan = Plan.many([N], CUFFT_R2C, 1, 2)
        d_x_gpu = plan.to_device(xh)
        xf_gpu = np.zeros(halfZ, dtype=np.complex64)
        #d_xf_gpu = plan.to_device(xf_gpu)
        plan.forward(d_x_gpu, d_x_gpu)
        d_x_gpu.copy_to_host(xf_gpu)

        self.assertTrue(np.allclose(xf[0:halfZ], xf_gpu, atol=1e-6))
Exemplo n.º 3
0
 def test_plan2d(self):
     """Round-trip a 2 x 8 complex64 array through a 2-D C2C plan.

     Runs forward then inverse in place on the device buffer, copies the
     result back, divides by the element count and checks that the real
     part matches the original input.
     """
     from pyculib.fft.binding import Plan, CUFFT_C2C
     count = 2**4
     host = np.arange(count, dtype=np.complex64).reshape(2, count//2)
     expected = host.copy()
     device_buf = cuda.to_device(host)
     rows, cols = host.shape
     plan = Plan.two(CUFFT_C2C, rows, cols)
     # Both transforms overwrite the same device buffer.
     plan.forward(device_buf, device_buf)
     plan.inverse(device_buf, device_buf)
     device_buf.copy_to_host(host)
     recovered = host / count
     self.assertTrue(np.allclose(expected, recovered.real))
Exemplo n.º 4
0
 def test_against_fft_1d(self):
     """Compare a 1-D out-of-place R2C transform with ``np.fft.fft``.

     Only the first ``N//2 + 1`` coefficients of the NumPy result are
     compared, matching the size of the device output buffer.
     """
     from pyculib.fft.binding import Plan, CUFFT_R2C
     N = 128
     half = N//2+1
     x = np.arange(N).astype(np.float32)
     reference = np.fft.fft(x)
     d_in = cuda.to_device(x)
     spectrum = np.zeros(half, np.complex64)
     d_out = cuda.to_device(spectrum)
     plan = Plan.many(x.shape, CUFFT_R2C)
     plan.forward(d_in, d_out)
     d_out.copy_to_host(spectrum)
     matches = np.allclose(reference[0:half], spectrum, atol=1e-6)
     self.assertTrue(matches)
Exemplo n.º 5
0
    def test_plan1d(self):
        """Round-trip ten complex64 samples through a 1-D C2C plan.

        Runs forward then inverse in place on the device buffer, copies the
        result back, divides by the sample count and checks that the real
        part matches the original input.
        """
        from pyculib.fft.binding import Plan, CUFFT_C2C
        count = 10
        host = np.arange(count, dtype=np.complex64)
        expected = host.copy()
        plan = Plan.one(CUFFT_C2C, count)
        device_buf = plan.to_device(host)
        # Both transforms overwrite the same device buffer.
        plan.forward(device_buf, device_buf)
        plan.inverse(device_buf, device_buf)
        device_buf.copy_to_host(host)
        recovered = host / count
        self.assertTrue(np.allclose(expected, recovered.real))
Exemplo n.º 6
0
 def test_against_fft_2d(self):
     """Compare a 2-D out-of-place R2C transform with ``np.fft.fft2``.

     Only the first ``rowsize//2 + 1`` columns of the NumPy result are
     compared, matching the shape of the device output buffer.
     """
     from pyculib.fft.binding import Plan, CUFFT_R2C
     rank, rowsize = 2, 128
     half = rowsize//2 + 1
     x = np.arange(rowsize * rank, dtype=np.float32).reshape(rank, rowsize)
     reference = np.fft.fft2(x)
     d_in = cuda.to_device(x)
     spectrum = np.zeros(shape=(rank, half), dtype=np.complex64)
     d_out = cuda.to_device(spectrum)
     plan = Plan.many(x.shape, CUFFT_R2C)
     plan.forward(d_in, d_out)
     d_out.copy_to_host(spectrum)
     matches = np.allclose(reference[:, 0:half], spectrum, atol=1e-6)
     self.assertTrue(matches)
Exemplo n.º 7
0
    def test_against_fft_3d(self):
        """Compare a 3-D out-of-place R2C transform with ``np.fft.fftn``.

        Only the first ``rowsize // 2 + 1`` entries along the last axis of
        the NumPy result are compared, matching the shape of the device
        output buffer.
        """
        from pyculib.fft.binding import Plan, CUFFT_R2C
        depth, colsize, rowsize = 2, 2, 64
        total = depth * colsize * rowsize
        x = np.arange(total, dtype=np.float32).reshape(depth, colsize, rowsize)

        reference = np.fft.fftn(x)

        half = rowsize // 2 + 1
        plan = Plan.many(x.shape, CUFFT_R2C)
        d_in = plan.to_device(x)
        spectrum = np.zeros(shape=(depth, colsize, half), dtype=np.complex64)
        d_out = plan.to_device(spectrum)
        plan.forward(d_in, d_out)
        d_out.copy_to_host(spectrum)

        matches = np.allclose(reference[:, :, 0:half], spectrum, atol=1e-6)
        self.assertTrue(matches)
Exemplo n.º 8
0
    def test_against_fft_3d_mgpu(self):
        """Compare an in-place 3-D R2C transform with ``np.fft.fftn``.

        The real input is packed pairwise along the last axis into a
        complex64 buffer with one zeroed trailing slot per row, transformed
        in place on the device, and the result is compared with the first
        ``halfZ`` entries along the last axis of the NumPy reference.
        """
        from pyculib.fft.binding import Plan, CUFFT_R2C
        depth = 32
        colsize = 32
        rowsize = 32
        N = depth * colsize * rowsize
        x = np.arange(N, dtype=np.float32).reshape(depth, colsize, rowsize)

        halfZ = rowsize // 2 + 1

        # Pack consecutive real pairs (x[k, j, 2i], x[k, j, 2i + 1]) into
        # complex slot (k, j, i); the last slot of every row stays zero as
        # padding. rowsize is even, so every real sample has a partner.
        # (The previous element-wise loop used np.complex, which was removed
        # in NumPy 1.24.)
        xh = np.zeros((depth, colsize, halfZ), dtype=np.complex64)
        packed = xh[:, :, :halfZ - 1]
        packed.real = x[:, :, 0::2]
        packed.imag = x[:, :, 1::2]

        xf = np.fft.fftn(x)

        # NOTE(review): the trailing (1, 2) arguments are presumably the batch
        # count and the number of GPUs -- confirm against Plan.many.
        plan = Plan.many([depth, colsize, rowsize], CUFFT_R2C, 1, 2)
        d_x_gpu = plan.to_device(xh)
        xf_gpu = np.zeros(shape=(depth, colsize, halfZ), dtype=np.complex64)
        plan.forward(d_x_gpu, d_x_gpu)  # in place: input buffer doubles as output
        d_x_gpu.copy_to_host(xf_gpu)

        self.assertTrue(np.allclose(xf[:, :, 0:halfZ], xf_gpu, atol=1e-6))
Exemplo n.º 9
0
# Benchmark setup: builds host and device complex64 buffers, creates a C2C
# plan and a cuBLAS handle, then records a start time before the timing loop.
# f = fft.FFTPlan(img_shape, np.complex64, np.complex64, 1, 0, fft.FFTPlan.MODE_FFTW_PADDING)

from pyculib.fft.binding import Plan, CUFFT_C2C
from pyculib import blas as cublas
n = (128 * 10)**2
# NOTE(review): data1 and orig are not used in the visible part of this snippet.
data1 = np.arange(n, dtype=np.complex64).reshape(2, n // 2)
data = np.arange(n, dtype=np.complex64)
orig = data.copy()
d_data = cuda.to_device(data)
#s0 = cuda.stream()
# cuda.select_device(1)
# d_data1 = cuda.to_device(data)
#s1 = cuda.stream()
# fftplan = Plan.one(CUFFT_C2C, *data.shape)
# Plan.many()
# NOTE(review): the third argument (1500) is presumably the batch count --
# confirm against Plan.many.
fftplan1 = Plan.many(data.shape, CUFFT_C2C, 1500)
b = cublas.Blas()
rounds = 10000
# NOTE(review): time.clock() was removed in Python 3.8; time.perf_counter()
# is the documented replacement. Switch this call and the matching
# end-of-timing call together so both readings use the same clock.
start = time.clock()

for x in range(rounds):
    # fft.fft_inplace(img)
    # cuda.select_device(0)
    # fftplan1.forward(d_data, d_data)
    # fftplan1.inverse(d_data, d_data)
    # cuda.select_device(1)
    # fftplan1.forward(d_data1, d_data1)
    #fftplan1.forward(d_data1, d_data1)
    # fftplan.inverse(d_data, d_data)
    # d_data = cuda.to_device(data)
    # cublas.dot(d_data, d_data)
Exemplo n.º 10
0
    arg3 = clip(max((-z + r1 + r2)*(z + r1 - r2)*(z - r1 + r2)*(z + r1 + r2), 0.),-1,1)

    if   (r1 <= r2 - z) : return math.pi*r1*r1							                              # planet completely overlaps stellar circle
    elif (r1 >= r2 + z) : return math.pi*r2*r2						                                  # stellar circle completely overlaps planet
    else                : return r1*r1*math.acos(arg1) + r2*r2*math.acos(arg2) - 0.5*math.sqrt(arg3)  # partial overlap



####################
# GPU functions
##################
if numba.cuda.is_available():

    # FFT plan
    from pyculib.fft.binding import Plan, CUFFT_C2C
    fftplan17 = Plan.one(CUFFT_C2C, 2**17)
    fftplan18 = Plan.one(CUFFT_C2C, 2**18)


    @numba.cuda.jit('float64(float64,float64,float64)', device=True, inline=True)
    def d_clip(a, b, c):
        # Device helper: return b when a is below b, c when a is above c,
        # otherwise a itself. The lower bound is tested first.
        if a < b:
            return b
        if a > c:
            return c
        return a


    @numba.cuda.jit('float64(float64,float64,float64)', device=True, inline=True)
    def d_area(z, r1, r2):
        arg1 = d_clip((z*z + r1*r1 - r2*r2)/(2.*z*r1),-1,1)
        arg2 = d_clip((z*z + r2*r2 - r1*r1)/(2.*z*r2),-1,1)
        arg3 = d_clip(max((-z + r1 + r2)*(z + r1 - r2)*(z - r1 + r2)*(z + r1 + r2), 0.),-1,1)