Пример #1
0
# Build two W-filtered fields from deltax in Fourier space: the spectrum is
# multiplied by W and by 1j * Kf * (frequency along axis 0) for the first
# field and by 1j * Kf * (frequency along axis 1) for the second.  The FFT is
# done on rank 0; the multiplication is spread over the MPI ranks.
# NOTE(review): deltak, deltagw1 and deltagw2 are only assigned inside the
# rank-0 branch of this excerpt, yet every rank names them in Scatter/Gather;
# presumably the other ranks bind them elsewhere -- confirm.
if rank == 0:
    t0 = time.time()  # start time; not read within this excerpt
    ##################################Wdeltag########################################
    print '=' * 80
    print 'starting cal wdengx wdengy'
    # Receive buffers for the two Gather calls at the bottom.
    # `N / 2 + 1` relies on Python 2 integer division to stay an int.
    deltagw1 = np.empty((N, N, N / 2 + 1), dtype=np.complex128)
    deltagw2 = np.empty((N, N, N / 2 + 1), dtype=np.complex128)
    # Allocated here but not used within this excerpt.
    deltax1 = np.empty_like(deltax, dtype=np.float64)
    deltax2 = np.empty_like(deltax, dtype=np.float64)
    # Forward transform deltax -> deltak.  The (N, N, N/2+1) complex output
    # suggests a real-to-complex transform -- assumes deltax is a real
    # (N, N, N) array, TODO confirm.
    deltak = np.empty((N, N, N / 2 + 1), dtype=np.complex128)
    fft = fftw.Plan(inarray=deltax,
                    outarray=deltak,
                    direction='forward',
                    nthreads=nthreads)
    fftw.execute(fft)
    fftw.destroy_plan(fft)
    # Temporarily make k[0, 0, 0] * Kf equal to 1e-4 so that wk() below is not
    # evaluated at exactly zero; the entry is reset to 0 at the end.
    k[0, 0, 0] = 10**-4 / Kf
# Hand each rank its share of deltak (received in recvdata_k1).
comm.Scatter(deltak, recvdata_k1, root=0)  #deltak  smoothed log
# Filter evaluated on this rank's wavenumbers.
W = wk(k * Kf)
if rank == 0:
    # Override the filter value at the patched [0, 0, 0] entry.
    W[0, 0, 0] = 1
# Axis-0 component: only the mpi_fn[rank] term is non-zero; the zeros_like
# terms merely broadcast the factor to the full 3-D shape.
deltak1 = recvdata_k1 * W * 1j * Kf * (mpi_fn[rank][:, None, None] +
                                       np.zeros_like(fn)[None, :, None] +
                                       np.zeros_like(fnc)[None, None, :])
# Axis-1 component: same construction with fn as the only non-zero term.
deltak2 = recvdata_k1 * W * 1j * Kf * (
    np.zeros_like(mpi_fn[rank])[:, None, None] + fn[None, :, None] +
    np.zeros_like(fnc)[None, None, :])
# Collect the per-rank pieces back into the full arrays on rank 0.
comm.Gather(deltak1, deltagw1, root=0)
comm.Gather(deltak2, deltagw2, root=0)
if rank == 0:
    # Undo the temporary patch applied before wk() was evaluated.
    k[0, 0, 0] = 0
Пример #2
0
    # NOTE(review): this indented part presumably sits inside an
    # `if rank==0:` block whose header is outside this excerpt -- confirm.
    # Receive buffer for the Pk_halo Gather at the bottom
    # (`N/2+1` relies on Python 2 integer division).
    Pk0=np.empty((N,N,N/2+1),dtype=np.float64)
    # Placeholder (N, N, N) array; its values are overwritten two lines below.
    deltax=np.linspace(0,N,N**3).reshape(N,N,N)
    change=np.array(Tide.LoadData(Input),dtype=np.float64)
    deltax[:]=change[:]
    deltax=np.array(deltax,dtype=np.float64)
    del change
    # NOTE: `sum` shadows the builtin from here on.
    sum=deltax.sum()
    # Rescale so that the entries add up to N**3 (mean value 1).
    deltax*=(N**3/sum)   #for halo, the data is n/nbar.
###################################smooth#######################################
    print '='*80
    print 'smoothing...'
    t0=time.time()
    # Forward transform deltax -> deltak on this rank.
    deltak=np.empty((N,N,N/2+1),dtype=np.complex128)
    fft=fftw.Plan(inarray=deltax,outarray=deltak,direction='forward',nthreads=nthreads)
    fftw.execute(fft)
    fftw.destroy_plan(fft)
    # Receive buffer for the smoothed spectrum gathered below.
    smooth_k=np.empty((N,N,N/2+1),dtype=np.complex128)
# Magnitude of the frequency vector for this rank's part of the grid.
k=(mpi_fn[rank][:,None,None]**2.+fn[None,:,None]**2.+fnc[None,None,:]**2)**(1./2.)
# Product of np.sinc (sin(pi*x)/(pi*x)) of frequency/N along each axis; the
# spectrum is divided by it below.  Presumably the mass-assignment window --
# TODO confirm.
window_k= np.sinc(1./N*mpi_fn[rank][:,None,None])*np.sinc(1./N*fn[None,:,None])*np.sinc(1./N*fnc[None,None,:])
# Hand each rank its share of deltak (received in recvdata_k1).
comm.Scatter(deltak,recvdata_k1,root=0) #deltak
# Give every rank the total computed on the root.
# NOTE(review): on ranks that never assigned `sum`, the argument passed here
# is the builtin function; bcast then replaces it with the root's value.
sum=comm.bcast(sum,root=0) # root's deltax total
# Gaussian factor exp(-0.5*(Kf*k*Sigma)**2), divided by the window.
senddata_k1=recvdata_k1*np.exp(-0.5*Kf*Kf*k*k*Sigma**2)/window_k      #smooth_k
# NOTE(review): if L and N are both ints, L**3/N**6 is integer division under
# Python 2 and evaluates to 0 -- confirm that L is a float.
Ph=L**3/N**6*np.abs(senddata_k1)**2
Wiener=Ph/(Ph+(L**3)/sum)   #wiener filter
senddata_k1*=Wiener
# Squared modulus of the window-corrected, unsmoothed spectrum, scaled by
# L**3/N**6 (same integer-division caveat as for Ph).
Pk_halo=np.abs(recvdata_k1/window_k)**2
Pk_halo*=(L**3/N**6)
Pk_halo=np.array(Pk_halo,dtype=np.float64)
# Collect the per-rank pieces into smooth_k and Pk0 on rank 0.
comm.Gather(senddata_k1,smooth_k,root=0)
comm.Gather(Pk_halo,Pk0,root=0)
if rank==0:
Пример #3
0
def sample_defrost_cpu(lat, func, gamma, m2_eff):
    """Calculate a sample of random values on the lattice.
    Taken from the Defrost program.

    lat = lattice object; mpl, n, nn, dk, dx, dimx, dimy, dimz,
          cuda_block_1, cuda_grid and test are read from it
    func = Cuda kernel, called as func(field, kernel, nn, oversampling,
           dimx, dimy, dimz, block=..., grid=...)
    gamma = exponent used in the kernel (-0.25 or +0.25)
    m2_eff = effective mass term that is added to kk**2 in the kernel

    The 1-D kernel is transformed with fftw3 ('realodd 10'); the 3-D
    transforms of the lattice use numpy.fft.

    Returns the float64 array produced by np.fft.irfftn, which has shape
    (n, n, n) when n is even.
    """
    import fftw3

    # Various constants.
    mpl = lat.mpl
    n = lat.n
    nn = lat.nn
    oversample = 16
    nos = n * oversample**2
    dk = lat.dk
    dx = lat.dx
    dkos = dk / (2. * oversample)
    dxos = dx / oversample
    kcut = nn * dk / 2.0
    norm = 0.5 / (math.sqrt(2 * pi * dk**3.) * mpl) * (dkos / dxos)

    # Create the plan before filling the array: planning with the
    # 'measure' flag is allowed to overwrite the plan's arrays.
    ker = np.empty(nos, dtype=np.float64)
    kernel_plan = fftw3.Plan(ker,
                             ker,
                             direction='forward',
                             flags=['measure'],
                             realtypes=['realodd 10'])

    for k in range(nos):
        kk = (k + 0.5) * dkos
        ker[k] = (kk * (kk**2. + m2_eff)**gamma) * math.exp(-(kk / kcut)**2.)
    kernel_plan.execute()
    fftw3.destroy_plan(kernel_plan)

    for k in range(nos):
        ker[k] = norm * ker[k] / (k + 1)

    tmp = np.zeros((n, n, n), dtype=np.float64)

    ker_gpu = gpuarray.to_gpu(ker)
    tmp_gpu = gpuarray.to_gpu(tmp)

    func(tmp_gpu,
         ker_gpu,
         np.uint32(nn),
         np.float64(oversample),
         np.uint32(lat.dimx),
         np.uint32(lat.dimy),
         np.uint32(lat.dimz),
         block=lat.cuda_block_1,
         grid=lat.cuda_grid)

    tmp += tmp_gpu.get()

    # The device buffers are not needed for the host-side transforms below,
    # so release them as soon as the result has been copied back.
    ker_gpu.gpudata.free()
    tmp_gpu.gpudata.free()

    Fk = np.fft.rfftn(tmp)

    if lat.test:
        print('Testing mode on! Set testQ to False to disable this.\n')
        np.random.seed(1)

    # Complex Gaussian noise; the real part is drawn before the imaginary
    # part so that a seeded run reproduces the same sequence.
    noise = (np.random.normal(size=Fk.shape) +
             np.random.normal(size=Fk.shape) * 1j)
    Fk *= noise

    # np.fft.irfftn normalises the inverse transform itself, so no extra
    # volume factor is applied here.
    return np.fft.irfftn(Fk)
Пример #4
0
def sample_defrost_cpu2(lat, func, gamma, m2_eff):
    """Calculate a sample of random values on the lattice.

    lat = Lattice; mpl, n, nn, dk, dx, dimx, dimy, dimz, cuda_block_1,
          cuda_grid, prec_real, prec_complex, test and VL are read from it
    func = Cuda kernel, called as func(field, kernel, nn, oversampling,
           dimx, dimy, dimz, block=..., grid=...)
    gamma = exponent used in the kernel (-0.25 or +0.25)
    m2_eff = effective mass term that is added to kk**2 in the kernel

    This uses fftw3 for the 1-D kernel transform and for both 3-D
    transforms of the lattice.

    Returns the (n, n, n) array of dtype lat.prec_real after the result
    of the second 3-D transform has been multiplied by 1/lat.VL.
    """
    import fftw3

    # Various constants.
    mpl = lat.mpl
    n = lat.n
    nn = lat.nn
    oversample = 16
    nos = n * oversample**2
    dk = lat.dk
    dx = lat.dx
    dkos = dk / (2. * oversample)
    dxos = dx / oversample
    kcut = nn * dk / 2.0
    norm = 0.5 / (math.sqrt(2 * pi * dk**3.) * mpl) * (dkos / dxos)

    # Create the plan before filling the array: planning with the
    # 'measure' flag is allowed to overwrite the plan's arrays.
    ker = np.empty(nos, dtype=lat.prec_real)
    kernel_plan = fftw3.Plan(ker, ker, direction='forward',
                             flags=['measure'], realtypes=['realodd 10'])

    for k in range(nos):
        kk = (k + 0.5) * dkos
        ker[k] = kk * (kk**2. + m2_eff)**gamma * math.exp(-(kk / kcut)**2.)
    kernel_plan.execute()
    fftw3.destroy_plan(kernel_plan)

    for k in range(nos):
        ker[k] = norm * ker[k] / (k + 1)

    tmp = np.zeros((n, n, n), dtype=lat.prec_real)
    # Floor division keeps the shape an integer under true division too.
    Fk = np.zeros((n, n, n // 2 + 1), dtype=lat.prec_complex)

    # Upload the still-zero field before the plans below are created.
    ker_gpu = gpuarray.to_gpu(ker)
    tmp_gpu = gpuarray.to_gpu(tmp)

    to_k_plan = fftw3.Plan(tmp, Fk, direction='forward', flags=['measure'])
    # NOTE(review): complex -> real plan created with direction='forward';
    # presumably the direction is ignored for this kind of plan -- confirm
    # against the fftw3 wrapper.
    to_x_plan = fftw3.Plan(Fk, tmp, direction='forward', flags=['measure'])

    func(tmp_gpu, ker_gpu, np.uint32(nn), np.float64(oversample),
         np.uint32(lat.dimx), np.uint32(lat.dimy), np.uint32(lat.dimz),
         block=lat.cuda_block_1, grid=lat.cuda_grid)

    # Copy the kernel output into the plan's input array in place.  An
    # accumulating `tmp += ...` would rely on `tmp` still holding zeros
    # after 'measure' planning, which is not guaranteed.
    tmp[...] = tmp_gpu.get()

    to_k_plan.execute()
    fftw3.destroy_plan(to_k_plan)

    if lat.test:
        print('Testing mode on! Set testQ to False to disable this.\n')
        np.random.seed(1)

    # Complex Gaussian noise; the real part is drawn before the imaginary
    # part so that a seeded run reproduces the same sequence.
    noise = (np.random.normal(size=Fk.shape) +
             np.random.normal(size=Fk.shape) * 1j)
    Fk *= noise

    to_x_plan.execute()
    fftw3.destroy_plan(to_x_plan)

    # FFTW transforms are unnormalised; lat.VL is presumably the number of
    # lattice points -- TODO confirm.
    tmp *= 1. / lat.VL

    return tmp
Пример #5
0
def sample_defrost_gpu(lat, func, gamma, m2_eff):
    """Calculate a sample of random values on the lattice.

    lat = Lattice; mpl, n, nn, dk, dx, dimx, dimy, dimz, cuda_block_1,
          cuda_grid, prec_real, prec_complex, test and VL are read from it
    func = Cuda kernel, called as func(field, kernel, nn, oversampling,
           dimx, dimy, dimz, block=..., grid=...)
    gamma = exponent used in the kernel (-0.25 or +0.25)
    m2_eff = effective mass term that is added to kk**2 in the kernel

    This uses fftw3 for the 1-D kernel transform and scikits.cuda.fft
    (CuFFT) for both 3-D transforms.  The random factors are generated
    with numpy and applied on the host, so the spectrum makes one round
    trip between device and host.

    Returns the (n, n, n) host array of dtype lat.prec_real after the
    result of the inverse transform has been multiplied by 1/lat.VL.
    """
    import scikits.cuda.fft as cu_fft
    import fftw3

    # Various constants.
    mpl = lat.mpl
    n = lat.n
    nn = lat.nn
    oversample = 16
    nos = n * oversample**2
    dk = lat.dk
    dx = lat.dx
    dkos = dk / (2. * oversample)
    dxos = dx / oversample
    kcut = nn * dk / 2.0
    norm = 0.5 / (math.sqrt(2 * pi * dk**3.) * mpl) * (dkos / dxos)

    # Create the plan before filling the array: planning with the
    # 'measure' flag is allowed to overwrite the plan's arrays.
    ker = np.empty(nos, dtype=lat.prec_real)
    kernel_plan = fftw3.Plan(ker,
                             ker,
                             direction='forward',
                             flags=['measure'],
                             realtypes=['realodd 10'])

    for k in range(nos):
        kk = (k + 0.5) * dkos
        ker[k] = kk * (kk**2. + m2_eff)**gamma * math.exp(-(kk / kcut)**2.)
    kernel_plan.execute()
    fftw3.destroy_plan(kernel_plan)

    for k in range(nos):
        ker[k] = norm * ker[k] / (k + 1)

    # Floor division keeps the shape an integer under true division too.
    # NOTE(review): the spectrum buffer is declared as (n//2 + 1, n, n);
    # it has the element count a real-to-complex transform of an (n, n, n)
    # array needs, but confirm that the axis order is the intended one.
    Fk_gpu = gpuarray.zeros((n // 2 + 1, n, n), dtype=lat.prec_complex)

    ker_gpu = gpuarray.to_gpu(ker)
    tmp_gpu = gpuarray.zeros((n, n, n), dtype=lat.prec_real)

    forward_plan = cu_fft.Plan(tmp_gpu.shape, lat.prec_real,
                               lat.prec_complex)
    inverse_plan = cu_fft.Plan(tmp_gpu.shape, lat.prec_complex,
                               lat.prec_real)

    func(tmp_gpu,
         ker_gpu,
         np.uint32(nn),
         np.float64(oversample),
         np.uint32(lat.dimx),
         np.uint32(lat.dimy),
         np.uint32(lat.dimz),
         block=lat.cuda_block_1,
         grid=lat.cuda_grid)

    cu_fft.fft(tmp_gpu, Fk_gpu, forward_plan)

    if lat.test:
        print('Testing mode on! Set testQ to False to disable this.\n')
        np.random.seed(1)

    # Complex Gaussian noise; the real part is drawn before the imaginary
    # part so that a seeded run reproduces the same sequence.
    noise = (np.random.normal(size=Fk_gpu.shape) +
             np.random.normal(size=Fk_gpu.shape) * 1j)

    # Apply the noise on the host and upload the product again.
    Fk = Fk_gpu.get()
    Fk *= noise
    Fk_gpu = gpuarray.to_gpu(Fk)

    cu_fft.ifft(Fk_gpu, tmp_gpu, inverse_plan)
    res = (tmp_gpu.get()).astype(lat.prec_real)

    # No scaling is requested from ifft, so the volume factor is applied
    # here; lat.VL is presumably the number of lattice points -- TODO confirm.
    res *= 1. / lat.VL

    return res