Example #1
def gpu_getmax(map):
    """
    Use pycuda to get the maximum absolute deviation of the residual map,
    with the correct sign.
    """
    # Peak magnitude of the residual (a GPU reduction over |map|).
    imax = gpu.max(cumath.fabs(map)).get()
    # If the largest positive value is not the peak magnitude, the extreme is negative.
    if gpu.max(map).get() != imax:
        imax *= -1
    return np.float32(imax)
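For context, a minimal sketch of calling this helper, assuming the aliases the snippet uses (`gpu` for `pycuda.gpuarray`, `cumath` for `pycuda.cumath`) and a small illustrative residual array:

import numpy as np
import pycuda.autoinit  # noqa: F401 -- creates a CUDA context
import pycuda.gpuarray as gpu
import pycuda.cumath as cumath

residual = gpu.to_gpu(np.array([0.5, -3.0, 2.0], dtype=np.float32))
print(gpu_getmax(residual))  # -3.0: the extreme value, with its sign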
Example #2
def test_fabs(self):
    """Tests that the fabs function works."""
    a = simplearray.array(test_sample).fill_arange() * -1
    b = cumath.fabs(a)

    for i in range(test_sample):
        self.assertTrue(a[i] + b[i] == 0)
        self.assertTrue(b[i] >= 0)
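The `simplearray` helper above comes from an old PyCUDA test suite; an equivalent standalone check against the current `pycuda.gpuarray` API might look like this (a sketch, assuming a CUDA context from `pycuda.autoinit`):

import numpy as np
import pycuda.autoinit  # noqa: F401
import pycuda.gpuarray as gpuarray
import pycuda.cumath as cumath

n = 1000
a = gpuarray.arange(n, dtype=np.float32) * -1   # 0, -1, -2, ...
b = cumath.fabs(a)

host_a, host_b = a.get(), b.get()
assert np.all(host_a + host_b == 0)
assert np.all(host_b >= 0)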
Example #3
def gpu_getmax(map):
    """
    Use pycuda to get the maximum absolute deviation of the residual map,
    with the correct sign
    """
    imax = gpu.max(cumath.fabs(map)).get()
    if gpu.max(map).get() != imax: imax *= -1
    return np.float32(imax)
Example #4
def calculateTimestep(meshPropSpeedsGPU, cellDim):
    # CFL-style restriction: the timestep is bounded by the cell size
    # divided by (a safety factor times) the fastest propagation speed.
    maxPropSpeed = gpuarray.max(cumath.fabs(meshPropSpeedsGPU)).get()
    return cellDim / (4.0 * maxPropSpeed)
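A usage sketch, assuming `gpuarray` and `cumath` are the PyCUDA modules of the same names; the speeds and cell size are illustrative:

import numpy as np
import pycuda.autoinit  # noqa: F401
import pycuda.gpuarray as gpuarray
import pycuda.cumath as cumath

speeds = gpuarray.to_gpu(np.array([-3.0, 1.5, 2.5], dtype=np.float32))
dt = calculateTimestep(speeds, 0.5)  # 0.5 / (4 * 3.0) ~= 0.0417
print(dt)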
Example #5
def abs_t(self, a, out):
    cumath.fabs(a, out=out)
Example #6
def cuda_gridvis(settings,plan):
  """
  Grid the visibilities parallelized by pixel.
  References:
    - Chapter 10 in "Interferometry and Synthesis in Radio Astronomy"
        by Thompson, Moran, & Swenson
    - Daniel Briggs' PhD Thesis: http://www.aoc.nrao.edu/dissertations/dbriggs/
  """
  print "Gridding the visibilities"
  t_start=time.time()

  # unpack parameters
  vfile   = settings['vfile']
  briggs  = settings['briggs']
  imsize  = settings['imsize']
  cell    = settings['cell']
  nx      = np.int32(2*imsize)
  noff    = np.int32((nx-imsize)/2)

  ## constants
  arc2rad = np.float32(np.pi/180/3600.)
  du      = np.float32(1./(arc2rad*cell*nx))
  ## grab data
  f  = pyfits.open(settings['vfile'])
  ## quickly figure out what data is not flagged
  freq  = np.float32(f[0].header['CRVAL4'])
  good  = np.where(f[0].data.data[:,0,0,0,0,0,0] != 0)
  h_u   = np.float32(freq*f[0].data.par('uu')[good])
  h_v   = np.float32(freq*f[0].data.par('vv')[good])
  gcount = np.int32(np.size(h_u))
  ## assume data is unpolarized
  h_re   = np.float32(0.5*(f[0].data.data[good,0,0,0,0,0,0]+f[0].data.data[good,0,0,0,0,1,0]))
  h_im   = np.float32(0.5*(f[0].data.data[good,0,0,0,0,0,1]+f[0].data.data[good,0,0,0,0,1,1]))
  ## make GPU arrays
  h_grd  = np.zeros((nx,nx),dtype=np.complex64)
  h_cnt  = np.zeros((nx,nx),dtype=np.int32)
  d_u    = gpu.to_gpu(h_u)
  d_v    = gpu.to_gpu(h_v)
  d_re   = gpu.to_gpu(h_re)
  d_im   = gpu.to_gpu(h_im)
  d_cnt  = gpu.zeros((np.int(nx),np.int(nx)),np.int32)
  d_grd  = gpu.zeros((np.int(nx),np.int(nx)),np.complex64)
  d_ngrd = gpu.zeros_like(d_grd)
  d_bm   = gpu.zeros_like(d_grd)
  d_nbm  = gpu.zeros_like(d_grd)
  d_fim  = gpu.zeros((np.int(imsize),np.int(imsize)),np.float32)
  ## define kernel parameters
  blocksize2D  = (8,16,1)
  gridsize2D   = (np.int(np.ceil(1.*nx/blocksize2D[0])),np.int(np.ceil(1.*nx/blocksize2D[1])))
  blocksizeF2D = (16,16,1)
  gridsizeF2D  = (np.int(np.ceil(1.*imsize/blocksizeF2D[0])),np.int(np.ceil(1.*imsize/blocksizeF2D[1])))
  blocksize1D  = (256,1,1)
  gridsize1D   = (np.int(np.ceil(1.*gcount/blocksize1D[0])),1)

  # ------------------------
  # make gridding kernels
  # ------------------------
  ## make spheroidal convolution kernel (don't mess with these!)
  width = 6.
  ngcf  = 24.
  h_cgf = gcf(ngcf,width)
  ## make grid correction
  h_corr = corrfun(nx,width)
  d_cgf  = module.get_global('cgf')[0]
  d_corr = gpu.to_gpu(h_corr)
  cu.memcpy_htod(d_cgf,h_cgf)

  # ------------------------
  # grid it up
  # ------------------------
  d_umax = gpu.max(cumath.fabs(d_u))
  d_vmax = gpu.max(cumath.fabs(d_v))
  umax   = np.int32(np.ceil(d_umax.get()/du))
  vmax   = np.int32(np.ceil(d_vmax.get()/du))

  ## grid ($$)
  #  This should be improvable via:
  #    - shared memory solution? I tried...
  #    - better coalesced memory access? I tried...
  #    - reorganizing and indexing UV data beforehand?
  #       (i.e. http://www.nvidia.com/docs/IO/47905/ECE757_Project_Report_Gregerson.pdf)
  #    - storing V(u,v) in texture memory?
  gridVis_wBM_kernel(d_grd,d_bm,d_cnt,d_u,d_v,d_re,d_im,nx,du,gcount,umax,vmax,
                     block=blocksize2D,grid=gridsize2D)
  ## apply weights
  wgtGrid_kernel(d_bm,d_cnt,briggs,nx,block=blocksize2D,grid=gridsize2D)
  hfac = np.int32(1)
  dblGrid_kernel(d_bm,nx,hfac,block=blocksize2D,grid=gridsize2D)
  shiftGrid_kernel(d_bm,d_nbm,nx,block=blocksize2D,grid=gridsize2D)
  ## normalize
  wgtGrid_kernel(d_grd,d_cnt,briggs,nx,block=blocksize2D,grid=gridsize2D)
  ## Reflect grid about v axis
  hfac = np.int32(-1)
  dblGrid_kernel(d_grd,nx,hfac,block=blocksize2D,grid=gridsize2D)
  ## Shift both
  shiftGrid_kernel(d_grd,d_ngrd,nx,block=blocksize2D,grid=gridsize2D)

  # ------------------------
  # Make the beam
  # ------------------------
  ## Transform to image plane
  fft.fft(d_nbm,d_bm,plan)
  ## Shift
  shiftGrid_kernel(d_bm,d_nbm,nx,block=blocksize2D,grid=gridsize2D)
  ## Correct for C
  corrGrid_kernel(d_nbm,d_corr,nx,block=blocksize2D,grid=gridsize2D)
  # Trim
  trimIm_kernel(d_nbm,d_fim,noff,nx,imsize,block=blocksizeF2D,grid=gridsizeF2D)
  ## Normalize
  d_bmax = gpu.max(d_fim)
  bmax = d_bmax.get()
  bmax = np.float32(1./bmax)
  nrmBeam_kernel(d_fim,bmax,imsize,block=blocksizeF2D,grid=gridsizeF2D)
  ## Pull onto CPU
  dpsf  = d_fim.get()

  # ------------------------
  # Make the map
  # ------------------------
  ## Transform to image plane
  fft.fft(d_ngrd,d_grd,plan)
  ## Shift
  shiftGrid_kernel(d_grd,d_ngrd,nx,block=blocksize2D,grid=gridsize2D)
  ## Correct for C
  corrGrid_kernel(d_ngrd,d_corr,nx,block=blocksize2D,grid=gridsize2D)
  ## Trim
  trimIm_kernel(d_ngrd,d_fim,noff,nx,imsize,block=blocksizeF2D,grid=gridsizeF2D)
  ## Normalize (Jy/beam)
  nrmGrid_kernel(d_fim,bmax,imsize,block=blocksizeF2D,grid=gridsizeF2D)

  ## Finish timers
  t_end = time.time()
  t_full = t_end - t_start
  print("Gridding execution time %0.5f s" % t_full)
  print("\t%0.5f s per visibility" % (t_full / gcount))

  ## Return dirty psf (CPU) and dirty image (GPU)
  return dpsf,d_fim
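A sketch of how this function might be driven, under the assumptions that `fft` is the scikit-cuda FFT wrapper used above and that the rest of the module (the compiled kernels, `gcf`, `corrfun`) is loaded as in the original repository; the file name and numeric values are illustrative:

import numpy as np
import pycuda.autoinit  # noqa: F401
import skcuda.fft as fft

imsize = 1024
settings = {
    'vfile': 'obs.uvfits',      # illustrative path to a UVFITS file
    'briggs': np.float32(0.5),  # robustness parameter for the weighting kernel
    'imsize': imsize,
    'cell': 1.0,                # assumed cell size in arcsec
}
nx = 2 * imsize
plan = fft.Plan((nx, nx), np.complex64, np.complex64)
dpsf, d_fim = cuda_gridvis(settings, plan)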
Example #7
# Setup sketch: the snippet starts mid-script, so the imports and the CUDA timing
# events below are assumptions added so that it runs standalone.
import numpy as np
import pycuda.autoinit  # noqa: F401 -- creates a CUDA context
import pycuda.driver as cuda
import pycuda.gpuarray as gpuarray
import pycuda.cumath as cumath

N = 100000

# --- Create random vectors on the CPU
h_a = np.random.randn(1, N)
h_b = np.random.randn(1, N)

# --- Set CPU arrays as single precision
h_a = h_a.astype(np.float32)
h_b = h_b.astype(np.float32)
h_c = np.empty_like(h_a)

d_a = gpuarray.to_gpu(h_a)
d_b = gpuarray.to_gpu(h_b)

# --- CUDA events for timing the elementwise expression
start = cuda.Event()
end = cuda.Event()

start.record()
d_c = (cumath.sqrt(cumath.fabs(d_a)) + cumath.exp(d_b))
end.record()
end.synchronize()
secs = start.time_till(end) * 1e-3
print("Processing time = %fs" % (secs))

h_c = d_c.get()

if np.all(abs(h_c - (np.sqrt(np.abs(h_a)) + np.exp(h_b))) < 1e-5):
    print("Test passed!")
else:
    print("Error!")

# --- Flush context printf buffer
cuda.Context.synchronize()
Example #8
def cuda_gridvis(sub_array, f, settings, plan, chan):
    """
    Grid the visibilities parallelized by pixel.
    References:
      - Chapter 10 in "Interferometry and Synthesis in Radio Astronomy"
          by Thompson, Moran, & Swenson
      - Daniel Briggs' PhD Thesis: http://www.aoc.nrao.edu/dissertations/dbriggs/
    """
    print "Gridding the visibilities"
    t_start = time.time()
    if sub_array==1:
        Antennas = 40
    else:
        Antennas = 60

    # unpack parameters
    vfile = settings['vfile']
    briggs = settings['briggs']
    imsize = settings['imsize']
    cell = settings['cell']
    nx = np.int32(2 * imsize)
    noff = np.int32((nx - imsize) / 2)

    ## constants
    arc2rad = np.float32(np.pi / 180. / 3600.)
    du = np.float32(1. / (arc2rad * cell * nx))

    # determine the file type (uvfits or fitsidi)
    h_u = np.ndarray(shape=(Antennas*(Antennas-1)//2, 1), dtype='float64')
    h_v = np.ndarray(shape=(Antennas*(Antennas-1)//2, 1), dtype='float64')
    h_re = np.ndarray(shape=(Antennas*(Antennas-1)//2, 1), dtype='float32')
    h_im = np.ndarray(shape=(Antennas*(Antennas-1)//2, 1), dtype='float32')

    #Get Visibility Data and values of UVW
    if settings['vfile'].find('.uvfits') != -1:
        freq = 3.45E11 #np.float32(f[0].header['CRVAL4'])
        light_speed = 299792458.
        good = np.where(f[0].data.data[:, 0, 0, chan, 0, 0] != 0)

        h_u = np.float32(light_speed * f[0].data.par('uu')[good])
        print "h_u", h_u.shape
        h_v = np.float32(light_speed * f[0].data.par('vv')[good])
        gcount = np.int32(np.size(h_u))
        ## assume data is unpolarized
        h_re = np.float32(f[0].data.data[good, 0, 0, chan, 0, 0])
        h_im = np.float32(f[0].data.data[good, 0, 0, chan, 0, 1])

        freq = 1702500000.
        light_speed = 299792458.  # Speed of light


        ## assume data is unpolarized
        #print chan
        print('GCOUNT', gcount)

        # h_ : host,  d_ : device
        h_grd = np.zeros((nx, nx), dtype=np.complex64)
        h_cnt = np.zeros((nx, nx), dtype=np.int32)
        d_u = gpu.to_gpu(np.array(h_u,dtype='float32'))
        d_v = gpu.to_gpu(np.array(h_v,dtype='float32'))
        d_re = gpu.to_gpu(np.array(h_re,dtype='float32'))
        d_im = gpu.to_gpu(np.array(h_im,dtype='float32'))
        d_cnt = gpu.zeros((np.int(nx), np.int(nx)), np.int32)
        d_grd = gpu.zeros((np.int(nx), np.int(nx)), np.complex64)
        d_ngrd = gpu.zeros_like(d_grd)
        d_bm = gpu.zeros_like(d_grd)
        d_nbm = gpu.zeros_like(d_grd)
        d_fim = gpu.zeros((np.int(imsize), np.int(imsize)), np.float32)

        ## define kernel parameters
        if imsize == 1024:
            blocksize2D = (8, 16, 1)
            gridsize2D = (np.int(np.ceil(1. * nx / blocksize2D[0])), np.int(np.ceil(1. * nx / blocksize2D[1])))
            blocksizeF2D = (16, 16, 1)
            gridsizeF2D = (np.int(np.ceil(1. * imsize / blocksizeF2D[0])), np.int(np.ceil(1. * imsize / blocksizeF2D[1])))
            blocksize1D = (256, 1, 1)
        else:
            blocksize2D = (16, 32, 1)
            gridsize2D = (np.int(np.ceil(1. * nx / blocksize2D[0])), np.int(np.ceil(1. * nx / blocksize2D[1])))
            blocksizeF2D = (32, 32, 1)
            gridsizeF2D = (np.int(np.ceil(1. * imsize / blocksizeF2D[0])), np.int(np.ceil(1. * imsize / blocksizeF2D[1])))
            blocksize1D = (512, 1, 1)

        gridsize1D = (np.int(np.ceil(1. * gcount / blocksize1D[0])), 1)

        # ------------------------
        # make gridding kernels
        # ------------------------
        ## make spheroidal convolution kernel (don't mess with these!)
        width = 6.
        ngcf = 24.
        h_cgf = gcf(ngcf, width)
        ## make grid correction
        h_corr = corrfun(nx, width)
        d_cgf = module.get_global('cgf')[0]
        d_corr = gpu.to_gpu(h_corr)
        cu.memcpy_htod(d_cgf, h_cgf)

        # ------------------------
        # grid it up
        # ------------------------
        d_umax = gpu.max(cumath.fabs(d_u))
        d_vmax = gpu.max(cumath.fabs(d_v))
        umax = np.int32(np.ceil(d_umax.get() / du))
        vmax = np.int32(np.ceil(d_vmax.get() / du))

        ## grid ($$)
        #  This should be improvable via:
        #    - shared memory solution? I tried...
        #    - better coalesced memory access? I tried...
        #    - reorganizing and indexing UV data beforehand?
        #       (i.e. http://www.nvidia.com/docs/IO/47905/ECE757_Project_Report_Gregerson.pdf)
        #    - storing V(u,v) in texture memory?

        # Each pixel in the uv plane walks through the data and checks whether it is
        # included in the convolution. This kernel also calculates the point spread
        # function and the local sampling from the data (for applying the weights later).
        gridVis_wBM_kernel(d_grd, d_bm, d_cnt, d_u, d_v, d_re, d_im, nx, du, gcount, umax, vmax, \
                           block=blocksize2D, grid=gridsize2D)

        ## apply weights
        wgtGrid_kernel(d_bm, d_cnt, briggs, nx, block=blocksize2D, grid=gridsize2D)
        hfac = np.int32(1)
        dblGrid_kernel(d_bm, nx, hfac, block=blocksize2D, grid=gridsize2D)
        shiftGrid_kernel(d_bm, d_nbm, nx, block=blocksize2D, grid=gridsize2D)
        ## normalize

        wgtGrid_kernel(d_grd, d_cnt, briggs, nx, block=blocksize2D, grid=gridsize2D)
        ## Reflect grid about v axis
        hfac = np.int32(-1)
        dblGrid_kernel(d_grd, nx, hfac, block=blocksize2D, grid=gridsize2D)
        ## Shift both
        shiftGrid_kernel(d_grd, d_ngrd, nx, block=blocksize2D, grid=gridsize2D)

        # ------------------------
        # Make the beam
        # ------------------------
        ## Transform to image plane
        fft.fft(d_nbm, d_bm, plan)
        ## Shift
        shiftGrid_kernel(d_bm, d_nbm, nx, block=blocksize2D, grid=gridsize2D)
        ## Correct for C
        corrGrid_kernel(d_nbm, d_corr, nx, block=blocksize2D, grid=gridsize2D)
        # Trim
        trimIm_kernel(d_nbm, d_fim, noff, nx, imsize, block=blocksizeF2D, grid=gridsizeF2D)
        ## Normalize
        d_bmax = gpu.max(d_fim)
        bmax = d_bmax.get()
        bmax = np.float32(1. / bmax)
        nrmBeam_kernel(d_fim, bmax, imsize, block=blocksizeF2D, grid=gridsizeF2D)
        ## Pull onto CPU
        dpsf = d_fim.get()

        # ------------------------
        # Make the map
        # ------------------------
        ## Transform to image plane
        fft.fft(d_ngrd, d_grd, plan)
        ## Shift
        shiftGrid_kernel(d_grd, d_ngrd, nx, block=blocksize2D, grid=gridsize2D)
        ## Correct for C
        corrGrid_kernel(d_ngrd, d_corr, nx, block=blocksize2D, grid=gridsize2D)
        ## Trim
        trimIm_kernel(d_ngrd, d_fim, noff, nx, imsize, block=blocksizeF2D, grid=gridsizeF2D)
        ## Normalize (Jy/beam)
        nrmGrid_kernel(d_fim, bmax, imsize, block=blocksizeF2D, grid=gridsizeF2D)

        ## Finish timers
        t_end = time.time()
        t_full = t_end - t_start
        print "Gridding execution time %0.5f" % t_full + ' s'
        print "\t%0.5f" % (t_full / gcount) + ' s per visibility'

        ## Return dirty psf (CPU) and dirty image (GPU)
        return dpsf, d_fim
Example #9
def kernel(a):
    from pycuda.cumath import fabs
    return my_max(fabs(a))
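`my_max` is not shown in this excerpt; assuming it is a GPU reduction along the lines of `pycuda.gpuarray.max`, a self-contained equivalent would be:

import numpy as np
import pycuda.autoinit  # noqa: F401
import pycuda.gpuarray as gpuarray
from pycuda.cumath import fabs

def kernel(a):
    # max(|a|), reduced entirely on the device; .get() pulls back the scalar
    return gpuarray.max(fabs(a)).get()

x = gpuarray.to_gpu(np.array([-7.0, 3.0], dtype=np.float32))
print(kernel(x))  # 7.0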
Example #10
def calculateTimestep(PropSpeedsGPU, cellDim):
    maxPropSpeed = gpuarray.max(cumath.fabs(PropSpeedsGPU)).get()
    return cellDim / (4.0 * maxPropSpeed), maxPropSpeed
Example #11
def abs_t(self, a, out):
    cumath.fabs(a, out=out)
Example #12
# Setup sketch: the original snippet starts mid-script, so the imports, the frame
# width W, and the capture device below are assumptions added for completeness.
import numpy as np
import cv2 as cv
import pycuda.autoinit  # noqa: F401 -- creates a CUDA context
import pycuda.gpuarray as gpuarray
import pycuda.cumath as cmath  # the snippet aliases cumath as cmath

W = 320  # assumed frame width (only H appeared in the original)
H = 240

cap = cv.VideoCapture(0)  # assumed default camera
cap.set(cv.CAP_PROP_FRAME_HEIGHT, H)
cap.set(cv.CAP_PROP_FRAME_WIDTH, W)

ret, frame = cap.read()
gray_a = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)  # OpenCV delivers BGR frames

img_ori_gpu = gpuarray.to_gpu(gray_a.astype(np.float32))
img_buf_gpu = gpuarray.empty_like(img_ori_gpu)
img_sub = gpuarray.ones_like(img_ori_gpu)
img_sub = 25 * img_sub  # difference threshold of 25 gray levels
img_bgm = gpuarray.zeros_like(img_sub)
while True:
    ret, frame = cap.read()
    gray_buff = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    img_res_gpu = gpuarray.to_gpu(gray_buff.astype(np.float32))
    img_buf_gpu = cmath.fabs(img_ori_gpu - img_res_gpu)
    img_buf_gpu = img_buf_gpu - img_sub
    img_ori_gpu = img_res_gpu.copy()
    img_res_gpu = gpuarray.if_positive(img_buf_gpu, img_bgm, img_res_gpu)
    gray_buff = img_res_gpu.get()
    gray_buff = gray_buff.astype(np.uint8)
    frame = cv.cvtColor(gray_buff, cv.COLOR_GRAY2RGB)
    cv.imshow("Moving Detecting!", frame)
    if cv.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv.destroyAllWindows()
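The background masking above hinges on `pycuda.gpuarray.if_positive`, which selects elementwise between two arrays according to the sign of a criterion array; a minimal sketch:

import numpy as np
import pycuda.autoinit  # noqa: F401
import pycuda.gpuarray as gpuarray

crit = gpuarray.to_gpu(np.array([1.0, -1.0], dtype=np.float32))
a = gpuarray.to_gpu(np.array([10.0, 10.0], dtype=np.float32))
b = gpuarray.to_gpu(np.array([20.0, 20.0], dtype=np.float32))
print(gpuarray.if_positive(crit, a, b).get())  # [10. 20.]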
Example #13
def cuda_gridvis(csrh_sun, csrh_satellite, settings, plan, chan):
    """
    Grid the visibilities parallelized by pixel.
    References:
      - Chapter 10 in "Interferometry and Synthesis in Radio Astronomy"
          by Thompson, Moran, & Swenson
      - Daniel Briggs' PhD Thesis: http://www.aoc.nrao.edu/dissertations/dbriggs/
    """
    print "Gridding the visibilities"
    t_start = time.time()

    #f = pyfits.open(settings['vfile'])

    # unpack parameters
    vfile = settings['vfile']
    briggs = settings['briggs']
    imsize = settings['imsize']
    cell = settings['cell']
    nx = np.int32(2 * imsize)
    noff = np.int32((nx - imsize) / 2)

    ## constants
    arc2rad = np.float32(np.pi / 180. / 3600.)
    du = np.float32(1. / (arc2rad * cell * nx))
    ## grab data
    #f = pyfits.open(settings['vfile'])

    Data = np.ndarray(shape=(44, 44, 16), dtype=complex)
    UVW = np.ndarray(shape=(780, 1), dtype='float64')
    Data, UVW = visibility(csrh_sun, csrh_satellite, chan)
    print "UVW*****\n", UVW

    # determine the file type (uvfits or fitsidi)
    h_uu = np.ndarray(shape=(780), dtype='float64')
    h_vv = np.ndarray(shape=(780), dtype='float64')
    h_rere = np.ndarray(shape=(780), dtype='float32')
    h_imim = np.ndarray(shape=(780), dtype='float32')

    freq = 1702500000.
    light_speed = 299792458.  # Speed of light

    ## quickly figure out what data is not flagged
    # np.float32(f[7].header['CRVAL3'])
    #good  = np.where(f[0].data.data[:,0,0,0,0,0,0] != 0)

    #h_u   = np.float32(freq*f[0].data.par('uu')[good])
    #h_v   = np.float32(freq*f[0].data.par('vv')[good])

    blen = 0

    for antenna1 in range(0, 39):
        for antenna2 in range(antenna1 + 1, 40):
            h_rere[blen] = Data[antenna1][antenna2][chan].real
            h_imim[blen] = Data[antenna1][antenna2][chan].imag
            h_uu[blen] = freq * UVW[blen][0]
            h_vv[blen] = freq * UVW[blen][1]
            blen += 1

    print "h_u", h_uu
    #h_u = np.float32(h_u.ravel())
    #h_v = np.float32(h_v.ravel())
    gcount = np.int32(np.size(h_uu))
    #gcount = len(gcount.ravel())
    #h_re = np.float32(h_re.ravel())
    #h_im = np.float32(h_im.ravel())
    #freq = 3.45E11  #np.float32(f[0].header['CRVAL4'])

    blen = 0
    bl_order = np.ndarray(shape=(780, 2), dtype=int)
    good = []

    for border1 in range(0, 39):
        for border2 in range(border1 + 1, 40):
            bl_order[blen][0] = border1
            bl_order[blen][1] = border2
            blen = blen + 1

    blen = 0

    h_u = []
    h_v = []
    h_re = []
    h_im = []
    Flag_Ant = [
        0, 4, 8, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 26,
        28, 29, 37, 38, 39
    ]
    for blen in range(0, 780):
        if (bl_order[blen][0] not in Flag_Ant) and (bl_order[blen][1]
                                                    not in Flag_Ant):
            good.append(blen)
            h_u.append(h_uu[blen])
            h_v.append(h_vv[blen])
            h_re.append(h_rere[blen])
            h_im.append(h_imim[blen])

    #print "Good:",good

    gcount = np.int32(np.size(h_u))
    ## assume data is unpolarized
    #print chan
    print('GCOUNT', gcount)
    #print "H_U", h_u
    #print "H_V", h_v
    #print h_re
    #print h_im

    # h_ : host,  d_ : device
    h_grd = np.zeros((nx, nx), dtype=np.complex64)
    h_cnt = np.zeros((nx, nx), dtype=np.int32)
    # Upload the flag-filtered arrays (h_u, h_v, h_re, h_im), whose length matches gcount;
    # the unfiltered h_uu/h_vv/h_rere/h_imim would put the wrong entries under the kernel.
    d_u = gpu.to_gpu(np.array(h_u, dtype='float32'))
    d_v = gpu.to_gpu(np.array(h_v, dtype='float32'))
    d_re = gpu.to_gpu(np.array(h_re, dtype='float32'))
    d_im = gpu.to_gpu(np.array(h_im, dtype='float32'))
    d_cnt = gpu.zeros((np.int(nx), np.int(nx)), np.int32)
    d_grd = gpu.zeros((np.int(nx), np.int(nx)), np.complex64)
    d_ngrd = gpu.zeros_like(d_grd)
    d_bm = gpu.zeros_like(d_grd)
    d_nbm = gpu.zeros_like(d_grd)
    d_fim = gpu.zeros((np.int(imsize), np.int(imsize)), np.float32)
    ## define kernel parameters
    if imsize == 1024:
        blocksize2D = (8, 16, 1)
        gridsize2D = (np.int(np.ceil(1. * nx / blocksize2D[0])),
                      np.int(np.ceil(1. * nx / blocksize2D[1])))
        blocksizeF2D = (16, 16, 1)
        gridsizeF2D = (np.int(np.ceil(1. * imsize / blocksizeF2D[0])),
                       np.int(np.ceil(1. * imsize / blocksizeF2D[1])))
        blocksize1D = (256, 1, 1)
    else:
        blocksize2D = (16, 32, 1)
        gridsize2D = (np.int(np.ceil(1. * nx / blocksize2D[0])),
                      np.int(np.ceil(1. * nx / blocksize2D[1])))
        blocksizeF2D = (32, 32, 1)
        gridsizeF2D = (np.int(np.ceil(1. * imsize / blocksizeF2D[0])),
                       np.int(np.ceil(1. * imsize / blocksizeF2D[1])))
        blocksize1D = (512, 1, 1)

    gridsize1D = (np.int(np.ceil(1. * gcount / blocksize1D[0])), 1)

    # ------------------------
    # make gridding kernels
    # ------------------------
    ## make spheroidal convolution kernel (don't mess with these!)
    width = 6.
    ngcf = 24.
    h_cgf = gcf(ngcf, width)
    ## make grid correction
    h_corr = corrfun(nx, width)
    d_cgf = module.get_global('cgf')[0]
    d_corr = gpu.to_gpu(h_corr)
    cu.memcpy_htod(d_cgf, h_cgf)

    # ------------------------
    # grid it up
    # ------------------------
    d_umax = gpu.max(cumath.fabs(d_u))
    d_vmax = gpu.max(cumath.fabs(d_v))
    umax = np.int32(np.ceil(d_umax.get() / du))
    vmax = np.int32(np.ceil(d_vmax.get() / du))

    ## grid ($$)
    #  This should be improvable via:
    #    - shared memory solution? I tried...
    #    - better coalesced memory access? I tried...
    #    - reorganizing and indexing UV data beforehand?
    #       (i.e. http://www.nvidia.com/docs/IO/47905/ECE757_Project_Report_Gregerson.pdf)
    #    - storing V(u,v) in texture memory?
    gridVis_wBM_kernel(d_grd, d_bm, d_cnt, d_u, d_v, d_re, d_im, nx, du, gcount, umax, vmax, \
                       block=blocksize2D, grid=gridsize2D)

    ## apply weights
    wgtGrid_kernel(d_bm, d_cnt, briggs, nx, block=blocksize2D, grid=gridsize2D)
    hfac = np.int32(1)
    dblGrid_kernel(d_bm, nx, hfac, block=blocksize2D, grid=gridsize2D)
    shiftGrid_kernel(d_bm, d_nbm, nx, block=blocksize2D, grid=gridsize2D)
    ## normalize

    wgtGrid_kernel(d_grd,
                   d_cnt,
                   briggs,
                   nx,
                   block=blocksize2D,
                   grid=gridsize2D)
    ## Reflect grid about v axis
    hfac = np.int32(-1)
    dblGrid_kernel(d_grd, nx, hfac, block=blocksize2D, grid=gridsize2D)
    ## Shift both
    shiftGrid_kernel(d_grd, d_ngrd, nx, block=blocksize2D, grid=gridsize2D)

    # ------------------------
    # Make the beam
    # ------------------------
    ## Transform to image plane
    fft.fft(d_nbm, d_bm, plan)
    ## Shift
    shiftGrid_kernel(d_bm, d_nbm, nx, block=blocksize2D, grid=gridsize2D)
    ## Correct for C
    corrGrid_kernel(d_nbm, d_corr, nx, block=blocksize2D, grid=gridsize2D)
    # Trim
    trimIm_kernel(d_nbm,
                  d_fim,
                  noff,
                  nx,
                  imsize,
                  block=blocksizeF2D,
                  grid=gridsizeF2D)
    ## Normalize
    d_bmax = gpu.max(d_fim)
    bmax = d_bmax.get()
    bmax = np.float32(1. / bmax)
    nrmBeam_kernel(d_fim, bmax, imsize, block=blocksizeF2D, grid=gridsizeF2D)
    ## Pull onto CPU
    dpsf = d_fim.get()

    # ------------------------
    # Make the map
    # ------------------------
    ## Transform to image plane
    fft.fft(d_ngrd, d_grd, plan)
    ## Shift
    shiftGrid_kernel(d_grd, d_ngrd, nx, block=blocksize2D, grid=gridsize2D)
    ## Correct for C
    corrGrid_kernel(d_ngrd, d_corr, nx, block=blocksize2D, grid=gridsize2D)
    ## Trim
    trimIm_kernel(d_ngrd,
                  d_fim,
                  noff,
                  nx,
                  imsize,
                  block=blocksizeF2D,
                  grid=gridsizeF2D)
    ## Normalize (Jy/beam)
    nrmGrid_kernel(d_fim, bmax, imsize, block=blocksizeF2D, grid=gridsizeF2D)

    ## Finish timers
    t_end = time.time()
    t_full = t_end - t_start
    print "Gridding execution time %0.5f" % t_full + ' s'
    print "\t%0.5f" % (t_full / gcount) + ' s per visibility'

    ## Return dirty psf (CPU) and dirty image (GPU)
    return dpsf, d_fim
Example #14
def kernel(a):
    from pycuda.cumath import fabs
    return my_max(fabs(a))
Example #15
def cuda_gridvis(settings, plan):
    """
    Grid the visibilities parallelized by pixel.
    References:
      - Chapter 10 in "Interferometry and Synthesis in Radio Astronomy"
          by Thompson, Moran, & Swenson
      - Daniel Briggs' PhD Thesis: http://www.aoc.nrao.edu/dissertations/dbriggs/
    """
    print "Gridding the visibilities"
    t_start = time.time()

    # unpack parameters
    vfile = settings['vfile']
    briggs = settings['briggs']
    imsize = settings['imsize']
    cell = settings['cell']
    nx = np.int32(2 * imsize)
    noff = np.int32((nx - imsize) / 2)

    ## constants
    arc2rad = np.float32(np.pi / 180 / 3600.)
    du = np.float32(1. / (arc2rad * cell * nx))
    ## grab data
    f = pyfits.open(settings['vfile'])

    # determine the file type (uvfits or fitsidi)

    if settings['vfile'].find('.fitsidi') != -1:

        ## quickly figure out what data is not flagged
        freq = 3.45E11  # np.float32(f[7].header['CRVAL3'])
        #good  = np.where(f[0].data.data[:,0,0,0,0,0,0] != 0)

        #h_u   = np.float32(freq*f[0].data.par('uu')[good])
        #h_v   = np.float32(freq*f[0].data.par('vv')[good])
        light_speed = 299792458.         # Speed of light

        h_u = np.ndarray(shape=(780, 1),dtype='float64')
        h_v = np.ndarray(shape=(780, 1),dtype='float64')
        h_re = np.ndarray(shape=(780, 1),dtype='float32')
        h_im = np.ndarray(shape=(780, 1),dtype='float32')

        h_u = np.float64(light_speed * f[0].data[:].UU)
        h_v = np.float64(light_speed * f[0].data[:].VV)

        for bl in range(0, 780):

            #gcount += np.int32(np.size(h_u[bl]))
            ## assume data is unpolarized
            #h_re   = np.float32(0.5*(f[0].data.data[good,0,0,0,0,0,0]+f[0].data.data[good,0,0,0,0,1,0]))
            #h_im   = np.float32(0.5*(f[0].data.data[good,0,0,0,0,0,1]+f[0].data.data[good,0,0,0,0,1,1]))
            h_re[bl] = np.float32(f[0].data[:].data[bl][0][0][0][0][0][0])
            h_im[bl] = np.float32(f[0].data[:].data[bl][0][0][0][0][0][1])
            ## make GPU arrays

        h_u = np.float32(h_u.ravel())
        h_v = np.float32(h_v.ravel())
        gcount = np.int32(np.size(h_u))
        #gcount = len(gcount.ravel())
        h_re = np.float32(h_re.ravel())
        h_im = np.float32(h_im.ravel())
        print(len(h_re), len(h_im))
    elif settings['vfile'].find('.uvfits') != -1:
        freq = 3.45E11 #np.float32(f[0].header['CRVAL4'])
        light_speed = 299792458.
        good = np.where(f[0].data.data[:, 0, 0, 0, 0, 0] != 0)
        h_u = np.float32(light_speed * f[0].data.par('uu')[good])
        h_v = np.float32(light_speed * f[0].data.par('vv')[good])
        gcount = np.int32(np.size(h_u))
        ## assume data is unpolarized
        h_re = np.float32(f[0].data.data[good, 0, 0, 0, 0, 0])
        h_im = np.float32(f[0].data.data[good, 0, 0, 0, 0, 1])

    print(h_u)

    # h_ : host,  d_ : device
    h_grd = np.zeros((nx, nx), dtype=np.complex64)
    h_cnt = np.zeros((nx, nx), dtype=np.int32)
    d_u = gpu.to_gpu(h_u)
    d_v = gpu.to_gpu(h_v)
    d_re = gpu.to_gpu(h_re)
    d_im = gpu.to_gpu(h_im)
    d_cnt = gpu.zeros((np.int(nx), np.int(nx)), np.int32)
    d_grd = gpu.zeros((np.int(nx), np.int(nx)), np.complex64)
    d_ngrd = gpu.zeros_like(d_grd)
    d_bm = gpu.zeros_like(d_grd)
    d_nbm = gpu.zeros_like(d_grd)
    d_fim = gpu.zeros((np.int(imsize), np.int(imsize)), np.float32)
    ## define kernel parameters
    blocksize2D = (8, 16, 1)
    gridsize2D = (np.int(np.ceil(1. * nx / blocksize2D[0])), np.int(np.ceil(1. * nx / blocksize2D[1])))
    blocksizeF2D = (16, 16, 1)
    gridsizeF2D = (np.int(np.ceil(1. * imsize / blocksizeF2D[0])), np.int(np.ceil(1. * imsize / blocksizeF2D[1])))
    blocksize1D = (256, 1, 1)
    gridsize1D = (np.int(np.ceil(1. * gcount / blocksize1D[0])), 1)

    # ------------------------
    # make gridding kernels
    # ------------------------
    ## make spheroidal convolution kernel (don't mess with these!)
    width = 6.
    ngcf = 24.
    h_cgf = gcf(ngcf, width)
    ## make grid correction
    h_corr = corrfun(nx, width)
    d_cgf = module.get_global('cgf')[0]
    d_corr = gpu.to_gpu(h_corr)
    cu.memcpy_htod(d_cgf, h_cgf)

    # ------------------------
    # grid it up
    # ------------------------
    d_umax = gpu.max(cumath.fabs(d_u))
    d_vmax = gpu.max(cumath.fabs(d_v))
    umax = np.int32(np.ceil(d_umax.get() / du))
    vmax = np.int32(np.ceil(d_vmax.get() / du))

    ## grid ($$)
    #  This should be improvable via:
    #    - shared memory solution? I tried...
    #    - better coalesced memory access? I tried...
    #    - reorganizing and indexing UV data beforehand?
    #       (i.e. http://www.nvidia.com/docs/IO/47905/ECE757_Project_Report_Gregerson.pdf)
    #    - storing V(u,v) in texture memory?
    gridVis_wBM_kernel(d_grd, d_bm, d_cnt, d_u, d_v, d_re, d_im, nx, du, gcount, umax, vmax, \
                       block=blocksize2D, grid=gridsize2D)
    ## apply weights
    wgtGrid_kernel(d_bm, d_cnt, briggs, nx, block=blocksize2D, grid=gridsize2D)
    hfac = np.int32(1)
    dblGrid_kernel(d_bm, nx, hfac, block=blocksize2D, grid=gridsize2D)
    shiftGrid_kernel(d_bm, d_nbm, nx, block=blocksize2D, grid=gridsize2D)
    ## normalize
    wgtGrid_kernel(d_grd, d_cnt, briggs, nx, block=blocksize2D, grid=gridsize2D)
    ## Reflect grid about v axis
    hfac = np.int32(-1)
    dblGrid_kernel(d_grd, nx, hfac, block=blocksize2D, grid=gridsize2D)
    ## Shift both
    shiftGrid_kernel(d_grd, d_ngrd, nx, block=blocksize2D, grid=gridsize2D)

    # ------------------------
    # Make the beam
    # ------------------------
    ## Transform to image plane
    fft.fft(d_nbm, d_bm, plan)
    ## Shift
    shiftGrid_kernel(d_bm, d_nbm, nx, block=blocksize2D, grid=gridsize2D)
    ## Correct for C
    corrGrid_kernel(d_nbm, d_corr, nx, block=blocksize2D, grid=gridsize2D)
    # Trim
    trimIm_kernel(d_nbm, d_fim, noff, nx, imsize, block=blocksizeF2D, grid=gridsizeF2D)
    ## Normalize
    d_bmax = gpu.max(d_fim)
    bmax = d_bmax.get()
    bmax = np.float32(1. / bmax)
    nrmBeam_kernel(d_fim, bmax, imsize, block=blocksizeF2D, grid=gridsizeF2D)
    ## Pull onto CPU
    dpsf = d_fim.get()

    # ------------------------
    # Make the map
    # ------------------------
    ## Transform to image plane
    fft.fft(d_ngrd, d_grd, plan)
    ## Shift
    shiftGrid_kernel(d_grd, d_ngrd, nx, block=blocksize2D, grid=gridsize2D)
    ## Correct for C
    corrGrid_kernel(d_ngrd, d_corr, nx, block=blocksize2D, grid=gridsize2D)
    ## Trim
    trimIm_kernel(d_ngrd, d_fim, noff, nx, imsize, block=blocksizeF2D, grid=gridsizeF2D)
    ## Normalize (Jy/beam)
    nrmGrid_kernel(d_fim, bmax, imsize, block=blocksizeF2D, grid=gridsizeF2D)

    ## Finish timers
    t_end = time.time()
    t_full = t_end - t_start
    print "Gridding execution time %0.5f" % t_full + ' s'
    print "\t%0.5f" % (t_full / gcount) + ' s per visibility'

    ## Return dirty psf (CPU) and dirty image (GPU)
    return dpsf, d_fim
Example #16
    def cuda_gridvis(self, plan, x_offset, y_offset):
        """
        Grid the visibilities parallelized by pixel.
        References:
          - Chapter 10 in "Interferometry and Synthesis in Radio Astronomy"
              by Thompson, Moran, & Swenson
          - Daniel Briggs' PhD Thesis: http://www.aoc.nrao.edu/dissertations/dbriggs/

        If the size of the image is 1024x1024, the plan should be at least 1024 * 1.414 (about 25 degrees of rotation),
        and to satisfy the requirements of CLEAN the dirty image should be 1024 * 2.828.
        """
        logger.debug("Gridding the visibilities")
        t_start = time.time()

        nx = np.int32(2 * self.imsize)
        noff = np.int32((nx - self.imsize) / 2)
        arc2rad = np.float32(np.pi / 180. / 3600.)
        du = np.float32(1. / (arc2rad * self.cell)) / (self.imsize * 2.)
        logger.debug("1 Pixel DU  = %f" % du)

        h_uu = np.float32(self.h_uu.ravel())
        h_vv = np.float32(self.h_vv.ravel())
        h_rere = np.float32(self.h_rere.ravel())
        h_imim = np.float32(self.h_imim.ravel())

        blen = 0
        bl_order = np.ndarray(shape=(self.baseline_number, 2), dtype=int)
        good = []

        if self.baseline_number == 780:  # MUSER-I
            antennas = 40
        else:
            antennas = 60
        # print antennas
        for border1 in range(0, antennas - 1):
            for border2 in range(border1 + 1, antennas):
                bl_order[blen][0] = border1
                bl_order[blen][1] = border2
                blen = blen + 1

        h_u = []
        h_v = []
        h_re = []
        h_im = []
        for blen in range(0, self.baseline_number):
            if (bl_order[blen][0]
                    not in self.Flag_Ant) and (bl_order[blen][1]
                                               not in self.Flag_Ant):
                good.append(blen)

                h_u.append(h_uu[blen])
                h_v.append(h_vv[blen])
                h_re.append(h_rere[blen])
                h_im.append(h_imim[blen])

        gcount = np.int32(np.size(h_u))

        # h_ : host,  d_ : device
        # h_grd = np.zeros((nx, nx), dtype=np.complex64)
        # h_cnt = np.zeros((nx, nx), dtype=np.int32)
        d_u = gpu.to_gpu(np.array(h_u, dtype='float32'))
        d_v = gpu.to_gpu(np.array(h_v, dtype='float32'))
        d_re = gpu.to_gpu(np.array(h_re, dtype='float32'))
        d_im = gpu.to_gpu(np.array(h_im, dtype='float32'))
        d_cnt = gpu.zeros((np.int(nx), np.int(nx)), np.int32)
        d_grd = gpu.zeros((np.int(nx), np.int(nx)), np.complex64)
        d_ngrd = gpu.zeros_like(d_grd)
        d_bm = gpu.zeros_like(d_grd)
        d_nbm = gpu.zeros_like(d_grd)
        d_cbm = gpu.zeros_like(d_grd)

        d_fbm = gpu.zeros((np.int(nx), np.int(nx)), np.float32)
        d_fim = gpu.zeros((np.int(self.imsize), np.int(self.imsize)),
                          np.float32)
        d_dim = gpu.zeros((np.int(self.imsize), np.int(self.imsize)),
                          np.float32)

        d_sun_disk = gpu.zeros_like(d_grd)
        d_fdisk = gpu.zeros((np.int(self.imsize), np.int(self.imsize)),
                            np.float32)

        ## define kernel parameters
        self.calc_gpu_thread(nx, self.imsize, gcount)

        width = 6.
        ngcf = 24.
        h_cgf = self.gcf(ngcf, width)

        ## make grid correction
        h_corr = self.corrfun(nx, width)
        d_cgf = self.module.get_global('cgf')[0]
        d_corr = gpu.to_gpu(h_corr)
        cu.memcpy_htod(d_cgf, h_cgf)

        # ------------------------
        # grid it up
        # ------------------------
        d_umax = gpu.max(cumath.fabs(d_u))
        d_vmax = gpu.max(cumath.fabs(d_v))
        umax = np.int32(np.ceil(d_umax.get() / du))
        vmax = np.int32(np.ceil(d_vmax.get() / du))

        self.gridVis_wBM_kernel(d_grd,
                                d_bm,
                                d_cbm,
                                d_cnt,
                                d_u,
                                d_v,
                                d_re,
                                d_im,
                                np.int32(nx),
                                np.float32(du),
                                np.int32(gcount),
                                np.int32(umax),
                                np.int32(vmax),
                                np.int32(1 if self.correct_p_angle else 0),
                                block=self.blocksize_2D,
                                grid=self.gridsize_2D)

        ## apply weights
        self.wgtGrid_kernel(d_bm,
                            d_cnt,
                            self.briggs,
                            nx,
                            0,
                            block=self.blocksize_2D,
                            grid=self.gridsize_2D)
        hfac = np.int32(1)
        self.dblGrid_kernel(d_bm,
                            nx,
                            hfac,
                            block=self.blocksize_2D,
                            grid=self.gridsize_2D)
        self.dblGrid_kernel(d_cbm,
                            nx,
                            hfac,
                            block=self.blocksize_2D,
                            grid=self.gridsize_2D)

        self.shiftGrid_kernel(d_bm,
                              d_nbm,
                              nx,
                              block=self.blocksize_2D,
                              grid=self.gridsize_2D)

        self.shiftGrid_kernel(d_cbm,
                              d_bm,
                              nx,
                              block=self.blocksize_2D,
                              grid=self.gridsize_2D)

        ## normalize
        self.wgtGrid_kernel(d_grd,
                            d_cnt,
                            self.briggs,
                            nx,
                            0,
                            block=self.blocksize_2D,
                            grid=self.gridsize_2D)
        ## Reflect grid about v axis
        hfac = np.int32(-1)
        self.dblGrid_kernel(d_grd,
                            nx,
                            hfac,
                            block=self.blocksize_2D,
                            grid=self.gridsize_2D)
        ## Shift both
        self.shiftGrid_kernel(d_grd,
                              d_ngrd,
                              nx,
                              block=self.blocksize_2D,
                              grid=self.gridsize_2D)

        fft.fft(d_ngrd, d_grd, plan)
        ## Shift
        self.shiftGrid_kernel(d_grd,
                              d_ngrd,
                              nx,
                              block=self.blocksize_2D,
                              grid=self.gridsize_2D)
        ## Correct for C
        self.corrGrid_kernel(d_ngrd,
                             d_corr,
                             nx,
                             block=self.blocksize_2D,
                             grid=self.gridsize_2D)
        ## Trim
        self.trimIm_kernel(d_ngrd,
                           d_dim,
                           nx,
                           self.imsize,
                           block=self.blocksize_F2D,
                           grid=self.gridsize_F2D)
        self.copyIm_kernel(d_ngrd,
                           d_fbm,
                           nx,
                           block=self.blocksize_2D,
                           grid=self.gridsize_2D)
        ## Normalize (Jy/beam)
        # self.nrmGrid_kernel(d_dim, bmax1, self.imsize, block=self.blocksize_F2D, grid=self.gridsize_F2D)
        # self.nrmGrid_kernel(d_fbm, bmax2, nx, block=self.blocksize_2D, grid=self.gridsize_2D)

        ## Finish timers
        t_end = time.time()
        t_full = t_end - t_start
        logger.debug("Gridding execution time %0.5f" % t_full + ' s')
        logger.debug("\t%0.5f" % (t_full / gcount) + ' s per visibility')

        # ----------------------

        ## Return the dirty image (GPU)

        return d_dim
Example #17
def fabs(self):
    return CUDAArray(cumath.fabs(self.arr))
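For context, a minimal sketch of the kind of wrapper class this method could sit in; the `CUDAArray` class and its `arr` attribute are inferred from the snippet, not taken from a real library:

import numpy as np
import pycuda.autoinit  # noqa: F401
import pycuda.gpuarray as gpuarray
import pycuda.cumath as cumath

class CUDAArray(object):
    """Thin wrapper around a pycuda.gpuarray.GPUArray held in self.arr."""

    def __init__(self, arr):
        self.arr = arr

    def fabs(self):
        return CUDAArray(cumath.fabs(self.arr))

a = CUDAArray(gpuarray.to_gpu(np.array([-1.0, 2.0], dtype=np.float32)))
print(a.fabs().arr.get())  # [1. 2.]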