Пример #1
0
def profileNCSC(n_reps):
    """
    The C reference version for comparison.

    Solves n_reps random (n_pts, n_pts) float32 sub-regions with
    NCSCSubRegion.cSolve(), using the same random gamma map for every
    region, and prints the wall-clock time spent in the solve loop.

    n_reps - Number of sub-regions to generate and solve.
    """
    numpy.random.seed(1)

    data = numpy.random.uniform(low=10.0,
                                high=20.0,
                                size=(n_reps, n_pts,
                                      n_pts)).astype(dtype=numpy.float32)
    gamma = numpy.random.uniform(low=2.0, high=4.0,
                                 size=(n_pts,
                                       n_pts)).astype(dtype=numpy.float32)
    otf_mask = numpy.fft.fftshift(pyRef.createOTFMask().reshape(16, 16))

    # Storage for the solution of every region.
    ref_u = numpy.zeros_like(data)

    ncs_sr = ncsC.NCSCSubRegion(r_size=n_pts)

    # Make sure cleanup() runs even when one of the solves raises
    # (presumably it releases resources held by the solver object).
    try:
        start_time = time.time()
        for i in range(n_reps):
            ncs_sr.newRegion(data[i, :, :], gamma)
            ncs_sr.setOTFMask(otf_mask)
            ref_u[i, :, :] = ncs_sr.cSolve(alpha, verbose=False)
        e_time = time.time() - start_time
    finally:
        ncs_sr.cleanup()

    print("CNSC {0:.6f} seconds".format(e_time))
Пример #2
0
def test_ncs_noise_reduction_2():
    """
    Compare OpenCL noise reduction with the C solver on several sub-regions.

    Ten random sub-regions, each with its own gamma map, are processed by
    the OpenCL ncsReduceNoise kernel and by NCSCSubRegion.cSolve(). For
    every region the largest absolute difference, divided by the largest
    reference value, must be below 1.0e-2.
    """
    numpy.random.seed(1)
    n_reps = 10
    stack_shape = (n_reps, n_pts, n_pts)

    # Inputs. data is drawn before gamma so the seeded stream is consumed
    # in a fixed order.
    data = numpy.random.uniform(
        low=10.0, high=20.0, size=stack_shape).astype(dtype=numpy.float32)
    gamma = numpy.random.uniform(
        low=2.0, high=4.0, size=stack_shape).astype(dtype=numpy.float32)
    otf_mask_shift = pyRef.createOTFMask()

    # Host arrays that receive the kernel outputs.
    u = numpy.zeros(stack_shape, dtype=numpy.float32)
    iters = numpy.zeros(n_reps, dtype=numpy.int32)
    status = numpy.zeros(n_reps, dtype=numpy.int32)

    # Device buffers, each initialised from its host array.
    mf = cl.mem_flags
    in_flags = mf.READ_ONLY | mf.COPY_HOST_PTR
    out_flags = mf.WRITE_ONLY | mf.COPY_HOST_PTR

    data_buffer = cl.Buffer(context, in_flags, hostbuf=data)
    gamma_buffer = cl.Buffer(context, in_flags, hostbuf=gamma)
    otf_mask_buffer = cl.Buffer(context, in_flags, hostbuf=otf_mask_shift)
    u_buffer = cl.Buffer(context, out_flags, hostbuf=u)
    iters_buffer = cl.Buffer(context, out_flags, hostbuf=iters)
    status_buffer = cl.Buffer(context, out_flags, hostbuf=status)

    # OpenCL noise reduction: n_reps work groups of 16 work items.
    program.ncsReduceNoise(queue, (n_reps * 16,), (16,),
                           data_buffer, gamma_buffer, otf_mask_buffer,
                           u_buffer, iters_buffer, status_buffer,
                           numpy.float32(alpha))

    # Copy the results back to the host.
    for host_array, device_buffer in ((u, u_buffer),
                                      (iters, iters_buffer),
                                      (status, status_buffer)):
        cl.enqueue_copy(queue, host_array, device_buffer).wait()
    queue.finish()

    # NCSC (C solver) noise reduction of the same regions.
    otf_mask = numpy.fft.fftshift(otf_mask_shift.reshape(16, 16))
    ref_u = numpy.zeros_like(data)

    ncs_sr = ncsC.NCSCSubRegion(r_size=n_pts)
    for index, (region, region_gamma) in enumerate(zip(data, gamma)):
        ncs_sr.newRegion(region, region_gamma)
        ncs_sr.setOTFMask(otf_mask)
        ref_u[index] = ncs_sr.cSolve(alpha, verbose=False)
    ncs_sr.cleanup()

    # Region by region comparison.
    for i in range(n_reps):
        worst = numpy.max(numpy.abs(u[i] - ref_u[i]))
        norm_diff = worst / numpy.max(ref_u[i])
        assert norm_diff < 1.0e-2, "failed {0:d} {1:.3f}".format(i, norm_diff)
Пример #3
0
def profile(n_reps):
    """
    Report how long it takes to reduce the noise in n_reps sub-regions.

    Runs the OpenCL ncsReduceNoise kernel with a global size of n_reps on
    random (n_pts, n_pts) float32 sub-regions and prints the kernel
    execution time computed from the launch event's profiling timestamps.

    n_reps - Number of sub-regions to process.
    """

    # Setup
    numpy.random.seed(1)

    data = numpy.random.uniform(low=10.0,
                                high=20.0,
                                size=(n_reps, n_pts,
                                      n_pts)).astype(dtype=numpy.float32)

    # A single random gamma map is drawn (so the seeded random stream is
    # consumed exactly as before) and then replicated for every region, so
    # that the gamma buffer spans the same n_reps regions as the data
    # buffer the kernel is launched over.
    # NOTE(review): presumably the kernel reads gamma at the same offsets
    # as data; a one-region buffer would then be read out of bounds for
    # n_reps > 1 - confirm against the kernel source.
    region_gamma = numpy.random.uniform(low=2.0, high=4.0,
                                        size=(n_pts, n_pts)).astype(
                                            dtype=numpy.float32)
    gamma = numpy.tile(region_gamma, (n_reps, 1, 1))
    otf_mask_shift = pyRef.createOTFMask()

    # OpenCL Setup.
    u = numpy.zeros((n_reps, n_pts, n_pts), dtype=numpy.float32)
    iters = numpy.zeros(n_reps, dtype=numpy.int32)
    status = numpy.zeros(n_reps, dtype=numpy.int32)

    in_flags = cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR
    out_flags = cl.mem_flags.WRITE_ONLY | cl.mem_flags.COPY_HOST_PTR

    data_buffer = cl.Buffer(context, in_flags, hostbuf=data)
    gamma_buffer = cl.Buffer(context, in_flags, hostbuf=gamma)
    otf_mask_buffer = cl.Buffer(context, in_flags, hostbuf=otf_mask_shift)
    u_buffer = cl.Buffer(context, out_flags, hostbuf=u)
    iters_buffer = cl.Buffer(context, out_flags, hostbuf=iters)
    status_buffer = cl.Buffer(context, out_flags, hostbuf=status)

    # Keep the launch event, its profiling data gives the kernel run time.
    ev1 = program.ncsReduceNoise(queue, (n_reps, ), (1, ), data_buffer,
                                 gamma_buffer, otf_mask_buffer, u_buffer,
                                 iters_buffer, status_buffer,
                                 numpy.float32(alpha))

    cl.enqueue_copy(queue, u, u_buffer).wait()
    cl.enqueue_copy(queue, iters, iters_buffer).wait()
    cl.enqueue_copy(queue, status, status_buffer).wait()
    queue.finish()

    # Profiling timestamps are scaled by 1.0e-9 to report seconds.
    # NOTE(review): ev1.profile needs a queue created with profiling
    # enabled; the queue is created outside this function - confirm.
    e_time = 1.0e-9 * (ev1.profile.end - ev1.profile.start)
    print("OpenCL {0:.6f} seconds".format(e_time))
Пример #4
0
def test_calc_nc():
    """
    Check the OpenCL calc_nc_test kernel against two reference results.

    For 100 random 16 x 16 inputs the value returned by the kernel must
    agree with NCSCSubRegion.calcNoiseContribution() and with
    pyRef.calcNoiseContribution() to a relative difference below 1.0e-3.
    """
    n_pts = 16
    mf = cl.mem_flags
    in_flags = mf.READ_ONLY | mf.COPY_HOST_PTR
    out_flags = mf.WRITE_ONLY | mf.COPY_HOST_PTR

    def check_relative(value, reference):
        # Relative difference between the kernel value and a reference.
        norm_diff = abs(value - reference) / abs(reference)
        assert norm_diff < 1.0e-3, \
            "Difference in results! {0:.6f}".format(norm_diff)

    for _ in range(100):

        # OpenCL
        u = numpy.random.uniform(
            low=1.0, high=10.0,
            size=(n_pts, n_pts)).astype(dtype=numpy.float32)
        otf_mask_shift = pyRef.createOTFMask()

        # Single element array that receives the kernel result.
        nc = numpy.zeros(1, dtype=numpy.float32)

        u_buffer = cl.Buffer(context, in_flags, hostbuf=u)
        otf_mask_buffer = cl.Buffer(context, in_flags, hostbuf=otf_mask_shift)
        nc_buffer = cl.Buffer(context, out_flags, hostbuf=nc)

        program.calc_nc_test(queue, (1,), (1,),
                             u_buffer, otf_mask_buffer, nc_buffer)
        cl.enqueue_copy(queue, nc, nc_buffer).wait()
        queue.finish()

        # Reference 1, the C library.
        otf_mask = numpy.fft.fftshift(otf_mask_shift.reshape(16, 16))
        ncs_sr = ncsC.NCSCSubRegion(r_size=n_pts)
        ncs_sr.setOTFMask(otf_mask)
        ncs_sr.setU(u)
        ref1_nc = ncs_sr.calcNoiseContribution()
        ncs_sr.cleanup()

        check_relative(nc[0], ref1_nc)

        # Reference 2, the Python implementation working on flat arrays.
        u_r = u.flatten()
        u_c = numpy.zeros_like(u_r)
        u_fft_r = numpy.zeros_like(u_r)
        u_fft_c = numpy.zeros_like(u_c)
        otf_mask_sqr = (otf_mask_shift * otf_mask_shift).flatten()

        pyRef.fft_16x16(u_r, u_c, u_fft_r, u_fft_c)
        ref2_nc = pyRef.calcNoiseContribution(u_fft_r, u_fft_c, otf_mask_sqr)

        check_relative(nc[0], ref2_nc)
Пример #5
0
def test_calc_nc_grad_1():
    """
    Check the OpenCL calc_nc_grad_test kernel against two reference results.

    For 10 random 16 x 16 inputs the gradient returned by the kernel is
    compared with NCSCSubRegion.calcNCGradient() and with
    pyRef.calcNCGradientIFFT(). Differences are divided by the magnitude
    of the reference (with a floor of 1.0) and must stay below 1.0e-5.
    """
    n_pts = 16
    mf = cl.mem_flags
    in_flags = mf.READ_ONLY | mf.COPY_HOST_PTR
    out_flags = mf.WRITE_ONLY | mf.COPY_HOST_PTR

    def check_scaled(result, reference):
        # Scale by |reference|, but never by less than 1.0.
        scale = numpy.maximum(numpy.abs(reference), 1.0)
        max_diff = numpy.max(numpy.abs(result - reference) / scale)
        assert max_diff < 1.0e-5, \
            "Difference in results! {0:.8f}".format(max_diff)

    for _ in range(10):

        # OpenCL gradient calculation.
        u = numpy.random.uniform(
            low=1.0, high=10.0,
            size=(n_pts, n_pts)).astype(dtype=numpy.float32)
        otf_mask_shift = pyRef.createOTFMask()
        grad = numpy.zeros((n_pts, n_pts)).astype(numpy.float32)

        u_buffer = cl.Buffer(context, in_flags, hostbuf=u)
        otf_mask_buffer = cl.Buffer(context, in_flags, hostbuf=otf_mask_shift)
        grad_buffer = cl.Buffer(context, out_flags, hostbuf=grad)

        program.calc_nc_grad_test(queue, (16,), (16,),
                                  u_buffer, otf_mask_buffer, grad_buffer)

        cl.enqueue_copy(queue, grad, grad_buffer).wait()
        queue.finish()

        # Reference 1, the C library.
        otf_mask = numpy.fft.fftshift(otf_mask_shift.reshape(16, 16))
        ncs_sr = ncsC.NCSCSubRegion(r_size=n_pts)
        ncs_sr.setOTFMask(otf_mask)
        ncs_sr.setU(u)
        ncs_sr.calcNoiseContribution()
        ref1_grad = ncs_sr.calcNCGradient().reshape(grad.shape)
        ncs_sr.cleanup()

        check_scaled(grad, ref1_grad)

        # Reference 2, the Python implementation working on flat arrays.
        u_r = u.flatten()
        u_c = numpy.zeros_like(u_r)
        u_fft_r = numpy.zeros_like(u_r)
        u_fft_c = numpy.zeros_like(u_r)
        ref2_grad = numpy.zeros_like(u_r)
        otf_mask_sqr = otf_mask_shift * otf_mask_shift

        pyRef.fft_16x16(u_r, u_c, u_fft_r, u_fft_c)
        pyRef.calcNCGradientIFFT(u_fft_r, u_fft_c, otf_mask_sqr, ref2_grad)

        check_scaled(grad.flatten(), ref2_grad)
Пример #6
0
def test_ncs_noise_reduction_2():
    """
    Compare CUDA noise reduction with the C solver on several sub-regions.

    Ten random sub-regions, each with its own gamma map, are processed by
    the CUDA ncsReduceNoise kernel and by NCSCSubRegion.cSolve(). For
    every region the largest absolute difference, divided by the largest
    reference value, must be below 1.0e-2.
    """
    numpy.random.seed(1)
    n_reps = 10
    stack_shape = (n_reps, n_pts, n_pts)

    # Inputs. data is drawn before gamma so the seeded stream is consumed
    # in a fixed order.
    data = numpy.random.uniform(
        low=10.0, high=20.0, size=stack_shape).astype(dtype=numpy.float32)
    gamma = numpy.random.uniform(
        low=2.0, high=4.0, size=stack_shape).astype(dtype=numpy.float32)
    otf_mask_shift = pyRef.createOTFMask()

    # Host arrays that receive the kernel outputs.
    u = numpy.zeros(stack_shape, dtype=numpy.float32)
    iters = numpy.zeros(n_reps, dtype=numpy.int32)
    status = numpy.zeros(n_reps, dtype=numpy.int32)

    # CUDA noise reduction: one block of 16 threads per sub-region.
    kernel_args = (drv.In(data),
                   drv.In(gamma),
                   drv.In(otf_mask_shift),
                   drv.Out(u),
                   drv.Out(iters),
                   drv.Out(status),
                   numpy.float32(alpha))
    ncsReduceNoise(*kernel_args, block=(16, 1, 1), grid=(n_reps, 1))

    # NCSC (C solver) noise reduction of the same regions.
    otf_mask = numpy.fft.fftshift(otf_mask_shift.reshape(16, 16))
    ref_u = numpy.zeros_like(data)

    ncs_sr = ncsC.NCSCSubRegion(r_size=n_pts)
    for index, (region, region_gamma) in enumerate(zip(data, gamma)):
        ncs_sr.newRegion(region, region_gamma)
        ncs_sr.setOTFMask(otf_mask)
        ref_u[index] = ncs_sr.cSolve(alpha, verbose=False)
    ncs_sr.cleanup()

    # Region by region comparison.
    for i in range(n_reps):
        worst = numpy.max(numpy.abs(u[i] - ref_u[i]))
        norm_diff = worst / numpy.max(ref_u[i])
        assert norm_diff < 1.0e-2, "failed {0:d} {1:.3f}".format(i, norm_diff)
Пример #7
0
def test_ncs_noise_reduction_1():
    """
    Compare CUDA noise reduction of one sub-region with the Python reference.

    A single random sub-region is processed by the CUDA ncsReduceNoise
    kernel and by pyRef.ncsReduceNoise(). The largest absolute difference,
    divided by the largest reference value, must be below 1.0e-2.
    """
    numpy.random.seed(1)
    region_shape = (n_pts, n_pts)

    # Inputs. data is drawn before gamma so the seeded stream is consumed
    # in a fixed order.
    data = numpy.random.uniform(
        low=10.0, high=20.0, size=region_shape).astype(dtype=numpy.float32)
    gamma = numpy.random.uniform(
        low=2.0, high=4.0, size=region_shape).astype(dtype=numpy.float32)
    otf_mask_shift = pyRef.createOTFMask()

    # Host arrays that receive the kernel outputs.
    u = numpy.zeros(region_shape, dtype=numpy.float32)
    iters = numpy.zeros(1, dtype=numpy.int32)
    status = numpy.zeros(1, dtype=numpy.int32)

    # CUDA noise reduction: a single block of 16 threads.
    kernel_args = (drv.In(data),
                   drv.In(gamma),
                   drv.In(otf_mask_shift),
                   drv.Out(u),
                   drv.Out(iters),
                   drv.Out(status),
                   numpy.float32(alpha))
    ncsReduceNoise(*kernel_args, block=(16, 1, 1), grid=(1, 1))

    # Python reference version, it fills a flat output array.
    ref_u = numpy.zeros(data.size)
    ref_iters = numpy.zeros_like(iters)
    ref_status = numpy.zeros_like(status)

    py_u_fft_grad_r, py_u_fft_grad_c = pyRef.createUFFTGrad()
    pyRef.ncsReduceNoise(py_u_fft_grad_r, py_u_fft_grad_c,
                         data, gamma, otf_mask_shift,
                         ref_u, ref_iters, ref_status,
                         numpy.float32(alpha))

    # Compare in the 2D layout of the input.
    ref_u = ref_u.reshape(data.shape)
    worst = numpy.max(numpy.abs(u - ref_u))
    norm_diff = worst / numpy.max(ref_u)
    assert norm_diff < 1.0e-2, str(norm_diff)
Пример #8
0
def test_ncs_noise_reduction_1():
    """
    Compare OpenCL noise reduction of one sub-region with the Python reference.

    A single random sub-region is processed by the OpenCL ncsReduceNoise
    kernel and by pyRef.ncsReduceNoise(). The largest absolute difference,
    divided by the largest reference value, must be below 1.0e-2.
    """
    numpy.random.seed(1)
    region_shape = (n_pts, n_pts)

    # Inputs. data is drawn before gamma so the seeded stream is consumed
    # in a fixed order.
    data = numpy.random.uniform(
        low=10.0, high=20.0, size=region_shape).astype(dtype=numpy.float32)
    gamma = numpy.random.uniform(
        low=2.0, high=4.0, size=region_shape).astype(dtype=numpy.float32)
    otf_mask_shift = pyRef.createOTFMask()

    # Host arrays that receive the kernel outputs.
    u = numpy.zeros(region_shape, dtype=numpy.float32)
    iters = numpy.zeros(1, dtype=numpy.int32)
    status = numpy.zeros(1, dtype=numpy.int32)

    # Device buffers, each initialised from its host array.
    mf = cl.mem_flags
    in_flags = mf.READ_ONLY | mf.COPY_HOST_PTR
    out_flags = mf.WRITE_ONLY | mf.COPY_HOST_PTR

    data_buffer = cl.Buffer(context, in_flags, hostbuf=data)
    gamma_buffer = cl.Buffer(context, in_flags, hostbuf=gamma)
    otf_mask_buffer = cl.Buffer(context, in_flags, hostbuf=otf_mask_shift)
    u_buffer = cl.Buffer(context, out_flags, hostbuf=u)
    iters_buffer = cl.Buffer(context, out_flags, hostbuf=iters)
    status_buffer = cl.Buffer(context, out_flags, hostbuf=status)

    # OpenCL noise reduction, launched with a single work item.
    program.ncsReduceNoise(queue, (1,), (1,),
                           data_buffer, gamma_buffer, otf_mask_buffer,
                           u_buffer, iters_buffer, status_buffer,
                           numpy.float32(alpha))

    # Copy the results back to the host.
    for host_array, device_buffer in ((u, u_buffer),
                                      (iters, iters_buffer),
                                      (status, status_buffer)):
        cl.enqueue_copy(queue, host_array, device_buffer).wait()
    queue.finish()

    # Python reference version, it fills a flat output array.
    ref_u = numpy.zeros(data.size)
    ref_iters = numpy.zeros_like(iters)
    ref_status = numpy.zeros_like(status)

    py_u_fft_grad_r, py_u_fft_grad_c = pyRef.createUFFTGrad()
    pyRef.ncsReduceNoise(py_u_fft_grad_r, py_u_fft_grad_c,
                         data, gamma, otf_mask_shift,
                         ref_u, ref_iters, ref_status,
                         numpy.float32(alpha))

    # Compare in the 2D layout of the input.
    ref_u = ref_u.reshape(data.shape)
    worst = numpy.max(numpy.abs(u - ref_u))
    norm_diff = worst / numpy.max(ref_u)
    assert norm_diff < 1.0e-2, str(norm_diff)