pl.figure(1)
    pl.draw()

# ----------------------------------------------------------------------------
# Code for initialising the online multi-frame deconvolution
# ----------------------------------------------------------------------------
# Initialise the latent image by averaging the first N0 frames
y0 = 0.
for i in np.arange(1, N0):
    y0 += yload(i)

y0 /= N0
y_gpu = cua.to_gpu(y0)

# Pad image since we perform deconvolution with valid boundary conditions
x_gpu = gputools.impad_gpu(y_gpu, sf - 1)

# Create windows for OlaGPU
sx = y0.shape + sf - 1
sf2 = np.floor(sf / 2)
winaux = imagetools.win2winaux(sx, csf, overlap)

# ----------------------------------------------------------------------------
# Loop over all frames and do online blind deconvolution
# ----------------------------------------------------------------------------
import time as t
ti = t.clock()

for i in np.arange(1, N + 1):

    print 'Processing frame %d/%d \r' % (i, N)
    pl.figure(1)
    pl.draw()

Example #3
def process(opts):
    # ============================================================================
    # Specify some parameter settings 
    # ----------------------------------------------------------------------------
    
    # Specify data path and file identifier
    DATAPATH = '/DATA/LSST/FITS'
    RESPATH  = '../../../DATA/results'
    BASE_N = 141
    FILENAME = lambda i: '%s/v88827%03d-fz.R22.S11.fits' % (DATAPATH,(BASE_N+i))
    ID       = 'LSST'
    
    # ----------------------------------------------------------------------------
    # Specify parameter settings
    
    # General
    doshow   = opts.doShow             # put 1 to show intermediate results
    backup   = opts.backup                  # put 1 to write intermediate results to disk
    N        = opts.N                # how many frames to process
    N0       = opts.N0                # number of averaged frames for initialisation
    
    # OlaGPU parameters
    sf      = np.array([40,40])   # estimated size of PSF
    csf     = (3,3)               # number of kernels across x and y direction
    overlap = 0.5                 # overlap fraction between neighboring patches
    
    # Regularization parameters for kernel estimation
    f_alpha = opts.f_alpha                 # promotes smoothness
    f_beta  = opts.f_beta         # Tikhonov regularization
    optiter = opts.optiter        # number of iterations for minimization
    tol     = opts.tol        # tolerance for when to stop minimization
    # ============================================================================
    
    # Create helper functions for file handling
    
    # # # HACK for chunking into available GPU mem # # #
    #     - loads one 1kx1k block out of the fits image
    xOffset=2000
    yOffset=0
    chunkSize=1000
    yload = lambda i: 1. * fitsTools.readFITS(FILENAME(i), use_mask=True, norm=True)[yOffset:yOffset+chunkSize,xOffset:xOffset+chunkSize]
    
    # ----------------------------------------------------------------------------
    # Some more code for backing up the results
    # ----------------------------------------------------------------------------
    # For backup purposes
    EXPPATH = '%s/%s_sf%dx%d_csf%dx%d_maxiter%d_alpha%.2f_beta%.2f' % \
          (RESPATH,ID,sf[0],sf[1],csf[0],csf[1],optiter,f_alpha,f_beta)
          
    xname = lambda i: '%s/x_%04d.png' % (EXPPATH,i)
    yname = lambda i: '%s/y_%04d.png' % (EXPPATH,i)
    fname = lambda i: '%s/f_%04d.png' % (EXPPATH,i)
    
    
    if os.path.exists(EXPPATH) and opts.overwrite:
        try:
            rmtree(EXPPATH)
        except:
            print "[ERROR] removing old results dir:",EXPPATH
            exit()
            
    elif os.path.exists(EXPPATH):
        print "[ERROR] results directory already exists, please remove or use '-o' to overwrite"
        exit()
        
    # Create results path if not existing
    try:
        os.makedirs(EXPPATH)
    except:
        print "[ERROR] creating results dir:",EXPPATH
        exit()
    
    print 'Results are saved to: \n %s \n' % EXPPATH
    
    # ----------------------------------------------------------------------------
    # For displaying intermediate results create target figure
    # ----------------------------------------------------------------------------
    # Create figure for displaying intermediate results
    if doshow:
        print "showing intermediate results is currently disabled.."
        #pl.figure(1)
        #pl.draw()
    
    # ----------------------------------------------------------------------------
    # Code for initialising the online multi-frame deconvolution
    # ----------------------------------------------------------------------------
    # Initialise the latent image by averaging the first N0 frames
    y0 = 0.
    for i in np.arange(1,N0):
        y0 += yload(i)
    
    y0 /= N0
    y_gpu = cua.to_gpu(y0)
    
    # Pad image since we perform deconvolution with valid boundary conditions
    x_gpu = gputools.impad_gpu(y_gpu, sf-1)
    
    # Create windows for OlaGPU
    sx      = y0.shape + sf - 1
    sf2     = np.floor(sf/2)
    winaux  = imagetools.win2winaux(sx, csf, overlap)
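    # A note on winaux (an assumption based on how it is used below): it
    # appears to bundle the overlap-add bookkeeping consumed by OlaGPU, i.e.
    # the patch grid (winaux.csf), patch size (winaux.sw), hop between
    # patches (winaux.nhop) and the per-patch windows (winaux.ws_gpu).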
    
    # ----------------------------------------------------------------------------
    # Loop over all frames and do online blind deconvolution
    # ----------------------------------------------------------------------------
    import time as t
    ti = t.clock()
    t1 = stopwatch.timer()
    t2 = stopwatch.timer()
    t3 = stopwatch.timer()
    t4 = stopwatch.timer()
    t4.start()
    for i in np.arange(1,N+1):
        print 'Processing frame %d/%d \r' % (i,N)
    
        # Load next observed image
        t3.start()
        y = yload(i)
        print "TIMER load:", t3.elapsed()
        
        # Compute mask for determining saturated regions
        mask_gpu = 1. * cua.to_gpu(y < 1.)
        y_gpu    = cua.to_gpu(y)
    
        # ------------------------------------------------------------------------
        # PSF estimation
        # ------------------------------------------------------------------------
        # Create OlaGPU instance with current estimate of latent image
        t2.start()
        X = olaGPU.OlaGPU(x_gpu,sf,'valid',winaux=winaux)
        print "TIMER GPU: ", t2.elapsed()
        
        t1.start()
        # PSF estimation for given estimate of latent image and current observation
        f = X.deconv(y_gpu, mode = 'lbfgsb', alpha = f_alpha, beta = f_beta,
             maxfun = optiter, verbose = 10)
        print "TIMER Optimization: ", t1.elapsed()

        fs = f[0]
    
        # Normalize PSF kernels to sum up to one
        fs = gputools.normalize(fs)
    
        # ------------------------------------------------------------------------
        # Latent image estimation
        # ------------------------------------------------------------------------
        # Create OlaGPU instance with estimated PSF
        t2.start()
        F = olaGPU.OlaGPU(fs,sx,'valid',winaux=winaux)
    
        # Latent image estimation by performing one gradient descent step
        # multiplicative update is used which preserves positivity 
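        # Written out (a sketch of what the line below computes; m is the
        # saturation mask, F the current blur operator):
        #     x <- x * F^T(m*y) / (F^T(m*F(x)) + tol)
        # i.e. an ISRA-style multiplicative update: with non-negative x, y,
        # mask and kernels, the new iterate stays non-negative.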
        factor_gpu = F.cnvtp(mask_gpu*y_gpu)/(F.cnvtp(mask_gpu*F.cnv(x_gpu))+tol)
        gputools.cliplower_GPU(factor_gpu, tol)
        x_gpu = x_gpu * factor_gpu
        x_max = x_gpu.get()[sf[0]:-sf[0],sf[1]:-sf[1]].max()
        
        gputools.clipupper_GPU(x_gpu, x_max)
        print "TIMER GPU: ", t2.elapsed()
        
        # ------------------------------------------------------------------------
        # Back up intermediate results
        # ------------------------------------------------------------------------
        if backup or i == N:
            # Write intermediate results to disk incl. input
            y_img = y_gpu.get()*1e5
            fitsTools.asinhScale(y_img, 450, -50, minCut=0.0, maxCut=40000, fname=yname(i))
            
            # Crop image to input size
            xi = (x_gpu.get()[sf2[0]:-sf2[0],sf2[1]:-sf2[1]] / x_max)*1e5

            fitsTools.fitsStats(xi)
            fitsTools.asinhScale(xi, 450, -50, minCut=0.0, maxCut=40000, fname=xname(i))
        
            # Concatenate PSF kernels for ease of visualisation
            f = imagetools.gridF(fs,csf)
            f = f*1e5
            
            fitsTools.asinhScale(f, 450, -50, minCut=0.0, maxCut=40000, fname=fname(i))

    
        # ------------------------------------------------------------------------
        # For displaying intermediate results
        # ------------------------------------------------------------------------
        '''
        if np.mod(i,1) == 0 and doshow:
        pl.figure(1)
        pl.subplot(121)
        # what is SY?
        pl.imshow(imagetools.crop(x_gpu.get(),sy,np.ceil(sf/2)),'gray')
        pl.title('x after %d observations' % i)
        pl.subplot(122)
        pl.imshow(y_gpu.get(),'gray')
        pl.title('y(%d)' % i)
        pl.draw()
        pl.figure(2)
        pl.title('PSF(%d)' % i)
        imagetools.cellplot(fs, winaux.csf)
        tf = t.clock()
        print('Time elapsed after %d frames %.3f' % (i,(tf-ti)))
        '''
    tf = t.clock()
    print('Time elapsed for total image sequence %.3f' % (tf-ti))
    # ----------------------------------------------------------------------------
    print "TOTAL: %.3f" % (t4.elapsed())
    print "OptimizeCPUtime %.3f %.3f" % (t1.getTotal(), 100*(t1.getTotal()/t4.getTotal()))
    print "GPUtime %.3f %.3f" % (t2.getTotal(), 100*(t2.getTotal()/t4.getTotal()))
    print "LoadTime %.3f %.3f" % (t3.getTotal(), 100*(t3.getTotal()/t4.getTotal()))
Example #4
    def deconv(self, y, z0=None, mode='lbfgsb', maxfun=100, alpha=0., beta=0.01,
               verbose=10, m=5, edgetapering=1, factor=3, gamma=1e-4):
        """
        deconv implements various deconvolution methods. It expects a
        blurry image and outputs an estimated blur kernel or a sharp latent
        image. Currently, the following algorithms are implemented:

        'lbfgsb'   uses the lbfgsb optimization code to minimize the following
                   constrained regularized problem:

                   |y-Zu|^2 + alpha * |grad(u)|^2 + beta * |u|^2 s.t. u>0

                   The alpha term promotes smoothness of the solution, while
                   the beta term is an ordinary Tikhonov regularization
                   
        'direct'   as above but solves the problem directly, i.e. via
                   division in Fourier space instead of an iterative
                   minimization scheme at the cost of the positivity
                   constraint.

        'xdirect'  as 'direct' but without corrective term which reduces
                   artifacts stemming from the windowing

        'gdirect'  solves the following problem

                   |grad(y)-grad(Zu)|^2 + alpha * |grad(u)|^2 + beta * |u|^2

                   This is particularly useful for kernel estimation in the
                   case of blurred natural images featuring many edges. The
                   advantage vs. 'direct' is the suppression of noise in the
                   estimated PSF kernels. 

        'sparse'   implements the method described in
                   'Fast Image Deconvolution using Hyper-Laplacian Priors'
                   by Dilip Krishnan and Rob Fergus, NIPS 2009.
                   It minimizes the following problem

                   |y-Zu|^2 + gamma * |grad(u)|^(2/3)

                   via half-quadratic splitting. See paper for details.
                   
        ----------------------------------------------------------------------
        Usage:
    
        Call:  Z = OlaGPU(z, sw, mode, winaux)
               u = Z.deconv(y)
 
        Input:  y   blurry image
        Output: u   either image or PSF sized object
        """

        from numpy import array
        
        if not all(array(y.shape) == self.sy):
            raise IOError('Sizes incompatible. Expected blurred image!')

        # Potential data transfer to GPU
        if y.__class__ == cua.GPUArray:
            y_gpu = 1. * y
        else:
            y_gpu = cua.to_gpu(y.astype(np.float32))


        # --------------------------------------------------------------------
        if mode == 'lbfgsb':

            from scipy.optimize import fmin_l_bfgs_b
                
            self.res_gpu = cua.empty_like(y_gpu)

            if self.__id__ == 'X':
                sz = (int(np.prod(self.winaux.csf)),
                      int(self.sz[0]), int(self.sz[1]))
                
            elif self.__id__ == 'F':
                sz = self.sz            
                
            lf = np.prod(sz)
            if z0 is None:
                z0_gpu = self.cnvtp(y_gpu)
                z0 = z0_gpu.get()
                z0 = z0.flatten()
                
                #z0 = np.ones(lf)/(1. * lf)    # initialisation with flat kernels
            else:
                z0 = z0.flatten()

            lb = 0.                 # lower bound
            ub = np.infty           # upper bound
            zhat = fmin_l_bfgs_b(func = self.cnvinv_objfun, x0 = z0, \
                                 fprime = self.cnvinv_gradfun,\
                                 args = [sz, y_gpu, alpha, beta],\
                                 factr = 10., pgtol = 10e-15, \
                                 maxfun = maxfun, bounds = [(lb, ub)] * lf,\
                                 m = m, iprint = verbose)
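            # fmin_l_bfgs_b returns a tuple (x, f, info_dict); the flattened
            # minimiser is reshaped back to the kernel-stack / image shape sz.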
            return np.reshape(zhat[0], sz), zhat[1], zhat[2]        


        # --------------------------------------------------------------------
        elif mode == 'gdirect':
            
            # Use this method only for estimating the PSF
            if self.__id__ != 'X':
                raise Exception('Use direct mode for image estimation!')
    
            # Compute Laplacian
            if alpha > 0.:
                gx_gpu = gputools.pad_cpu2gpu(
                    np.array([[-1,1],[-1,1],[-1,1]]),
                    self.sfft_gpu, dtype='complex')
                
                gy_gpu = gputools.pad_cpu2gpu(
                    np.array([[-1,-1,-1],[1,1,1]]),
                    self.sfft_gpu, dtype='complex')
                
                self.plan.execute(gx_gpu)
                self.plan.execute(gy_gpu)
                L_gpu = gx_gpu * gx_gpu.conj() + gy_gpu * gy_gpu.conj()
            else:
                L_gpu = cua.zeros(self.fft_gpu.shape, np.complex64)
                 
            if edgetapering == 1:
                gputools.edgetaper_gpu(y_gpu, 2*self.sf, 'barthann')

            # Transfer to GPU
            if self.x.__class__ == cua.GPUArray:
                x_gpu = self.x
            else:
                x_gpu = cua.to_gpu(self.x)        

            # Compute gradient images             
            xx_gpu, xy_gpu = gputools.gradient_gpu(x_gpu)
            yx_gpu, yy_gpu = gputools.gradient_gpu(y_gpu)

            # Chop and pad business
            if self.mode == 'valid':
                yx_gpu = gputools.chop_pad_GPU(yx_gpu, self.winaux.csf,
                                               self.winaux.sw, self.winaux.nhop,
                                               self.sfft_gpu, self.sf-1,
                                               'complex')                
                yy_gpu = gputools.chop_pad_GPU(yy_gpu, self.winaux.csf,
                                               self.winaux.sw, self.winaux.nhop,
                                               self.sfft_gpu, self.sf-1,
                                               'complex')
                
            elif self.mode == 'same':
                yx_gpu = gputools.chop_pad_GPU(yx_gpu, self.winaux.csf,
                                               self.winaux.sw, self.winaux.nhop,
                                               self.sfft_gpu,
                                               np.floor(self.sf/2), 'complex')
                yy_gpu = gputools.chop_pad_GPU(yy_gpu, self.winaux.csf,
                                               self.winaux.sw, self.winaux.nhop,
                                               self.sfft_gpu,
                                               np.floor(self.sf/2), 'complex')
            else:
                raise NotImplementedError('Not a valid mode!')

            xx_gpu = gputools.chop_pad_GPU(xx_gpu, self.winaux.csf,
                                           self.winaux.sw, self.winaux.nhop,
                                           self.sfft_gpu, dtype='complex')
            xy_gpu = gputools.chop_pad_GPU(xy_gpu, self.winaux.csf,
                                           self.winaux.sw, self.winaux.nhop,
                                           self.sfft_gpu, dtype='complex')            

            # Here each patch should be windowed to reduce ringing artifacts,
            # however since we are working in the gradient domain, the effect
            # is negligible
            # ws_gpu = gputools.pad_stack_GPU(self.winaux.ws_gpu,
            #                                 self.sfft_gpu, self.sf-1,
            #                                 dtype='complex') 
            # xx_gpu = ws_gpu * xx_gpu
            # xy_gpu = ws_gpu * xy_gpu
            # yx_gpu = ws_gpu * yx_gpu
            # yy_gpu = ws_gpu * yy_gpu

            # Compute Fourier transform
            self.fft(yx_gpu, self.fft_gpu.shape[0])
            self.fft(yy_gpu, self.fft_gpu.shape[0])
            self.fft(xx_gpu, self.fft_gpu.shape[0])
            self.fft(xy_gpu, self.fft_gpu.shape[0])

            # Do division in Fourier space
            z_gpu = cua.zeros(xy_gpu.shape, np.complex64)            
            z_gpu = gputools.comp_ola_gdeconv(xx_gpu, xy_gpu,
                                              yx_gpu, yy_gpu,
                                              L_gpu, alpha, beta)

            # Computing the inverse FFT
            z_gpu = z_gpu.conj() 
            self.fft(z_gpu, self.fft_gpu.shape[0])
            z_gpu = z_gpu.conj()/np.prod(z_gpu.shape[-2::])

            # Crop out the kernels
            zc_gpu = gputools.crop_stack_GPU(z_gpu, self.sf)
            return zc_gpu
 

        # --------------------------------------------------------------------
        elif mode == 'direct':

            const_gpu = cua.empty_like(y_gpu)
            const_gpu.fill(1.)

            # First deconvolution without corrective term
            y_gpu = self.deconv(y_gpu, mode = 'xdirect', alpha = alpha,
                                beta = beta, edgetapering = edgetapering)      
            gputools.cliplower_GPU(y_gpu,0)

            # Now same for constant image to get rid of window artifacts
            if edgetapering == 1:
                gputools.edgetaper_gpu(const_gpu, 2*self.sf, 'barthann')
                
            const_gpu = self.deconv(const_gpu, mode = 'xdirect', alpha = alpha,
                                    beta = beta, edgetapering = edgetapering)
            gputools.edgetaper_gpu(const_gpu, 2*self.sf, 'barthann')
            gputools.clip_GPU(const_gpu, 0.01, 10.)            

            # Division of deconvolved latent and constant image to get rid
            # of artifacts stemming from windowing
            y_gpu = y_gpu / const_gpu
            sz    = y_gpu.shape
            #gputools.clip_GPU(y_gpu, 0., 1.0)
            #gputools.edgetaper_gpu(y_gpu, 3*self.sf, 'barthann')

            # Do cropping and padding since edges are corrupted by division
            y_gpu = gputools.crop_gpu2cpu(y_gpu, sz-factor*self.sf-1,
                                        offset=np.floor((factor*self.sf-1)/2.))
            y_gpu = gputools.impad_gpu(y_gpu, tuple(np.array(sz)-y_gpu.shape))

            return y_gpu


        # --------------------------------------------------------------------
        elif mode == 'xdirect':

            # Compute Laplacian
            if alpha > 0.:
                gx_gpu = gputools.pad_cpu2gpu(
                    np.array([[-1,1]]), self.sfft_gpu, dtype='complex')
                gy_gpu = gputools.pad_cpu2gpu(
                    np.array([[-1],[1]]), self.sfft_gpu, dtype='complex')
                self.plan.execute(gx_gpu)
                self.plan.execute(gy_gpu)
                L_gpu = gx_gpu * gx_gpu.conj() + gy_gpu * gy_gpu.conj()
            else:
                L_gpu = cua.zeros(self.fft_gpu.shape, np.complex64)

            # Edgetapering of blurry input image
            if edgetapering == 1:
                gputools.edgetaper_gpu(y_gpu, 3*self.sf, 'barthann')
                
            if self.mode == 'valid':
                #y_gpu = gputools.pad_cpu2gpu(y_gpu, self.sx, self.sf-1, dtype='real')
                offset = self.sf-1
            elif self.mode == 'same':
                offset = np.floor(self.sf/2)
            else:
                raise NotImplementedError('Not a valid mode!')

            # Chop and pad business
            y_gpu = gputools.chop_pad_GPU(y, self.winaux.csf,
                                          self.winaux.sw, self.winaux.nhop,
                                          self.sfft_gpu, offset, 'complex')     
            ws_gpu = gputools.pad_stack_GPU(self.winaux.ws_gpu, self.sfft_gpu,
                                            dtype='complex')

            # Windowing
            y_gpu  = ws_gpu * y_gpu

            # Compute FFT
            self.fft(y_gpu, self.fft_gpu.shape[0])

            # Do division in Fourier space
            z_gpu = gputools.comp_ola_deconv(self.fft_gpu, y_gpu, L_gpu,
                                             alpha, beta)

            # Computing the inverse FFT
            z_gpu = z_gpu.conj() 
            self.fft(z_gpu, self.fft_gpu.shape[0])
            z_gpu = z_gpu.conj()/np.prod(z_gpu.shape[-2::])

            # Crop the solution to correct output size 
            if self.__id__ == 'X':
                zc_gpu = gputools.crop_stack_GPU(z_gpu, self.sf)                
                return zc_gpu
       
            elif self.__id__ == 'F':
                zs_gpu = gputools.crop_stack_GPU(z_gpu, self.winaux.sw)
                #zs_gpu = self.winaux.ws_gpu * zs_gpu
                zc_gpu = gputools.ola_GPU_test(zs_gpu, self.winaux.csf,
                                           self.winaux.sw, self.winaux.nhop)
                zc_gpu = gputools.crop_gpu2cpu(zc_gpu, self.sx)             
                return zc_gpu


        # --------------------------------------------------------------------
        elif mode == 'sparse':

            # Compute Laplacian
            gx_gpu = gputools.pad_cpu2gpu(np.sqrt(2.)/2. *
                np.array([[-1,1]]), self.sfft_gpu, dtype='complex')
            gy_gpu = gputools.pad_cpu2gpu(np.sqrt(2.)/2. *
                np.array([[-1],[1]]), self.sfft_gpu, dtype='complex')
            self.plan.execute(gx_gpu)
            self.plan.execute(gy_gpu)
            L_gpu = gx_gpu * gx_gpu.conj() + gy_gpu * gy_gpu.conj()

            const_gpu = cua.empty_like(y_gpu)
            const_gpu.fill(1.)

            # Edgetapering
            if edgetapering == 1:
                gputools.edgetaper_gpu(y_gpu, 2*self.sf, 'barthann')
                gputools.edgetaper_gpu(const_gpu, 2*self.sf, 'barthann')

            # Parameter settings
            beta = 1.
            beta_rate = 2. * np.sqrt(2.)
            beta_max = 2.**8

            # Initialisation of x with padded version of y
            x_gpu = 1 * y_gpu
            if self.mode == 'valid':
                offset = self.sf-1
            elif self.mode == 'same':
                offset = np.floor(self.sf/2)
            else:
                raise NotImplementedError('Not a valid mode!')

            # Chop and pad business
            y_gpu = gputools.chop_pad_GPU(y_gpu, self.winaux.csf,
                                          self.winaux.sw, self.winaux.nhop,
                                          self.sfft_gpu, offset,'complex')
            const_gpu = gputools.chop_pad_GPU(const_gpu, self.winaux.csf,
                                          self.winaux.sw, self.winaux.nhop,
                                          self.sfft_gpu, offset,'complex')
            ws_gpu = gputools.pad_stack_GPU(self.winaux.ws_gpu, self.sfft_gpu,
                                            offset, dtype='complex')

            # Windowing
            y_gpu = y_gpu * ws_gpu

            # Constant image for corrective weighting term
            const_gpu = const_gpu * ws_gpu
            del ws_gpu
            
            self.fft(const_gpu, self.fft_gpu.shape[0])
            const_gpu = gputools.comp_ola_deconv(self.fft_gpu, const_gpu,
                                                 L_gpu, alpha, gamma)
            const_gpu = const_gpu.conj()
            self.fft(const_gpu, self.fft_gpu.shape[0])
            const_gpu = const_gpu.conj()/np.prod(const_gpu.shape[-2::])
            const_gpu = gputools.crop_stack_GPU(const_gpu, self.winaux.sw)
            const_gpu = const_gpu * self.winaux.ws_gpu
            const_gpu = gputools.ola_GPU_test(const_gpu, self.winaux.csf,
                                              self.winaux.sw, self.winaux.nhop)
            const_gpu = gputools.crop_gpu2cpu(const_gpu, self.sx)
            # For debugging purposes
            #scipy.misc.imsave('const1.png', const_gpu.get()/const_gpu.get().max())
            gputools.cliplower_GPU(const_gpu, 0.01)
            const_gpu = 0.01 / const_gpu
            
            # Precompute F'y
            self.fft(y_gpu, self.fft_gpu.shape[0])
            y_gpu = y_gpu * self.fft_gpu.conj()
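            # Half-quadratic splitting (a summary of the loop below): an
            # auxiliary variable w ~ grad(x) decouples the |.|^(2/3) prior
            # from the data term, and the coupling weight beta is increased
            # by beta_rate each pass until it reaches beta_max:
            #   w-step: per-pixel shrinkage for the 2/3-sparse prior
            #           (modify_sparse23_gpu), applied to the gradient images;
            #   x-step: quadratic in x, solved by division in Fourier space
            #           per overlap-add patch (comp_ola_sdeconv).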
            
            
            while beta < beta_max:
                # Compute gradient images of x
                xx_gpu, xy_gpu = gputools.gradient_gpu(x_gpu)
                del x_gpu

                # w sub-problem for alpha 2/3
                gputools.modify_sparse23_gpu(xx_gpu, beta) 
                gputools.modify_sparse23_gpu(xy_gpu, beta)
                #gputools.modify_sparse_gpu(xx_gpu, beta, 0.01)
                #gputools.modify_sparse_gpu(xy_gpu, beta, 0.01)

                # Chop and pad to size of FFT
                xx_gpu = gputools.chop_pad_GPU(xx_gpu, self.winaux.csf,
                                           self.winaux.sw, self.winaux.nhop,
                                           self.sfft_gpu, dtype='complex')
                xy_gpu = gputools.chop_pad_GPU(xy_gpu, self.winaux.csf,
                                           self.winaux.sw, self.winaux.nhop,
                                           self.sfft_gpu, dtype='complex')  

                # Compute Fourier transform
                self.fft(xx_gpu, self.fft_gpu.shape[0])
                self.fft(xy_gpu, self.fft_gpu.shape[0])

                # Do division in Fourier space
                x_gpu = gputools.comp_ola_sdeconv(gx_gpu, gy_gpu,
                                                  xx_gpu, xy_gpu,
                                                  y_gpu, self.fft_gpu,
                                                  L_gpu, alpha, beta, gamma)
                del xx_gpu, xy_gpu

                # Computing the inverse FFT
                x_gpu = x_gpu.conj()
                self.fft(x_gpu, self.fft_gpu.shape[0])
                x_gpu = x_gpu.conj()
                x_gpu /= np.prod(x_gpu.shape[-2::])

                # Ola and cropping
                x_gpu = gputools.crop_stack_GPU(x_gpu, self.winaux.sw)
                x_gpu = x_gpu * self.winaux.ws_gpu
                x_gpu = gputools.ola_GPU_test(x_gpu, self.winaux.csf,
                                           self.winaux.sw, self.winaux.nhop)
                x_gpu = gputools.crop_gpu2cpu(x_gpu, self.sx)

                # Enforce positivity
                x_gpu = x_gpu * const_gpu
                gputools.cliplower_GPU(x_gpu, 0.)

                beta *= beta_rate

            return x_gpu
                       
        else:
            raise NotImplementedError('Not a valid deconv mode!')   
Example #5
    def deconv(self,
               y,
               z0=None,
               mode='lbfgsb',
               maxfun=100,
               alpha=0.,
               beta=0.01,
               verbose=10,
               m=5,
               edgetapering=1,
               factor=3,
               gamma=1e-4):
        """
        deconv implements various deconvolution methods. It expects a
        blurry image and outputs an estimated blur kernel or a sharp latent
        image. Currently, the following algorithms are implemented:

        'lbfgsb'   uses the lbfgsb optimization code to minimize the following
                   constrained regularized problem:

                   |y-Zu|^2 + alpha * |grad(u)|^2 + beta * |u|^2 s.t. u>0

                   The alpha term promotes smoothness of the solution, while
                   the beta term is an ordinary Tikhonov regularization
                   
        'direct'   as above but solves the problem directly, i.e. via
                   division in Fourier space instead of an iterative
                   minimization scheme at the cost of the positivity
                   constraint.

        'xdirect'  as 'direct' but without corrective term which reduces
                   artifacts stemming from the windowing

        'gdirect'  solves the following problem

                   |grad(y)-grad(Zu)|^2 + alpha * |grad(u)|^2 + beta * |u|^2

                   This is particularly useful for kernel estimation in the
                   case of blurred natural images featuring many edges. The
                   advantage vs. 'direct' is the suppression of noise in the
                   estimated PSF kernels. 

        'sparse'   implements the method described in
                   'Fast Image Deconvolution using Hyper-Laplacian Priors'
                   by Dilip Krishnan and Rob Fergus, NIPS 2009.
                   It minimizes the following problem

                   |y-Zu|^2 + gamma * |grad(u)|^(2/3)

                   via half-quadratic splitting. See paper for details.
                   
        ----------------------------------------------------------------------
        Usage:
    
        Call:  Z = OlaGPU(z, sw, mode, winaux)
               u = Z.deconv(y)
 
        Input:  y   blurry image
        Output: u   either image or PSF sized object
        """

        from numpy import array

        if not all(array(y.shape) == self.sy):
            raise IOError('Sizes incompatible. Expected blurred image!')

        # Potential data transfer to GPU
        if y.__class__ == cua.GPUArray:
            y_gpu = 1. * y
        else:
            y_gpu = cua.to_gpu(y.astype(np.float32))

        # --------------------------------------------------------------------
        if mode == 'lbfgsb':

            from scipy.optimize import fmin_l_bfgs_b

            self.res_gpu = cua.empty_like(y_gpu)

            if self.__id__ == 'X':
                sz = ((int(np.prod(self.winaux.csf)), int(self.sz[0]),
                       int(self.sz[1])))

            elif self.__id__ == 'F':
                sz = self.sz

            lf = np.prod(sz)
            if z0 is None:
                z0_gpu = self.cnvtp(y_gpu)
                z0 = z0_gpu.get()
                z0 = z0.flatten()

                #z0 = np.zeros(self.sf)   # initialisation with flat kernels
                #z0[self.sf[0]/2,self.sf[1]/2] = 1.
                #z0 = np.tile(z0, [np.prod(self.csf),1,1])
                #z0 = z0.flatten()

            else:
                z0 = z0.flatten()

            lb = 0.  # lower bound
            ub = np.infty  # upper bound
            zhat = fmin_l_bfgs_b(func = self.cnvinv_objfun, x0 = z0, \
                                 fprime = self.cnvinv_gradfun,\
                                 args = [sz, y_gpu, alpha, beta],\
                                 factr = 10., pgtol = 10e-15, \
                                 maxfun = maxfun, bounds = [(lb, ub)] * lf,\
                                 m = m, iprint = verbose)
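            # fmin_l_bfgs_b returns a tuple (x, f, info_dict); the flattened
            # minimiser is reshaped back to the kernel-stack / image shape sz.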

            return np.reshape(zhat[0], sz), zhat[1], zhat[2]

        # --------------------------------------------------------------------
        elif mode == 'gdirect':

            # Use this method only for estimating the PSF
            if self.__id__ != 'X':
                raise Exception('Use direct mode for image estimation!')

            # Compute Laplacian
            if alpha > 0.:
                gx_gpu = gputools.pad_cpu2gpu(np.array([[-1, 1], [-1, 1],
                                                        [-1, 1]]),
                                              self.sfft_gpu,
                                              dtype='complex')

                gy_gpu = gputools.pad_cpu2gpu(np.array([[-1, -1, -1],
                                                        [1, 1, 1]]),
                                              self.sfft_gpu,
                                              dtype='complex')

                self.plan.execute(gx_gpu)
                self.plan.execute(gy_gpu)
                L_gpu = gx_gpu * gx_gpu.conj() + gy_gpu * gy_gpu.conj()
            else:
                L_gpu = cua.zeros(self.fft_gpu.shape, np.complex64)

            if edgetapering == 1:
                gputools.edgetaper_gpu(y_gpu, 2 * self.sf, 'barthann')

            # Transfer to GPU
            if self.x.__class__ == cua.GPUArray:
                x_gpu = self.x
            else:
                x_gpu = cua.to_gpu(self.x)

            # Compute gradient images
            xx_gpu, xy_gpu = gputools.gradient_gpu(x_gpu)
            yx_gpu, yy_gpu = gputools.gradient_gpu(y_gpu)

            # Chop and pad business
            if self.mode == 'valid':
                yx_gpu = gputools.chop_pad_GPU(yx_gpu, self.winaux.csf,
                                               self.winaux.sw,
                                               self.winaux.nhop, self.sfft_gpu,
                                               self.sf - 1, 'complex')
                yy_gpu = gputools.chop_pad_GPU(yy_gpu, self.winaux.csf,
                                               self.winaux.sw,
                                               self.winaux.nhop, self.sfft_gpu,
                                               self.sf - 1, 'complex')

            elif self.mode == 'same':
                yx_gpu = gputools.chop_pad_GPU(yx_gpu, self.winaux.csf,
                                               self.winaux.sw,
                                               self.winaux.nhop, self.sfft_gpu,
                                               np.floor(self.sf / 2),
                                               'complex')
                yy_gpu = gputools.chop_pad_GPU(yy_gpu, self.winaux.csf,
                                               self.winaux.sw,
                                               self.winaux.nhop, self.sfft_gpu,
                                               np.floor(self.sf / 2),
                                               'complex')
            else:
                raise NotImplementedError('Not a valid mode!')

            xx_gpu = gputools.chop_pad_GPU(xx_gpu,
                                           self.winaux.csf,
                                           self.winaux.sw,
                                           self.winaux.nhop,
                                           self.sfft_gpu,
                                           dtype='complex')
            xy_gpu = gputools.chop_pad_GPU(xy_gpu,
                                           self.winaux.csf,
                                           self.winaux.sw,
                                           self.winaux.nhop,
                                           self.sfft_gpu,
                                           dtype='complex')

            # Here each patch should be windowed to reduce ringing artifacts,
            # however since we are working in the gradient domain, the effect
            # is negligible
            # ws_gpu = gputools.pad_stack_GPU(self.winaux.ws_gpu,
            #                                 self.sfft_gpu, self.sf-1,
            #                                 dtype='complex')
            # xx_gpu = ws_gpu * xx_gpu
            # xy_gpu = ws_gpu * xy_gpu
            # yx_gpu = ws_gpu * yx_gpu
            # yy_gpu = ws_gpu * yy_gpu

            # Compute Fourier transform
            self.fft(yx_gpu, self.fft_gpu.shape[0])
            self.fft(yy_gpu, self.fft_gpu.shape[0])
            self.fft(xx_gpu, self.fft_gpu.shape[0])
            self.fft(xy_gpu, self.fft_gpu.shape[0])

            # Do division in Fourier space
            z_gpu = cua.zeros(xy_gpu.shape, np.complex64)
            z_gpu = gputools.comp_ola_gdeconv(xx_gpu, xy_gpu, yx_gpu, yy_gpu,
                                              L_gpu, alpha, beta)

            # Computing the inverse FFT
            z_gpu = z_gpu.conj()
            self.fft(z_gpu, self.fft_gpu.shape[0])
            z_gpu = z_gpu.conj() / np.prod(z_gpu.shape[-2::])

            # Crop out the kernels
            zc_gpu = gputools.crop_stack_GPU(z_gpu, self.sf)
            return zc_gpu

        # --------------------------------------------------------------------
        elif mode == 'direct':

            const_gpu = cua.empty_like(y_gpu)
            const_gpu.fill(1.)

            # First deconvolution without corrective term
            y_gpu = self.deconv(y_gpu,
                                mode='xdirect',
                                alpha=alpha,
                                beta=beta,
                                edgetapering=edgetapering)
            gputools.cliplower_GPU(y_gpu, 0)

            # Now same for constant image to get rid of window artifacts
            if edgetapering == 1:
                gputools.edgetaper_gpu(const_gpu, 2 * self.sf, 'barthann')

            const_gpu = self.deconv(const_gpu,
                                    mode='xdirect',
                                    alpha=alpha,
                                    beta=beta,
                                    edgetapering=edgetapering)
            gputools.edgetaper_gpu(const_gpu, 2 * self.sf, 'barthann')
            gputools.clip_GPU(const_gpu, 0.01, 10.)

            # Division of deconvolved latent and constant image to get rid
            # of artifacts stemming from windowing
            y_gpu = y_gpu / const_gpu
            sz = y_gpu.shape
            #gputools.clip_GPU(y_gpu, 0., 1.0)
            #gputools.edgetaper_gpu(y_gpu, 3*self.sf, 'barthann')

            # Do cropping and padding since edges are corrupted by division
            y_gpu = gputools.crop_gpu2cpu(y_gpu,
                                          sz - factor * self.sf - 1,
                                          offset=np.floor(
                                              (factor * self.sf - 1) / 2.))
            y_gpu = gputools.impad_gpu(y_gpu,
                                       tuple(np.array(sz) - y_gpu.shape))

            return y_gpu

        # --------------------------------------------------------------------
        elif mode == 'xdirect':

            # Compute Laplacian
            if alpha > 0.:
                gx_gpu = gputools.pad_cpu2gpu(np.array([[-1, 1]]),
                                              self.sfft_gpu,
                                              dtype='complex')
                gy_gpu = gputools.pad_cpu2gpu(np.array([[-1], [1]]),
                                              self.sfft_gpu,
                                              dtype='complex')
                self.plan.execute(gx_gpu)
                self.plan.execute(gy_gpu)
                L_gpu = gx_gpu * gx_gpu.conj() + gy_gpu * gy_gpu.conj()
            else:
                L_gpu = cua.zeros(self.fft_gpu.shape, np.complex64)

            # Edgetapering of blurry input image
            if edgetapering == 1:
                gputools.edgetaper_gpu(y_gpu, 3 * self.sf, 'barthann')

            if self.mode == 'valid':
                #y_gpu = gputools.pad_cpu2gpu(y_gpu, self.sx, self.sf-1, dtype='real')
                offset = self.sf - 1
            elif self.mode == 'same':
                offset = np.floor(self.sf / 2)
            else:
                raise NotImplementedError('Not a valid mode!')

            # Chop and pad business
            y_gpu = gputools.chop_pad_GPU(y, self.winaux.csf, self.winaux.sw,
                                          self.winaux.nhop, self.sfft_gpu,
                                          offset, 'complex')
            ws_gpu = gputools.pad_stack_GPU(self.winaux.ws_gpu,
                                            self.sfft_gpu,
                                            dtype='complex')

            # Windowing
            y_gpu = ws_gpu * y_gpu

            # Compute FFT
            self.fft(y_gpu, self.fft_gpu.shape[0])

            # Do division in Fourier space
            z_gpu = gputools.comp_ola_deconv(self.fft_gpu, y_gpu, L_gpu, alpha,
                                             beta)

            # Computing the inverse FFT
            z_gpu = z_gpu.conj()
            self.fft(z_gpu, self.fft_gpu.shape[0])
            z_gpu = z_gpu.conj() / np.prod(z_gpu.shape[-2::])

            # Crop the solution to correct output size
            if self.__id__ == 'X':
                zc_gpu = gputools.crop_stack_GPU(z_gpu, self.sf)
                return zc_gpu

            elif self.__id__ == 'F':
                zs_gpu = gputools.crop_stack_GPU(z_gpu, self.winaux.sw)
                #zs_gpu = self.winaux.ws_gpu * zs_gpu
                zc_gpu = gputools.ola_GPU_test(zs_gpu, self.winaux.csf,
                                               self.winaux.sw,
                                               self.winaux.nhop)
                zc_gpu = gputools.crop_gpu2cpu(zc_gpu, self.sx)
                return zc_gpu

        # --------------------------------------------------------------------
        elif mode == 'sparse':

            # Compute Laplacian
            gx_gpu = gputools.pad_cpu2gpu(np.sqrt(2.) / 2. *
                                          np.array([[-1, 1]]),
                                          self.sfft_gpu,
                                          dtype='complex')
            gy_gpu = gputools.pad_cpu2gpu(np.sqrt(2.) / 2. *
                                          np.array([[-1], [1]]),
                                          self.sfft_gpu,
                                          dtype='complex')
            self.plan.execute(gx_gpu)
            self.plan.execute(gy_gpu)
            L_gpu = gx_gpu * gx_gpu.conj() + gy_gpu * gy_gpu.conj()

            const_gpu = cua.empty_like(y_gpu)
            const_gpu.fill(1.)

            # Edgetapering
            if edgetapering == 1:
                gputools.edgetaper_gpu(y_gpu, 2 * self.sf, 'barthann')
                gputools.edgetaper_gpu(const_gpu, 2 * self.sf, 'barthann')

            # Parameter settings
            beta = 1.
            beta_rate = 2. * np.sqrt(2.)
            beta_max = 2.**8

            # Initialisation of x with padded version of y
            x_gpu = 1 * y_gpu
            if self.mode == 'valid':
                offset = self.sf - 1
            elif self.mode == 'same':
                offset = np.floor(self.sf / 2)
            else:
                raise NotImplementedError('Not a valid mode!')

            # Chop and pad business
            y_gpu = gputools.chop_pad_GPU(y_gpu, self.winaux.csf,
                                          self.winaux.sw, self.winaux.nhop,
                                          self.sfft_gpu, offset, 'complex')
            const_gpu = gputools.chop_pad_GPU(const_gpu, self.winaux.csf,
                                              self.winaux.sw, self.winaux.nhop,
                                              self.sfft_gpu, offset, 'complex')
            ws_gpu = gputools.pad_stack_GPU(self.winaux.ws_gpu,
                                            self.sfft_gpu,
                                            offset,
                                            dtype='complex')

            # Windowing
            y_gpu = y_gpu * ws_gpu

            # Constant image for corrective weighting term
            const_gpu = const_gpu * ws_gpu
            del ws_gpu

            self.fft(const_gpu, self.fft_gpu.shape[0])
            const_gpu = gputools.comp_ola_deconv(self.fft_gpu, const_gpu,
                                                 L_gpu, alpha, gamma)
            const_gpu = const_gpu.conj()
            self.fft(const_gpu, self.fft_gpu.shape[0])
            const_gpu = const_gpu.conj() / np.prod(const_gpu.shape[-2::])
            const_gpu = gputools.crop_stack_GPU(const_gpu, self.winaux.sw)
            const_gpu = const_gpu * self.winaux.ws_gpu
            const_gpu = gputools.ola_GPU_test(const_gpu, self.winaux.csf,
                                              self.winaux.sw, self.winaux.nhop)
            const_gpu = gputools.crop_gpu2cpu(const_gpu, self.sx)
            # For debugging purposes
            #scipy.misc.imsave('const1.png', const_gpu.get()/const_gpu.get().max())
            gputools.cliplower_GPU(const_gpu, 0.01)
            const_gpu = 0.01 / const_gpu

            # Precompute F'y
            self.fft(y_gpu, self.fft_gpu.shape[0])
            y_gpu = y_gpu * self.fft_gpu.conj()
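            # Half-quadratic splitting (a summary of the loop below): an
            # auxiliary variable w ~ grad(x) decouples the |.|^(2/3) prior
            # from the data term, and the coupling weight beta is increased
            # by beta_rate each pass until it reaches beta_max:
            #   w-step: per-pixel shrinkage for the 2/3-sparse prior
            #           (modify_sparse23_gpu), applied to the gradient images;
            #   x-step: quadratic in x, solved by division in Fourier space
            #           per overlap-add patch (comp_ola_sdeconv).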

            while beta < beta_max:
                # Compute gradient images of x
                xx_gpu, xy_gpu = gputools.gradient_gpu(x_gpu)
                del x_gpu

                # w sub-problem for alpha 2/3
                gputools.modify_sparse23_gpu(xx_gpu, beta)
                gputools.modify_sparse23_gpu(xy_gpu, beta)
                #gputools.modify_sparse_gpu(xx_gpu, beta, 0.01)
                #gputools.modify_sparse_gpu(xy_gpu, beta, 0.01)

                # Chop and pad to size of FFT
                xx_gpu = gputools.chop_pad_GPU(xx_gpu,
                                               self.winaux.csf,
                                               self.winaux.sw,
                                               self.winaux.nhop,
                                               self.sfft_gpu,
                                               dtype='complex')
                xy_gpu = gputools.chop_pad_GPU(xy_gpu,
                                               self.winaux.csf,
                                               self.winaux.sw,
                                               self.winaux.nhop,
                                               self.sfft_gpu,
                                               dtype='complex')

                # Compute Fourier transform
                self.fft(xx_gpu, self.fft_gpu.shape[0])
                self.fft(xy_gpu, self.fft_gpu.shape[0])

                # Do division in Fourier space
                x_gpu = gputools.comp_ola_sdeconv(gx_gpu, gy_gpu, xx_gpu,
                                                  xy_gpu, y_gpu, self.fft_gpu,
                                                  L_gpu, alpha, beta, gamma)
                del xx_gpu, xy_gpu

                # Computing the inverse FFT
                x_gpu = x_gpu.conj()
                self.fft(x_gpu, self.fft_gpu.shape[0])
                x_gpu = x_gpu.conj()
                x_gpu /= np.prod(x_gpu.shape[-2::])

                # Ola and cropping
                x_gpu = gputools.crop_stack_GPU(x_gpu, self.winaux.sw)
                x_gpu = x_gpu * self.winaux.ws_gpu
                x_gpu = gputools.ola_GPU_test(x_gpu, self.winaux.csf,
                                              self.winaux.sw, self.winaux.nhop)
                x_gpu = gputools.crop_gpu2cpu(x_gpu, self.sx)

                # Enforce positivity
                x_gpu = x_gpu * const_gpu
                gputools.cliplower_GPU(x_gpu, 0.)

                beta *= beta_rate

            return x_gpu

        else:
            raise NotImplementedError('Not a valid deconv mode!')
Example #6
def process(opts):
    # ============================================================================
    # Specify some parameter settings
    # ----------------------------------------------------------------------------

    # Specify data path and file identifier
    DATAPATH = '/DATA/LSST/FITS'
    RESPATH = '../../../DATA/results'
    BASE_N = 141
    FILENAME = lambda i: '%s/v88827%03d-fz.R22.S11.fits' % (DATAPATH,
                                                            (BASE_N + i))
    ID = 'LSST'

    # ----------------------------------------------------------------------------
    # Specify parameter settings

    # General
    doshow = opts.doShow  # put 1 to show intermediate results
    backup = opts.backup  # put 1 to write intermediate results to disk
    N = opts.N  # how many frames to process
    N0 = opts.N0  # number of averaged frames for initialisation

    # OlaGPU parameters
    sf = np.array([40, 40])  # estimated size of PSF
    csf = (3, 3)  # number of kernels across x and y direction
    overlap = 0.5  # overlap fraction between neighboring patches

    # Regularization parameters for kernel estimation
    f_alpha = opts.f_alpha  # promotes smoothness
    f_beta = opts.f_beta  # Tikhonov regularization
    optiter = opts.optiter  # number of iterations for minimization
    tol = opts.tol  # tolerance for when to stop minimization
    # ============================================================================

    # Create helper functions for file handling

    # # # HACK for chunking into available GPU mem # # #
    #     - loads one 1kx1k block out of the fits image
    xOffset = 2000
    yOffset = 0
    chunkSize = 1000
    yload = lambda i: 1. * fitsTools.readFITS(
        FILENAME(i), use_mask=True, norm=True)[yOffset:yOffset + chunkSize,
                                               xOffset:xOffset + chunkSize]

    # ----------------------------------------------------------------------------
    # Some more code for backing up the results
    # ----------------------------------------------------------------------------
    # For backup purposes
    EXPPATH = '%s/%s_sf%dx%d_csf%dx%d_maxiter%d_alpha%.2f_beta%.2f' % \
          (RESPATH,ID,sf[0],sf[1],csf[0],csf[1],optiter,f_alpha,f_beta)

    xname = lambda i: '%s/x_%04d.png' % (EXPPATH, i)
    yname = lambda i: '%s/y_%04d.png' % (EXPPATH, i)
    fname = lambda i: '%s/f_%04d.png' % (EXPPATH, i)

    if os.path.exists(EXPPATH) and opts.overwrite:
        try:
            rmtree(EXPPATH)
        except:
            print "[ERROR] removing old results dir:", EXPPATH
            exit()

    elif os.path.exists(EXPPATH):
        print "[ERROR] results directory already exists, please remove or use '-o' to overwrite"
        exit()

    # Create results path if not existing
    try:
        os.makedirs(EXPPATH)
    except:
        print "[ERROR] creating results dir:", EXPPATH
        exit()

    print 'Results are saved to: \n %s \n' % EXPPATH

    # ----------------------------------------------------------------------------
    # For displaying intermediate results create target figure
    # ----------------------------------------------------------------------------
    # Create figure for displaying intermediate results
    if doshow:
        print "showing intermediate results is currently disabled.."
        #pl.figure(1)
        #pl.draw()

    # ----------------------------------------------------------------------------
    # Code for initialising the online multi-frame deconvolution
    # ----------------------------------------------------------------------------
    # Initialise the latent image by averaging the first N0 frames
    y0 = 0.
    for i in np.arange(1, N0):
        y0 += yload(i)

    y0 /= N0
    y_gpu = cua.to_gpu(y0)

    # Pad image since we perform deconvolution with valid boundary conditions
    x_gpu = gputools.impad_gpu(y_gpu, sf - 1)

    # Create windows for OlaGPU
    sx = y0.shape + sf - 1
    sf2 = np.floor(sf / 2)
    winaux = imagetools.win2winaux(sx, csf, overlap)
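    # A note on winaux (an assumption based on how it is used below): it
    # appears to bundle the overlap-add bookkeeping consumed by OlaGPU, i.e.
    # the patch grid (winaux.csf), patch size (winaux.sw), hop between
    # patches (winaux.nhop) and the per-patch windows (winaux.ws_gpu).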

    # ----------------------------------------------------------------------------
    # Loop over all frames and do online blind deconvolution
    # ----------------------------------------------------------------------------
    import time as t
    ti = t.clock()
    t1 = stopwatch.timer()
    t2 = stopwatch.timer()
    t3 = stopwatch.timer()
    t4 = stopwatch.timer()
    t4.start()
    for i in np.arange(1, N + 1):
        print 'Processing frame %d/%d \r' % (i, N)

        # Load next observed image
        t3.start()
        y = yload(i)
        print "TIMER load:", t3.elapsed()

        # Compute mask for determining saturated regions
        mask_gpu = 1. * cua.to_gpu(y < 1.)
        y_gpu = cua.to_gpu(y)

        # ------------------------------------------------------------------------
        # PSF estimation
        # ------------------------------------------------------------------------
        # Create OlaGPU instance with current estimate of latent image
        t2.start()
        X = olaGPU.OlaGPU(x_gpu, sf, 'valid', winaux=winaux)
        print "TIMER GPU: ", t2.elapsed()

        t1.start()
        # PSF estimation for given estimate of latent image and current observation
        f = X.deconv(y_gpu,
                     mode='lbfgsb',
                     alpha=f_alpha,
                     beta=f_beta,
                     maxfun=optiter,
                     verbose=10)
        print "TIMER Optimization: ", t1.elapsed()

        fs = f[0]

        # Normalize PSF kernels to sum up to one
        fs = gputools.normalize(fs)

        # ------------------------------------------------------------------------
        # Latent image estimation
        # ------------------------------------------------------------------------
        # Create OlaGPU instance with estimated PSF
        t2.start()
        F = olaGPU.OlaGPU(fs, sx, 'valid', winaux=winaux)

        # Latent image estimation by performing one gradient descent step
        # multiplicative update is used which preserves positivity
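        # Written out (a sketch of what the lines below compute; m is the
        # saturation mask, F the current blur operator):
        #     x <- x * F^T(m*y) / (F^T(m*F(x)) + tol)
        # i.e. an ISRA-style multiplicative update: with non-negative x, y,
        # mask and kernels, the new iterate stays non-negative.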
        factor_gpu = F.cnvtp(
            mask_gpu * y_gpu) / (F.cnvtp(mask_gpu * F.cnv(x_gpu)) + tol)
        gputools.cliplower_GPU(factor_gpu, tol)
        x_gpu = x_gpu * factor_gpu
        x_max = x_gpu.get()[sf[0]:-sf[0], sf[1]:-sf[1]].max()

        gputools.clipupper_GPU(x_gpu, x_max)
        print "TIMER GPU: ", t2.elapsed()

        # ------------------------------------------------------------------------
        # Back up intermediate results
        # ------------------------------------------------------------------------
        if backup or i == N:
            # Write intermediate results to disk incl. input
            y_img = y_gpu.get() * 1e5
            fitsTools.asinhScale(y_img,
                                 450,
                                 -50,
                                 minCut=0.0,
                                 maxCut=40000,
                                 fname=yname(i))

            # Crop image to input size
            xi = (x_gpu.get()[sf2[0]:-sf2[0], sf2[1]:-sf2[1]] / x_max) * 1e5

            fitsTools.fitsStats(xi)
            fitsTools.asinhScale(xi,
                                 450,
                                 -50,
                                 minCut=0.0,
                                 maxCut=40000,
                                 fname=xname(i))

            # Concatenate PSF kernels for ease of visualisation
            f = imagetools.gridF(fs, csf)
            f = f * 1e5

            fitsTools.asinhScale(f,
                                 450,
                                 -50,
                                 minCut=0.0,
                                 maxCut=40000,
                                 fname=fname(i))

        # ------------------------------------------------------------------------
        # For displaying intermediate results
        # ------------------------------------------------------------------------
        '''
        if np.mod(i,1) == 0 and doshow:
        pl.figure(1)
        pl.subplot(121)
        # what is SY?
        pl.imshow(imagetools.crop(x_gpu.get(),sy,np.ceil(sf/2)),'gray')
        pl.title('x after %d observations' % i)
        pl.subplot(122)
        pl.imshow(y_gpu.get(),'gray')
        pl.title('y(%d)' % i)
        pl.draw()
        pl.figure(2)
        pl.title('PSF(%d)' % i)
        imagetools.cellplot(fs, winaux.csf)
        tf = t.clock()
        print('Time elapsed after %d frames %.3f' % (i,(tf-ti)))
        '''
    tf = t.clock()
    print('Time elapsed for total image sequence %.3f' % (tf - ti))
    # ----------------------------------------------------------------------------
    print "TOTAL: %.3f" % (t4.elapsed())
    print "OptimizeCPUtime %.3f %.3f" % (t1.getTotal(), 100 *
                                         (t1.getTotal() / t4.getTotal()))
    print "GPUtime %.3f %.3f" % (t2.getTotal(), 100 *
                                 (t2.getTotal() / t4.getTotal()))
    print "LoadTime %.3f %.3f" % (t3.getTotal(), 100 *
                                  (t3.getTotal() / t4.getTotal()))