# ----------------------------------------------------------------------------
# Initialisation of latent image by averaging the first N0 frames.
# Frames are numbered from 1, so the range has to include N0 itself;
# stopping at N0 - 1 would sum one frame too few while still dividing by N0.
y0 = 0.
for i in np.arange(1, N0 + 1):
    y0 += yload(i)

y0 /= N0
y_gpu = cua.to_gpu(y0)

# Pad image since we perform deconvolution with valid boundary conditions
x_gpu = gputools.impad_gpu(y_gpu, sf - 1)

# Create windows for OlaGPU
sx = y0.shape + sf - 1
sf2 = np.floor(sf / 2)
winaux = imagetools.win2winaux(sx, csf, overlap)

# ----------------------------------------------------------------------------
# Loop over all frames and do online blind deconvolution
# ----------------------------------------------------------------------------
import time as t
ti = t.clock()

for i in np.arange(1, N + 1):

    print('Processing frame %d/%d \r' % (i, N))

    # Load next observed image
    y = yload(i)

    # Compute mask for determining saturated regions
Пример #2
0
def galaxyConvolution(mode='same'):
    """
    Convolve a galaxy image with a PSF on the GPU (OlaGPU) and with SciPy.

    The galaxy image is read from a FITS file, zoomed by a factor of four,
    made strictly positive and scaled to a peak value of 4000. The PSF is
    read from a second FITS file and normalised to unit sum. Both are
    converted to single precision. The GPU result and the result of
    scipy.signal.fftconvolve are written out as log10-scaled JPEG files and
    compared with numpy.testing.assert_allclose.

    :param mode: the resulted convolution area (valid, same, full)
    :type mode: str

    :return: None
    """
    import pycuda.autoinit
    import pycuda.gpuarray as cua
    import numpy as np
    import pyfits as pf
    import scipy
    from scipy import signal
    from scipy import ndimage
    import time

    # Load VMDB libraries
    import gputools
    import imagetools
    import olaGPU as ola

    np.random.seed(123)

    rule = "-------------------"

    def positive_log10(values):
        # log10 of a copy in which all non-positive entries are set to 1e-5
        clipped = values.copy()
        clipped[clipped <= 0.] = 1e-5
        return np.log10(clipped)

    # Galaxy image (galaxy57.fits and galaxy22.fits from the same directory
    # were used as alternative inputs)
    image = pf.getdata(
        '/Users/sammy/EUCLID/vissim-python/objects/galaxy37.fits')
    # oversampling = 12 leads to out of memory error...
    image = ndimage.zoom(image, 4.0, order=0)
    image[image <= 0] = 1e-8
    image /= np.max(image)
    image *= 4000.
    image = image.astype(np.float32)
    scipy.misc.imsave('original.jpg', np.log10(image))

    # PSF kernel, normalised to unit sum
    psf = pf.getdata('/Users/sammy/EUCLID/vissim-python/data/psf4x.fits')
    psf /= np.sum(psf)
    psf = psf.astype(np.float32)
    scipy.misc.imsave('kernel.jpg', np.log10(psf))

    print("%s %s" % (image.shape, psf.shape))

    image_gpu = cua.to_gpu(image)

    # Patch grid for OlaGPU; csf affects the needed memory (e.g. (5, 5))
    image_shape = image.shape
    grid = (2, 2)
    patch_overlap = 0.2

    # One copy of the PSF per patch of the grid
    psf_stack = np.tile(psf, (np.prod(grid), 1, 1))

    windows = imagetools.win2winaux(image_shape, grid, patch_overlap)

    # GPU convolution, timed from the construction of the OlaGPU instance
    # until the result has been copied back to the host
    print(rule)
    print("Create Kernel")
    t_begin = time.clock()
    convolver = ola.OlaGPU(psf_stack, image_shape, mode=mode, winaux=windows)

    print("Compute Convolution ")
    gpu_out = convolver.cnv(image_gpu)
    print("Copy to CPU ")
    result = gpu_out.get()
    print("Time elapsed: %.4f" % (time.clock() - t_begin))

    # CPU reference
    print(rule)
    print("SciPy FFT convolution ")
    t_begin = time.clock()
    conv = signal.fftconvolve(image, psf, mode=mode)
    print("Time elapsed: %.4f" % (time.clock() - t_begin))
    print(rule)

    # Save images
    scipy.misc.imsave('convolvedCUDA.jpg', positive_log10(result))
    scipy.misc.imsave('convolvedSciPy.jpg', positive_log10(conv))

    print('Shapes: %s %s' % (result.shape, conv.shape))
    print('Max: %s %s' % (np.max(result), np.max(conv)))

    print('Differences:')
    if 'full' in mode:
        # For 'full' output a 100 pixel border is left out of the comparison
        label = '> 1e-2?'
        got = result[100:-100, 100:-100]
        expected = conv[100:-100, 100:-100]
        tolerance = 1e-2
    else:
        label = '> 1e-4?'
        got = result
        expected = conv
        tolerance = 1e-4
    print(label)
    print(np.testing.assert_allclose(got, expected, rtol=tolerance))
Пример #3
0
  raise NotImplementedError('Yet to be implemented')
    
if __name__ == "__main__":

    import pylab as pl
    import gputools
    import imagetools as it

    # Demo: split an image into windowed patches on the GPU, recombine them
    # with gputools.ola_GPU_test and display the real part of the result.
    # NOTE(review): presumably the output should reproduce the input image;
    # confirm against gputools.
    image = pl.imread('butcher.png')

    # Patch grid and overlap handed to win2winaux
    grid = (7, 7)
    patch_overlap = 0.5
    winaux = it.win2winaux(image.shape, grid, patch_overlap)

    image_gpu = cua.to_gpu(image)
    patches_gpu = gputools.chop_pad_GPU(
        image_gpu, winaux.csf, winaux.sw, winaux.nhop, dtype='complex')

    # Only the recombination step is timed
    t_begin = time.clock()
    merged_gpu = gputools.ola_GPU_test(
        patches_gpu, winaux.csf, winaux.sw, winaux.nhop)
    print("Time elapsed %.6f" % (time.clock() - t_begin))

    # The patches were created as complex data; show the real part only
    pl.imshow(np.real(merged_gpu.get()))
    pl.show()

    #offset = (0,0)
# ----------------------------------------------------------------------------
# Initialisation of latent image by averaging the first N0 frames.
# Frames are numbered from 1, so the range has to include N0 itself;
# stopping at N0 - 1 would sum one frame too few while still dividing by N0.
y0 = 0.
for i in np.arange(1, N0 + 1):
    y0 += yload(i)

y0 /= N0
y_gpu = cua.to_gpu(y0)

# Pad image since we perform deconvolution with valid boundary conditions
x_gpu = gputools.impad_gpu(y_gpu, sf-1)

# Create windows for OlaGPU
sx      = y0.shape + sf - 1
sf2     = np.floor(sf/2)
winaux  = imagetools.win2winaux(sx, csf, overlap)

# ----------------------------------------------------------------------------
# Loop over all frames and do online blind deconvolution
# ----------------------------------------------------------------------------
import time as t
ti = t.clock()

for i in np.arange(1, N+1):

    print('Processing frame %d/%d \r' % (i, N))

    # Load next observed image
    y = yload(i)

    # Compute mask for determining saturated regions
Пример #5
0
def simpleConvolution(mode='valid'):
    """
    Simple convolution test with random data. Tests if the GPU convolution
    returns the same result as SciPy.signal.fftconvolve. This example
    uses single precision and an image that is about 2k x 2k (2099 x 2100)
    and a kernel that is about 300 x 300 (299 x 299).

    For the modes 'full' and 'same' a border of 100 pixels is excluded from
    the comparison and a relative tolerance of 1e-5 is used; for any other
    mode the complete arrays are compared with a relative tolerance of 1e-6.

    :param mode: the resulted convolution area (valid, same, full)
    :type mode: str

    :return: None
    :raises AssertionError: if the GPU and the SciPy results differ by more
                            than the tolerance
    """
    import pycuda.autoinit
    import pycuda.gpuarray as cua
    import numpy as np
    import scipy
    from scipy import signal
    import time

    # Load VMDB libraries
    import gputools
    import imagetools
    import olaGPU as ola

    np.random.seed(123)

    #data
    #don't make the array too large, not enough GPU memory
    x = np.random.random((2099, 2100)).astype(np.float32)
    scipy.misc.imsave('originalSimple.jpg', np.log10(x))

    #kernel
    kernel = np.random.random((299, 299)).astype(np.float32)
    kernel /= np.sum(kernel)
    scipy.misc.imsave('kernelSimple.jpg', np.log10(kernel))

    print("%s %s" % (x.shape, kernel.shape))

    x_gpu = cua.to_gpu(x)

    sx = x.shape
    csf = (5, 5)
    overlap = 0.5

    # one copy of the kernel per patch of the csf grid
    fs = np.tile(kernel, (np.prod(csf), 1, 1))

    winaux = imagetools.win2winaux(sx, csf, overlap)

    # The GPU timing covers the construction of the OlaGPU instance, the
    # convolution and the copy of the result back to the host.
    print("-------------------")
    print("Create CUDA Kernel")
    start = time.clock()
    F = ola.OlaGPU(fs, sx, mode=mode, winaux=winaux)

    print("Compute Convolution with the GPU using FFTs")
    yF_gpu = F.cnv(x_gpu)

    print("Copy results to CPU")
    result = yF_gpu.get()
    cutime = time.clock() - start

    print("Time elapsed: %.4f" % cutime)
    print("-------------------")

    print("SciPy FFT convolution on CPU")
    start = time.clock()
    conv = signal.fftconvolve(x, kernel, mode=mode)
    sptime = time.clock() - start
    print("Time elapsed: %.4f" % sptime)
    print("-------------------")
    print('CUDA is a factor of %.2f faster' % (sptime / cutime))

    #save images
    scipy.misc.imsave('convolvedCUDASimple.jpg', np.log10(result))
    scipy.misc.imsave('convolvedSciPySimple.jpg', np.log10(conv))

    print('\n\n\nShapes: %s %s' % (result.shape, conv.shape))
    print('Max values: %s %s' % (np.max(result), np.max(conv)))

    print('\n\nDifference:')
    # Each mode name has to be tested on its own: the former condition
    # "'full' or 'same' in mode" evaluated to the non-empty string 'full'
    # and was therefore always true, so the strict branch never ran.
    if 'full' in mode or 'same' in mode:
        print('> 1e-5?')
        np.testing.assert_allclose(result[100:-100, 100:-100],
                                   conv[100:-100, 100:-100],
                                   rtol=1e-5)
    else:
        print('> 1e-6?')
        np.testing.assert_allclose(result, conv, rtol=1e-6)
Пример #6
0
def process(opts):
    """
    Run online multi-frame blind deconvolution on a sequence of FITS frames.

    The latent image is initialised with the mean of the first ``opts.N0``
    frames. For each of the ``opts.N`` frames a grid of PSF kernels is then
    estimated from the current latent image and the frame (OlaGPU.deconv),
    after which the latent image is refined with one multiplicative update.
    Intermediate or final images are handed to fitsTools.asinhScale with
    file names inside a results directory whose name encodes the parameter
    settings.

    :param opts: options object; the attributes read here are ``doShow``,
                 ``backup``, ``N``, ``N0``, ``f_alpha``, ``f_beta``,
                 ``optiter``, ``tol`` and ``overwrite``

    :return: None
    :raises ValueError: if ``opts.N0`` is smaller than 1
    :raises SystemExit: with status 1 if the results directory exists and
                        ``opts.overwrite`` is not set, or if it cannot be
                        removed or created
    """
    import sys
    import time as t

    # ============================================================================
    # Specify some parameter settings
    # ----------------------------------------------------------------------------

    # Specify data path and file identifier
    DATAPATH = '/DATA/LSST/FITS'
    RESPATH = '../../../DATA/results'
    BASE_N = 141
    FILENAME = lambda i: '%s/v88827%03d-fz.R22.S11.fits' % (DATAPATH, (BASE_N + i))
    ID = 'LSST'

    # ----------------------------------------------------------------------------
    # Specify parameter settings

    # General
    doshow = opts.doShow  # put 1 to show intermediate results
    backup = opts.backup  # put 1 to write intermediate results to disk
    N = opts.N            # how many frames to process
    N0 = opts.N0          # number of averaged frames for initialisation

    # Reject an empty initialisation set up front, i.e. before an existing
    # results directory is removed further below.
    if N0 < 1:
        raise ValueError("opts.N0 must be at least 1, got %r" % (N0,))

    # OlaGPU parameters
    sf = np.array([40, 40])  # estimated size of PSF
    csf = (3, 3)             # number of kernels across x and y direction
    overlap = 0.5            # overlap of neighboring patches in percent

    # Regularization parameters for kernel estimation
    f_alpha = opts.f_alpha  # promotes smoothness
    f_beta = opts.f_beta    # Thikhonov regularization
    optiter = opts.optiter  # number of iterations for minimization
    tol = opts.tol          # tolerance for when to stop minimization
    # ============================================================================

    # Create helper functions for file handling

    # # # HACK for chunking into available GPU mem # # #
    #     - loads one 1kx1k block out of the fits image
    xOffset = 2000
    yOffset = 0
    chunkSize = 1000
    yload = lambda i: 1. * fitsTools.readFITS(FILENAME(i), use_mask=True, norm=True)[
        yOffset:yOffset + chunkSize, xOffset:xOffset + chunkSize]

    # ----------------------------------------------------------------------------
    # Some more code for backuping the results
    # ----------------------------------------------------------------------------
    # For backup purposes
    EXPPATH = '%s/%s_sf%dx%d_csf%dx%d_maxiter%d_alpha%.2f_beta%.2f' % \
        (RESPATH, ID, sf[0], sf[1], csf[0], csf[1], optiter, f_alpha, f_beta)

    xname = lambda i: '%s/x_%04d.png' % (EXPPATH, i)
    yname = lambda i: '%s/y_%04d.png' % (EXPPATH, i)
    fname = lambda i: '%s/f_%04d.png' % (EXPPATH, i)

    # An existing results directory is only replaced on explicit request.
    # Only OSError is handled, so that e.g. a KeyboardInterrupt is not
    # reported as a file system problem, and failures exit with status 1.
    if os.path.exists(EXPPATH):
        if not opts.overwrite:
            print("[ERROR] results directory already exists, please remove or use '-o' to overwrite")
            sys.exit(1)
        try:
            rmtree(EXPPATH)
        except OSError:
            print("[ERROR] removing old results dir: %s" % EXPPATH)
            sys.exit(1)

    # Create results path if not existing
    try:
        os.makedirs(EXPPATH)
    except OSError:
        print("[ERROR] creating results dir: %s" % EXPPATH)
        sys.exit(1)

    print('Results are saved to: \n %s \n' % EXPPATH)

    # ----------------------------------------------------------------------------
    # For displaying intermediate results create target figure
    # ----------------------------------------------------------------------------
    if doshow:
        print("showing intermediate results is currently disabled..")

    # ----------------------------------------------------------------------------
    # Code for initialising the online multi-frame deconvolution
    # ----------------------------------------------------------------------------
    # Initialisation of latent image by averaging the first N0 frames.
    # Frames are numbered from 1, so the range has to include N0 itself;
    # stopping at N0 - 1 would sum one frame too few while still dividing
    # by N0.
    y0 = 0.
    for i in np.arange(1, N0 + 1):
        y0 += yload(i)

    y0 /= N0
    y_gpu = cua.to_gpu(y0)

    # Pad image since we perform deconvolution with valid boundary conditions
    x_gpu = gputools.impad_gpu(y_gpu, sf - 1)

    # Create windows for OlaGPU
    sx = y0.shape + sf - 1
    # Half PSF size, kept as integers because it is used for slicing below
    # (np.floor would return floats, which are not valid slice indices)
    sf2 = sf // 2
    winaux = imagetools.win2winaux(sx, csf, overlap)

    # ----------------------------------------------------------------------------
    # Loop over all frames and do online blind deconvolution
    # ----------------------------------------------------------------------------
    ti = t.clock()
    t1 = stopwatch.timer()  # PSF optimisation
    t2 = stopwatch.timer()  # GPU work (OlaGPU set-up and image update)
    t3 = stopwatch.timer()  # loading of frames
    t4 = stopwatch.timer()  # complete loop
    t4.start()
    for i in np.arange(1, N + 1):
        print('Processing frame %d/%d \r' % (i, N))

        # Load next observed image
        t3.start()
        y = yload(i)
        print("TIMER load: %s" % (t3.elapsed(),))

        # Compute mask for determining saturated regions
        mask_gpu = 1. * cua.to_gpu(y < 1.)
        y_gpu = cua.to_gpu(y)

        # ------------------------------------------------------------------------
        # PSF estimation
        # ------------------------------------------------------------------------
        # Create OlaGPU instance with current estimate of latent image
        t2.start()
        X = olaGPU.OlaGPU(x_gpu, sf, 'valid', winaux=winaux)
        print("TIMER GPU:  %s" % (t2.elapsed(),))

        t1.start()
        # PSF estimation for given estimate of latent image and current observation
        f = X.deconv(y_gpu, mode='lbfgsb', alpha=f_alpha, beta=f_beta,
                     maxfun=optiter, verbose=10)
        print("TIMER Optimization:  %s" % (t1.elapsed(),))

        fs = f[0]

        # Normalize PSF kernels to sum up to one
        fs = gputools.normalize(fs)

        # ------------------------------------------------------------------------
        # Latent image estimation
        # ------------------------------------------------------------------------
        # Create OlaGPU instance with estimated PSF
        t2.start()
        F = olaGPU.OlaGPU(fs, sx, 'valid', winaux=winaux)

        # Latent image estimation by performing one gradient descent step
        # multiplicative update is used which preserves positivity;
        # tol keeps the denominator away from zero and is also the lower
        # bound of the update factor
        factor_gpu = F.cnvtp(mask_gpu * y_gpu) / (F.cnvtp(mask_gpu * F.cnv(x_gpu)) + tol)
        gputools.cliplower_GPU(factor_gpu, tol)
        x_gpu = x_gpu * factor_gpu
        # Clip the image to the maximum found away from the padded border
        x_max = x_gpu.get()[sf[0]:-sf[0], sf[1]:-sf[1]].max()

        gputools.clipupper_GPU(x_gpu, x_max)
        print("TIMER GPU:  %s" % (t2.elapsed(),))

        # ------------------------------------------------------------------------
        # For backup intermediate results
        # ------------------------------------------------------------------------
        if backup or i == N:
            # Write intermediate results to disk incl. input
            y_img = y_gpu.get() * 1e5
            fitsTools.asinhScale(y_img, 450, -50, minCut=0.0, maxCut=40000, fname=yname(i))

            # Crop image to input size
            xi = (x_gpu.get()[sf2[0]:-sf2[0], sf2[1]:-sf2[1]] / x_max) * 1e5

            fitsTools.fitsStats(xi)
            fitsTools.asinhScale(xi, 450, -50, minCut=0.0, maxCut=40000, fname=xname(i))

            # Concatenate PSF kernels for ease of visualisation
            f = imagetools.gridF(fs, csf)
            f = f * 1e5

            fitsTools.asinhScale(f, 450, -50, minCut=0.0, maxCut=40000, fname=fname(i))

        # Displaying intermediate results inside the loop is currently
        # disabled (see the doshow message above).

    tf = t.clock()
    print('Time elapsed for total image sequence %.3f' % (tf - ti))
    # ----------------------------------------------------------------------------
    print("TOTAL: %.3f" % (t4.elapsed()))
    print("OptimizeCPUtime %.3f %.3f" % (t1.getTotal(), 100 * (t1.getTotal() / t4.getTotal())))
    print("GPUtime %.3f %.3f" % (t2.getTotal(), 100 * (t2.getTotal() / t4.getTotal())))
    print("LoadTime %.3f %.3f" % (t3.getTotal(), 100 * (t3.getTotal() / t4.getTotal())))
Пример #7
0
  raise NotImplementedError('Yet to be implemented')
    
if __name__ == "__main__":

    import pylab as pl
    import gputools
    import imagetools as it

    # Demo: split an image into windowed patches on the GPU, recombine them
    # with gputools.ola_GPU_test and display the real part of the result.
    # NOTE(review): presumably the output should reproduce the input image;
    # confirm against gputools.
    image = pl.imread('butcher.png')

    # Patch grid and overlap handed to win2winaux
    grid = (7, 7)
    patch_overlap = 0.5
    winaux = it.win2winaux(image.shape, grid, patch_overlap)

    image_gpu = cua.to_gpu(image)
    patches_gpu = gputools.chop_pad_GPU(
        image_gpu, winaux.csf, winaux.sw, winaux.nhop, dtype='complex')

    # Only the recombination step is timed
    t_begin = time.clock()
    merged_gpu = gputools.ola_GPU_test(
        patches_gpu, winaux.csf, winaux.sw, winaux.nhop)
    print("Time elapsed %.6f" % (time.clock() - t_begin))

    # The patches were created as complex data; show the real part only
    pl.imshow(np.real(merged_gpu.get()))
    pl.show()

    #offset = (0,0)
Пример #8
0
def galaxyConvolution(mode='same'):
    """
    Convolve a galaxy image with a PSF on the GPU (OlaGPU) and with SciPy.

    The galaxy image is read from a FITS file, zoomed by a factor of four,
    made strictly positive and scaled to a peak value of 4000. The PSF is
    read from a second FITS file and normalised to unit sum. Both are
    converted to single precision. The GPU result and the result of
    scipy.signal.fftconvolve are written out as log10-scaled JPEG files and
    compared with numpy.testing.assert_allclose.

    :param mode: the resulted convolution area (valid, same, full)
    :type mode: str

    :return: None
    """
    import pycuda.autoinit
    import pycuda.gpuarray as cua
    import numpy as np
    import pyfits as pf
    import scipy
    from scipy import signal
    from scipy import ndimage
    import time

    # Load VMDB libraries
    import gputools
    import imagetools
    import olaGPU as ola

    np.random.seed(123)

    rule = "-------------------"

    def positive_log10(values):
        # log10 of a copy in which all non-positive entries are set to 1e-5
        clipped = values.copy()
        clipped[clipped <= 0.] = 1e-5
        return np.log10(clipped)

    # Galaxy image (galaxy57.fits and galaxy22.fits from the same directory
    # were used as alternative inputs)
    image = pf.getdata(
        '/Users/sammy/EUCLID/vissim-python/objects/galaxy37.fits')
    # oversampling = 12 leads to out of memory error...
    image = ndimage.zoom(image, 4.0, order=0)
    image[image <= 0] = 1e-8
    image /= np.max(image)
    image *= 4000.
    image = image.astype(np.float32)
    scipy.misc.imsave('original.jpg', np.log10(image))

    # PSF kernel, normalised to unit sum
    psf = pf.getdata('/Users/sammy/EUCLID/vissim-python/data/psf4x.fits')
    psf /= np.sum(psf)
    psf = psf.astype(np.float32)
    scipy.misc.imsave('kernel.jpg', np.log10(psf))

    print("%s %s" % (image.shape, psf.shape))

    image_gpu = cua.to_gpu(image)

    # Patch grid for OlaGPU; csf affects the needed memory (e.g. (5, 5))
    image_shape = image.shape
    grid = (2, 2)
    patch_overlap = 0.2

    # One copy of the PSF per patch of the grid
    psf_stack = np.tile(psf, (np.prod(grid), 1, 1))

    windows = imagetools.win2winaux(image_shape, grid, patch_overlap)

    # GPU convolution, timed from the construction of the OlaGPU instance
    # until the result has been copied back to the host
    print(rule)
    print("Create Kernel")
    t_begin = time.clock()
    convolver = ola.OlaGPU(psf_stack, image_shape, mode=mode, winaux=windows)

    print("Compute Convolution ")
    gpu_out = convolver.cnv(image_gpu)
    print("Copy to CPU ")
    result = gpu_out.get()
    print("Time elapsed: %.4f" % (time.clock() - t_begin))

    # CPU reference
    print(rule)
    print("SciPy FFT convolution ")
    t_begin = time.clock()
    conv = signal.fftconvolve(image, psf, mode=mode)
    print("Time elapsed: %.4f" % (time.clock() - t_begin))
    print(rule)

    # Save images
    scipy.misc.imsave('convolvedCUDA.jpg', positive_log10(result))
    scipy.misc.imsave('convolvedSciPy.jpg', positive_log10(conv))

    print('Shapes: %s %s' % (result.shape, conv.shape))
    print('Max: %s %s' % (np.max(result), np.max(conv)))

    print('Differences:')
    if 'full' in mode:
        # For 'full' output a 100 pixel border is left out of the comparison
        label = '> 1e-2?'
        got = result[100:-100, 100:-100]
        expected = conv[100:-100, 100:-100]
        tolerance = 1e-2
    else:
        label = '> 1e-4?'
        got = result
        expected = conv
        tolerance = 1e-4
    print(label)
    print(np.testing.assert_allclose(got, expected, rtol=tolerance))
Пример #9
0
def simpleConvolution(mode='valid'):
    """
    Simple convolution test with random data. Tests if the GPU convolution
    returns the same result as SciPy.signal.fftconvolve. This example
    uses single precision and an image that is about 2k x 2k (2099 x 2100)
    and a kernel that is about 300 x 300 (299 x 299).

    For the modes 'full' and 'same' a border of 100 pixels is excluded from
    the comparison and a relative tolerance of 1e-5 is used; for any other
    mode the complete arrays are compared with a relative tolerance of 1e-6.

    :param mode: the resulted convolution area (valid, same, full)
    :type mode: str

    :return: None
    :raises AssertionError: if the GPU and the SciPy results differ by more
                            than the tolerance
    """
    import pycuda.autoinit
    import pycuda.gpuarray as cua
    import numpy as np
    import scipy
    from scipy import signal
    import time

    # Load VMDB libraries
    import gputools
    import imagetools
    import olaGPU as ola

    np.random.seed(123)

    #data
    #don't make the array too large, not enough GPU memory
    x = np.random.random((2099, 2100)).astype(np.float32)
    scipy.misc.imsave('originalSimple.jpg', np.log10(x))

    #kernel
    kernel = np.random.random((299, 299)).astype(np.float32)
    kernel /= np.sum(kernel)
    scipy.misc.imsave('kernelSimple.jpg', np.log10(kernel))

    print("%s %s" % (x.shape, kernel.shape))

    x_gpu = cua.to_gpu(x)

    sx = x.shape
    csf = (5, 5)
    overlap = 0.5

    # one copy of the kernel per patch of the csf grid
    fs = np.tile(kernel, (np.prod(csf), 1, 1))

    winaux = imagetools.win2winaux(sx, csf, overlap)

    # The GPU timing covers the construction of the OlaGPU instance, the
    # convolution and the copy of the result back to the host.
    print("-------------------")
    print("Create CUDA Kernel")
    start = time.clock()
    F = ola.OlaGPU(fs, sx, mode=mode, winaux=winaux)

    print("Compute Convolution with the GPU using FFTs")
    yF_gpu = F.cnv(x_gpu)

    print("Copy results to CPU")
    result = yF_gpu.get()
    cutime = time.clock() - start

    print("Time elapsed: %.4f" % cutime)
    print("-------------------")

    print("SciPy FFT convolution on CPU")
    start = time.clock()
    conv = signal.fftconvolve(x, kernel, mode=mode)
    sptime = time.clock() - start
    print("Time elapsed: %.4f" % sptime)
    print("-------------------")
    print('CUDA is a factor of %.2f faster' % (sptime / cutime))

    #save images
    scipy.misc.imsave('convolvedCUDASimple.jpg', np.log10(result))
    scipy.misc.imsave('convolvedSciPySimple.jpg', np.log10(conv))

    print('\n\n\nShapes: %s %s' % (result.shape, conv.shape))
    print('Max values: %s %s' % (np.max(result), np.max(conv)))

    print('\n\nDifference:')
    # Each mode name has to be tested on its own: the former condition
    # "'full' or 'same' in mode" evaluated to the non-empty string 'full'
    # and was therefore always true, so the strict branch never ran.
    if 'full' in mode or 'same' in mode:
        print('> 1e-5?')
        np.testing.assert_allclose(result[100:-100, 100:-100],
                                   conv[100:-100, 100:-100],
                                   rtol=1e-5)
    else:
        print('> 1e-6?')
        np.testing.assert_allclose(result, conv, rtol=1e-6)
Пример #10
0
def process(opts):
    """
    Run online multi-frame blind deconvolution on a sequence of FITS frames.

    The latent image is initialised with the mean of the first ``opts.N0``
    frames. For each of the ``opts.N`` frames a grid of PSF kernels is then
    estimated from the current latent image and the frame (OlaGPU.deconv),
    after which the latent image is refined with one multiplicative update.
    Intermediate or final images are handed to fitsTools.asinhScale with
    file names inside a results directory whose name encodes the parameter
    settings.

    :param opts: options object; the attributes read here are ``doShow``,
                 ``backup``, ``N``, ``N0``, ``f_alpha``, ``f_beta``,
                 ``optiter``, ``tol`` and ``overwrite``

    :return: None
    :raises ValueError: if ``opts.N0`` is smaller than 1
    :raises SystemExit: with status 1 if the results directory exists and
                        ``opts.overwrite`` is not set, or if it cannot be
                        removed or created
    """
    import sys
    import time as t

    # ============================================================================
    # Specify some parameter settings
    # ----------------------------------------------------------------------------

    # Specify data path and file identifier
    DATAPATH = '/DATA/LSST/FITS'
    RESPATH = '../../../DATA/results'
    BASE_N = 141
    FILENAME = lambda i: '%s/v88827%03d-fz.R22.S11.fits' % (DATAPATH,
                                                            (BASE_N + i))
    ID = 'LSST'

    # ----------------------------------------------------------------------------
    # Specify parameter settings

    # General
    doshow = opts.doShow  # put 1 to show intermediate results
    backup = opts.backup  # put 1 to write intermediate results to disk
    N = opts.N  # how many frames to process
    N0 = opts.N0  # number of averaged frames for initialisation

    # Reject an empty initialisation set up front, i.e. before an existing
    # results directory is removed further below.
    if N0 < 1:
        raise ValueError("opts.N0 must be at least 1, got %r" % (N0,))

    # OlaGPU parameters
    sf = np.array([40, 40])  # estimated size of PSF
    csf = (3, 3)  # number of kernels across x and y direction
    overlap = 0.5  # overlap of neighboring patches in percent

    # Regularization parameters for kernel estimation
    f_alpha = opts.f_alpha  # promotes smoothness
    f_beta = opts.f_beta  # Thikhonov regularization
    optiter = opts.optiter  # number of iterations for minimization
    tol = opts.tol  # tolerance for when to stop minimization
    # ============================================================================

    # Create helper functions for file handling

    # # # HACK for chunking into available GPU mem # # #
    #     - loads one 1kx1k block out of the fits image
    xOffset = 2000
    yOffset = 0
    chunkSize = 1000
    yload = lambda i: 1. * fitsTools.readFITS(
        FILENAME(i), use_mask=True, norm=True)[yOffset:yOffset + chunkSize,
                                               xOffset:xOffset + chunkSize]

    # ----------------------------------------------------------------------------
    # Some more code for backuping the results
    # ----------------------------------------------------------------------------
    # For backup purposes
    EXPPATH = '%s/%s_sf%dx%d_csf%dx%d_maxiter%d_alpha%.2f_beta%.2f' % \
        (RESPATH, ID, sf[0], sf[1], csf[0], csf[1], optiter, f_alpha, f_beta)

    xname = lambda i: '%s/x_%04d.png' % (EXPPATH, i)
    yname = lambda i: '%s/y_%04d.png' % (EXPPATH, i)
    fname = lambda i: '%s/f_%04d.png' % (EXPPATH, i)

    # An existing results directory is only replaced on explicit request.
    # Only OSError is handled, so that e.g. a KeyboardInterrupt is not
    # reported as a file system problem, and failures exit with status 1.
    if os.path.exists(EXPPATH):
        if not opts.overwrite:
            print("[ERROR] results directory already exists, please remove or use '-o' to overwrite")
            sys.exit(1)
        try:
            rmtree(EXPPATH)
        except OSError:
            print("[ERROR] removing old results dir: %s" % EXPPATH)
            sys.exit(1)

    # Create results path if not existing
    try:
        os.makedirs(EXPPATH)
    except OSError:
        print("[ERROR] creating results dir: %s" % EXPPATH)
        sys.exit(1)

    print('Results are saved to: \n %s \n' % EXPPATH)

    # ----------------------------------------------------------------------------
    # For displaying intermediate results create target figure
    # ----------------------------------------------------------------------------
    if doshow:
        print("showing intermediate results is currently disabled..")

    # ----------------------------------------------------------------------------
    # Code for initialising the online multi-frame deconvolution
    # ----------------------------------------------------------------------------
    # Initialisation of latent image by averaging the first N0 frames.
    # Frames are numbered from 1, so the range has to include N0 itself;
    # stopping at N0 - 1 would sum one frame too few while still dividing
    # by N0.
    y0 = 0.
    for i in np.arange(1, N0 + 1):
        y0 += yload(i)

    y0 /= N0
    y_gpu = cua.to_gpu(y0)

    # Pad image since we perform deconvolution with valid boundary conditions
    x_gpu = gputools.impad_gpu(y_gpu, sf - 1)

    # Create windows for OlaGPU
    sx = y0.shape + sf - 1
    # Half PSF size, kept as integers because it is used for slicing below
    # (np.floor would return floats, which are not valid slice indices)
    sf2 = sf // 2
    winaux = imagetools.win2winaux(sx, csf, overlap)

    # ----------------------------------------------------------------------------
    # Loop over all frames and do online blind deconvolution
    # ----------------------------------------------------------------------------
    ti = t.clock()
    t1 = stopwatch.timer()  # PSF optimisation
    t2 = stopwatch.timer()  # GPU work (OlaGPU set-up and image update)
    t3 = stopwatch.timer()  # loading of frames
    t4 = stopwatch.timer()  # complete loop
    t4.start()
    for i in np.arange(1, N + 1):
        print('Processing frame %d/%d \r' % (i, N))

        # Load next observed image
        t3.start()
        y = yload(i)
        print("TIMER load: %s" % (t3.elapsed(),))

        # Compute mask for determining saturated regions
        mask_gpu = 1. * cua.to_gpu(y < 1.)
        y_gpu = cua.to_gpu(y)

        # ------------------------------------------------------------------------
        # PSF estimation
        # ------------------------------------------------------------------------
        # Create OlaGPU instance with current estimate of latent image
        t2.start()
        X = olaGPU.OlaGPU(x_gpu, sf, 'valid', winaux=winaux)
        print("TIMER GPU:  %s" % (t2.elapsed(),))

        t1.start()
        # PSF estimation for given estimate of latent image and current observation
        f = X.deconv(y_gpu,
                     mode='lbfgsb',
                     alpha=f_alpha,
                     beta=f_beta,
                     maxfun=optiter,
                     verbose=10)
        print("TIMER Optimization:  %s" % (t1.elapsed(),))

        fs = f[0]

        # Normalize PSF kernels to sum up to one
        fs = gputools.normalize(fs)

        # ------------------------------------------------------------------------
        # Latent image estimation
        # ------------------------------------------------------------------------
        # Create OlaGPU instance with estimated PSF
        t2.start()
        F = olaGPU.OlaGPU(fs, sx, 'valid', winaux=winaux)

        # Latent image estimation by performing one gradient descent step
        # multiplicative update is used which preserves positivity;
        # tol keeps the denominator away from zero and is also the lower
        # bound of the update factor
        factor_gpu = F.cnvtp(
            mask_gpu * y_gpu) / (F.cnvtp(mask_gpu * F.cnv(x_gpu)) + tol)
        gputools.cliplower_GPU(factor_gpu, tol)
        x_gpu = x_gpu * factor_gpu
        # Clip the image to the maximum found away from the padded border
        x_max = x_gpu.get()[sf[0]:-sf[0], sf[1]:-sf[1]].max()

        gputools.clipupper_GPU(x_gpu, x_max)
        print("TIMER GPU:  %s" % (t2.elapsed(),))

        # ------------------------------------------------------------------------
        # For backup intermediate results
        # ------------------------------------------------------------------------
        if backup or i == N:
            # Write intermediate results to disk incl. input
            y_img = y_gpu.get() * 1e5
            fitsTools.asinhScale(y_img,
                                 450,
                                 -50,
                                 minCut=0.0,
                                 maxCut=40000,
                                 fname=yname(i))

            # Crop image to input size
            xi = (x_gpu.get()[sf2[0]:-sf2[0], sf2[1]:-sf2[1]] / x_max) * 1e5

            fitsTools.fitsStats(xi)
            fitsTools.asinhScale(xi,
                                 450,
                                 -50,
                                 minCut=0.0,
                                 maxCut=40000,
                                 fname=xname(i))

            # Concatenate PSF kernels for ease of visualisation
            f = imagetools.gridF(fs, csf)
            f = f * 1e5

            fitsTools.asinhScale(f,
                                 450,
                                 -50,
                                 minCut=0.0,
                                 maxCut=40000,
                                 fname=fname(i))

        # Displaying intermediate results inside the loop is currently
        # disabled (see the doshow message above).

    tf = t.clock()
    print('Time elapsed for total image sequence %.3f' % (tf - ti))
    # ----------------------------------------------------------------------------
    print("TOTAL: %.3f" % (t4.elapsed()))
    print("OptimizeCPUtime %.3f %.3f" % (t1.getTotal(), 100 *
                                         (t1.getTotal() / t4.getTotal())))
    print("GPUtime %.3f %.3f" % (t2.getTotal(), 100 *
                                 (t2.getTotal() / t4.getTotal())))
    print("LoadTime %.3f %.3f" % (t3.getTotal(), 100 *
                                  (t3.getTotal() / t4.getTotal())))