# ---------------------------------------------------------------------------- # Initialisation of latent image by averaging the first 20 frames y0 = 0. for i in np.arange(1, N0): y0 += yload(i) y0 /= N0 y_gpu = cua.to_gpu(y0) # Pad image since we perform deconvolution with valid boundary conditions x_gpu = gputools.impad_gpu(y_gpu, sf - 1) # Create windows for OlaGPU sx = y0.shape + sf - 1 sf2 = np.floor(sf / 2) winaux = imagetools.win2winaux(sx, csf, overlap) # ---------------------------------------------------------------------------- # Loop over all frames and do online blind deconvolution # ---------------------------------------------------------------------------- import time as t ti = t.clock() for i in np.arange(1, N + 1): print 'Processing frame %d/%d \r' % (i, N) # Load next observed image y = yload(i) # Compute mask for determining saturated regions
def galaxyConvolution(mode='same',
                      galaxy_file='/Users/sammy/EUCLID/vissim-python/objects/galaxy37.fits',
                      psf_file='/Users/sammy/EUCLID/vissim-python/data/psf4x.fits'):
    """
    Convolution test with a galaxy image and a PSF read from FITS files.

    The galaxy image is oversampled by a factor of four, clamped to be
    strictly positive, scaled to a peak value of 4000 and converted to
    single precision. The PSF is normalised to unit sum. The image is then
    convolved with the PSF once on the GPU (OlaGPU) and once on the CPU
    (scipy.signal.fftconvolve) and the two results are compared.

    Writes original.jpg, kernel.jpg, convolvedCUDA.jpg and
    convolvedSciPy.jpg (log10 scaled) to the current working directory.

    :param mode: the resulted convolution area (valid, same, full)
    :type mode: str
    :param galaxy_file: path of the FITS file holding the galaxy image
    :type galaxy_file: str
    :param psf_file: path of the FITS file holding the PSF kernel
    :type psf_file: str

    :return: None
    :raises AssertionError: if the GPU and the SciPy results differ by more
                            than the relative tolerance (1e-2 on the interior
                            for 'full', 1e-4 on the whole array otherwise)
    """
    import pycuda.autoinit
    import pycuda.gpuarray as cua
    import numpy as np
    import pyfits as pf
    import scipy
    from scipy import signal
    from scipy import ndimage
    import time

    # Load VMDB libraries
    import gputools
    import imagetools
    import olaGPU as ola

    np.random.seed(123)

    #data
    # other inputs that have been used: objects/galaxy57.fits, objects/galaxy22.fits
    x = pf.getdata(galaxy_file)
    x = ndimage.zoom(x, 4.0, order=0)  #oversampling = 12 leads to out of memory error...
    x[x <= 0] = 1e-8  # keep log10 finite when saving the image
    x /= np.max(x)
    x *= 4000.
    x = x.astype(np.float32)
    scipy.misc.imsave('original.jpg', np.log10(x))

    #kernel
    kernel = pf.getdata(psf_file)
    kernel /= np.sum(kernel)
    kernel = kernel.astype(np.float32)
    scipy.misc.imsave('kernel.jpg', np.log10(kernel))

    print('%s %s' % (x.shape, kernel.shape))

    x_gpu = cua.to_gpu(x)

    sx = x.shape
    #csf = (5,5)  #affects needed memory
    csf = (2, 2)
    overlap = 0.2

    # the same kernel is used for every patch
    fs = np.tile(kernel, (np.prod(csf), 1, 1))
    winaux = imagetools.win2winaux(sx, csf, overlap)

    print("-------------------")
    print("Create Kernel")
    # wall-clock time: time.clock() reports CPU time on Unix and no longer
    # exists in Python >= 3.8
    start = time.time()
    F = ola.OlaGPU(fs, sx, mode=mode, winaux=winaux)

    print("Compute Convolution ")
    yF_gpu = F.cnv(x_gpu)

    print("Copy to CPU ")
    result = yF_gpu.get()
    print("Time elapsed: %.4f" % (time.time() - start))

    #other way around
    #fs_gpu = cua.to_gpu(fs)
    #X = ola.OlaGPU(x_gpu, kernel.shape, mode=mode, winaux=winaux)
    #yX_gpu = X.cnv(fs_gpu)
    #result2 = yX_gpu.get()

    print("-------------------")
    print("SciPy FFT convolution ")
    start = time.time()
    conv = signal.fftconvolve(x, kernel, mode=mode)
    print("Time elapsed: %.4f" % (time.time() - start))
    print("-------------------")

    #save images; non-positive values are clamped so that log10 is defined
    r = result.copy()
    r[r <= 0.] = 1e-5
    c = conv.copy()
    c[c <= 0.] = 1e-5
    scipy.misc.imsave('convolvedCUDA.jpg', np.log10(r))
    scipy.misc.imsave('convolvedSciPy.jpg', np.log10(c))

    print('Shapes: %s %s' % (result.shape, conv.shape))
    print('Max: %s %s' % (np.max(result), np.max(conv)))

    print('Differences:')
    # assert_allclose returns None and raises on mismatch, so its result is
    # not printed
    if 'full' in mode:
        print('> 1e-2?')
        np.testing.assert_allclose(result[100:-100, 100:-100],
                                   conv[100:-100, 100:-100],
                                   rtol=1e-2)
    else:
        print('> 1e-4?')
        np.testing.assert_allclose(result, conv, rtol=1e-4)
raise NotImplementedError('Yet to be implemented') if __name__ == "__main__": import pylab as pl import gputools import imagetools as it x = pl.imread('butcher.png') #x = np.random.rand(,1200).astype(np.float32) #x = it.rgb2gray(x) csf = (7,7) overlap = 0.5 winaux = it.win2winaux(x.shape, csf, overlap) x_gpu = cua.to_gpu(x) xs_gpu = gputools.chop_pad_GPU(x_gpu, winaux.csf, winaux.sw, winaux.nhop, dtype='complex') start = time.clock() x_gpu = gputools.ola_GPU_test(xs_gpu, winaux.csf, winaux.sw, winaux.nhop) print "Time elapsed %.6f" % (time.clock()-start) x = np.real(x_gpu.get()) #it.cellplot(xs,csf) pl.imshow(x) pl.show() #offset = (0,0)
# ---------------------------------------------------------------------------- # Initialisation of latent image by averaging the first 20 frames y0 = 0. for i in np.arange(1,N0): y0 += yload(i) y0 /= N0 y_gpu = cua.to_gpu(y0) # Pad image since we perform deconvolution with valid boundary conditions x_gpu = gputools.impad_gpu(y_gpu, sf-1) # Create windows for OlaGPU sx = y0.shape + sf - 1 sf2 = np.floor(sf/2) winaux = imagetools.win2winaux(sx, csf, overlap) # ---------------------------------------------------------------------------- # Loop over all frames and do online blind deconvolution # ---------------------------------------------------------------------------- import time as t ti = t.clock() for i in np.arange(1,N+1): print 'Processing frame %d/%d \r' % (i,N) # Load next observed image y = yload(i) # Compute mask for determining saturated regions
def simpleConvolution(mode='valid'):
    """
    Simple convolution test with random data.

    Tests if the GPU convolution returns the same result as
    SciPy.signal.fftconvolve. This example uses single precision and an
    image that is about 2k x 2k and a kernel that is about 300 x 300.

    Writes originalSimple.jpg, kernelSimple.jpg, convolvedCUDASimple.jpg and
    convolvedSciPySimple.jpg (log10 scaled) to the current working directory.

    :param mode: the resulted convolution area (valid, same, full)
    :type mode: str

    :return: None
    :raises AssertionError: if the GPU and the SciPy results differ by more
                            than the relative tolerance (1e-5 on the interior
                            for 'full' and 'same', 1e-6 on the whole array
                            otherwise)
    """
    import pycuda.autoinit
    import pycuda.gpuarray as cua
    import numpy as np
    import scipy
    from scipy import signal
    import time

    # Load VMDB libraries
    import gputools
    import imagetools
    import olaGPU as ola

    np.random.seed(123)

    #data
    #don't make the array too large, not enough GPU memory
    x = np.random.random((2099, 2100)).astype(np.float32)
    scipy.misc.imsave('originalSimple.jpg', np.log10(x))

    #kernel
    kernel = np.random.random((299, 299)).astype(np.float32)
    kernel /= np.sum(kernel)
    scipy.misc.imsave('kernelSimple.jpg', np.log10(kernel))

    print('%s %s' % (x.shape, kernel.shape))

    x_gpu = cua.to_gpu(x)

    sx = x.shape
    csf = (5, 5)
    overlap = 0.5

    # the same kernel is used for every patch
    fs = np.tile(kernel, (np.prod(csf), 1, 1))
    winaux = imagetools.win2winaux(sx, csf, overlap)

    print("-------------------")
    print("Create CUDA Kernel")
    # wall-clock time is what matters when comparing GPU against CPU;
    # time.clock() reports CPU time on Unix and no longer exists in
    # Python >= 3.8
    start = time.time()
    F = ola.OlaGPU(fs, sx, mode=mode, winaux=winaux)

    print("Compute Convolution with the GPU using FFTs")
    yF_gpu = F.cnv(x_gpu)

    print("Copy results to CPU")
    result = yF_gpu.get()
    cutime = time.time() - start
    print("Time elapsed: %.4f" % cutime)

    print("-------------------")
    print("SciPy FFT convolution on CPU")
    start = time.time()
    conv = signal.fftconvolve(x, kernel, mode=mode)
    sptime = time.time() - start
    print("Time elapsed: %.4f" % sptime)
    print("-------------------")
    print('CUDA is a factor of %.2f faster' % (sptime / cutime))

    #save images
    scipy.misc.imsave('convolvedCUDASimple.jpg', np.log10(result))
    scipy.misc.imsave('convolvedSciPySimple.jpg', np.log10(conv))

    print('\n\n\nShapes: %s %s' % (result.shape, conv.shape))
    print('Max values: %s %s' % (np.max(result), np.max(conv)))

    print('\n\nDifference:')
    # NOTE: the former test "'full' or 'same' in mode" was always true, so
    # the strict comparison below was never reached.
    # assert_allclose returns None and raises on mismatch, so its result is
    # not printed.
    if 'full' in mode or 'same' in mode:
        # only the interior is compared, a 100 pixel border is ignored
        print('> 1e-5?')
        np.testing.assert_allclose(result[100:-100, 100:-100],
                                   conv[100:-100, 100:-100],
                                   rtol=1e-5)
    else:
        print('> 1e-6?')
        np.testing.assert_allclose(result, conv, rtol=1e-6)
def process(opts):
    """
    Online multi-frame blind deconvolution of a sequence of FITS frames.

    A 1k x 1k chunk of every frame is loaded. The latent image is
    initialised with the average of the first ``opts.N0`` frames. Then, for
    each of the ``opts.N`` frames, the PSF kernels are estimated from the
    current latent image and the latent image is refined with one
    multiplicative update step. Intermediate (or only the final) results are
    written as PNG files to a results directory whose name encodes the
    parameter settings.

    :param opts: options object; the attributes read here are doShow,
                 backup, N, N0, f_alpha, f_beta, optiter, tol and overwrite

    :return: None
    :raises ValueError: if opts.N0 is smaller than one
    :raises SystemExit: if the results directory already exists and
                        opts.overwrite is not set, or if it cannot be
                        removed / created
    """
    # ============================================================================
    # Specify some parameter settings
    # ----------------------------------------------------------------------------
    # Specify data path and file identifier
    DATAPATH = '/DATA/LSST/FITS'
    RESPATH = '../../../DATA/results'
    BASE_N = 141
    FILENAME = lambda i: '%s/v88827%03d-fz.R22.S11.fits' % (DATAPATH, (BASE_N + i))
    ID = 'LSST'

    # ----------------------------------------------------------------------------
    # Specify parameter settings

    # General
    doshow = opts.doShow      # put 1 to show intermediate results
    backup = opts.backup      # put 1 to write intermediate results to disk
    N = opts.N                # how many frames to process
    N0 = opts.N0              # number of averaged frames for initialisation

    if N0 < 1:
        raise ValueError('N0 must be at least 1, got %r' % (N0,))

    # OlaGPU parameters
    sf = np.array([40, 40])   # estimated size of PSF
    csf = (3, 3)              # number of kernels across x and y direction
    overlap = 0.5             # overlap of neighboring patches in percent

    # Regularization parameters for kernel estimation
    f_alpha = opts.f_alpha    # promotes smoothness
    f_beta = opts.f_beta      # Thikhonov regularization
    optiter = opts.optiter    # number of iterations for minimization
    tol = opts.tol            # tolerance for when to stop minimization

    # ============================================================================
    # Create helper functions for file handling

    # # # HACK for chunking into available GPU mem # # #
    # - loads one 1kx1k block out of the fits image
    xOffset = 2000
    yOffset = 0
    chunkSize = 1000
    yload = lambda i: 1. * fitsTools.readFITS(
        FILENAME(i), use_mask=True, norm=True)[yOffset:yOffset + chunkSize,
                                               xOffset:xOffset + chunkSize]

    # ----------------------------------------------------------------------------
    # Some more code for backuping the results
    # ----------------------------------------------------------------------------
    # For backup purposes
    EXPPATH = '%s/%s_sf%dx%d_csf%dx%d_maxiter%d_alpha%.2f_beta%.2f' % \
        (RESPATH, ID, sf[0], sf[1], csf[0], csf[1], optiter, f_alpha, f_beta)

    xname = lambda i: '%s/x_%04d.png' % (EXPPATH, i)
    yname = lambda i: '%s/y_%04d.png' % (EXPPATH, i)
    fname = lambda i: '%s/f_%04d.png' % (EXPPATH, i)

    # Errors terminate with a non-zero exit status. Only OSError is caught so
    # that e.g. KeyboardInterrupt is not swallowed.
    if os.path.exists(EXPPATH) and opts.overwrite:
        try:
            rmtree(EXPPATH)
        except OSError:
            print("[ERROR] removing old results dir: %s" % EXPPATH)
            raise SystemExit(1)
    elif os.path.exists(EXPPATH):
        print("[ERROR] results directory already exists, please remove or use '-o' to overwrite")
        raise SystemExit(1)

    # Create results path if not existing
    try:
        os.makedirs(EXPPATH)
    except OSError:
        print("[ERROR] creating results dir: %s" % EXPPATH)
        raise SystemExit(1)

    print('Results are saved to: \n %s \n' % EXPPATH)

    # ----------------------------------------------------------------------------
    # For displaying intermediate results create target figure
    # ----------------------------------------------------------------------------
    # Create figure for displaying intermediate results
    if doshow:
        print("showing intermediate results is currently disabled..")
        #pl.figure(1)
        #pl.draw()

    # ----------------------------------------------------------------------------
    # Code for initialising the online multi-frame deconvolution
    # ----------------------------------------------------------------------------
    # Initialisation of latent image by averaging the first N0 frames.
    # Frames are numbered from 1, so the range has to include N0 itself;
    # otherwise only N0-1 frames are summed while dividing by N0.
    y0 = 0.
    for i in np.arange(1, N0 + 1):
        y0 += yload(i)

    y0 /= N0
    y_gpu = cua.to_gpu(y0)

    # Pad image since we perform deconvolution with valid boundary conditions
    x_gpu = gputools.impad_gpu(y_gpu, sf - 1)

    # Create windows for OlaGPU
    sx = y0.shape + sf - 1
    sf2 = sf // 2             # integer half size, used as slice index below
    winaux = imagetools.win2winaux(sx, csf, overlap)

    # ----------------------------------------------------------------------------
    # Loop over all frames and do online blind deconvolution
    # ----------------------------------------------------------------------------
    import time as t
    # wall-clock time; time.clock() no longer exists in Python >= 3.8
    ti = t.time()

    t1 = stopwatch.timer()    # optimisation (PSF estimation)
    t2 = stopwatch.timer()    # GPU work
    t3 = stopwatch.timer()    # loading of frames
    t4 = stopwatch.timer()    # total
    t4.start()

    for i in np.arange(1, N + 1):
        print('Processing frame %d/%d \r' % (i, N))

        # Load next observed image
        t3.start()
        y = yload(i)
        print("TIMER load: %s" % (t3.elapsed(),))

        # Compute mask for determining saturated regions
        mask_gpu = 1. * cua.to_gpu(y < 1.)
        y_gpu = cua.to_gpu(y)

        # ------------------------------------------------------------------------
        # PSF estimation
        # ------------------------------------------------------------------------
        # Create OlaGPU instance with current estimate of latent image
        t2.start()
        X = olaGPU.OlaGPU(x_gpu, sf, 'valid', winaux=winaux)
        print("TIMER GPU:  %s" % (t2.elapsed(),))

        t1.start()
        # PSF estimation for given estimate of latent image and current observation
        f = X.deconv(y_gpu, mode='lbfgsb', alpha=f_alpha, beta=f_beta,
                     maxfun=optiter, verbose=10)
        print("TIMER Optimization:  %s" % (t1.elapsed(),))

        fs = f[0]

        # Normalize PSF kernels to sum up to one
        fs = gputools.normalize(fs)

        # ------------------------------------------------------------------------
        # Latent image estimation
        # ------------------------------------------------------------------------
        # Create OlaGPU instance with estimated PSF
        t2.start()
        F = olaGPU.OlaGPU(fs, sx, 'valid', winaux=winaux)

        # Latent image estimation by performing one gradient descent step
        # multiplicative update is used which preserves positivity
        factor_gpu = F.cnvtp(mask_gpu * y_gpu) / (F.cnvtp(mask_gpu * F.cnv(x_gpu)) + tol)
        gputools.cliplower_GPU(factor_gpu, tol)
        x_gpu = x_gpu * factor_gpu
        # the upper clipping value is the maximum away from the padded border
        x_max = x_gpu.get()[sf[0]:-sf[0], sf[1]:-sf[1]].max()
        gputools.clipupper_GPU(x_gpu, x_max)
        print("TIMER GPU:  %s" % (t2.elapsed(),))

        # ------------------------------------------------------------------------
        # For backup intermediate results
        # ------------------------------------------------------------------------
        if backup or i == N:
            # Write intermediate results to disk incl. input
            y_img = y_gpu.get() * 1e5
            fitsTools.asinhScale(y_img, 450, -50, minCut=0.0, maxCut=40000,
                                 fname=yname(i))

            # Crop image to input size
            xi = (x_gpu.get()[sf2[0]:-sf2[0], sf2[1]:-sf2[1]] / x_max) * 1e5
            fitsTools.fitsStats(xi)
            fitsTools.asinhScale(xi, 450, -50, minCut=0.0, maxCut=40000,
                                 fname=xname(i))

            # Concatenate PSF kernels for ease of visualisation
            f = imagetools.gridF(fs, csf)
            f = f * 1e5
            fitsTools.asinhScale(f, 450, -50, minCut=0.0, maxCut=40000,
                                 fname=fname(i))

        # ------------------------------------------------------------------------
        # For displaying intermediate results
        # ------------------------------------------------------------------------
        # Disabled; kept for reference (note that `sy` is not defined):
        #
        # if np.mod(i,1) == 0 and doshow:
        #     pl.figure(1)
        #     pl.subplot(121)
        #     # what is SY?
        #     pl.imshow(imagetools.crop(x_gpu.get(),sy,np.ceil(sf/2)),'gray')
        #     pl.title('x after %d observations' % i)
        #     pl.subplot(122)
        #     pl.imshow(y_gpu.get(),'gray')
        #     pl.title('y(%d)' % i)
        #     pl.draw()
        #     pl.figure(2)
        #     pl.title('PSF(%d)' % i)
        #     imagetools.cellplot(fs, winaux.csf)
        #     tf = t.clock()
        #     print('Time elapsed after %d frames %.3f' % (i,(tf-ti)))

    tf = t.time()
    print('Time elapsed for total image sequence %.3f' % (tf - ti))
    # ----------------------------------------------------------------------------

    # Second column: share of the total time in percent
    print("TOTAL: %.3f" % (t4.elapsed()))
    print("OptimizeCPUtime %.3f %.3f" % (t1.getTotal(), 100 * (t1.getTotal() / t4.getTotal())))
    print("GPUtime %.3f %.3f" % (t2.getTotal(), 100 * (t2.getTotal() / t4.getTotal())))
    print("LoadTime %.3f %.3f" % (t3.getTotal(), 100 * (t3.getTotal() / t4.getTotal())))
def galaxyConvolution(mode='same',
                      galaxy_file='/Users/sammy/EUCLID/vissim-python/objects/galaxy37.fits',
                      psf_file='/Users/sammy/EUCLID/vissim-python/data/psf4x.fits'):
    """
    Convolution test with a galaxy image and a PSF read from FITS files.

    The galaxy image is oversampled by a factor of four, clamped to be
    strictly positive, scaled to a peak value of 4000 and converted to
    single precision. The PSF is normalised to unit sum. The image is then
    convolved with the PSF once on the GPU (OlaGPU) and once on the CPU
    (scipy.signal.fftconvolve) and the two results are compared.

    Writes original.jpg, kernel.jpg, convolvedCUDA.jpg and
    convolvedSciPy.jpg (log10 scaled) to the current working directory.

    :param mode: the resulted convolution area (valid, same, full)
    :type mode: str
    :param galaxy_file: path of the FITS file holding the galaxy image
    :type galaxy_file: str
    :param psf_file: path of the FITS file holding the PSF kernel
    :type psf_file: str

    :return: None
    :raises AssertionError: if the GPU and the SciPy results differ by more
                            than the relative tolerance (1e-2 on the interior
                            for 'full', 1e-4 on the whole array otherwise)
    """
    import pycuda.autoinit
    import pycuda.gpuarray as cua
    import numpy as np
    import pyfits as pf
    import scipy
    from scipy import signal
    from scipy import ndimage
    import time

    # Load VMDB libraries
    import gputools
    import imagetools
    import olaGPU as ola

    np.random.seed(123)

    #data
    # other inputs that have been used: objects/galaxy57.fits, objects/galaxy22.fits
    x = pf.getdata(galaxy_file)
    x = ndimage.zoom(x, 4.0, order=0)  #oversampling = 12 leads to out of memory error...
    x[x <= 0] = 1e-8  # keep log10 finite when saving the image
    x /= np.max(x)
    x *= 4000.
    x = x.astype(np.float32)
    scipy.misc.imsave('original.jpg', np.log10(x))

    #kernel
    kernel = pf.getdata(psf_file)
    kernel /= np.sum(kernel)
    kernel = kernel.astype(np.float32)
    scipy.misc.imsave('kernel.jpg', np.log10(kernel))

    print('%s %s' % (x.shape, kernel.shape))

    x_gpu = cua.to_gpu(x)

    sx = x.shape
    #csf = (5,5)  #affects needed memory
    csf = (2, 2)
    overlap = 0.2

    # the same kernel is used for every patch
    fs = np.tile(kernel, (np.prod(csf), 1, 1))
    winaux = imagetools.win2winaux(sx, csf, overlap)

    print("-------------------")
    print("Create Kernel")
    # wall-clock time: time.clock() reports CPU time on Unix and no longer
    # exists in Python >= 3.8
    start = time.time()
    F = ola.OlaGPU(fs, sx, mode=mode, winaux=winaux)

    print("Compute Convolution ")
    yF_gpu = F.cnv(x_gpu)

    print("Copy to CPU ")
    result = yF_gpu.get()
    print("Time elapsed: %.4f" % (time.time() - start))

    #other way around
    #fs_gpu = cua.to_gpu(fs)
    #X = ola.OlaGPU(x_gpu, kernel.shape, mode=mode, winaux=winaux)
    #yX_gpu = X.cnv(fs_gpu)
    #result2 = yX_gpu.get()

    print("-------------------")
    print("SciPy FFT convolution ")
    start = time.time()
    conv = signal.fftconvolve(x, kernel, mode=mode)
    print("Time elapsed: %.4f" % (time.time() - start))
    print("-------------------")

    #save images; non-positive values are clamped so that log10 is defined
    r = result.copy()
    r[r <= 0.] = 1e-5
    c = conv.copy()
    c[c <= 0.] = 1e-5
    scipy.misc.imsave('convolvedCUDA.jpg', np.log10(r))
    scipy.misc.imsave('convolvedSciPy.jpg', np.log10(c))

    print('Shapes: %s %s' % (result.shape, conv.shape))
    print('Max: %s %s' % (np.max(result), np.max(conv)))

    print('Differences:')
    # assert_allclose returns None and raises on mismatch, so its result is
    # not printed
    if 'full' in mode:
        print('> 1e-2?')
        np.testing.assert_allclose(result[100:-100, 100:-100],
                                   conv[100:-100, 100:-100],
                                   rtol=1e-2)
    else:
        print('> 1e-4?')
        np.testing.assert_allclose(result, conv, rtol=1e-4)
def simpleConvolution(mode='valid'):
    """
    Simple convolution test with random data.

    Tests if the GPU convolution returns the same result as
    SciPy.signal.fftconvolve. This example uses single precision and an
    image that is about 2k x 2k and a kernel that is about 300 x 300.

    Writes originalSimple.jpg, kernelSimple.jpg, convolvedCUDASimple.jpg and
    convolvedSciPySimple.jpg (log10 scaled) to the current working directory.

    :param mode: the resulted convolution area (valid, same, full)
    :type mode: str

    :return: None
    :raises AssertionError: if the GPU and the SciPy results differ by more
                            than the relative tolerance (1e-5 on the interior
                            for 'full' and 'same', 1e-6 on the whole array
                            otherwise)
    """
    import pycuda.autoinit
    import pycuda.gpuarray as cua
    import numpy as np
    import scipy
    from scipy import signal
    import time

    # Load VMDB libraries
    import gputools
    import imagetools
    import olaGPU as ola

    np.random.seed(123)

    #data
    #don't make the array too large, not enough GPU memory
    x = np.random.random((2099, 2100)).astype(np.float32)
    scipy.misc.imsave('originalSimple.jpg', np.log10(x))

    #kernel
    kernel = np.random.random((299, 299)).astype(np.float32)
    kernel /= np.sum(kernel)
    scipy.misc.imsave('kernelSimple.jpg', np.log10(kernel))

    print('%s %s' % (x.shape, kernel.shape))

    x_gpu = cua.to_gpu(x)

    sx = x.shape
    csf = (5, 5)
    overlap = 0.5

    # the same kernel is used for every patch
    fs = np.tile(kernel, (np.prod(csf), 1, 1))
    winaux = imagetools.win2winaux(sx, csf, overlap)

    print("-------------------")
    print("Create CUDA Kernel")
    # wall-clock time is what matters when comparing GPU against CPU;
    # time.clock() reports CPU time on Unix and no longer exists in
    # Python >= 3.8
    start = time.time()
    F = ola.OlaGPU(fs, sx, mode=mode, winaux=winaux)

    print("Compute Convolution with the GPU using FFTs")
    yF_gpu = F.cnv(x_gpu)

    print("Copy results to CPU")
    result = yF_gpu.get()
    cutime = time.time() - start
    print("Time elapsed: %.4f" % cutime)

    print("-------------------")
    print("SciPy FFT convolution on CPU")
    start = time.time()
    conv = signal.fftconvolve(x, kernel, mode=mode)
    sptime = time.time() - start
    print("Time elapsed: %.4f" % sptime)
    print("-------------------")
    print('CUDA is a factor of %.2f faster' % (sptime / cutime))

    #save images
    scipy.misc.imsave('convolvedCUDASimple.jpg', np.log10(result))
    scipy.misc.imsave('convolvedSciPySimple.jpg', np.log10(conv))

    print('\n\n\nShapes: %s %s' % (result.shape, conv.shape))
    print('Max values: %s %s' % (np.max(result), np.max(conv)))

    print('\n\nDifference:')
    # NOTE: the former test "'full' or 'same' in mode" was always true, so
    # the strict comparison below was never reached.
    # assert_allclose returns None and raises on mismatch, so its result is
    # not printed.
    if 'full' in mode or 'same' in mode:
        # only the interior is compared, a 100 pixel border is ignored
        print('> 1e-5?')
        np.testing.assert_allclose(result[100:-100, 100:-100],
                                   conv[100:-100, 100:-100],
                                   rtol=1e-5)
    else:
        print('> 1e-6?')
        np.testing.assert_allclose(result, conv, rtol=1e-6)
def process(opts):
    """
    Online multi-frame blind deconvolution of a sequence of FITS frames.

    A 1k x 1k chunk of every frame is loaded. The latent image is
    initialised with the average of the first ``opts.N0`` frames. Then, for
    each of the ``opts.N`` frames, the PSF kernels are estimated from the
    current latent image and the latent image is refined with one
    multiplicative update step. Intermediate (or only the final) results are
    written as PNG files to a results directory whose name encodes the
    parameter settings.

    :param opts: options object; the attributes read here are doShow,
                 backup, N, N0, f_alpha, f_beta, optiter, tol and overwrite

    :return: None
    :raises ValueError: if opts.N0 is smaller than one
    :raises SystemExit: if the results directory already exists and
                        opts.overwrite is not set, or if it cannot be
                        removed / created
    """
    # ============================================================================
    # Specify some parameter settings
    # ----------------------------------------------------------------------------
    # Specify data path and file identifier
    DATAPATH = '/DATA/LSST/FITS'
    RESPATH = '../../../DATA/results'
    BASE_N = 141
    FILENAME = lambda i: '%s/v88827%03d-fz.R22.S11.fits' % (DATAPATH, (BASE_N + i))
    ID = 'LSST'

    # ----------------------------------------------------------------------------
    # Specify parameter settings

    # General
    doshow = opts.doShow      # put 1 to show intermediate results
    backup = opts.backup      # put 1 to write intermediate results to disk
    N = opts.N                # how many frames to process
    N0 = opts.N0              # number of averaged frames for initialisation

    if N0 < 1:
        raise ValueError('N0 must be at least 1, got %r' % (N0,))

    # OlaGPU parameters
    sf = np.array([40, 40])   # estimated size of PSF
    csf = (3, 3)              # number of kernels across x and y direction
    overlap = 0.5             # overlap of neighboring patches in percent

    # Regularization parameters for kernel estimation
    f_alpha = opts.f_alpha    # promotes smoothness
    f_beta = opts.f_beta      # Thikhonov regularization
    optiter = opts.optiter    # number of iterations for minimization
    tol = opts.tol            # tolerance for when to stop minimization

    # ============================================================================
    # Create helper functions for file handling

    # # # HACK for chunking into available GPU mem # # #
    # - loads one 1kx1k block out of the fits image
    xOffset = 2000
    yOffset = 0
    chunkSize = 1000
    yload = lambda i: 1. * fitsTools.readFITS(
        FILENAME(i), use_mask=True, norm=True)[yOffset:yOffset + chunkSize,
                                               xOffset:xOffset + chunkSize]

    # ----------------------------------------------------------------------------
    # Some more code for backuping the results
    # ----------------------------------------------------------------------------
    # For backup purposes
    EXPPATH = '%s/%s_sf%dx%d_csf%dx%d_maxiter%d_alpha%.2f_beta%.2f' % \
        (RESPATH, ID, sf[0], sf[1], csf[0], csf[1], optiter, f_alpha, f_beta)

    xname = lambda i: '%s/x_%04d.png' % (EXPPATH, i)
    yname = lambda i: '%s/y_%04d.png' % (EXPPATH, i)
    fname = lambda i: '%s/f_%04d.png' % (EXPPATH, i)

    # Errors terminate with a non-zero exit status. Only OSError is caught so
    # that e.g. KeyboardInterrupt is not swallowed.
    if os.path.exists(EXPPATH) and opts.overwrite:
        try:
            rmtree(EXPPATH)
        except OSError:
            print("[ERROR] removing old results dir: %s" % EXPPATH)
            raise SystemExit(1)
    elif os.path.exists(EXPPATH):
        print("[ERROR] results directory already exists, please remove or use '-o' to overwrite")
        raise SystemExit(1)

    # Create results path if not existing
    try:
        os.makedirs(EXPPATH)
    except OSError:
        print("[ERROR] creating results dir: %s" % EXPPATH)
        raise SystemExit(1)

    print('Results are saved to: \n %s \n' % EXPPATH)

    # ----------------------------------------------------------------------------
    # For displaying intermediate results create target figure
    # ----------------------------------------------------------------------------
    # Create figure for displaying intermediate results
    if doshow:
        print("showing intermediate results is currently disabled..")
        #pl.figure(1)
        #pl.draw()

    # ----------------------------------------------------------------------------
    # Code for initialising the online multi-frame deconvolution
    # ----------------------------------------------------------------------------
    # Initialisation of latent image by averaging the first N0 frames.
    # Frames are numbered from 1, so the range has to include N0 itself;
    # otherwise only N0-1 frames are summed while dividing by N0.
    y0 = 0.
    for i in np.arange(1, N0 + 1):
        y0 += yload(i)

    y0 /= N0
    y_gpu = cua.to_gpu(y0)

    # Pad image since we perform deconvolution with valid boundary conditions
    x_gpu = gputools.impad_gpu(y_gpu, sf - 1)

    # Create windows for OlaGPU
    sx = y0.shape + sf - 1
    sf2 = sf // 2             # integer half size, used as slice index below
    winaux = imagetools.win2winaux(sx, csf, overlap)

    # ----------------------------------------------------------------------------
    # Loop over all frames and do online blind deconvolution
    # ----------------------------------------------------------------------------
    import time as t
    # wall-clock time; time.clock() no longer exists in Python >= 3.8
    ti = t.time()

    t1 = stopwatch.timer()    # optimisation (PSF estimation)
    t2 = stopwatch.timer()    # GPU work
    t3 = stopwatch.timer()    # loading of frames
    t4 = stopwatch.timer()    # total
    t4.start()

    for i in np.arange(1, N + 1):
        print('Processing frame %d/%d \r' % (i, N))

        # Load next observed image
        t3.start()
        y = yload(i)
        print("TIMER load: %s" % (t3.elapsed(),))

        # Compute mask for determining saturated regions
        mask_gpu = 1. * cua.to_gpu(y < 1.)
        y_gpu = cua.to_gpu(y)

        # ------------------------------------------------------------------------
        # PSF estimation
        # ------------------------------------------------------------------------
        # Create OlaGPU instance with current estimate of latent image
        t2.start()
        X = olaGPU.OlaGPU(x_gpu, sf, 'valid', winaux=winaux)
        print("TIMER GPU:  %s" % (t2.elapsed(),))

        t1.start()
        # PSF estimation for given estimate of latent image and current observation
        f = X.deconv(y_gpu, mode='lbfgsb', alpha=f_alpha, beta=f_beta,
                     maxfun=optiter, verbose=10)
        print("TIMER Optimization:  %s" % (t1.elapsed(),))

        fs = f[0]

        # Normalize PSF kernels to sum up to one
        fs = gputools.normalize(fs)

        # ------------------------------------------------------------------------
        # Latent image estimation
        # ------------------------------------------------------------------------
        # Create OlaGPU instance with estimated PSF
        t2.start()
        F = olaGPU.OlaGPU(fs, sx, 'valid', winaux=winaux)

        # Latent image estimation by performing one gradient descent step
        # multiplicative update is used which preserves positivity
        factor_gpu = F.cnvtp(mask_gpu * y_gpu) / (F.cnvtp(mask_gpu * F.cnv(x_gpu)) + tol)
        gputools.cliplower_GPU(factor_gpu, tol)
        x_gpu = x_gpu * factor_gpu
        # the upper clipping value is the maximum away from the padded border
        x_max = x_gpu.get()[sf[0]:-sf[0], sf[1]:-sf[1]].max()
        gputools.clipupper_GPU(x_gpu, x_max)
        print("TIMER GPU:  %s" % (t2.elapsed(),))

        # ------------------------------------------------------------------------
        # For backup intermediate results
        # ------------------------------------------------------------------------
        if backup or i == N:
            # Write intermediate results to disk incl. input
            y_img = y_gpu.get() * 1e5
            fitsTools.asinhScale(y_img, 450, -50, minCut=0.0, maxCut=40000,
                                 fname=yname(i))

            # Crop image to input size
            xi = (x_gpu.get()[sf2[0]:-sf2[0], sf2[1]:-sf2[1]] / x_max) * 1e5
            fitsTools.fitsStats(xi)
            fitsTools.asinhScale(xi, 450, -50, minCut=0.0, maxCut=40000,
                                 fname=xname(i))

            # Concatenate PSF kernels for ease of visualisation
            f = imagetools.gridF(fs, csf)
            f = f * 1e5
            fitsTools.asinhScale(f, 450, -50, minCut=0.0, maxCut=40000,
                                 fname=fname(i))

        # ------------------------------------------------------------------------
        # For displaying intermediate results
        # ------------------------------------------------------------------------
        # Disabled; kept for reference (note that `sy` is not defined):
        #
        # if np.mod(i,1) == 0 and doshow:
        #     pl.figure(1)
        #     pl.subplot(121)
        #     # what is SY?
        #     pl.imshow(imagetools.crop(x_gpu.get(),sy,np.ceil(sf/2)),'gray')
        #     pl.title('x after %d observations' % i)
        #     pl.subplot(122)
        #     pl.imshow(y_gpu.get(),'gray')
        #     pl.title('y(%d)' % i)
        #     pl.draw()
        #     pl.figure(2)
        #     pl.title('PSF(%d)' % i)
        #     imagetools.cellplot(fs, winaux.csf)
        #     tf = t.clock()
        #     print('Time elapsed after %d frames %.3f' % (i,(tf-ti)))

    tf = t.time()
    print('Time elapsed for total image sequence %.3f' % (tf - ti))
    # ----------------------------------------------------------------------------

    # Second column: share of the total time in percent
    print("TOTAL: %.3f" % (t4.elapsed()))
    print("OptimizeCPUtime %.3f %.3f" % (t1.getTotal(), 100 * (t1.getTotal() / t4.getTotal())))
    print("GPUtime %.3f %.3f" % (t2.getTotal(), 100 * (t2.getTotal() / t4.getTotal())))
    print("LoadTime %.3f %.3f" % (t3.getTotal(), 100 * (t3.getTotal() / t4.getTotal())))