Exemplo n.º 1
0
    def run(self, x, cpu_core):
        """
        Create, plan and execute a NUFFT_hsa while pinned to a fixed CPU core.

        The current process is first bound to ``cpu_core``; otherwise the
        process running on one core can be switched to another core.

        Parameters
        ----------
        x:
            Image handed to ``NUFFT_hsa.to_device``.
        cpu_core: int
            Index of the CPU core this process is pinned to.

        Returns
        -------
        The host copy (``.get()``) of the result of the last forward
        transform.
        """
        pid = os.getpid()
        print('pid=', pid)

        # Control the CPU affinity without spawning a shell when the platform
        # exposes the system call directly; fall back to taskset otherwise.
        if hasattr(os, 'sched_setaffinity'):
            try:
                os.sched_setaffinity(pid, {int(cpu_core)})
            except OSError as err:
                # taskset only reported failures, so pinning stays best-effort.
                print('could not pin pid %d to core %d: %s'
                      % (pid, cpu_core, err))
        else:
            os.system("taskset -p -c %d %d" % (cpu_core, pid))

        # create NUFFT
        NUFFT = NUFFT_hsa(self.API, self.device_number, 0)

        # plan the NUFFT
        NUFFT.plan(self.om, self.Nd, self.Kd, self.Jd)

        # send the image to device
        gx = NUFFT.to_device(x)

        # carry out 100 forward transforms
        for _ in range(100):
            gy = NUFFT.forward(gx)

        # return the result as a host array
        return gy.get()
Exemplo n.º 2
0
def test_cuda():
    """
    Compare NUFFT_cpu with NUFFT_hsa running on the 'cuda' API.

    The test plans both objects on the packaged 2D trajectory, times the
    forward transform on each, prints whether the results agree, then times
    the 'cg' and 'L1TVOLS' solvers on both and shows the four reconstructions
    in a 2 x 2 matplotlib figure.
    """
    import time

    import numpy
    import matplotlib.pyplot

    # load example image
    import pkg_resources
    import scipy.misc

    ## Define the source of data
    DATA_PATH = pkg_resources.resource_filename('pynufft', 'src/data/')

    # scipy.misc.imresize was removed from SciPy; take every second pixel of
    # the ascent image instead, as the other tests do, to match Nd below.
    image = scipy.misc.ascent()[::2, ::2]
    # numpy.float was only an alias of the builtin float and has been removed.
    image = image.astype(float) / numpy.max(image[...])

    Nd = (256, 256)  # image space size
    Kd = (512, 512)  # k-space size
    Jd = (6, 6)  # interpolation size

    # load k-space points as M * 2 array
    om = numpy.load(DATA_PATH + 'om2D.npz')['arr_0']

    # Show the shape of om
    print('the shape of om = ', om.shape)

    # initiating NUFFT_cpu object
    nfft = NUFFT_cpu()  # CPU NUFFT class

    # Plan the nfft object
    nfft.plan(om, Nd, Kd, Jd)

    # initiating NUFFT_hsa object
    NufftObj = NUFFT_hsa('cuda', 0, 0)

    # Plan the NufftObj (similar to NUFFT_cpu)
    NufftObj.plan(om, Nd, Kd, Jd)

    # average time of a CPU forward transform
    t0 = time.time()
    for _ in range(10):
        y = nfft.forward(image)
    t_cpu = (time.time() - t0) / 10.0

    ## Moving image to gpu
    ## gx is an gpu array, dtype = complex64
    gx = NufftObj.to_device(image)

    # average time of a device forward transform
    t0 = time.time()
    for _ in range(100):
        gy = NufftObj.forward(gx)
    t_cu = (time.time() - t0) / 100

    print('t_cpu = ', t_cpu)
    print('t_cuda =, ', t_cu)

    print('gy close? = ',
          numpy.allclose(y, gy.get(), atol=numpy.linalg.norm(y) * 1e-3))
    print("acceleration=", t_cpu / t_cu)

    maxiter = 100

    # conjugate gradient on both back-ends
    t0 = time.time()
    x_cpu_cg = nfft.solve(y, 'cg', maxiter=maxiter)
    t1 = time.time() - t0

    t0 = time.time()
    x_cuda_cg = NufftObj.solve(gy, 'cg', maxiter=maxiter)
    t2 = time.time() - t0
    print(t1, t2)
    print('acceleration of cg=', t1 / t2)

    # L1TVOLS on both back-ends
    t0 = time.time()
    x_cpu_TV = nfft.solve(y, 'L1TVOLS', maxiter=maxiter, rho=2)
    t1 = time.time() - t0

    t0 = time.time()
    x_cuda_TV = NufftObj.solve(gy, 'L1TVOLS', maxiter=maxiter, rho=2)
    t2 = time.time() - t0
    print(t1, t2)
    print('acceleration of TV=', t1 / t2)

    # one panel per reconstruction
    panels = (
        (x_cpu_cg.real, 'CG_cpu'),
        (x_cuda_cg.get().real, 'CG_cuda'),
        (x_cpu_TV.real, 'TV_cpu'),
        (x_cuda_TV.get().real, 'TV_cuda'),
    )
    for position, (data, title) in enumerate(panels, start=1):
        matplotlib.pyplot.subplot(2, 2, position)
        matplotlib.pyplot.imshow(data, cmap=matplotlib.cm.gray)
        matplotlib.pyplot.title(title)
    matplotlib.pyplot.show()

    NufftObj.release()
    del NufftObj
Exemplo n.º 3
0
    return np.c_[dim1, dim2, dim3]

#samples = convert_mask_to_locations_3D(np.ones(Il.shape[0:3]))
#samples = 2 * np.pi * samples

# Random sample locations: 2085640 points in 3 dimensions.
samples = 3.1415 * np.random.randn(2085640, 3)

# Create a NUFFT object on the OpenCL API and plan it for a batch of four
# 128^3 volumes transformed along all three axes.
nufftObj = NUFFT_hsa(
    API='ocl',
    platform_number=1,
    device_number=0,
    verbosity=0,
)
nufftObj.plan(
    om=samples,
    Nd=(128, 128, 128),
    Kd=(256, 256, 256),
    Jd=(5, 5, 5),
    batch=4,
    ft_axes=(0, 1, 2),
    radix=1,
)

# Casting the dtype to be complex64
dtype = np.complex64

# Computing the forward pass of the NUFFT

# JML: not needed in the new version
# nufftObj.x_Nd = nufftObj.thr.to_device(Il[:3].astype(dtype))
# gx = nufftObj.thr.copy_array(nufftObj.x_Nd)

#x = numpy.einsum('cxyz -> xyzc', Il[0:3]).copy() # coil must be the last dimension; Assume it is a C-order array
Exemplo n.º 4
0
print('setting image dimension Nd...', Nd)
print('setting spectrum dimension Kd...', Kd)
print('setting interpolation size Jd...', Jd)

print('Fourier transform...')
# time.clock() was removed in Python 3.8; perf_counter() is its documented
# replacement for measuring elapsed intervals.
time_pre = time.perf_counter()
# Preprocessing NUFFT: time each stage of setting up the device object.
if gpu:
    time_1 = time.perf_counter()
    NufftObj = NUFFT_hsa()
    time_2 = time.perf_counter()

    NufftObj.plan(om, Nd, Kd, Jd)
    time_3 = time.perf_counter()
    # NufftObj.offload('cuda')  # for GPU computation
    NufftObj.offload('ocl')  # for multi-CPU computation
    time_4 = time.perf_counter()
    dtype = np.complex64
    time_5 = time.perf_counter()

    print("send image to device")
    NufftObj.x_Nd = NufftObj.thr.to_device(image.astype(dtype))
    print("copy image to gx")
    time_6 = time.perf_counter()
    gx = NufftObj.thr.copy_array(NufftObj.x_Nd)
    time_7 = time.perf_counter()
    # The last label used '\c' (an invalid escape); every other label is
    # prefixed with '/', so it is written the same way here.
    print('total:', time_7 - time_1,
          '/Decl obj: ', time_2 - time_1,
          '/plan: ', time_3 - time_2,
          '/offload: ', time_4 - time_3,
          '/to_device: ', time_6 - time_5,
          '/copy_array: ', time_7 - time_6)
Exemplo n.º 5
0
import numpy
from pynufft import NUFFT_hsa
import scipy.misc
import matplotlib

# Problem size: image grid, oversampled k-space grid and interpolator size.
Nd = (256, 256)
Kd = (512, 512)
Jd = (6, 6)
om = numpy.random.randn(65536, 2)

# scipy.misc.imresize was removed from SciPy; keep every second pixel of the
# ascent image so that the image shape matches Nd.
x = scipy.misc.ascent()[::2, ::2]

# Split the trajectory by the sign of its first coordinate, one half per
# device.
om1 = om[om[:, 0] > 0, :]
om2 = om[om[:, 0] <= 0, :]

# First half on the OpenCL API.
NufftObj1 = NUFFT_hsa('ocl')
NufftObj1.plan(om1, Nd, Kd, Jd)

# Second half on the CUDA API.
NufftObj2 = NUFFT_hsa('cuda')
NufftObj2.plan(om2, Nd, Kd, Jd)

y1 = NufftObj1.forward(x)
y2 = NufftObj2.forward(x)



Exemplo n.º 6
0
def _benchmark_radix(nufft_obj, label, radix, om, Nd, Kd, Jd, image,
                     sense_number, maxiter, ycpu, xcpu):
    """Plan, benchmark and check one NUFFT_hsa object for a given radix.

    The object is planned with ``batch=sense_number`` and ``radix``, timed
    with ``benchmark``, compared with the CPU results ``ycpu`` / ``xcpu`` by
    relative error, and released.

    Returns
    -------
    (forward time, adjoint time) as reported by ``benchmark``.
    """
    nufft_obj.plan(om, Nd, Kd, Jd, batch=sense_number, radix=radix)
    gx = nufft_obj.thr.to_device(image.astype(numpy.complex64))
    t_forward, t_adjoint, y_dev, x_dev = benchmark(nufft_obj, gx, maxiter)
    print(label, int(om.shape[0]), t_forward, t_adjoint)

    erry = numpy.linalg.norm(y_dev.get() - ycpu) / numpy.linalg.norm(ycpu)
    errx = numpy.linalg.norm(x_dev.get() - xcpu) / numpy.linalg.norm(xcpu)
    if erry > 1e-6 or errx > 1e-6:
        print("degraded accuracy:", sense_number, erry, errx)
    else:
        print("Pass test for coil: ", sense_number, erry, errx)

    nufft_obj.release()
    return t_forward, t_adjoint


def test_mCoil(sense_number):
    """Benchmark NUFFT_cpu against NUFFT_hsa planned with radix 1, 2 and 3.

    A random 64^3 complex image and a random 3D trajectory are used. The CPU
    object is benchmarked once, then each device object is planned with
    ``batch=sense_number``, benchmarked and compared with the CPU result.

    Parameters
    ----------
    sense_number:
        Value passed as ``batch`` to every ``plan`` call.

    Returns
    -------
    tuple
        (CPU forward, CPU adjoint, radix-1 forward, radix-1 adjoint,
        radix-2 forward, radix-2 adjoint, radix-3 forward, radix-3 adjoint)
        timings.
    """
    from pynufft import NUFFT_cpu, NUFFT_hsa

    Nd = (64, 64, 64)  # time grid, tuple
    image = numpy.random.randn(64, 64, 64) * (1.0 + 0.0j)

    Kd = (128, 128, 128)  # frequency grid, tuple
    Jd = (6, 6, 6)  # interpolator
    om = numpy.random.randn(int((128**3) / 32), 3) * 1.5
    print('om.shape, ', om.shape)
    m = om.shape[0]
    print(om.shape)

    NufftObj_cpu = NUFFT_cpu()
    api = 'ocl'
    proc = 0
    # All device objects are created up front, before any of them is planned.
    radix_objects = [NUFFT_hsa(api, proc, 0) for _ in range(3)]

    NufftObj_cpu.plan(om, Nd, Kd, Jd, batch=sense_number)

    # The CPU reference is benchmarked with a single iteration.
    maxiter = 1
    tcpu_forward, tcpu_adjoint, ycpu, xcpu = benchmark(NufftObj_cpu, image,
                                                       maxiter)
    print('CPU', int(m), tcpu_forward, tcpu_adjoint)

    maxiter = 20
    timings = [tcpu_forward, tcpu_adjoint]
    for radix, nufft_obj in enumerate(radix_objects, start=1):
        timings.extend(
            _benchmark_radix(nufft_obj, 'radix-%d' % radix, radix, om, Nd, Kd,
                             Jd, image, sense_number, maxiter, ycpu, xcpu))

    del radix_objects, NufftObj_cpu
    return tuple(timings)
Exemplo n.º 7
0
class NUFFT(Singleton):
    """  GPU implementation of N-D non uniform Fast Fourier Transform class.

    Attributes
    ----------
    samples: np.ndarray
        the mask samples in the Fourier domain, rescaled by 2 * pi.
    shape: tuple of int
        shape of the image (necessarily a square/cubic matrix).
    nufftObj: The pynufft object
        depending on the required computational platform
    platform: string, 'opencl' or 'cuda'
        string indicating which hardware platform will be used to compute the
        NUFFT
    Kd: tuple
        size of the frequency grid, for regridding.
    Jd: tuple
        size of the interpolator kernel.
    nb_coils: int
        Number of coils used to acquire the signal in case of multiarray
        receiver coils acquisition. If nb_coils > 1, please organize data as
        n_coils X data_per_coil
    dim: int
        number of dimensions of the image (second dimension of samples).
    """
    numOfInstances = 0

    def __init__(self,
                 samples,
                 shape,
                 platform='cuda',
                 Kd=None,
                 Jd=None,
                 n_coils=1,
                 verbosity=0):
        """ Initialize the 'NUFFT' class.

        Parameters
        ----------
        samples: np.ndarray
            the mask samples in the Fourier domain.
        shape: tuple of int
            shape of the image (necessarily a square/cubic matrix).
        platform: string, 'opencl' or 'cuda'
            string indicating which hardware platform will be used to
            compute the NUFFT
        Kd: int, tuple, list or None
            size of the frequency grid, for regridding. If int, will be
            evaluated to (Kd,)*nb_dim of the image. If None, twice the image
            shape is used.
        Jd: int, tuple, list or None
            Size of the interpolator kernel. If int, will be evaluated
            to (Jd,)*dims image. If None, 5 is used along every dimension.
        n_coils: int
            Number of coils used to acquire the signal in case of multiarray
            receiver coils acquisition
        verbosity: int
            a progress message is printed when > 0; the value is also
            forwarded to the pynufft object.

        Raises
        ------
        ValueError
            if n_coils is not an integer >= 1, if PyNUFFT is not installed,
            if Kd or Jd has an unsupported type, or if platform is neither
            'opencl' nor 'cuda'.
        """
        # Check the type first so that a non-numeric value gives the
        # documented ValueError instead of a TypeError from the comparison.
        if (not isinstance(n_coils, int) or isinstance(n_coils, bool)
                or n_coils < 1):
            raise ValueError('The number of coils should be an integer >= 1')
        if not pynufft_available:
            raise ValueError('PyNUFFT Package is not installed, please '
                             'consider using `gpuNUFFT` or install the '
                             'PyNUFFT package')
        self.nb_coils = n_coils
        self.shape = shape
        self.platform = platform
        self.samples = samples * (2 * np.pi)  # Pynufft use samples in
        # [-pi, pi[ instead of [-0.5, 0.5[
        self.dim = samples.shape[1]  # number of dimensions of the image

        # Preferential options are used when Kd / Jd are left to None.
        self.Kd = self._per_axis(Kd, self.dim,
                                 tuple([2 * ix for ix in shape]), 'Kd')
        self.Jd = self._per_axis(Jd, self.dim, (5, ) * self.dim, 'Jd')

        for axis, size in enumerate(self.shape):
            # The assert is kept so the exception type seen by callers does
            # not change.
            assert size <= self.Kd[axis], ('size of frequency grid must be '
                                           'greater or equal than the image '
                                           'size')
        if verbosity > 0:
            print('Creating the NUFFT object...')

        if self.platform == 'opencl':
            api = 'ocl'
            notice = ('Attemping to use OpenCL plateform. Make sure to '
                      'have  all the dependecies installed')
        elif self.platform == 'cuda':
            api = 'cuda'
            notice = ('Attemping to use Cuda plateform. Make sure to '
                      'have  all the dependecies installed and '
                      'to create only one instance of NUFFT GPU')
        else:
            raise ValueError('Wrong type of platform. Platform must be '
                             '\'opencl\' or \'cuda\'')

        warn(notice)
        Singleton.__init__(self)
        if self.getNumInstances() > 1:
            warn('You have created more than one NUFFT object. '
                 'This could cause memory leaks')
        self.nufftObj = NUFFT_hsa(API=api,
                                  platform_number=None,
                                  device_number=None,
                                  verbosity=verbosity)

        self.nufftObj.plan(
            om=self.samples,
            Nd=self.shape,
            Kd=self.Kd,
            Jd=self.Jd,
            batch=1,  # TODO self.nb_coils,
            ft_axes=tuple(range(self.dim)),
            radix=None)

    @staticmethod
    def _per_axis(value, dim, default, name):
        """ Return value as a tuple with one entry per image dimension.

        None gives default, an int is repeated dim times and a tuple or list
        is converted to a tuple. Any other type raises a ValueError, so that
        the attribute is never silently left unset.
        """
        if value is None:
            return default
        if isinstance(value, int):
            return (value, ) * dim
        if isinstance(value, (tuple, list)):
            return tuple(value)
        raise ValueError('{0} must be an int, a tuple or None'.format(name))

    def __del__(self):
        # This is an important destructor to ensure that the device memory
        # is freed
        # TODO this is still not freeing the memory right on device.
        # Mostly issue with reikna library.
        # Refer : https://github.com/fjarri/reikna/issues/53
        # __init__ may have raised before these attributes were set, so they
        # are looked up defensively.
        nufft_obj = getattr(self, 'nufftObj', None)
        if nufft_obj is None:
            return
        if getattr(self, 'platform', None) in ('opencl', 'cuda'):
            nufft_obj.release()

    def _forward(self, img):
        """ Send one image to the device and return the host copy of its
        forward NUFFT.
        """
        # Send data to the mCPU/GPU platform
        self.nufftObj.x_Nd = self.nufftObj.thr.to_device(
            img.astype(np.complex64))
        gx = self.nufftObj.thr.copy_array(self.nufftObj.x_Nd)
        # Forward operator of the NUFFT
        return self.nufftObj.forward(gx).get()

    def _adjoint(self, x):
        """ Send one coefficient array to the device and return the host copy
        of its adjoint NUFFT.
        """
        device_x = self.nufftObj.thr.to_device(x.astype(np.complex64))
        return self.nufftObj.adjoint(device_x).get()

    def op(self, img):
        """ This method calculates the masked non-cartesian Fourier transform
        of an image.

        Parameters
        ----------
        img: np.ndarray
            input array with the same shape as shape; when nb_coils > 1 it is
            indexed by coil along its first axis.

        Returns
        -------
        x: np.ndarray
            masked Fourier transform of the input image, divided by
            sqrt(prod(Kd)).
        """
        if self.nb_coils == 1:
            y = np.squeeze(self._forward(img))
        else:
            # One transform per coil, stacked along a new first axis.
            y = np.asarray([
                np.squeeze(self._forward(np.copy(img[ch])))
                for ch in range(self.nb_coils)
            ])
        return y * 1.0 / np.sqrt(np.prod(self.Kd))

    def adj_op(self, x):
        """ This method calculates inverse masked non-uniform Fourier
        transform of a 1-D coefficients array.

        Parameters
        ----------
        x: np.ndarray
            masked non-uniform Fourier transform data; when nb_coils > 1 it
            is indexed by coil along its first axis.

        Returns
        -------
        img: np.ndarray
            adjoint transform of the input coefficients, multiplied by
            sqrt(prod(Kd)).
        """
        if self.nb_coils == 1:
            img = np.squeeze(self._adjoint(x))
        else:
            per_coil = [
                self._adjoint(np.copy(x[ch])) for ch in range(self.nb_coils)
            ]
            img = np.asarray(np.squeeze(per_coil))
        return img * np.sqrt(np.prod(self.Kd))
Exemplo n.º 8
0
    #     om = numpy.load('/home/sram/UCL/DATA/G/3D_Angio/greg_3D.npz')['arr_0'][0:int(m), :]
    print(om.shape)
    from pynufft import NUFFT_cpu, NUFFT_hsa  #, NUFFT_memsave
    # from pynufft import NUFFT_memsave
    NufftObj_cpu = NUFFT_cpu()
    #     NufftObj_hsa = NUFFT_hsa()
    NufftObj_hsa = NUFFT_hsa('cuda', 0, 0)

    import time
    t0 = time.time()
    NufftObj_cpu.plan(om, Nd, Kd, Jd)
    t1 = time.time()
    #     NufftObj_hsa.plan(om, Nd, Kd, Jd)
    t12 = time.time()
    RADIX = 1
    NufftObj_hsa.plan(om, Nd, Kd, Jd, radix=RADIX)
    t2 = time.time()
    # proc = 0 # GPU
    #     proc = 1 # gpu
    #     NufftObj_hsa.offload(API = 'ocl',   platform_number = proc, device_number = 0)
    t22 = time.time()
    #     NufftObj_memsave.offload(API = 'ocl',   platform_number = proc, device_number = 0)
    # NufftObj_memsave.offload(API = 'cuda',   platform_number = 0, device_number = 0)
    t3 = time.time()
    #     if proc is 0:
    #         print('CPU')
    #     else:
    #         print('GPU')
    print('Number of samples = ', om.shape[0])
    #     print('planning time of CPU = ', t1 - t0)
    #     print('planning time of HSA = ', t12 - t1)
Exemplo n.º 9
0
def test_opencl_multicoils():
    """
    Compare NUFFT_cpu with NUFFT_hsa for a batch of 8 coils.

    Both objects are planned on the packaged 2D trajectory with the same coil
    sensitivities. The test times ``adjoint_many2one`` on each, prints the
    agreement of ``forward_one2many`` / ``adjoint_many2one`` results, shows
    the two adjoint images briefly, then times the 'cg' solver on both and
    shows the per-coil reconstructions.
    """
    import time

    import numpy
    import matplotlib.pyplot

    # load example image
    import pkg_resources
    import scipy.misc

    ## Define the source of data
    DATA_PATH = pkg_resources.resource_filename('pynufft', 'src/data/')

    image = scipy.misc.ascent()[::2, ::2]
    # numpy.float was only an alias of the builtin float and has been removed.
    image = image.astype(float) / numpy.max(image[...])

    Nd = (256, 256)  # image space size
    Kd = (512, 512)  # k-space size
    Jd = (6, 6)  # interpolation size

    # load k-space points as M * 2 array
    om = numpy.load(DATA_PATH + 'om2D.npz')['arr_0']

    # Show the shape of om
    print('the shape of om = ', om.shape)

    batch = 8

    # initiating NUFFT_cpu object
    nfft = NUFFT_cpu()  # CPU NUFFT class

    # Plan the nfft object
    nfft.plan(om, Nd, Kd, Jd, batch=batch)

    # initiating NUFFT_hsa object: try CUDA first, then the two OpenCL
    # platforms. Only ordinary errors trigger the fallback, so that a
    # KeyboardInterrupt is not swallowed.
    try:
        NufftObj = NUFFT_hsa('cuda', 0, 0)
    except Exception:
        try:
            NufftObj = NUFFT_hsa('ocl', 1, 0)
        except Exception:
            NufftObj = NUFFT_hsa('ocl', 0, 0)

    # Plan the NufftObj (similar to NUFFT_cpu)
    NufftObj.plan(om, Nd, Kd, Jd, batch=batch, radix=2)

    # Coil sensitivities: all ones, with one band of rows (real part) and one
    # band of columns (imaginary part) rescaled for every second coil.
    coil_sense = numpy.ones(Nd + (batch, ), dtype=numpy.complex64)
    band = int(256 / batch)
    for cc in range(0, batch, 2):
        coil_sense[band * cc:band * (cc + 1), :, cc].real *= 0.1
        coil_sense[:, band * cc:band * (cc + 1), cc].imag *= -0.1

    NufftObj.set_sense(coil_sense)
    nfft.set_sense(coil_sense)
    y = nfft.forward_one2many(image)

    # average time of the CPU adjoint
    t0 = time.time()
    for _ in range(2):
        xx = nfft.adjoint_many2one(y)
    t_cpu = (time.time() - t0) / 2

    ## Moving image to gpu
    ## gx is an gpu array, dtype = complex64
    gx = NufftObj.to_device(image)

    gy = NufftObj.forward_one2many(gx)

    # average time of the device adjoint
    t0 = time.time()
    for _ in range(10):
        gxx = NufftObj.adjoint_many2one(gy)
    t_cu = (time.time() - t0) / 10
    print(y.shape, gy.get().shape)
    print('t_cpu = ', t_cpu)
    print('t_cuda =, ', t_cu)

    print('gy close? = ',
          numpy.allclose(y, gy.get(), atol=numpy.linalg.norm(y) * 1e-6))
    print('gy error = ',
          numpy.linalg.norm(y - gy.get()) / numpy.linalg.norm(y))
    print('gxx close? = ',
          numpy.allclose(xx, gxx.get(), atol=numpy.linalg.norm(xx) * 1e-6))
    print('gxx error = ',
          numpy.linalg.norm(xx - gxx.get()) / numpy.linalg.norm(xx))

    # show both adjoint images for one second
    matplotlib.pyplot.subplot(1, 2, 1)
    matplotlib.pyplot.imshow(xx[...].real, cmap=matplotlib.cm.gray)
    matplotlib.pyplot.title('Adjoint_cpu_coil')
    matplotlib.pyplot.subplot(1, 2, 2)
    matplotlib.pyplot.imshow(gxx.get()[...].real, cmap=matplotlib.cm.gray)
    matplotlib.pyplot.title('Adjoint_hsa_coil')
    matplotlib.pyplot.show(block=False)
    matplotlib.pyplot.pause(1)
    matplotlib.pyplot.close()

    print("acceleration=", t_cpu / t_cu)
    maxiter = 100

    # conjugate gradient on both back-ends
    t0 = time.time()
    x_cpu_cg = nfft.solve(y, 'cg', maxiter=maxiter)
    t1 = time.time() - t0

    t0 = time.time()
    x_cuda_cg = NufftObj.solve(gy, 'cg', maxiter=maxiter)
    print('shape of cg = ', x_cuda_cg.get().shape, x_cpu_cg.shape)
    t2 = time.time() - t0
    print(t1, t2)
    print('acceleration of cg=', t1 / t2)

    # one column per coil: CPU result on the first row, device result below
    for bb in range(0, batch):
        matplotlib.pyplot.subplot(2, batch, 1 + bb)
        matplotlib.pyplot.imshow(x_cpu_cg[..., bb].real,
                                 cmap=matplotlib.cm.gray)
        matplotlib.pyplot.title('CG_cpu_coil_' + str(bb))
        matplotlib.pyplot.subplot(2, batch, 1 + batch + bb)
        matplotlib.pyplot.imshow(x_cuda_cg.get()[..., bb].real,
                                 cmap=matplotlib.cm.gray)
        matplotlib.pyplot.title('CG_hsa_coil_' + str(bb))
    matplotlib.pyplot.show()

    NufftObj.release()
    del NufftObj
Exemplo n.º 10
0
def test_init():
    """
    Check every stage of NUFFT_hsa against NUFFT_cpu, then benchmark both.

    The forward chain (x2xx, xx2k, k2y) and the adjoint chain (y2k, k2xx,
    xx2x) of the device object are compared with the CPU object stage by
    stage. The forward transform and the 'L1TVOLS' solver are then timed on
    both, and the two reconstructions are shown if matplotlib can display
    them.
    """
    import time

    # load example image
    import pkg_resources

    DATA_PATH = pkg_resources.resource_filename('pynufft', 'src/data/')
    import numpy

    import scipy.misc

    image = scipy.misc.ascent()[::2, ::2]
    # numpy.float was only an alias of the builtin float and has been removed.
    image = image.astype(float) / numpy.max(image[...])

    Nd = (256, 256)  # image space size
    Kd = (512, 512)  # k-space size
    Jd = (6, 6)  # interpolation size

    # load k-space points
    om = numpy.load(DATA_PATH + 'om2D.npz')['arr_0']

    nfft = NUFFT_cpu()  # CPU

    nfft.plan(om, Nd, Kd, Jd)
    # Fall back to OpenCL when CUDA cannot be used; only ordinary errors
    # trigger the fallback.
    try:
        NufftObj = NUFFT_hsa('cuda', 0, 0)
    except Exception:
        NufftObj = NUFFT_hsa('ocl', 0, 0)
    NufftObj.debug = 1
    NufftObj.plan(om, Nd, Kd, Jd, radix=2)

    y = nfft.k2y(nfft.xx2k(nfft.x2xx(image)))

    # No dtype is defined inside this function; complex64 is what the other
    # examples send to the device.
    # NOTE(review): confirm no module-level dtype with another value was
    # intended here.
    NufftObj.x_Nd = NufftObj.thr.to_device(image.astype(numpy.complex64))

    gx = NufftObj.thr.copy_array(NufftObj.x_Nd)

    # forward chain, stage by stage
    print('x close? = ', numpy.allclose(image, gx.get(), atol=1e-4))
    gxx = NufftObj.x2xx(gx)

    print('xx close? = ',
          numpy.allclose(nfft.x2xx(image), gxx.get(), atol=1e-4))

    gk = NufftObj.xx2k(gxx)

    k = nfft.xx2k(nfft.x2xx(image))

    print('k close? = ',
          numpy.allclose(k, gk.get(), atol=1e-3 * numpy.linalg.norm(k)))
    gy = NufftObj.k2y(gk)
    k2 = NufftObj.y2k(gy)
    print('y close? = ',
          numpy.allclose(y, gy.get(), atol=1e-3 * numpy.linalg.norm(y)),
          numpy.linalg.norm((y - gy.get()) / numpy.linalg.norm(y)))

    # adjoint chain, stage by stage; y2 keeps the reference data because y is
    # overwritten by the timing loop below
    y2 = y
    k2_cpu = nfft.y2k(y2)
    print('k2 close? = ',
          numpy.allclose(k2_cpu, k2.get(),
                         atol=1e-3 * numpy.linalg.norm(k2_cpu)),
          numpy.linalg.norm((k2_cpu - k2.get()) /
                            numpy.linalg.norm(k2_cpu)))
    gxx2 = NufftObj.k2xx(k2)
    gx2 = NufftObj.xx2x(gxx2)
    x_adj_cpu = nfft.adjoint(y2)
    print('x close? = ',
          numpy.allclose(x_adj_cpu, gx2.get(),
                         atol=1e-3 * numpy.linalg.norm(x_adj_cpu)))

    # average time of a CPU forward transform
    t0 = time.time()
    for _ in range(50):
        y = nfft.forward(image)
    t_cpu = (time.time() - t0) / 50.0
    print(t_cpu)

    # one untimed device call before the timed loop
    gy2 = NufftObj.forward(gx)
    t0 = time.time()
    for _ in range(20):
        gy2 = NufftObj.forward(gx)

    # wait for the device before reading the clock
    NufftObj.thr.synchronize()
    t_cl = (time.time() - t0) / 20
    print(t_cl)

    print('gy close? = ',
          numpy.allclose(y, gy.get(), atol=numpy.linalg.norm(y) * 1e-3))
    print("acceleration=", t_cpu / t_cl)

    maxiter = 100
    t0 = time.time()
    x2 = nfft.solve(y2, 'L1TVOLS', maxiter=maxiter, rho=2)
    t1 = time.time() - t0

    t0 = time.time()
    x = NufftObj.solve(gy, 'L1TVOLS', maxiter=maxiter, rho=2)
    t2 = time.time() - t0
    print(t1, t2)
    print('acceleration=', t1 / t2)

    # Plotting is optional: any ordinary failure is reported and ignored.
    try:
        import matplotlib.pyplot
        matplotlib.pyplot.subplot(1, 2, 1)
        matplotlib.pyplot.imshow(x.get().real,
                                 cmap=matplotlib.cm.gray,
                                 vmin=0,
                                 vmax=1)
        matplotlib.pyplot.title("HSA reconstruction")
        matplotlib.pyplot.subplot(1, 2, 2)
        matplotlib.pyplot.imshow(x2.real, cmap=matplotlib.cm.gray)
        matplotlib.pyplot.title("CPU reconstruction")
        matplotlib.pyplot.show(block=False)
        matplotlib.pyplot.pause(3)
        matplotlib.pyplot.close()
    except Exception:
        print("no graphics")
Exemplo n.º 11
0
# om = scipy.io.loadmat('/home/sram/Cambridge_2012/DATA_MATLAB/Ciuciu/Trajectories_and_data_sparkling_radial/radial/samples_radial_x8_64x3072.mat')['samples_radial']
# Rescale the trajectory so that the largest real coordinate equals pi.
om = om / numpy.max(om.real.ravel()) * numpy.pi

print(om.shape)
from pynufft import NUFFT_cpu, NUFFT_hsa, NUFFT_hsa_legacy
# from pynufft import NUFFT_memsave
# One CPU object and two NUFFT_hsa objects created with default arguments.
NufftObj_cpu = NUFFT_cpu()
NufftObj_hsa = NUFFT_hsa()
NufftObj_memsave = NUFFT_hsa()

import time

# t1 - t0 covers the CPU plan; t2 - t1 covers both NUFFT_hsa plans together.
t0 = time.time()
NufftObj_cpu.plan(om, Nd, Kd, Jd)
t1 = time.time()
NufftObj_hsa.plan(om, Nd, Kd, Jd)
NufftObj_memsave.plan(om, Nd, Kd, Jd)
t2 = time.time()
# proc = 0 # cpu
proc = 1  # gpu
# NufftObj_hsa.offload(API = 'ocl',   platform_number = proc, device_number = 0)

# NufftObj_memsave.offload(API = 'ocl',   platform_number = proc, device_number = 0)
# NufftObj_memsave.offload(API = 'cuda',   platform_number = 0, device_number = 0)
# NOTE(review): every offload call above is commented out, so t3 - t2 only
# spans the assignment of proc and the 'loading time' printed below does not
# measure any device work.
t3 = time.time()
print('planning time of CPU = ', t1 - t0)
print('planning time of GPU = ', t2 - t1)
print('loading time of GPU = ', t3 - t2)
# Send the image to each object's device as complex64.
gx_hsa = NufftObj_hsa.thr.to_device(image.astype(numpy.complex64))
gx_memsave = NufftObj_memsave.thr.to_device(image.astype(numpy.complex64))