# Shared context for the standalone cryodrgn utility scripts below. Each script
# originally carried its own imports; they are consolidated here. The helpers
# log, mkbasedir, and warnexists are assumed to live in cryodrgn.utils, and
# HEADERS, POSE_HDRS, MICROGRAPH_HDRS, parse_chunk_size, and plot_projections
# are module-level definitions from the original scripts.
import math
import multiprocessing as mp
import os
from multiprocessing import Pool

import numpy as np
import pandas as pd

from cryodrgn import ctf, dataset, fft, mrc, starfile, utils

log = utils.log
mkbasedir = utils.mkbasedir
warnexists = utils.warnexists


# Filter a particle stack down to a pickled subset of indices and write a new stack.
def main(args):
    x = dataset.load_particles(args.input, lazy=True)
    log(f'Loaded {len(x)} particles')
    ind = utils.load_pkl(args.ind)
    x = np.array([x[i].get() for i in ind])
    log(f'New stack dimensions: {x.shape}')
    mrc.write(args.o, x)
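# A minimal sketch of producing the index .pkl that args.ind expects: a pickled
# 0-based integer array of particle indices to keep. The filename and indices
# here are hypothetical.
import pickle

keep = np.array([0, 5, 42])  # hypothetical subset of particle indices
with open('keep_ind.pkl', 'wb') as f:
    pickle.dump(keep, f)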
# Shift each particle image by its pose translation, applied as a phase ramp
# in Fourier space.
def main(args):
    # load particles
    particles = dataset.load_particles(args.mrcs, datadir=args.datadir)
    log(particles.shape)
    Nimg, D, _ = particles.shape  # images are square (D x D)
    trans = utils.load_pkl(args.trans)
    if type(trans) is tuple:
        trans = trans[1]
    trans *= args.tscale
    assert np.all(trans <= 1), \
        "ERROR: Old pose format detected. Translations must be in units of fraction of box."
    trans *= D  # convert from fraction of box to pixels
    assert len(trans) == Nimg

    xx, yy = np.meshgrid(np.arange(-D / 2, D / 2), np.arange(-D / 2, D / 2))
    TCOORD = np.stack([xx, yy], axis=2) / D  # DxDx2 grid of Fourier coordinates

    imgs = []
    for ii in range(Nimg):
        ff = fft.fft2_center(particles[ii])
        tfilt = np.dot(TCOORD, trans[ii]) * -2 * np.pi
        tfilt = np.cos(tfilt) + np.sin(tfilt) * 1j  # exp(-2*pi*i*k.t)
        ff *= tfilt
        img = fft.ifftn_center(ff).real  # imaginary part is numerical noise
        imgs.append(img)

    imgs = np.asarray(imgs).astype(np.float32)
    mrc.write(args.o, imgs)

    if args.out_png:
        plot_projections(args.out_png, imgs[:9])
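# Why the cos + i*sin filter above shifts the image: multiplying an image's
# Fourier transform by exp(-2*pi*i*k.t) translates it by t (the Fourier shift
# theorem). A self-contained numpy check with an integer shift, where the
# result must match np.roll exactly:
rng = np.random.default_rng(0)
demo = rng.standard_normal((8, 8))
t = (2, 3)  # shift in pixels (rows, cols)
ky = np.fft.fftfreq(8)[:, None]
kx = np.fft.fftfreq(8)[None, :]
phase = np.exp(-2j * np.pi * (ky * t[0] + kx * t[1]))
shifted = np.fft.ifft2(np.fft.fft2(demo) * phase).real
assert np.allclose(shifted, np.roll(demo, t, axis=(0, 1)))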
# Write a RELION .star file from a particle stack, a CTF pkl, and (optionally)
# poses. HEADERS and POSE_HDRS are module-level lists of RELION header names.
def main(args):
    assert args.o.endswith('.star')
    particles = dataset.load_particles(args.particles, lazy=True, datadir=args.datadir)
    ctf = utils.load_pkl(args.ctf)
    assert ctf.shape[1] == 9, "Incorrect CTF pkl format"
    assert len(particles) == len(ctf), \
        f"{len(particles)} != {len(ctf)}, Number of particles != number of CTF parameters"
    if args.poses:
        poses = utils.load_pkl(args.poses)
        assert len(particles) == len(poses[0]), \
            f"{len(particles)} != {len(poses[0])}, Number of particles != number of poses"
    log(f'{len(particles)} particles')

    if args.ind:
        ind = utils.load_pkl(args.ind)
        log(f'Filtering to {len(ind)} particles')
        particles = [particles[ii] for ii in ind]
        ctf = ctf[ind]
        if args.poses:
            poses = (poses[0][ind], poses[1][ind])
    else:
        ind = np.arange(len(particles))
    ind += 1  # convert to 1-based indexing for RELION

    image_names = [img.fname for img in particles]
    if args.full_path:
        image_names = [os.path.abspath(img.fname) for img in particles]
    names = [f'{i}@{name}' for i, name in zip(ind, image_names)]

    ctf = ctf[:, 2:]  # drop box size and pixel size; keep the 7 CTF parameter columns

    # convert poses
    if args.poses:
        eulers = utils.R_to_relion_scipy(poses[0])
        D = particles[0].get().shape[0]
        trans = poses[1] * D  # convert translations from fraction of box to pixels

    data = {HEADERS[0]: names}
    for i in range(7):
        data[HEADERS[i + 1]] = ctf[:, i]
    if args.poses:
        for i in range(3):
            data[POSE_HDRS[i]] = eulers[:, i]
        for i in range(2):
            data[POSE_HDRS[3 + i]] = trans[:, i]
    df = pd.DataFrame(data=data)
    headers = HEADERS + POSE_HDRS if args.poses else HEADERS
    s = starfile.Starfile(headers, df)
    s.write(args.o)
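# The "index@stack" strings built above follow RELION's rlnImageName
# convention, which is 1-based; hence the ind += 1 before formatting
# (stack name below is hypothetical):
demo_names = [f'{i}@{n}' for i, n in zip([1, 2, 3], ['stack.mrcs'] * 3)]
assert demo_names == ['1@stack.mrcs', '2@stack.mrcs', '3@stack.mrcs']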
# Phase-flip a particle stack: multiply each image's Fourier transform by the
# sign of its CTF so that contrast inversions past each CTF zero are undone.
def main(args):
    imgs = dataset.load_particles(args.mrcs, lazy=True, datadir=args.datadir)
    ctf_params = utils.load_pkl(args.ctf_params)
    assert len(imgs) == len(ctf_params)
    D = imgs[0].get().shape[0]

    fx, fy = np.meshgrid(np.linspace(-.5, .5, D, endpoint=False),
                         np.linspace(-.5, .5, D, endpoint=False))
    freqs = np.stack([fx.ravel(), fy.ravel()], 1)

    imgs_flip = np.empty((len(imgs), D, D), dtype=np.float32)
    for i in range(len(imgs)):
        if i % 1000 == 0:
            log(f'Processing image {i}')
        c = ctf.compute_ctf_np(freqs / ctf_params[i, 0], *ctf_params[i, 1:])
        c = c.reshape((D, D))
        ff = fft.fft2_center(imgs[i].get())
        ff *= np.sign(c)
        img = fft.ifftn_center(ff)
        imgs_flip[i] = img.real.astype(np.float32)  # imaginary part is numerical noise
    mrc.write(args.o, imgs_flip)
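# What np.sign(c) accomplishes above, on a toy oscillating "CTF" (a stand-in,
# not a real CTF model): multiplying the corrupted spectrum by the CTF's sign
# leaves an effective transfer function of |c|, so all sign flips are removed.
rng = np.random.default_rng(0)
F = np.fft.fft2(rng.standard_normal((16, 16)))              # clean spectrum
c = np.cos(np.linspace(0, 6 * np.pi, 256)).reshape(16, 16)  # toy "CTF"
observed = F * c                                            # CTF-corrupted
flipped = observed * np.sign(c)                             # phase flipping
assert np.allclose(flipped, F * np.abs(c))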
# Downsample a volume or particle stack by cropping its centered Hartley
# transform. Images are processed in parallel batches; stacks are loaded
# lazily so that only the current batch is in memory.
def main(args):
    mkbasedir(args.o)
    warnexists(args.o)
    assert args.o.endswith('.mrcs') or args.o.endswith('.mrc'), \
        "Must specify output in .mrc(s) file format"

    lazy = not args.is_vol
    old = dataset.load_particles(args.mrcs, lazy=lazy, datadir=args.datadir,
                                 relion31=args.relion31)
    oldD = old[0].get().shape[0] if lazy else old.shape[-1]
    assert args.D <= oldD, f'New box size {args.D} cannot be larger than the original box size {oldD}'
    assert args.D % 2 == 0, 'New box size must be even'

    D = args.D
    start = int(oldD / 2 - D / 2)
    stop = int(oldD / 2 + D / 2)

    def _combine_imgs(imgs):
        # Merge runs of lazy images that are contiguous on disk into single
        # reads (assumes float32 data, hence the 4-byte stride).
        ret = []
        for img in imgs:
            img.shape = (1, *img.shape)  # (D,D) -> (1,D,D)
        cur = imgs[0]
        for img in imgs[1:]:
            if img.fname == cur.fname and img.offset == cur.offset + 4 * np.prod(cur.shape):
                cur.shape = (cur.shape[0] + 1, *cur.shape[1:])
            else:
                ret.append(cur)
                cur = img
        ret.append(cur)
        return ret

    def downsample_images(imgs):
        if lazy:
            imgs = _combine_imgs(imgs)
            imgs = np.concatenate([i.get() for i in imgs])
        with Pool(min(args.max_threads, mp.cpu_count())) as p:
            oldft = np.asarray(p.map(fft.ht2_center, imgs))
            newft = oldft[:, start:stop, start:stop]
            new = np.asarray(p.map(fft.iht2_center, newft))
        return new

    def downsample_in_batches(old, b):
        new = np.empty((len(old), D, D), dtype=np.float32)
        for ii in range(math.ceil(len(old) / b)):
            log(f'Processing batch {ii}')
            new[ii * b:(ii + 1) * b, :, :] = downsample_images(old[ii * b:(ii + 1) * b])
        return new

    ### Downsample volume ###
    if args.is_vol:
        oldft = fft.htn_center(old)
        log(oldft.shape)
        newft = oldft[start:stop, start:stop, start:stop]
        log(newft.shape)
        new = fft.ihtn_center(newft).astype(np.float32)
        log(f'Saving {args.o}')
        mrc.write(args.o, new, is_vol=True)

    ### Downsample images ###
    elif args.chunk is None:
        new = downsample_in_batches(old, args.b)
        log(new.shape)
        log(f'Saving {args.o}')
        mrc.write(args.o, new.astype(np.float32), is_vol=False)

    ### Downsample images, saving chunks of N images ###
    else:
        nchunks = math.ceil(len(old) / args.chunk)
        out_mrcs = [f'.{i}'.join(os.path.splitext(args.o)) for i in range(nchunks)]
        chunk_names = [os.path.basename(x) for x in out_mrcs]
        for i in range(nchunks):
            log(f'Processing chunk {i}')
            chunk = old[i * args.chunk:(i + 1) * args.chunk]
            new = downsample_in_batches(chunk, args.b)
            log(new.shape)
            log(f'Saving {out_mrcs[i]}')
            mrc.write(out_mrcs[i], new, is_vol=False)
        # Write a text file listing all chunks
        out_txt = f'{os.path.splitext(args.o)[0]}.txt'
        log(f'Saving {out_txt}')
        with open(out_txt, 'w') as f:
            f.write('\n'.join(chunk_names))
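# The idea behind the transform-space crop above, in plain numpy: cropping the
# centered transform to DxD and inverting yields a DxD downsampled image.
# cryodrgn uses the real-valued Hartley transform (ht2_center/iht2_center) so
# no Hermitian bookkeeping is needed; this FFT sketch instead discards the
# tiny imaginary residual. Scaling conventions vary; this one preserves the
# image mean.
def downsample_fourier_crop(img, D):
    N = img.shape[0]
    ft = np.fft.fftshift(np.fft.fft2(img))
    lo, hi = N // 2 - D // 2, N // 2 + D // 2
    ft = ft[lo:hi, lo:hi]
    return np.fft.ifft2(np.fft.ifftshift(ft)).real * (D * D) / (N * N)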
# Standalone sanity check: lazy and eager particle loading must agree across
# .mrcs, .star, and .txt inputs.
data, _ = mrc.parse_mrc('data/toy_projections.mrcs', lazy=True)
data2, _ = mrc.parse_mrc('data/toy_projections.mrcs', lazy=False)
data1 = np.asarray([x.get() for x in data])
assert (data1 == data2).all()
print('ok')

data2 = dataset.load_particles('data/toy_projections.star')
assert (data1 == data2).all()
print('ok')

data2 = dataset.load_particles('data/toy_projections.txt')
assert (data1 == data2).all()
print('ok')

print('all ok')
# Write a RELION .star file from a particle stack (.mrcs or a .txt chunk list),
# a CTF pkl, and optional poses; can also carry over micrograph coordinates
# from a reference .star file. MICROGRAPH_HDRS lists the RELION micrograph
# headers; parse_chunk_size is a helper defined in the original script.
def main(args):
    assert args.o.endswith('.star'), "Output file must be .star file"
    assert args.particles.endswith('.mrcs') or args.particles.endswith('.txt'), \
        "Input file must be .mrcs or .txt"
    particles = dataset.load_particles(args.particles, lazy=True, datadir=args.datadir)
    ctf = utils.load_pkl(args.ctf)
    assert ctf.shape[1] == 9, "Incorrect CTF pkl format"
    assert len(particles) == len(ctf), \
        f"{len(particles)} != {len(ctf)}, Number of particles != number of CTF parameters"
    if args.poses:
        poses = utils.load_pkl(args.poses)
        assert len(particles) == len(poses[0]), \
            f"{len(particles)} != {len(poses[0])}, Number of particles != number of poses"
    log(f'{len(particles)} particles in {args.particles}')

    if args.ref_star:
        ref_star = starfile.Starfile.load(args.ref_star)
        assert len(ref_star) == len(particles), \
            f"{len(particles)} != {len(ref_star)}, Number of particles in {args.particles} != number of particles in {args.ref_star}"

    # Get index for particles in each .mrcs file
    if args.particles.endswith('.txt'):
        N_per_chunk = parse_chunk_size(args.particles)
        particle_ind = np.concatenate([np.arange(nn) for nn in N_per_chunk])
        assert len(particle_ind) == len(particles)
    else:  # single .mrcs file
        particle_ind = np.arange(len(particles))

    if args.ind:
        ind = utils.load_pkl(args.ind)
        log(f'Filtering to {len(ind)} particles')
        particles = [particles[ii] for ii in ind]
        ctf = ctf[ind]
        if args.poses:
            poses = (poses[0][ind], poses[1][ind])
        if args.ref_star:
            ref_star.df = ref_star.df.loc[ind]
            # reset the index in the dataframe to avoid any downstream indexing issues
            ref_star.df.reset_index(inplace=True)
        particle_ind = particle_ind[ind]
    particle_ind += 1  # convert to 1-based indexing for RELION

    image_names = [img.fname for img in particles]
    if args.full_path:
        image_names = [os.path.abspath(img.fname) for img in particles]
    names = [f'{i}@{name}' for i, name in zip(particle_ind, image_names)]

    ctf = ctf[:, 2:]  # drop box size and pixel size; keep the 7 CTF parameter columns

    # convert poses
    if args.poses:
        eulers = utils.R_to_relion_scipy(poses[0])
        D = particles[0].get().shape[0]
        trans = poses[1] * D  # convert translations from fraction of box to pixels

    # Create a new dataframe with required star file headers
    data = {HEADERS[0]: names}
    for i in range(7):
        data[HEADERS[i + 1]] = ctf[:, i]
    if args.poses:
        for i in range(3):
            data[POSE_HDRS[i]] = eulers[:, i]
        for i in range(2):
            data[POSE_HDRS[3 + i]] = trans[:, i]
    df = pd.DataFrame(data=data)
    headers = HEADERS + POSE_HDRS if args.poses else HEADERS

    if args.keep_micrograph:
        assert args.ref_star, "Must provide reference .star file with micrograph coordinates"
        log(f'Copying micrograph coordinates from {args.ref_star}')
        # TODO: Prepend path from args.ref_star to MicrographName?
        for h in MICROGRAPH_HDRS:
            df[h] = ref_star.df[h]
        headers += MICROGRAPH_HDRS

    s = starfile.Starfile(headers, df)
    s.write(args.o)
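# A hedged sketch of the rotation-matrix -> Euler-angle step hidden inside
# utils.R_to_relion_scipy: RELION stores ZYZ Euler angles (rot, tilt, psi) in
# degrees, which scipy can extract. The exact sign/transpose conventions of
# cryodrgn's helper may differ; this only shows the underlying scipy call.
from scipy.spatial.transform import Rotation

R_demo = np.eye(3)[None]  # a single identity rotation, shape (1, 3, 3)
eulers_demo = Rotation.from_matrix(R_demo).as_euler('zyz', degrees=True)  # [[0. 0. 0.]]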
# Sequential, low-memory variant of the downsampling script: lazily load and
# crop each image's centered Hartley transform one image at a time.
def main(args):
    mkbasedir(args.o)
    warnexists(args.o)
    assert args.o.endswith('.mrcs') or args.o.endswith('.mrc'), \
        "Must specify output in .mrc(s) file format"

    old = dataset.load_particles(args.mrcs, lazy=True, datadir=args.datadir)
    oldD = old[0].get().shape[0]
    assert args.D <= oldD, f'New box size {args.D} cannot be larger than the original box size {oldD}'
    assert args.D % 2 == 0, 'New box size must be even'

    D = args.D
    start = int(oldD / 2 - D / 2)
    stop = int(oldD / 2 + D / 2)

    ### Downsample volume ###
    if args.is_vol:
        oldft = fft.htn_center(np.array([x.get() for x in old]))
        log(oldft.shape)
        newft = oldft[start:stop, start:stop, start:stop]
        log(newft.shape)
        new = fft.ihtn_center(newft).astype(np.float32)
        log(f'Saving {args.o}')
        mrc.write(args.o, new, is_vol=True)

    ### Downsample images ###
    elif args.chunk is None:
        new = []
        for i in range(len(old)):
            if i % 1000 == 0:
                log(f'Processing image {i} of {len(old)}')
            img = old[i]
            oldft = fft.ht2_center(img.get()).astype(np.float32)
            newft = oldft[start:stop, start:stop]
            new.append(fft.ihtn_center(newft).astype(np.float32))
            # center cropping preserves the zero-frequency (DC) component
            assert oldft[int(oldD / 2), int(oldD / 2)] == newft[int(D / 2), int(D / 2)]
        new = np.asarray(new)
        log(new.shape)
        log(f'Saving {args.o}')
        mrc.write(args.o, new, is_vol=False)

    ### Downsample images, saving chunks of N images ###
    else:
        chunk_names = []
        nchunks = math.ceil(len(old) / args.chunk)
        for i in range(nchunks):
            log(f'Processing chunk {i}')
            out_mrcs = f'.{i}'.join(os.path.splitext(args.o))
            new = []
            for img in old[i * args.chunk:(i + 1) * args.chunk]:
                oldft = fft.ht2_center(img.get()).astype(np.float32)
                newft = oldft[start:stop, start:stop]
                new.append(fft.ihtn_center(newft).astype(np.float32))
                assert oldft[int(oldD / 2), int(oldD / 2)] == newft[int(D / 2), int(D / 2)]
            new = np.asarray(new)
            log(new.shape)
            log(f'Saving {out_mrcs}')
            mrc.write(out_mrcs, new, is_vol=False)
            chunk_names.append(os.path.basename(out_mrcs))
        # Write a text file listing all chunks
        out_txt = f'{os.path.splitext(args.o)[0]}.txt'
        log(f'Saving {out_txt}')
        with open(out_txt, 'w') as f:
            f.write('\n'.join(chunk_names))
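# The DC assert inside the loops above, reduced to its essence: center
# cropping a centered transform keeps the zero-frequency sample in place.
N_demo, D_demo = 8, 4
ft_demo = np.arange(N_demo * N_demo, dtype=np.float32).reshape(N_demo, N_demo)
lo, hi = N_demo // 2 - D_demo // 2, N_demo // 2 + D_demo // 2
crop_demo = ft_demo[lo:hi, lo:hi]
assert ft_demo[N_demo // 2, N_demo // 2] == crop_demo[D_demo // 2, D_demo // 2]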
# Preprocess a particle stack for training: optionally filter by index, apply
# a real-space window, compute centered Hartley transforms in parallel,
# optionally downsample by Fourier cropping, symmetrize, and save the result
# in chunks alongside a summary .txt file.
def main(args):
    mkbasedir(args.o)
    warnexists(args.o)
    assert args.o.endswith('.mrcs') or args.o.endswith('.txt'), \
        "Must specify output in .mrcs or .txt file format"

    # load images
    lazy = args.lazy
    images = dataset.load_particles(args.mrcs, lazy=lazy, datadir=args.datadir,
                                    relion31=args.relion31)

    # filter images
    if args.ind is not None:
        log(f'Filtering image dataset with {args.ind}')
        ind = utils.load_pkl(args.ind).astype(int)
        images = [images[i] for i in ind] if lazy else images[ind]

    original_D = images[0].get().shape[0] if lazy else images.shape[-1]
    log(f'Loaded {len(images)} {original_D}x{original_D} images')
    window = args.window
    invert_data = args.invert_data
    downsample = args.D and args.D < original_D
    if downsample:
        assert args.D <= original_D, \
            f'New box size {args.D} cannot be larger than the original box size {original_D}'
        assert args.D % 2 == 0, 'New box size must be even'
        start = int(original_D / 2 - args.D / 2)
        stop = int(original_D / 2 + args.D / 2)
        D = args.D
        log(f'Downsampling images to {D}x{D}')
    else:
        D = original_D

    def _combine_imgs(imgs):
        # Merge runs of lazy images that are contiguous on disk into single
        # reads (assumes float32 data, hence the 4-byte stride).
        ret = []
        for img in imgs:
            img.shape = (1, *img.shape)  # (D,D) -> (1,D,D)
        cur = imgs[0]
        for img in imgs[1:]:
            if img.fname == cur.fname and img.offset == cur.offset + 4 * np.prod(cur.shape):
                cur.shape = (cur.shape[0] + 1, *cur.shape[1:])
            else:
                ret.append(cur)
                cur = img
        ret.append(cur)
        return ret

    def preprocess(imgs):
        if lazy:
            imgs = _combine_imgs(imgs)
            imgs = np.concatenate([i.get() for i in imgs])
        with Pool(min(args.max_threads, mp.cpu_count())) as p:
            # todo: refactor as a routine in dataset.py
            # note: applying the window before downsampling is slightly
            # different than in the original workflow
            if window:
                imgs *= dataset.window_mask(original_D, args.window_r, .99)
            ret = np.asarray(p.map(fft.ht2_center, imgs))
            if invert_data:
                ret *= -1
            if downsample:
                ret = ret[:, start:stop, start:stop]
            ret = fft.symmetrize_ht(ret)
        return ret

    def preprocess_in_batches(imgs, b):
        ret = np.empty((len(imgs), D + 1, D + 1), dtype=np.float32)
        Nbatches = math.ceil(len(imgs) / b)
        for ii in range(Nbatches):
            log(f'Processing batch of {b} images ({ii+1} of {Nbatches})')
            ret[ii * b:(ii + 1) * b, :, :] = preprocess(imgs[ii * b:(ii + 1) * b])
        return ret

    nchunks = math.ceil(len(images) / args.chunk)
    out_mrcs = [f'.{i}.ft'.join(os.path.splitext(args.o)) for i in range(nchunks)]
    chunk_names = [os.path.basename(x) for x in out_mrcs]
    for i in range(nchunks):
        log(f'Processing chunk {i+1} of {nchunks}')
        chunk = images[i * args.chunk:(i + 1) * args.chunk]
        new = preprocess_in_batches(chunk, args.b)
        log(f'New shape: {new.shape}')
        log(f'Saving {out_mrcs[i]}')
        mrc.write(out_mrcs[i], new, is_vol=False)

    out_txt = f'{os.path.splitext(args.o)[0]}.ft.txt'
    log(f'Saving summary txt file {out_txt}')
    with open(out_txt, 'w') as f:
        f.write('\n'.join(chunk_names))
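# How the chunked output names above are derived from args.o: an output of
# 'particles.mrcs' (hypothetical name) produces particles.0.ft.mrcs,
# particles.1.ft.mrcs, ..., plus particles.ft.txt listing them.
assert '.0.ft'.join(os.path.splitext('particles.mrcs')) == 'particles.0.ft.mrcs'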