# Shared context for the standalone cryodrgn utility scripts below. Each script
# originally carried its own imports; they are consolidated here. The helpers
# log, mkbasedir, and warnexists are assumed to live in cryodrgn.utils, and
# HEADERS, POSE_HDRS, MICROGRAPH_HDRS, parse_chunk_size, and plot_projections
# are module-level definitions from the original scripts.
import math
import multiprocessing as mp
import os
from multiprocessing import Pool

import numpy as np
import pandas as pd

from cryodrgn import ctf, dataset, fft, mrc, starfile, utils

log = utils.log
mkbasedir = utils.mkbasedir
warnexists = utils.warnexists


# Filter a particle stack down to a pickled subset of indices and write a new stack.
def main(args):
    x = dataset.load_particles(args.input, lazy=True)
    log(f'Loaded {len(x)} particles')
    ind = utils.load_pkl(args.ind)
    x = np.array([x[i].get() for i in ind])
    log(f'New stack dimensions: {x.shape}')
    mrc.write(args.o, x)
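# A minimal sketch of producing the index .pkl that args.ind expects: a pickled
# 0-based integer array of particle indices to keep. The filename and indices
# here are hypothetical.
import pickle

keep = np.array([0, 5, 42])  # hypothetical subset of particle indices
with open('keep_ind.pkl', 'wb') as f:
    pickle.dump(keep, f)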
# Shift each particle image by its pose translation, applied as a phase ramp
# in Fourier space.
def main(args):
    # load particles
    particles = dataset.load_particles(args.mrcs, datadir=args.datadir)
    log(particles.shape)
    Nimg, D, _ = particles.shape  # images are square (D x D)
    trans = utils.load_pkl(args.trans)
    if type(trans) is tuple:
        trans = trans[1]
    trans *= args.tscale
    assert np.all(trans <= 1), \
        "ERROR: Old pose format detected. Translations must be in units of fraction of box."
    trans *= D  # convert from fraction of box to pixels
    assert len(trans) == Nimg

    xx, yy = np.meshgrid(np.arange(-D / 2, D / 2), np.arange(-D / 2, D / 2))
    TCOORD = np.stack([xx, yy], axis=2) / D  # DxDx2 grid of Fourier coordinates

    imgs = []
    for ii in range(Nimg):
        ff = fft.fft2_center(particles[ii])
        tfilt = np.dot(TCOORD, trans[ii]) * -2 * np.pi
        tfilt = np.cos(tfilt) + np.sin(tfilt) * 1j  # exp(-2*pi*i*k.t)
        ff *= tfilt
        img = fft.ifftn_center(ff).real  # imaginary part is numerical noise
        imgs.append(img)

    imgs = np.asarray(imgs).astype(np.float32)
    mrc.write(args.o, imgs)

    if args.out_png:
        plot_projections(args.out_png, imgs[:9])
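# Why the cos + i*sin filter above shifts the image: multiplying an image's
# Fourier transform by exp(-2*pi*i*k.t) translates it by t (the Fourier shift
# theorem). A self-contained numpy check with an integer shift, where the
# result must match np.roll exactly:
rng = np.random.default_rng(0)
demo = rng.standard_normal((8, 8))
t = (2, 3)  # shift in pixels (rows, cols)
ky = np.fft.fftfreq(8)[:, None]
kx = np.fft.fftfreq(8)[None, :]
phase = np.exp(-2j * np.pi * (ky * t[0] + kx * t[1]))
shifted = np.fft.ifft2(np.fft.fft2(demo) * phase).real
assert np.allclose(shifted, np.roll(demo, t, axis=(0, 1)))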
# Write a RELION .star file from a particle stack, a CTF pkl, and (optionally)
# poses. HEADERS and POSE_HDRS are module-level lists of RELION header names.
def main(args):
    assert args.o.endswith('.star')
    particles = dataset.load_particles(args.particles, lazy=True, datadir=args.datadir)
    ctf = utils.load_pkl(args.ctf)
    assert ctf.shape[1] == 9, "Incorrect CTF pkl format"
    assert len(particles) == len(ctf), \
        f"{len(particles)} != {len(ctf)}, Number of particles != number of CTF parameters"
    if args.poses:
        poses = utils.load_pkl(args.poses)
        assert len(particles) == len(poses[0]), \
            f"{len(particles)} != {len(poses[0])}, Number of particles != number of poses"
    log(f'{len(particles)} particles')

    if args.ind:
        ind = utils.load_pkl(args.ind)
        log(f'Filtering to {len(ind)} particles')
        particles = [particles[ii] for ii in ind]
        ctf = ctf[ind]
        if args.poses:
            poses = (poses[0][ind], poses[1][ind])
    else:
        ind = np.arange(len(particles))
    ind += 1  # convert to 1-based indexing for RELION

    image_names = [img.fname for img in particles]
    if args.full_path:
        image_names = [os.path.abspath(img.fname) for img in particles]
    names = [f'{i}@{name}' for i, name in zip(ind, image_names)]

    ctf = ctf[:, 2:]  # drop box size and pixel size; keep the 7 CTF parameter columns

    # convert poses
    if args.poses:
        eulers = utils.R_to_relion_scipy(poses[0])
        D = particles[0].get().shape[0]
        trans = poses[1] * D  # convert translations from fraction of box to pixels

    data = {HEADERS[0]: names}
    for i in range(7):
        data[HEADERS[i + 1]] = ctf[:, i]
    if args.poses:
        for i in range(3):
            data[POSE_HDRS[i]] = eulers[:, i]
        for i in range(2):
            data[POSE_HDRS[3 + i]] = trans[:, i]
    df = pd.DataFrame(data=data)
    headers = HEADERS + POSE_HDRS if args.poses else HEADERS
    s = starfile.Starfile(headers, df)
    s.write(args.o)
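# The "index@stack" strings built above follow RELION's rlnImageName
# convention, which is 1-based; hence the ind += 1 before formatting
# (stack name below is hypothetical):
demo_names = [f'{i}@{n}' for i, n in zip([1, 2, 3], ['stack.mrcs'] * 3)]
assert demo_names == ['1@stack.mrcs', '2@stack.mrcs', '3@stack.mrcs']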
# Phase-flip a particle stack: multiply each image's Fourier transform by the
# sign of its CTF so that contrast inversions past each CTF zero are undone.
def main(args):
    imgs = dataset.load_particles(args.mrcs, lazy=True, datadir=args.datadir)
    ctf_params = utils.load_pkl(args.ctf_params)
    assert len(imgs) == len(ctf_params)
    D = imgs[0].get().shape[0]

    fx, fy = np.meshgrid(np.linspace(-.5, .5, D, endpoint=False),
                         np.linspace(-.5, .5, D, endpoint=False))
    freqs = np.stack([fx.ravel(), fy.ravel()], 1)

    imgs_flip = np.empty((len(imgs), D, D), dtype=np.float32)
    for i in range(len(imgs)):
        if i % 1000 == 0:
            log(f'Processing image {i}')
        c = ctf.compute_ctf_np(freqs / ctf_params[i, 0], *ctf_params[i, 1:])
        c = c.reshape((D, D))
        ff = fft.fft2_center(imgs[i].get())
        ff *= np.sign(c)
        img = fft.ifftn_center(ff)
        imgs_flip[i] = img.real.astype(np.float32)  # imaginary part is numerical noise
    mrc.write(args.o, imgs_flip)
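# What np.sign(c) accomplishes above, on a toy oscillating "CTF" (a stand-in,
# not a real CTF model): multiplying the corrupted spectrum by the CTF's sign
# leaves an effective transfer function of |c|, so all sign flips are removed.
rng = np.random.default_rng(0)
F = np.fft.fft2(rng.standard_normal((16, 16)))              # clean spectrum
c = np.cos(np.linspace(0, 6 * np.pi, 256)).reshape(16, 16)  # toy "CTF"
observed = F * c                                            # CTF-corrupted
flipped = observed * np.sign(c)                             # phase flipping
assert np.allclose(flipped, F * np.abs(c))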
# Downsample a volume or particle stack by cropping its centered Hartley
# transform. Images are processed in parallel batches; stacks are loaded
# lazily so that only the current batch is in memory.
def main(args):
    mkbasedir(args.o)
    warnexists(args.o)
    assert args.o.endswith('.mrcs') or args.o.endswith('.mrc'), \
        "Must specify output in .mrc(s) file format"

    lazy = not args.is_vol
    old = dataset.load_particles(args.mrcs, lazy=lazy, datadir=args.datadir,
                                 relion31=args.relion31)
    oldD = old[0].get().shape[0] if lazy else old.shape[-1]
    assert args.D <= oldD, f'New box size {args.D} cannot be larger than the original box size {oldD}'
    assert args.D % 2 == 0, 'New box size must be even'

    D = args.D
    start = int(oldD / 2 - D / 2)
    stop = int(oldD / 2 + D / 2)

    def _combine_imgs(imgs):
        # Merge runs of lazy images that are contiguous on disk into single
        # reads (assumes float32 data, hence the 4-byte stride).
        ret = []
        for img in imgs:
            img.shape = (1, *img.shape)  # (D,D) -> (1,D,D)
        cur = imgs[0]
        for img in imgs[1:]:
            if img.fname == cur.fname and img.offset == cur.offset + 4 * np.prod(cur.shape):
                cur.shape = (cur.shape[0] + 1, *cur.shape[1:])
            else:
                ret.append(cur)
                cur = img
        ret.append(cur)
        return ret

    def downsample_images(imgs):
        if lazy:
            imgs = _combine_imgs(imgs)
            imgs = np.concatenate([i.get() for i in imgs])
        with Pool(min(args.max_threads, mp.cpu_count())) as p:
            oldft = np.asarray(p.map(fft.ht2_center, imgs))
            newft = oldft[:, start:stop, start:stop]
            new = np.asarray(p.map(fft.iht2_center, newft))
        return new

    def downsample_in_batches(old, b):
        new = np.empty((len(old), D, D), dtype=np.float32)
        for ii in range(math.ceil(len(old) / b)):
            log(f'Processing batch {ii}')
            new[ii * b:(ii + 1) * b, :, :] = downsample_images(old[ii * b:(ii + 1) * b])
        return new

    ### Downsample volume ###
    if args.is_vol:
        oldft = fft.htn_center(old)
        log(oldft.shape)
        newft = oldft[start:stop, start:stop, start:stop]
        log(newft.shape)
        new = fft.ihtn_center(newft).astype(np.float32)
        log(f'Saving {args.o}')
        mrc.write(args.o, new, is_vol=True)

    ### Downsample images ###
    elif args.chunk is None:
        new = downsample_in_batches(old, args.b)
        log(new.shape)
        log(f'Saving {args.o}')
        mrc.write(args.o, new.astype(np.float32), is_vol=False)

    ### Downsample images, saving chunks of N images ###
    else:
        nchunks = math.ceil(len(old) / args.chunk)
        out_mrcs = [f'.{i}'.join(os.path.splitext(args.o)) for i in range(nchunks)]
        chunk_names = [os.path.basename(x) for x in out_mrcs]
        for i in range(nchunks):
            log(f'Processing chunk {i}')
            chunk = old[i * args.chunk:(i + 1) * args.chunk]
            new = downsample_in_batches(chunk, args.b)
            log(new.shape)
            log(f'Saving {out_mrcs[i]}')
            mrc.write(out_mrcs[i], new, is_vol=False)
        # Write a text file listing all chunks
        out_txt = f'{os.path.splitext(args.o)[0]}.txt'
        log(f'Saving {out_txt}')
        with open(out_txt, 'w') as f:
            f.write('\n'.join(chunk_names))
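# The idea behind the transform-space crop above, in plain numpy: cropping the
# centered transform to DxD and inverting yields a DxD downsampled image.
# cryodrgn uses the real-valued Hartley transform (ht2_center/iht2_center) so
# no Hermitian bookkeeping is needed; this FFT sketch instead discards the
# tiny imaginary residual. Scaling conventions vary; this one preserves the
# image mean.
def downsample_fourier_crop(img, D):
    N = img.shape[0]
    ft = np.fft.fftshift(np.fft.fft2(img))
    lo, hi = N // 2 - D // 2, N // 2 + D // 2
    ft = ft[lo:hi, lo:hi]
    return np.fft.ifft2(np.fft.ifftshift(ft)).real * (D * D) / (N * N)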
# Standalone sanity check: lazy and eager particle loading must agree across
# .mrcs, .star, and .txt inputs.
data, _ = mrc.parse_mrc('data/toy_projections.mrcs', lazy=True)
data2, _ = mrc.parse_mrc('data/toy_projections.mrcs', lazy=False)
data1 = np.asarray([x.get() for x in data])
assert (data1 == data2).all()
print('ok')

data2 = dataset.load_particles('data/toy_projections.star')
assert (data1 == data2).all()
print('ok')

data2 = dataset.load_particles('data/toy_projections.txt')
assert (data1 == data2).all()
print('ok')

print('all ok')
# Write a RELION .star file from a particle stack (.mrcs or a .txt chunk list),
# a CTF pkl, and optional poses; can also carry over micrograph coordinates
# from a reference .star file. MICROGRAPH_HDRS lists the RELION micrograph
# headers; parse_chunk_size is a helper defined in the original script.
def main(args):
    assert args.o.endswith('.star'), "Output file must be .star file"
    assert args.particles.endswith('.mrcs') or args.particles.endswith('.txt'), \
        "Input file must be .mrcs or .txt"
    particles = dataset.load_particles(args.particles, lazy=True, datadir=args.datadir)
    ctf = utils.load_pkl(args.ctf)
    assert ctf.shape[1] == 9, "Incorrect CTF pkl format"
    assert len(particles) == len(ctf), \
        f"{len(particles)} != {len(ctf)}, Number of particles != number of CTF parameters"
    if args.poses:
        poses = utils.load_pkl(args.poses)
        assert len(particles) == len(poses[0]), \
            f"{len(particles)} != {len(poses[0])}, Number of particles != number of poses"
    log(f'{len(particles)} particles in {args.particles}')

    if args.ref_star:
        ref_star = starfile.Starfile.load(args.ref_star)
        assert len(ref_star) == len(particles), \
            f"{len(particles)} != {len(ref_star)}, Number of particles in {args.particles} != number of particles in {args.ref_star}"

    # Get index for particles in each .mrcs file
    if args.particles.endswith('.txt'):
        N_per_chunk = parse_chunk_size(args.particles)
        particle_ind = np.concatenate([np.arange(nn) for nn in N_per_chunk])
        assert len(particle_ind) == len(particles)
    else:  # single .mrcs file
        particle_ind = np.arange(len(particles))

    if args.ind:
        ind = utils.load_pkl(args.ind)
        log(f'Filtering to {len(ind)} particles')
        particles = [particles[ii] for ii in ind]
        ctf = ctf[ind]
        if args.poses:
            poses = (poses[0][ind], poses[1][ind])
        if args.ref_star:
            ref_star.df = ref_star.df.loc[ind]
            # reset the index in the dataframe to avoid any downstream indexing issues
            ref_star.df.reset_index(inplace=True)
        particle_ind = particle_ind[ind]
    particle_ind += 1  # convert to 1-based indexing for RELION

    image_names = [img.fname for img in particles]
    if args.full_path:
        image_names = [os.path.abspath(img.fname) for img in particles]
    names = [f'{i}@{name}' for i, name in zip(particle_ind, image_names)]

    ctf = ctf[:, 2:]  # drop box size and pixel size; keep the 7 CTF parameter columns

    # convert poses
    if args.poses:
        eulers = utils.R_to_relion_scipy(poses[0])
        D = particles[0].get().shape[0]
        trans = poses[1] * D  # convert translations from fraction of box to pixels

    # Create a new dataframe with required star file headers
    data = {HEADERS[0]: names}
    for i in range(7):
        data[HEADERS[i + 1]] = ctf[:, i]
    if args.poses:
        for i in range(3):
            data[POSE_HDRS[i]] = eulers[:, i]
        for i in range(2):
            data[POSE_HDRS[3 + i]] = trans[:, i]
    df = pd.DataFrame(data=data)
    headers = HEADERS + POSE_HDRS if args.poses else HEADERS

    if args.keep_micrograph:
        assert args.ref_star, "Must provide reference .star file with micrograph coordinates"
        log(f'Copying micrograph coordinates from {args.ref_star}')
        # TODO: Prepend path from args.ref_star to MicrographName?
        for h in MICROGRAPH_HDRS:
            df[h] = ref_star.df[h]
        headers += MICROGRAPH_HDRS

    s = starfile.Starfile(headers, df)
    s.write(args.o)
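# A hedged sketch of the rotation-matrix -> Euler-angle step hidden inside
# utils.R_to_relion_scipy: RELION stores ZYZ Euler angles (rot, tilt, psi) in
# degrees, which scipy can extract. The exact sign/transpose conventions of
# cryodrgn's helper may differ; this only shows the underlying scipy call.
from scipy.spatial.transform import Rotation

R_demo = np.eye(3)[None]  # a single identity rotation, shape (1, 3, 3)
eulers_demo = Rotation.from_matrix(R_demo).as_euler('zyz', degrees=True)  # [[0. 0. 0.]]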
# Sequential, low-memory variant of the downsampling script: lazily load and
# crop each image's centered Hartley transform one image at a time.
def main(args):
    mkbasedir(args.o)
    warnexists(args.o)
    assert args.o.endswith('.mrcs') or args.o.endswith('.mrc'), \
        "Must specify output in .mrc(s) file format"

    old = dataset.load_particles(args.mrcs, lazy=True, datadir=args.datadir)
    oldD = old[0].get().shape[0]
    assert args.D <= oldD, f'New box size {args.D} cannot be larger than the original box size {oldD}'
    assert args.D % 2 == 0, 'New box size must be even'

    D = args.D
    start = int(oldD / 2 - D / 2)
    stop = int(oldD / 2 + D / 2)

    ### Downsample volume ###
    if args.is_vol:
        oldft = fft.htn_center(np.array([x.get() for x in old]))
        log(oldft.shape)
        newft = oldft[start:stop, start:stop, start:stop]
        log(newft.shape)
        new = fft.ihtn_center(newft).astype(np.float32)
        log(f'Saving {args.o}')
        mrc.write(args.o, new, is_vol=True)

    ### Downsample images ###
    elif args.chunk is None:
        new = []
        for i in range(len(old)):
            if i % 1000 == 0:
                log(f'Processing image {i} of {len(old)}')
            img = old[i]
            oldft = fft.ht2_center(img.get()).astype(np.float32)
            newft = oldft[start:stop, start:stop]
            new.append(fft.ihtn_center(newft).astype(np.float32))
            # center cropping preserves the zero-frequency (DC) component
            assert oldft[int(oldD / 2), int(oldD / 2)] == newft[int(D / 2), int(D / 2)]
        new = np.asarray(new)
        log(new.shape)
        log(f'Saving {args.o}')
        mrc.write(args.o, new, is_vol=False)

    ### Downsample images, saving chunks of N images ###
    else:
        chunk_names = []
        nchunks = math.ceil(len(old) / args.chunk)
        for i in range(nchunks):
            log(f'Processing chunk {i}')
            out_mrcs = f'.{i}'.join(os.path.splitext(args.o))
            new = []
            for img in old[i * args.chunk:(i + 1) * args.chunk]:
                oldft = fft.ht2_center(img.get()).astype(np.float32)
                newft = oldft[start:stop, start:stop]
                new.append(fft.ihtn_center(newft).astype(np.float32))
                assert oldft[int(oldD / 2), int(oldD / 2)] == newft[int(D / 2), int(D / 2)]
            new = np.asarray(new)
            log(new.shape)
            log(f'Saving {out_mrcs}')
            mrc.write(out_mrcs, new, is_vol=False)
            chunk_names.append(os.path.basename(out_mrcs))
        # Write a text file listing all chunks
        out_txt = f'{os.path.splitext(args.o)[0]}.txt'
        log(f'Saving {out_txt}')
        with open(out_txt, 'w') as f:
            f.write('\n'.join(chunk_names))
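# The DC assert inside the loops above, reduced to its essence: center
# cropping a centered transform keeps the zero-frequency sample in place.
N_demo, D_demo = 8, 4
ft_demo = np.arange(N_demo * N_demo, dtype=np.float32).reshape(N_demo, N_demo)
lo, hi = N_demo // 2 - D_demo // 2, N_demo // 2 + D_demo // 2
crop_demo = ft_demo[lo:hi, lo:hi]
assert ft_demo[N_demo // 2, N_demo // 2] == crop_demo[D_demo // 2, D_demo // 2]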
# Preprocess a particle stack for training: optionally filter by index, apply
# a real-space window, compute centered Hartley transforms in parallel,
# optionally downsample by Fourier cropping, symmetrize, and save the result
# in chunks alongside a summary .txt file.
def main(args):
    mkbasedir(args.o)
    warnexists(args.o)
    assert args.o.endswith('.mrcs') or args.o.endswith('.txt'), \
        "Must specify output in .mrcs or .txt file format"

    # load images
    lazy = args.lazy
    images = dataset.load_particles(args.mrcs, lazy=lazy, datadir=args.datadir,
                                    relion31=args.relion31)

    # filter images
    if args.ind is not None:
        log(f'Filtering image dataset with {args.ind}')
        ind = utils.load_pkl(args.ind).astype(int)
        images = [images[i] for i in ind] if lazy else images[ind]

    original_D = images[0].get().shape[0] if lazy else images.shape[-1]
    log(f'Loaded {len(images)} {original_D}x{original_D} images')
    window = args.window
    invert_data = args.invert_data
    downsample = args.D and args.D < original_D
    if downsample:
        assert args.D <= original_D, \
            f'New box size {args.D} cannot be larger than the original box size {original_D}'
        assert args.D % 2 == 0, 'New box size must be even'
        start = int(original_D / 2 - args.D / 2)
        stop = int(original_D / 2 + args.D / 2)
        D = args.D
        log(f'Downsampling images to {D}x{D}')
    else:
        D = original_D

    def _combine_imgs(imgs):
        # Merge runs of lazy images that are contiguous on disk into single
        # reads (assumes float32 data, hence the 4-byte stride).
        ret = []
        for img in imgs:
            img.shape = (1, *img.shape)  # (D,D) -> (1,D,D)
        cur = imgs[0]
        for img in imgs[1:]:
            if img.fname == cur.fname and img.offset == cur.offset + 4 * np.prod(cur.shape):
                cur.shape = (cur.shape[0] + 1, *cur.shape[1:])
            else:
                ret.append(cur)
                cur = img
        ret.append(cur)
        return ret

    def preprocess(imgs):
        if lazy:
            imgs = _combine_imgs(imgs)
            imgs = np.concatenate([i.get() for i in imgs])
        with Pool(min(args.max_threads, mp.cpu_count())) as p:
            # todo: refactor as a routine in dataset.py
            # note: applying the window before downsampling is slightly
            # different than in the original workflow
            if window:
                imgs *= dataset.window_mask(original_D, args.window_r, .99)
            ret = np.asarray(p.map(fft.ht2_center, imgs))
            if invert_data:
                ret *= -1
            if downsample:
                ret = ret[:, start:stop, start:stop]
            ret = fft.symmetrize_ht(ret)
        return ret

    def preprocess_in_batches(imgs, b):
        ret = np.empty((len(imgs), D + 1, D + 1), dtype=np.float32)
        Nbatches = math.ceil(len(imgs) / b)
        for ii in range(Nbatches):
            log(f'Processing batch of {b} images ({ii+1} of {Nbatches})')
            ret[ii * b:(ii + 1) * b, :, :] = preprocess(imgs[ii * b:(ii + 1) * b])
        return ret

    nchunks = math.ceil(len(images) / args.chunk)
    out_mrcs = [f'.{i}.ft'.join(os.path.splitext(args.o)) for i in range(nchunks)]
    chunk_names = [os.path.basename(x) for x in out_mrcs]
    for i in range(nchunks):
        log(f'Processing chunk {i+1} of {nchunks}')
        chunk = images[i * args.chunk:(i + 1) * args.chunk]
        new = preprocess_in_batches(chunk, args.b)
        log(f'New shape: {new.shape}')
        log(f'Saving {out_mrcs[i]}')
        mrc.write(out_mrcs[i], new, is_vol=False)

    out_txt = f'{os.path.splitext(args.o)[0]}.ft.txt'
    log(f'Saving summary txt file {out_txt}')
    with open(out_txt, 'w') as f:
        f.write('\n'.join(chunk_names))
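# How the chunked output names above are derived from args.o: an output of
# 'particles.mrcs' (hypothetical name) produces particles.0.ft.mrcs,
# particles.1.ft.mrcs, ..., plus particles.ft.txt listing them.
assert '.0.ft'.join(os.path.splitext('particles.mrcs')) == 'particles.0.ft.mrcs'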