def process(op):
    """Cluster subtomograms with k-means and write labels, averages, and stats.

    Pipeline: load the data-json record list, optionally subsample it for
    testing, load each subtomogram volume and rotate it into a common frame,
    optionally reduce dimensionality with weighted EM-PCA, run k-means, then
    write (1) a json of per-subtomogram cluster labels, (2) one averaged
    volume per cluster, and (3) a json of per-cluster statistics.

    Parameters
    ----------
    op : dict
        Option dictionary.  Keys used here: 'input data json file',
        optional 'test' ({'sample_num'}), 'mode' ('pose' or 'template'),
        optional 'PCA' ({'n_dims', 'n_iter'}), 'kmeans' ({'cluster num',
        'n_init', optional 'n_jobs', 'verbose'}), 'output data json file',
        'out dir', 'output cluster stat file'.

    Side effects: reads MRC files, writes json and MRC files, prints progress.
    """
    with open(op['input data json file']) as f:
        dj = json.load(f)

    # Optional test mode: work on a random subsample to speed up a dry run.
    if 'test' in op:
        if ('sample_num' in op['test']) and (op['test']['sample_num'] > 0) \
                and (len(dj) > op['test']['sample_num']):
            print('testing the procedure using a subsample of %d subtomograms'
                  % op['test']['sample_num'])
            dj = random.sample(dj, op['test']['sample_num'])

    # Load every subtomogram, rotate it into the common frame, and stack the
    # flattened volumes as rows of `mat` (allocated lazily once the volume
    # size is known from the first rotated volume).
    mat = None
    for i, d in enumerate(dj):
        print('\rloading', i, ' ', end=' ')
        sys.stdout.flush()
        v = IF.read_mrc_vol(d['subtomogram'])
        if op['mode'] == 'pose':
            # Pose mode: rotation matrix + center stored under d['pose'].
            vr = GR.rotate_pad_mean(v, rm=N.array(d['pose']['rm']),
                                    c1=N.array(d['pose']['c']))
        elif op['mode'] == 'template':
            # Template mode: ZYZ angles + translation stored at top level.
            vr = GR.rotate_pad_mean(v, angle=N.array(d['angle']),
                                    loc_r=N.array(d['loc']))
        else:
            raise Exception('op[mode]')
        if mat is None:
            mat = N.zeros((len(dj), vr.size))
        mat[i, :] = vr.flatten()

    # Optional dimension reduction before clustering (weighted EM-PCA).
    if 'PCA' in op:
        import aitom.tomominer.dimension_reduction.empca as drempca
        pca = drempca.empca(data=mat, weights=N.ones(mat.shape),
                            nvec=op['PCA']['n_dims'],
                            niter=op['PCA']['n_iter'])
        mat_km = pca.coeff
    else:
        mat_km = mat

    # NOTE(review): the `n_jobs` keyword was removed from
    # sklearn.cluster.KMeans in scikit-learn 1.0 — confirm the pinned
    # scikit-learn version before upgrading.
    km = SC.KMeans(n_clusters=op['kmeans']['cluster num'],
                   n_init=op['kmeans']['n_init'],
                   n_jobs=(op['kmeans']['n_jobs']
                           if ('n_jobs' in op['kmeans']) else (-1)),
                   verbose=op['kmeans']['verbose'])
    lbl = km.fit_predict(mat_km)

    # Emit one record per subtomogram carrying its cluster label.
    dj_new = []
    for i, d in enumerate(dj):
        dn = {}
        if 'id' in d:
            dn['id'] = d['id']
        dn['subtomogram'] = d['subtomogram']
        dn['cluster_label'] = int(lbl[i])
        dj_new.append(dn)

    op['output data json file'] = os.path.abspath(op['output data json file'])
    if not os.path.isdir(os.path.dirname(op['output data json file'])):
        os.makedirs(os.path.dirname(op['output data json file']))
    with open(op['output data json file'], 'w') as f:
        json.dump(dj_new, f, indent=2)

    # Write one averaged volume per cluster.
    clus_dir = os.path.join(op['out dir'], 'vol-avg')
    if not os.path.isdir(clus_dir):
        os.makedirs(clus_dir)

    clus_stat = []
    for l in set(lbl.tolist()):
        avg_file_name = os.path.abspath(
            os.path.join(clus_dir, '%03d.mrc' % (l,)))
        sel = (lbl == l)
        # Bug fix: this is the cluster *average* (directory 'vol-avg',
        # variable v_avg) — the original used .sum(axis=0), which scales
        # with cluster size and makes clusters incomparable.
        v_avg = mat[sel, :].mean(axis=0).reshape(v.shape)
        IF.put_mrc(mrc=v_avg, path=avg_file_name, overwrite=True)
        clus_stat.append({'cluster_label': l,
                          'size': int(sel.sum()),
                          'subtomogram': avg_file_name, })

    op['output cluster stat file'] = os.path.abspath(
        op['output cluster stat file'])
    if not os.path.isdir(os.path.dirname(op['output cluster stat file'])):
        os.makedirs(os.path.dirname(op['output cluster stat file']))
    with open(op['output cluster stat file'], 'w') as f:
        json.dump(clus_stat, f, indent=2)
def main():
    """Run the alignment-refinement selection driven by 'aligned_refine__op.json'.

    Loads the option file and data list, resolves relative subtomogram/mask
    paths, runs (or reloads a cached) genetic-algorithm subtomogram selection
    via PMPG, prints full-set vs. selected-set scores, averages the selected
    subtomograms, and writes the averaged volumes plus the selected records.
    """
    with open('aligned_refine__op.json') as f:
        op = json.load(f)

    out_dir = os.path.abspath(op['out_dir'])
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    with open(op['data_file']) as f:
        dj = json.load(f)

    # Relative paths in the data file are resolved against its directory.
    data_dir = os.path.dirname(op['data_file'])
    for rec in dj:
        if not os.path.isabs(rec['subtomogram']):
            rec['subtomogram'] = os.path.abspath(
                os.path.join(data_dir, rec['subtomogram']))
        if not os.path.isabs(rec['mask']):
            rec['mask'] = os.path.abspath(os.path.join(data_dir, rec['mask']))

    # Reuse a cached GA result when present; otherwise run the selection.
    pmpg_file = os.path.join(out_dir, 'pmpg.pickle')
    if os.path.isfile(pmpg_file):
        print('loading existing result file', pmpg_file)
        with open(pmpg_file, 'rb') as f:
            pmpg = pickle.load(f)
    else:
        pmpg_dp_op = {}
        pmpg_ga_op = copy.deepcopy(op['genetic_algorithm'])
        pmpg_ga_op['sum_min'] = op['min_sample_num']
        pmpg_ga_op['evaluate']['ssnr']['mask_sum_threshold'] = op['min_sample_num']

        pmpg = {'dp': PMPG.data_prepare(dj=dj, op=pmpg_dp_op)}
        # Baseline: score the full (all-ones) selection for comparison.
        pmpg['full_set'] = {
            'evaluate': [PMPG.ga_evaluate__single(l=N.ones(len(dj)),
                                                  stat=pmpg['dp'],
                                                  op=pmpg_ga_op['evaluate'])],
        }
        PMPG.ga_evaluate__scoring(pmpg['full_set']['evaluate'],
                                  op=pmpg_ga_op['evaluate']['scoring'])
        pmpg['best'] = PMPG.ga(stat=pmpg['dp'], op=pmpg_ga_op)
        # Keep only the records the best GA individual selected.
        pmpg['dj'] = [dj[idx] for idx in range(len(dj))
                      if pmpg['best']['p'][0, idx] == 1]
        del pmpg['dp']
        with open(pmpg_file, 'wb') as f:
            pickle.dump(pmpg, f, protocol=(-1))

    print(pmpg['full_set']['evaluate'][0]['score'])
    print('score for the full set of', len(dj), 'subtomograms:',
          pmpg['full_set']['evaluate'][0]['score'])
    print('score for the', pmpg['best']['p'][0, :].sum(),
          'selected subtomograms:', pmpg['best']['e'][0]['score'])

    # Average the selected subtomograms and persist volume + mask averages.
    avg_re = average(dj=pmpg['dj'], mask_count_threshold=op['min_sample_num'])
    avg_dir = os.path.join(op['out_dir'], 'avg')
    if not os.path.isdir(avg_dir):
        os.makedirs(avg_dir)
    IF.put_mrc(avg_re['v'], os.path.join(avg_dir, 'vol_avg.mrc'))
    IF.put_mrc(avg_re['m'], os.path.join(avg_dir, 'mask_avg.mrc'))

    with open(os.path.join(out_dir, 'data_selected.json'), 'w') as f:
        json.dump(pmpg['dj'], f, indent=2)
# Build a synthetic density map and simulate subtomogram reconstruction.
v = MU.generate_toy_model(dim_siz=64)  # generate a pseudo density map
print(v.shape)

# Apply a random rigid transform (ZYZ rotation + translation bounded to a
# fraction of the map size) to the map.
loc_proportion = 0.1
loc_max = N.array(v.shape, dtype=float) * loc_proportion
angle = GAL.random_rotation_angle_zyz()
loc_r = (N.random.random(3) - 0.5) * loc_max
vr = GR.rotate(v, angle=angle, loc_r=loc_r, default_val=0.0)

# Generate a simulated subtomogram vb from the transformed map.
vb = TSRSC.do_reconstruction(vr, op, verbose=True)
print('vb', 'mean', vb.mean(), 'std', vb.std(), 'var', vb.var())

# Save the clean map and the simulated subtomogram as MRC volumes.
TIF.put_mrc(vb, '/tmp/vb.mrc', overwrite=True)
TIF.put_mrc(v, '/tmp/v.mrc', overwrite=True)

# Also save slice montages of both 3D volumes for visual inspection.
import aitom.image.io as IIO
import aitom.tomominer.image.vol.util as TIVU
IIO.save_png(TIVU.cub_img(vb)['im'], "/tmp/vb.png")
IIO.save_png(TIVU.cub_img(v)['im'], "/tmp/v.png")

if True:
    # Verify the correctness of SNR estimation: simulate an independent
    # second realization of the same transformed map, then correlate the two.
    vb_rep = TSRSC.do_reconstruction(vr, op, verbose=True)
    import scipy.stats as SS
    # Pearson correlation between the two noisy realizations (used for SNR).
    vb_corr = SS.pearsonr(vb.flatten(), vb_rep.flatten())[0]