def test_remove_millers():
    """
    Check that mock_data.remove_millers thins the reference data to the
    requested P1 completeness, to within an absolute tolerance of 0.02.
    Three randomly drawn target completeness values are tested, one per
    reference PDB file.
    """
    pdb_dir = "./reference/pdb_files/"
    pdb_names = ['6d6g', '4bfh', '2id8']
    targets = np.random.rand(3)

    for name, target in zip(pdb_names, targets):
        # reference structure factors to 4.0, expanded to P1
        pdb_file = os.path.join(pdb_dir, "%s.pdb" % name)
        ref_int, ref_phase = cctbx_utils.reference_sf(pdb_file,
                                                      4.0,
                                                      expand_to_p1=True,
                                                      table='electron')

        # crystal symmetry object for this PDB file
        symmetry = iotbx.pdb.input(file_name=pdb_file).crystal_symmetry()

        # discard Millers, then rebuild structure factors in the P1-equivalent cell
        sel_int, sel_phase = mock_data.remove_millers(ref_int, ref_phase,
                                                      target)
        hkl = sel_int.keys()
        intensities = np.array(sel_int.values())
        phases_rad = np.deg2rad(np.array(sel_phase.values()))
        ma = cctbx_utils.convert_to_sf(hkl, intensities, phases_rad,
                                       symmetry.cell_equivalent_p1())

        # completeness must land within 2 percent of the requested value
        np.testing.assert_allclose(ma.completeness(), target, atol=0.02)
def test_find_origin():
    """
    Exercise proc.FindOrigin on the 4bfh reference: the unshifted reference
    phases must give a zero shift, and phases translated by a random
    fractional shift must be mapped back to within one grid step of a
    permitted origin.
    """
    import ProcessCrystals as proc

    # paths, default values and reference structure factors
    pdb_path = "./reference/pdb_files/4bfh.pdb"
    res, n_processes, grid_spacing = 3.0, 8, 1.0
    sg_symbol, sg_no, cell, cs = cctbx_utils.unit_cell_info(pdb_path)
    refI, refp = cctbx_utils.reference_sf(pdb_path,
                                          res,
                                          expand_to_p1=True,
                                          table='electron')

    # the reference structure should place the phase origin at (0,0,0);
    # equivalent origins are expected to rank lower due to interpolation errors
    finder = proc.FindOrigin(sg_symbol, cell, cs, refp, refI)
    dmetrics, found = finder.scan_candidates(grid_spacing, n_processes)
    np.testing.assert_array_equal(found, np.zeros(3))

    # translate the phases by a random shift and search again
    true_shift = np.random.random(3)
    shifted_phases = finder.shift_phases(true_shift)
    refp_shifted = OrderedDict(zip(finder.hkl, shifted_phases))
    finder_shifted = proc.FindOrigin(sg_symbol, cell, cs, refp_shifted, refI)
    data, found = finder_shifted.scan_candidates(grid_spacing, n_processes)

    # compare against the permitted fractional origin shifts for P 21 21 21
    permitted = np.array([0.0, 0.5, 1.0])
    tol = grid_spacing / np.array(cell)[:3]
    residual = np.abs(1.0 - np.array(found) - true_shift)
    distance_to_permitted = np.min(np.abs(permitted - residual.reshape(3, 1)),
                                   axis=1)
    assert all(distance_to_permitted < tol)
def test_compare_crystals():
    """
    Check that proc.CompareCrystals.grid_shift recovers the random origin
    shift applied by mock_data.generate_mock_data, for two PDB files.
    """
    import ProcessCrystals as proc

    res, n_processes, grid_spacing = 3.0, 8, 1.0

    for pdb_id in ['4bfh', '6d6g']:
        pdb_path = "./reference/pdb_files/%s.pdb" % pdb_id
        sg_symbol, sg_no, cell, cs = cctbx_utils.unit_cell_info(pdb_path)

        # randomly shifted target data, then the unshifted reference
        target_I, target_p, expected = mock_data.generate_mock_data(
            pdb_path, res, completeness=1.0, hkl_sel=None, sigma=0.0)
        refI, refp = cctbx_utils.reference_sf(pdb_path,
                                              res,
                                              expand_to_p1=True,
                                              table='electron')

        # shifts relating target to reference
        comparer = proc.CompareCrystals(cell, target_p, hklI=target_I)
        mgrid, hklp_shifted, found = comparer.grid_shift(refp,
                                                         grid_spacing,
                                                         n_processes,
                                                         hklI_ref=refI)

        # found and expected shifts must sum to 1 within one grid step per axis
        tol = grid_spacing / np.array(cell)[:3]
        mismatch = np.abs(1.0 - found - expected)
        assert all(mismatch < tol)
def simulate_damage(specs_path, pdb_path, resolution, tilt_order, bfactor):
    """
    Apply the damage model
        I(q,n) = I(q,0) * exp(-n*B*q^2 / (16*pi^2))
    to reference intensities, where B is the initial B factor, I(q,0) the
    undamaged intensity, and n the 1-based position in tilt_order of the tilt
    nearest to the reflection's predicted tilt angle. The crystal is given a
    random orientation via generate_random_A.

    Inputs:
    -------
    specs_path: path to file indicating details of data collection strategy
    pdb_path: path to reference PDB file
    resolution: high-resolution limit of structure factors
    tilt_order: tilt angles in order of image collected, first to last
    bfactor: initial B factor in Angstrom squared at image 0

    Outputs:
    --------
    hklI: OrderedDict whose keys are Millers and values are 'damaged' intensities
    hklp: OrderedDict with the keys of hklI and the reference phases as values
    hklt: mapping returned by predict_angles (Miller -> tilt angle)
    A: crystal setting matrix returned by generate_random_A
    """
    # reference (undamaged) structure factors
    refI, refp = cctbx_utils.reference_sf(pdb_path,
                                          resolution,
                                          expand_to_p1=True)

    # predicted tilt angle of every reflection for a random orientation
    A = generate_random_A(pdb_path)
    hklt = predict_angles(specs_path, pdb_path, resolution, A)
    tilts = np.array(hklt.values())

    # collection index of each tilt angle; a repeated angle keeps its last index
    order_of = OrderedDict(
        (tilt, position) for position, tilt in enumerate(tilt_order))

    # n is proportional to dose: index of the collected tilt nearest each reflection
    n = np.zeros_like(tilts)
    for i, predicted in enumerate(tilts):
        closest = min(order_of, key=lambda tilt: abs(tilt - predicted))
        n[i] = order_of[closest]
    n += 1  # this way, first image effectively receives some dose

    # |q| for each reflection and its undamaged intensity
    millers = np.squeeze(np.array(hklt.keys()))
    qmags = np.linalg.norm(np.inner(A, millers).T, axis=1)
    I_0 = np.array([refI[hkl] for hkl in hklt.keys()])

    # damaged intensities
    scale = bfactor / (16 * np.square(np.pi))
    I_d = I_0 * np.exp(-scale * n * np.square(qmags))

    hklI = OrderedDict(zip(hklt.keys(), I_d))
    hklp = OrderedDict((hkl, refp[hkl]) for hkl in hklI.keys())
    return hklI, hklp, hklt, A
def test_merge_crystals():
    """
    Merge two randomly shifted, half-complete (in P1) mock data sets with
    proc.MergeCrystals and check (1) that the fractional shift found for the
    second crystal agrees with the shifts applied when the mock data were
    generated, and (2) that the merged intensities match the reference.
    """
    import ProcessCrystals as proc

    res, grid_spacing, n_processes = 3.3, 0.5, 8

    for pdb_id in ['4bfh', '6d6g']:
        # generate reference information and structure factors
        pdb_path = "./reference/pdb_files/%s.pdb" % pdb_id
        sg_symbol, sg_no, cell, cs = cctbx_utils.unit_cell_info(pdb_path)
        refI, refp = cctbx_utils.reference_sf(pdb_path,
                                              res,
                                              expand_to_p1=True,
                                              table='electron')

        # generate randomly-shifted, half-complete (in P1) mock data
        hklI1, hklp1, eshifts1 = mock_data.generate_mock_data(
            pdb_path, res, completeness=0.5, sigma=0.0)
        hklI2, hklp2, eshifts2 = mock_data.generate_mock_data(
            pdb_path, res, completeness=0.5, sigma=0.0)

        # merge crystals using MergeCrystals class
        mc = proc.MergeCrystals(space_group=sg_no, grid_spacing=grid_spacing)
        mc.add_crystal(hklI1,
                       hklp1,
                       np.array(cell),
                       n_processes=n_processes,
                       weighted=True)
        mc.add_crystal(hklI2,
                       hklp2,
                       np.array(cell),
                       n_processes=n_processes,
                       weighted=True)

        # check that fractional shift for merge matches expected value:
        # the combined shift must sit within one grid step of 0 or 1 on each axis
        tol = np.max(grid_spacing / np.array(cell)[:3])
        p_origins = np.array([0.0, 1.0])
        c_origin = eshifts2 - eshifts1 + mc.fshifts[1]
        assert all(
            np.min(np.abs(p_origins - c_origin.reshape(3, 1)), axis=1) < tol)

        # check that intensities of merged data match reference; the magnitude
        # of the relative error is tested so that negative deviations
        # (merged intensity below reference) cannot slip through
        Imerge, pmerge = mc.merge_values()
        rel_err = np.array([(Imerge[hkl] - refI[hkl]) / Imerge[hkl]
                            for hkl in Imerge.keys()])
        assert np.all(np.abs(rel_err) < 1e-6)
def test_reduce_crystals():
    """
    Check proc.ReduceCrystals on the 4bfh reference: phases reduced to the
    asymmetric unit must be internally consistent and must match the
    reference (as must the intensities); after a random origin shift the
    symmetry-expected phase relationships must no longer all hold.
    """
    import ProcessCrystals as proc

    def _inconsistent_sets_near_multiple_of_180(phase_sets):
        # For every set whose entries are not all close to its first entry,
        # report whether its rounded entries are all multiples of 180.
        consistent = np.array(
            [np.allclose(v, v[0]) for v in phase_sets.values()])
        return [
            np.allclose(np.around(phase_sets.values()[i]) % 180, 0)
            for i in np.where(np.logical_not(consistent))[0]
        ]

    # set up paths and default values
    pdb_path, res = "./reference/pdb_files/4bfh.pdb", 3.0
    sg_symbol, sg_no, cell, cs = cctbx_utils.unit_cell_info(pdb_path)
    refI, refp = cctbx_utils.reference_sf(pdb_path,
                                          res,
                                          expand_to_p1=True,
                                          table='electron')
    # ReduceCrystals is handed array-valued dictionaries
    refI_mod = OrderedDict((hkl, np.array([val])) for hkl, val in refI.items())
    refp_mod = OrderedDict((hkl, np.array([val])) for hkl, val in refp.items())

    # reduced phases must be internally consistent
    rc = proc.ReduceCrystals(refI_mod, refp_mod, cell, sg_symbol)
    p_asu = rc.reduce_phases(weighted=True)
    rc.reduce_intensities()
    assert all(_inconsistent_sets_near_multiple_of_180(p_asu))

    # reduced phases and intensities must match the reference
    hkl_asu = list(
        cs.build_miller_set(anomalous_flag=False, d_min=res).indices())
    phase_diff = np.array(
        [rc.data['PHIB'][hkl] - refp[hkl] for hkl in hkl_asu])
    assert np.allclose(utils.wraptopi(phase_diff), 0)
    intensity_diff = np.array(
        [rc.data['IMEAN'][hkl] - refI[hkl] for hkl in hkl_asu])
    assert np.allclose(intensity_diff, 0)

    # shifting phases away from the origin should break the
    # symmetry-expected relationships
    rc.shift_phases(np.random.random(3))
    p_asu = rc.reduce_phases(weighted=True)
    assert not all(_inconsistent_sets_near_multiple_of_180(p_asu))
def test_residual_phases():
    """
    Validate utils.residual_phases against an independent (slightly slower)
    computation: arccos(cos(p1 - p2)) evaluated on the phases shared between
    the reference and an error-perturbed copy of it.
    """
    pdb_path, res = "./reference/pdb_files/4bfh.pdb", 3.0
    refI, refp = cctbx_utils.reference_sf(pdb_path,
                                          res,
                                          expand_to_p1=True,
                                          table='electron')
    refp_err = mock_data.add_phase_errors(refp, 10.0)

    # independent estimate; columns 0 and 1 hold the two phase sets (degrees in)
    shared = utils.shared_dict(refp, refp_err)
    pairs = np.deg2rad(np.array(shared.values()))
    delta = pairs[:, 0] - pairs[:, 1]
    expected = np.rad2deg(np.arccos(np.cos(delta)))

    # value under test
    observed = utils.residual_phases(refp, refp_err)

    np.testing.assert_allclose(expected, observed)
def simulate_one_tilt(pdb_path, resolution, angle, A=None, ang_width=1.0):
    """
    Generate reflection data within +/- ang_width of the input tilt angle and
    subject the retained phases to a random, global phase shift.

    Inputs:
    -------
    pdb_path: path to coordinates file
    resolution: resolution limit passed to the reference structure factor
        calculation and to predict_angles
    angle: tilt angle reflection data are centered on
    A: orientation matrix passed to predict_angles, optional; the 3x3
        identity is used when omitted (or None)
    ang_width: half-width of the angular window; both bounds are exclusive

    Outputs:
    --------
    hklI_sel: OrderedDict of retained Miller indices and corresponding intensities
    hklp_sel: OrderedDict of retained Miller indices and corresponding phases,
        as returned by mock_data.add_random_phase_shift
    shifts: shift information returned by mock_data.add_random_phase_shift
    """
    # a fresh identity per call avoids sharing one default array between calls
    if A is None:
        A = np.eye(3)

    # compute reference information
    # NOTE(review): the unit cell values are not used below; the call is kept
    # in case its reading of pdb_path is relied upon -- confirm and drop.
    sg_symbol, sg_no, cell, cs = cctbx_utils.unit_cell_info(pdb_path)
    refI, refp = cctbx_utils.reference_sf(pdb_path,
                                          resolution,
                                          expand_to_p1=True)

    # rotate reflection data and compute tilt angles
    # NOTE(review): simulate_damage calls predict_angles with a leading
    # specs_path argument -- confirm which signature applies here.
    hklt = predict_angles(pdb_path, resolution, A)
    hkl = np.array(list(hklt.keys()))
    tilts = np.array(list(hklt.values()))

    # retain reflection data in observed angular range
    in_window = (tilts > angle - ang_width) & (tilts < angle + ang_width)
    hkl_sel = [tuple(h) for h in hkl[in_window]]
    hkl_sel = utils.remove_Friedels(hkl_sel)

    # set membership keeps the two filters below linear in the number of
    # reference reflections instead of scanning the selection for every key
    keep = set(map(tuple, hkl_sel))
    hklI_sel = OrderedDict(
        (key, val) for key, val in refI.items() if key in keep)
    hklp_sel = OrderedDict(
        (key, val) for key, val in refp.items() if key in keep)

    # add random phase shift
    hklp_sel, shifts = mock_data.add_random_phase_shift(hklp_sel)

    return hklI_sel, hklp_sel, shifts
def generate_mock_data(ref_path,
                       resolution,
                       completeness=1.0,
                       hkl_sel=None,
                       sigma=0.0):
    """
    Build mock intensity and phase dictionaries from the reference structure
    factors: optionally discard Millers (to reach a target completeness
    and/or to keep only a supplied list), translate the phase origin by a
    random shift, then introduce phase errors.

    Inputs:
    -------
    ref_path: path to reference PDB file
    resolution: high-resolution limit of structure factors
    completeness: fraction of Millers to retain, default is 1.0
    hkl_sel: list of Millers to retain; if None (default), retain all
    sigma: standard deviation for error distribution, default is 0.0

    Outputs:
    --------
    hklI: dict whose keys are Millers and values are intensities
    hklp: dict whose keys are Millers and values are shifted, error-perturbed phases
    shifts: fractional shifts by which phase origin has been translated
    """
    intensities, phases = cctbx_utils.reference_sf(ref_path,
                                                   resolution,
                                                   expand_to_p1=True,
                                                   table='electron')

    # thin the data only when less than full completeness is requested
    if completeness != 1.0:
        intensities, phases = remove_millers(intensities, phases,
                                             completeness)

    # restrict to an explicit Miller list when one is supplied
    if hkl_sel is not None:
        intensities, phases = retain_millers(intensities, phases, hkl_sel)

    # origin shift first, phase errors second
    shifted_phases, shifts = add_random_phase_shift(phases)
    noisy_phases = add_phase_errors(shifted_phases, sigma, friedels_same=True)

    return intensities, noisy_phases, shifts
def assess_tr_data(args):
    """
    Evaluate a series of mock crystals from randomly oriented tomograms.
    Crystals are added one at a time to a running merge; the metrics stored
    at index num are computed on the merged data after crystal num was added.

    Inputs:
    -------
    args: dict with keys
        pdb_path: reference PDB file
        resolution: resolution cutoff
        grid_spacing, n_processes: passed to the merging / origin / comparison searches
        n_cryst: number of crystals to process
        data_paths: dict of per-crystal path lists under 'intensities',
            'phases' (pickle files) and 'json' (passed to unit_cell_info)
        output: directory in which reduced%i.ccp4 maps are saved

    Outputs:
    --------
    results: dict of arrays of length n_cryst under the keys r_ref, r_sym,
        completeness_p1, completeness_sg, cc_map, cc_I
    shifts: dict of (n_cryst, 3) arrays under the keys reference, merge and
        origin (origin is absent when the reference space group number is 1)
    """
    # compute reference information
    ref_sg_symbol, ref_sg_no, ref_cell, ref_cs = cctbx_utils.unit_cell_info(
        args['pdb_path'])
    refI, refp = cctbx_utils.reference_sf(args['pdb_path'],
                                          args['resolution'],
                                          expand_to_p1=True,
                                          table='electron')
    # array-valued copy of the reference phases, as passed to residual_phases
    refp_mod = OrderedDict((key, np.array([val])) for key, val in refp.items())

    # set up dictionaries for storing results
    n_cryst = args['n_cryst']
    results = dict((key, np.zeros(n_cryst)) for key in [
        'r_ref', 'r_sym', 'completeness_p1', 'completeness_sg', 'cc_map',
        'cc_I'
    ])
    shifts = dict((key, np.zeros((n_cryst, 3)))
                  for key in ['reference', 'origin', 'merge'])
    if ref_sg_no == 1:
        shifts.pop('origin')

    # set up MergeCrystals class for scan over crystals
    mc = proc.MergeCrystals(space_group=ref_sg_no,
                            grid_spacing=args['grid_spacing'])

    for num in range(n_cryst):
        paths = args['data_paths']
        print("Intensites from: %s" % (paths['intensities'][num]))
        print("Phases from: %s" % (paths['phases'][num]))
        print("Indexing matrix from: %s" % (paths['json'][num]))

        # load simulated data; files are opened in binary mode and closed
        # promptly instead of leaking one handle per crystal
        # NOTE: pickle can execute arbitrary code -- only load trusted files
        with open(paths['intensities'][num], 'rb') as handle:
            hklI = pickle.load(handle)
        with open(paths['phases'][num], 'rb') as handle:
            hklp = pickle.load(handle)
        sg_symbol, sg_no, cell, cs = cctbx_utils.unit_cell_info(
            paths['json'][num])
        assert sg_symbol == ref_sg_symbol

        # filter Millers below resolution cutoff and with negative intensities
        # (assumes hklI and hklp list their Millers in the same order, since
        # the intensity test is indexed by position -- TODO confirm)
        hkl_list = list(hklp.keys())
        res = utils.compute_resolution(sg_no, cell, np.array(hkl_list))
        rejected = np.where((res < args['resolution']) |
                            (np.array(list(hklI.values())) < 0))[0]
        for idx in rejected:
            hkl = hkl_list[idx]
            hklp.pop(hkl)
            hklI.pop(hkl)

        # merge crystal
        mc.add_crystal(hklI,
                       hklp,
                       np.array(cell),
                       n_processes=args['n_processes'],
                       weighted=True)
        hklI_avg, hklp_avg = mc.merge_values()
        avg_cell = tuple(
            np.average(list(mc.cell_constants.values()), axis=0))

        # locate crystallographic origin if not triclinic
        if sg_no != 1:
            fo = proc.FindOrigin(sg_symbol, avg_cell, cs, hklp_avg, hklI_avg)
            dmetrics, shifts['origin'][num] = fo.scan_candidates(
                args['grid_spacing'], args['n_processes'])

        # shift phases to match reference for easier comparison
        comp = proc.CompareCrystals(avg_cell, hklp_avg, hklI_avg)
        m_grid, hklp_shifted, shifts['reference'][num] = comp.grid_shift(
            refp, args['grid_spacing'], args['n_processes'], refI)
        hklI_merge = OrderedDict(mc.hklI.items())
        hklp_merge = OrderedDict(mc.hklp.items())

        # reduce phases to asymmetric unit
        rc = proc.ReduceCrystals(hklI_merge, hklp_merge, avg_cell, sg_symbol)
        rc.shift_phases(shifts['reference'][num])
        p_asu = rc.reduce_phases(weighted=True)
        rc.reduce_intensities()
        mtz_object = rc.generate_mtz()

        # examine cross-correlation with reference map
        savename = os.path.join(args['output'], "reduced%i.ccp4" % num)
        ma = cctbx_utils.mtz_to_miller_array(mtz_object)
        map_sim = cctbx_utils.compute_map(ma, save_name=savename)
        results['cc_map'][num] = cctbx_utils.compare_maps(
            map_sim, args['pdb_path'])

        # compute completeness -- both (non-anomalous) P1 and space group
        for cs_obj, tag in zip([cs, cs.cell_equivalent_p1()],
                               ['completeness_sg', 'completeness_p1']):
            reduced_hkl = list(rc.hklI.keys())
            ma_calc = miller.array(
                miller_set=miller.set(cs_obj,
                                      flex.miller_index(reduced_hkl),
                                      anomalous_flag=False),
                data=flex.double(np.ones(len(reduced_hkl))))
            results[tag][num] = ma_calc.merge_equivalents().array(
            ).completeness()

        # assess quality of phases
        phib = OrderedDict(
            (key, np.array([val])) for key, val in rc.data['PHIB'].items())
        results['r_ref'][num] = np.mean(utils.residual_phases(phib, refp_mod))
        results['r_sym'][num] = np.mean(utils.residual_to_avgphase(p_asu))

        # assess accuracy of intensities: log-log cross-correlation
        ivals = np.array(
            list(utils.shared_dict(refI, rc.data['IMEAN']).values()))
        results['cc_I'][num] = np.corrcoef(np.log10(ivals[:, 0]),
                                           np.log10(ivals[:, 1]))[0, 1]

        # examine errors in predicted fractional shifts
        if num != 0:
            shifts['merge'][num] = mc.fshifts[num]

        print("\n")

    return results, shifts
start_time = time.time()

# command line arguments, extended with fixed processing parameters
args = parse_commandline()
args['resolution'] = 3.3
# grid spacing used for crystallographic origin search
args['grid_spacing'] = 0.5
# number of origin candidates to consider per tilt image
args['n_candidates'] = 150
# phase residual threshold for adding a tilt image
args['threshold'] = 4.0

# reference information: symmetry, cell, and P1-expanded structure factors
(args['sg_symbol'], args['sg_no'], args['cell'],
 args['cs']) = cctbx_utils.unit_cell_info(args['pdb_path'])
refI, refp = cctbx_utils.reference_sf(args['pdb_path'],
                                      args['resolution'],
                                      expand_to_p1=True)
# array-valued copy of the reference phases
refp_mod = OrderedDict(
    (hkl, np.array([phase])) for hkl, phase in refp.items())
# hard-coded table of equivalent origins, as fractional shifts
eq_origins = np.array([
    [0, 0, 0],
    [0, 0, 0.5],
    [0.5, 0.5, 0],
    [0.5, 0.5, 0.5],
])

# additional set-up information
pnames = os.path.join(args['dpath'], "pimage_*_p.pickle")
tilt_angles = np.loadtxt(args['tilt_angles'])
args['hkl_p1'], args['hkl_asu'] = retrieve_hkl(pnames, args['cs'],
                                               tilt_angles)
remaining_angles = list(tilt_angles)

# information to track during merging: one slot per tilt angle
comp_sg = np.zeros(len(tilt_angles))
p_residuals = np.zeros(len(tilt_angles))
def assess_mock_data(args, rep):
    """
    Evaluate a series of mock crystals with specified completeness and phase
    errors. Each crystal is damaged with mock_data.simulate_damage, reduced to
    the reflections chosen by select_hkl, and added to a running merge; the
    metrics stored at index num describe the merge after crystal num.

    Inputs:
    -------
    args: dict specifying n_processes, grid_spacing, pdb_path, resolution,
          n_cryst, completeness, sigma, specs_path, tilt_list, bfactor
    rep: repetition number currently being computed (not used in the body)

    Outputs:
    --------
    results: dict of length-n_cryst arrays under sigma_i, r_ref_i, sigma,
        r_ref, r_sym, completeness_p1, completeness_sg, cc_map, cc_I
    shifts: dict of (n_cryst, 3) arrays under reference, expected, merge and
        origin (origin is dropped when the space group number is 1)
    hkl_tilts_Id: dict keyed by crystal number; each value is an OrderedDict
        of retained hkl: np.array([tilt angle, damaged intensity])
    A_matrices: dict keyed by crystal number of the matrices returned by
        mock_data.simulate_damage
    """
    # reference symmetry and P1-expanded structure factors
    sg_symbol, sg_no, cell, cs = cctbx_utils.unit_cell_info(args['pdb_path'])
    refI, refp = cctbx_utils.reference_sf(args['pdb_path'],
                                          args['resolution'],
                                          expand_to_p1=True,
                                          table='electron')
    refp_mod = OrderedDict(
        (hkl, np.array([phase])) for hkl, phase in refp.items())

    # containers for everything that is returned
    hkl_tilts_Id, A_matrices = dict(), dict()
    results = dict((name, np.zeros(args['n_cryst'])) for name in [
        'sigma_i', 'r_ref_i', 'sigma', 'r_ref', 'r_sym', 'completeness_p1',
        'completeness_sg', 'cc_map', 'cc_I'
    ])
    shifts = dict((name, np.zeros((args['n_cryst'], 3)))
                  for name in ['reference', 'expected', 'origin', 'merge'])
    if sg_no == 1:
        shifts.pop('origin')

    # one MergeCrystals instance accumulates all crystals
    mc = proc.MergeCrystals(space_group=sg_no,
                            grid_spacing=args['grid_spacing'])

    for num in range(args['n_cryst']):

        # analytic damage model applied to a randomly-oriented crystal
        hklId, hklpd, hkltd, orientation = mock_data.simulate_damage(
            args['specs_path'], args['pdb_path'], args['resolution'],
            args['tilt_list'], args['bfactor'])
        A_matrices[num] = orientation

        # reflections to keep for the specified completeness
        hkl_sel = select_hkl(cs, args['resolution'], args['completeness'],
                             hklId)

        # mock data restricted to that hkl list
        hklI, hklp, expected = mock_data.generate_mock_data(
            args['pdb_path'],
            args['resolution'],
            completeness=1.0,
            hkl_sel=hkl_sel,
            sigma=args['sigma'])
        shifts['expected'][num] = expected

        # damaged intensities (and tilt angles) for the retained Millers
        hklI_d = OrderedDict((hkl, hklId[hkl]) for hkl in hklI.keys())
        hkl_tilts_Id[num] = OrderedDict(
            (hkl, np.array([hkltd[hkl], hklI_d[hkl]])) for hkl in hklI.keys())

        # starting phase errors
        sigma_i, m_error = initial_errors(hklp, refp, shifts['expected'][num])
        results['sigma_i'][num] = sigma_i
        results['r_ref_i'][num] = m_error

        # add to the running merge
        mc.add_crystal(hklI_d,
                       hklp,
                       np.array(cell),
                       n_processes=args['n_processes'],
                       weighted=True)
        hklI_avg, hklp_avg = mc.merge_values()

        # the crystallographic origin search is disabled in this function, so
        # shifts['origin'] (when present) keeps its zero initialisation

        # shift phases to match reference for easier comparison
        comparer = proc.CompareCrystals(cell, hklp_avg, hklI_avg)
        m_grid, hklp_shifted, ref_shift = comparer.grid_shift(
            refp, args['grid_spacing'], args['n_processes'], refI)
        shifts['reference'][num] = ref_shift
        hklI_merge = OrderedDict(mc.hklI.items())
        hklp_merge = OrderedDict(mc.hklp.items())

        # reduce phases to asymmetric unit
        rc = proc.ReduceCrystals(hklI_merge, hklp_merge, cell, sg_symbol)
        rc.shift_phases(shifts['reference'][num])
        p_asu = rc.reduce_phases(weighted=True)
        rc.reduce_intensities()
        mtz_object = rc.generate_mtz()

        # cross-correlation with reference map (map is not written to disk)
        ma = cctbx_utils.mtz_to_miller_array(mtz_object)
        map_sim = cctbx_utils.compute_map(ma)
        results['cc_map'][num] = cctbx_utils.compare_maps(
            map_sim, args['pdb_path'])

        # completeness -- both (non-anomalous) space group and P1
        symmetries = [('completeness_sg', cs),
                      ('completeness_p1', cs.cell_equivalent_p1())]
        for tag, cs_obj in symmetries:
            miller_set = miller.set(cs_obj,
                                    flex.miller_index(rc.hklI.keys()),
                                    anomalous_flag=False)
            unit_data = flex.double(np.ones(len(rc.hklI.keys())))
            ma_calc = miller.array(miller_set=miller_set, data=unit_data)
            results[tag][num] = ma_calc.merge_equivalents().array(
            ).completeness()

        # quality of phases
        sigma, r_ref = utils.residual_phase_distribution(
            refp, rc.data['PHIB'])
        results['sigma'][num] = sigma
        results['r_ref'][num] = r_ref
        results['r_sym'][num] = np.mean(utils.residual_to_avgphase(p_asu))

        # accuracy of intensities: log-log cross-correlation
        ivals = np.array(utils.shared_dict(refI, rc.data['IMEAN']).values())
        log_ref, log_obs = np.log10(ivals[:, 0]), np.log10(ivals[:, 1])
        results['cc_I'][num] = np.corrcoef(log_ref, log_obs)[0, 1]

        # fractional shift found while merging; undefined for the first crystal
        if num != 0:
            shifts['merge'][num] = mc.fshifts[num]

        print("\n")

    return results, shifts, hkl_tilts_Id, A_matrices