def get_map_stats_for_atoms (self, atoms) :
  from cctbx import maptbx
  from scitbx.array_family import flex
  sites_cart = flex.vec3_double()
  sites_cart_nonH = flex.vec3_double()
  values_2fofc = flex.double()
  values_fofc = flex.double()
  for atom in atoms :
    sites_cart.append(atom.xyz)
    if (not atom.element.strip() in ["H","D"]) : #XXX trap: neutrons?
      sites_cart_nonH.append(atom.xyz)
      site_frac = self.unit_cell.fractionalize(atom.xyz)
      values_2fofc.append(self.f_map.eight_point_interpolation(site_frac))
      values_fofc.append(self.diff_map.eight_point_interpolation(site_frac))
  if (len(sites_cart_nonH) == 0) :
    return None
  sel = maptbx.grid_indices_around_sites(
    unit_cell=self.unit_cell,
    fft_n_real=self.f_map.focus(),
    fft_m_real=self.f_map.all(),
    sites_cart=sites_cart,
    site_radii=get_atom_radii(atoms, self.atom_radius))
  f_map_sel = self.f_map.select(sel)
  model_map_sel = self.model_map.select(sel)
  diff_map_sel = self.diff_map.select(sel)
  cc = flex.linear_correlation(x=f_map_sel, y=model_map_sel).coefficient()
  return group_args(cc=cc,
    mean_2fofc=flex.mean(values_2fofc),
    mean_fofc=flex.mean(values_fofc))
def show(self):
  b = self.bss_result
  print >> self.log, " Statistics in resolution bins:"
  #assert k_mask.size() == len(self.bin_selections)
  fmt=" %7.5f %6.2f -%6.2f %5.1f %5d %-6s %-6s %-6s %6.3f %6.3f %8.2f %6.4f"
  f_model = self.core.f_model.data()
  print >> self.log, " s^2 Resolution Compl Nrefl k_mask k_iso k_ani <Fobs> R"
  print >> self.log, " (A) (%) orig smooth average"
  k_mask_bin_orig_   = str(None)
  k_mask_bin_smooth_ = str(None)
  k_mask_bin_approx_ = str(None)
  for i_sel, cas in enumerate(self.cores_and_selections):
    selection, core, selection_use, sel_work = cas
    sel = sel_work
    ss_ = self.ss_bin_values[i_sel][2]
    if(b is not None and self.bss_result.k_mask_bin_orig is not None):
      k_mask_bin_orig_ = "%6.4f"%self.bss_result.k_mask_bin_orig[i_sel]
    if(b is not None and self.bss_result.k_mask_bin_smooth is not None):
      k_mask_bin_smooth_ = "%6.4f"%self.bss_result.k_mask_bin_smooth[i_sel]
    k_mask_bin_averaged_ = "%6.4f"%flex.mean(self.core.k_mask().select(sel))
    d_             = self.d_spacings.data().select(sel)
    d_min_         = flex.min(d_)
    d_max_         = flex.max(d_)
    n_ref_         = d_.size()
    f_obs_         = self.f_obs.select(sel)
    f_obs_mean_    = flex.mean(f_obs_.data())
    k_isotropic_   = flex.mean(self.core.k_isotropic.select(sel))
    k_anisotropic_ = flex.mean(self.core.k_anisotropic.select(sel))
    cmpl_          = f_obs_.completeness(d_max=d_max_)*100.
    r_             = bulk_solvent.r_factor(f_obs_.data(), f_model.select(sel), 1)
    print >> self.log, fmt%(ss_, d_max_, d_min_, cmpl_, n_ref_,
      k_mask_bin_orig_, k_mask_bin_smooth_, k_mask_bin_averaged_,
      k_isotropic_, k_anisotropic_, f_obs_mean_, r_)
def optimize(self):
  # initialise the population please
  self.make_random_population()
  # score the population please
  self.score_population()
  converged = False
  monitor_score = flex.min( self.scores )
  self.count = 0
  while not converged:
    self.evolve()
    location = flex.min_index( self.scores )
    if self.show_progress:
      if self.count%self.show_progress_nth_cycle==0:
        # make here a call to a custom print_status function in the evaluator function
        # the function signature should be (min_target, mean_target, best vector)
        self.evaluator.print_status(
          flex.min(self.scores),
          flex.mean(self.scores),
          self.population[ flex.min_index( self.scores ) ],
          self.count)
    self.count += 1
    if self.count%self.monitor_cycle==0:
      if (monitor_score-flex.min(self.scores) ) < self.eps:
        converged = True
      else:
        monitor_score = flex.min(self.scores)
    rd = (flex.mean(self.scores) - flex.min(self.scores) )
    rd = rd*rd/(flex.min(self.scores)*flex.min(self.scores) + self.eps )
    if ( rd < self.eps ):
      converged = True
    if self.count>=self.max_iter:
      converged = True
def log_frame(experiments, reflections, params, run, n_strong, timestamp = None,
              two_theta_low = None, two_theta_high = None):
  app = dxtbx_xfel_db_application(params)
  db_run = app.get_run(run_number=run)
  if params.input.trial is None:
    db_trial = app.get_trial(trial_id = params.input.trial_id)
    params.input.trial = db_trial.trial
  else:
    db_trial = app.get_trial(trial_number = params.input.trial)
  if params.input.rungroup is None:
    db_event = app.create_event(timestamp = timestamp,
                                run_id = db_run.id,
                                trial_id = db_trial.id,
                                n_strong = n_strong,
                                two_theta_low = two_theta_low,
                                two_theta_high = two_theta_high)
  else:
    db_event = app.create_event(timestamp = timestamp,
                                run_id = db_run.id,
                                trial_id = db_trial.id,
                                rungroup_id = params.input.rungroup,
                                n_strong = n_strong,
                                two_theta_low = two_theta_low,
                                two_theta_high = two_theta_high)
  if experiments is not None:
    assert len(experiments) == 1
    db_experiment = app.create_experiment(experiments[0])
    app.link_imageset_frame(db_experiment.imageset, db_event)
    d = experiments[0].crystal.get_unit_cell().d(reflections['miller_index'])
    for db_bin in db_experiment.crystal.cell.bins: # will be [] if there are no isoforms
      sel = (d <= float(db_bin.d_max)) & (d > float(db_bin.d_min))
      sel &= reflections['intensity.sum.value'] > 0
      refls = reflections.select(sel)
      n_refls = len(refls)
      Cell_Bin(app,
               count = n_refls,
               bin_id = db_bin.id,
               crystal_id = db_experiment.crystal.id,
               avg_intensity = flex.mean(refls['intensity.sum.value'])
                 if n_refls > 0 else None,
               avg_sigma = flex.mean(flex.sqrt(refls['intensity.sum.variance']))
                 if n_refls > 0 else None,
               avg_i_sigi = flex.mean(refls['intensity.sum.value'] /
                 flex.sqrt(refls['intensity.sum.variance']))
                 if n_refls > 0 else None)
def is_converged(self, rho_trial):
  result = False
  r = r_factor(self.f, self.f_mem, use_scale=False)
  if(r < self.convergence_r_threshold):
    if(self.xray_structure is None):
      self.r_factors.append(r)
      size = self.r_factors.size()
      if(size>=3):
        tmp = flex.mean(self.r_factors[size-3:])
        if(tmp <= r or r <= self.convergence_at_r_factor):
          result = True
    else:
      f_mem = self.full_set.structure_factors_from_map(
        map            = rho_trial,
        use_scale      = False,
        anomalous_flag = False,
        use_sg         = False)
      self.cc = f_mem.map_correlation(other = self.f_calc)
      self.cc_to_answer.append(self.cc)
      def max_change_so_far(x):
        # collect successive differences of x (previously read
        # self.cc_to_answer directly; now uses the argument it is given)
        result = flex.double()
        if(x.size()):
          for i in xrange(x.size()):
            if(i>0):
              result.append(x[i]-x[i-1])
        return flex.max(result)
      size = self.cc_to_answer.size()
      if(size>=3):
        mcsf = max_change_so_far(x = self.cc_to_answer)
        tmp = flex.mean(self.cc_to_answer[size-3:])
        if(tmp >= self.cc-1.e-6 or mcsf/5 >= self.cc-tmp):
          result = True
        else:
          self.cc = None
  return result
def exercise():
  """
  Exercise refine "easy" with DNA/RNA.
  """
  pi_good = get_pdb_inputs(pdb_str=pdb_str_answer, restraints=False)
  map_data = get_map(xrs=pi_good.xrs)
  xrs_good = pi_good.xrs.deep_copy_scatterers()
  pi_good.ph.write_pdb_file(file_name="answer.pdb",
    crystal_symmetry=xrs_good.crystal_symmetry())
  #
  pi_poor = get_pdb_inputs(pdb_str=pdb_str_poor, restraints=True)
  pi_poor.ph.write_pdb_file(file_name="poor.pdb")
  xrs_poor = pi_poor.xrs.deep_copy_scatterers()
  #
  d = xrs_good.distances(other=xrs_poor)
  print d.min_max_mean().as_tuple()
  assert flex.max(d)>2
  assert flex.mean(d)>0.7
  #
  xrs_refined = xrs_poor
  for i in xrange(3):
    ero = individual_sites.easy(
      map_data                    = map_data,
      xray_structure              = xrs_refined,
      pdb_hierarchy               = pi_poor.ph,
      geometry_restraints_manager = pi_poor.grm)
    xrs_refined = ero.xray_structure
  # compare
  d = xrs_good.distances(other=xrs_refined)
  print d.min_max_mean().as_tuple()
  assert flex.max(d)<0.15
  assert flex.mean(d)<0.03
  ero.pdb_hierarchy.write_pdb_file(file_name="refined.pdb",
    crystal_symmetry=xrs_good.crystal_symmetry())
def exercise_gauss():
  data = rt.normal_variate(mu=0, sigma=1, N=1000000)
  mu1 = flex.mean(data)
  mu2 = flex.mean(data*data)
  mu3 = flex.mean(data*data*data)
  assert approx_equal(mu1, 0, eps=0.02)
  assert approx_equal(mu2, 1, eps=0.02)
  assert approx_equal(mu3, 0, eps=0.02)
def __init__ (self, ligand, pdb_hierarchy, xray_structure, two_fofc_map,
    fofc_map, fmodel_map, reference_ligands=None, two_fofc_map_cutoff=1.5,
    fofc_map_cutoff=-3.0) :
  from mmtbx import real_space_correlation
  from cctbx import adptbx
  from scitbx.array_family import flex
  atom_selection = ligand.atoms().extract_i_seq()
  assert (len(atom_selection) == 1) or (not atom_selection.all_eq(0))
  manager = real_space_correlation.selection_map_statistics_manager(
    atom_selection=atom_selection,
    xray_structure=xray_structure,
    fft_m_real=two_fofc_map.all(),
    fft_n_real=two_fofc_map.focus(),
    exclude_hydrogens=True)
  stats_two_fofc = manager.analyze_map(
    map=two_fofc_map,
    model_map=fmodel_map,
    min=two_fofc_map_cutoff) # was hard-coded to 1.5, the parameter default
  stats_fofc = manager.analyze_map(
    map=fofc_map,
    model_map=fmodel_map,
    min=fofc_map_cutoff) # was hard-coded to -3.0, the parameter default
  self.atom_selection = manager.atom_selection # XXX non-hydrogens only!
  sites_cart = xray_structure.sites_cart().select(self.atom_selection)
  self.xyz_center = sites_cart.mean()
  self.id_str = ligand.id_str()
  self.cc = stats_two_fofc.cc
  self.two_fofc_min = stats_two_fofc.min
  self.two_fofc_max = stats_two_fofc.max
  self.two_fofc_mean = stats_two_fofc.mean
  self.fofc_min = stats_fofc.min
  self.fofc_max = stats_fofc.max
  self.fofc_mean = stats_fofc.mean
  self.n_below_two_fofc_cutoff = stats_two_fofc.n_below_min
  self.n_below_fofc_cutoff = stats_fofc.n_below_min
  u_iso = xray_structure.extract_u_iso_or_u_equiv().select(
    self.atom_selection)
  u_iso_mean = flex.mean(u_iso)
  self.b_iso_mean = adptbx.u_as_b(u_iso_mean)
  occ = xray_structure.scatterers().extract_occupancies().select(
    self.atom_selection)
  self.occupancy_mean = flex.mean(occ)
  self.rmsds = self.pbss = None
  if (reference_ligands is not None) and (len(reference_ligands) > 0) :
    self.rmsds, self.pbss = compare_ligands_impl(ligand=ligand,
      reference_ligands=reference_ligands,
      max_distance_between_centers_of_mass=8.0,
      raise_sorry_if_no_matching_atoms=False,
      verbose=False,
      quiet=True)
def exercise_variate_generators():
  from scitbx.random \
       import variate, normal_distribution, bernoulli_distribution, \
              gamma_distribution, poisson_distribution
  for i in xrange(10):
    scitbx.random.set_random_seed(0)
    g = variate(normal_distribution())
    assert approx_equal(g(), -1.2780081289048213)
    assert approx_equal(g(10),
      (-0.40474189234755492, -0.41845505596083288,
       -1.8825790263067721, -1.5779112018107659,
       -1.1888174422378859, -1.8619619179878537,
       -0.53946818661388318, -1.2400941724410812,
        0.64511959841907285, -0.59934120033270688))
  stat = basic_statistics(flex.double(itertools.islice(g, 1000000)))
  assert approx_equal(stat.mean,            0, eps=0.005)
  assert approx_equal(stat.biased_variance, 1, eps=0.005)
  assert approx_equal(stat.skew,            0, eps=0.005)
  assert approx_equal(stat.kurtosis,        3, eps=0.005)
  bernoulli_seq = variate(bernoulli_distribution(0.1))
  for b in itertools.islice(bernoulli_seq, 10):
    assert b in (True, False)
  bernoulli_sample = flex.bool(itertools.islice(bernoulli_seq, 10000))
  assert approx_equal(
    bernoulli_sample.count(True)/len(bernoulli_sample),
    0.1,
    eps=0.01)
  scitbx.random.set_random_seed(0)
  g = variate(gamma_distribution())
  assert approx_equal(g(), 0.79587450456577546)
  assert approx_equal(g(2), (0.89856038848394115, 1.2559307580473893))
  stat = basic_statistics(flex.double(itertools.islice(g, 1000000)))
  assert approx_equal(stat.mean,            1, eps=0.005)
  assert approx_equal(stat.skew,            2, eps=0.005)
  assert approx_equal(stat.biased_variance, 1, eps=0.005)
  scitbx.random.set_random_seed(0)
  g = variate(gamma_distribution(alpha=2, beta=3))
  assert approx_equal(g(), 16.670850592722729)
  assert approx_equal(g(2), (10.03662877519449, 3.9357158398972873))
  stat = basic_statistics(flex.double(itertools.islice(g, 1000000)))
  assert approx_equal(stat.mean,            6, eps=0.005)
  assert approx_equal(stat.skew,            2/math.sqrt(2), eps=0.05)
  assert approx_equal(stat.biased_variance, 18, eps=0.05)
  mean = 10.0
  pv = variate(poisson_distribution(mean))
  draws = pv(1000000).as_double()
  m = flex.mean(draws)
  v = flex.mean(draws*draws) - m*m
  assert approx_equal(m, mean, eps=0.05)
  assert approx_equal(v, mean, eps=0.05)
def vectors(handle, all):
  from scitbx.array_family import flex
  x = flex.double()
  y = flex.double()
  xpred = flex.double()
  ypred = flex.double()
  for line in handle.readlines():
    if line.find("CV ")!=0: continue
    tokens = line.split()
    obscen  = matrix.col((float(tokens[2]),  float(tokens[3])))
    refcen  = matrix.col((float(tokens[5]),  float(tokens[6])))
    obsspo  = matrix.col((float(tokens[8]),  float(tokens[9])))
    predspo = matrix.col((float(tokens[11]), float(tokens[12])))
    xpred.append(predspo[0])
    ypred.append(predspo[1])
    prediction = predspo-refcen
    observation = obsspo-obscen
    cv = prediction-observation
    x.append(cv[0])
    y.append(cv[1])
  if all:
    print "Plotting all %d spots on one graph."%len(x)
    from matplotlib import pyplot as plt
    plt.plot(x,y,"r.")
    plt.show()
    return
  print len(x),len(y)
  #from spotfinder.applications.xfel.cxi_run3 import get_initial_cxi_scope
  #print "ONLY FOR RUN 3!!!"
  #params = get_initial_cxi_scope()
  #tiling = params.distl.detector_tiling
  print "ONLY FOR RUN 4!!!"
  tiling = [
    518, 439, 712, 624, 715, 439, 909, 624,
    519, 652, 713, 837, 716, 652, 910, 837,
    510, 19, 695, 213, 510, 216, 695, 410,
    721, 19, 906, 213, 721, 216, 906, 410,
    87, 233, 281, 418, 284, 233, 478, 418,
    88, 20, 282, 205, 285, 20, 479, 205,
    108, 447, 293, 641, 108, 644, 293, 838,
    321, 445, 506, 639, 321, 642, 506, 836,
    437, 853, 622, 1047, 437, 1050, 622, 1244,
    649, 853, 834, 1047, 649, 1050, 834, 1244,
    19, 1069, 213, 1254, 216, 1069, 410, 1254,
    18, 856, 212, 1041, 215, 856, 409, 1041,
    230, 1282, 415, 1476, 230, 1479, 415, 1673,
    16, 1282, 201, 1476, 16, 1479, 201, 1673,
    442, 1469, 636, 1654, 639, 1469, 833, 1654,
    443, 1257, 637, 1442, 640, 1257, 834, 1442,
    852, 1137, 1046, 1322, 1049, 1137, 1243, 1322,
    852, 925, 1046, 1110, 1049, 925, 1243, 1110,
    1067, 1350, 1252, 1544, 1067, 1547, 1252, 1741,
    854, 1352, 1039, 1546, 854, 1549, 1039, 1743,
    1280, 1342, 1474, 1527, 1477, 1342, 1671, 1527,
    1282, 1554, 1476, 1739, 1479, 1554, 1673, 1739,
    1467, 924, 1652, 1118, 1467, 1121, 1652, 1315,
    1255, 925, 1440, 1119, 1255, 1122, 1440, 1316,
    1142, 521, 1327, 715, 1142, 718, 1327, 912,
    930, 521, 1115, 715, 930, 718, 1115, 912,
    1359, 514, 1553, 699, 1556, 514, 1750, 699,
    1358, 727, 1552, 912, 1555, 727, 1749, 912,
    1353, 92, 1538, 286, 1353, 289, 1538, 483,
    1565, 91, 1750, 285, 1565, 288, 1750, 482,
    932, 111, 1126, 296, 1129, 111, 1323, 296,
    931, 323, 1125, 508, 1128, 323, 1322, 508]
  for itile in xrange(len(tiling)//4):
    print "tile",itile,
    print "(%4d %4d)-(%4d %4d)"%tuple(tiling[4*itile:4*itile+4]),
    selection = flex.bool()
    for i in xrange(len(x)):
      selection.append(
        tiling[4*itile+0]<xpred[i]<tiling[4*itile+2] and
        tiling[4*itile+1]<ypred[i]<tiling[4*itile+3] )
    print "in selection of %d/%d"%(selection.count(True),len(selection))
    if selection.count(True)<10: continue
    from matplotlib import pyplot as plt
    plt.plot(x.select(selection),y.select(selection),"r.")
    plt.plot([flex.mean(x.select(selection))],
             [flex.mean(y.select(selection))],"go")
    print "Delta x=%.1f"%flex.mean(x.select(selection)), \
          "Delta y=%.1f"%flex.mean(y.select(selection))
    plt.show()
def plot_uc_3Dplot(self, info):
  assert self.interactive

  import numpy as np
  from mpl_toolkits.mplot3d import Axes3D # import dependency

  fig = self.plt.figure(figsize=(12, 10))
  # Extract uc dimensions from info list
  a = flex.double([i['a'] for i in info])
  b = flex.double([i['b'] for i in info])
  c = flex.double([i['c'] for i in info])
  alpha = flex.double([i['alpha'] for i in info])
  beta = flex.double([i['beta'] for i in info])
  gamma = flex.double([i['gamma'] for i in info])
  n_total = len(a)
  accepted = flex.bool(n_total, True)
  for d in [a, b, c, alpha, beta, gamma]:
    outliers = self.reject_outliers(d)
    accepted &= ~outliers
  a = a.select(accepted)
  b = b.select(accepted)
  c = c.select(accepted)

  AA = "a-edge (%.2f +/- %.2f $\AA$)" % (flex.mean(a),
    flex.mean_and_variance(a).unweighted_sample_standard_deviation())
  BB = "b-edge (%.2f +/- %.2f $\AA$)" % (flex.mean(b),
    flex.mean_and_variance(b).unweighted_sample_standard_deviation())
  CC = "c-edge (%.2f +/- %.2f $\AA$)" % (flex.mean(c),
    flex.mean_and_variance(c).unweighted_sample_standard_deviation())

  subset = min(len(a), 1000)
  flex.set_random_seed(123)
  # float() guards against integer division (subset/n_total is 0 in
  # Python 2 whenever subset < n_total, which would select nothing)
  rnd_sel = flex.random_double(len(a)) < (subset / float(n_total))
  a = a.select(rnd_sel)
  b = b.select(rnd_sel)
  c = c.select(rnd_sel)

  fig.suptitle('{} randomly selected cells out of total {} images'
               ''.format(len(a), n_total), fontsize=18)

  ax = fig.add_subplot(111, projection='3d')
  for ia in xrange(len(a)):
    ax.scatter(a[ia], b[ia], c[ia], c='r', marker='+')

  ax.set_xlabel(AA)
  ax.set_ylabel(BB)
  ax.set_zlabel(CC)
def explore(self):
  if(self.count == 0):
    self.T = (flex.mean(flex.pow2(self.simplexValue -
              flex.mean(self.simplexValue))))**0.5 * 10.0
    self.min_T = self.T / self.T_ratio
  elif(self.count%self.Nstep == 0):
    self.T = self.T*self.coolfactor
  for kk in range(1, self.dimension+1):
    self.FindCentroidPt(self.dimension+1-kk)
    self.FindReflectionPt(kk)
  self.sort()
  return # end of this explore step
def exercise_poisson():
  import random
  random.seed(0) # failure rate is fairly high without fixed seed
  a = rt.poisson_variate(100000, 1).as_double()
  m = flex.mean(a)
  v = flex.mean(a*a) - m*m
  assert approx_equal(m, 1.0, eps=0.05)
  assert approx_equal(v, 1.0, eps=0.05)
  a = rt.poisson_variate(100000, 10).as_double()
  m = flex.mean(a)
  v = flex.mean(a*a) - m*m
  assert approx_equal(m, 10.0, eps=0.05)
  assert approx_equal(v, 10.0, eps=0.05)
def exercise_ellipsoidal_truncation(space_group_info, n_sites=100, d_min=1.5):
  xrs = random_structure.xray_structure(
    space_group_info=space_group_info,
    elements=(("O", "N", "C") * (n_sites // 3 + 1))[:n_sites],
    volume_per_atom=50,
    min_distance=1.5,
  )
  f_obs = abs(xrs.structure_factors(d_min=d_min).f_calc())
  # exercise reciprocal_space_vector()
  for mi, d in zip(f_obs.indices(), f_obs.d_spacings().data()):
    rsv = flex.double(f_obs.unit_cell().reciprocal_space_vector(mi))
    assert approx_equal(d, 1.0 / math.sqrt(rsv.dot(rsv)))
  ## print f_obs.unit_cell()
  f = flex.random_double(f_obs.data().size()) * flex.mean(f_obs.data()) / 10
  #
  f_obs1 = f_obs.customized_copy(data=f_obs.data(), sigmas=f_obs.data() * f)
  print "data in:", f_obs1.data().size()
  r = f_obs1.ellipsoidal_truncation_by_sigma(sigma_cutoff=1)
  print "data left:", r.data().size()
  r.miller_indices_as_pdb_file(file_name="indices1.pdb", expand_to_p1=False)
  r.miller_indices_as_pdb_file(file_name="indices2.pdb", expand_to_p1=True)
  #
  f_obs.miller_indices_as_pdb_file(file_name="indices3.pdb", expand_to_p1=False)
  f_obs.miller_indices_as_pdb_file(file_name="indices4.pdb", expand_to_p1=True)
  print "*" * 25
def __init__(self, app, detector_id=None, detector=None, **kwargs):
  assert [detector_id, detector].count(None) == 1
  if detector is not None:
    kwargs['distance'] = flex.mean(
      flex.double([p.get_distance() for p in detector]))
  db_proxy.__init__(self, app, "%s_detector" % app.params.experiment_tag,
                    id=detector_id, **kwargs)
  self.detector_id = self.id
def test1():
  dials_regression = libtbx.env.find_in_repositories(
    relative_path="dials_regression",
    test=os.path.isdir)

  data_dir = os.path.join(dials_regression, "centroid_test_data")
  datablock_path = os.path.join(data_dir, "datablock.json")

  # work in a temporary directory
  cwd = os.path.abspath(os.curdir)
  tmp_dir = open_tmp_directory(suffix="tst_rs_mapper")
  os.chdir(tmp_dir)
  cmd = 'dials.rs_mapper ' + datablock_path + ' map_file="junk.ccp4"'
  result = easy_run.fully_buffered(command=cmd).raise_if_errors()
  # load results
  from iotbx import ccp4_map
  from scitbx.array_family import flex
  m = ccp4_map.map_reader(file_name="junk.ccp4")

  assert len(m.data) == 7189057
  assert approx_equal(m.header_min, -1.0)
  assert approx_equal(flex.min(m.data), -1.0)

  assert approx_equal(m.header_max, 2052.75)
  assert approx_equal(flex.max(m.data), 2052.75)

  assert approx_equal(m.header_mean, 0.018606403842568398)
  assert approx_equal(flex.mean(m.data), 0.018606403842568398)

  print "OK"
  return
def blank_integrated_analysis(reflections, scan, phi_step, fractional_loss):
  prf_sel = reflections.get_flags(reflections.flags.integrated_prf)
  if prf_sel.count(True) > 0:
    reflections = reflections.select(prf_sel)
    intensities = reflections["intensity.prf.value"]
    variances = reflections["intensity.prf.variance"]
  else:
    sum_sel = reflections.get_flags(reflections.flags.integrated_sum)
    reflections = reflections.select(sum_sel)
    intensities = reflections["intensity.sum.value"]
    variances = reflections["intensity.sum.variance"]

  i_sigi = intensities / flex.sqrt(variances)

  xyz_px = reflections["xyzobs.px.value"]
  x_px, y_px, z_px = xyz_px.parts()
  phi = scan.get_angle_from_array_index(z_px)

  osc = scan.get_oscillation()[1]
  n_images_per_step = iceil(phi_step / osc)
  phi_step = n_images_per_step * osc

  phi_min = flex.min(phi)
  phi_max = flex.max(phi)
  n_steps = iceil((phi_max - phi_min) / phi_step)

  hist = flex.histogram(z_px, n_slots=n_steps)

  mean_i_sigi = flex.double()
  for i, slot_info in enumerate(hist.slot_infos()):
    sel = (z_px >= slot_info.low_cutoff) & (z_px < slot_info.high_cutoff)
    if sel.count(True) == 0:
      mean_i_sigi.append(0)
    else:
      mean_i_sigi.append(flex.mean(i_sigi.select(sel)))

  fractional_mean_i_sigi = mean_i_sigi / flex.max(mean_i_sigi)

  potential_blank_sel = mean_i_sigi <= (fractional_loss * flex.max(mean_i_sigi))

  xmin, xmax = zip(*[(slot_info.low_cutoff, slot_info.high_cutoff)
                     for slot_info in hist.slot_infos()])

  d = {
    "data": [
      {
        "x": list(hist.slot_centers()),
        "y": list(mean_i_sigi),
        "xlow": xmin,
        "xhigh": xmax,
        "blank": list(potential_blank_sel),
        "type": "bar",
        "name": "blank_counts_analysis",
      }
    ],
    "layout": {
      "xaxis": {"title": "z observed (images)"},
      # the plotted quantity is mean I/sigI per slot, not a reflection count
      "yaxis": {"title": "Mean I/sigI"},
      "bargap": 0,
    },
  }

  blank_regions = blank_regions_from_sel(d["data"][0])
  d["blank_regions"] = blank_regions

  return d
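# --- Illustrative sketch (editor addition, not from the source tree) ---
# blank_regions_from_sel() is called above but not defined in this excerpt.
# A plausible minimal implementation, inferred only from the call site (it
# receives d["data"][0] and should collapse consecutive blank-flagged slots
# into contiguous regions); treat the details as an assumption, not the
# dials implementation.
def blank_regions_from_sel(data):
  blank = data["blank"]   # one bool per histogram slot
  xlow = data["xlow"]     # lower edge of each slot
  xhigh = data["xhigh"]   # upper edge of each slot
  regions = []
  start = None
  for i, is_blank in enumerate(blank):
    if is_blank and start is None:
      start = xlow[i]                        # open a new blank region
    elif not is_blank and start is not None:
      regions.append((start, xhigh[i - 1]))  # close the current region
      start = None
  if start is not None:                      # region runs to the last slot
    regions.append((start, xhigh[-1]))
  return regions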
def __init__(self, target_map, pdb_hierarchy, atom_radius, use_adp_restraints,
             nproc, log=None):
  adopt_init_args(self, locals())
  self.xray_structure = self.pdb_hierarchy.extract_xray_structure(
    crystal_symmetry = self.target_map.miller_array.crystal_symmetry())
  b_isos = self.xray_structure.extract_u_iso_or_u_equiv()*adptbx.u_as_b(1.)
  self.xray_structure = self.xray_structure.set_b_iso(
    value = flex.mean(b_isos))
  #for rg in self.pdb_hierarchy.residue_groups():
  #  sel = rg.atoms().extract_i_seq()
  #  sel = flex.bool(b_isos.size(), sel)
  #  self.xray_structure = self.xray_structure.set_b_iso(
  #    value = flex.mean(b_isos.select(sel)),
  #    selection = sel)
  self.pdb_hierarchy.adopt_xray_structure(self.xray_structure)
  self.chain_selections = []
  for chain in self.pdb_hierarchy.chains():
    self.chain_selections.append(chain.atoms().extract_i_seq())
def get_sites_cc (self, atoms, sites=None) :
  from cctbx import maptbx
  from scitbx.array_family import flex
  radii = flex.double()
  for atom in atoms :
    if (atom.element.strip() in ["H", "D"]) :
      radii.append(1.)
    else :
      radii.append(1.5)
  fcalc_map = self.fcalc_real_map
  if (sites is None) :
    sites = atoms.extract_xyz()
  else :
    fcalc_map = self.get_new_fcalc_map(
      sites_new=sites,
      i_seqs=atoms.extract_i_seq())
  sel = maptbx.grid_indices_around_sites(
    unit_cell  = self.unit_cell,
    fft_n_real = self.n_real,
    fft_m_real = self.m_real,
    sites_cart = sites,
    site_radii = radii)
  m1 = self.real_map.select(sel)
  m2 = fcalc_map.select(sel)
  cc = flex.linear_correlation(x=m1, y=m2).coefficient()
  return group_args(
    cc=cc,
    map_mean=flex.mean(m1.as_1d()))
def residual_contribution(self, sites_current, sites_average):
  diff = sites_current - sites_average
  self.rms_with_respect_to_average.append(flex.mean(diff.dot())**0.5)
  self.number_of_restraints += diff.size()
  self.residual_sum += self.weight * diff.sum_sq()
  if (self.gradients is not None):
    return (2 * self.weight) * diff
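# --- Illustrative sketch (editor addition, not from the source tree) ---
# The RMS above relies on the flex idiom where vec3_double.dot() with no
# argument returns the per-site squared norms, so
# rms = sqrt(mean(|diff_i|^2)). A self-contained check, assuming scitbx:
from scitbx.array_family import flex

sites_current = flex.vec3_double([(1, 0, 0), (0, 2, 0)])
sites_average = flex.vec3_double([(0, 0, 0), (0, 0, 0)])
diff = sites_current - sites_average
rms = flex.mean(diff.dot())**0.5  # sqrt((1 + 4) / 2)
assert abs(rms - 2.5**0.5) < 1e-12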
def create_sigmas (f_obs, params, wilson_b=None, return_as_amplitudes=False) :
  assert (f_obs.sigmas() is None)
  from scitbx.array_family import flex
  i_obs = f_obs.f_as_f_sq()
  i_norm = i_obs.data() / flex.mean(i_obs.data())
  profiler = profile_sigma_generator(
    mtz_file=params.noise_profile_file,
    pdb_file=params.profile_model_file,
    wilson_b=wilson_b,
    data_label=params.profile_data_label,
    n_resolution_bins=params.n_resolution_bins,
    n_intensity_bins=params.n_intensity_bins)
  sigmas = flex.double(i_norm.size(), 0.0)
  i_obs.setup_binner(n_bins=params.n_resolution_bins)
  for j_bin in i_obs.binner().range_used() :
    bin_sel = i_obs.binner().selection(j_bin)
    shell_profile = profiler.get_noise_profile_for_shell(j_bin)
    for k in bin_sel.iselection() :
      i_over_sigma = shell_profile.get_i_over_sigma(i_norm[k])
      sigmas[k] = i_obs.data()[k] / i_over_sigma
  i_new = i_obs.customized_copy(sigmas=sigmas)
  if (return_as_amplitudes) :
    # i_new holds intensities, so convert with f_sq_as_f(); the original
    # called f_as_f_sq() here, which goes the wrong direction
    return i_new.f_sq_as_f()
  else :
    return i_new
def update(self, xray_structure, accept_as_is=True):
  if(not accept_as_is):
    current_map = self.compute_map(xray_structure = xray_structure)
    sites_cart  = xray_structure.sites_cart()
    sites_cart_ = self.xray_structure.sites_cart()
    for r in self.residue_monitors:
      sca = sites_cart.select(r.selection_all)
      scs = sites_cart.select(r.selection_sidechain)
      scb = sites_cart.select(r.selection_backbone)
      map_cc_all       = self.map_cc(sites_cart = sca, other_map = current_map)
      map_cc_sidechain = self.map_cc(sites_cart = scs, other_map = current_map)
      map_cc_backbone  = self.map_cc(sites_cart = scb, other_map = current_map)
      flag = map_cc_all      >= r.map_cc_all and \
             map_cc_backbone >= r.map_cc_backbone and \
             map_cc_sidechain>= r.map_cc_sidechain
      if(flag):
        residue_sites_cart_new = sites_cart.select(r.selection_all)
        sites_cart_ = sites_cart_.set_selected(r.selection_all,
          residue_sites_cart_new)
    xray_structure = xray_structure.replace_sites_cart(sites_cart_)
  # re-initialize monitor
  self.dist_from_previous = flex.mean(self.xray_structure.distances(
    other = xray_structure))
  self.xray_structure = xray_structure
  self.pdb_hierarchy.adopt_xray_structure(xray_structure)
  self.initialize()
  self.states_collector.add(sites_cart = xray_structure.sites_cart())
  self.assert_pdb_hierarchy_xray_structure_sync()
def box_iterator(self):
  b = maptbx.boxes(
    n_real   = self.atom_map_asu.focus(),
    fraction = self.box_size_as_fraction,
    max_boxes= self.max_boxes,
    log      = self.log)
  def get_wide_box(s,e): # define wide box: neutral + phased volumes
    if(self.neutral_volume_box_cushion_width>0):
      sh = self.neutral_volume_box_cushion_width
      ss = [max(s[i]-sh,0) for i in [0,1,2]]
      ee = [min(e[i]+sh,n_real_asu[i]) for i in [0,1,2]]
    else: ss,ee = s,e
    return ss,ee
  n_real_asu = b.n_real
  n_boxes = len(b.starts)
  i_box = 0
  for s,e in zip(b.starts, b.ends):
    i_box+=1
    sw,ew = get_wide_box(s=s,e=e)
    fmodel_omit = self.omit_box(start=sw, end=ew)
    r = fmodel_omit.r_work()
    self.r.append(r) # for tests only
    if(self.log):
      print >> self.log, "r(curr,min,max,mean)=%6.4f %6.4f %6.4f %6.4f"%(r,
        flex.min(self.r), flex.max(self.r), flex.mean(self.r)), i_box, n_boxes
    omit_map_data = self.asu_map_from_fmodel(
      fmodel=fmodel_omit, map_type=self.map_type)
    maptbx.copy_box(
      map_data_from = omit_map_data,
      map_data_to   = self.map_result_asu,
      start         = s,
      end           = e)
  self.map_result_asu.reshape(self.acc_asu)
def get_mean_statistic_for_resolution (d_min, stat_type, range=0.2, out=None) :
  if (out is None) :
    out = sys.stdout
  from scitbx.array_family import flex
  pkl_file = libtbx.env.find_in_repositories(
    relative_path = "chem_data/polygon_data/all_mvd.pickle",
    test = os.path.isfile)
  db = easy_pickle.load(pkl_file)
  all_d_min = db['high_resolution']
  stat_values = db[stat_type]
  values_for_range = flex.double()
  for (d_, v_) in zip(all_d_min, stat_values) :
    try :
      d = float(d_)
      v = float(v_)
    except ValueError :
      continue
    else :
      if (d > (d_min - range)) and (d < (d_min + range)) :
        values_for_range.append(v)
  h = flex.histogram(values_for_range, n_slots=10)
  print >> out, " %s for d_min = %.3f - %.3f A" % (stat_names[stat_type],
    d_min-range, d_min+range)
  min = flex.min(values_for_range)
  max = flex.max(values_for_range)
  mean = flex.mean(values_for_range)
  print >> out, " count: %d" % values_for_range.size()
  print >> out, " min: %.2f" % min
  print >> out, " max: %.2f" % max
  print >> out, " mean: %.2f" % mean
  print >> out, " histogram of values:"
  h.show(prefix=" ")
  return mean
def run(b_iso=10):
  xrs = iotbx.pdb.input(source_info=None, lines=pdb_str).xray_structure_simple()
  xrs = xrs.set_b_iso(value=b_iso)
  xrs.scattering_type_registry(
    table = "n_gaussian",
    d_min = 0.,
    types_without_a_scattering_contribution=["?"])
  n_real = [100,100,100]
  pixel_volume = xrs.unit_cell().volume()/(n_real[0]*n_real[1]*n_real[2])
  map_data_3d = mmtbx.real_space.sampled_model_density(
    xray_structure = xrs,
    n_real         = n_real).data()*pixel_volume
  dist, map_data_2d = maptbx.map_peak_3d_as_2d(
    map_data    = map_data_3d,
    unit_cell   = xrs.unit_cell(),
    center_cart = xrs.sites_cart()[0],
    radius      = 3.0)
  #
  map_data_2d_exact = flex.double()
  ed = xrs._scattering_type_registry.gaussian("Ca")
  for r in dist:
    map_data_2d_exact.append(ed.electron_density(r, b_iso))
  map_data_2d_exact = map_data_2d_exact * pixel_volume
  #
  assert approx_equal(flex.sum(map_data_3d), 20, 0.1) # Page 556, Int.Tables.
  assert flex.mean(abs(map_data_2d-map_data_2d_exact)) < 1.e-4
def finalize_model (pdb_hierarchy,
    xray_structure,
    set_b_iso=None,
    convert_to_isotropic=None,
    selection=None) :
  """
  Prepare a rebuilt model for refinement, optionally including B-factor
  reset.
  """
  from cctbx import adptbx
  from scitbx.array_family import flex
  pdb_atoms = pdb_hierarchy.atoms()
  if (selection is None) :
    selection = flex.bool(pdb_atoms.size(), True)
  elif isinstance(selection, str) :
    sel_cache = pdb_hierarchy.atom_selection_cache()
    selection = sel_cache.selection(selection)
  for i_seq, atom in enumerate(pdb_atoms) :
    assert (atom.parent() is not None)
    atom.segid = ""
    sc = xray_structure.scatterers()[i_seq]
    sc.label = atom.id_str()
  if (convert_to_isotropic) :
    xray_structure.convert_to_isotropic(selection=selection.iselection())
  if (set_b_iso is not None) :
    if (set_b_iso is Auto) :
      u_iso = xray_structure.extract_u_iso_or_u_equiv()
      set_b_iso = adptbx.u_as_b(flex.mean(u_iso)) / 2
    xray_structure.set_b_iso(value=set_b_iso, selection=selection)
  pdb_hierarchy.adopt_xray_structure(xray_structure)
  pdb_atoms.reset_serial()
  pdb_atoms.reset_i_seq()
  pdb_atoms.reset_tmp()
def compute_functional_and_gradients(self):
  from scitbx.array_family import flex
  import math
  self.model_mean_x = flex.double(len(self.observed_x))
  self.model_mean_y = flex.double(len(self.observed_x))
  for x in xrange(6):
    selection = (self.master_groups==x)
    self.model_mean_x.set_selected(selection, self.x[2*x])
    self.model_mean_y.set_selected(selection, self.x[2*x+1])
  delx = self.observed_x - self.model_mean_x
  dely = self.observed_y - self.model_mean_y
  delrsq = delx*delx + dely*dely
  f = flex.sum(delrsq)
  gradients = flex.double([0.]*12)
  for x in xrange(6):
    selection = (self.master_groups==x)
    gradients[2*x]   = -2. * flex.sum( delx.select(selection) )
    gradients[2*x+1] = -2. * flex.sum( dely.select(selection) )
  if self.verbose:
    print "Functional ",math.sqrt(flex.mean(delrsq))
  self.count_iterations += 1
  return f,gradients
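# --- Illustrative sketch (editor addition, not from the source tree) ---
# The analytic gradient above (d f / d mu_x = -2 * sum(delx) per group) is
# easy to verify against a central finite difference; toy numbers, one group:
from scitbx.array_family import flex

obs_x = flex.double([1.0, 2.0, 4.0])
mu = 1.5
delx = obs_x - mu
grad_analytic = -2.0 * flex.sum(delx)  # same formula as in the loop above

h = 1.e-6  # central finite difference in mu
f_plus  = flex.sum((obs_x - (mu + h)) * (obs_x - (mu + h)))
f_minus = flex.sum((obs_x - (mu - h)) * (obs_x - (mu - h)))
grad_numeric = (f_plus - f_minus) / (2 * h)
assert abs(grad_analytic - grad_numeric) < 1.e-5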
def compute_functional_and_gradients(self):
  # HATTNE never enters this function
  print "HATTNE entering mark1.compute_functional_and_gradients"
  self.model_calcx = self.spotcx.deep_copy()
  self.model_calcy = self.spotcy.deep_copy()
  for x in xrange(64):
    selection = self.selections[x]
    self.model_calcx.set_selected(selection, self.model_calcx + self.x[2 * x])
    self.model_calcy.set_selected(selection, self.model_calcy + self.x[2 * x + 1])
  squares = self.delrsq_functional(self.model_calcx, self.model_calcy)
  f = flex.sum(squares)
  calc_obs_diffx = self.model_calcx - self.spotfx
  calc_obs_diffy = self.model_calcy - self.spotfy
  gradients = flex.double([0.0] * 128)
  for x in xrange(64):
    selection = self.selections[x]
    gradients[2 * x]     = 2.0 * flex.sum(calc_obs_diffx.select(selection))
    gradients[2 * x + 1] = 2.0 * flex.sum(calc_obs_diffy.select(selection))
  print "Functional ", math.sqrt(flex.mean(squares))
  return f, gradients
def run():
  xrs = random_structure.xray_structure(
    space_group_info = sgtbx.space_group_info("P1"),
    elements         = ["N"]*500,
    unit_cell        = (20, 30, 40, 70, 80, 120))
  xrs = xrs.set_b_iso(value=25)
  d_mins = [1.5]
  result = []
  for d_min in d_mins:
    f_obs = abs(xrs.structure_factors(d_min=d_min).f_calc())
    f_obs = f_obs.customized_copy(data = f_obs.data() * 135.)
    shifts = [0.0, 0.3]
    for xyz_shake_amount in shifts:
      xrs_shaken = xrs.deep_copy_scatterers()
      xrs_shaken.shake_sites_in_place(mean_distance = xyz_shake_amount)
      ml_err = flex.double()
      ml_err_new = flex.double()
      for trial in xrange(10):
        r_free_flags = f_obs.generate_r_free_flags()
        fmodel = mmtbx.f_model.manager(
          f_obs          = f_obs,
          r_free_flags   = r_free_flags,
          xray_structure = xrs_shaken)
        ml_err_ = fmodel.model_error_ml()
        ml_err.append(ml_err_)
      result.append(flex.mean(ml_err))
  assert result[0] > 0 and result[0] < 0.03
  assert result[1] > 0.2 and result[1] <= 0.32
def collect(O, rmsd_t_c, param_values):
  mins = [flex.min(a) for a in rmsd_t_c]
  means = [flex.mean(a) for a in rmsd_t_c]
  if (mins[0] < mins[1]): a = "t"
  else:                   a = "c"
  if (means[0] < means[1]): b = "t"
  else:                     b = "c"
  O.data[a+b].append(param_values)
def __init__(self,
             evaluator,
             population_size=50,
             f=None,
             cr=0.9,
             eps=1e-2,
             n_cross=1,
             max_iter=10000,
             monitor_cycle=200,
             out=None,
             show_progress=False,
             show_progress_nth_cycle=1,
             insert_solution_vector=None,
             dither_constant=0.4):
  self.dither = dither_constant
  self.show_progress = show_progress
  self.show_progress_nth_cycle = show_progress_nth_cycle
  self.evaluator = evaluator
  self.population_size = population_size
  self.f = f
  self.cr = cr
  self.n_cross = n_cross
  self.max_iter = max_iter
  self.monitor_cycle = monitor_cycle
  self.vector_length = evaluator.n
  self.eps = eps
  self.population = []
  self.seeded = False
  if insert_solution_vector is not None:
    assert len(insert_solution_vector) == self.vector_length
    self.seeded = insert_solution_vector
  for ii in xrange(self.population_size):
    self.population.append(flex.double(self.vector_length, 0))
  self.scores = flex.double(self.population_size, 1000)
  self.optimize()
  self.best_score = flex.min(self.scores)
  self.best_vector = self.population[flex.min_index(self.scores)]
  self.evaluator.x = self.best_vector
  if self.show_progress:
    self.evaluator.print_status(
      flex.min(self.scores),
      flex.mean(self.scores),
      self.population[flex.min_index(self.scores)],
      'Final')
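# --- Illustrative sketch (editor addition, not from the source tree) ---
# The optimizer above only touches a handful of evaluator attributes: n (the
# vector length), x (receives the best vector), and the print_status()
# callback, plus whatever score_population()/make_random_population() need.
# A target() method and a domain attribute are the usual interface in scitbx
# differential_evolution examples; that part is inferred, not shown here.
from scitbx.array_family import flex

class rosenbrock_evaluator(object):
  def __init__(self, n=2):
    self.n = n                   # vector length read by the optimizer
    self.x = None                # filled in with the best vector at the end
    self.domain = [(-5, 5)] * n  # sampling range (assumed interface)

  def target(self, vector):
    # Rosenbrock function: global minimum 0 at (1, ..., 1)
    result = 0.0
    for i in range(self.n - 1):
      result += (100.0 * (vector[i + 1] - vector[i]**2)**2
                 + (1.0 - vector[i])**2)
    return result

  def print_status(self, min_target, mean_target, best_vector, count):
    print("cycle %s: min=%.4g mean=%.4g" % (count, min_target, mean_target))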
def get_binned_intensities(self, n_bins=100):
  """
  Using self.ISIGI, bin the intensities using the following procedure:
  1) Find the minimum and maximum intensity values.
  2) Divide max-min by n_bins. This is the bin step size
  The effect is
  @param n_bins number of bins to use.
  @return a tuple with an array of selections for each bin and an array
  of median intensity values for each bin.
  """
  print >> self.log, "Computing intensity bins.",
  all_mean_Is = flex.double()
  only_means = flex.double()
  for hkl_id in range(self.scaler.n_refl):
    hkl = self.scaler.miller_set.indices()[hkl_id]
    if hkl not in self.scaler.ISIGI: continue
    n = len(self.scaler.ISIGI[hkl])
    # get scaled intensities
    intensities = flex.double(
      [self.scaler.ISIGI[hkl][i][0] for i in range(n)])
    meanI = flex.mean(intensities)
    only_means.append(meanI)
    all_mean_Is.extend(flex.double([meanI] * n))
  step = (flex.max(only_means) - flex.min(only_means)) / n_bins
  print >> self.log, "Bin size:", step

  sels = []
  binned_intensities = []
  min_all_mean_Is = flex.min(all_mean_Is)
  for i in range(n_bins):
    sel = (all_mean_Is > (min_all_mean_Is + step * i)) & \
          (all_mean_Is < (min_all_mean_Is + step * (i + 1)))
    if sel.all_eq(False): continue
    sels.append(sel)
    binned_intensities.append((step / 2 + step * i) + min(only_means))

  for i, (sel, intensity) in enumerate(zip(sels, binned_intensities)):
    print >> self.log, "Bin %02d, number of observations: % 10d, midpoint intensity: %f" % (
      i, sel.count(True), intensity)

  return sels, binned_intensities
def __init__(self, tophat, normal, plot):
  # take U-mats from two different distributions, apply them to unit vectors, and plot
  if plot:
    from matplotlib import pyplot as plt
    fig, axes = plt.subplots(2, 2, figsize=(8,7))
  else:
    axes = ((1,2),(3,4)) # dummy
  # columns plot the transformation of x, y, and z unit vectors
  rows = [tophat, normal]
  differences = []
  for irow, dist in enumerate(rows):
    iaxes = axes[irow]
    cube_diag = math.sqrt(1./3) # 0.57735
    for icol, RLP in enumerate([(0,0,1), (cube_diag, cube_diag, cube_diag)]):
      RLP = col(RLP)
      print("(%7.5f %7.5f %7.5f)"%(RLP.elems),
            "Vector length:%8.6f"%(RLP.length()), end=' ')
      axis = iaxes[icol]
      unit = RLP.normalize()
      seed = col((1,0,0))
      perm2 = unit.cross(seed)
      perm3 = unit.cross(perm2)
      a2 = flex.double(); a3 = flex.double()
      difference_vectors = flex.vec3_double()
      for u in dist:
        U = sqr(u)
        newvec = U * RLP
        difference_vectors.append(newvec - RLP)
        a2.append(newvec.dot(perm2)); a3.append(newvec.dot(perm3))
      rms = math.sqrt(flex.mean(difference_vectors.dot(difference_vectors)))
      print("The rms difference is", rms)
      differences.append(rms)
      if plot:
        axis.plot(a2, a3, 'r,')
        axis.set_aspect("equal")
        axis.set_title("Transformation of v=%s"%(
          "(%5.3f,%5.3f,%5.3f)"%(RLP.elems)))
        axis.set_xlim(-0.05, 0.05)
        axis.set_ylim(-0.05, 0.05)
  assert approx_equal(differences[0], differences[1], eps=1e-04), \
    "RMS mosaic distribution for axis vector and diagonal vector should be similar, as proposed by J Holton"
  if plot:
    plt.show()
def get_active_data_percentile(self):
  data = self.imgobj.linearintdata
  indexing = []
  for asic in self.corners:
    block = data.matrix_copy_block(
      i_row=asic[0], i_column=asic[1],
      n_rows=asic[2] - asic[0],
      n_columns=asic[3] - asic[1])
    active_data = block.as_1d().as_double()
    order = flex.sort_permutation(active_data)
    if self.verbose:
      print("The mean is ", flex.mean(active_data),
            "on %d pixels" % len(active_data))
      print("The 90-percentile pixel is ",
            active_data[order[int(0.9 * len(active_data))]])
      print("The 99-percentile pixel is ",
            active_data[order[int(0.99 * len(active_data))]])
    percentile90 = active_data[order[int(0.9 * len(active_data))]]
    maximas = flex.vec2_double()
    for idx in range(len(active_data) - 1, int(0.9 * len(active_data)), -1):
      if active_data[order[idx]] > 2.0 * percentile90:
        if self.verbose:
          print(" ", idx, active_data[order[idx]])
        irow = order[idx] // (asic[3] - asic[1])
        icol = order[idx] % (asic[3] - asic[1])
        #self.green.append((asic[0]+irow, asic[1]+icol))
        maximas.append((irow, icol))
    CLUS = clustering(maximas)
    #coords = CLUS.as_spot_max_pixels(block,asic)
    coords = CLUS.as_spot_center_of_mass(block, asic, percentile90)
    intensities = CLUS.intensities
    for coord, height in zip(coords, intensities):
      self.green.append(coord)
      indexing.append((
        coord[0] * float(self.inputpd["pixel_size"]),
        coord[1] * float(self.inputpd["pixel_size"]),
        0.0, # 0-degree offset for still image
        height))
  return indexing
def calc_monte_carlo_population_covariances(mats, mean_matrix):
  """Given the sequence of matrices mats, calculate the variance-covariance
  matrix of the elements, using the known mean values for each elt in mat to
  avoid the approximation implied in taking the sample covariance"""

  # check input
  assert all([e.is_square() for e in mats])
  n = mats[0].n_rows()
  assert all([e.n_rows() == n for e in mats])

  # create an empty var-cov matrix
  covmat = flex.double(flex.grid(n**2, n**2), 0.0)

  for i in range(covmat.all()[0]):
    for j in range(covmat.all()[1]):
      resid_a = flex.double([m[i] - mean_matrix[i] for m in mats])
      resid_b = flex.double([m[j] - mean_matrix[j] for m in mats])
      covmat[i, j] = flex.mean(resid_a * resid_b)

  return covmat
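# --- Illustrative sketch (editor addition, not from the source tree) ---
# Usage with scitbx.matrix.sqr objects, which supply the is_square()/
# n_rows()/flat-indexing interface the function expects; the mean matrix
# and the noise model below are made up for illustration.
from scitbx.array_family import flex
from scitbx import matrix

mean = matrix.sqr((1.0, 0.0,
                   0.0, 1.0))
mt = flex.mersenne_twister(seed=0)
mats = []
for _ in range(100):
  noise = [0.01 * (mt.random_double() - 0.5) for _ in range(4)]
  mats.append(matrix.sqr([m + e for m, e in zip(mean.elems, noise)]))

covmat = calc_monte_carlo_population_covariances(mats, mean)
assert covmat.all() == (4, 4)  # one row/column per matrix element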
def initialize(self):
  self.assert_pdb_hierarchy_xray_structure_sync()
  five_cc_o = five_cc(
    map               = self.target_map_object.map_data,
    xray_structure    = self.xray_structure,
    d_min             = self.target_map_object.d_min,
    compute_cc_box    = True,
    compute_cc_image  = False,
    compute_cc_mask   = True,
    compute_cc_volume = False,
    compute_cc_peaks  = False)
  self.cc_mask = five_cc_o.cc_mask
  self.cc_box  = five_cc_o.cc_box
  if (self.geometry_restraints_manager is not None):
    es = self.geometry_restraints_manager.energies_sites(
      sites_cart=self.xray_structure.sites_cart())
    self.rmsd_a = es.angle_deviations()[2]
    self.rmsd_b = es.bond_deviations()[2]
  self.dist_from_start = flex.mean(
    self.xray_structure_start.distances(other=self.xray_structure))
  self.assert_pdb_hierarchy_xray_structure_sync()
def run(space_group_info):
  """
  Make sure it works for all space groups and boxes with non-zero origin.
  """
  # make up data
  xrs = random_structure.xray_structure(
    space_group_info       = space_group_info,
    volume_per_atom        = 50,
    general_positions_only = False,
    u_iso                  = 0.3,
    elements               = ('C', 'N', 'O', "S")*10,
    min_distance           = 1.5)
  xrs.scattering_type_registry(table="wk1995")
  f_calc = xrs.structure_factors(d_min=2).f_calc()
  f_obs = abs(f_calc)
  # create fmodel object
  fmodel = mmtbx.f_model.manager(
    xray_structure = xrs,
    f_obs          = f_obs)
  fmodel.update_all_scales()
  mc1 = fmodel.electron_density_map().map_coefficients(
    map_type                   = "2mFo-DFc",
    isotropize                 = False,
    exclude_free_r_reflections = False,
    fill_missing               = False)
  crystal_gridding = fmodel.f_obs().crystal_gridding(
    d_min             = fmodel.f_obs().d_min(),
    symmetry_flags    = maptbx.use_space_group_symmetry,
    resolution_factor = 1./3)
  # compute OMIT map
  r = cfom.run(
    crystal_gridding                 = crystal_gridding,
    fmodel                           = fmodel.deep_copy(),
    full_resolution_map              = False,
    max_boxes                        = 70,
    neutral_volume_box_cushion_width = 0,
    box_size_as_fraction             = 0.3,
    log                              = False)
  ccs = get_cc(mc1=mc1, mc2=r.map_coefficients(filter_noise=False), xrs=xrs)
  assert flex.mean(ccs) > 0.8
  print(" CC(min/max,mean)", ccs.min_max_mean().as_tuple())
def exercise():
  # Exercise "simple" target
  pi = get_pdb_inputs(pdb_str=pdb_str_1)
  selection = flex.bool(pi.xrs.scatterers().size(), True)
  for d_min in [1, 2, 3]:
    print "d_min:", d_min
    f_calc = pi.xrs.structure_factors(d_min = d_min).f_calc()
    fft_map = f_calc.fft_map(resolution_factor=0.25)
    fft_map.apply_sigma_scaling()
    target_map = fft_map.real_map_unpadded()
    rsr_simple_refiner = individual_sites.simple(
      target_map                  = target_map,
      selection                   = selection,
      real_space_gradients_delta  = d_min/4,
      max_iterations              = 150,
      geometry_restraints_manager = pi.grm.geometry)
    for shake_size in [1,]:
      print "  shake_size:", shake_size
      for p in [(0.01, 1.0), (0.03, 3.0), (0.1, 10.0)]:
        print "    target:", p
        w_opt = flex.double()
        for start_value in [0.001, 0.01, 0.1, 0, 1, 10, 100, 1000]:
          xrs_poor = pi.xrs.deep_copy_scatterers()
          random.seed(0)
          flex.set_random_seed(0)
          xrs_poor.shake_sites_in_place(mean_distance = shake_size)
          #
          refined = individual_sites.refinery(
            refiner                  = rsr_simple_refiner,
            xray_structure           = xrs_poor,
            start_trial_weight_value = start_value,
            rms_bonds_limit          = p[0],
            rms_angles_limit         = p[1])
          w_opt.append(refined.weight_final)
          dist = flex.mean(flex.sqrt((pi.xrs.sites_cart() -
            refined.sites_cart_result).dot()))
          print "      w_start,w_final,b,a,dist: %9.4f %9.4f %6.3f %6.3f %6.3f"%(
            start_value, refined.weight_final, refined.rms_bonds_final,
            refined.rms_angles_final, dist)
          assert refined.rms_bonds_final  <= p[0]
          assert refined.rms_angles_final <= p[1]
def followup_brightness_scale(data):
  """
  histogramming ported from iotbx/detectors/display.h
  """
  #first pass through data calculate average
  data = data.as_double()
  qave = flex.mean(data)
  #second pass calculate histogram
  hsize = 100
  histogram = flex.double(hsize, 0)
  for i in xrange(data.size()):
    temp = int((hsize / 2) * data[i] / qave)
    if temp < 0:
      histogram[0] += 1
    elif temp >= hsize:
      histogram[hsize - 1] += 1
    else:
      histogram[temp] += 1
  #third pass calculate 90%
  percentile = 0
  accum = 0
  for i in xrange(hsize):
    accum += histogram[i]
    if (accum > 0.9 * data.size()):
      percentile = i * qave / (hsize / 2)
      break
  adjlevel = 0.4
  brightness = 0.4
  if percentile > 0.:
    correction = brightness * adjlevel / percentile
  else:
    correction = brightness / 5.0
  outscale = 256
  corrected = data * correction
  outvalue = outscale * (1.0 - corrected)
  sel1 = outvalue < 0
  sel2 = outvalue >= outscale
  outvalue.set_selected(sel1, 0)
  outvalue.set_selected(sel2, outscale - 1)
  return outvalue
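# --- Illustrative sketch (editor addition, not from the source tree) ---
# Quick sanity check of the scaling, assuming scitbx: uniform random "pixel"
# values should come back inverted (bright pixels -> low display values) and
# clamped to [0, 255].
from scitbx.array_family import flex

flex.set_random_seed(0)
data = flex.random_double(64 * 64) * 1000.0  # fake pixel intensities
display = followup_brightness_scale(data)
assert flex.min(display) >= 0
assert flex.max(display) <= 255
# inversion: the brightest input pixel gets the smallest display value
assert display[flex.max_index(data)] == flex.min(display)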
def show_histogram(data, n_slots, smooth=True):
  triplets = []
  histogram = flex.histogram(data=data, n_slots=n_slots)
  l = histogram.data_min()
  for i, s in enumerate(histogram.slots()):
    r = histogram.data_min() + histogram.slot_width() * (i + 1)
    triplets.append([l, r, s])
    print "%8.4f %8.4f %d" % (l, r, s)
    l = r
  if (smooth):
    print "... smooth histogram"
    triplets_smooth = []
    for i, t in enumerate(triplets):
      values = flex.double()
      for j in [-1, 0, 1]:
        if (i + j >= 0 and i + j < len(triplets)):
          values.append(float(triplets[i + j][2]))
      triplets_smooth.append((t[0], t[1], flex.mean(values)))
    for t in triplets_smooth:
      print "%8.4f %8.4f %d" % (t[0], t[1], int("%.0f" % t[2]))
  return histogram
def do_sigma_scaling(self):
  # Divide each pixel value by its dark standard deviation. Since we are led
  # to believe that the standard deviation of a pixel is proportional to the
  # gain of said pixel, this approximates a gain correction.
  assert self.dark_img is not None
  assert self.gain_map is None # not appropriate to do sigma scaling and gain correction at the same time!
  flex_cspad_img = self.cspad_img.as_double()
  flex_cspad_img_sel = flex_cspad_img.as_1d().select(self.dark_mask.as_1d())
  flex_dark_stddev = self.dark_stddev.select(self.dark_mask.as_1d()).as_double()
  assert flex_dark_stddev.count(0) == 0
  flex_dark_stddev /= flex.mean(flex_dark_stddev)
  flex_cspad_img_sel /= flex_dark_stddev
  flex_cspad_img.as_1d().set_selected(self.dark_mask.as_1d().iselection(),
    flex_cspad_img_sel)
  self.cspad_img = flex_cspad_img
  if 0: # for debugging
    from matplotlib import pyplot
    hist_min, hist_max = flex.min(flex_cspad_img_sel.as_double()), \
                         flex.max(flex_cspad_img_sel.as_double())
    print hist_min, hist_max
    n_slots = 100
    n, bins, patches = pyplot.hist(
      flex_cspad_img_sel.as_1d().as_numpy_array(),
      bins=n_slots, range=(hist_min, hist_max))
    pyplot.show()
def estimate_signal_to_noise(x, y_noisy, y_smoothed, plot=False):
  """Estimate noise in spectra by subtracting a smoothed spectrum from the
     original noisy unsmoothed spectrum.

     See:
       The extraction of signal to noise values in x-ray absorption spectroscopy
       A. J. Dent, P. C. Stephenson, and G. N. Greaves
       Rev. Sci. Instrum. 63, 856 (1992); https://doi.org/10.1063/1.1142627
  """
  noise = y_noisy - y_smoothed
  noise_sq = flex.pow2(noise)
  from xfel.command_line.view_pixel_histograms import sliding_average
  # note: this sliding-average estimate is immediately overwritten by the
  # Savitzky-Golay estimate on the next line
  sigma_sq = sliding_average(noise_sq, n=31)
  sigma_sq = smoothing.savitzky_golay_filter(
    x.as_double(), flex.pow2(noise), half_window=20, degree=1)[1]
  sigma_sq.set_selected(sigma_sq <= 0, flex.mean(sigma_sq))
  # or do this instead to use the background region as the source of noise:
  #signal_to_noise = y_smoothed/math.sqrt(flex.mean(noise_sq[50:190]))
  signal_to_noise = y_smoothed / flex.sqrt(sigma_sq)
  #signal_to_noise.set_selected(x < 50, 0)
  #signal_to_noise.set_selected(x > 375, 0)
  if plot:
    from matplotlib import pyplot
    linewidth = 2
    pyplot.plot(x, y_noisy, linewidth=linewidth)
    pyplot.plot(x, y_smoothed, linewidth=linewidth)
    pyplot_label_axes()
    pyplot.show()
    pyplot.plot(x, noise, linewidth=linewidth, label="noise")
    pyplot.plot(x, flex.sqrt(sigma_sq), linewidth=linewidth, label="sigma")
    pyplot_label_axes()
    pyplot.legend(loc=2, prop={'size': 20})
    pyplot.show()
    pyplot.plot(x, signal_to_noise, linewidth=linewidth)
    pyplot_label_axes()
    pyplot.show()
  return signal_to_noise
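# --- Illustrative sketch (editor addition, not from the source tree) ---
# Driving the estimator with synthetic data (the imports inside the function
# still require a cctbx/xfel environment): a Gaussian "spectrum" plus noise,
# with the true signal standing in for the smoothed curve.
import math
from scitbx.array_family import flex

flex.set_random_seed(0)
x = flex.int(list(range(400)))  # channel indices
signal = flex.double(
  [100.0 * math.exp(-((xi - 200.0) / 30.0)**2) for xi in x])
y_noisy = signal + 5.0 * (flex.random_double(len(x)) - 0.5)

sn = estimate_signal_to_noise(x, y_noisy, y_smoothed=signal, plot=False)
assert len(sn) == len(x)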
def reduce_raw_data(raw_data, qmax, bandwidth, level=0.05, q_background=None):
  print
  print " ==== Data reduction ==== "
  print
  print " Preprocessing of data increases efficiency of shape retrieval procedure."
  print
  print " - Interpolation stepsize : %4.3e" % bandwidth
  print " - Uniform density criteria: level is set to : %4.3e" % level
  print "   maximum q to consider : %4.3e" % qmax
  qmin_indx = flex.max_index(raw_data.i)
  qmin = raw_data.q[qmin_indx]
  new_data = get_q_array_uniform_body(
    raw_data, q_min=qmin, q_max=qmax, level=level)
  qmax = new_data.q[-1]
  if qmax > raw_data.q[-1]:
    qmax = raw_data.q[-1]
  print " Resulting q range to use in search: q start : %4.3e" % qmin
  print "                                     q stop  : %4.3e" % qmax
  print
  raw_q = raw_data.q[qmin_indx:]
  raw_i = raw_data.i[qmin_indx:]
  raw_s = raw_data.s[qmin_indx:]
  ### Take care of the background (set zero at very high q) ###
  if (q_background is not None):
    cutoff = flex.bool(raw_q > q_background)
    q_bk_indx = flex.last_index(cutoff, False)
    if (q_bk_indx < raw_q.size()):
      bkgrd = flex.mean(raw_i[q_bk_indx:])
      print "Background correction: I=I-background, where background=", bkgrd
      raw_i = flex.abs(raw_i - bkgrd)
  q = flex.double(range(int((qmax - qmin) / bandwidth) + 1)) * bandwidth + qmin
  raw_data.i = flex.linear_interpolation(raw_q, raw_i, q)
  raw_data.s = flex.linear_interpolation(raw_q, raw_s, q)
  raw_data.q = q
  return raw_data
def construct_src_to_dst_plan(icount, tranch_size, comm, verbose=True):
  mpi_rank = comm.Get_rank()
  mpi_size = comm.Get_size()
  COMPOSITE_MULTIPLICITY = 3
  from scitbx.array_family import flex
  imean = flex.mean(icount.as_double()); isum = flex.sum(icount)
  trial_comm_size = max(1, int(isum * COMPOSITE_MULTIPLICITY / tranch_size))
  trial_comm_size = min(trial_comm_size, mpi_size - 1) # new comm must have fewer than mpi_helper.size ranks
  if trial_comm_size in [2,3]:
    trial_comm_size = 1 # consensus communicator can run on 1,4,5,..., not 2,3
  trial_comm_size = max(1, trial_comm_size) # cannot be 0
  if verbose: print("recommended communicator size:", trial_comm_size)
  trial = [[] for itrank in range(trial_comm_size)]
  srcrk = [[] for itrank in range(trial_comm_size)]
  todst = {} # plan of action: src rank values to dst keys
  mt = flex.mersenne_twister(seed=0)
  order = mt.random_permutation(len(icount))
  available_dst = list(range(trial_comm_size))
  for idx in range(len(icount)):
    item = icount[order[idx]]; rk = idx
    if trial_comm_size > 3:
      unavailable = []
      for irx in range(COMPOSITE_MULTIPLICITY):
        if len(available_dst) == 0:
          available_dst = list(range(trial_comm_size))
        idx_st = choose_without_replace(available_dst, unavailable, mt)
        unavailable.append(idx_st)
        trial[(idx_st)%trial_comm_size].append(item)
        srcrk[(idx_st)%trial_comm_size].append(rk)
        todst[idx_st] = todst.get(idx_st, [])
        todst[idx_st].append(order[idx])
    else:
      trial[0].append(item)
      idx_st = 0
      todst[idx_st] = todst.get(idx_st, [])
      todst[idx_st].append(order[idx])
  if verbose:
    for itranch, item in enumerate(trial):
      print("tranch %2d:"%itranch, item, flex.sum(flex.int(item)))
    print()
    for item in srcrk:
      print(", ".join(["%02d"%i for i in item]))
    print(todst)
  return todst
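# --- Illustrative sketch (editor addition, not from the source tree) ---
# The planner only reads comm.Get_rank()/Get_size(), so it can be exercised
# without MPI through a stub communicator. Parameters are chosen so that
# trial_comm_size == 1 and the choose_without_replace() helper (not shown in
# this excerpt) is never reached.
from scitbx.array_family import flex

class _StubComm(object):
  def Get_rank(self): return 0
  def Get_size(self): return 8

icount = flex.int([5, 3, 7, 2])  # per-item work counts (made up)
plan = construct_src_to_dst_plan(icount, tranch_size=1000, comm=_StubComm(),
                                 verbose=False)
assert sorted(plan[0]) == [0, 1, 2, 3]  # all source indices mapped to dst 0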
def get_atom_radius(xray_structure=None, d_min=None, map_data=None,
                    crystal_symmetry=None, radius=None):
  if(radius is not None): return radius
  radii = []
  if(d_min is not None):
    radii.append(d_min)
  if([xray_structure, crystal_symmetry].count(None)==0):
    assert crystal_symmetry.is_similar_symmetry(
      xray_structure.crystal_symmetry())
  if([map_data, crystal_symmetry].count(None)==0):
    d99 = maptbx.d99(
      map              = map_data,
      crystal_symmetry = crystal_symmetry).result.d99
    radii.append(d99)
  if(xray_structure is not None and d_min is not None):
    b_iso = adptbx.u_as_b(
      flex.mean(xray_structure.extract_u_iso_or_u_equiv()))
    o = maptbx.atom_curves(scattering_type="C", scattering_table="electron")
    rad_image = o.image(d_min=d_min, b_iso=b_iso,
      radius_max=max(15.,d_min), radius_step=0.01).radius
    radii.append(rad_image)
  return max(3, min(10, max(radii)))
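# --- Illustrative sketch (editor addition, not from the source tree) ---
# The return line clamps the chosen radius to the [3, 10] Angstrom window;
# with only d_min supplied the function reduces to exactly that clamp:
assert get_atom_radius(d_min=2.0) == 3     # clamped up to the 3 A floor
assert get_atom_radius(d_min=6.5) == 6.5   # already inside the window
assert get_atom_radius(d_min=25.0) == 10   # clamped down to the 10 A ceiling
assert get_atom_radius(radius=1.2) == 1.2  # explicit radius short-circuits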
def pair_align(self):
  ms = flex.double()
  ss = flex.double()
  tmp_nlm_array = math.nlm_array(self.nmax)
  for coef in self.finals:
    mean = abs(coef[0])
    var = flex.sum(flex.norm(coef))
    sigma = smath.sqrt(var - mean*mean)
    ms.append(mean)
    ss.append(sigma)
  grids = flex.grid(self.n_trial, self.n_trial)
  self.cc_array = flex.double(grids, 1.0)
  for ii in range(self.n_trial):
    self.nlm_array.load_coefs(self.nlm, self.finals[ii])
    for jj in range(ii):
      tmp_nlm_array.load_coefs(self.nlm, self.finals[jj])
      cc = fft_align.align(self.nlm_array, tmp_nlm_array,
        nmax=self.nmax, refine=True).best_score
      cc = (cc - ms[ii]*ms[jj]) / (ss[ii]*ss[jj])
      self.cc_array[(ii, jj)] = cc
      self.cc_array[(jj, ii)] = cc
  outfile = self.prefix + "pair.cc"
  comment = "# electron density correlation coefficient, < rho_1(r)*rho_2(r) >"
  out = open(outfile, 'w')
  print >> out, comment
  for ii in range(1, self.n_trial + 1):
    print >> out, "%6d" % ii,
  print >> out, "  average"
  for ii in range(self.n_trial):
    for jj in range(self.n_trial):
      print >> out, "%6.3f" % self.cc_array[(ii, jj)],
    print >> out, flex.mean(
      self.cc_array[ii*self.n_trial:(ii+1)*self.n_trial])
  out.close()
def compute(self):
  """
  Computes the reduced chi squared parameter. Can be used to correct for
  under-estimation of experimental errors.
  See https://en.wikipedia.org/wiki/Weighted_arithmetic_mean#Correcting_for_over-_or_under-dispersion
  """
  print >> self.log, "Computing reduced chi squared"
  self.scaler.reduced_chi_squared = flex.double(self.scaler.n_refl, 1.)
  for hkl_id in range(self.scaler.n_refl):
    hkl = self.scaler.miller_set.indices()[hkl_id]
    if hkl not in self.scaler.ISIGI: continue
    n = len(self.scaler.ISIGI[hkl])
    if n <= 1: continue
    i = self.scaler.summed_wt_I[hkl_id] / self.scaler.summed_weight[hkl_id]
    # use k (not i) as the comprehension variable: in Python 2 the
    # comprehension would otherwise clobber the weighted mean i computed
    # above before it is used in the (x - i)**2 term
    x = flex.double([self.scaler.ISIGI[hkl][k][0] for k in range(n)])
    v = (x / flex.double([self.scaler.ISIGI[hkl][k][1] for k in range(n)]))**2
    self.scaler.reduced_chi_squared[hkl_id] = \
      1.0 / (n - 1) * flex.sum((x - i)**2 / v)
  sel = self.scaler.reduced_chi_squared > 0
  print >> self.log, "Done computing reduced chi squared", flex.mean(
    self.scaler.reduced_chi_squared.select(sel))
  if False:
    from matplotlib import pyplot as plt
    plt.hist(self.scaler.reduced_chi_squared.select(sel).as_numpy_array(),
             bins=100, range=(0, 10))
    plt.show()
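# --- Illustrative sketch (editor addition, not from the source tree) ---
# Since ISIGI apparently stores (I, I/sigI, ...) tuples, v above works out
# to sigma^2, i.e. the standard reduced chi squared about the weighted mean:
# chi2_red = sum((x_j - <x>_w)^2 / sigma_j^2) / (n - 1). Toy version:
from scitbx.array_family import flex

x = flex.double([10.0, 12.0, 11.0])    # observations of one reflection
sigmas = flex.double([1.0, 2.0, 1.5])  # their standard deviations
w = 1.0 / (sigmas * sigmas)
mean_w = flex.sum(w * x) / flex.sum(w) # weighted mean intensity
n = len(x)
chi2_red = flex.sum((x - mean_w)**2 / (sigmas * sigmas)) / (n - 1)
print(chi2_red)  # ~1 when the sigmas describe the scatter well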
def show(self, prefix="", out=sys.stdout):
  if (self.n_residues() == 1):
    print(prefix + "Segment: 1 residue (%s), %d conformers" % \
      (self.residue_groups[0].id_str(), self.n_confs()), file=out)
  else:
    print(prefix + "Segment: %d residues (%s --> %s), %d conformers" % \
      (self.n_residues(), self.residue_groups[0].id_str(),
       self.residue_groups[-1].id_str(), self.n_confs()), file=out)
  for i_res, rg in enumerate(self.residue_groups):
    print(prefix + "  residue_group=%s" % rg.id_str(), file=out)
    for ag in rg.atom_groups():
      rama = rota = None
      for o in self.ramachandran.get(rg.id_str(), []):
        if (o.altloc == ag.altloc):
          rama = o
          break
      for o in self.rotamers.get(rg.id_str(), []):
        if (o.altloc == ag.altloc):
          rota = o
          break
      print(prefix + "    " + \
        "atom_group=%1s %3s occ=%.2f phi=%-6s psi=%-6s rot=%-7s" % \
        (ag.altloc, ag.resname, flex.mean(ag.atoms().extract_occ()),
         fv("%.1f", getattr(rama, "phi", None)),
         fv("%.1f", getattr(rama, "psi", None)),
         getattr(rota, "rotamer_name", None)), file=out)
    if (len(self.backrubs[i_res]) > 0):
      for backrub in self.backrubs[i_res]:
        backrub.show(out=out, prefix=prefix + "    ")
    outliers = self.outliers[rg.id_str()]
    if (len(outliers) > 0):
      print(prefix + "    MolProbity outliers:", file=out)
      for outlier in outliers:
        print(prefix + "      %s: %s" % (type(outlier).__name__,
          str(outlier)), file=out)
def box_iterator(self):
  b = maptbx.boxes(
    n_real   = self.atom_map_asu.focus(),
    fraction = self.box_size_as_fraction,
    max_boxes= self.max_boxes,
    log      = self.log)
  def get_wide_box(s, e): # define wide box: neutral + phased volumes
    if (self.neutral_volume_box_cushion_width > 0):
      sh = self.neutral_volume_box_cushion_width
      ss = [max(s[i] - sh, 0) for i in [0, 1, 2]]
      ee = [min(e[i] + sh, n_real_asu[i]) for i in [0, 1, 2]]
    else:
      ss, ee = s, e
    return ss, ee
  n_real_asu = b.n_real
  n_boxes = len(b.starts)
  i_box = 0
  for s, e in zip(b.starts, b.ends):
    i_box += 1
    sw, ew = get_wide_box(s=s, e=e)
    fmodel_omit = self.omit_box(start=sw, end=ew)
    r = fmodel_omit.r_work()
    self.r.append(r) # for tests only
    if (self.log):
      print("r(curr,min,max,mean)=%6.4f %6.4f %6.4f %6.4f" % (r,
        flex.min(self.r), flex.max(self.r), flex.mean(self.r)),
        i_box, n_boxes, file=self.log)
    omit_map_data = self.asu_map_from_fmodel(
      fmodel=fmodel_omit, map_type=self.map_type)
    maptbx.copy_box(
      map_data_from = omit_map_data,
      map_data_to   = self.map_result_asu,
      start         = s,
      end           = e)
  self.map_result_asu.reshape(self.acc_asu)
def get_mean_statistic_for_resolution(d_min, stat_type, range_value=0.2, out=None):
  if (out is None):
    out = sys.stdout
  from scitbx.array_family import flex
  pkl_file = libtbx.env.find_in_repositories(
    relative_path="chem_data/polygon_data/all_mvd.pickle",
    test=os.path.isfile)
  db = easy_pickle.load(pkl_file)
  all_d_min = db['high_resolution']
  stat_values = db[stat_type]
  values_for_range = flex.double()
  for (d_, v_) in zip(all_d_min, stat_values):
    try:
      d = float(d_)
      v = float(v_)
    except ValueError:
      continue
    else:
      if (d > (d_min - range_value)) and (d < (d_min + range_value)):
        values_for_range.append(v)
  h = flex.histogram(values_for_range, n_slots=10)
  print(" %s for d_min = %.3f - %.3f A" % (stat_names[stat_type],
    d_min - range_value, d_min + range_value), file=out)
  min = flex.min(values_for_range)
  max = flex.max(values_for_range)
  mean = flex.mean(values_for_range)
  print(" count: %d" % values_for_range.size(), file=out)
  print(" min: %.2f" % min, file=out)
  print(" max: %.2f" % max, file=out)
  print(" mean: %.2f" % mean, file=out)
  print(" histogram of values:", file=out)
  h.show(prefix=" ")
  return mean
def find_duplicate_conformers(residue_group, rmsd_cutoff=0.1):
  """Return pairs of residue objects for conformers with an RMSD less than rmsd_cutoff"""
  rmsd_cutoff_sq = rmsd_cutoff**2
  duplicate_conformers = []
  for i, c1 in enumerate(residue_group.conformers()):
    r1 = c1.only_residue()
    a1 = r1.atoms()
    a1_nam = list(a1.extract_name())
    for j, c2 in enumerate(residue_group.conformers()):
      if j <= i: continue
      # Extract residue and skip if not comparable
      r2 = c2.only_residue()
      if r1.resname != r2.resname: continue
      # Extract atoms
      a2 = r2.atoms()
      a2_nam = list(a2.extract_name())
      # Get atom overlap between conformers
      common_atoms = list(set(a1_nam).intersection(a2_nam))
      # Sort the atoms so we can use unsorted residues
      a1_sel = flex.size_t([a1_nam.index(an) for an in common_atoms])
      a2_sel = flex.size_t([a2_nam.index(an) for an in common_atoms])
      # Check the selections work as they should
      assert a1.extract_name().select(a1_sel) == a2.extract_name().select(a2_sel)
      # Extract ordered coordinates
      a1_xyz = a1.extract_xyz().select(a1_sel)
      a2_xyz = a2.extract_xyz().select(a2_sel)
      # Calculate mean-square distance and check it is below the threshold
      d = flex.mean((a1_xyz - a2_xyz).dot())
      if d < rmsd_cutoff_sq:
        duplicate_conformers.append(
          (r1.standalone_copy(), r2.standalone_copy()))
  return duplicate_conformers
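# --- Illustrative sketch (editor addition, not from the source tree) ---
# The core trick above -- mapping a shared set of atom names back to each
# conformer's ordering via flex.size_t index selections -- can be exercised
# on its own; the atom names below are made up:
from scitbx.array_family import flex

a1_nam = [" N  ", " CA ", " C  ", " O  "]
a2_nam = [" CA ", " N  ", " O  ", " C  "]  # same atoms, different order
common_atoms = list(set(a1_nam).intersection(a2_nam))
a1_sel = flex.size_t([a1_nam.index(an) for an in common_atoms])
a2_sel = flex.size_t([a2_nam.index(an) for an in common_atoms])
# Both selections now pull atoms out in the same (common_atoms) order.
assert [a1_nam[k] for k in a1_sel] == [a2_nam[k] for k in a2_sel]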
def test_rs_mapper(dials_data, tmp_path):
    result = procrunner.run(
        [
            "dials.rs_mapper",
            dials_data("centroid_test_data", pathlib=True) / "imported_experiments.json",
            'map_file="junk.ccp4"',
        ],
        working_directory=tmp_path,
    )
    assert not result.returncode and not result.stderr
    assert (tmp_path / "junk.ccp4").is_file()

    # load results
    m = ccp4_map.map_reader(file_name=str(tmp_path / "junk.ccp4"))

    assert len(m.data) == 7189057
    assert m.header_min == 0.0
    assert flex.min(m.data) == 0.0
    assert m.header_max == 2052.75
    assert flex.max(m.data) == 2052.75
    assert m.header_mean == pytest.approx(0.018924040719866753, abs=1e-6)
    assert flex.mean(m.data) == pytest.approx(0.01892407052218914, abs=1e-6)
def run(prefix):
  """
  Exercise that "qr.finalise m.pdb" produces the expected (and meaningful)
  output. Do not modify this test before checking the result on graphics
  (e.g., PyMol)!
  """
  pdb_in = "%s.pdb" % prefix
  open(pdb_in, "w").write(pdb_str_in)
  cmd = "qr.finalise %s > %s.log" % (pdb_in, prefix)
  assert easy_run.call(cmd) == 0
  h_answer = iotbx.pdb.input(
    source_info=None, lines=pdb_str_out).construct_hierarchy()
  h_result = iotbx.pdb.input(
    file_name="%s_complete.pdb" % prefix).construct_hierarchy()
  # Coordinates must agree with the reference answer
  s1 = h_answer.atoms().extract_xyz()
  s2 = h_result.atoms().extract_xyz()
  r = flex.mean(flex.sqrt((s1 - s2).dot()))
  assert r < 0.005
  # Added H/D atoms must be present with zero occupancy
  asc = h_result.atom_selection_cache()
  sel = asc.selection("element H or element D")
  assert sel.count(True) == 7
  occ = h_result.atoms().extract_occ().select(sel)
  assert flex.max(occ) < 1.e-6
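# Minimal sketch (coordinates invented for illustration) of the coordinate
# agreement check used in the test above: the mean per-atom displacement
# between two flex.vec3_double arrays.
def demo_coordinate_rmsd():
  from scitbx.array_family import flex
  s1 = flex.vec3_double([(0, 0, 0), (1, 1, 1)])
  s2 = flex.vec3_double([(0, 0, 0.003), (1, 1, 1.004)])
  r = flex.mean(flex.sqrt((s1 - s2).dot()))  # 0.0035 for these values
  assert r < 0.005
  return r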
def get_sites_cc(self, atoms, sites=None):
  from cctbx import maptbx
  from scitbx.array_family import flex
  radii = flex.double()
  for atom in atoms:
    if (atom.element.strip() in ["H", "D"]):
      radii.append(1.)
    else:
      radii.append(1.5)
  fcalc_map = self.fcalc_real_map
  if (sites is None):
    sites = atoms.extract_xyz()
  else:
    fcalc_map = self.get_new_fcalc_map(
      sites_new=sites,
      i_seqs=atoms.extract_i_seq())
  sel = maptbx.grid_indices_around_sites(
    unit_cell=self.unit_cell,
    fft_n_real=self.n_real,
    fft_m_real=self.m_real,
    sites_cart=sites,
    site_radii=radii)
  m1 = self.real_map.select(sel)
  m2 = fcalc_map.select(sel)
  cc = flex.linear_correlation(x=m1, y=m2).coefficient()
  return group_args(cc=cc, map_mean=flex.mean(m1.as_1d()))
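# Standalone sketch (arrays invented for illustration) of the map correlation
# computed in get_sites_cc above: flex.linear_correlation over two density
# selections.
def demo_map_cc():
  from scitbx.array_family import flex
  m1 = flex.double([0.1, 0.4, 0.3, 0.8])
  m2 = flex.double([0.2, 0.5, 0.25, 0.9])
  return flex.linear_correlation(x=m1, y=m2).coefficient()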
def log_frame(experiments, reflections, params, run, n_strong, timestamp=None,
              two_theta_low=None, two_theta_high=None, db_event=None, app=None,
              trial=None):
  if app is None:
    app = dxtbx_xfel_db_application(params, mode='cache_commits')
  else:
    app.mode = 'cache_commits'
  if isinstance(run, int) or isinstance(run, str):
    db_run = app.get_run(run_number=run)
  else:
    db_run = run
  if trial is None:
    if params.input.trial is None:
      db_trial = app.get_trial(trial_id=params.input.trial_id)
      params.input.trial = db_trial.trial
    else:
      db_trial = app.get_trial(trial_number=params.input.trial)
  else:
    db_trial = trial
  if db_event is None:
    if params.input.rungroup is None:
      db_event = app.create_event(timestamp=timestamp, run_id=db_run.id,
        trial_id=db_trial.id, n_strong=n_strong,
        two_theta_low=two_theta_low, two_theta_high=two_theta_high)
    else:
      db_event = app.create_event(timestamp=timestamp, run_id=db_run.id,
        trial_id=db_trial.id, rungroup_id=params.input.rungroup,
        n_strong=n_strong, two_theta_low=two_theta_low,
        two_theta_high=two_theta_high)
  inserts = ""
  if app.last_query is None:
    app.last_query = ""

  def save_last_id(name):
    # Cache the pending INSERT and capture its auto-increment id in a
    # session variable for use by later statements
    nonlocal inserts
    inserts += app.last_query + ";\n"
    inserts += "SELECT LAST_INSERT_ID() INTO @%s_id;\n" % name

  if experiments:
    save_last_id('event')
  else:
    inserts += app.last_query + ";\n"

  for i, experiment in enumerate(experiments or []):
    reflections_i = reflections.select(reflections['id'] == i)
    imageset = Imageset(app)
    save_last_id('imageset')
    beam = Beam(app, beam=experiment.beam)
    save_last_id('beam')
    detector = Detector(app, detector=experiment.detector)
    save_last_id('detector')
    cell = Cell(app, crystal=experiment.crystal, isoform_id=None)
    save_last_id('cell')
    crystal = Crystal(app, crystal=experiment.crystal, make_cell=False,
                      cell_id="@cell_id")
    save_last_id('crystal')
    inserts += ("INSERT INTO `%s_experiment` (imageset_id, beam_id, "
                "detector_id, crystal_id, crystal_cell_id) "
                "VALUES (@imageset_id, @beam_id, @detector_id, @crystal_id, "
                "@cell_id);\n") % (params.experiment_tag)
    inserts += ("INSERT INTO `%s_imageset_event` (imageset_id, event_id, "
                "event_run_id) VALUES (@imageset_id, @event_id, %d);\n") % (
      params.experiment_tag, db_run.id)

    d = experiment.crystal.get_unit_cell().d(
      reflections['miller_index']).select(reflections['id'] == i)
    from cctbx.crystal import symmetry
    cs = symmetry(unit_cell=experiment.crystal.get_unit_cell(),
                  space_group=experiment.crystal.get_space_group())
    mset = cs.build_miller_set(anomalous_flag=False, d_min=db_trial.d_min)
    n_bins = 10  # FIXME use n_bins as an attribute on the trial table
    binner = mset.setup_binner(n_bins=n_bins)
    # Use a distinct loop name so the experiment index i is not shadowed
    for i_bin in binner.range_used():
      d_max, d_min = binner.bin_d_range(i_bin)
      Bin(app, number=i_bin, d_min=d_min, d_max=d_max,
          total_hkl=binner.counts_complete()[i_bin], cell_id='@cell_id')
      save_last_id('bin')
      sel = (d <= float(d_max)) & (d > float(d_min))
      sel &= reflections_i['intensity.sum.value'] > 0
      refls = reflections_i.select(sel)
      n_refls = len(refls)
      Cell_Bin(app,
               count=n_refls,
               bin_id='@bin_id',
               crystal_id='@crystal_id',
               avg_intensity=flex.mean(refls['intensity.sum.value'])
                 if n_refls > 0 else None,
               avg_sigma=flex.mean(flex.sqrt(refls['intensity.sum.variance']))
                 if n_refls > 0 else None,
               avg_i_sigi=flex.mean(refls['intensity.sum.value'] /
                 flex.sqrt(refls['intensity.sum.variance']))
                 if n_refls > 0 else None)
      inserts += app.last_query + ";\n"
  app.mode = 'execute'
  return inserts
def run(args, image=None):
  from xfel import radial_average
  from scitbx.array_family import flex
  import os, sys
  import dxtbx

  # Parse input
  try:
    n = len(args)
  except Exception:
    params = args
  else:
    user_phil = []
    for arg in args:
      if ("=" not in arg):
        try:
          user_phil.append(libtbx.phil.parse("""file_path=%s""" % arg))
        except ValueError:
          raise Sorry("Unrecognized argument '%s'" % arg)
      else:
        try:
          user_phil.append(libtbx.phil.parse(arg))
        except RuntimeError as e:
          raise Sorry("Unrecognized argument '%s' (error: %s)" % (arg, str(e)))
    params = master_phil.fetch(sources=user_phil).extract()
  if image is None:
    if params.file_path is None or len(params.file_path) == 0 or \
       not all([os.path.isfile(f) for f in params.file_path]):
      master_phil.show()
      raise Usage("file_path must be defined (either file_path=XXX, or the path alone).")
  assert params.n_bins is not None
  assert params.verbose is not None
  assert params.output_bins is not None

  # Allow writing to a file instead of stdout
  if params.output_file is None:
    logger = sys.stdout
  else:
    logger = open(params.output_file, 'w')
    logger.write("%s " % params.output_file)

  if params.show_plots:
    from matplotlib import pyplot as plt
    import numpy as np
    colormap = plt.cm.gist_ncar
    plt.gca().set_color_cycle(
      [colormap(i) for i in np.linspace(0, 0.9, len(params.file_path))])

  if params.mask is not None:
    params.mask = easy_pickle.load(params.mask)

  if image is None:
    iterable = params.file_path
    load_func = lambda x: dxtbx.load(x)
  else:
    iterable = [image]
    load_func = lambda x: x

  # Iterate over each file provided
  for item in iterable:
    img = load_func(item)
    try:
      n_images = img.get_num_images()
      subiterable = range(n_images)
    except AttributeError:
      n_images = None
      subiterable = [0]
    for image_number in subiterable:
      if n_images is None:
        beam = img.get_beam()
        detector = img.get_detector()
      else:
        beam = img.get_beam(image_number)
        detector = img.get_detector(image_number)
      s0 = col(beam.get_s0())

      # Search the detector for the panel farthest from the beam. The number
      # of bins in the radial average will be equal to the farthest point
      # from the beam on the detector, in pixels, unless overridden at the
      # command line.
      panel_res = [p.get_max_resolution_at_corners(s0) for p in detector]
      farthest_panel = detector[panel_res.index(min(panel_res))]
      size2, size1 = farthest_panel.get_image_size()
      corners = [(0, 0), (size1-1, 0), (0, size2-1), (size1-1, size2-1)]
      corners_lab = [col(farthest_panel.get_pixel_lab_coord(c)) for c in corners]
      corner_two_thetas = [farthest_panel.get_two_theta_at_pixel(s0, c) for c in corners]
      extent_two_theta = max(corner_two_thetas)
      max_corner = corners_lab[corner_two_thetas.index(extent_two_theta)]
      extent = int(math.ceil(max_corner.length() * math.sin(extent_two_theta) /
                             max(farthest_panel.get_pixel_size())))
      extent_two_theta *= 180 / math.pi

      if params.n_bins < extent:
        params.n_bins = extent

      # These arrays will store the radial average info
      sums = flex.double(params.n_bins) * 0
      sums_sq = flex.double(params.n_bins) * 0
      counts = flex.int(params.n_bins) * 0

      if n_images is None:
        all_data = img.get_raw_data()
      else:
        all_data = img.get_raw_data(image_number)
      if not isinstance(all_data, tuple):
        all_data = (all_data,)

      for tile, (panel, data) in enumerate(zip(detector, all_data)):
        if params.mask is None:
          mask = flex.bool(flex.grid(data.focus()), True)
        else:
          mask = params.mask[tile]
        if hasattr(data, "as_double"):
          data = data.as_double()
        logger.flush()
        if params.verbose:
          logger.write("Average intensity tile %d: %9.3f\n" % (tile, flex.mean(data)))
          logger.write("N bins: %d\n" % params.n_bins)
          logger.flush()
        x1, y1, x2, y2 = 0, 0, panel.get_image_size()[1], panel.get_image_size()[0]
        bc = panel.get_beam_centre_px(beam.get_s0())
        bc = int(round(bc[1])), int(round(bc[0]))

        # compute the average
        radial_average(data, mask, bc, sums, sums_sq, counts,
                       panel.get_pixel_size()[0], panel.get_distance(),
                       (x1, y1), (x2, y2))

      # average the results, avoiding division by zero
      results = sums.set_selected(counts <= 0, 0)
      results /= counts.set_selected(counts <= 0, 1).as_double()

      if params.median_filter_size is not None:
        logger.write("WARNING, the median filter is not fully propagated to the variances\n")
        from scipy.ndimage.filters import median_filter
        results = flex.double(
          median_filter(results.as_numpy_array(), size=params.median_filter_size))

      # calculate standard deviations
      stddev_sel = ((sums_sq - sums * results) >= 0) & (counts > 0)
      std_devs = flex.double(len(sums), 0)
      std_devs.set_selected(stddev_sel,
        (sums_sq.select(stddev_sel) - sums.select(stddev_sel) *
         results.select(stddev_sel)) / counts.select(stddev_sel).as_double())
      std_devs = flex.sqrt(std_devs)

      twotheta = flex.double(range(len(results))) * extent_two_theta / params.n_bins
      q_vals = 4 * math.pi * flex.sin(math.pi * twotheta / 360) / beam.get_wavelength()

      if params.low_max_two_theta_limit is None:
        subset = results
      else:
        subset = results.select(twotheta >= params.low_max_two_theta_limit)
      max_result = flex.max(subset)

      if params.x_axis == 'two_theta':
        xvals = twotheta
        max_x = twotheta[flex.first_index(results, max_result)]
      elif params.x_axis == 'q':
        xvals = q_vals
        max_x = q_vals[flex.first_index(results, max_result)]

      for i in range(len(results)):
        val = xvals[i]
        if params.output_bins and "%.3f" % results[i] != "nan":
          #logger.write("%9.3f %9.3f\n"% (val,results[i]))                   # .xy format for Rex.cell
          logger.write("%9.3f %9.3f %9.3f\n" % (val, results[i], std_devs[i]))  # .xye format for GSASII
          #logger.write("%.3f %.3f %.3f\n"%(val,results[i],ds[i]))           # include calculated d spacings
      logger.write("Maximum %s: %f, value: %f\n" % (params.x_axis, max_x, max_result))

      if params.show_plots:
        if params.plot_x_max is not None:
          results = results.select(xvals <= params.plot_x_max)
          xvals = xvals.select(xvals <= params.plot_x_max)
        if params.normalize:
          plt.plot(xvals.as_numpy_array(),
                   (results / flex.max(results)).as_numpy_array(), '-')
        else:
          plt.plot(xvals.as_numpy_array(), results.as_numpy_array(), '-')
        if params.x_axis == 'two_theta':
          plt.xlabel("2 theta")
        elif params.x_axis == 'q':
          plt.xlabel("q")
        plt.ylabel("Avg ADUs")
        if params.plot_y_max is not None:
          plt.ylim(0, params.plot_y_max)

  if params.show_plots:
    #plt.legend([os.path.basename(os.path.splitext(f)[0]) for f in params.file_path], ncol=2)
    plt.show()

  return xvals, results
def _avg_sd_from_list(lst):
  """Simple function to get average and standard deviation."""
  arr = flex.double(lst)
  avg = round(flex.mean(arr), 5)
  std = round(arr.standard_deviation_of_the_sample(), 5)
  return avg, std
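# Usage sketch for _avg_sd_from_list above (inputs invented for illustration):
def demo_avg_sd():
  avg, std = _avg_sd_from_list([1.0, 2.0, 3.0])
  return avg, std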
def adjust_errors(self):
  print("Starting adjust_errors", file=self.log)
  # Save original sigmas
  refls = self.scaler.ISIGI
  refls['original_sigmas'] = refls['scaled_intensity'] / refls['isigi']

  print("Computing initial estimates of parameters", file=self.log)
  propagator = sdfac_propagate(self.scaler, verbose=False)
  propagator.initial_estimates()
  propagator.adjust_errors(compute_sums=False)
  init_params = flex.double(self.get_initial_sdparams_estimates())
  init_params.extend(propagator.error_terms.to_x())
  values = self.parameterization(init_params)

  print("Initial estimates:", file=self.log)
  values.show(self.log)
  print("Refining error correction parameters", file=self.log)
  sels, binned_intensities = self.get_binned_intensities()
  minimizer = self.run_minimzer(values, sels)  # sic: method name as defined upstream
  values = minimizer.get_refined_params()
  print("Final", file=self.log)
  values.show(self.log)

  print("Applying sdfac/sdb/sdadd 1", file=self.log)
  # Restore original sigmas
  refls['isigi'] = refls['scaled_intensity'] / refls['original_sigmas']

  # Propagate refined errors from postrefinement
  propagator.error_terms = error_terms.from_x(values.propagate_terms)
  propagator.adjust_errors()
  minimizer.apply_sd_error_params(self.scaler.ISIGI, values)

  self.scaler.summed_weight = flex.double(self.scaler.n_refl, 0.)
  self.scaler.summed_wt_I = flex.double(self.scaler.n_refl, 0.)

  print("Applying sdfac/sdb/sdadd 2", file=self.log)
  for i in range(len(self.scaler.ISIGI)):
    hkl_id = self.scaler.ISIGI['miller_id'][i]
    Intensity = self.scaler.ISIGI['scaled_intensity'][i]  # scaled intensity
    sigma = Intensity / self.scaler.ISIGI['isigi'][i]     # corrected sigma
    variance = sigma * sigma
    self.scaler.summed_wt_I[hkl_id] += Intensity / variance
    self.scaler.summed_weight[hkl_id] += 1 / variance

  if False:
    # validate using http://ccp4wiki.org/~ccp4wiki/wiki/index.php?title=Symmetry%2C_Scale%2C_Merge#Analysis_of_Standard_Deviations
    print("Validating", file=self.log)
    from matplotlib import pyplot as plt
    all_sigmas_normalized = compute_normalized_deviations(
      self.scaler.ISIGI, self.scaler.miller_set.indices())
    plt.hist(all_sigmas_normalized, bins=100)
    plt.figure()
    binned_rms_normalized_sigmas = []
    for i, sel in enumerate(sels):
      binned_rms_normalized_sigmas.append(math.sqrt(flex.mean(
        all_sigmas_normalized.select(sel) * all_sigmas_normalized.select(sel))))
    plt.plot(binned_intensities, binned_rms_normalized_sigmas, 'o')
    plt.show()
    all_sigmas_normalized = all_sigmas_normalized.select(
      all_sigmas_normalized != 0)
    self.normal_probability_plot(all_sigmas_normalized, (-0.5, 0.5), plot=True)
def initialize_A(self):
  # initial fit is to set the average
  self.A = self.gain * flex.mean(self.KI)
def log_frame(experiments, reflections, params, run, n_strong, timestamp=None,
              two_theta_low=None, two_theta_high=None, db_event=None):
  app = dxtbx_xfel_db_application(params)
  db_run = app.get_run(run_number=run)
  if params.input.trial is None:
    db_trial = app.get_trial(trial_id=params.input.trial_id)
    params.input.trial = db_trial.trial
  else:
    db_trial = app.get_trial(trial_number=params.input.trial)

  if db_event is None:
    if params.input.rungroup is None:
      db_event = app.create_event(timestamp=timestamp, run_id=db_run.id,
        trial_id=db_trial.id, n_strong=n_strong,
        two_theta_low=two_theta_low, two_theta_high=two_theta_high)
    else:
      db_event = app.create_event(timestamp=timestamp, run_id=db_run.id,
        trial_id=db_trial.id, rungroup_id=params.input.rungroup,
        n_strong=n_strong, two_theta_low=two_theta_low,
        two_theta_high=two_theta_high)

  for i, experiment in enumerate(experiments or []):
    reflections_i = reflections.select(reflections['id'] == i)
    db_experiment = app.create_experiment(experiment)
    app.link_imageset_frame(db_experiment.imageset, db_event)

    d = experiment.crystal.get_unit_cell().d(
      reflections['miller_index']).select(reflections['id'] == i)
    if len(db_experiment.crystal.cell.bins) == 0:
      # will be [] if there are no isoforms and no target cells
      from cctbx.crystal import symmetry
      cs = symmetry(unit_cell=db_experiment.crystal.cell.unit_cell,
                    space_group_symbol=db_experiment.crystal.cell.lookup_symbol)
      mset = cs.build_miller_set(anomalous_flag=False, d_min=db_trial.d_min)
      n_bins = 10  # FIXME use n_bins as an attribute on the trial table
      binner = mset.setup_binner(n_bins=n_bins)
      # Use a distinct loop name so the experiment index i is not shadowed
      for i_bin in binner.range_used():
        d_max, d_min = binner.bin_d_range(i_bin)
        Bin(app, number=i_bin, d_min=d_min, d_max=d_max,
            total_hkl=binner.counts_complete()[i_bin],
            cell_id=db_experiment.crystal.cell.id)
      db_experiment.crystal.cell.bins = app.get_cell_bins(
        db_experiment.crystal.cell.id)
      assert len(db_experiment.crystal.cell.bins) == n_bins

    for db_bin in db_experiment.crystal.cell.bins:
      sel = (d <= float(db_bin.d_max)) & (d > float(db_bin.d_min))
      sel &= reflections_i['intensity.sum.value'] > 0
      refls = reflections_i.select(sel)
      n_refls = len(refls)
      Cell_Bin(app,
               count=n_refls,
               bin_id=db_bin.id,
               crystal_id=db_experiment.crystal.id,
               avg_intensity=flex.mean(refls['intensity.sum.value'])
                 if n_refls > 0 else None,
               avg_sigma=flex.mean(flex.sqrt(refls['intensity.sum.variance']))
                 if n_refls > 0 else None,
               avg_i_sigi=flex.mean(refls['intensity.sum.value'] /
                 flex.sqrt(refls['intensity.sum.variance']))
                 if n_refls > 0 else None)
  return db_event