def build_scattering_library(atom_types, q_array, radii, radius_scale,
                             Explicit_H, S_factor):
  scat_lib = intensity.scattering_library(q_array)
  ener_lib = server.ener_lib()
  for at in atom_types:
    element = ener_lib.lib_atom[at].element
    if (element == ''):
      element = 'C'
      print "Warning: unexpected atom found, and C element is used"
    val = xray_scattering.it1992(element, True).fetch()
    a = val.array_of_a()
    b = val.array_of_b()
    c = val.c()
    sf_result = scattering_factor(a, b, c, q_array)
    # get the displaced-solvent volume
    if (at in radii):
      r = radii[at] * radius_scale
      v = math.pi * 4.0 / 3.0 * r**3.0
    else:
      v = 16.44
    dummy_sf = dummy_factor(v, q_array)
    if (not Explicit_H):
      if (at in S_factor):
        scale = S_factor[at]
        sf_result *= (scale[0] * flex.exp(-scale[1] * q_array * q_array))
      dummy_sf *= (flex.exp(-1.25 * q_array * q_array))
    scat_lib.load_scattering_info(at, sf_result, dummy_sf)
  return scat_lib

def fvec_callable(pfh, current_values):
  G = current_values[0]
  B_factor = current_values[1]
  I_obs = observations_original_sel.data()
  sigI_obs = observations_original_sel.sigmas()
  observations_original_sel_two_theta = \
      observations_original_sel.two_theta(wavelength=wavelength)
  two_theta = observations_original_sel_two_theta.data()
  sin_theta_over_lambda_sq = \
      observations_original_sel_two_theta.sin_theta_over_lambda_sq().data()
  excursions = ((G * flex.exp(-2 * B_factor * sin_theta_over_lambda_sq)
                 * I_obs) - I_ref) / sigI_obs
  corr_now, slope_now = get_overall_correlation(
      G * flex.exp(-2 * B_factor * sin_theta_over_lambda_sq) * I_obs, I_ref)
  """
  print "SCALE G=%5.3f B_factor=%5.3f J=%6.3f cc=%6.3f slope=%6.3f"% \
    (G, B_factor, sum(excursions**2), corr_now, slope_now)
  plt.scatter(I_ref, G * flex.exp(-2*B_factor*sin_theta_over_lambda_sq) * I_obs,
              s=10, marker='x', c='r')
  plt.title('J=%6.5f CC=%6.5f Slope=%6.5f'%(sum(excursions**2), corr_now, slope_now))
  plt.xlabel('Reference intensity')
  plt.ylabel('Observed intensity (scaled)')
  plt.show()
  """
  return excursions

def df(self, x):
  k, B = float(x[0]), float(x[1])
  d_star_sq = self.calc.d_star_sq().data()
  tmp = self.obs.data() - k * flex.exp(-B * d_star_sq) * self.calc.data()
  dfdk = flex.sum(-2. * tmp * flex.exp(-B * d_star_sq) * self.calc.data())
  dfdB = flex.sum(2. * tmp * k * d_star_sq * flex.exp(-B * d_star_sq)
                  * self.calc.data())
  return numpy.array([dfdk, dfdB])

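# For reference, df() above is the analytic gradient of the least-squares
# target f(k, B) = sum((I_obs - k * exp(-B * d*^2) * I_calc)^2). A minimal
# sketch of the matching scalar objective, assuming the same self.obs /
# self.calc miller arrays (this helper is my addition, not from the source):
def f(self, x):
  k, B = float(x[0]), float(x[1])
  d_star_sq = self.calc.d_star_sq().data()
  residual = self.obs.data() - k * flex.exp(-B * d_star_sq) * self.calc.data()
  return flex.sum(flex.pow2(residual))
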
def sharp_map(sites_frac, map_coeffs, ss=None, b_sharp=None,
              b_min=-150, b_max=150, step=10):
  if (ss is None):
    ss = 1. / flex.pow2(map_coeffs.d_spacings().data()) / 4.
  from cctbx import miller
  if (b_sharp is None):
    t = -1
    map_coeffs_best = None
    b_sharp_best = None
    for b_sharp in range(b_min, b_max, step):
      map_coeffs_ = map_coeffs.deep_copy()
      sc2 = flex.exp(b_sharp * ss)
      map_coeffs_ = map_coeffs_.customized_copy(data=map_coeffs_.data() * sc2)
      t_ = sharp_evaluation_target(sites_frac=sites_frac,
                                   map_coeffs=map_coeffs_)
      if (t_ > t):
        t = t_
        b_sharp_best = b_sharp
        map_coeffs_best = map_coeffs_.deep_copy()
    print "b_sharp:", b_sharp_best, t
  else:
    scale = flex.exp(b_sharp * ss)
    map_coeffs_best = map_coeffs.customized_copy(data=map_coeffs.data() * scale)
    b_sharp_best = b_sharp
  return map_coeffs_best, b_sharp_best

def aniso_ratio_p_value(self, rat):
  # NOTE: this early return short-circuits the Chebyshev-based estimate
  # below, which is left in place but never reached.
  return -3
  coefs = flex.double(
    [-1.7647171873040273, -3.4427008004789115, -1.097150249786379,
     0.17303317520973829, 0.35955513268118661, 0.066276397961476205,
     -0.064575726062529232, -0.0063025873711609016, 0.0749945566688624,
     0.14803702885155121, 0.154284467861286])
  fit_e = scitbx.math.chebyshev_polynome(11, 0, 1.0, coefs)
  x = flex.double(range(1000)) / 999.0
  start = int(rat * 1000)
  norma = flex.sum(flex.exp(fit_e.f(x))) / x[1]
  x = x * (1 - rat) + rat
  norma2 = flex.sum(flex.exp(fit_e.f(x))) / (x[1] - x[0])
  return -math.log(norma2 / norma)

def run(xray_structure, f_map=None, map_data=None, d_fsc_model=None):
  assert [f_map, map_data].count(None) == 1
  xrs = xray_structure.deep_copy_scatterers().set_b_iso(value=0)
  if (f_map is None):
    f_map = miller.structure_factor_box_from_map(
      map=map_data, crystal_symmetry=xray_structure.crystal_symmetry())
  fc = f_map.structure_factors_from_scatterers(xray_structure=xrs).f_calc()
  d_model_b0 = run_at_b(b=0, f_map=f_map, f_calc=fc).d_min
  del xrs
  if (d_fsc_model is None):
    d_fsc_model = fc.d_min_from_fsc(other=f_map, fsc_cutoff=0).d_min
  fo = f_map.resolution_filter(d_min=d_fsc_model)
  fo, fc = fo.common_sets(fc)
  cc = -999
  b = None
  ss = 1. / flex.pow2(fc.d_spacings().data()) / 4.
  data = fc.data()
  for b_ in range(-500, 500, 5):
    sc = flex.exp(-b_ * ss)
    fc_ = fc.customized_copy(data=data * sc)
    cc_ = fo.map_correlation(other=fc_)
    if (cc_ > cc):
      cc = cc_
      b = b_
  o = run_at_b(b=b, f_map=fo, f_calc=fc)
  return group_args(d_min=o.d_min, b_iso=b, d_model_b0=d_model_b0,
                    d_fsc_model=d_fsc_model)

def calc_scales(self, params_in):
  """
  Calculate an array of scales based on scale, B and wavelength using the
  equation $scale * exp(-2*B*(sin(theta)/wavelength)^2)$.

  Return a scale vector for all the reflections in self, using the
  parameters defined in the array params.

  :params: a tuple of the form appropriate for the crystal symmetry,
    such as one produced by get_x0(). This method only uses params[0]
    (scale) and params[1] (B).
  :return: a list of scales for all the miller indices in self
  """
  if self.use_scales:
    scale = params_in[0]
    B = params_in[1]
    sin_sq_theta = self.miller_array.two_theta(wavelength=self.wavelength) \
        .sin_theta_over_lambda_sq().data()
    scales = scale * self.miller_array.data()
    exp_arg = flex.double(-2 * B * sin_sq_theta)
    return flex.double(flex.double(scales) * flex.exp(exp_arg))
  else:
    # Horrible way to get a vector of ones...
    return flex.double(self.miller_array.data() / self.miller_array.data())

def dump_R_in_bins(obs, calc, scale_B=True, log_out=sys.stdout, n_bins=20):
  #obs, calc = obs.common_sets(calc, assert_is_similar_symmetry=False)
  if scale_B:
    scale, B = kBdecider(obs, calc).run()
    d_star_sq = calc.d_star_sq().data()
    calc = calc.customized_copy(
      data=scale * flex.exp(-B * d_star_sq) * calc.data())
  binner = obs.setup_binner(n_bins=n_bins)
  count = 0
  log_out.write("dmax - dmin: R (nref) <I1> <I2> scale\n")
  for i_bin in binner.range_used():
    tmp_obs = obs.select(binner.bin_indices() == i_bin)
    tmp_calc = calc.select(binner.bin_indices() == i_bin)
    low = binner.bin_d_range(i_bin)[0]
    high = binner.bin_d_range(i_bin)[1]
    if scale_B:
      scale = 1.
    else:
      scale = flex.sum(tmp_obs.data() * tmp_calc.data()) \
          / flex.sum(flex.pow2(tmp_calc.data()))
    R = flex.sum(flex.abs(tmp_obs.data() - scale * tmp_calc.data())) \
        / flex.sum(0.5 * tmp_obs.data() + 0.5 * scale * tmp_calc.data())
    log_out.write("%5.2f - %5.2f: %.5f (%d) %.1f %.1f %.3e\n"
                  % (low, high, R, len(tmp_obs.data()),
                     flex.mean(tmp_obs.data()), flex.mean(tmp_calc.data()),
                     scale))
  log_out.write("Overall R = %.5f (scale=%.3e, %%comp=%.3f)\n\n"
                % (calc_R(obs, calc, do_scale=not scale_B)
                   + (obs.completeness() * 100.,)))

def ls_ff_weights(f_obs, atom, B):
  d_star_sq_data = f_obs.d_star_sq().data()
  table = wk1995(atom).fetch()
  ff = table.at_d_star_sq(d_star_sq_data) * flex.exp(-B / 4.0 * d_star_sq_data)
  weights = 1.0 / flex.pow2(ff)
  return weights

def calc_full_refl(self, I_o_p_set, sin_theta_over_lambda_sq_set,
                   G, B, p_set, rs_set, flag_volume_correction=True):
  # avoid a floating-point exception in flex.exp
  argument = -2 * B * sin_theta_over_lambda_sq_set
  if (argument < -75).count(True) > 0 or (argument > 75).count(True) > 0:
    raise ValueError("flex.exp arg out of bounds")
  I_o_full_set = I_o_p_set / (G * flex.exp(argument) * p_set)
  return I_o_full_set

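# A quick sanity note on the +/-75 clamp above (my numbers, not from the
# source): IEEE doubles only overflow near exp(709), so +/-75 is a
# deliberately conservative guard that rejects physically meaningless G/B
# combinations long before flex.exp could actually overflow.
import math
assert math.exp(75) < 1e33  # comfortably representable
# math.exp(710)  # would raise OverflowError
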
def scattering_factor(a, b, c, q):
  result = q * 0
  stol = q / (math.pi * 4)
  for aa, bb in zip(a, b):
    result += aa * flex.exp(-bb * stol * stol)
  result += c
  return result

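# Hypothetical usage of scattering_factor() with IT1992 Gaussian coefficients
# for carbon, mirroring build_scattering_library() above; q is assumed to be
# 4*pi*sin(theta)/lambda, so stol = q / (4*pi) = sin(theta)/lambda.
import math
from scitbx.array_family import flex
from cctbx.eltbx import xray_scattering

q_array = flex.double([0.0, 0.5, 1.0, 2.0])
val = xray_scattering.it1992("C", True).fetch()
sf = scattering_factor(val.array_of_a(), val.array_of_b(), val.c(), q_array)
# at q = 0 the factor reduces to sum(a) + c, close to the atomic number (6)
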
def func_scale(self, params, *args):
  mean_I_r = args[0]
  mean_I_o = args[1]
  mean_stol_sq = args[2]
  G, B = params
  mean_I_o_scaled = mean_I_o / (G * flex.exp(flex.double(-2 * B * mean_stol_sq)))
  return (mean_I_r - mean_I_o_scaled)

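# Hypothetical driver for func_scale() above: least-squares refinement of the
# linear scale G and isotropic B via scipy. The residuals are converted to
# numpy since func_scale returns a flex array; mean_I_r, mean_I_o and
# mean_stol_sq are assumed to be flex.double arrays of equal length.
from scipy import optimize

def refine_G_B(obj, mean_I_r, mean_I_o, mean_stol_sq, x0=(1.0, 0.0)):
  residuals = lambda params: obj.func_scale(
    params, mean_I_r, mean_I_o, mean_stol_sq).as_numpy_array()
  xopt, ier = optimize.leastsq(residuals, x0)
  return xopt  # refined (G, B)
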
def compute_cost_refine_crystal(self, I_ref, observations_original_sel,
                                crystal_init_orientation, wavelength,
                                parameters):
  G = parameters[0]
  B_factor = parameters[1]
  rotx = parameters[2]
  roty = parameters[3]
  ry = parameters[4]
  rz = parameters[5]
  I_obs = observations_original_sel.data()
  sigI_obs = observations_original_sel.sigmas()
  observations_original_sel_two_theta = \
      observations_original_sel.two_theta(wavelength=wavelength)
  two_theta = observations_original_sel_two_theta.data()
  sin_theta_over_lambda_sq = \
      observations_original_sel_two_theta.sin_theta_over_lambda_sq().data()
  effective_orientation = crystal_init_orientation.rotate_thru((1,0,0), rotx
      ).rotate_thru((0,1,0), roty)
  effective_a_star = sqr(effective_orientation.reciprocal_matrix())
  ph = partiality_handler(wavelength, 0)
  partiality = ph.calc_partiality_anisotropy_set(
      effective_a_star, observations_original_sel.indices(), ry, rz, two_theta)
  excursions = (((G * flex.exp(-2 * B_factor * sin_theta_over_lambda_sq)
                  * I_obs) / partiality) - I_ref) / sigI_obs
  return excursions

def run(mtz, bs):
  # Open mtz
  mtz_file = iotbx.mtz.object(mtz)
  miller_arrays = mtz_file.as_miller_arrays()
  print "Opening", mtz
  for b in bs:
    mtz_out = os.path.splitext(os.path.basename(mtz))[0] + "_b%.2f.mtz" % b
    mtz_dataset = None
    labels = ["H", "K", "L"]
    for i, ar in enumerate(miller_arrays):
      fake_label = 2 * string.uppercase[i]
      for lab in guess_array_output_labels(ar):
        labels.append(lab)
      array_types = get_original_array_types(mtz_file, ar.info().labels)
      default_types = iotbx.mtz.default_column_types(ar)
      if len(default_types) == len(array_types):
        column_types = array_types
      else:
        column_types = None
      if ar.is_xray_data_array() or ar.is_complex_array():
        fac = 2. if ar.is_xray_intensity_array() else 1.
        print "Applying B=%.2f to %s" % (b * fac, ar.info())
        k = flex.exp(-ar.d_star_sq().data() * b * fac)
        ar = ar.customized_copy(
          data=ar.data() * k,
          sigmas=ar.sigmas() * k if ar.sigmas() else None)
      mtz_dataset = add_array_to_mtz_dataset(mtz_dataset, ar, fake_label,
                                             column_types)
    # Decide labels and write mtz file
    mtz_object = mtz_dataset.mtz_object()
    invalid_chars = re.compile("[^A-Za-z0-9_\-+\(\)]")
    used = dict([(label, 0) for label in labels])
    for i, column in enumerate(mtz_object.columns()):
      if column.label() != labels[i]:
        label = labels[i]
        original_label = label
        assert used[label] == 0
        try:
          column.set_label(label)
        except RuntimeError, e:
          if ("new_label is used already" in str(e)):
            col_names = [col.label() for col in mtz_object.columns()]
            raise RuntimeError(
              ("Duplicate column label '%s': current labels " +
               "are %s; user-specified output labels are %s.")
              % (label, " ".join(col_names), " ".join(labels)))
        else:
          used[original_label] += 1
    mtz_object.write(file_name=mtz_out)
    print
    print "Writing:", mtz_out
    print

def voigt(self, x, sig, nu):
  if nu < 0:
    nu = 0
  elif nu > 1:
    nu = 1
  f1 = nu * math.sqrt(math.log(2)/math.pi) \
      * flex.exp(-4 * math.log(2) * ((x/sig)**2)) * (1/abs(sig))
  f2 = (1 - nu) / (math.pi * abs(sig) * (1 + (4 * ((x/sig)**2))))
  f3 = ((nu * math.sqrt(math.log(2)/math.pi)) / abs(sig)) \
      + ((1 - nu) / (math.pi * abs(sig)))
  svx = (f1 + f2) / f3
  return svx

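# voigt() above is a pseudo-Voigt profile: a Gaussian term f1 and a
# Lorentzian term f2 mixed by nu, divided by f3 so the peak height is 1.
# A quick scalar check of that normalization (hypothetical, plain math
# instead of flex): at x = 0 both terms reach their maxima, so the profile
# evaluates to exactly 1.
import math

def voigt_scalar(x, sig, nu):
  nu = min(max(nu, 0), 1)
  f1 = nu * math.sqrt(math.log(2) / math.pi) \
      * math.exp(-4 * math.log(2) * (x / sig)**2) / abs(sig)
  f2 = (1 - nu) / (math.pi * abs(sig) * (1 + 4 * (x / sig)**2))
  f3 = nu * math.sqrt(math.log(2) / math.pi) / abs(sig) \
      + (1 - nu) / (math.pi * abs(sig))
  return (f1 + f2) / f3

assert abs(voigt_scalar(0.0, 2.0, 0.3) - 1.0) < 1e-12
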
def f_ordered_solvent(f, n_water_atoms_absent, bf_atoms_absent,
                      absent_atom_type):
  nsym = f.space_group().order_z()
  n_lost_w = nsym * n_water_atoms_absent
  data = f.data() * n_lost_w
  d_star_sq_data = f.d_star_sq().data()
  table = wk1995(absent_atom_type).fetch()
  ff = table.at_d_star_sq(d_star_sq_data)
  factor = ff * flex.exp(-bf_atoms_absent / 4.0 * d_star_sq_data)
  f_by_m = miller.array(miller_set=f, data=data * factor)
  return f_by_m

def get_hl(f_obs_cmpl, k_blur, b_blur):
  f_model_phases = f_obs_cmpl.phases().data()
  sin_f_model_phases = flex.sin(f_model_phases)
  cos_f_model_phases = flex.cos(f_model_phases)
  ss = 1. / flex.pow2(f_obs_cmpl.d_spacings().data()) / 4.
  t = 2 * k_blur * flex.exp(-b_blur * ss)
  hl_a_model = t * cos_f_model_phases
  hl_b_model = t * sin_f_model_phases
  hl_data = flex.hendrickson_lattman(a=hl_a_model, b=hl_b_model)
  hl = f_obs_cmpl.customized_copy(data=hl_data)
  return hl

def log_inv_fit(x, y, degree=5):
  """Fit the values log(1 / y(x)) then return the inverse of this fit.

  x, y should be iterables; the order of the polynomial for the
  transformed fit needs to be specified. This will be useful for
  e.g. Rmerge."""
  fit = curve_fitting.univariate_polynomial_fit(
      x, flex.log(1 / y), degree=degree, max_iterations=100)
  f = curve_fitting.univariate_polynomial(*fit.params)
  return 1 / flex.exp(f(x))

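# A minimal usage sketch of log_inv_fit() (toy data, my values): smooth a
# noisy decaying statistic such as Rmerge versus resolution. Assumes
# curve_fitting is in scope as in the function above
# (from scitbx.math import curve_fitting).
from scitbx.array_family import flex

x = flex.double([0.1 * i for i in range(1, 21)])
y = flex.double([1.0 / (1.0 + 5.0 * xi) for xi in x])  # toy decaying curve
y_smooth = log_inv_fit(x, y, degree=3)  # same length as x, strictly positive
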
def alpha_beta(f_dist, n_atoms_included, n_nonwater_atoms_absent,
               n_water_atoms_absent, bf_atoms_absent, final_error,
               absent_atom_type):
  nsym = f_dist.space_group().order_z()
  ss = 1. / flex.pow2(f_dist.d_spacings().data())
  n_part = nsym * n_atoms_included
  n_lost_p = nsym * n_nonwater_atoms_absent
  n_lost_w = nsym * n_water_atoms_absent
  f_dist_data = flex.abs(f_dist.data())
  a_d = flex.exp(-0.25 * ss * final_error**2 * math.pi**3)
  d_star_sq_data = f_dist.d_star_sq().data()
  assert approx_equal(ss, d_star_sq_data)
  table = wk1995(absent_atom_type).fetch()
  ff = table.at_d_star_sq(d_star_sq_data)
  factor = ff * flex.exp(-bf_atoms_absent / 4.0 * d_star_sq_data)
  b_d = ((1. - a_d * a_d) * n_part + n_lost_p
         + n_lost_w * (1. - f_dist_data * f_dist_data)) * factor * factor
  alpha = f_dist.array(data=a_d)
  beta = f_dist.array(data=b_d)
  return alpha, beta

def calc_full_refl(self, I_o_p_set, sin_theta_over_lambda_sq_set,
                   G, B, p_set, rs_set, flag_volume_correction=True):
  I_o_full_set = I_o_p_set / (
      G * flex.exp(-2 * B * sin_theta_over_lambda_sq_set) * p_set)
  return I_o_full_set

def log_fit(x, y, degree=5):
  """Fit the values log(y(x)) then return exp() to this fit.

  x, y should be iterables containing floats of the same size.
  The order is the order of polynomial to use for this fit.
  This will be useful for e.g. I/sigma."""
  fit = curve_fitting.univariate_polynomial_fit(
      x, flex.log(y), degree=degree, max_iterations=100)
  f = curve_fitting.univariate_polynomial(*fit.params)
  return flex.exp(f(x))

def fvec_callable(pfh, current_values):
  rotx = current_values[0]
  roty = current_values[1]
  ry = spot_radius
  rz = spot_radius
  G = scale_factors[0]
  B_factor = scale_factors[1]
  I_obs = observations_original_sel.data()
  sigI_obs = observations_original_sel.sigmas()
  observations_original_sel_two_theta = \
      observations_original_sel.two_theta(wavelength=wavelength)
  two_theta = observations_original_sel_two_theta.data()
  sin_theta_over_lambda_sq = \
      observations_original_sel_two_theta.sin_theta_over_lambda_sq().data()
  effective_orientation = crystal_init_orientation.rotate_thru((1,0,0), rotx
      ).rotate_thru((0,1,0), roty)
  effective_a_star = sqr(effective_orientation.reciprocal_matrix())
  ph = partiality_handler(wavelength, 0)
  partiality = ph.calc_partiality_anisotropy_set(
      effective_a_star, observations_original_sel.indices(), ry, rz, two_theta)
  excursions = (((G * flex.exp(-2 * B_factor * sin_theta_over_lambda_sq)
                  * I_obs) / partiality) - I_ref) / sigI_obs
  corr_now, slope_now = get_overall_correlation(
      (G * flex.exp(-2 * B_factor * sin_theta_over_lambda_sq) * I_obs)
      / partiality, I_ref)
  """
  print "ROTATION G=%5.3f B_factor=%5.3f rotx=%6.5f roty=%6.5f ry=%6.5f rz=%6.5f J=%6.3f cc=%6.3f slope=%6.3f p_mean=%6.3f"% \
    (G, B_factor, rotx*180/math.pi, roty*180/math.pi, ry, rz,
     sum(excursions**2), corr_now, slope_now, flex.mean(partiality))
  plt.scatter(I_ref, (G * flex.exp(-2*B_factor*sin_theta_over_lambda_sq) * I_obs)/partiality,
              s=10, marker='x', c='r')
  plt.title('J=%6.5f CC=%6.5f Slope=%6.5f'%(sum(excursions**2), corr_now, slope_now))
  plt.xlabel('Reference intensity')
  plt.ylabel('Observed intensity (scaled)')
  plt.show()
  """
  return excursions

def compute_cost_refine_scale(self, I_ref, observations_original_sel,
                              wavelength, parameters):
  G = parameters[0]
  B_factor = parameters[1]
  I_obs = observations_original_sel.data()
  sigI_obs = observations_original_sel.sigmas()
  observations_original_sel_two_theta = \
      observations_original_sel.two_theta(wavelength=wavelength)
  two_theta = observations_original_sel_two_theta.data()
  sin_theta_over_lambda_sq = \
      observations_original_sel_two_theta.sin_theta_over_lambda_sq().data()
  excursions = ((G * flex.exp(-2 * B_factor * sin_theta_over_lambda_sq)
                 * I_obs) - I_ref) / sigI_obs
  return excursions

def __init__(self, scattering_info, d_star_sq_array, p_scale=0.0,
             b_wilson=0.0, magic_fudge_factor=2.0):
  ## First recompute some parameters
  scattering_info.scat_data(d_star_sq_array)
  ## mean intensity
  self.mean_intensity = scattering_info.sigma_tot_sq
  self.mean_intensity = self.mean_intensity * (1.0 + scattering_info.gamma_tot)
  ## I am missing a factor 2 somewhere
  self.mean_intensity /= magic_fudge_factor
  self.mean_intensity = self.mean_intensity * flex.exp(
    -d_star_sq_array * b_wilson / 2.0)
  self.mean_intensity *= math.exp(-p_scale)
  ## the associated standard deviation
  self.sigma_intensity = scattering_info.gamma_tot_sigma
  self.sigma_intensity = scattering_info.sigma_tot_sq * self.sigma_intensity
  self.sigma_intensity = self.sigma_intensity * flex.exp(
    -d_star_sq_array * b_wilson / 2.0)
  self.sigma_intensity *= math.exp(-p_scale)

def random_data(B_add=35, n_residues=585.0, d_min=3.5):
  unit_cell = uctbx.unit_cell((81.0, 81.0, 61.0, 90.0, 90.0, 120.0))
  xtal = crystal.symmetry(unit_cell, " P 3 ")
  ## In P3 I do not have to worry about centrics or reflections with
  ## different epsilons.
  miller_set = miller.build_set(
    crystal_symmetry=xtal,
    anomalous_flag=False,
    d_min=d_min)
  ## Now make an array with d_star_sq values
  d_star_sq = miller_set.d_spacings().data()
  d_star_sq = 1.0 / (d_star_sq * d_star_sq)
  asu = {"H": 8.0 * n_residues * 1.0,
         "C": 5.0 * n_residues * 1.0,
         "N": 1.5 * n_residues * 1.0,
         "O": 1.2 * n_residues * 1.0}
  scat_info = absolute_scaling.scattering_information(
    asu_contents=asu,
    fraction_protein=1.0,
    fraction_nucleic=0.0)
  scat_info.scat_data(d_star_sq)
  gamma_prot = scat_info.gamma_tot
  sigma_prot = scat_info.sigma_tot_sq
  ## The number of residues is multiplied by the Z of the space group
  protein_total = sigma_prot * (1.0 + gamma_prot)
  ## add a B-value of 35 please
  protein_total = protein_total * flex.exp(-B_add * d_star_sq / 2.0)
  ## Now that that has been done,
  ## we can make random structure factors
  normalised_random_intensities = \
    random_transform.wilson_intensity_variate(protein_total.size())
  random_intensities = normalised_random_intensities * protein_total * math.exp(6)
  std_dev = random_intensities * 5.0 / 100.0
  noise = random_transform.normal_variate(N=protein_total.size())
  noise = noise * std_dev
  random_intensities = noise + random_intensities
  ## Stuff the arrays in the miller array
  miller_array = miller.array(miller_set,
                              data=random_intensities,
                              sigmas=std_dev)
  miller_array = miller_array.set_observation_type(
    xray.observation_types.intensity())
  miller_array = miller_array.f_sq_as_f()
  return (miller_array)

def find_b(fo, fc):
  # TODO: Need to use linear
  #tmp(f=fo)
  #fo=fo.resolution_filter(d_min=5)
  #fo, fc, = fo.common_sets(fc)
  cc = -999
  b = None
  ss = 1. / flex.pow2(fc.d_spacings().data()) / 4.
  data = fc.data()
  for b_ in range(-500, 500, 5):
    sc = flex.exp(-b_ * ss)
    fc_ = fc.customized_copy(data=data * sc)
    cc_ = fo.map_correlation(other=fc_)
    if (cc_ > cc):
      cc = cc_
      b = b_
  return b

def extreme_wilson_outliers(self, p_extreme_wilson=1e-1, return_data=False):
  n_acentric = self.acentric_work.data().size()
  n_centric = self.centric_work.data().size()
  extreme_acentric = 1.0 - \
    flex.pow(1.0 - flex.exp(-self.acentric_work.data()), float(n_acentric))
  extreme_centric = 1.0 - \
    flex.pow(erf(flex.sqrt(self.centric_work.data() / 2.0)), float(n_centric))
  acentric_selection = flex.bool(extreme_acentric > p_extreme_wilson)
  centric_selection = flex.bool(extreme_centric > p_extreme_wilson)
  all_flags = self.work_obs.customized_copy(
    indices=self.acentric_work.indices().concatenate(
      self.centric_work.indices()),
    data=acentric_selection.concatenate(centric_selection))
  all_p_values = self.work_obs.customized_copy(
    indices=self.acentric_work.indices().concatenate(
      self.centric_work.indices()),
    data=extreme_acentric.concatenate(extreme_centric))
  all_flags = all_flags.common_set(self.miller_obs)
  all_p_values = all_p_values.common_set(self.miller_obs)
  log_string = """
Outlier rejection based on extreme value Wilson statistics.
-----------------------------------------------------------

Reflections whose normalized intensity has an associated p-value
lower than %s are flagged as possible outliers.
The p-value is obtained using extreme value distributions of the
Wilson distribution.
""" % (p_extreme_wilson)
  log_string = self.make_log_wilson(log_string, all_flags, all_p_values)
  print >> self.out
  print >> self.out, log_string
  print >> self.out
  if not return_data:
    return all_flags
  else:
    return self.miller_obs.select(all_flags.data())

def basic_wilson_outliers(self, p_basic_wilson=1e-6, return_data=False):
  p_acentric_single = 1.0 - (1.0 - flex.exp(-self.acentric_work.data()))
  p_centric_single = 1.0 - erf(flex.sqrt(self.centric_work.data() / 2.0))
  acentric_selection = flex.bool(p_acentric_single > p_basic_wilson)
  centric_selection = flex.bool(p_centric_single > p_basic_wilson)
  # combine all in a single miller array
  all_flags = self.work_obs.customized_copy(
    indices=self.acentric_work.indices().concatenate(
      self.centric_work.indices()),
    data=acentric_selection.concatenate(centric_selection))
  all_p_values = self.work_obs.customized_copy(
    indices=self.acentric_work.indices().concatenate(
      self.centric_work.indices()),
    data=p_acentric_single.concatenate(p_centric_single))
  # get the order right
  all_flags = all_flags.common_set(self.miller_obs)
  all_p_values = all_p_values.common_set(self.miller_obs)
  # prepare a table with results please
  log_string = """
Outlier rejection based on basic Wilson statistics.
--------------------------------------------------

See Read, Acta Cryst. (1999). D55, 1759-1764. for details.
Reflections whose normalized intensity has an associated p-value
lower than %s are flagged as possible outliers.
""" % (p_basic_wilson)
  log_string = self.make_log_wilson(log_string, all_flags, all_p_values)
  print >> self.out
  print >> self.out, log_string
  print >> self.out
  if not return_data:
    return all_flags
  else:
    return self.miller_obs.select(all_flags.data())

def __init__(self, miller_array, kernel_width=None, n_bins=23, n_term=13,
             d_star_sq_low=None, d_star_sq_high=None, auto_kernel=False,
             number_of_sorted_reflections_for_auto_kernel=50):
  ## Autokernel is either False, True or a specific integer
  if kernel_width is None:
    assert (auto_kernel is not False)
  if auto_kernel is not False:
    assert (kernel_width is None)
  assert miller_array.size() > 0
  ## intensity arrays please
  work_array = None
  if not miller_array.is_real_array():
    raise RuntimeError("Please provide real arrays only")
    ## I might have to change this upper condition
  if miller_array.is_xray_amplitude_array():
    work_array = miller_array.f_as_f_sq()
  if miller_array.is_xray_intensity_array():
    work_array = miller_array.deep_copy()
    work_array = work_array.set_observation_type(miller_array)
  ## If type is not intensity or amplitude
  ## raise an exception please
  if not miller_array.is_xray_intensity_array():
    if not miller_array.is_xray_amplitude_array():
      raise RuntimeError("Observation type unknown")
  ## declare some shorthands
  I_obs = work_array.data()
  epsilons = work_array.epsilons().data().as_double()
  d_star_sq_hkl = work_array.d_spacings().data()
  d_star_sq_hkl = 1.0 / (d_star_sq_hkl * d_star_sq_hkl)
  ## Set up some limits
  if d_star_sq_low is None:
    d_star_sq_low = flex.min(d_star_sq_hkl)
  if d_star_sq_high is None:
    d_star_sq_high = flex.max(d_star_sq_hkl)
  ## A feeble attempt to determine an appropriate kernel width
  ## that seems to work reasonably in practice
  self.kernel_width = kernel_width
  if auto_kernel is not False:
    ## get the d_star_sq_array and sort it
    sort_permut = flex.sort_permutation(d_star_sq_hkl)
    ##
    if auto_kernel == True:
      number = number_of_sorted_reflections_for_auto_kernel
    else:
      number = int(auto_kernel)
    if number > d_star_sq_hkl.size():
      number = d_star_sq_hkl.size() - 1
    self.kernel_width = d_star_sq_hkl[sort_permut[number]] - d_star_sq_low
    assert self.kernel_width > 0
  ## Making the d_star_sq_array
  assert (n_bins > 1)  ## assure that there is more than 1 bin for interpolation
  self.d_star_sq_array = chebyshev_lsq_fit.chebyshev_nodes(
    n=n_bins,
    low=d_star_sq_low,
    high=d_star_sq_high,
    include_limits=True)
  ## Now get the average intensity please
  ##
  ## This step can be reasonably time consuming
  self.mean_I_array = scaling.kernel_normalisation(
    d_star_sq_hkl=d_star_sq_hkl,
    I_hkl=I_obs,
    epsilon=epsilons,
    d_star_sq_array=self.d_star_sq_array,
    kernel_width=self.kernel_width)
  self.var_I_array = scaling.kernel_normalisation(
    d_star_sq_hkl=d_star_sq_hkl,
    I_hkl=I_obs * I_obs,
    epsilon=epsilons * epsilons,
    d_star_sq_array=self.d_star_sq_array,
    kernel_width=self.kernel_width)
  self.var_I_array = self.var_I_array - self.mean_I_array * self.mean_I_array
  ## NOTE: the chained assignment below also overwrites the var_I_array
  ## computed just above with the kernel weight sums.
  self.weight_sum = self.var_I_array = scaling.kernel_normalisation(
    d_star_sq_hkl=d_star_sq_hkl,
    I_hkl=I_obs * 0.0 + 1.0,
    epsilon=epsilons * 0.0 + 1.0,
    d_star_sq_array=self.d_star_sq_array,
    kernel_width=self.kernel_width)
  eps = 1e-16  # XXX Maybe this should be larger?
  self.bin_selection = (self.mean_I_array > eps)
  sel_pos = self.bin_selection.iselection()
  # FIXME rare bug: this crashes when the majority of the data are zero,
  # e.g. because the resolution limit was set too high and F/I filled in
  # with 0. It would be good to catch such cases in advance by inspecting
  # the binned values, and raise a different error message.
  assert sel_pos.size() > 0
  if (sel_pos.size() < self.mean_I_array.size() / 2):
    raise Sorry("Analysis could not be continued because more than half "
                "of the data have values below 1e-16. This usually indicates "
                "either an inappropriately high resolution cutoff, or an "
                "error in the data file which artificially creates a higher "
                "resolution limit.")
  self.mean_I_array = self.mean_I_array.select(sel_pos)
  self.d_star_sq_array = self.d_star_sq_array.select(sel_pos)
  self.var_I_array = flex.log(self.var_I_array.select(sel_pos))
  self.weight_sum = self.weight_sum.select(sel_pos)
  self.mean_I_array = flex.log(self.mean_I_array)
  ## Fit a Chebyshev polynome please
  normalizer_fit_lsq = chebyshev_lsq_fit.chebyshev_lsq_fit(
    n_term, self.d_star_sq_array, self.mean_I_array)
  self.normalizer = chebyshev_polynome(
    n_term, d_star_sq_low, d_star_sq_high, normalizer_fit_lsq.coefs)
  var_lsq_fit = chebyshev_lsq_fit.chebyshev_lsq_fit(
    n_term, self.d_star_sq_array, self.var_I_array)
  self.var_norm = chebyshev_polynome(
    n_term, d_star_sq_low, d_star_sq_high, var_lsq_fit.coefs)
  ws_fit = chebyshev_lsq_fit.chebyshev_lsq_fit(
    n_term, self.d_star_sq_array, self.weight_sum)
  self.weight_sum = chebyshev_polynome(
    n_term, d_star_sq_low, d_star_sq_high, ws_fit.coefs)
  ## The data will now be normalised using the
  ## Chebyshev polynome we have just obtained
  self.mean_I_array = flex.exp(self.mean_I_array)
  self.normalizer_for_miller_array = flex.exp(self.normalizer.f(d_star_sq_hkl))
  self.var_I_array = flex.exp(self.var_I_array)
  self.var_norm = flex.exp(self.var_norm.f(d_star_sq_hkl))
  self.weight_sum = flex.exp(self.weight_sum.f(d_star_sq_hkl))
  self.normalised_miller = None
  self.normalised_miller_dev_eps = None
  if work_array.sigmas() is not None:
    self.normalised_miller = work_array.customized_copy(
      data=work_array.data() / self.normalizer_for_miller_array,
      sigmas=work_array.sigmas() / self.normalizer_for_miller_array
      ).set_observation_type(work_array)
    self.normalised_miller_dev_eps = self.normalised_miller.customized_copy(
      data=self.normalised_miller.data() / epsilons,
      sigmas=self.normalised_miller.sigmas() / epsilons
      ).set_observation_type(work_array)
  else:
    self.normalised_miller = work_array.customized_copy(
      data=work_array.data() / self.normalizer_for_miller_array
      ).set_observation_type(work_array)
    self.normalised_miller_dev_eps = self.normalised_miller.customized_copy(
      data=self.normalised_miller.data() / epsilons
      ).set_observation_type(work_array)

def do_something_clever(self, obs, sobs, calc, mock):
  # first get the sort order
  # sort on the calculated data please
  sort_order = flex.sort_permutation(calc)
  inverse_sort_order = sort_order.inverse_permutation()
  sorted_obs = obs.select(sort_order)
  sorted_sobs = sobs.select(sort_order)
  sorted_calc = calc.select(sort_order)
  sorted_mock = mock.select(sort_order)
  log_calc = flex.log(sorted_mock)
  deltas = flex.log(sorted_obs) - flex.log(sorted_calc)
  old_deltas = deltas.deep_copy()
  # make bins on the basis of the order
  bin_size = float(sorted_obs.size()) / self.n_e_bins
  bin_size = int(bin_size) + 1
  ebin = flex.int()
  count = 0
  for ii in xrange(sorted_obs.size()):
    if ii % bin_size == 0:
      count += 1
    ebin.append(count - 1)
  # the bins have been set up, now we can reorder stuff
  for ibin in xrange(self.n_e_bins):
    this_bin_selection = flex.bool(ebin == ibin)
    tmp_n = (this_bin_selection).count(True)
    permute = flex.sort_permutation(flex.random_double(tmp_n))
    # select and swap
    selected_deltas = deltas.select(this_bin_selection)
    selected_deltas = selected_deltas.select(permute)
    selected_sobs = sorted_sobs.select(this_bin_selection)
    selected_sobs = selected_sobs.select(permute)
    # we have to make a sanity check so that the selected deltas are not
    # very weird; a safeguard to prevent the introduction of outliers
    mean_delta = flex.mean(selected_deltas)
    std_delta = math.sqrt(
      flex.mean(selected_deltas * selected_deltas) - mean_delta * mean_delta)
    outliers = flex.bool(
      flex.abs(selected_deltas - mean_delta) > self.thres * std_delta)
    #print list( flex.abs(selected_deltas-mean_delta)/std_delta )
    #print list( outliers )
    if (outliers).count(True) > 0:
      non_out_delta = selected_deltas.select(~outliers)
      tmp_permut = flex.sort_permutation(
        flex.random_double((~outliers).count(True)))
      tmp_delta = non_out_delta.select(tmp_permut)
      tmp_delta = tmp_delta[0:(outliers).count(True)]
      selected_deltas = selected_deltas.set_selected(
        outliers.iselection(), tmp_delta)
    # set the deltas back please
    deltas = deltas.set_selected(this_bin_selection, selected_deltas)
    sorted_sobs = sorted_sobs.set_selected(this_bin_selection, selected_sobs)
  # the deltas have been swapped, apply things back please
  log_calc = log_calc + deltas
  log_calc = flex.exp(log_calc)
  # now we have to get things back in proper order again thank you
  new_fobs = log_calc.select(inverse_sort_order)
  new_sobs = sorted_sobs.select(inverse_sort_order)
  return new_fobs, new_sobs

print "Scale for", Is[i][0], "is", scale print Is[0][1].data().size() # Prepare plot data for_plot = OrderedDict() # {name: [mean, ...], ..} binner = Is[0][1].setup_binner(n_bins=params.nbins)#reflections_per_bin=50) for i_bin in binner.range_used(): for name, I, (scale, b) in Is: dmax, dmin = binner.bin_d_range(i_bin) Isel = I.resolution_filter(d_max=dmax, d_min=dmin) #Isel = I.select(binner.bin_indices() == i_bin) # crash if not common if params.over_sigma: data = Isel.data() / Isel.sigmas() else: bfac = flex.exp(-b * Isel.d_star_sq().data()) if b != 0 else 1. data = Isel.data() *scale*bfac if len(data)==0: print "WARNING: ", name, "No data in %f .. %f" % binner.bin_d_range(i_bin) for_plot.setdefault(name, []).append(float("nan")) elif params.logscale: for_plot.setdefault(name, []).append(math.log(flex.mean(data))) # taking log<I> else: for_plot.setdefault(name, []).append(flex.mean(data)) # If only two data in, calc CC. extra = [] if len(Is) == 2 and params.extra.lower() != "no": for i_bin in binner.range_used(): dmax, dmin = binner.bin_d_range(i_bin) #Isel0, Isel1 = map(lambda x:x[1].select(binner.bin_indices() == i_bin), Is)
def run(params, xfiles):
  # read reference
  arrays = iotbx.file_reader.any_file(
    params.reference.file).file_server.miller_arrays
  arrays = filter(lambda ar: ar.is_xray_data_array(), arrays)
  if params.reference.label is not None:
    arrays = filter(
      lambda ar: ar.info().label_string() == params.reference.label, arrays)
  if len(arrays) != 1:
    print "Can't decide data to use in reference file:", params.reference.file
    print "Choose label"
    for ar in arrays:
      print ar.info().label_string()
    return
  refdata = arrays[0].as_intensity_array()
  refdata = refdata.resolution_filter(d_max=params.reference.d_max,
                                      d_min=params.reference.d_min)
  print "file n.common k b cc.org cc.mean cc.scaled a b c al be ga"
  for xf in xfiles:
    print "# Reading", xf
    try:
      xfile = DenzoXfile(xf)
    except:
      traceback.print_exc()
      continue
    a = xfile.miller_array(anomalous_flag=refdata.anomalous_flag())
    a = a.select(a.sigmas() > 0)
    a = a.resolution_filter(d_min=params.d_min, d_max=params.d_max)
    if params.sigma_cutoff is not None:
      a = a.select(a.data() / a.sigmas() >= params.sigma_cutoff)
    a = a.merge_equivalents(use_internal_variance=False).array()
    tmp, a = refdata.common_sets(a, assert_is_similar_symmetry=False)
    n_common = tmp.size()
    if n_common == 0:
      print "# No useful reflection in this file. skip."
      continue
    corr = flex.linear_correlation(tmp.data(), a.data())
    cc_org = corr.coefficient() if corr.is_well_defined() else float("nan")
    # Calc CC in resolution bins and average
    tmp.setup_binner(auto_binning=True)
    cc_bins = []
    for i_bin in tmp.binner().range_used():
      sel = tmp.binner().selection(i_bin)
      corr = flex.linear_correlation(tmp.select(sel).data(),
                                     a.select(sel).data())
      if not corr.is_well_defined():
        continue
      cc_bins.append(corr.coefficient())
    cc_mean = sum(cc_bins) / float(len(cc_bins)) if len(cc_bins) > 0 else float("nan")
    # Determine scale and B
    k, b = kBdecider(tmp, a).run()
    bfac = flex.exp(-b * a.d_star_sq().data()) if b != 0 else 1.
    corr = flex.linear_correlation(tmp.data(), a.data() * k * bfac)
    cc_scaled = corr.coefficient() if corr.is_well_defined() else float("nan")
    print "%s %5d %.3e %.3e %.4f %.4f %.4f" % (xf, n_common, k, b, cc_org,
                                               cc_mean, cc_scaled),
    print ("%.3f " * 6) % a.unit_cell().parameters()
    if params.show_plot:
      import pylab
      from matplotlib.ticker import FuncFormatter
      s3_formatter = lambda x, pos: "inf" if x == 0 else "%.2f" % (x**(-1/3))
      fig, ax1 = pylab.plt.subplots()
      plot_x = map(lambda i: tmp.binner().bin_d_range(i)[1]**(-3),
                   tmp.binner().range_used())
      #for name, ar in (("reference", tmp), ("data", a)):
      vals = map(lambda i: flex.mean(tmp.data().select(tmp.binner().selection(i))),
                 tmp.binner().range_used())
      pylab.plot(plot_x, vals, label="reference")
      scale = flex.sum(tmp.data() * a.data()) / flex.sum(flex.pow2(a.data()))
      print "Linear-scale=", scale
      vals = map(lambda i: scale * flex.mean(a.data().select(tmp.binner().selection(i))),
                 tmp.binner().range_used())
      pylab.plot(plot_x, vals, label="data")
      vals = map(lambda i: flex.mean((a.data() * k * bfac).select(tmp.binner().selection(i))),
                 tmp.binner().range_used())
      pylab.plot(plot_x, vals, label="data_scaled")
      """
      from mmtbx.scaling import absolute_scaling, relative_scaling
      ls_scaling = relative_scaling.ls_rel_scale_driver(
        tmp, tmp.customized_copy(data=a.data(), sigmas=a.sigmas()),
        use_intensities=True, scale_weight=True, use_weights=True)
      ls_scaling.show()
      vals = map(lambda i: flex.mean(ls_scaling.derivative.resolution_filter(*tmp.binner().bin_d_range(i)).data()),
                 tmp.binner().range_used())
      pylab.plot(plot_x, vals, label="data_scaled2")
      """
      pylab.legend()
      pylab.xlabel('resolution (d^-3)')
      pylab.ylabel('<I>')
      pylab.setp(pylab.gca().get_legend().get_texts(), fontsize="small")
      pylab.title('Scaled with B-factors (%.2f)' % b)
      pylab.gca().xaxis.set_major_formatter(FuncFormatter(s3_formatter))
      ax2 = ax1.twinx()
      ax2.plot(plot_x, cc_bins, "black")
      ax2.set_ylabel('CC')
      pylab.show()

def run(params, mtzfiles):
  arrays = get_arrays(mtzfiles, d_min=params.dmin, d_max=params.dmax)
  if params.take_common:
    arrays = commonalize(arrays)
  maxlen_f = max(map(lambda x: len(x[0]), arrays))
  ref_f_obs = arrays[0][1]
  scales = []
  for f, f_obs, f_model, flag in arrays:
    if ref_f_obs == f_obs:
      k, B = 1., 0
    else:
      k, B = kBdecider(ref_f_obs, f_obs).run()
    scales.append((k, B))
  if params.reference != "first":
    if params.reference == "bmin":    # scale to strongest
      kref, bref = max(scales, key=lambda x: x[1])
    elif params.reference == "bmax":  # scale to weakest
      kref, bref = min(scales, key=lambda x: x[1])
    elif params.reference == "bmed":  # scale to the median
      perm = range(len(scales))
      perm.sort(key=lambda i: scales[i][1])
      kref, bref = scales[perm[len(perm)//2]]
    else:
      raise Exception("Never reaches here")
    print "# Set K=%.2f B=%.2f as reference" % (kref, bref)
    # not bref-x[1], because negated later
    scales = map(lambda x: (x[0]/kref, x[1]-bref), scales)
  print ("%"+str(maxlen_f)+"s r_work r_free cc_work.E cc_free.E sigmaa fom k B") % "filename"
  for (f, f_obs, f_model, flag), (k, B) in zip(arrays, scales):
    d_star_sq = f_obs.d_star_sq().data()
    scale = k * flex.exp(-B * d_star_sq)
    # Normalized
    #f_obs.setup_binner(auto_binning=True)
    #f_model.setup_binner(auto_binning=True)
    #e_obs, e_model = map(lambda x: x.quasi_normalize_structure_factors(), (f_obs, f_model))
    e_obs = absolute_scaling.kernel_normalisation(
      f_obs.customized_copy(data=f_obs.data()*scale, sigmas=None),
      auto_kernel=True)
    e_obs = e_obs.normalised_miller_dev_eps.f_sq_as_f()
    e_model = absolute_scaling.kernel_normalisation(
      f_model.customized_copy(data=f_model.data()*scale, sigmas=None),
      auto_kernel=True)
    e_model = e_model.normalised_miller_dev_eps.f_sq_as_f()
    f_obs_w, f_obs_t = f_obs.select(~flag.data()), f_obs.select(flag.data())
    f_model_w, f_model_t = f_model.select(~flag.data()), f_model.select(flag.data())
    e_obs_w, e_obs_t = e_obs.select(~flag.data()), e_obs.select(flag.data())
    e_model_w, e_model_t = e_model.select(~flag.data()), e_model.select(flag.data())
    r_work = calc_r(f_obs_w, f_model_w, scale.select(~flag.data()))
    r_free = calc_r(f_obs_t, f_model_t, scale.select(flag.data()))
    cc_work_E = calc_cc(e_obs_w, e_model_w, False)
    cc_free_E = calc_cc(e_obs_t, e_model_t, False)
    #cc_work_E2 = calc_cc(e_obs_w, e_model_w, True)
    #cc_free_E2 = calc_cc(e_obs_t, e_model_t, True)
    se = calc_sigmaa(f_obs, f_model, flag)
    sigmaa = flex.mean(se.sigmaa().data())
    fom = flex.mean(se.fom().data())
    print ("%"+str(maxlen_f)+"s %.4f %.4f % 7.4f % 7.4f %.4e %.4e %.3e %.3e") % (
      f, r_work, r_free, cc_work_E, cc_free_E, sigmaa, fom, k, B)

def calc_average_I_sigI(self, I, sigI, G, B, p, SE_I, sin_theta_over_lambda_sq,
                        avg_mode, SE, iph, d_spacings):
  for i in range(len(d_spacings)):
    if d_spacings[i] < iph.d_min_partiality:
      p[i] = 1.0
  I_full = I / (G * flex.exp(-2 * B * sin_theta_over_lambda_sq) * p)
  sigI_full = sigI / (G * flex.exp(-2 * B * sin_theta_over_lambda_sq) * p)
  # filter out outliers
  if np.std(I_full) > 0:
    I_full_as_sigma = (I_full - np.mean(I_full)) / np.std(I_full)
    i_sel = flex.abs(I_full_as_sigma) <= iph.sigma_max_merge
    I_full = I_full.select(i_sel)
    sigI_full = sigI_full.select(i_sel)
    SE = SE.select(i_sel)
  # normalize the SE
  max_w = 1.0
  min_w = 0.6
  if len(SE) == 1 or ((flex.min(SE) - flex.max(SE)) == 0):
    SE_norm = flex.double([min_w + ((max_w - min_w) / 2)] * len(SE))
  else:
    m = (max_w - min_w) / (flex.min(SE) - flex.max(SE))
    b = max_w - (m * flex.min(SE))
    SE_norm = (m * SE) + b
  if avg_mode == "weighted":
    I_avg = flex.sum(SE_norm * I_full) / flex.sum(SE_norm)
    sigI_avg = flex.sum(SE_norm * sigI_full) / flex.sum(SE_norm)
  elif avg_mode == "average":
    I_avg = flex.mean(I_full)
    sigI_avg = flex.mean(sigI_full)
  # Rmeas, Rmeas_w, multiplicity
  multiplicity = len(I_full)
  if multiplicity == 1:
    r_meas_w_top = 0
    r_meas_w_btm = 0
    r_meas_top = 0
    r_meas_btm = 0
  else:
    n_obs = multiplicity
    r_meas_w_top = flex.sum(((I_full - I_avg) * SE_norm)**2) \
        * math.sqrt(n_obs / (n_obs - 1))
    r_meas_w_btm = flex.sum((I_full * SE_norm)**2)
    r_meas_top = flex.sum((I_full - I_avg)**2) * math.sqrt(n_obs / (n_obs - 1))
    r_meas_btm = flex.sum((I_full)**2)
  # for calculation of CC1/2:
  # separate the observations into two groups
  if multiplicity == 1:
    I_avg_even = 0
    I_avg_odd = 0
  else:
    i_even = range(0, len(I_full), 2)
    i_odd = range(1, len(I_full), 2)
    I_even = I_full.select(i_even)
    sigI_even = sigI_full.select(i_even)
    SE_norm_even = SE_norm.select(i_even)
    I_odd = I_full.select(i_odd)
    sigI_odd = sigI_full.select(i_odd)
    SE_norm_odd = SE_norm.select(i_odd)
    if len(i_even) > len(i_odd):
      I_odd.append(I_even[len(I_even) - 1])
      sigI_odd.append(sigI_even[len(I_even) - 1])
      SE_norm_odd.append(SE_norm_even[len(I_even) - 1])
    if avg_mode == "weighted":
      I_avg_even = flex.sum(SE_norm_even * I_even) / flex.sum(SE_norm_even)
      I_avg_odd = flex.sum(SE_norm_odd * I_odd) / flex.sum(SE_norm_odd)
    elif avg_mode == "average":
      I_avg_even = flex.mean(I_even)
      I_avg_odd = flex.mean(I_odd)
  return (I_avg, sigI_avg,
          (r_meas_w_top, r_meas_w_btm, r_meas_top, r_meas_btm, multiplicity),
          I_avg_even, I_avg_odd)

def do_clustering(self, nproc=1, b_scale=False, use_normalized=False,
                  html_maker=None):
  self.clusters = {}
  prefix = os.path.join(self.wdir, "cctable")
  assert (b_scale, use_normalized).count(True) <= 1

  if len(self.arrays) < 2:
    print "WARNING: less than two data! can't do cc-based clustering"
    self.clusters[1] = [float("nan"), [0]]
    return

  # Absolute scaling using Wilson-B factor
  if b_scale:
    from mmtbx.scaling.matthews import p_vm_calculator
    from mmtbx.scaling.absolute_scaling import ml_iso_absolute_scaling

    ofs_wilson = open("%s_wilson_scales.dat" % prefix, "w")
    n_residues = p_vm_calculator(self.arrays.values()[0], 1, 0).best_guess
    ofs_wilson.write("# guessed n_residues= %d\n" % n_residues)
    ofs_wilson.write("file wilsonB\n")
    for f in self.arrays:
      arr = self.arrays[f]
      iso_scale_and_b = ml_iso_absolute_scaling(arr, n_residues, 0)
      wilson_b = iso_scale_and_b.b_wilson
      ofs_wilson.write("%s %.3f\n" % (f, wilson_b))
      if wilson_b > 0:  # Ignoring data with B<0? is a bad idea.. but how..?
        tmp = flex.exp(-2. * wilson_b * arr.unit_cell().d_star_sq(arr.indices()) / 4.)
        self.arrays[f] = arr.customized_copy(data=arr.data() * tmp,
                                             sigmas=arr.sigmas() * tmp)
    ofs_wilson.close()
  elif use_normalized:
    from mmtbx.scaling.absolute_scaling import kernel_normalisation
    for f in self.arrays:
      arr = self.arrays[f]
      normaliser = kernel_normalisation(arr, auto_kernel=True)
      self.arrays[f] = arr.customized_copy(
        data=arr.data() / normaliser.normalizer_for_miller_array,
        sigmas=arr.sigmas() / normaliser.normalizer_for_miller_array)

  # Prep
  args = []
  for i in xrange(len(self.arrays) - 1):
    for j in xrange(i + 1, len(self.arrays)):
      args.append((i, j))

  # Calc all CC
  worker = lambda x: calc_cc(self.arrays.values()[x[0]],
                             self.arrays.values()[x[1]])
  results = easy_mp.pool_map(fixed_func=worker, args=args, processes=nproc)

  # Check NaN and decide which data to remove
  idx_bad = {}
  nans = []
  cc_data_for_html = []
  for (i, j), (cc, nref) in zip(args, results):
    cc_data_for_html.append((i, j, cc, nref))
    if cc == cc:
      continue
    idx_bad[i] = idx_bad.get(i, 0) + 1
    idx_bad[j] = idx_bad.get(j, 0) + 1
    nans.append([i, j])

  if html_maker is not None:
    html_maker.add_cc_clustering_details(cc_data_for_html)

  idx_bad = idx_bad.items()
  idx_bad.sort(key=lambda x: x[1])
  remove_idxes = set()

  for idx, badcount in reversed(idx_bad):
    if len(filter(lambda x: idx in x, nans)) == 0:
      continue
    remove_idxes.add(idx)
    nans = filter(lambda x: idx not in x, nans)
    if len(nans) == 0:
      break

  use_idxes = filter(lambda x: x not in remove_idxes, xrange(len(self.arrays)))

  # Make table: original index (in file list) -> new index (in matrix)
  count = 0
  org2now = collections.OrderedDict()
  for i in xrange(len(self.arrays)):
    if i in remove_idxes:
      continue
    org2now[i] = count
    count += 1

  if len(remove_idxes) > 0:
    open("%s_notused.lst" % prefix, "w").write(
      "\n".join(map(lambda x: self.arrays.keys()[x], remove_idxes)))

  # Make matrix
  mat = numpy.zeros(shape=(len(use_idxes), len(use_idxes)))
  for (i, j), (cc, nref) in zip(args, results):
    if i in remove_idxes or j in remove_idxes:
      continue
    mat[org2now[j], org2now[i]] = cc

  open("%s.matrix" % prefix, "w").write(
    " ".join(map(lambda x: "%.4f" % x, mat.flatten())))

  ofs = open("%s.dat" % prefix, "w")
  ofs.write("   i    j     cc  nref\n")
  for (i, j), (cc, nref) in zip(args, results):
    ofs.write("%4d %4d %.4f %4d\n" % (i, j, cc, nref))

  open("%s_ana.R" % prefix, "w").write("""\
treeToList2 <- function(htree) { # stolen from $CCP4/share/blend/R/blend0.R
 groups <- list()
 itree <- dim(htree$merge)[1]
 for (i in 1:itree) {
  il <- htree$merge[i,1]
  ir <- htree$merge[i,2]
  if (il < 0) lab1 <- htree$labels[-il]
  if (ir < 0) lab2 <- htree$labels[-ir]
  if (il > 0) lab1 <- groups[[il]]
  if (ir > 0) lab2 <- groups[[ir]]
  lab <- c(lab1, lab2)
  lab <- as.integer(lab)
  groups <- c(groups, list(lab))
 }
 return(groups)
}

cc <- scan("%(prefix)s.matrix")
md <- matrix(1-cc, ncol=%(ncol)d, byrow=TRUE)
hc <- hclust(as.dist(md), method="ward")

pdf("tree.pdf")
plot(hc)
dev.off()
png("tree.png", height=1000, width=1000)
plot(hc)
dev.off()

hc$labels <- c(%(hclabels)s)
groups <- treeToList2(hc)
cat("ClNumber Nds Clheight IDs\\n", file="./CLUSTERS.txt")
for (i in 1:length(groups)) {
 sorted_groups <- sort(groups[[i]])
 linea <- sprintf("%%04d %%4d %%7.3f %%s\\n",
                  i, length(groups[[i]]), hc$height[i],
                  paste(sorted_groups, collapse=" "))
 cat(linea, file="./CLUSTERS.txt", append=TRUE)
}

# reference: http://www.coppelia.io/2014/07/converting-an-r-hclust-object-into-a-d3-js-dendrogram/
library(rjson)
HCtoJSON <- function(hc){
  labels <- hc$labels
  merge <- data.frame(hc$merge)
  for (i in (1:nrow(merge))) {
    if (merge[i,1]<0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]),list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]>0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node", merge[i,1], ", list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]<0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]), node", merge[i,2], "))")))}
    else if (merge[i,1]>0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node", merge[i,1], ", node", merge[i,2], "))")))}
  }
  eval(parse(text=paste0("JSON<-toJSON(node", nrow(merge), ")")))
  return(JSON)
}

JSON <- HCtoJSON(hc)
cat(JSON, file="dendro.json")

q(save="yes")
""" % dict(prefix=os.path.basename(prefix),
           ncol=len(self.arrays),
           hclabels=",".join(map(lambda x: "%d" % (x+1), org2now.keys()))))

  call(cmd="Rscript", arg="%s_ana.R" % os.path.basename(prefix),
       wdir=self.wdir)

  output = open(os.path.join(self.wdir, "CLUSTERS.txt")).readlines()
  for l in output[1:]:
    sp = l.split()
    clid, clheight, ids = sp[0], sp[2], sp[3:]
    self.clusters[int(clid)] = [float(clheight), map(int, ids)]

def do_clustering(self, nproc=1, b_scale=False, use_normalized=False,
                  cluster_method="ward", distance_eqn="sqrt(1-cc)",
                  min_common_refs=3, html_maker=None):
  """
  Using correlation as distance metric (for hierarchical clustering)
  https://stats.stackexchange.com/questions/165194/using-correlation-as-distance-metric-for-hierarchical-clustering

  Correlation "Distances" and Hierarchical Clustering
  http://research.stowers.org/mcm/efg/R/Visualization/cor-cluster/index.htm
  """
  self.clusters = {}
  prefix = os.path.join(self.wdir, "cctable")
  assert (b_scale, use_normalized).count(True) <= 1

  distance_eqns = {
    "sqrt(1-cc)": lambda x: numpy.sqrt(1. - x),
    "1-cc": lambda x: 1. - x,
    "sqrt(1-cc^2)": lambda x: numpy.sqrt(1. - x**2),
  }
  cc_to_distance = distance_eqns[distance_eqn]  # fail when unknown option
  assert cluster_method in ("single", "complete", "average", "weighted",
                            "centroid", "median", "ward")  # available methods in scipy

  if len(self.arrays) < 2:
    print "WARNING: less than two data! can't do cc-based clustering"
    self.clusters[1] = [float("nan"), [0]]
    return

  # Absolute scaling using Wilson-B factor
  if b_scale:
    from mmtbx.scaling.matthews import p_vm_calculator
    from mmtbx.scaling.absolute_scaling import ml_iso_absolute_scaling

    ofs_wilson = open("%s_wilson_scales.dat" % prefix, "w")
    n_residues = p_vm_calculator(self.arrays.values()[0], 1, 0).best_guess
    ofs_wilson.write("# guessed n_residues= %d\n" % n_residues)
    ofs_wilson.write("file wilsonB\n")
    for f in self.arrays:
      arr = self.arrays[f]
      iso_scale_and_b = ml_iso_absolute_scaling(arr, n_residues, 0)
      wilson_b = iso_scale_and_b.b_wilson
      ofs_wilson.write("%s %.3f\n" % (f, wilson_b))
      if wilson_b > 0:  # Ignoring data with B<0? is a bad idea.. but how..?
        tmp = flex.exp(-2. * wilson_b * arr.unit_cell().d_star_sq(arr.indices()) / 4.)
        self.arrays[f] = arr.customized_copy(data=arr.data() * tmp,
                                             sigmas=arr.sigmas() * tmp)
    ofs_wilson.close()
  elif use_normalized:
    from mmtbx.scaling.absolute_scaling import kernel_normalisation
    failed = {}
    for f in self.arrays:
      arr = self.arrays[f]
      try:
        normaliser = kernel_normalisation(arr, auto_kernel=True)
        self.arrays[f] = arr.customized_copy(
          data=arr.data() / normaliser.normalizer_for_miller_array,
          sigmas=arr.sigmas() / normaliser.normalizer_for_miller_array)
      except Exception, e:
        failed.setdefault(e.message, []).append(f)
    if failed:
      msg = ""
      for r in failed:
        msg += " %s\n%s\n" % (r, "\n".join(map(lambda x: "  %s" % x, failed[r])))
      raise Sorry("intensity normalization failed by following reason(s):\n%s" % msg)

def reverse_reparam(values):
    return 1.0 / (1.0 + flex.exp(-values))
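reverse_reparam is the elementwise logistic (inverse-logit) function: presumably the optimizer works on an unbounded parameter y and this maps it back into (0, 1), which suits quantities like sigmaA. A scalar sketch of the round trip, where forward_reparam is my assumed counterpart, not taken from the source:

import math

def forward_reparam(x):
    # logit: maps a value in (0, 1) onto the whole real line
    return math.log(x / (1.0 - x))

def reverse_reparam_scalar(y):
    # logistic (inverse logit): maps any real value back into (0, 1)
    return 1.0 / (1.0 + math.exp(-y))

# round trip
assert abs(reverse_reparam_scalar(forward_reparam(0.3)) - 0.3) < 1e-12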
def compute(self, f_calc, scale_factor=None):
    self.calculated = f_calc
    a, b, c, d, e, f = self._params
    if self._obs_part_dirty:
        # The part depending only on |F_o|^2
        if c == 0:
            q = None
        else:
            exp_args = self.observed.sin_theta_over_lambda_sq().data()
            if e != 0:
                k_sqr = exp_args.deep_copy()
            exp_args *= c
            exp_vals = flex.exp(exp_args)
            if c > 0:
                q = exp_vals
            else:
                q = 1 - exp_vals
        self._q = q
        if self.observed.sigmas() is not None:
            self._den_obs = flex.pow2(self.observed.sigmas())
            if d != 0:
                self._den_obs += d
            if e != 0:
                e_times_sin_theta_sq = k_sqr
                e_times_sin_theta_sq *= e * self._wavelength
                self._den_obs += e_times_sin_theta_sq
        else:
            self._den_obs = None
        negatives = self.observed.data() < 0
        self._p_obs = self.observed.data().deep_copy()
        self._p_obs.set_selected(negatives, 0)
        self._p_obs *= f
        #
        self._obs_part_dirty = False

    # The part depending on |F_c|^2 as well
    q = self._q
    f_c = self.calculated.data()
    p = flex.norm(f_c)
    if scale_factor is None:
        scale_factor = self.observed.scale_factor(self.calculated, cutoff_factor=0.99)
    self.scale_factor = scale_factor
    p *= scale_factor
    p *= 1 - f
    p += self._p_obs
    den = p.deep_copy()
    den *= a * a
    der = None
    if self.computing_derivatives_wrt_f_c:
        der = 2 * den
    den += b
    if self.computing_derivatives_wrt_f_c:
        der += b
    den *= p
    if self._den_obs is not None:
        den += self._den_obs
    if q is None:
        w = 1 / den
    else:
        w = q / den
    if self.computing_derivatives_wrt_f_c:
        if scale_factor is not None:
            # don't modify f_c in place
            f_c = f_c * math.sqrt(scale_factor)
        der *= -flex.pow2(w)
        der *= 4. / 3
        der = der * f_c
    self.weights = w
    self.derivatives_wrt_f_c = der
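Algebraically, compute() above evaluates what appears to be a SHELX-type weighting scheme. Ignoring the scale factor applied to |F_c|^2, the per-reflection weight is w = q / (a^2*P^2 + b*P + sigma^2(Fo^2) + d + e*lambda*(sin(theta)/lambda)^2), with P = f*max(Fo^2, 0) + (1 - f)*Fc^2, and q = exp(c*(sin(theta)/lambda)^2) for c > 0, q = 1 - exp(c*(sin(theta)/lambda)^2) for c < 0, and q dropped entirely when c == 0. A scalar sketch for a single reflection (the function name and simplifications are mine, not the class API):

import math

def shelx_style_weight(fo_sq, sig_fo_sq, fc_sq, stol_sq, wavelength,
                       a, b, c=0., d=0., e=0., f=1./3):
    # P mixes observed and calculated intensities; negative Fo^2 are clamped to 0
    p = f * max(fo_sq, 0.) + (1. - f) * fc_sq
    den = (a * a * p + b) * p + sig_fo_sq**2 + d + e * wavelength * stol_sq
    if c == 0:
        return 1. / den
    q = math.exp(c * stol_sq)
    if c < 0:
        q = 1. - q
    return q / den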
def reverse_reparam(values):
    return 1.0 / (1.0 + flex.exp(-values))

self.sigmaa_fitted = reverse_reparam(cheb_pol.f(self.h_array))
def map_coefficients(self,
                     map_type,
                     acentrics_scale=2.0,
                     centrics_pre_scale=1.0,
                     exclude_free_r_reflections=False,
                     fill_missing=False,
                     fill_missing_method="f_model",
                     ncs_average=False,
                     isotropize=True,
                     sharp=False,
                     post_processing_callback=None,
                     pdb_hierarchy=None,  # XXX required for map_type=llg
                     merge_anomalous=None,
                     use_shelx_weight=False,
                     shelx_weight_parameter=1.5):
    map_name_manager = mmtbx.map_names(map_name_string=map_type)
    # Special case #1: anomalous map
    if (map_name_manager.anomalous):
        if (self.anom_diff is not None):
            # Formula from page 141 in "The Bijvoet-Difference Fourier Synthesis",
            # Jeffrey Roach, METHODS IN ENZYMOLOGY, VOL. 374
            return miller.array(miller_set=self.anom_diff,
                                data=self.anom_diff.data() / (2j))
        else:
            return None
    # Special case #2: anomalous residual map
    elif (map_name_manager.anomalous_residual):
        if (self.anom_diff is not None):
            return anomalous_residual_map_coefficients(
                fmodel=self.fmodel,
                exclude_free_r_reflections=exclude_free_r_reflections)
        else:
            return None
    # Special case #3: Phaser SAD LLG map
    elif (map_name_manager.phaser_sad_llg):
        if (pdb_hierarchy is None):
            raise RuntimeError("pdb_hierarchy must not be None when a Phaser SAD "
                               "LLG map is requested.")
        if (self.anom_diff is not None):
            return get_phaser_sad_llg_map_coefficients(
                fmodel=self.fmodel,
                pdb_hierarchy=pdb_hierarchy)
        else:
            return None
    # Special case #4: Fcalc map
    mnm = mmtbx.map_names(map_name_string=map_type)
    if (mnm.k == 0 and abs(mnm.n) == 1):
        if (fill_missing):
            return self.fmodel.xray_structure.structure_factors(
                d_min=self.fmodel.f_obs().d_min()).f_calc()
        else:
            return self.fmodel.f_obs().structure_factors_from_scatterers(
                xray_structure=self.fmodel.xray_structure).f_calc()
    #
    if (self.mch is None):
        self.mch = self.fmodel.map_calculation_helper()
    ffs = fo_fc_scales(
        fmodel=self.fmodel,
        map_type_str=map_type,
        acentrics_scale=acentrics_scale,
        centrics_scale=centrics_pre_scale)
    fo_scale, fc_scale = ffs.fo_scale, ffs.fc_scale
    coeffs = combine(
        fmodel=self.fmodel,
        map_type_str=map_type,
        fo_scale=fo_scale,
        fc_scale=fc_scale,
        map_calculation_helper=self.mch,
        use_shelx_weight=use_shelx_weight,
        shelx_weight_parameter=shelx_weight_parameter).map_coefficients()
    r_free_flags = None
    # XXX the default scale array (used for the isotropize option) needs to be
    # calculated and processed now to avoid array size errors
    scale_default = 1. / (self.fmodel.k_isotropic() * self.fmodel.k_anisotropic())
    scale_array = coeffs.customized_copy(data=scale_default)
    if (exclude_free_r_reflections):
        if (coeffs.anomalous_flag()):
            coeffs = coeffs.average_bijvoet_mates()
        r_free_flags = self.fmodel.r_free_flags()
        if (r_free_flags.anomalous_flag()):
            r_free_flags = r_free_flags.average_bijvoet_mates()
            scale_array = scale_array.average_bijvoet_mates()
        coeffs = coeffs.select(~r_free_flags.data())
        scale_array = scale_array.select(~r_free_flags.data())
    scale = None
    if (ncs_average) and (post_processing_callback is not None):
        # XXX NCS averaging done here
        assert hasattr(post_processing_callback, "__call__")
        coeffs = post_processing_callback(
            map_coeffs=coeffs,
            fmodel=self.fmodel,
            map_type=map_type)
    if (isotropize):
        if (scale is None):
            if (scale_array.anomalous_flag()) and (not coeffs.anomalous_flag()):
                scale_array = scale_array.average_bijvoet_mates()
            scale = scale_array.data()
        coeffs = coeffs.customized_copy(data=coeffs.data() * scale)
    if (fill_missing):
        if (coeffs.anomalous_flag()):
            coeffs = coeffs.average_bijvoet_mates()
        coeffs = fill_missing_f_obs(
            coeffs=coeffs,
            fmodel=self.fmodel,
            method=fill_missing_method)
    if (sharp):
        ss = 1. / flex.pow2(coeffs.d_spacings().data()) / 4.
        from cctbx import adptbx
        b = flex.mean(self.fmodel.xray_structure.extract_u_iso_or_u_equiv() *
                      adptbx.u_as_b(1)) / 2
        k_sharp = 1. / flex.exp(-ss * b)
        coeffs = coeffs.customized_copy(data=coeffs.data() * k_sharp)
    if (merge_anomalous) and (coeffs.anomalous_flag()):
        return coeffs.average_bijvoet_mates()
    return coeffs
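The sharp branch applies plain inverse Debye-Waller sharpening: with s = 1/d and ss = s^2/4, the multiplier is k_sharp = 1/exp(-B*ss) = exp(B*s^2/4), where B is taken as half the mean model B-factor. As a worked example with hypothetical numbers: at d = 2 A and B = 20 A^2, k_sharp = exp(20 * 0.25 / 4) = exp(1.25), roughly 3.5, so the highest-resolution coefficients receive the largest boost.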
def map_coefficients(self,
                     map_type,
                     acentrics_scale=2.0,
                     centrics_pre_scale=1.0,
                     exclude_free_r_reflections=False,
                     fill_missing=False,
                     fill_missing_method="f_model",
                     isotropize=True,
                     sharp=False,
                     pdb_hierarchy=None,  # XXX required for map_type=llg
                     merge_anomalous=None,
                     use_shelx_weight=False,
                     shelx_weight_parameter=1.5):
    map_name_manager = mmtbx.map_names(map_name_string=map_type)
    # Special case #1: anomalous map
    if (map_name_manager.anomalous):
        if (self.anom_diff is not None):
            # Formula from page 141 in "The Bijvoet-Difference Fourier Synthesis",
            # Jeffrey Roach, METHODS IN ENZYMOLOGY, VOL. 374
            return miller.array(miller_set=self.anom_diff,
                                data=self.anom_diff.data() / (2j))
        else:
            return None
    # Special case #2: anomalous residual map
    elif (map_name_manager.anomalous_residual):
        if (self.anom_diff is not None):
            return anomalous_residual_map_coefficients(
                fmodel=self.fmodel,
                exclude_free_r_reflections=exclude_free_r_reflections)
        else:
            return None
    # Special case #3: Phaser SAD LLG map
    elif (map_name_manager.phaser_sad_llg):
        if (pdb_hierarchy is None):
            raise RuntimeError("pdb_hierarchy must not be None when a Phaser SAD "
                               "LLG map is requested.")
        if (self.anom_diff is not None):
            return get_phaser_sad_llg_map_coefficients(
                fmodel=self.fmodel,
                pdb_hierarchy=pdb_hierarchy)
        else:
            return None
    # Special case #4: Fcalc map
    mnm = mmtbx.map_names(map_name_string=map_type)
    if (mnm.k == 0 and abs(mnm.n) == 1):
        if (fill_missing):
            return self.fmodel.xray_structure.structure_factors(
                d_min=self.fmodel.f_obs().d_min()).f_calc()
        else:
            return self.fmodel.f_obs().structure_factors_from_scatterers(
                xray_structure=self.fmodel.xray_structure).f_calc()
    #
    if (self.mch is None):
        self.mch = self.fmodel.map_calculation_helper()
    ffs = fo_fc_scales(
        fmodel=self.fmodel,
        map_type_str=map_type,
        acentrics_scale=acentrics_scale,
        centrics_scale=centrics_pre_scale)
    fo_scale, fc_scale = ffs.fo_scale, ffs.fc_scale
    coeffs = combine(
        fmodel=self.fmodel,
        map_type_str=map_type,
        fo_scale=fo_scale,
        fc_scale=fc_scale,
        map_calculation_helper=self.mch,
        use_shelx_weight=use_shelx_weight,
        shelx_weight_parameter=shelx_weight_parameter).map_coefficients()
    r_free_flags = None
    # XXX the default scale array (used for the isotropize option) needs to be
    # calculated and processed now to avoid array size errors
    scale_default = 1. / (self.fmodel.k_isotropic() * self.fmodel.k_anisotropic())
    scale_array = coeffs.customized_copy(data=scale_default)
    if (exclude_free_r_reflections):
        if (coeffs.anomalous_flag()):
            coeffs = coeffs.average_bijvoet_mates()
        r_free_flags = self.fmodel.r_free_flags()
        if (r_free_flags.anomalous_flag()):
            r_free_flags = r_free_flags.average_bijvoet_mates()
            scale_array = scale_array.average_bijvoet_mates()
        coeffs = coeffs.select(~r_free_flags.data())
        scale_array = scale_array.select(~r_free_flags.data())
    scale = None
    if (isotropize):
        if (scale is None):
            if (scale_array.anomalous_flag()) and (not coeffs.anomalous_flag()):
                scale_array = scale_array.average_bijvoet_mates()
            scale = scale_array.data()
        coeffs = coeffs.customized_copy(data=coeffs.data() * scale)
    if (fill_missing):
        if (coeffs.anomalous_flag()):
            coeffs = coeffs.average_bijvoet_mates()
        coeffs = fill_missing_f_obs(
            coeffs=coeffs,
            fmodel=self.fmodel,
            method=fill_missing_method)
    if (sharp):
        ss = 1. / flex.pow2(coeffs.d_spacings().data()) / 4.
        from cctbx import adptbx
        b = flex.mean(self.fmodel.xray_structure.extract_u_iso_or_u_equiv() *
                      adptbx.u_as_b(1)) / 2
        k_sharp = 1. / flex.exp(-ss * b)
        coeffs = coeffs.customized_copy(data=coeffs.data() * k_sharp)
    if (merge_anomalous) and (coeffs.anomalous_flag()):
        return coeffs.average_bijvoet_mates()
    return coeffs
def determine_polar(self, observations_original, iparams, pickle_filename, pres=None):
    """
    Determine polarity based on the input data.
    The function still needs an isomorphous reference, so if flag_polar is True,
    miller_array_iso must be supplied in the input file.
    """
    if iparams.indexing_ambiguity.flag_on == False:
        return "h,k,l", 0, 0
    cc_asu = 0
    cc_rev = 0
    if iparams.indexing_ambiguity.index_basis_in is not None:
        if iparams.indexing_ambiguity.index_basis_in.endswith("mtz"):
            # use reference mtz file to determine polarity
            from iotbx import reflection_file_reader
            reflection_file_polar = reflection_file_reader.any_reflection_file(
                iparams.indexing_ambiguity.index_basis_in)
            miller_arrays_polar = reflection_file_polar.as_miller_arrays()
            miller_array_polar = miller_arrays_polar[0]
            miller_array_polar = miller_array_polar.resolution_filter(
                d_min=iparams.indexing_ambiguity.d_min,
                d_max=iparams.indexing_ambiguity.d_max)
            # for post-refinement, apply the scale factors and partiality first
            if pres is not None:
                # observations_original = pres.observations_original.deep_copy()
                two_theta = observations_original.two_theta(wavelength=pres.wavelength).data()
                from mod_leastsqr import calc_partiality_anisotropy_set
                alpha_angle = flex.double([0] * len(observations_original.indices()))
                spot_pred_x_mm = flex.double([0] * len(observations_original.indices()))
                spot_pred_y_mm = flex.double([0] * len(observations_original.indices()))
                detector_distance_mm = pres.detector_distance_mm
                partiality, dummy, dummy, dummy = calc_partiality_anisotropy_set(
                    pres.unit_cell, 0, 0,
                    observations_original.indices(),
                    pres.ry, pres.rz, pres.r0, pres.re,
                    two_theta, alpha_angle, pres.wavelength,
                    pres.crystal_orientation,
                    spot_pred_x_mm, spot_pred_y_mm,
                    detector_distance_mm,
                    iparams.partiality_model,
                    iparams.flag_beam_divergence)
                # partiality = pres.partiality
                sin_theta_over_lambda_sq = (
                    observations_original.two_theta(pres.wavelength)
                    .sin_theta_over_lambda_sq().data())
                I_full = flex.double(
                    observations_original.data() /
                    (pres.G * flex.exp(flex.double(-2 * pres.B * sin_theta_over_lambda_sq)) * partiality))
                sigI_full = flex.double(
                    observations_original.sigmas() /
                    (pres.G * flex.exp(flex.double(-2 * pres.B * sin_theta_over_lambda_sq)) * partiality))
                observations_original = observations_original.customized_copy(
                    data=I_full, sigmas=sigI_full)
            observations_asu = observations_original.map_to_asu()
            observations_rev = self.get_observations_non_polar(
                observations_original, iparams.indexing_ambiguity.assigned_basis)
            matches = miller.match_multi_indices(
                miller_indices_unique=miller_array_polar.indices(),
                miller_indices=observations_asu.indices())
            I_ref_match = flex.double([miller_array_polar.data()[pair[0]] for pair in matches.pairs()])
            I_obs_match = flex.double([observations_asu.data()[pair[1]] for pair in matches.pairs()])
            cc_asu = flex.linear_correlation(I_ref_match, I_obs_match).coefficient()
            n_refl_asu = len(matches.pairs())
            matches = miller.match_multi_indices(
                miller_indices_unique=miller_array_polar.indices(),
                miller_indices=observations_rev.indices())
            I_ref_match = flex.double([miller_array_polar.data()[pair[0]] for pair in matches.pairs()])
            I_obs_match = flex.double([observations_rev.data()[pair[1]] for pair in matches.pairs()])
            cc_rev = flex.linear_correlation(I_ref_match, I_obs_match).coefficient()
            n_refl_rev = len(matches.pairs())
            polar_hkl = "h,k,l"
            if cc_rev > (cc_asu * 1.01):
                polar_hkl = iparams.indexing_ambiguity.assigned_basis
        else:
            # use basis given in the input pickle file
            polar_hkl = "h,k,l"
            basis_pickle = pickle.load(open(iparams.indexing_ambiguity.index_basis_in, "rb"))
            if pickle_filename in basis_pickle:
                polar_hkl = basis_pickle[pickle_filename]
    else:
        # set default polar_hkl to h,k,l
        polar_hkl = "h,k,l"
    return polar_hkl, cc_asu, cc_rev
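The acceptance rule at the end is a 1% margin on the correlation to the isomorphous reference: the reindexed (reverse) basis wins only if cc_rev exceeds cc_asu by more than a factor of 1.01. A toy distillation of that rule; choose_basis and the example operator string are illustrative, not part of the class:

def choose_basis(cc_asu, cc_rev, assigned_basis="k,h,-l"):
    # accept the reindexing operator only on a clear (>1%) improvement
    if cc_rev > cc_asu * 1.01:
        return assigned_basis
    return "h,k,l"

assert choose_basis(0.50, 0.51) == "k,h,-l"   # 2% better: reindex
assert choose_basis(0.50, 0.503) == "h,k,l"   # within the margin: keep h,k,l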
def do_clustering(self, nproc=1, b_scale=False, use_normalized=False, html_maker=None):
    self.clusters = {}
    prefix = os.path.join(self.wdir, "cctable")
    assert (b_scale, use_normalized).count(True) <= 1

    if len(self.arrays) < 2:
        print "WARNING: fewer than two datasets; can't do cc-based clustering"
        self.clusters[1] = [float("nan"), [0]]
        return

    # Absolute scaling using the Wilson B-factor
    if b_scale:
        from mmtbx.scaling.matthews import p_vm_calculator
        from mmtbx.scaling.absolute_scaling import ml_iso_absolute_scaling

        ofs_wilson = open("%s_wilson_scales.dat" % prefix, "w")
        n_residues = p_vm_calculator(self.arrays.values()[0], 1, 0).best_guess
        ofs_wilson.write("# guessed n_residues= %d\n" % n_residues)
        ofs_wilson.write("file wilsonB\n")
        for f in self.arrays:
            arr = self.arrays[f]
            iso_scale_and_b = ml_iso_absolute_scaling(arr, n_residues, 0)
            wilson_b = iso_scale_and_b.b_wilson
            ofs_wilson.write("%s %.3f\n" % (f, wilson_b))
            if wilson_b > 0:  # Ignoring data with B<0 is a bad idea.. but how..?
                tmp = flex.exp(-2. * wilson_b * arr.unit_cell().d_star_sq(arr.indices()) / 4.)
                self.arrays[f] = arr.customized_copy(data=arr.data() * tmp,
                                                     sigmas=arr.sigmas() * tmp)
        ofs_wilson.close()
    elif use_normalized:
        from mmtbx.scaling.absolute_scaling import kernel_normalisation
        for f in self.arrays:
            arr = self.arrays[f]
            normaliser = kernel_normalisation(arr, auto_kernel=True)
            self.arrays[f] = arr.customized_copy(
                data=arr.data() / normaliser.normalizer_for_miller_array,
                sigmas=arr.sigmas() / normaliser.normalizer_for_miller_array)

    # Prep
    args = []
    for i in xrange(len(self.arrays) - 1):
        for j in xrange(i + 1, len(self.arrays)):
            args.append((i, j))

    # Calc all CC
    if self.use_sfdist:
        worker = lambda x: calc_sfdist(self.arrays.values()[x[0]], self.arrays.values()[x[1]])
    else:
        worker = lambda x: calc_cc(self.arrays.values()[x[0]], self.arrays.values()[x[1]])
    results = easy_mp.pool_map(fixed_func=worker, args=args, processes=nproc)

    # Check NaN and decide which data to remove
    idx_bad = {}
    nans = []
    cc_data_for_html = []
    for (i, j), (cc, nref) in zip(args, results):
        cc_data_for_html.append((i, j, cc, nref))
        if cc == cc:  # not NaN
            continue
        idx_bad[i] = idx_bad.get(i, 0) + 1
        idx_bad[j] = idx_bad.get(j, 0) + 1
        nans.append([i, j])

    if html_maker is not None:
        html_maker.add_cc_clustering_details(cc_data_for_html)

    idx_bad = idx_bad.items()
    idx_bad.sort(key=lambda x: x[1])
    remove_idxes = set()

    for idx, badcount in reversed(idx_bad):
        if len(filter(lambda x: idx in x, nans)) == 0:
            continue
        remove_idxes.add(idx)
        nans = filter(lambda x: idx not in x, nans)
        if len(nans) == 0:
            break

    use_idxes = filter(lambda x: x not in remove_idxes, xrange(len(self.arrays)))

    # Make table: original index (in file list) -> new index (in matrix)
    count = 0
    org2now = collections.OrderedDict()
    for i in xrange(len(self.arrays)):
        if i in remove_idxes:
            continue
        org2now[i] = count
        count += 1

    if len(remove_idxes) > 0:
        open("%s_notused.lst" % prefix, "w").write("\n".join(
            map(lambda x: self.arrays.keys()[x], remove_idxes)))

    # Make matrix
    mat = numpy.zeros(shape=(len(use_idxes), len(use_idxes)))
    for (i, j), (cc, nref) in zip(args, results):
        if i in remove_idxes or j in remove_idxes:
            continue
        mat[org2now[j], org2now[i]] = cc

    open("%s.matrix" % prefix, "w").write(" ".join(map(lambda x: "%.4f" % x, mat.flatten())))

    ofs = open("%s.dat" % prefix, "w")
    ofs.write(" i j cc nref\n")
    for (i, j), (cc, nref) in zip(args, results):
        ofs.write("%4d %4d %.4f %4d\n" % (i, j, cc, nref))

    open("%s_ana.R" % prefix, "w").write("""\
treeToList2 <- function(htree) {
  # stolen from $CCP4/share/blend/R/blend0.R
  groups <- list()
  itree <- dim(htree$merge)[1]
  for (i in 1:itree) {
    il <- htree$merge[i,1]
    ir <- htree$merge[i,2]
    if (il < 0) lab1 <- htree$labels[-il]
    if (ir < 0) lab2 <- htree$labels[-ir]
    if (il > 0) lab1 <- groups[[il]]
    if (ir > 0) lab2 <- groups[[ir]]
    lab <- c(lab1,lab2)
    lab <- as.integer(lab)
    groups <- c(groups,list(lab))
  }
  return(groups)
}

cc<-scan("%(prefix)s.matrix")
md<-matrix(1-cc, ncol=%(ncol)d, byrow=TRUE)
hc <- hclust(as.dist(md), method="ward")

pdf("tree.pdf")
plot(hc)
dev.off()
png("tree.png", height=1000, width=1000)
plot(hc)
dev.off()

hc$labels <- c(%(hclabels)s)
groups <- treeToList2(hc)
cat("ClNumber Nds Clheight IDs\\n", file="./CLUSTERS.txt")
for (i in 1:length(groups)) {
  sorted_groups <- sort(groups[[i]])
  linea <- sprintf("%%04d %%4d %%7.3f %%s\\n",
                   i, length(groups[[i]]), hc$height[i],
                   paste(sorted_groups, collapse=" "))
  cat(linea, file="./CLUSTERS.txt", append=TRUE)
}

# reference: http://www.coppelia.io/2014/07/converting-an-r-hclust-object-into-a-d3-js-dendrogram/
library(rjson)
HCtoJSON <- function(hc){
  labels <- hc$labels
  merge <- data.frame(hc$merge)
  for (i in (1:nrow(merge))) {
    if (merge[i,1]<0 & merge[i,2]<0) {
      eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]),list(name=labels[-merge[i,2]])))")))
    } else if (merge[i,1]>0 & merge[i,2]<0) {
      eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node", merge[i,1], ", list(name=labels[-merge[i,2]])))")))
    } else if (merge[i,1]<0 & merge[i,2]>0) {
      eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]), node", merge[i,2], "))")))
    } else if (merge[i,1]>0 & merge[i,2]>0) {
      eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node", merge[i,1], ", node", merge[i,2], "))")))
    }
  }
  eval(parse(text=paste0("JSON<-toJSON(node", nrow(merge), ")")))
  return(JSON)
}

JSON <- HCtoJSON(hc)
cat(JSON, file="dendro.json")
q(save="yes")
""" % dict(prefix=os.path.basename(prefix),
           ncol=len(self.arrays),
           hclabels=",".join(map(lambda x: "%d" % (x + 1), org2now.keys()))))

    call(cmd="Rscript", arg="%s_ana.R" % os.path.basename(prefix), wdir=self.wdir)

    output = open(os.path.join(self.wdir, "CLUSTERS.txt")).readlines()
    for l in output[1:]:
        sp = l.split()
        clid, clheight, ids = sp[0], sp[2], sp[3:]
        self.clusters[int(clid)] = [float(clheight), map(int, ids)]
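The NaN-handling block above greedily removes whole datasets until no NaN CC pair remains, preferring the datasets that appear in the most bad pairs. A stand-alone sketch of that idea; prune_nan_pairs is illustrative, not the method's API:

def prune_nan_pairs(nan_pairs):
    # count how many NaN pairs each dataset participates in
    counts = {}
    for i, j in nan_pairs:
        counts[i] = counts.get(i, 0) + 1
        counts[j] = counts.get(j, 0) + 1
    removed = set()
    # drop the worst offender first, re-filtering the pair list each time
    for idx, _ in sorted(counts.items(), key=lambda x: -x[1]):
        if not any(idx in p for p in nan_pairs):
            continue  # already resolved by an earlier removal
        removed.add(idx)
        nan_pairs = [p for p in nan_pairs if idx not in p]
        if not nan_pairs:
            break
    return removed

assert prune_nan_pairs([(0, 1), (0, 2)]) == set([0])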