def set_fstats(self, fstats):
    self.all_spots = None
    self.spots = collections.OrderedDict()
    self.total_integrated_signal = {}
    self.mean_integrated_signal = {}
    self.median_integrated_signal = {}
    self.intensities = {}   # initialized here so the stats loop below can fill them
    self.resolutions = {}
    self.n_spots = {}

    for k in sorted(fstats.nodes.keys()):
        node = fstats.nodes[k]
        # XXX some data in node.data will be corrupted (e.g. resolution, wts),
        # but x and y coordinates look ok (it works later). Why?
        if node.descriptor == "spots_total":
            self.all_spots = node.data
        else:
            self.spots[node.descriptor] = node.data

    # Pre-calculate stats
    for k in self.keys():
        summed_wts = [flex.sum(spot.wts) for spot in self.get_spots(k)]
        self.intensities[k] = summed_wts
        self.resolutions[k] = [spot.resolution for spot in self.get_spots(k)]
        total_summed = flex.sum(flex.double(summed_wts))
        if len(summed_wts) > 0:
            self.mean_integrated_signal[k] = total_summed / len(summed_wts)
            self.median_integrated_signal[k] = flex.median(flex.double(summed_wts))
        else:
            self.mean_integrated_signal[k] = 0.
            self.median_integrated_signal[k] = 0.
        self.total_integrated_signal[k] = total_summed
        self.n_spots[k] = len(summed_wts)

def dump_R_in_bins(obs, calc, scale_B=True, log_out=sys.stdout, n_bins=20):
    #obs, calc = obs.common_sets(calc, assert_is_similar_symmetry=False)
    if scale_B:
        scale, B = kBdecider(obs, calc).run()
        d_star_sq = calc.d_star_sq().data()
        calc = calc.customized_copy(data=scale * flex.exp(-B*d_star_sq) * calc.data())

    binner = obs.setup_binner(n_bins=n_bins)
    count = 0
    log_out.write("dmax - dmin: R (nref) <I1> <I2> scale\n")

    for i_bin in binner.range_used():
        tmp_obs = obs.select(binner.bin_indices() == i_bin)
        tmp_calc = calc.select(binner.bin_indices() == i_bin)
        low = binner.bin_d_range(i_bin)[0]
        high = binner.bin_d_range(i_bin)[1]
        if scale_B:
            scale = 1.
        else:
            scale = flex.sum(tmp_obs.data()*tmp_calc.data()) / flex.sum(flex.pow2(tmp_calc.data()))
        R = flex.sum(flex.abs(tmp_obs.data() - scale*tmp_calc.data())) \
            / flex.sum(0.5 * tmp_obs.data() + 0.5 * scale*tmp_calc.data())
        log_out.write("%5.2f - %5.2f: %.5f (%d) %.1f %.1f %.3e\n"
                      % (low, high, R, len(tmp_obs.data()),
                         flex.mean(tmp_obs.data()), flex.mean(tmp_calc.data()), scale))

    log_out.write("Overall R = %.5f (scale=%.3e, %%comp=%.3f)\n\n"
                  % (calc_R(obs, calc, do_scale=not scale_B) + (obs.completeness()*100.,)))

def scale_data(indices, iobs, scale_ref, parameter, calc_cc):
    k, b, cc = 1, float("nan"), float("nan")

    sortp = yamtbx_utils_ext.sort_permutation_fast_less(indices)
    indices = indices.select(sortp)
    iobs = iobs.select(sortp)

    sel0, sel1 = yamtbx_utils_ext.my_common_indices(scale_ref.indices(), indices)
    #indices = indices.select(sel1)
    iobs_c = iobs.select(sel1)
    ref_c = scale_ref.data().select(sel0)

    if iobs_c.size() < 10 and ref_c.size() < 10:
        return k, b, cc

    if parameter == "k":
        k = flex.sum(ref_c*iobs_c) / flex.sum(flex.pow2(iobs_c))
    elif parameter == "kb":
        from yamtbx.dataproc.scale_data import kBdecider
        kbd = kBdecider(scale_ref,
                        miller.array(scale_ref.customized_copy(indices=indices), data=iobs))
        k, b = kbd.run()
    else:
        raise RuntimeError("Never reaches here")  # raising a bare string is invalid in Python 3

    if calc_cc:
        corr = flex.linear_correlation(ref_c, iobs_c)
        if corr.is_well_defined():
            cc = corr.coefficient()

    return k, b, cc

def df(self, x):
    k, B = float(x[0]), float(x[1])
    d_star_sq = self.calc.d_star_sq().data()
    tmp = self.obs.data() - k * flex.exp(-B*d_star_sq) * self.calc.data()
    dfdk = flex.sum(-2. * tmp * flex.exp(-B*d_star_sq) * self.calc.data())
    dfdB = flex.sum(2. * tmp * k * d_star_sq * flex.exp(-B*d_star_sq) * self.calc.data())
    return numpy.array([dfdk, dfdB])

def calc_k(f_obs, i_calc):
    fc = flex.sqrt(i_calc)
    num = flex.sum(f_obs * fc)
    den = flex.sum(fc * fc)
    assert den != 0
    k = num / den
    return k

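# Hedged usage sketch for calc_k above (synthetic numbers, not from any real
# dataset; the flex import mirrors what this codebase already uses): when
# f_obs is exactly proportional to sqrt(i_calc), the least-squares scale
# sum(f_obs*fc)/sum(fc*fc) recovers the proportionality constant.
def _demo_calc_k():
    from cctbx.array_family import flex
    i_calc_demo = flex.double([4.0, 9.0, 16.0])
    f_obs_demo = 2.5 * flex.sqrt(i_calc_demo)
    assert abs(calc_k(f_obs_demo, i_calc_demo) - 2.5) < 1.e-10
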
def method2_include_pehHKL_I_explicitly():
    # There's no reason why we can't get the Gi's by analytical least squares
    skeys = list(G.images_strong.keys())
    skeys.sort()
    for key in skeys:
        print("image", key)
        numerator = 0.
        denominator = 0.
        nkeys = len(G.images_strong[key])
        for ikey, HKL in enumerate(G.images_strong[key]):
            MD = G.images_strong[key][HKL]
            #from IPython import embed; embed()
            terms1 = MD["model"] * per_HKL_I[HKL] / MD["simtbx_intensity"]
            terms2 = terms1 * terms1
            terms0 = MD["obs"] * terms1
            numerator += flex.sum(terms0)
            denominator += flex.sum(terms2)
        G.images_Gi[key] = numerator / denominator
        for ikey, HKL in enumerate(G.images_strong[key]):
            plt.subplot(nkeys, 1, 1 + ikey)
            MD = G.images_strong[key][HKL]
            assert len(MD["obs"]) == 61
            print(HKL, MD, "7122 lookup", per_HKL_I_7122[HKL],
                  per_HKL_I_7122[HKL] / MD["simtbx_intensity"])
            plt.plot(range(7090, 7151),
                     (G.images_Gi[key]) * MD["model"] * per_HKL_I[HKL] / MD["simtbx_intensity"],
                     "b-")
            plt.plot(range(7090, 7151), MD["obs"], "r-")
        # Start here: can we show we are actually at a minimum of the target
        # function, considering it is LSQ?
        plt.show()
        if key % 100 == 0:
            print(key, "Gi:", G.images_Gi[key])

def r_value(self, out):
    top = flex.abs(self.der_primset.data() - self.nat_primset.data())
    bottom = flex.abs(self.der_primset.data() + self.nat_primset.data()) / 2.0
    top = flex.sum(top)
    bottom = flex.sum(bottom)
    print("Current R value: %4.3f" % (top / bottom), file=out)

def exercise_01(grid_step=0.03, d_min=1.0, wing_cutoff=1.e-9):
    xrs = random_structure.xray_structure(
        space_group_info=sgtbx.space_group_info("P 1"),
        elements=["O", "N", "C", "P", "S", "U", "AU"]*1,
        random_u_iso=True,
        general_positions_only=False)
    # avoid excessive_range_error_limit crash
    bs = xrs.extract_u_iso_or_u_equiv()*adptbx.u_as_b(1)
    sel = bs < 1
    bs = bs.set_selected(sel, 1)
    xrs.set_b_iso(values=bs)
    #
    p = xrs.unit_cell().parameters()
    timer = user_plus_sys_time()
    res = manager(
        nx=int(p[0]/grid_step),
        ny=int(p[1]/grid_step),
        nz=int(p[2]/grid_step),
        scattering_type_registry=xrs.scattering_type_registry(),
        unit_cell=xrs.unit_cell(),
        scatterers=xrs.scatterers(),
        wing_cutoff=wing_cutoff)
    print("time: %10.4f" % timer.elapsed())
    f_calc_dir = xrs.structure_factors(d_min=d_min, algorithm="direct").f_calc()
    #
    f_calc_den = f_calc_dir.structure_factors_from_map(map=res.density_array, use_scale=True)
    f1 = flex.abs(f_calc_dir.data())
    f2 = flex.abs(f_calc_den.data())
    r = flex.sum(flex.abs(f1-f2))/flex.sum(f2)
    print("r-factor:", r)
    assert r < 1.e-4, r

def linear_fit2(x, y, s):
    # No constant subtraction (original crysol approach).
    # x: Ical; y: Iexp; s: sigma
    var = s*s
    sum_x2 = flex.sum(x*x/var)
    sum_xy = flex.sum(x*y/var)
    N = x.size()
    scale = sum_xy/sum_x2
    offset = 0
    return scale, offset

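# Hedged sanity sketch for linear_fit2 (illustrative values): with
# Iexp = 3*Ical, the weighted scale sum(xy/var)/sum(x^2/var) is exactly 3,
# and the offset is pinned at 0 by construction.
def _demo_linear_fit2():
    from cctbx.array_family import flex
    x_demo = flex.double([1.0, 2.0, 4.0])   # Ical
    y_demo = 3.0 * x_demo                   # Iexp
    s_demo = flex.double([0.5, 0.5, 0.5])   # sigma
    scale_demo, offset_demo = linear_fit2(x_demo, y_demo, s_demo)
    assert abs(scale_demo - 3.0) < 1.e-10 and offset_demo == 0
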
def exercise_sampled_model_density_1():
    import iotbx.pdb
    pdb_str1 = """
CRYST1   10.000   10.000   10.000  90.00  90.00  90.00 P 1
ATOM      1  CB  PHE A   1       5.000   5.000   5.000  1.00 15.00           C
ANISOU    1  CB  PHE A   1      900   2900    100      0      0      0       C
TER
END
"""
    pdb_str2 = """
CRYST1   10.000   10.000   10.000  90.00  90.00  90.00 P 1
ATOM      1  CB  PHE A   1       5.000   5.000   5.000  1.00 15.00           C
TER
END
"""
    #
    for pdb_str in [pdb_str1, pdb_str2]:
        print()
        pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_str)
        xrs = pdb_inp.xray_structure_simple()
        #
        crystal_gridding = maptbx.crystal_gridding(
            unit_cell=xrs.unit_cell(),
            space_group_info=xrs.space_group_info(),
            symmetry_flags=maptbx.use_space_group_symmetry,
            step=0.1)
        m = mmtbx.real_space.sampled_model_density(
            xray_structure=xrs,
            n_real=crystal_gridding.n_real()).data()
        #
        max_index = [(i - 1) // 2 for i in crystal_gridding.n_real()]
        complete_set = miller.build_set(
            crystal_symmetry=xrs.crystal_symmetry(),
            anomalous_flag=False,
            max_index=max_index)
        indices = complete_set.indices()
        indices.append((0, 0, 0))
        #
        complete_set = complete_set.customized_copy(indices=indices)
        f_obs_cmpl = complete_set.structure_factors_from_map(
            map=m,
            use_scale=True,
            anomalous_flag=False,
            use_sg=False)
        fc = complete_set.structure_factors_from_scatterers(xray_structure=xrs).f_calc()
        #
        f1 = abs(fc).data()
        f2 = abs(f_obs_cmpl).data()
        r = 200 * flex.sum(flex.abs(f1 - f2)) / flex.sum(f1 + f2)
        assert r < 0.5
        print(r)
        #
        fft_map = miller.fft_map(
            crystal_gridding=crystal_gridding,
            fourier_coefficients=f_obs_cmpl)
        fft_map.apply_volume_scaling()
        m_ = fft_map.real_map_unpadded()
        print(m.as_1d().min_max_mean().as_tuple())
        print(m_.as_1d().min_max_mean().as_tuple())
        assert approx_equal(m.as_1d().min_max_mean().as_tuple(),
                            m_.as_1d().min_max_mean().as_tuple(),
                            1.e-3)  # Must be smaller!?

def exercise_py_LS(obs, f_calc, weighting, verbose):
    weighting.computing_derivatives_wrt_f_c = True
    r = xray.unified_least_squares_residual(obs, weighting=weighting)
    rt = r(f_calc, compute_derivatives=True)
    if obs.is_xray_amplitude_array():
        assert(isinstance(rt, xray.targets_least_squares_residual))
    elif obs.is_xray_intensity_array():
        assert(isinstance(rt, xray.targets_least_squares_residual_for_intensity))
    scale_factor = rt.scale_factor()
    gr_ana = rt.derivatives()
    K = scale_factor
    w = weighting.weights
    if w is not None:
        w = w.deep_copy()
    dw_dfc = weighting.derivatives_wrt_f_c
    if dw_dfc is not None:
        dw_dfc = dw_dfc.deep_copy()

    y_o = obs.data()
    if w is None:
        w = flex.double(obs.size(), 1)
    sum_w_y_o_sqr = flex.sum(w * y_o * y_o)
    f_c = f_calc.data().deep_copy()
    if obs.is_xray_amplitude_array():
        y_c = flex.abs(f_c)
        der = f_c * (1/y_c)
    elif obs.is_xray_intensity_array():
        y_c = flex.norm(f_c)
        der = 2 * f_c
    gr_explicit = w*2*K*(K*y_c - y_o) * der / sum_w_y_o_sqr
    sum_w_squares = flex.sum(w*flex.pow2(K*y_c - y_o))
    assert approx_equal(gr_ana, gr_explicit)

    gr_fin = flex.complex_double()
    eps = 1.e-6
    for i_refl in range(obs.size()):
        gc = []
        for i_part in [0, 1]:
            fc0 = f_calc.data()[i_refl]
            ts = []
            for signed_eps in [eps, -eps]:
                if (i_part == 0):
                    f_calc.data()[i_refl] = complex(fc0.real + signed_eps, fc0.imag)
                else:
                    f_calc.data()[i_refl] = complex(fc0.real, fc0.imag + signed_eps)
                rt = r(f_calc, compute_derivatives=False, scale_factor=scale_factor)
                ts.append(rt.target())
            f_calc.data()[i_refl] = fc0
            gc.append((ts[0]-ts[1])/(2*eps))
        gr_fin.append(complex(*gc))
    if (verbose):
        print("ana:", list(gr_ana))
        print("fin:", list(gr_fin))
    if dw_dfc is None:
        assert approx_equal(gr_fin, gr_ana)
    else:
        gr_total_ana = (
            gr_ana
            + dw_dfc*(flex.pow2(K*y_c - y_o)/sum_w_y_o_sqr
                      - sum_w_squares*flex.pow2(y_o)/sum_w_y_o_sqr**2))
        assert approx_equal(gr_fin, gr_total_ana)

def _rmsds_core(self, reflections):
    """calculate unweighted RMSDs"""
    resid_x = flex.sum(reflections["x_resid2"])
    resid_y = flex.sum(reflections["y_resid2"])
    resid_z = flex.sum(reflections["delpsical2"])
    n = len(reflections)
    rmsds = (sqrt(resid_x / n), sqrt(resid_y / n), sqrt(resid_z / n))
    return rmsds

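# Hedged sketch of the expected inputs (assuming a DIALS reflection table
# with pre-squared residual columns, as the column names suggest):
def _demo_rmsds_core_inputs():
    from dials.array_family import flex
    from math import sqrt
    refl = flex.reflection_table()
    refl["x_resid2"] = flex.double([0.01, 0.04])
    refl["y_resid2"] = flex.double([0.09, 0.16])
    refl["delpsical2"] = flex.double([0.25, 0.36])
    n = len(refl)
    # RMSD in x is the root of the mean squared residual
    assert abs(sqrt(flex.sum(refl["x_resid2"]) / n) - sqrt(0.025)) < 1.e-12
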
def poly_residual(xp, y, params):
    """Compute the residual between the observations y[i] and
    sum_j params[j] x[i]^j. For efficiency, x[i]^j are pre-calculated in xp."""
    c = len(y)
    e = flex.double([flex.sum(xp[j] * params) for j in range(c)])
    return flex.sum(flex.pow2(y - e))

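# Sketch of how the pre-computed xp is presumably laid out (an inference from
# the docstring, not taken from the original code): xp[i] holds the powers
# x[i]^0 .. x[i]^(n-1), so flex.sum(xp[i] * params) evaluates the polynomial.
def _demo_poly_residual():
    from scitbx.array_family import flex
    params_demo = flex.double([1.0, 2.0])   # y = 1 + 2x
    x_demo = [0.0, 1.0, 2.0]
    xp_demo = [flex.double([xi**j for j in range(params_demo.size())])
               for xi in x_demo]
    y_demo = flex.double([1.0, 3.0, 5.0])   # exact observations
    assert poly_residual(xp_demo, y_demo, params_demo) < 1.e-12
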
def algorithm_3(i_obs, fc, f_masks):
    """Unphased two-step search"""
    F = [fc] + f_masks
    Gnm = []
    cs = {}
    cntr = 0
    nm = []
    # Compute and store Gnm
    for n, Fn in enumerate(F):
        for m, Fm in enumerate(F):
            if m < n:
                continue
            Gnm.append(flex.real(Fn.data() * flex.conj(Fm.data())))
            cs[(n, m)] = cntr
            cntr += 1
            nm.append((n, m))
    # Keep track of indices for "upper triangular matrix vs full"
    for k, v in zip(list(cs.keys()), list(cs.values())):
        i, j = k
        if i == j:
            continue
        else:
            cs[(j, i)] = v
    # Generate and solve system Ax=b, x = A_1*b
    A = []
    b = []
    for u, Gnm_u in enumerate(Gnm):
        for v, Gnm_v in enumerate(Gnm):
            scale = 2
            n, m = nm[v]
            if n == m:
                scale = 1
            A.append(flex.sum(Gnm_u * Gnm_v) * scale)
        b.append(flex.sum(Gnm_u * i_obs.data()))
    A = matrix.sqr(A)
    A_1 = A.inverse()
    b = matrix.col(b)
    x = A_1 * b
    # Expand Xmn from solution x
    Xmn = []
    for n, Fn in enumerate(F):
        rows = []
        for m, Fm in enumerate(F):
            x_ = x[cs[(n, m)]]
            rows.append(x_)
        Xmn.append(rows)
    # Do formula (19)
    lnK = []
    for j, Fj in enumerate(F):
        t1 = flex.sum(flex.log(flex.double(Xmn[j])))
        t2 = 0
        for n, Fn in enumerate(F):
            for m, Fm in enumerate(F):
                t2 += math.log(Xmn[n][m])
        t2 = t2 / (2 * len(F))
        lnK.append(1 / len(F) * (t1 - t2))
    return [math.exp(x) for x in lnK]

def calc_R(obs, calc, do_scale=True):
    #obs, calc = obs.common_sets(calc, assert_is_similar_symmetry=False)
    if do_scale:
        scale = flex.sum(obs.data()*calc.data()) / flex.sum(flex.pow2(calc.data()))
    else:
        scale = 1.
    R = flex.sum(flex.abs(obs.data() - scale*calc.data())) \
        / flex.sum(0.5 * obs.data() + 0.5 * scale*calc.data())
    return R, scale

def get_r_split(self):
    try:
        r_split_bin = (1 / math.sqrt(2)) * (
            flex.sum(flex.abs(self.I_even - self.I_odd)) /
            (flex.sum(self.I_even + self.I_odd) * 0.5))
    except Exception as e:
        print("Warning: R_split calculation failed.")
        print(e)
        r_split_bin = 0
    return r_split_bin if self.get_size() else 0

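# Worked numeric check of the R_split formula used above (standalone,
# synthetic half-dataset intensities):
# R_split = (1/sqrt(2)) * sum|I_even - I_odd| / (0.5 * sum(I_even + I_odd)).
def _demo_r_split():
    import math
    from cctbx.array_family import flex
    I_even = flex.double([10.0, 20.0])
    I_odd = flex.double([12.0, 18.0])
    r = (1 / math.sqrt(2)) * (flex.sum(flex.abs(I_even - I_odd)) /
                              (flex.sum(I_even + I_odd) * 0.5))
    assert abs(r - (1 / math.sqrt(2)) * (4.0 / 30.0)) < 1.e-12
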
def find_similar_matches(self, target, matches, used, overlap_thres):
    tmp_a = flex.double(self.set_a.size(), 0.0).set_selected(target[0], 1.0)
    result = flex.size_t()
    for ii in range(len(matches)):
        if not used[ii]:
            match = matches[ii][0]
            tmp_b = flex.double(self.set_a.size(), 0.0).set_selected(match, 1.0)
            similar = flex.sum(tmp_a * tmp_b) / flex.sum(tmp_a)
            if similar > overlap_thres:
                result.append(ii)
    return result

def _scale_helper(num, den, selection=None, num_num=False):
    from cctbx.array_family import flex
    if (selection is not None):
        num = num.select(selection)
        den = den.select(selection)
    if (den.size() == 0):
        raise RuntimeError("No data for scale calculation.")
    denom = flex.sum(den*den)
    if (denom == 0):
        raise RuntimeError("Zero denominator in scale calculation.")
    if (num_num):
        return flex.sum(num*num) / denom
    return flex.sum(num*den) / denom

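# Hedged usage sketch for _scale_helper (synthetic values): the default mode
# returns the least-squares scale sum(num*den)/sum(den*den); with num = 2*den
# that is exactly 2, and num_num=True returns sum(num*num)/sum(den*den) = 4.
def _demo_scale_helper():
    from cctbx.array_family import flex
    den_demo = flex.double([1.0, 2.0, 3.0])
    num_demo = 2.0 * den_demo
    assert abs(_scale_helper(num_demo, den_demo) - 2.0) < 1.e-12
    assert abs(_scale_helper(num_demo, den_demo, num_num=True) - 4.0) < 1.e-12
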
def algorithm_4(f_obs, F, phase_source, max_cycles=100, auto_converge_eps=1.e-7,
                use_cpp=True):
    """Phased simultaneous search (alg4)"""
    fc, f_masks = F[0], F[1:]
    fc = fc.deep_copy()
    F = [fc] + F[1:]
    # C++ version
    if (use_cpp):
        return mosaic_ext.alg4([f.data() for f in F],
                               f_obs.data(),
                               phase_source.data(),
                               max_cycles,
                               auto_converge_eps)
    # Python version (1.2-3 times slower, but much more readable!)
    cntr = 0
    x_prev = None
    while True:
        f_obs_cmpl = f_obs.phase_transfer(phase_source=phase_source)
        A = []
        b = []
        for j, Fj in enumerate(F):
            A_rows = []
            for n, Fn in enumerate(F):
                Gjn = flex.real(Fj.data() * flex.conj(Fn.data()))
                A_rows.append(flex.sum(Gjn))
            Hj = flex.real(Fj.data() * flex.conj(f_obs_cmpl.data()))
            b.append(flex.sum(Hj))
            A.extend(A_rows)
        A = matrix.sqr(A)
        A_1 = A.inverse()
        b = matrix.col(b)
        x = A_1 * b
        #
        fc_d = flex.complex_double(phase_source.indices().size(), 0)
        for i, f in enumerate(F):
            fc_d += f.data() * x[i]
        phase_source = phase_source.customized_copy(data=fc_d)
        x_ = x[:]
        #
        cntr += 1
        if (cntr > max_cycles):
            break
        if (x_prev is None):
            x_prev = x_[:]
        else:
            max_diff = flex.max(flex.abs(flex.double(x_prev) - flex.double(x_)))
            if (max_diff <= auto_converge_eps):
                break
            x_prev = x_[:]
    return x_

def aniso_ratio_p_value(self, rat):
    return -3  # XXX unconditional early return; everything below is unreachable
    coefs = flex.double(
        [-1.7647171873040273, -3.4427008004789115, -1.097150249786379,
         0.17303317520973829, 0.35955513268118661, 0.066276397961476205,
         -0.064575726062529232, -0.0063025873711609016, 0.0749945566688624,
         0.14803702885155121, 0.154284467861286])
    fit_e = scitbx.math.chebyshev_polynome(11, 0, 1.0, coefs)
    x = flex.double(range(1000))/999.0
    start = int(rat*1000)
    norma = flex.sum(flex.exp(fit_e.f(x)))/x[1]
    x = x*(1-rat)+rat
    norma2 = flex.sum(flex.exp(fit_e.f(x)))/(x[1]-x[0])
    return -math.log(norma2/norma)

def linear_fit(x, y, s):
    # Standard least-squares fitting
    var = s*s
    sum_x2 = flex.sum(x*x/var)
    #sum_y2 = flex.sum(y*y/var)
    sum_xy = flex.sum(x*y/var)
    sum_x = flex.sum(x/var)
    sum_y = flex.sum(y/var)
    N = x.size()
    sum_inv_var = flex.sum(1.0/var)
    det = sum_inv_var * sum_x2 - sum_x * sum_x
    scale = (sum_inv_var * sum_xy - sum_x*sum_y) / det
    offset = (sum_x2*sum_y - sum_x*sum_xy) / det
    return scale, offset

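# Hedged check of the closed-form weighted fit above (noiseless synthetic
# line, unit sigmas): y = 2x + 1 should give scale=2, offset=1 to rounding.
def _demo_linear_fit():
    from cctbx.array_family import flex
    x_demo = flex.double([0.0, 1.0, 2.0, 3.0])
    y_demo = 2.0 * x_demo + 1.0
    s_demo = flex.double(x_demo.size(), 1.0)
    scale_demo, offset_demo = linear_fit(x_demo, y_demo, s_demo)
    assert abs(scale_demo - 2.0) < 1.e-10 and abs(offset_demo - 1.0) < 1.e-10
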
def f_obs(self):
    fo2 = self.fo2.as_intensity_array()
    f_obs = fo2.as_amplitude_array()
    if self.use_set_completion:
        if self._f_mask is not None:
            f_model = self.f_model()
        else:
            f_model = self.f_calc
        data_substitute = flex.abs(f_model.data())
        scale_factor = flex.sum(f_obs.data()) / flex.sum(
            f_model.common_set(f_obs).as_amplitude_array().data())
        f_obs = f_obs.matching_set(
            other=self.complete_set,
            data_substitute=scale_factor * flex.abs(f_model.data()),
            sigmas_substitute=0)
    return f_obs

def exercise(prefix="tst_helix_sheet_recs_as_pdb_files"):
    of = open(prefix+".pdb", "w")
    print(pdb_str, file=of)
    of.close()
    xrs1 = iotbx.pdb.input(file_name=prefix+".pdb").xray_structure_simple()
    easy_run.call("phenix.helix_sheet_recs_as_pdb_files %s" % (prefix+".pdb"))
    xrs2 = iotbx.pdb.input(
        file_name="HELIX_1_1_ALA_E_1_ALA_E_16_1_16.pdb").xray_structure_simple(
            crystal_symmetry=xrs1.crystal_symmetry())
    fc1 = xrs1.structure_factors(d_min=3).f_calc()
    fc2 = fc1.structure_factors_from_scatterers(xray_structure=xrs2).f_calc()
    fc1 = flex.abs(abs(fc1).data())
    fc2 = flex.abs(abs(fc2).data())
    assert flex.sum(flex.abs(fc1-fc2))/flex.sum(flex.abs(fc1+fc2)) < 1.e-3

def method2_include_pehHKL_I_explicitly():
    # There's no reason why we can't get the Gi's by analytical least squares
    for key in G.images_strong:
        numerator = 0.
        denominator = 0.
        nkeys = len(G.images_strong[key])
        for ikey, HKL in enumerate(G.images_strong[key]):
            MD = G.images_strong[key][HKL]
            terms1 = G.images_strong[key][HKL]["model"]  # it's already in the model: * per_HKL_I[HKL]
            terms2 = terms1 * terms1
            terms0 = G.images_strong[key][HKL]["obs"] * terms1
            numerator += flex.sum(terms0)
            denominator += flex.sum(terms2)
        G.images_Gi[key] = numerator/denominator
        if key % 100 == 0:
            print(key, "Gi:", G.images_Gi[key])

def __init__(self, f_obs, r_free_flags, xray_structure, f_calc, target_memory):
    self.f_obs = f_obs
    self.r_free_flags = r_free_flags
    self.xray_structure = xray_structure
    self.f_calc = f_calc
    if (target_memory is None):
        # XXX could be more elegant!
        den = self.f_obs.data()
        num = flex.abs(self.f_calc.data())
        denom = flex.sum(num*den)
        numerator = flex.sum(den*den)
        if (denom == 0):
            raise RuntimeError("Zero denominator in scale calculation.")
        previous_overall_scaleK = numerator/denom
        previous_overall_scaleU = 0.
        previous_variances = None
        adaptor = phaser.phenix_adaptors.sad_target.data_adaptor(
            f_obs=f_obs, r_free_flags=r_free_flags, verbose=True)
        self.refine_sad_object = adaptor.target(
            xray_structure=xray_structure,
            previous_overall_scaleK=previous_overall_scaleK,
            previous_overall_scaleU=previous_overall_scaleU,
            previous_variances=previous_variances)
        self.refine_sad_object.set_f_calc(f_calc=f_calc)
        target_memory = self.target_memory()
    assert len(target_memory) == 4
    assert target_memory[0] == "ml_sad"
    previous_overall_scaleK = target_memory[1]
    previous_overall_scaleU = target_memory[2]
    previous_variances = target_memory[3]
    adaptor = phaser.phenix_adaptors.sad_target.data_adaptor(
        f_obs=f_obs, r_free_flags=r_free_flags, verbose=True)
    self.refine_sad_object = adaptor.target(
        xray_structure=xray_structure,
        previous_overall_scaleK=previous_overall_scaleK,
        previous_overall_scaleU=previous_overall_scaleU,
        previous_variances=previous_variances)
    self.refine_sad_object.set_f_calc(f_calc=f_calc)
    self.refine_sad_object.reject_outliers()

def update_target_and_grads(self, x):
    self.x = x
    s = 1  #180/math.pi
    i_model = flex.double(self.i_obs.data().size(), 0)
    for n, kn in enumerate(self.x):
        for m, km in enumerate(self.x):
            tmp = self.F[n].data() * flex.conj(self.F[m].data())
            i_model += kn * km * flex.real(tmp)
            #pn = self.F[n].phases().data()*s
            #pm = self.F[m].phases().data()*s
            #Fn = flex.abs(self.F[n].data())
            #Fm = flex.abs(self.F[m].data())
            #i_model += kn*km*Fn*Fm*flex.cos(pn-pm)
    diff = i_model - self.i_obs.data()
    t = flex.sum(diff * diff) / 4
    #
    g = flex.double()
    for j in range(len(self.F)):
        tmp = flex.double(self.i_obs.data().size(), 0)
        for m, km in enumerate(self.x):
            tmp += km * flex.real(self.F[j].data() * flex.conj(self.F[m].data()))
            #pj = self.F[j].phases().data()*s
            #pm = self.F[m].phases().data()*s
            #Fj = flex.abs(self.F[j].data())
            #Fm = flex.abs(self.F[m].data())
            #tmp += km * Fj*Fm*flex.cos(pj-pm)
        g.append(flex.sum(diff * tmp))
    self.t = t
    self.g = g
    #
    if self.use_curvatures:
        d = flex.double()
        for j in range(len(self.F)):
            tmp1 = flex.double(self.i_obs.data().size(), 0)
            tmp2 = flex.double(self.i_obs.data().size(), 0)
            for m, km in enumerate(self.x):
                zz = flex.real(self.F[j].data() * flex.conj(self.F[m].data()))
                tmp1 += km * zz
                tmp2 += zz
            d.append(flex.sum(tmp1 * tmp1 + tmp2))
        self.d = d

def show_overall_observations(obs, redundancy, I, I_SIGI, out=None):
    if out is None:
        import sys
        out = sys.stdout
    from libtbx.str_utils import format_value
    obs.setup_binner(n_bins=15)
    result = []
    for i_bin in obs.binner().range_used():
        sel_w = obs.binner().selection(i_bin)
        sel_fo_all = obs.select(sel_w)
        d_max_, d_min_ = sel_fo_all.d_max_min()
        d_range = obs.binner().bin_legend(
            i_bin=i_bin, show_bin_number=False, show_counts=False)
        sel_redundancy = redundancy.select(sel_w)
        sel_absent = sel_redundancy.count(0)
        sel_complete_tag = "[%d/%d]" % (sel_redundancy.size() - sel_absent,
                                        sel_redundancy.size())
        sel_measurements = flex.sum(sel_redundancy)
        sel_data = I.select(sel_w)
        sel_sig = I_SIGI.select(sel_w)
        if (sel_data.size() > 0 and sel_measurements > 0):
            bin = resolution_bin(
                i_bin=i_bin,
                d_range=d_range,
                redundancy=flex.mean(sel_redundancy.as_double()),
                complete_tag=sel_complete_tag,
                measurements=sel_measurements,
                mean_I=flex.sum(sel_data) / sel_measurements,
                mean_I_sigI=flex.sum(sel_sig) / sel_measurements,
            )
            result.append(bin)
    print("\n Bin  Resolution Range  Compl.  <Redundancy>  #Measurements  <I>  <I/sig(I)>",
          file=out)
    for bin in result:
        fmt = " %s %s %s %s %s %s %s"
        print(fmt % (
            format_value("%3d", bin.i_bin),
            format_value("%-13s", bin.d_range),
            format_value("%13s", bin.complete_tag),
            format_value("%4.0f", bin.redundancy),
            format_value("%8d", bin.measurements),
            format_value("%8.1f", bin.mean_I),
            format_value("%8.1f", bin.mean_I_sigI),
        ), file=out)

def log_p_obs_given_gamma(self, gamma):
    dof = self.degrees_of_freedom
    x_gamma = (gamma * self.delta_fc2.data() - self.delta_fo2.data()) \
              / self.delta_fo2.sigmas()
    if self.probability_plot_slope is not None:
        x_gamma /= self.probability_plot_slope
    return -(1+dof)/2 * flex.sum(flex.log(flex.pow2(x_gamma) + dof))

def map_stat(distances, map_values):
    result = []
    # Two passes: first find the most populated bin, then compute per-bin stats
    n_points_max = -1
    nn = 20
    x = [[i/100., i/100. + nn/100.] for i in range(0, 800, nn)]
    for x_ in x:
        l, r = x_
        sel = distances >= l
        sel &= distances < r
        mv = map_values.select(sel)
        if (mv.size() > n_points_max):
            n_points_max = mv.size()
    #
    for x_ in x:
        l, r = x_
        sel = distances >= l
        sel &= distances < r
        mv = map_values.select(sel)
        if (mv.size() > 0):
            sz = mv.size()
            rms = math.sqrt(flex.sum(mv*mv)/sz)
            #fr = sz*100./map_values.size()
            fr = sz*1./n_points_max
            result.append([l, r, flex.mean(mv), rms, sz, fr])
    return result

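# Hedged usage sketch for map_stat (synthetic distances/values; relies on the
# module-level math/flex imports assumed above). Bins are 0.2 wide from 0 to 8,
# as set up in the function.
def _demo_map_stat():
    from cctbx.array_family import flex
    distances_demo = flex.double([0.05, 0.15, 0.25, 0.25])
    values_demo = flex.double([1.0, 2.0, 3.0, 5.0])
    rows = map_stat(distances_demo, values_demo)
    # first bin [0.0, 0.2) holds two points with mean 1.5
    l, r, mean_v, rms_v, n, fr = rows[0]
    assert (l, r, n) == (0.0, 0.2, 2) and abs(mean_v - 1.5) < 1.e-12
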
def compute_functional(self):
    top = self.diff1 - self.diff2*self.x[0]
    top = top*top
    bottom = self.v1 + self.v2*self.x[0]*self.x[0]
    result = top/bottom
    result = flex.sum(result)
    return result

def compute_chi_sq(fo_sq, fc_sq, a, b):
    # 'weighting' and 'scale_factor' are taken from the enclosing scope
    weighting.a = a
    weighting.b = b
    weights = weighting(fo_sq.data(), fo_sq.sigmas(), fc_sq.data(), scale_factor)
    return (flex.sum(weights * flex.pow2(fo_sq.data() - scale_factor * fc_sq.data())))

def set_refinable_parameters(xray_structure, parameters, selections,
                             enforce_positivity=False):
    # XXX PVA: Code below is terribly inefficient and MUST be moved into C++
    sz = xray_structure.scatterers().size()
    i = 0
    for sel in selections:
        # pre-check for positivity begin
        # spread negative occupancies across i_seqs having positive ones
        par_all = flex.double()
        par_neg = flex.double()
        i_p = i
        for sel_ in sel:
            p = parameters[i_p]
            par_all.append(p)
            if (p < 0):
                par_neg.append(p)
            i_p += 1
        if (enforce_positivity and par_neg.size() > 0):
            par_all = par_all - flex.min(par_all)
            fs = flex.sum(par_all)
            if (fs != 0):
                par_all = par_all / fs
        # pre-check for positivity end
        for j, sel_ in enumerate(sel):
            sel__b = flex.bool(sz, flex.size_t(sel_))
            xray_structure.set_occupancies(par_all[j], sel__b)
            i += 1

def compute_functional_and_gradients(self):
    coord_x = self.x[0:self.NN]
    coord_y = self.x[self.NN:2*self.NN]
    inner = self.rij_matrix - coord_x.matrix_outer_product(coord_x) \
            - coord_y.matrix_outer_product(coord_y)
    elements = self.wij_matrix*inner*inner
    f = 0.5 * flex.sum(elements)

    # quick gradients
    wrij_matrix = self.wij_matrix * self.rij_matrix
    term_1 = wrij_matrix.matrix_multiply(coord_x).concatenate(
        wrij_matrix.matrix_multiply(coord_y))

    temp_2 = self.wij_matrix * (coord_x.matrix_outer_product(coord_x))
    term_2x = (temp_2).matrix_multiply(coord_x)
    term_2y = (temp_2).matrix_multiply(coord_y)

    temp_3 = self.wij_matrix * (coord_y.matrix_outer_product(coord_y))
    term_3x = (temp_3).matrix_multiply(coord_x)
    term_3y = (temp_3).matrix_multiply(coord_y)

    term_2 = term_2x.concatenate(term_2y)
    term_3 = term_3x.concatenate(term_3y)
    grad = -2. * (term_1 - term_2 - term_3)
    if self.verbose:
        print("Functional", f)
    #from matplotlib import pyplot as plt
    #plt.plot(coord_x,coord_y,"r.")
    #plt.axes().set_aspect("equal")
    #plt.show()
    return f, grad

def vectors(self):
    self.database.initialize_tables_and_insert_command()
    self.tile_rmsd = [0.]*64
    for run, tokens in self.literals():
        try:
            itile = self.register_line(
                float(tokens[2]), float(tokens[3]),
                float(tokens[5]), float(tokens[6]),
                float(tokens[8]), float(tokens[9]),
                float(tokens[11]), float(tokens[12]))
            if run is not None:
                self.database.insert(run, itile, tokens)
            yield "OK"
        except ValueError:
            print("ValueError")
    self.database.send_insert_command()
    for x in range(64):
        if self.tilecounts[x] == 0:
            continue
        self.radii[x] /= self.tilecounts[x]
        sum_cv = matrix.col(self.mean_cv[x])
        self.mean_cv[x] = sum_cv/self.tilecounts[x]
        mean_cv = matrix.col(self.mean_cv[x])
        selection = (self.master_tiles == x)
        selected_cv = self.master_cv.select(selection)
        if len(selected_cv) > 0:
            self.tile_rmsd[x] = math.sqrt(
                flex.mean(flex.double(
                    [(matrix.col(cv) - mean_cv).length_sq() for cv in selected_cv])))
        else:
            self.tile_rmsd[x] = 0.
    self.overall_N = flex.sum(flex.int([int(t) for t in self.tilecounts]))
    self.overall_cv = matrix.col(self.overall_cv)/self.overall_N
    self.overall_rmsd = math.sqrt(self.sum_sq_cv / self.overall_N)

def __init__(self,
             target_functor,
             selections,
             refine_adp,
             refine_occ,
             compute_gradients=True,
             rtg=None,
             weight=None):
    assert [refine_adp, refine_occ].count(True) == 1
    t_r = target_functor(compute_gradients=compute_gradients)
    self.f = t_r.target_work()
    if (rtg is not None):
        self.f = self.f*weight + rtg.residual_sum
    if (compute_gradients):
        target_grads_wrt_par = t_r.gradients_wrt_atomic_parameters(
            u_iso=refine_adp, occupancy=refine_occ)
        if (rtg is not None):
            target_grads_wrt_par = target_grads_wrt_par*weight + rtg.gradients
        self.grads_wrt_par = []
        for sel in selections:
            target_grads_wrt_par_sel = target_grads_wrt_par.select(sel)
            self.grads_wrt_par.append(flex.sum(target_grads_wrt_par_sel))
    else:
        self.grads_wrt_par = None

def compute_gradient(self):
    tmp_bottom = self.v1 + self.v2*self.x[0]*self.x[0]
    tmp_top = self.diff1 - self.diff2*self.x[0]
    part1 = -2.0*self.x[0]*tmp_top*tmp_top*self.v2/(tmp_bottom*tmp_bottom)
    part2 = -2.0*self.diff2*tmp_top/tmp_bottom
    result = flex.sum(part1 + part2)
    return flex.double([result])

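# Hedged sanity sketch (assumes self.x is a one-element flex.double, as the
# indexing in compute_functional/compute_gradient above suggests): compare
# the analytical gradient against a central finite difference of the
# functional evaluated on the same object.
def _check_gradient(obj, eps=1.e-6):
    x0 = obj.x[0]
    obj.x[0] = x0 + eps
    f_plus = obj.compute_functional()
    obj.x[0] = x0 - eps
    f_minus = obj.compute_functional()
    obj.x[0] = x0  # restore state
    fd = (f_plus - f_minus) / (2 * eps)
    return fd, obj.compute_gradient()[0]  # the two should agree closely
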
def show_terms(structure, term_table, coseq_dict=None):
    assert len(term_table) == structure.scatterers().size()
    for scatterer, terms in zip(structure.scatterers(), term_table):
        print(scatterer.label, list(terms), end=" ")
        if (coseq_dict is not None):
            terms_to_match = list(terms[1:])
            have_match = False
            tags = list(coseq_dict.keys())
            tags.sort()
            for tag in tags:
                for coseq_terms in coseq_dict[tag]:
                    n = min(len(coseq_terms), len(terms_to_match))
                    if (coseq_terms[:n] == terms_to_match[:n]):
                        print(tag, end=" ")
                        have_match = True
            if (not have_match):
                print("Unknown", end=" ")
        print()
    sums_terms = flex.double()
    multiplicities = flex.double()
    for scatterer, terms in zip(structure.scatterers(), term_table):
        sums_terms.append(flex.sum(flex.size_t(list(terms))))
        multiplicities.append(scatterer.multiplicity())
    print("TD%d: %.2f" % (
        len(terms)-1, flex.mean_weighted(sums_terms, multiplicities)))

def compute_functional(self, x):
    """Compute the target function at coordinates `x`.

    Args:
      x (scitbx.array_family.flex.double):
        a flattened list of the N-dimensional vectors, i.e. coordinates in
        the first dimension are stored first, followed by the coordinates in
        the second dimension, etc.

    Returns:
      f (float): The value of the target function at coordinates `x`.
    """
    assert (x.size() // self.dim) == (self._lattices.size() * len(self._sym_ops))
    inner = self.rij_matrix.deep_copy()
    NN = x.size() // self.dim
    for i in range(self.dim):
        coord = x[i * NN:(i + 1) * NN]
        outer_prod = coord.matrix_outer_product(coord)
        inner -= outer_prod
    elements = inner * inner
    if self.wij_matrix is not None:
        elements = self.wij_matrix * elements
    f = 0.5 * flex.sum(elements)
    return f

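# Minimal numeric sketch of the algebra above (standalone, hypothetical
# values): with dim=1 and rij equal to the outer product of the coordinates,
# the residual matrix vanishes, so f = 0.5*sum(inner*inner) must be zero.
def _demo_cosym_functional_algebra():
    from scitbx.array_family import flex
    coord = flex.double([0.6, 0.8])
    rij = coord.matrix_outer_product(coord)   # "perfect" rij matrix
    inner = rij.deep_copy()
    inner -= coord.matrix_outer_product(coord)
    assert 0.5 * flex.sum(inner * inner) == 0.0
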
def f(self, x):
    print(x)
    B = float(x[0])
    #d_star_sq = self.calc.d_star_sq().data()
    #obs = self.obs.data()
    #calc = flex.exp(-B*d_star_sq)*self.calc.data()
    k = self.get_linear_scale(self.obs, self.calc, B)
    return flex.sum(flex.pow2(self.obs.data() - k*self.calc.data()))

def ca(self, x):
    if (x is None):
        return str(0)
    elif (self.is_bool(x)):
        return str(x.count(True))
    elif (self.is_size_t(x)):
        return str(x.size())
    elif (len(x) == 0):
        return str(0)
    elif (self.is_size_t(x[0])):
        return str(flex.sum(flex.size_t([i.size() for i in x])))
    else:
        raise RuntimeError("Bad selection array type.")

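# Reading of ca above (an interpretation, not from the original source): it
# renders a count as a string for each selection-array flavour, e.g.
#   flex.bool([True, False, True])           -> "2" (flags set)
#   flex.size_t([0, 2, 5])                   -> "3" (selection size)
#   [flex.size_t([0, 1]), flex.size_t([4])]  -> "3" (total over sub-selections)
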
def compute_functional_and_gradients(self):
    lp_h = lbfgs_target_handler()
    # calculate the sum of squares of the function
    fvec = lp_h.func(self.x, self.args)
    self.f = flex.sum(fvec*fvec)
    # calculate the gradient for each parameter by forward finite differences
    DELTA = 1.E-7
    self.g = flex.double()
    for x in range(self.n):
        templist = list(self.x)
        templist[x] += DELTA
        dvalues = flex.double(templist)
        dfvec = lp_h.func(dvalues, self.args)
        df = flex.sum(dfvec*dfvec)
        self.g.append((df - self.f)/DELTA)
    return self.f, self.g

def compute_structure_factors(self):
    """Compute the structure factors self._g of self.rho_map, as well as the
    000 component self._g_000, scaling them by the number of grid points."""
    rho = self.rho_map.real_map()
    self._g_000 = flex.sum(rho) * self.fft_scale
    self._g = self.f_obs.structure_factors_from_map(rho, in_place_fft=True)
    self._g *= self.fft_scale
