def run(files, params):
  print "filename",
  for cut in params.cut_ios:
    print "cut_ios_%.2f" % cut,
  print

  for f in files:
    is_xac = xds_ascii.is_xds_ascii(f)
    i_obs = None

    if is_xac:
      xac = xds_ascii.XDS_ASCII(f, read_data=True, i_only=True)
      xac.remove_rejected()
      i_obs = xac.i_obs().resolution_filter(d_min=params.d_min, d_max=params.d_max)

      if params.fix_variance_model:
        ao, bo = xac.variance_model
        an, bn = params.variance_model
        i_obs = i_obs.customized_copy(
          sigmas=flex.sqrt(flex.abs(an*(i_obs.sigmas()**2/ao + (bn-bo)*flex.pow2(i_obs.data())))))
    else:
      ihkl = integrate_hkl_as_flex.reader(f, read_columns=("IOBS", "SIGMA"))
      i_obs = ihkl.i_obs().resolution_filter(d_min=params.d_min, d_max=params.d_max)

      if params.fix_variance_model:
        a, b = params.variance_model
        i_obs = i_obs.customized_copy(
          sigmas=flex.sqrt(a*(i_obs.sigmas()**2 + b*flex.pow2(i_obs.data()))))

    cutoffs = eval_resolution(i_obs, params.n_bins, params.cut_ios)
    print "%s %s" % (f, " ".join(map(lambda x: "%.2f"%x, cutoffs)))
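# A minimal standalone sketch of the error-model adjustment applied in both
# branches above (hypothetical helper, not part of the original script): the
# corrected error is sigma' = sqrt(a*(sigma**2 + b*I**2)), the XDS-style
# two-parameter variance model. Assumes flex from scitbx.array_family, as
# used throughout this file.
def apply_variance_model_sketch(i_data, sigmas, a, b):
  # i_data, sigmas: flex.double arrays; a, b: variance-model parameters
  return flex.sqrt(a*(sigmas*sigmas + b*flex.pow2(i_data)))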
def compute_fa_values(self):
  self.compute_coefs()
  self.compute_determinant()
  reset_selector = (~self.selector).iselection()
  self.determinant = self.determinant.set_selected(reset_selector, 0)
  choice1 = -self.b + flex.sqrt(self.determinant)
  choice1 /= 2*self.a
  choice2 = -self.b - flex.sqrt(self.determinant)
  choice2 /= 2*self.a
  select1 = choice1 > choice2
  select2 = ~select1
  choice1 = choice1.set_selected(select1.iselection(), 0)
  choice2 = choice2.set_selected(select2.iselection(), 0)
  choice1 = choice1 + choice2
  select1 = (choice1 < 0).iselection()
  choice1 = choice1.set_selected(select1, 0)
  self.fa = choice1 + choice2
  self.set_sigma_ratio()
  self.sigfa = self.sigfa*self.fa
def __init__(self, w1, w2, k1, k2):
  self.w1 = w1.deep_copy()
  self.w2 = w2.deep_copy()
  if self.w1.is_xray_amplitude_array():
    self.w1 = self.w1.f_as_f_sq()
  if self.w2.is_xray_amplitude_array():
    self.w2 = self.w2.f_as_f_sq()
  ## common sets please
  self.w1, self.w2 = self.w1.common_sets(self.w2)
  ## get differences and sums please
  self.p1, self.n1 = self.w1.hemispheres_acentrics()
  self.p2, self.n2 = self.w2.hemispheres_acentrics()
  self.diff1 = self.p1.data() - self.n1.data()
  self.diff2 = self.p2.data() - self.n2.data()
  self.s1 = self.p1.sigmas()*self.p1.sigmas() \
          + self.n1.sigmas()*self.n1.sigmas()
  self.s1 = flex.sqrt(self.s1)
  self.s2 = self.p2.sigmas()*self.p2.sigmas() \
          + self.n2.sigmas()*self.n2.sigmas()
  self.s2 = flex.sqrt(self.s2)
  self.sum1 = self.p1.data() + self.n1.data()
  self.sum2 = self.p2.data() + self.n2.data()
  self.k1_sq = k1*k1
  self.k2_sq = k2*k2
  self.determinant = None
  self.fa = None
  self.sigfa = None
  self.selector = None
  self.iselector = None
  self.a = None
  self.b = None
  self.c = None
  self.compute_fa_values()
  self.fa = self.p1.customized_copy(
    data   = self.fa,
    sigmas = self.sigfa).set_observation_type(self.p1)
def __call__(self, reflections):
  '''
  Select the reflections

  :param reflections: The reflections
  :return: The selection as a mask

  '''
  import __builtin__
  if self.column == 'intensity.sum.i_over_sigma':
    I = reflections['intensity.sum.value']
    V = reflections['intensity.sum.variance']
    mask1 = V > 0
    I = I.select(mask1)
    V = V.select(mask1)
    data = I / flex.sqrt(V)
  elif self.column == 'intensity.prf.i_over_sigma':
    I = reflections['intensity.prf.value']
    V = reflections['intensity.prf.variance']
    mask1 = V > 0
    I = I.select(mask1)
    V = V.select(mask1)
    data = I / flex.sqrt(V)
  else:
    mask1 = None
    data = reflections[self.column]
  # Convert the comparison value to the column's element type
  if type(data) == double:
    value = __builtin__.float(self.value)
  elif type(data) == int:
    value = __builtin__.int(self.value)
  elif type(data) == size_t:
    value = __builtin__.int(self.value)
  elif type(data) == std_string:
    value = self.value
  elif type(data) == vec3_double:
    raise RuntimeError("Comparison not implemented")
  elif type(data) == vec2_double:
    raise RuntimeError("Comparison not implemented")
  elif type(data) == mat3_double:
    raise RuntimeError("Comparison not implemented")
  elif type(data) == int6:
    raise RuntimeError("Comparison not implemented")
  elif type(data) == shoebox:
    raise RuntimeError("Comparison not implemented")
  else:
    raise RuntimeError('Unknown column type')
  mask2 = self.op(data, value)
  if mask1 is not None:
    mask1.set_selected(size_t(range(len(mask1))).select(mask1), mask2)
  else:
    mask1 = mask2
  return mask1
def prepare_simulation_with_noise(sim, transmittance,
                                  apply_noise,
                                  ordered_intensities=None,
                                  half_data_flag=0):
  result = intensity_data()
  result.frame = sim["frame_lookup"]
  result.miller = sim['miller_lookup']
  raw_obs_no_noise = transmittance * sim['observed_intensity']
  if apply_noise:
    import scitbx.random
    from scitbx.random import variate, normal_distribution
         # bernoulli_distribution, gamma_distribution, poisson_distribution
    scitbx.random.set_random_seed(321)
    g = variate(normal_distribution())
    noise = flex.sqrt(raw_obs_no_noise) * g(len(raw_obs_no_noise))
    # adds in Gauss noise to signal
  else:
    noise = flex.double(len(raw_obs_no_noise), 0.)
  raw_obs = raw_obs_no_noise + noise
  if half_data_flag in [1, 2]:
    # apply selection after random numbers have been applied
    half_data_selection = (sim["frame_lookup"] % 2) == (half_data_flag % 2)
    result.frame = sim["frame_lookup"].select(half_data_selection)
    result.miller = sim['miller_lookup'].select(half_data_selection)
    raw_obs = raw_obs.select(half_data_selection)
  mean_signal = flex.mean(raw_obs)
  sigma_obs = flex.sqrt(flex.abs(raw_obs))
  mean_sigma = flex.mean(sigma_obs)
  print "<I> / <sigma>", (mean_signal/mean_sigma)
  scale_factor = mean_signal/10.
  print "Mean signal is", mean_signal, "Applying a constant scale factor of ", scale_factor
  # most important line; puts input data on a numerically reasonable scale
  result.raw_obs = raw_obs / scale_factor
  scaled_sigma = sigma_obs / scale_factor
  result.exp_var = scaled_sigma * scaled_sigma
  # ordered_intensities gets us the unit cell & miller indices to
  # gain a static array of (sin theta over lambda)**2
  if ordered_intensities is not None:
    uc = ordered_intensities.unit_cell()
    stol_sq = flex.double()
    for i in xrange(len(result.miller)):
      this_hkl = ordered_intensities.indices()[result.miller[i]]
      stol_sq_item = uc.stol_sq(this_hkl)
      stol_sq.append(stol_sq_item)
    result.stol_sq = stol_sq
  return result
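# The noise model above, factored out as an illustrative sketch (hypothetical
# helper): Gaussian noise whose variance equals the (non-negative) signal,
# approximating Poisson counting statistics for large counts.
def add_poisson_like_noise_sketch(signal, seed=321):
  import scitbx.random
  from scitbx.random import variate, normal_distribution
  scitbx.random.set_random_seed(seed)
  g = variate(normal_distribution())
  # g(n) draws n standard-normal variates as a flex.double
  return signal + flex.sqrt(signal) * g(len(signal))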
def __init__(self, nat, der, nsr_bias=1.0):
  self.nat = nat.deep_copy()
  self.der = der.deep_copy()
  self.nsr_bias = 1.0/nsr_bias
  assert self.nat.is_real_array()
  assert self.der.is_real_array()
  if self.nat.is_xray_intensity_array():
    self.nat = self.nat.f_sq_as_f()
  if self.der.is_xray_intensity_array():
    self.der = self.der.f_sq_as_f()
  self.nat, self.der = self.nat.common_sets(self.der)
  self.der = self.der.customized_copy(
    data   = self.der.data()*self.nsr_bias,
    sigmas = self.der.sigmas()*self.nsr_bias).set_observation_type(self.der)
  self.delta_f = self.nat.customized_copy(
    data   = (self.der.data() - self.nat.data()),
    sigmas = flex.sqrt(self.der.sigmas()*self.der.sigmas() +
                       self.nat.sigmas()*self.nat.sigmas())
    ).set_observation_type(self.nat)
  self.abs_delta_f = self.nat.customized_copy(
    data   = flex.abs(self.der.data() - self.nat.data()),
    sigmas = flex.sqrt(self.der.sigmas()*self.der.sigmas() +
                       self.nat.sigmas()*self.nat.sigmas())
    ).set_observation_type(self.der)
  if not self.nat.is_xray_intensity_array():
    self.nat = self.nat.f_as_f_sq()
  if not self.der.is_xray_intensity_array():
    self.der = self.der.f_as_f_sq()
  self.delta_i = self.nat.customized_copy(
    data   = (self.der.data() - self.nat.data()),
    sigmas = flex.sqrt(self.der.sigmas()*self.der.sigmas() +
                       self.nat.sigmas()*self.nat.sigmas())
    ).set_observation_type(self.nat)
  self.abs_delta_i = self.nat.customized_copy(
    data   = flex.abs(self.der.data() - self.nat.data()),
    sigmas = flex.sqrt(self.der.sigmas()*self.der.sigmas() +
                       self.nat.sigmas()*self.nat.sigmas())
    ).set_observation_type(self.der)
def nearest_rotamer_sites_cart(self, residue):
  sites_cart_result = residue.atoms().extract_xyz()
  get_class = iotbx.pdb.common_residue_names_get_class
  if get_class(residue.resname) == "common_amino_acid":
    sites_cart = residue.atoms().extract_xyz()
    rotamer_iterator = self.mon_lib_srv.rotamer_iterator(
      fine_sampling = True,
      comp_id       = residue.resname,
      atom_names    = residue.atoms().extract_name(),
      sites_cart    = sites_cart)
    if (rotamer_iterator is None or
        rotamer_iterator.problem_message is not None or
        rotamer_iterator.rotamer_info is None):
      rotamer_iterator = None
    if rotamer_iterator is not None:
      dist_min = 1.0e9
      for r, rotamer_sites_cart in rotamer_iterator:
        d = flex.mean(flex.sqrt((sites_cart - rotamer_sites_cart).dot()))
        if d < dist_min:
          dist_min = d
          sites_cart_result = rotamer_sites_cart
  return sites_cart_result
def exercise_SFweight_spline_core(structure, d_min, verbose=0):
  structure.scattering_type_registry(d_min=d_min)
  f_obs = abs(structure.structure_factors(
    d_min=d_min, anomalous_flag=False).f_calc())
  if (0 or verbose):
    f_obs.show_summary()
  f_obs = miller.array(
    miller_set=f_obs,
    data=f_obs.data(),
    sigmas=flex.sqrt(f_obs.data()))
  partial_structure = xray.structure(
    crystal_symmetry=structure,
    scatterers=structure.scatterers()[:-2])
  f_calc = f_obs.structure_factors_from_scatterers(
    xray_structure=partial_structure).f_calc()
  test_set_flags = (flex.random_double(size=f_obs.indices().size()) < 0.1)
  sfweight = clipper.SFweight_spline_interface(
    unit_cell=f_obs.unit_cell(),
    space_group=f_obs.space_group(),
    miller_indices=f_obs.indices(),
    anomalous_flag=f_obs.anomalous_flag(),
    f_obs_data=f_obs.data(),
    f_obs_sigmas=f_obs.sigmas(),
    f_calc=f_calc.data(),
    test_set_flags=test_set_flags,
    n_refln=f_obs.indices().size()//10,
    n_param=20)
  if (0 or verbose):
    print "number_of_spline_parameters:", sfweight.number_of_spline_parameters()
    print "mean fb: %.8g" % flex.mean(flex.abs(sfweight.fb()))
    print "mean fd: %.8g" % flex.mean(flex.abs(sfweight.fd()))
    print "mean phi: %.8g" % flex.mean(sfweight.centroid_phases())
    print "mean fom: %.8g" % flex.mean(sfweight.figures_of_merit())
  return sfweight
def detect_outliers_solve(self):
  """
  TT says:
  I toss everything > 3 sigma in the scaling, where sigma comes from the
  rms of everything being scaled:
    sigma**2 = <delta**2> - <experimental-sigmas**2>
  Then if a particular delta**2 > 3 sigma**2 + experimental-sigmas**2,
  I toss it.
  """
  terwilliger_sigma_array = flex.double(self.mean_df2.data) - \
                            flex.double(self.mean_sdf2.data)
  for bin_number in self.delta_f.binner().range_all():
    ## The selection tells us whether or not something is in the correct bin
    selection = self.delta_f.binner().selection(bin_number).iselection()
    ## Now just make a global check to test for outlierness:
    tmp_sigma_array = terwilliger_sigma_array[bin_number] - \
                      self.delta_f.sigmas()*self.delta_f.sigmas()
    tmp_sigma_array = flex.sqrt(tmp_sigma_array)*self.cut_level_rms
    potential_outliers = (self.delta_f.data() > tmp_sigma_array)
    potential_outliers = potential_outliers.select(selection)
    self.result = self.result.set_selected(selection, potential_outliers)

  print >> self.out
  print >> self.out, " %8i potential outliers detected" % (
    self.result.count(True))
  print >> self.out, " They will be removed from the data set"
  print >> self.out
def exercise(space_group_info, anomalous_flag,
             n_scatterers=8, d_min=2, verbose=0):
  structure = random_structure.xray_structure(
    space_group_info,
    elements=["const"]*n_scatterers)
  f_calc = structure.structure_factors(
    d_min=d_min,
    anomalous_flag=anomalous_flag).f_calc()
  f = abs(f_calc)
  f = miller.array(miller_set=f, data=f.data(), sigmas=flex.sqrt(f.data()))
  f = f.f_as_f_sq()
  g = f.expand_to_p1()
  merger_p1 = xray.merger(g.indices(),
                          g.data(),
                          g.sigmas(),
                          g.space_group(),
                          g.anomalous_flag(),
                          g.unit_cell())
  p1_bic = merger_p1.bic()
  p1_r = merger_p1.r_abs()
  merger_nat = xray.merger(g.indices(),
                           g.data(),
                           g.sigmas(),
                           f.space_group(),
                           g.anomalous_flag(),
                           g.unit_cell())
  nat_bic = merger_nat.bic()
  nat_r = merger_nat.r_abs()
  assert nat_bic >= p1_bic
  assert p1_r <= 1e-8
def gradients(self, xray_structure, force_update_mask=False):
  factor = 1.0
  sites_cart = xray_structure.sites_cart()
  if(self.fmodel is not None):
    max_shift = flex.max(flex.sqrt((self.sites_cart - sites_cart).dot()))
    if(max_shift > self.update_gradient_threshold):
      self.fmodel.update_xray_structure(
        xray_structure = xray_structure,
        update_f_calc  = True,
        update_f_mask  = False)
      self.gx = flex.vec3_double(
        self.x_target_functor(compute_gradients=True).
          gradients_wrt_atomic_parameters(site=True).packed())
      self.sites_cart = sites_cart
  if(self.restraints_manager is not None):
    c = self.restraints_manager.energies_sites(
      sites_cart        = sites_cart,
      compute_gradients = True)
    self.gc = c.gradients
    factor *= self.wc
    if(c.normalization_factor is not None): factor *= c.normalization_factor
  result = None
  if(self.wx is not None):
    result = self.wx * self.gx
  if(self.wc is not None):
    gcw = self.wc * self.gc
    if(result is None): result = gcw
    else:               result = result + gcw
  if(factor != 1.0): result *= 1.0 / factor
  #print "norms:", self.gc.norm(), self.gx.norm(), result.norm()
  return result
def test_twin_r_value(twin_operator):
  miller_array = random_data(35).map_to_asu()
  miller_array = miller_array.f_as_f_sq()

  for twin_fraction, expected_r_abs, expected_r_sq in zip(
      [0, 0.1, 0.2, 0.3, 0.4, 0.5],
      [0.50, 0.40, 0.30, 0.20, 0.10, 0.0],
      [0.333, 0.213, 0.120, 0.0533, 0.0133, 0.00]):
    cb_op = sgtbx.change_of_basis_op(twin_operator)
    miller_array_mod, miller_array_twin = miller_array.common_sets(
      miller_array.change_basis(cb_op).map_to_asu())
    twinned_miller = miller_array_mod.customized_copy(
      data = (1.0-twin_fraction)*miller_array_mod.data() +
             twin_fraction*miller_array_twin.data(),
      sigmas = flex.sqrt(
        flex.pow(((1.0-twin_fraction)*miller_array_mod.sigmas()), 2.0) +
        flex.pow(((twin_fraction)*miller_array_twin.sigmas()), 2.0)))
    twinned_miller.set_observation_type(miller_array.observation_type())
    twin_r = scaling.twin_r(twinned_miller.indices(),
                            twinned_miller.data(),
                            twinned_miller.space_group(),
                            twinned_miller.anomalous_flag(),
                            cb_op.c().r().as_double()[0:9])
    assert approx_equal(twin_r.r_abs_value(), expected_r_abs, 0.08)
    assert approx_equal(twin_r.r_sq_value(), expected_r_sq, 0.08)
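# Sigma propagation used when synthesizing the twinned data above, as a
# standalone sketch (hypothetical helper): for I_twin = (1-a)*I1 + a*I2 with
# independent errors, the sigmas add in quadrature:
#   sigma_twin = sqrt(((1-a)*s1)**2 + (a*s2)**2)
def twinned_sigmas_sketch(s1, s2, twin_fraction):
  return flex.sqrt(flex.pow((1.0-twin_fraction)*s1, 2.0) +
                   flex.pow(twin_fraction*s2, 2.0))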
def __init__(self, lambda1, lambda2, k1=1.0):
  ## It is assumed, of course, that the data are scaled.
  ## lambda1 is the 'reference'
  self.w1 = lambda1.deep_copy()
  self.w2 = lambda2.deep_copy()
  if not self.w1.is_xray_amplitude_array():
    self.w1 = self.w1.f_sq_as_f()
  if not self.w2.is_xray_amplitude_array():
    self.w2 = self.w2.f_sq_as_f()
  self.w1, self.w2 = self.w1.common_sets(self.w2)

  l1p, l1n = self.w1.hemispheres_acentrics()
  self.mean1 = l1p.data() + l1n.data()
  self.diff1 = l1p.data() - l1n.data()
  self.v1 = (l1p.sigmas()*l1p.sigmas() + l1n.sigmas()*l1n.sigmas())

  l2p, l2n = self.w2.hemispheres_acentrics()
  self.mean2 = l2p.data() + l2n.data()
  self.diff2 = l2p.data() - l2n.data()
  self.v2 = (l2p.sigmas()*l2p.sigmas() + l2n.sigmas()*l2n.sigmas())

  self.new_diff = flex.abs((self.diff1 + k1*self.diff2)/2.0)
  self.new_sigma_mean = flex.sqrt((self.v1 + k1*k1*self.v2)/2.0)
  self.dad = l1p.customized_copy(
    data   = self.new_diff,
    sigmas = self.new_sigma_mean).set_observation_type(self.w1)
def calc_k(f_obs, i_calc):
  fc = flex.sqrt(i_calc)
  num = flex.sum(f_obs * fc)
  den = flex.sum(fc * fc)
  assert den != 0
  k = num / den
  return k
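# Usage note (a sketch, values hypothetical): calc_k returns the least-squares
# scale k minimizing sum((f_obs - k*sqrt(i_calc))**2), i.e.
# k = sum(f_obs*fc)/sum(fc*fc) with fc = sqrt(i_calc).
#   f_obs  = flex.double([1., 2., 3.])
#   i_calc = flex.double([1., 4., 9.])
#   assert calc_k(f_obs, i_calc) == 1.0  # here sqrt(i_calc) == f_obs exactly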
def scat_data(self, d_star_sq=None):
  if d_star_sq is None:
    self.sigma_tot_sq = None
    self.gamma_tot_sigma = None
    self.gamma_tot = None
  else:
    self.sigma_tot_sq = flex.double(d_star_sq.size())
    gaussians = {}
    for chemical_type, n_atoms in self.asu_contents.items():
      gaussians[chemical_type] = xray_scattering.wk1995(
        chemical_type).fetch()
      f0 = gaussians[chemical_type].at_d_star_sq(d_star_sq)
      self.sigma_tot_sq += f0*f0*n_atoms

    if(d_star_sq.size() > 0):
      ## Protein part
      gamma_prot = gamma_protein(d_star_sq)
      self.gamma_prot = gamma_prot.gamma*self.fraction_protein
      ## Nucleotide part; needs to be completed
      gamma_nuc = gamma_nucleic(d_star_sq)
      self.gamma_nuc = gamma_nuc.gamma*self.fraction_nucleic
      ##
      ## Totals
      self.gamma_tot = self.gamma_prot*self.fraction_protein + \
                       self.gamma_nuc*self.fraction_nucleic
      self.gamma_tot_sigma = \
        (gamma_prot.sigma_gamma*self.fraction_protein)* \
        (gamma_prot.sigma_gamma*self.fraction_protein)+ \
        (gamma_nuc.sigma_gamma*self.fraction_nucleic)* \
        (gamma_nuc.sigma_gamma*self.fraction_nucleic)
      self.gamma_tot_sigma = flex.sqrt(self.gamma_tot_sigma)
def run_refinement(structure_ideal, structure_shake, params,
                   i_obs=None, f_obs=None):
  assert (i_obs is None) == (f_obs is None)
  print "Ideal structure:"
  structure_ideal.show_summary().show_scatterers()
  print
  print "Modified structure:"
  structure_shake.show_summary().show_scatterers()
  print
  print "rms difference:", \
    structure_ideal.rms_difference(other=structure_shake)
  print
  sdt = params.show_distances_threshold
  if (sdt > 0):
    print "structure_shake inter-atomic distances:"
    structure_shake.show_distances(distance_cutoff=sdt)
    print
  if (f_obs is None):
    i_obs = structure_ideal.structure_factors(
      anomalous_flag=False,
      d_min=1,
      algorithm="direct",
      cos_sin_table=False).f_calc().intensities()
    f_obs = i_obs.array(data=flex.sqrt(i_obs.data()))
  return refinement(
    i_obs=i_obs,
    f_obs=f_obs,
    xray_structure=structure_shake,
    params=params,
    reference_structure=structure_ideal)
def exercise(pdb_poor_str, d_min = 1.0, resolution_factor = 0.25):
  # Fit one residue in a many-residue model
  #
  # answer
  pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_answer)
  pdb_inp.write_pdb_file(file_name = "answer.pdb")
  xrs_answer = pdb_inp.xray_structure_simple()
  f_calc = xrs_answer.structure_factors(d_min = d_min).f_calc()
  fft_map = f_calc.fft_map(resolution_factor=resolution_factor)
  fft_map.apply_sigma_scaling()
  target_map = fft_map.real_map_unpadded()
  mtz_dataset = f_calc.as_mtz_dataset(column_root_label = "FCmap")
  mtz_object = mtz_dataset.mtz_object()
  mtz_object.write(file_name = "answer.mtz")
  # take TYR9
  sites_answer = list(
    pdb_inp.construct_hierarchy().residue_groups())[1].atoms().extract_xyz()
  # poor
  mon_lib_srv = monomer_library.server.server()
  master_params = iotbx.phil.parse(
    input_string=mmtbx.monomer_library.pdb_interpretation.master_params_str,
    process_includes=True).extract()
  master_params.link_distance_cutoff = 999
  processed_pdb_file = monomer_library.pdb_interpretation.process(
    mon_lib_srv              = mon_lib_srv,
    params                   = master_params,
    ener_lib                 = monomer_library.server.ener_lib(),
    raw_records              = flex.std_string(pdb_poor_str.splitlines()),
    strict_conflict_handling = True,
    force_symmetry           = True,
    log                      = None)
  pdb_hierarchy_poor = processed_pdb_file.all_chain_proxies.pdb_hierarchy
  xrs_poor = processed_pdb_file.xray_structure()
  sites_cart_poor = xrs_poor.sites_cart()
  pdb_hierarchy_poor.write_pdb_file(file_name = "poor.pdb")
  #
  rotamer_manager = RotamerEval()
  get_class = iotbx.pdb.common_residue_names_get_class
  for model in pdb_hierarchy_poor.models():
    for chain in model.chains():
      for residue in chain.only_conformer().residues():
        if(get_class(residue.resname) == "common_amino_acid" and
           int(residue.resseq) == 9): # take TYR9
          t0 = time.time()
          ro = mmtbx.refinement.real_space.fit_residue.run_with_minimization(
            target_map                  = target_map,
            residue                     = residue,
            xray_structure              = xrs_poor,
            mon_lib_srv                 = mon_lib_srv,
            rotamer_manager             = rotamer_manager,
            real_space_gradients_delta  = d_min*resolution_factor,
            geometry_restraints_manager = \
              processed_pdb_file.geometry_restraints_manager(
                show_energies=False))
          sites_final = residue.atoms().extract_xyz()
          t1 = time.time()-t0
  pdb_hierarchy_poor.adopt_xray_structure(ro.xray_structure)
  pdb_hierarchy_poor.write_pdb_file(file_name = "refined.pdb")
  dist = flex.mean(flex.sqrt((sites_answer - sites_final).dot()))
  # Highly unstable test
  assert dist < 0.9
def integration_proper(self):
  image_obj = self.imagefiles.imageindex(self.frame_numbers[self.image_number])
  #image_obj.read() #assume image already read
  rawdata = image_obj.linearintdata # assume image #1
  self.integration_proper_fast(rawdata, self.predicted, self.hkllist,
                               self.detector_xy_draft)
  self.integrated_data = self.get_integrated_data()
  self.integrated_sigma = self.get_integrated_sigma()
  self.integrated_miller = self.get_integrated_miller()
  self.detector_xy = self.get_detector_xy()
  self.max_signal = self.get_max_signal()

  for correction_type in self.horizons_phil.integration.absorption_correction:
    if correction_type.apply:
      if correction_type.algorithm == "fuller_kapton":
        print "Absorption correction with %d reflections to correct" % (
          len(self.detector_xy))
        from cxi_xdr_xes import absorption
        C = absorption.correction()
        if correction_type.fuller_kapton.smart_sigmas:
          self.fuller_kapton_absorption_correction, \
          self.fuller_kapton_absorption_sigmas = C(
            panel_size_px       = (self.inputpd['size1'], self.inputpd['size2']),
            pixel_size_mm       = self.pixel_size,
            detector_dist_mm    = self.inputai.distance(),
            wavelength_ang      = self.inputai.wavelength,
            BSmasks             = self.BSmasks,
            get_ISmask_function = self.get_ISmask,
            params              = correction_type.fuller_kapton,
            i_no_skip           = self.get_integrated_flag(),
            calc_sigmas         = True)
          # apply corrections and propagate error
          # term1 = (sig(C)/C)^2
          # term2 = (sig(Imeas)/Imeas)^2
          # I' = C*I
          # sig^2(I') = (I')^2*(term1 + term2)
          # sig(I') = sqrt(sig^2(I'))
          term1 = flex.pow(self.fuller_kapton_absorption_sigmas /
                           self.fuller_kapton_absorption_correction, 2)
          term2 = flex.pow(self.integrated_sigma / self.integrated_data, 2)
          self.integrated_data *= self.fuller_kapton_absorption_correction
          integrated_sigma_squared = flex.pow(self.integrated_data, 2) * \
            (term1 + term2)
          self.integrated_sigma = flex.sqrt(integrated_sigma_squared)
          # order is purposeful: the two lines above require that
          # self.integrated_data has already been corrected!
        else:
          self.fuller_kapton_absorption_correction = C(
            panel_size_px       = (self.inputpd['size1'], self.inputpd['size2']),
            pixel_size_mm       = self.pixel_size,
            detector_dist_mm    = self.inputai.distance(),
            wavelength_ang      = self.inputai.wavelength,
            BSmasks             = self.BSmasks,
            get_ISmask_function = self.get_ISmask,
            params              = correction_type.fuller_kapton,
            i_no_skip           = self.get_integrated_flag())
          # apply these corrections now
          self.integrated_data *= self.fuller_kapton_absorption_correction
          self.integrated_sigma *= self.fuller_kapton_absorption_correction

  #self.show_rejected_spots() # function has been recoded in C++
  return
def pair_sites(self, r, t, cut_off):
  new_sites = r.elems * self.set_b + t.elems
  deltas = self.set_a - new_sites
  deltas = flex.sqrt(deltas.dot(deltas))
  select = flex.bool(deltas < cut_off)
  tmp_a = self.set_a.select(select.iselection())
  tmp_b = self.set_b.select(select.iselection())
  return tmp_a, tmp_b, select
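# Illustrative use of pair_sites (a sketch; the object name obj is
# hypothetical). r and t would normally come from a superposition fit such as
# superpose.least_squares_fit:
#   from scitbx import matrix
#   r = matrix.sqr((1,0,0, 0,1,0, 0,0,1))  # identity rotation
#   t = matrix.col((0,0,0))                # zero translation
#   a_close, b_close, mask = obj.pair_sites(r, t, cut_off=2.0)
# Only pairs whose transformed set_b site lies within cut_off of the
# corresponding set_a site are kept.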
def accelerations(self):
  self.stereochemistry_residuals = self.restraints_manager.energies_sites(
    sites_cart        = self.structure.sites_cart(),
    compute_gradients = True)
  # Harmonic restraints
  if self.er_data is not None:
    if self.er_data.er_harmonic_restraints_info is not None:
      harmonic_grads = self.restraints_manager.geometry.ta_harmonic_restraints(
        sites_cart                 = self.structure.sites_cart(),
        ta_harmonic_restraint_info = self.er_data.er_harmonic_restraints_info,
        weight                     = self.er_data.er_harmonic_restraints_weight,
        slack                      = self.er_data.er_harmonic_restraints_slack)
      assert self.stereochemistry_residuals.gradients.size() == \
        harmonic_grads.size()
      self.stereochemistry_residuals.gradients += harmonic_grads
  result = self.stereochemistry_residuals.gradients
  d_max = None
  if(self.xray_structure_last_updated is not None and self.shift_update > 0):
    array_of_distances_between_each_atom = flex.sqrt(
      self.structure.difference_vectors_cart(
        self.xray_structure_last_updated).dot())
    d_max = flex.max(array_of_distances_between_each_atom)
  if(self.fmodel is not None):
    if(d_max is not None):
      if(d_max > self.shift_update):
        self.xray_structure_last_updated = \
          self.structure.deep_copy_scatterers()
        self.xray_gradient = self.xray_grads()
    else:
      self.xray_gradient = self.xray_grads()
    result = (self.xray_gradient * self.xray_target_weight +
              self.stereochemistry_residuals.gradients *
              self.chem_target_weight)
  factor = 1.0
  if(self.chem_target_weight is not None):
    factor *= self.chem_target_weight
  if(self.stereochemistry_residuals.normalization_factor is not None):
    factor *= self.stereochemistry_residuals.normalization_factor
  if(factor != 1.0): result *= 1.0 / factor
  # Store RMS non-solvent atom gradients for Xray and Geo
  if self.er_data is not None:
    self.wc = self.chem_target_weight / factor
    self.wx = self.xray_target_weight / factor
    self.gg = self.stereochemistry_residuals.gradients * self.wc
    self.xg = self.xray_gradient * self.wx
    gg_pro = self.gg.select(~self.er_data.solvent_sel)
    xg_pro = self.xg.select(~self.er_data.solvent_sel)
    self.er_data.geo_grad_rms += \
      (flex.mean_sq(gg_pro.as_double())**0.5) / self.n_steps
    self.er_data.xray_grad_rms += \
      (flex.mean_sq(xg_pro.as_double())**0.5) / self.n_steps
  return result
def fit_side_chain(self, clusters):
  rotamer_iterator = \
    mmtbx.refinement.real_space.fit_residue.get_rotamer_iterator(
      mon_lib_srv = self.mon_lib_srv,
      residue     = self.residue)
  if(rotamer_iterator is None): return
  selection = flex.size_t(flatten(clusters[0].vector))
  if(self.target_map is not None):
    start_target_value = self.get_target_value(
      sites_cart = self.residue.atoms().extract_xyz(),
      selection  = selection)
  sites_cart_start = self.residue.atoms().extract_xyz()
  sites_cart_first_rotamer = list(rotamer_iterator)[0][1]
  self.residue.atoms().set_xyz(sites_cart_first_rotamer)
  axes = []
  atr = []
  for i, angle in enumerate(self.chi_angles[0]):
    cl = clusters[i]
    axes.append(flex.size_t(cl.axis))
    atr.append(flex.size_t(cl.atoms_to_rotate))
  if(self.target_map is not None):
    ro = ext.fit(
      target_value             = start_target_value,
      axes                     = axes,
      rotatable_points_indices = atr,
      angles_array             = self.chi_angles,
      density_map              = self.target_map,
      all_points               = self.residue.atoms().extract_xyz(),
      unit_cell                = self.unit_cell,
      selection                = selection,
      sin_table                = self.sin_cos_table.sin_table,
      cos_table                = self.sin_cos_table.cos_table,
      step                     = self.sin_cos_table.step,
      n                        = self.sin_cos_table.n)
  else:
    ro = ext.fit(
      sites_cart_start         = sites_cart_start.deep_copy(),
      axes                     = axes,
      rotatable_points_indices = atr,
      angles_array             = self.chi_angles,
      all_points               = self.residue.atoms().extract_xyz(),
      sin_table                = self.sin_cos_table.sin_table,
      cos_table                = self.sin_cos_table.cos_table,
      step                     = self.sin_cos_table.step,
      n                        = self.sin_cos_table.n)
  sites_cart_result = ro.result()
  if(sites_cart_result.size() > 0):
    dist = None
    if(self.accept_only_if_max_shift_is_smaller_than is not None):
      dist = flex.max(flex.sqrt((sites_cart_start - sites_cart_result).dot()))
    if(dist is None):
      self.residue.atoms().set_xyz(sites_cart_result)
    else:
      if(dist is not None and
         dist < self.accept_only_if_max_shift_is_smaller_than):
        self.residue.atoms().set_xyz(sites_cart_result)
      else:
        self.residue.atoms().set_xyz(sites_cart_start)
def peaks_mapped(self):
  if(self.peaks_ is None): return None
  assert self.mapped == False
  max_dist = self.params.map_next_to_model.max_model_peak_dist
  min_dist = self.params.map_next_to_model.min_model_peak_dist
  if (min_dist is None):
    min_dist = 0.
  if (max_dist is None):
    max_dist = float(sys.maxint)
  xray_structure = self.fmodel.xray_structure.deep_copy_scatterers()
  use_selection = None
  if(not self.params.map_next_to_model.use_hydrogens):
    use_selection = ~xray_structure.hd_selection()
  initial_number_of_sites = self.peaks_.sites.size()
  if(not self.silent):
    print >> self.log, "Filter by distance & map next to the model:"
  result = xray_structure.closest_distances(
    sites_frac      = self.peaks_.sites,
    distance_cutoff = max_dist,
    use_selection   = use_selection)
  smallest_distances_sq = result.smallest_distances_sq
  smallest_distances = result.smallest_distances
  in_box = smallest_distances_sq > 0
  not_too_far = smallest_distances_sq <= max_dist**2
  not_too_close = smallest_distances_sq >= min_dist**2
  selection = (not_too_far & not_too_close & in_box)
  iseqs_of_closest_atoms = result.i_seqs.select(selection)
  peaks = peaks_holder(
    heights                = self.peaks_.heights.select(selection),
    sites                  = result.sites_frac.select(selection),
    iseqs_of_closest_atoms = iseqs_of_closest_atoms)
  sd = flex.sqrt(smallest_distances_sq.select(in_box))
  d_min = flex.min_default(sd, 0)
  d_max = flex.max_default(sd, 0)
  if(not self.silent):
    print >> self.log, "   mapped sites are within: %5.3f - %5.3f" % (
      d_min, d_max)
    print >> self.log, "   number of sites selected in [dist_min=%5.2f, " \
      "dist_max=%5.2f]: %d from: %d" % (min_dist, max_dist,
        peaks.sites.size(), initial_number_of_sites)
  smallest_distances = flex.sqrt(smallest_distances_sq.select(selection))
  d_min = flex.min_default(smallest_distances, 0)
  d_max = flex.max_default(smallest_distances, 0)
  if(not self.silent):
    print >> self.log, "   mapped sites are within: %5.3f - %5.3f" % (
      d_min, d_max)
  self.mapped = True
  self.peaks_ = peaks
  return peaks
def _need_update_mask(self, sites_cart_new):
  if(self.sites_cart is not None and
     self.sites_cart.size() != sites_cart_new.size()):
    return True
  if(self.sites_cart is not None):
    atom_atom_distances = flex.sqrt((sites_cart_new - self.sites_cart).dot())
    mean_shift = flex.mean_default(atom_atom_distances, 0)
    if(mean_shift > self.mask_params.mean_shift_for_mask_update):
      return True
    else:
      return False
  else:
    return True
def exercise(rotamer_manager, sin_cos_table, d_min = 1.0,
             resolution_factor = 0.1):
  # Run into a water clash if needed: water is considered as just a map peak.
  #
  # answer PDB
  pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_answer)
  pdb_inp.write_pdb_file(file_name = "answer.pdb")
  xrs_answer = pdb_inp.xray_structure_simple()
  # answer map
  pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_for_map)
  pdb_inp.write_pdb_file(file_name = "for_map.pdb")
  xrs_map = pdb_inp.xray_structure_simple()
  f_calc = xrs_map.structure_factors(d_min = d_min).f_calc()
  fft_map = f_calc.fft_map(resolution_factor=resolution_factor)
  fft_map.apply_sigma_scaling()
  target_map = fft_map.real_map_unpadded()
  mtz_dataset = f_calc.as_mtz_dataset(column_root_label = "FCmap")
  mtz_object = mtz_dataset.mtz_object()
  mtz_object.write(file_name = "answer.mtz")
  # poor
  mon_lib_srv = monomer_library.server.server()
  processed_pdb_file = monomer_library.pdb_interpretation.process(
    mon_lib_srv              = mon_lib_srv,
    ener_lib                 = monomer_library.server.ener_lib(),
    raw_records              = flex.std_string(pdb_poor.splitlines()),
    strict_conflict_handling = True,
    force_symmetry           = True,
    log                      = None)
  pdb_hierarchy_poor = processed_pdb_file.all_chain_proxies.pdb_hierarchy
  xrs_poor = processed_pdb_file.xray_structure()
  sites_cart_poor = xrs_poor.sites_cart()
  pdb_hierarchy_poor.write_pdb_file(file_name = "poor.pdb")
  #
  grm = mmtbx.restraints.manager(
    geometry=processed_pdb_file.geometry_restraints_manager(
      show_energies=False),
    normalization = True)
  for i in [1, 2]:
    print "-"*10
    result = mmtbx.refinement.real_space.fit_residues.run(
      pdb_hierarchy    = pdb_hierarchy_poor,
      crystal_symmetry = xrs_poor.crystal_symmetry(),
      map_data         = target_map,
      do_all           = True,
      massage_map      = False,
      rotamer_manager  = rotamer_manager,
      sin_cos_table    = sin_cos_table,
      mon_lib_srv      = mon_lib_srv)
    pdb_hierarchy_poor = result.pdb_hierarchy
  #
  result.pdb_hierarchy.write_pdb_file(file_name = "refined.pdb",
    crystal_symmetry=xrs_poor.crystal_symmetry())
  dist = flex.max(flex.sqrt((xrs_answer.sites_cart() -
    result.pdb_hierarchy.atoms().extract_xyz()).dot()))
  assert dist < 0.75, dist # to make it work on marbles
def _show_and_track(self):
  cc = self._get_cc()
  s2 = self.xray_structure.sites_cart()
  if(self.cc_best is None or cc > self.cc_best):
    self.cc_best = cc
    self.sites_cart_best = s2.deep_copy()
  if(self.log):
    fmt = "%sCC=%6.4f (best to keep CC=%6.4f), moved from start (max/mean)=%s"
    s1 = self.sites_cart_start
    d = "%6.3f %6.3f" % flex.sqrt((s1-s2).dot()).min_max_mean().as_tuple()[1:]
    print >> self.log, fmt % (self.prefix, cc, self.cc_best, d)
def alpha_beta(self):
  if self.alpha is None:
    print "re calc a/b"
    self.alpha = self.ta_d*flex.sqrt(
      self.normalized_obs_f.normalizer_for_miller_array/
      self.normalized_calc_f.normalizer_for_miller_array)
    self.beta = (1.0 - self.ta_d*self.ta_d)* \
      self.normalized_obs_f.normalizer_for_miller_array
    self.alpha = self.miller_obs.array(data=self.alpha)
    self.beta = self.miller_obs.array(data=self.beta)
  return self.alpha, self.beta
def alpha_beta(self):
  if self.alpha is None:
    self.alpha = self.sigmaa_miller_array.data()*flex.sqrt(
      self.normalized_obs_f.normalizer_for_miller_array/
      self.normalized_calc_f.normalizer_for_miller_array)
    self.beta = (1.0 - self.sigmaa_miller_array.data()*
                 self.sigmaa_miller_array.data())* \
      self.normalized_obs_f.normalizer_for_miller_array
    self.alpha = self.miller_obs.array(data=self.alpha)
    self.beta = self.miller_obs.array(data=self.beta)
  return self.alpha, self.beta
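# For reference, the closed-form SigmaA-based ML coefficients computed above:
#   alpha = sigmaA * sqrt(Sigma_obs / Sigma_calc)
#   beta  = (1 - sigmaA**2) * Sigma_obs
# where Sigma_obs and Sigma_calc are the Wilson normalizers of the observed
# and calculated structure factors (the denominators that turn F into E).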
def verlet_leapfrog_integration(self):
  # start verlet_leapfrog_integration loop
  for cycle in range(1, self.n_steps+1, 1):
    sites_cart = None
    if([self.stop_at_diff, self.states_collector].count(None) != 2):
      sites_cart = self.xray_structure.sites_cart()
    if(self.stop_at_diff is not None):
      dist = flex.mean(flex.sqrt((self.sites_cart_start - sites_cart).dot()))
      if(dist >= self.stop_at_diff): return
    accelerations = self.accelerations()
    print_flag = 0
    switch = math.modf(float(cycle)/self.n_print)[0]
    if((switch == 0 or cycle == 1 or cycle == self.n_steps) and
       self.verbose >= 1):
      print_flag = 1
    if(self.states_collector is not None):
      switch2 = math.modf(float(cycle)/self.n_collect)[0]
      if(switch2 == 0 or cycle == 1 or cycle == self.n_steps):
        self.states_collector.add(sites_cart = sites_cart)
    if(print_flag == 1):
      text = "integration step number = %5d" % cycle
      self.center_of_mass_info()
      kt = dynamics.kinetic_energy_and_temperature(self.vxyz,
        self.atomic_weights)
      self.current_temperature = kt.temperature
      self.ekin = kt.kinetic_energy
      self.print_dynamics_stat(text)
    if(self.stop_cm_motion):
      self.center_of_mass_info()
      self.stop_global_motion()
    # calculate velocities at t+dt/2
    dynamics.vxyz_at_t_plus_dt_over_2(
      self.vxyz, self.atomic_weights, accelerations, self.tstep)
    # calculate the temperature and kinetic energy from new velocities
    kt = dynamics.kinetic_energy_and_temperature(self.vxyz,
      self.atomic_weights)
    self.current_temperature = kt.temperature
    self.ekin = kt.kinetic_energy
    self.velocity_rescaling()
    if(print_flag == 1 and 0):
      self.center_of_mass_info()
      self.print_dynamics_stat(text)
    # do the verlet_leapfrog_integration to get coordinates at t+dt
    self.xray_structure.set_sites_cart(
      sites_cart=self.xray_structure.sites_cart() + self.vxyz * self.tstep)
    self.xray_structure.apply_symmetry_sites()
    # prevent explosions by doing very quick model geometry regularization
    if(self.interleaved_minimization and cycle == self.n_steps):
      self.run_interleaved_minimization()
    kt = dynamics.kinetic_energy_and_temperature(self.vxyz,
      self.atomic_weights)
    self.current_temperature = kt.temperature
    self.ekin = kt.kinetic_energy
    if(print_flag == 1 and 0):
      self.center_of_mass_info()
      self.print_dynamics_stat(text)
    self.accelerations()
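# The leapfrog recurrence driven above, in schematic form (the dynamics
# helpers operate on flex arrays with per-atom masses; this is just the
# update rule):
#   v(t + dt/2) = v(t - dt/2) + (F(t)/m)*dt
#   x(t + dt)   = x(t) + v(t + dt/2)*dt
# Temperature and kinetic energy are monitored from the half-step velocities
# and nudged toward the target via velocity_rescaling().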
def exercise(rotamer_manager, sin_cos_table, d_min = 1.0,
             resolution_factor = 0.1):
  # Make sure it DOES NOT kick into the existing residue (chain Z).
  #
  # answer PDB
  pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_for_map)
  pdb_inp.write_pdb_file(file_name = "for_map.pdb")
  xrs_answer = pdb_inp.xray_structure_simple()
  # answer map
  pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_answer)
  xrs_map = pdb_inp.xray_structure_simple()
  f_calc = xrs_map.structure_factors(d_min = d_min).f_calc()
  fft_map = f_calc.fft_map(resolution_factor=resolution_factor)
  fft_map.apply_sigma_scaling()
  target_map = fft_map.real_map_unpadded()
  mtz_dataset = f_calc.as_mtz_dataset(column_root_label = "FCmap")
  mtz_object = mtz_dataset.mtz_object()
  mtz_object.write(file_name = "answer.mtz")
  # poor
  mon_lib_srv = monomer_library.server.server()
  processed_pdb_file = monomer_library.pdb_interpretation.process(
    mon_lib_srv              = mon_lib_srv,
    ener_lib                 = monomer_library.server.ener_lib(),
    raw_records              = flex.std_string(pdb_poor.splitlines()),
    strict_conflict_handling = True,
    force_symmetry           = True,
    log                      = None)
  pdb_hierarchy_poor = processed_pdb_file.all_chain_proxies.pdb_hierarchy
  xrs_poor = processed_pdb_file.xray_structure()
  sites_cart_poor = xrs_poor.sites_cart()
  pdb_hierarchy_poor.write_pdb_file(file_name = "poor.pdb")
  #
  result = mmtbx.refinement.real_space.fit_residues.run(
    pdb_hierarchy    = pdb_hierarchy_poor,
    crystal_symmetry = xrs_poor.crystal_symmetry(),
    map_data         = target_map,
    do_all           = True,
    rotamer_manager  = rotamer_manager,
    sin_cos_table    = sin_cos_table,
    mon_lib_srv      = mon_lib_srv)
  result.pdb_hierarchy.write_pdb_file(file_name = "refined.pdb")
  ###
  sel = result.pdb_hierarchy.atom_selection_cache().selection("not chain Z")
  result_hierarchy = result.pdb_hierarchy.select(sel)
  pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_answer)
  pdb_inp.write_pdb_file(file_name = "answer.pdb")
  xrs_answer = pdb_inp.xray_structure_simple()
  dist = flex.max(flex.sqrt((xrs_answer.sites_cart() -
    result_hierarchy.atoms().extract_xyz()).dot()))
  print dist
  assert dist > 3.95, dist
def exercise_03(mon_lib_srv, ener_lib, verbose=0):
  #
  # normal run with real model
  #
  pdb_file = libtbx.env.find_in_repositories(
    relative_path="phenix_regression/pdb/2ERL_noH.pdb",
    test=os.path.isfile)
  if (pdb_file is None):
    print "Skipping exercise_03: input file not available"
    return
  if (verbose): log = sys.stdout
  else:         log = StringIO()
  params = mmtbx.monomer_library.pdb_interpretation.master_params.extract()
  params.nonbonded_weight = 16
  processed_pdb = mmtbx.monomer_library.pdb_interpretation.process(
    mon_lib_srv=mon_lib_srv,
    params=params,
    ener_lib=ener_lib,
    file_name=pdb_file,
    log=log)
  xray_structure = processed_pdb.xray_structure()
  restraints_manager = mmtbx.restraints.manager(
    geometry=processed_pdb.geometry_restraints_manager())
  structure_ = xray_structure.deep_copy_scatterers()
  gradients_calculator = \
    cartesian_dynamics.gradients_calculator_reciprocal_space(
      restraints_manager = restraints_manager,
      sites_cart         = xray_structure.sites_cart(),
      wc                 = 1)
  cartesian_dynamics.run(
    xray_structure       = xray_structure,
    gradients_calculator = gradients_calculator,
    temperature          = 300,
    n_steps              = 200,
    time_step            = 0.0005,
    log                  = log,
    verbose              = 1)
  rms1 = xray_structure.rms_difference(structure_)
  rms2 = structure_.rms_difference(xray_structure)
  assert rms1 == rms2
  rms = rms1
  if (verbose):
    print "rms between structures before and after dynamics = ", rms
  array_of_distances_between_each_atom = flex.sqrt(
    structure_.difference_vectors_cart(xray_structure).dot())
  if (verbose):
    flex.histogram(
      data=array_of_distances_between_each_atom,
      n_slots=12).show(format_cutoffs="%6.4f")
  n_rms = 5.3
  selected_by_rms = array_of_distances_between_each_atom > n_rms * rms
  outlier_sc = xray_structure.scatterers().select(selected_by_rms)
  if (outlier_sc.size() != 0):
    print "number of rms outliers:", outlier_sc.size()
    outlier_d = array_of_distances_between_each_atom.select(selected_by_rms)
    for sc, d in zip(outlier_sc, outlier_d):
      print sc.label, d
    raise RuntimeError("rms outliers.")
def fit_side_chain(self, clusters):
  rotamer_iterator = \
    mmtbx.refinement.real_space.fit_residue.get_rotamer_iterator(
      mon_lib_srv = self.mon_lib_srv,
      residue     = self.residue)
  if(rotamer_iterator is None): return
  selection_clash = self.co.clash_eval_selection
  selection_rsr   = self.co.rsr_eval_selection
  if(self.target_map is not None):
    start_target_value = self.get_target_value(
      sites_cart = self.residue.atoms().extract_xyz(),
      selection  = selection_rsr)
  sites_cart_start = self.residue.atoms().extract_xyz()
  sites_cart_first_rotamer = list(rotamer_iterator)[0][1]
  # From this point on the coordinates in residue are set to the initial
  # rotamer!
  self.residue.atoms().set_xyz(sites_cart_first_rotamer)
  axes = []
  atr = []
  for i, angle in enumerate(self.chi_angles[0]):
    cl = clusters[i]
    axes.append(flex.size_t(cl.axis))
    atr.append(flex.size_t(cl.atoms_to_rotate))
  #
  if(self.target_map is not None and self.xyzrad_bumpers is not None):
    # Get reference map values
    ref_map_vals = flex.double()
    for a in self.residue.atoms():
      key = "%s_%s_%s" % (
        a.parent().parent().parent().id, a.parent().resname,
        a.name.strip())
      ref_map_vals.append(self.cmv[key])
    # Get radii
    radii = mmtbx.refinement.real_space.get_radii(
      residue = self.residue, vdw_radii = self.vdw_radii)
    # Exclude rotatable H from clash calculation
    tmp = flex.size_t()
    for i in selection_clash:
      if(self.rotatable_hd[self.residue.atoms()[i].i_seq]): continue
      tmp.append(i)
    selection_clash = tmp[:]
    # Ad hoc: S or SE have larger peaks!
    if(self.residue.resname in ["MET", "MSE"]): scale = 100
    else:                                       scale = 3
    moving = ext.moving(
      sites_cart       = self.residue.atoms().extract_xyz(),
      sites_cart_start = sites_cart_start,
      radii            = radii,
      weights          = self.weights,
      bonded_pairs     = self.pairs,
      ref_map_max      = ref_map_vals * scale,
      ref_map_min      = ref_map_vals / 10)
    #
    ro = ext.fit(
      fixed                    = self.xyzrad_bumpers,
      axes                     = axes,
      rotatable_points_indices = atr,
      angles_array             = self.chi_angles,
      density_map              = self.target_map,
      moving                   = moving,
      unit_cell                = self.unit_cell,
      selection_clash          = selection_clash,
      selection_rsr            = selection_rsr, # select atoms to compute map target
      sin_table                = self.sin_cos_table.sin_table,
      cos_table                = self.sin_cos_table.cos_table,
      step                     = self.sin_cos_table.step,
      n                        = self.sin_cos_table.n)
  elif(self.target_map is not None and self.xyzrad_bumpers is None):
    ro = ext.fit(
      target_value             = start_target_value,
      axes                     = axes,
      rotatable_points_indices = atr,
      angles_array             = self.chi_angles,
      density_map              = self.target_map,
      all_points               = self.residue.atoms().extract_xyz(),
      unit_cell                = self.unit_cell,
      selection                = selection_rsr,
      sin_table                = self.sin_cos_table.sin_table,
      cos_table                = self.sin_cos_table.cos_table,
      step                     = self.sin_cos_table.step,
      n                        = self.sin_cos_table.n)
  else:
    ro = ext.fit(
      sites_cart_start         = sites_cart_start.deep_copy(),
      axes                     = axes,
      rotatable_points_indices = atr,
      angles_array             = self.chi_angles,
      all_points               = self.residue.atoms().extract_xyz(),
      sin_table                = self.sin_cos_table.sin_table,
      cos_table                = self.sin_cos_table.cos_table,
      step                     = self.sin_cos_table.step,
      n                        = self.sin_cos_table.n)
  sites_cart_result = ro.result()
  if(sites_cart_result.size() > 0):
    dist = None
    if(self.accept_only_if_max_shift_is_smaller_than is not None):
      dist = flex.max(flex.sqrt((sites_cart_start - sites_cart_result).dot()))
    if(dist is None):
      self.residue.atoms().set_xyz(sites_cart_result)
    else:
      if(dist is not None and
         dist < self.accept_only_if_max_shift_is_smaller_than):
        self.residue.atoms().set_xyz(sites_cart_result)
      else:
        self.residue.atoms().set_xyz(sites_cart_start)
  else:
    self.residue.atoms().set_xyz(sites_cart_start)
  if(self.m): self.m.add(residue = self.residue, state = "fitting")
  #
  # tune up
  if(self.target_map is not None):
    tune_up(
      target_map           = self.target_map,
      residue              = self.residue,
      mon_lib_srv          = self.mon_lib_srv,
      rotamer_manager      = self.rotamer_manager.rotamer_evaluator,
      unit_cell            = self.unit_cell,
      monitor              = self.m,
      torsion_search_start = -30,
      torsion_search_stop  = 30,
      torsion_search_step  = 1)
def run(pdb_hierarchy,
        target_map,
        unit_cell,
        real_space_gradients_delta,
        max_allowed_shift = 1.5,
        max_iterations = 50,
        log = None):
  lbfgs_termination_params = scitbx.lbfgs.termination_parameters(
    max_iterations = max_iterations)
  get_class = iotbx.pdb.common_residue_names_get_class
  def target(target_map, sites_cart, unit_cell):
    sites_frac = unit_cell.fractionalize(sites_cart)
    result = 0
    for site_frac in sites_frac:
      result += target_map.eight_point_interpolation(site_frac)
    return result
  for model in pdb_hierarchy.models():
    for chain in model.chains():
      for residue_group in chain.residue_groups():
        for conformer in residue_group.conformers():
          for residue in conformer.residues():
            atoms = residue.atoms()
            if (get_class(name=residue.resname) == "common_water" and
                len(atoms) > 1):
              if (log is not None):
                print >> log, "chain %s resname %s resseq %s" % (
                  chain.id, residue.resname, residue.resseq)
              sites_cart_start = atoms.extract_xyz()
              target_start = target(target_map, sites_cart_start, unit_cell)
              if (log is not None):
                print >> log, "  target_start: %6.4f" % target_start
              target_current = target_start
              sites_cart_best = sites_cart_start.deep_copy()
              shift_range = [-0.3, 0, 0.3]
              for x_shift in shift_range:
                for y_shift in shift_range:
                  for z_shift in shift_range:
                    shift = flex.vec3_double(
                      [(x_shift, y_shift, z_shift)] * sites_cart_start.size())
                    sites_cart = sites_cart_start + shift
                    residue.atoms().set_xyz(sites_cart)
                    minimized = mmtbx.refinement.real_space.rigid_body.refine(
                      residue                     = residue,
                      density_map                 = target_map,
                      geometry_restraints_manager = None,
                      real_space_target_weight    = 1,
                      real_space_gradients_delta  = real_space_gradients_delta,
                      lbfgs_termination_params    = lbfgs_termination_params,
                      unit_cell                   = unit_cell)
                    sites_cart = minimized.sites_cart_residue
                    distance_moved = flex.mean(
                      flex.sqrt((sites_cart - sites_cart_start).dot()))
                    t = target(target_map, sites_cart, unit_cell)
                    if (t >= target_current and
                        distance_moved < max_allowed_shift):
                      sites_cart_best = sites_cart.deep_copy()
                      target_current = t
              residue.atoms().set_xyz(sites_cart_best)
              target_final = target(target_map, sites_cart_best, unit_cell)
              distance_moved = flex.mean(
                flex.sqrt((sites_cart_best - sites_cart_start).dot()))
              if (log is not None):
                print >> log, "  target_final: %6.4f" % target_final
                print >> log, "  dist. moved : %6.4f" % distance_moved
def exercise(i_pdb, pdb_for_map, rotamer_manager, sin_cos_table,
             d_min=1.5, resolution_factor=0.1):
  # The best-fitting residue is a rotamer outlier (PHE 407); two scenarios:
  #   - the outlier fits the density perfectly,
  #   - the outlier fits it not so well.
  # Either way there is no better option than to keep the outlier unchanged.
  #
  # answer PDB
  pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_answer)
  pdb_inp.write_pdb_file(file_name="answer.pdb")
  mon_lib_srv = monomer_library.server.server()
  processed_pdb_file = monomer_library.pdb_interpretation.process(
    mon_lib_srv=mon_lib_srv,
    ener_lib=monomer_library.server.ener_lib(),
    raw_records=flex.std_string(pdb_answer.splitlines()),
    strict_conflict_handling=True,
    force_symmetry=True,
    log=None)
  xrs_answer = processed_pdb_file.xray_structure()
  # answer map
  pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_for_map)
  pdb_inp.write_pdb_file(file_name="for_map.pdb")
  xrs_map = pdb_inp.xray_structure_simple()
  f_calc = xrs_map.structure_factors(d_min=d_min).f_calc()
  fft_map = f_calc.fft_map(resolution_factor=resolution_factor)
  fft_map.apply_sigma_scaling()
  target_map = fft_map.real_map_unpadded()
  mtz_dataset = f_calc.as_mtz_dataset(column_root_label="FCmap")
  mtz_object = mtz_dataset.mtz_object()
  mtz_object.write(file_name="answer_%s.mtz" % str(i_pdb))
  # poor
  mon_lib_srv = monomer_library.server.server()
  processed_pdb_file = monomer_library.pdb_interpretation.process(
    mon_lib_srv=mon_lib_srv,
    ener_lib=monomer_library.server.ener_lib(),
    raw_records=flex.std_string(pdb_poor.splitlines()),
    strict_conflict_handling=True,
    force_symmetry=True,
    log=None)
  pdb_hierarchy_poor = processed_pdb_file.all_chain_proxies.pdb_hierarchy
  xrs_poor = processed_pdb_file.xray_structure()
  sites_cart_poor = xrs_poor.sites_cart()
  pdb_hierarchy_poor.write_pdb_file(file_name="poor.pdb")
  #
  grm = mmtbx.restraints.manager(
    geometry=processed_pdb_file.geometry_restraints_manager(
      show_energies=False),
    normalization=True)
  for i in [1, ]:
    print "-"*10
    result = mmtbx.refinement.real_space.fit_residues.run(
      pdb_hierarchy=pdb_hierarchy_poor,
      crystal_symmetry=xrs_poor.crystal_symmetry(),
      map_data=target_map,
      do_all=True,
      massage_map=False,
      rotamer_manager=rotamer_manager,
      sin_cos_table=sin_cos_table,
      mon_lib_srv=mon_lib_srv)
    pdb_hierarchy_poor = result.pdb_hierarchy
  #
  result.pdb_hierarchy.write_pdb_file(
    file_name="refined_%s.pdb" % str(i_pdb),
    crystal_symmetry=xrs_poor.crystal_symmetry())
  dist = flex.max(flex.sqrt((xrs_answer.sites_cart() -
    result.pdb_hierarchy.atoms().extract_xyz()).dot()))
  assert dist < 0.3, dist
def min_max_mean_shift(self):
  return "min,max,mean shift from start: %6.3f %6.3f %6.3f" % flex.sqrt(
    (self.sites_cart_start -
     self.model.get_xray_structure().sites_cart()).dot()
    ).min_max_mean().as_tuple()
def __init__(self,
             pdb_hierarchy,
             crystal_symmetry,
             angular_difference_threshold_deg=5.,
             sequence_identity_threshold=90.,
             quiet=False):
  h = pdb_hierarchy
  superposition_threshold = 2*sequence_identity_threshold - 100.
  n_atoms_all = h.atoms_size()
  s_str = "altloc ' ' and (protein or nucleotide)"
  h = h.select(h.atom_selection_cache().selection(s_str))
  h1 = iotbx.pdb.hierarchy.root()
  h1.append_model(h.models()[0].detached_copy())
  unit_cell = crystal_symmetry.unit_cell()
  result = {}
  if not quiet:
    print("Find groups of chains related by translational NCS")
  # double loop over chains to find matching pairs related by pure translation
  for c1 in h1.chains():
    c1.parent().remove_chain(c1)
    nchains = len(h1.models()[0].chains())
    if ([c1.is_protein(), c1.is_na()].count(True) == 0): continue
    r1 = list(c1.residues())
    c1_seq = "".join(c1.as_sequence())
    sc_1_tmp = c1.atoms().extract_xyz()
    h1_p1 = h1.expand_to_p1(crystal_symmetry=crystal_symmetry)
    for (ii, c2) in enumerate(h1_p1.chains()):
      orig_c2 = h1.models()[0].chains()[ii % nchains]
      r2 = list(c2.residues())
      c2_seq = "".join(c2.as_sequence())
      sites_cart_1, sites_cart_2 = None, None
      sc_2_tmp = c2.atoms().extract_xyz()
      # chains are identical
      if (c1_seq == c2_seq and sc_1_tmp.size() == sc_2_tmp.size()):
        sites_cart_1 = sc_1_tmp
        sites_cart_2 = sc_2_tmp
        p_identity = 100.
      # chains are not identical, do alignment
      else:
        align_obj = mmtbx.alignment.align(seq_a=c1_seq, seq_b=c2_seq)
        alignment = align_obj.extract_alignment()
        matches = alignment.matches()
        equal = matches.count("|")
        total = len(alignment.a) - alignment.a.count("-")
        p_identity = 100.*equal/max(1, total)
        if (p_identity > superposition_threshold):
          sites_cart_1 = flex.vec3_double()
          sites_cart_2 = flex.vec3_double()
          for i1, i2, match in zip(alignment.i_seqs_a, alignment.i_seqs_b,
                                   matches):
            if (i1 is not None and i2 is not None and match == "|"):
              r1i, r2i = r1[i1], r2[i2]
              assert r1i.resname == r2i.resname, [
                r1i.resname, r2i.resname, i1, i2]
              for a1 in r1i.atoms():
                for a2 in r2i.atoms():
                  if (a1.name == a2.name):
                    sites_cart_1.append(a1.xyz)
                    sites_cart_2.append(a2.xyz)
                    break
      # superpose two sequence-aligned chains
      if ([sites_cart_1, sites_cart_2].count(None) == 0):
        lsq_fit_obj = superpose.least_squares_fit(
          reference_sites = sites_cart_1,
          other_sites     = sites_cart_2)
        angle = lsq_fit_obj.r.rotation_angle()
        t_frac = unit_cell.fractionalize((sites_cart_1 - sites_cart_2).mean())
        t_frac = [math.modf(t)[0] for t in t_frac] # put into [-1,1]
        radius = flex.sum(flex.sqrt(
          (sites_cart_1 - sites_cart_1.mean()).dot())) / \
          sites_cart_1.size()*4./3.
        fracscat = min(c1.atoms_size(), c2.atoms_size())/n_atoms_all
        result.setdefault(frozenset([c1, orig_c2]), []).append(
          [p_identity, [lsq_fit_obj.r, t_frac, angle, radius, fracscat]])
      else:
        result.setdefault(frozenset([c1, orig_c2]), []).append(
          [p_identity, None])
  # Build graph
  g = graph.adjacency_list()
  vertex_handle = {}
  for key in result:
    seqid = result[key][0][0]
    sup = min(result[key], key=lambda s: 0 if s[1] is None else s[1][2])[1]
    result[key] = [seqid, sup]
    if ((seqid > sequence_identity_threshold) and
        (sup[2] < angular_difference_threshold_deg)):
      (c1, c2) = key
      if (c1 not in vertex_handle):
        vertex_handle[c1] = g.add_vertex(label=c1)
      if (c2 not in vertex_handle):
        vertex_handle[c2] = g.add_vertex(label=c2)
      g.add_edge(vertex1=vertex_handle[c1], vertex2=vertex_handle[c2])
  # Do connected component analysis and compose final tNCS pairs object
  components = connected_component_algorithm.connected_components(g)
  import itertools
  self.ncs_pairs = []
  self.tncsresults = [0, "", [], 0.0]
  for (i, group) in enumerate(components):
    chains = [g.vertex_label(vertex=v) for v in group]
    fracscats = []
    radii = []
    for pair in itertools.combinations(chains, 2):
      sup = result[frozenset(pair)][1]
      fracscats.append(sup[-1])
      radii.append(sup[-2])
    fs = sum(fracscats)/len(fracscats)
    self.tncsresults[3] = fs # store fracscat in array
    rad = sum(radii)/len(radii)
    #import code, traceback; code.interact(local=locals(), banner="".join( traceback.format_stack(limit=10) ) )
    maxorder = 1
    vectors = []
    previous_id = next(itertools.combinations(chains, 2))[0].id
    for pair in itertools.combinations(chains, 2):
      sup = result[frozenset(pair)][1]
      ncs_pair = ext.pair(
        r = sup[0],
        t = sup[1],
        radius = rad,
        radius_estimate = rad,
        fracscat = fs,
        rho_mn = flex.double(), # rho_mn undefined, needs to be set later
        id = i)
      self.ncs_pairs.append(ncs_pair)
      # show tNCS pairs in group
      fmt = "group %d chains %s <> %s angle: %4.2f trans.vect.: (%s) fracscat: %5.3f"
      t = ",".join([("%6.3f" % t_).strip() for t_ in sup[1]]).strip()
      if not quiet:
        print(fmt % (i, pair[0].id, pair[1].id, sup[2], t, fs))
      if pair[0].id == previous_id:
        maxorder += 1
        orthoxyz = unit_cell.orthogonalize(sup[1])
        vectors.append((sup[1], orthoxyz, sup[2]))
      else:
        previous_id = pair[0].id
        maxorder = 1
        vectors = []
    if maxorder > self.tncsresults[0]:
      self.tncsresults[0] = maxorder
      self.tncsresults[1] = previous_id
      self.tncsresults[2] = vectors
  if not quiet:
    print("Largest TNCS order, peptide chain, fracvector, orthvector, angle, fracscat = ",
          str(self.tncsresults))
def generate_view_data(self):
  from scitbx.array_family import flex
  from scitbx import graphics_utils
  settings = self.settings
  data_for_colors = data_for_radii = None
  if not self.fullprocessarray:
    return
  data = self.data # self.work_array.data()
  sigmas = self.sigmas
  if (isinstance(data, flex.double) and data.all_eq(0)):
    data = flex.double(data.size(), 1)
  if ((self.multiplicities is not None) and
      (settings.scale_colors_multiplicity)):
    data_for_colors = self.multiplicities.data().as_double()
    assert data_for_colors.size() == data.size()
  elif (settings.sqrt_scale_colors) and (isinstance(data, flex.double)):
    data_for_colors = flex.sqrt(flex.abs(data))
  elif isinstance(data, flex.complex_double):
    data_for_colors = self.radians
    foms_for_colours = self.foms
    # assuming last part of the labels indicates the phase label as in
    # ["FCALC","PHICALC"]
    self.colourlabel = self.miller_array.info().labels[-1]
  elif (settings.sigma_color) and sigmas is not None:
    data_for_colors = sigmas.as_double()
    self.colourlabel = self.miller_array.info().labels[-1]
  else:
    data_for_colors = flex.abs(data.deep_copy())
  uc = self.work_array.unit_cell()
  self.min_dist = min(uc.reciprocal_space_vector((1, 1, 1))) * self.renderscale
  min_radius = 0.05 * self.min_dist
  max_radius = 0.45 * self.min_dist
  if ((self.multiplicities is not None) and
      (settings.scale_radii_multiplicity)):
    data_for_radii = self.multiplicities.data().as_double()
    if (settings.sigma_radius) and sigmas is not None:
      data_for_radii = sigmas * self.multiplicities.as_double()
    assert data_for_radii.size() == data.size()
  elif (settings.sigma_radius) and sigmas is not None:
    data_for_radii = sigmas.as_double()
  else:
    data_for_radii = nth_power_scale(flex.abs(data.deep_copy()),
                                     settings.nth_power_scale_radii)
  if (settings.slice_mode):
    data = data.select(self.slice_selection)
    if (not settings.keep_constant_scale):
      data_for_radii = data_for_radii.select(self.slice_selection)
      data_for_colors = data_for_colors.select(self.slice_selection)
      foms_for_colours = foms_for_colours.select(self.slice_selection)
  if isinstance(data, flex.complex_double):
    if self.isUsingFOMs():
      colors = graphics_utils.colour_by_phi_FOM(data_for_colors,
                                                foms_for_colours)
    else:
      colors = graphics_utils.colour_by_phi_FOM(data_for_colors, None)
  elif (settings.color_scheme in ["rainbow", "heatmap", "redblue"]):
    colors = graphics_utils.color_by_property(
      properties=data_for_colors,
      selection=flex.bool(data_for_colors.size(), True),
      color_all=False,
      gradient_type=settings.color_scheme)
  elif (settings.color_scheme == "grayscale"):
    colors = graphics_utils.grayscale_by_property(
      properties=data_for_colors,
      selection=flex.bool(data_for_colors.size(), True),
      shade_all=False,
      invert=settings.black_background)
  else:
    if (settings.black_background):
      base_color = (1.0, 1.0, 1.0)
    else:
      base_color = (0.0, 0.0, 0.0)
    colors = flex.vec3_double(data_for_colors.size(), base_color)
  if (settings.slice_mode) and (settings.keep_constant_scale):
    colors = colors.select(self.slice_selection)
    data_for_radii = data_for_radii.select(self.slice_selection)
  #if (settings.sqrt_scale_radii) and (not settings.scale_radii_multiplicity):
  #  data_for_radii = flex.sqrt(flex.abs(data_for_radii))
  if len(data_for_radii):
    dat2 = flex.abs(flex.double(
      [e for e in data_for_radii if not math.isnan(e)]))
    # don't divide by 0 if dealing with selection of Rfree array where all
    # values happen to be zero
    scale = max_radius/(flex.max(dat2) + 0.001)
    radii = data_for_radii * (self.settings.scale * scale)
    assert radii.size() == colors.size()
  else:
    radii = flex.double()
    max_radius = 0
  self.radii = radii
  self.max_radius = max_radius
  self.min_radius = min_radius
  self.colors = colors
  if isinstance(data, flex.complex_double):
    self.foms = foms_for_colours
def exercise_3(mon_lib_srv, ener_lib): #test torsion restraints for use_reference in ['True', 'False', 'top_out', 'None']: processed_pdb_file = pdb_interpretation.process( mon_lib_srv=mon_lib_srv, ener_lib=ener_lib, raw_records=flex.std_string(pdb_str_2.splitlines()), strict_conflict_handling=True, force_symmetry=True, log=None) grm = processed_pdb_file.geometry_restraints_manager() xrs2 = processed_pdb_file.xray_structure(show_summary=False) awl2 = processed_pdb_file.all_chain_proxies.pdb_hierarchy.atoms_with_labels( ) pdb2 = processed_pdb_file.all_chain_proxies.pdb_hierarchy pdb_inp3 = iotbx.pdb.input(source_info=None, lines=pdb_str_3) xrs3 = pdb_inp3.xray_structure_simple() ph3 = pdb_inp3.construct_hierarchy() ph3.atoms().reset_i_seq() awl3 = ph3.atoms_with_labels() sites_cart_reference = flex.vec3_double() selection = flex.size_t() min_selection = flex.size_t() reference_names = [ "N", "CA", "CB", "CG", "CD", "NE", "CZ", "NH1", "NH2" ] minimize_names = ["CG", "CD", "NE", "CZ", "NH1", "NH2"] for a2, a3 in zip(tuple(awl2), tuple(awl3)): assert a2.resname == a3.resname assert a2.name == a3.name assert a2.i_seq == a3.i_seq if (a2.resname == "ARG" and a2.name.strip() in reference_names): selection.append(a2.i_seq) sites_cart_reference.append(a3.xyz) if a2.name.strip() in minimize_names: min_selection.append(a2.i_seq) assert selection.size() == len(reference_names) selection_bool = flex.bool(xrs2.scatterers().size(), min_selection) if (use_reference == 'True'): grm.add_chi_torsion_restraints_in_place( pdb_hierarchy=pdb2, sites_cart=sites_cart_reference, selection=selection, sigma=2.5) elif (use_reference == 'top_out'): grm.add_chi_torsion_restraints_in_place( pdb_hierarchy=pdb2, sites_cart=sites_cart_reference, selection=selection, sigma=2.5, limit=180.0, top_out_potential=True) elif (use_reference == 'None'): grm.add_chi_torsion_restraints_in_place( pdb_hierarchy=pdb2, sites_cart=sites_cart_reference, selection=selection, sigma=2.5) grm.remove_chi_torsion_restraints_in_place(selection=selection) d1 = flex.mean( flex.sqrt((xrs2.sites_cart().select(min_selection) - xrs3.sites_cart().select(min_selection)).dot())) print "distance start (use_reference: %s): %6.4f" % ( str(use_reference), d1) assert d1 > 4.0 assert approx_equal( flex.max( flex.sqrt((xrs2.sites_cart().select(~selection_bool) - xrs3.sites_cart().select(~selection_bool)).dot())), 0) from cctbx import geometry_restraints import mmtbx.refinement.geometry_minimization import scitbx.lbfgs grf = geometry_restraints.flags.flags(default=True) grf.nonbonded = False sites_cart = xrs2.sites_cart() minimized = mmtbx.refinement.geometry_minimization.lbfgs( sites_cart=sites_cart, correct_special_position_tolerance=1.0, geometry_restraints_manager=grm, sites_cart_selection=flex.bool(sites_cart.size(), min_selection), geometry_restraints_flags=grf, lbfgs_termination_params=scitbx.lbfgs.termination_parameters( max_iterations=5000)) xrs2.set_sites_cart(sites_cart=sites_cart) d2 = flex.mean( flex.sqrt((xrs2.sites_cart().select(min_selection) - xrs3.sites_cart().select(min_selection)).dot())) print "distance final (use_reference: %s): %6.4f" % ( str(use_reference), d2) if (use_reference in ['True', 'top_out']): assert d2 < 0.02, d2 else: assert d2 > 4.0, d2 assert approx_equal( flex.max( flex.sqrt((xrs2.sites_cart().select(~selection_bool) - xrs3.sites_cart().select(~selection_bool)).dot())), 0) #test torsion manipulation grm.remove_chi_torsion_restraints_in_place() grm.remove_chi_torsion_restraints_in_place() sites_cart_reference = [] 
selections_reference = [] for model in pdb2.models(): for chain in model.chains(): for residue in chain.residues(): sites_cart_reference.append(residue.atoms().extract_xyz()) selections_reference.append(residue.atoms().extract_i_seq()) #one residue at a time (effectively chi angles only) for sites_cart, selection in zip(sites_cart_reference, selections_reference): grm.add_chi_torsion_restraints_in_place(pdb_hierarchy=pdb2, sites_cart=sites_cart, selection=selection) assert grm.get_n_chi_torsion_proxies() == 6 grm.remove_chi_torsion_restraints_in_place() #all sites at once, chi angles only sites_cart = xrs2.sites_cart() grm.add_chi_torsion_restraints_in_place(pdb_hierarchy=pdb2, sites_cart=sites_cart, selection=None, chi_angles_only=True) assert grm.get_n_chi_torsion_proxies() == 6 #all sites at once, all torsions grm.add_chi_torsion_restraints_in_place(pdb_hierarchy=pdb2, sites_cart=sites_cart, selection=None, chi_angles_only=False) # grm.get_chi_torsion_proxies().show_sorted( # by_value='residual', # sites_cart=sites_cart, # site_labels=[atom.id_str() for atom in pdb2.atoms()]) assert grm.get_n_chi_torsion_proxies() == 12, grm.get_n_chi_torsion_proxies()
def twin_the_data_and_analyse(twin_operator, twin_fraction=0.2): out_string = StringIO() miller_array = random_data(35).map_to_asu() miller_array = miller_array.f_as_f_sq() cb_op = sgtbx.change_of_basis_op(twin_operator) miller_array_mod, miller_array_twin = miller_array.common_sets( miller_array.change_basis(cb_op).map_to_asu()) twinned_miller = miller_array_mod.customized_copy( data = (1.0-twin_fraction)*miller_array_mod.data() + twin_fraction*miller_array_twin.data(), sigmas = flex.sqrt( flex.pow( ((1.0-twin_fraction)*miller_array_mod.sigmas()),2.0)+\ flex.pow( ((twin_fraction)*miller_array_twin.sigmas()),2.0)) ) twinned_miller.set_observation_type(miller_array.observation_type()) twin_anal_object = t_a.twin_analyses(twinned_miller, out=out_string, verbose=-100) index = twin_anal_object.twin_summary.most_worrysome_twin_law assert approx_equal(twin_anal_object.twin_summary.britton_alpha[index], twin_fraction, eps=0.1) assert approx_equal(twin_anal_object.twin_law_dependent_analyses[index]. ml_murray_rust.estimated_alpha, twin_fraction, eps=0.1) ## Untwinned data standards if twin_fraction == 0: ## L-test assert approx_equal(twin_anal_object.l_test.mean_l, 0.50, eps=0.1) ## Wilson ratios assert approx_equal(twin_anal_object.twin_summary.i_ratio, 2.00, eps=0.1) ## H-test assert approx_equal( twin_anal_object.twin_law_dependent_analyses[index].h_test.mean_h, 0.50, eps=0.1) ## Perfect twin standards if twin_fraction == 0.5: assert approx_equal(twin_anal_object.l_test.mean_l, 0.375, eps=0.1) assert approx_equal(twin_anal_object.twin_summary.i_ratio, 1.50, eps=0.1) assert approx_equal( twin_anal_object.twin_law_dependent_analyses[index].h_test.mean_h, 0.00, eps=0.1) ## Just make sure we actually detect significant twinning if twin_fraction > 0.10: assert (twin_anal_object.twin_summary.maha_l > 3.0) ## The patterson origin peak should be smallish ... assert (twin_anal_object.twin_summary.patterson_p_value > 0.01) # and the brief test should be passed as well answer = t_a.twin_analyses_brief(twinned_miller, out=out_string, verbose=-100) if twin_fraction > 0.10: assert answer is True
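# Aside (editor's sketch, not part of the test above): the customized_copy call
# builds the twinned sigmas by standard error propagation for the linear
# combination I_twin = (1-a)*I1 + a*I2, i.e.
# sigma_twin = sqrt(((1-a)*sigma1)**2 + (a*sigma2)**2).
# A minimal, self-contained version of that arithmetic with flex arrays:
from cctbx.array_family import flex

def twin_sigmas(sigmas1, sigmas2, twin_fraction):
    # propagate sigmas through I_twin = (1-a)*I1 + a*I2
    a = twin_fraction
    return flex.sqrt(flex.pow2((1.0 - a) * sigmas1) + flex.pow2(a * sigmas2))

# usage: twin_sigmas(flex.double([1.0, 2.0]), flex.double([1.5, 0.5]), 0.2)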
def get_data_from_xac(params, xac): if xac.endswith(".pkl"): tmp = pickle.load(open(xac)) else: tmp = xds_ascii.XDS_ASCII(xac) sel_remove = flex.bool(tmp.iobs.size(), False) if params.min_peak is not None: sel = tmp.peak < params.min_peak sel_remove |= sel elif params.min_peak_percentile is not None: q = numpy.percentile(tmp.peak, params.min_peak_percentile) print "percentile %.2f %s" % (q, xac) sel = tmp.peak < q sel_remove |= sel if params.skip_rejected: sel_remove |= (tmp.sigma_iobs <= 0) if params.dmin is not None: sel_remove |= ~tmp.as_miller_set().resolution_filter_selection(d_min=params.dmin) if params.correct_peak: sel_remove |= (tmp.peak < 1) # remove PEAK==0 # Remove selected print "DEBUG:: removing %d reflections" % sel_remove.count(True) #sum(sel_remove)# tmp.remove_selection(sel_remove) if not params.skip_rejected: tmp.sigma_iobs = flex.abs(tmp.sigma_iobs) # Correct I,sigI if needed if params.correct_peak: tmp.iobs *= tmp.peak * .01 tmp.sigma_iobs *= tmp.peak * .01 if params.cancel_rlp: tmp.iobs /= tmp.rlp tmp.sigma_iobs /= tmp.rlp if params.polarization.correct: # Only works with single-panel detector!! # Assumes detector fast = (1,0,0), slow = (0,1,0) sin_sq_2theta = tmp.symm.unit_cell().sin_sq_two_theta(tmp.indices, tmp.wavelength) cos_sq_2theta = 1. - sin_sq_2theta sin_theta = tmp.wavelength / tmp.symm.unit_cell().d(tmp.indices) / 2. Eppi = numpy.cross(params.polarization.plane_normal, params.polarization.incident_beam_direction) Eppi /= numpy.linalg.norm(Eppi) S = flex.vec3_double(tmp.xd - tmp.orgx, tmp.yd - tmp.orgy, flex.double(tmp.xd.size(), tmp.distance/tmp.qx)) S /= S.norms() zp = S.dot(Eppi.tolist()) * 2. * sin_theta cosrho = zp / flex.sqrt(sin_sq_2theta) P0 = 0.5 * (1. + cos_sq_2theta) PP = (params.polarization.fraction - 0.5) * (2.*cosrho**2 - 1.) * sin_sq_2theta P = P0 - PP # Apply correction tmp.iobs /= P tmp.sigma_iobs /= P if 0: # debug for x, y, p in zip(tmp.xd, tmp.yd, P): print "pdebug:: %.2f %.2f %.4e" % (x, y, p) return tmp
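# Aside (editor's sketch): the polarization block above evaluates a Kahn-style
# factor P = P0 - P' with P0 = (1 + cos^2(2theta))/2 and
# P' = (fraction - 1/2) * cos(2*rho) * sin^2(2theta); the code's
# (2.*cosrho**2 - 1.) is cos(2*rho). A scalar version under those assumptions
# (names here are illustrative, not the XDS_ASCII API):
import math

def polarization_factor(two_theta, rho, fraction):
    # two_theta and rho in radians; fraction from ~0.5 (unpolarized) to 1.0
    p0 = 0.5 * (1.0 + math.cos(two_theta) ** 2)
    pp = (fraction - 0.5) * math.cos(2.0 * rho) * math.sin(two_theta) ** 2
    return p0 - pp

# usage: corrected_iobs = iobs / polarization_factor(0.4, 0.1, 0.99)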
def run(args): import libtbx.load_env usage = "%s [options]" % libtbx.env.dispatcher_name parser = OptionParser( usage=usage, phil=phil_scope, check_format=False, epilog=help_message ) params, options, args = parser.parse_args( show_diff_phil=True, return_unhandled=True ) assert len(args) == 1 from iotbx.reflection_file_reader import any_reflection_file intensities = None f = args[0] arrays = any_reflection_file(f).as_miller_arrays(merge_equivalents=False) for ma in arrays: print(ma.info().labels) if ma.info().labels == ["I", "SIGI"]: intensities = ma elif ma.info().labels == ["IMEAN", "SIGIMEAN"]: intensities = ma elif ma.info().labels == ["I(+)", "SIGI(+)", "I(-)", "SIGI(-)"]: intensities = ma assert intensities is not None if params.d_min is not None: intensities = intensities.resolution_filter(d_min=params.d_min) from cctbx.array_family import flex # see also: # cctbx/miller/merge_equivalents.h # cctbx/miller/equivalent_reflection_merging.tex # this should calculate the external variance, i.e. V(y) = sum(v_i) merging_external = intensities.merge_equivalents(use_internal_variance=False) multiplicities = merging_external.redundancies().data() external_sigmas = merging_external.array().sigmas() # sigmas should be bigger not smaller external_sigmas *= flex.sqrt(multiplicities.as_double()) # set the sigmas to 1, and calculate the mean intensities and internal variances intensities_copy = intensities.customized_copy( sigmas=flex.double(intensities.size(), 1) ) merging_internal = intensities_copy.merge_equivalents() merged_intensities = merging_internal.array() internal_sigmas = merging_internal.array().sigmas() # sigmas should be bigger not smaller internal_sigmas *= flex.sqrt(multiplicities.as_double()) # select only those reflections with sufficient repeat observations sel = multiplicities > 3 external_sigmas = external_sigmas.select(sel) internal_sigmas = internal_sigmas.select(sel) merged_intensities = merged_intensities.select(sel) # what we want to plot/do linear regression with y = flex.pow2(internal_sigmas / merged_intensities.data()) x = flex.pow2(external_sigmas / merged_intensities.data()) sel = (x < 1) & (y < 1) x = x.select(sel) y = y.select(sel) # set backend before importing pyplot import matplotlib # matplotlib.use('Agg') linreg = flex.linear_regression(x, y) linreg.show_summary() import math print(1 / math.sqrt(linreg.slope() * linreg.y_intercept())) # x = -flex.log10(x) # y = -flex.log10(y) x = 1 / x y = 1 / y from matplotlib import pyplot pyplot.scatter(x, y, marker="+", s=20, alpha=1, c="black") pyplot.show() pyplot.clf() # chi^2 plot vs resolution # i.e. <var(int)>/<var(ext)> # where var(ext) and var(int) are as defined in equations 4 & 5 respectively # in Blessing (1997) internal_var = merged_intensities.customized_copy(data=flex.pow2(internal_sigmas)) external_var = merged_intensities.customized_copy(data=flex.pow2(external_sigmas)) n_bins = 10 internal_var.setup_binner(n_bins=n_bins) external_var.use_binning_of(internal_var) mean_internal = internal_var.mean(use_binning=True) mean_external = external_var.mean(use_binning=True) y = [mean_internal.data[i + 1] / mean_external.data[i + 1] for i in range(n_bins)] x = mean_internal.binner.bin_centers(2) pyplot.scatter(x, y) pyplot.xlabel("1/d^2") pyplot.ylabel("<var(int)>/<var(ext)>") pyplot.show() pyplot.clf() return
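# Aside (editor's sketch): the script above contrasts two variance estimates
# for a weighted mean of n repeat observations y_i with sigmas s_i and weights
# w_i = 1/s_i**2 (cf. equations 4 & 5 in Blessing, 1997). The usual forms are
# external V = 1/sum(w_i) and
# internal V = sum(w_i*(y_i - ybar)**2) / ((n - 1)*sum(w_i)).
# A plain-Python version under those assumptions:
def internal_external_variance(ys, sigmas):
    ws = [1.0 / s ** 2 for s in sigmas]
    ybar = sum(w * y for w, y in zip(ws, ys)) / sum(ws)
    v_ext = 1.0 / sum(ws)
    v_int = sum(w * (y - ybar) ** 2 for w, y in zip(ws, ys)) / ((len(ys) - 1) * sum(ws))
    return v_int, v_ext

# usage: internal_external_variance([10.0, 11.0, 9.5], [1.0, 1.2, 0.9])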
def plot_projections( projections, filename=None, colours=None, marker_size=3, font_size=6, gridsize=None, label_indices=False, epochs=None, colour_map=None, ): projections_all = projections # http://matplotlib.org/faq/howto_faq.html#generate-images-without-having-a-window-appear matplotlib.use("Agg") # use a non-interactive backend from matplotlib import pylab, pyplot if epochs is not None and colour_map is not None: epochs = flex.double(epochs) epochs -= flex.min(epochs) epochs /= flex.max(epochs) cmap = matplotlib.cm.get_cmap(colour_map) colours = [cmap(e) for e in epochs] elif colours is None or len(colours) == 0: colours = ["b"] * len(projections_all) elif len(colours) < len(projections_all): colours = colours * len(projections_all) fig = pyplot.figure() pyplot.scatter([0], [0], marker="+", c="0.75", s=100) cir = pylab.Circle((0, 0), radius=1.0, fill=False, color="0.75") pylab.gca().add_patch(cir) if gridsize is not None: x = flex.double() y = flex.double() for i, projections in enumerate(projections_all): x_, y_ = projections.parts() x.extend(x_) y.extend(y_) hb = pyplot.hexbin(x, y, gridsize=gridsize, linewidths=0.2) pyplot.colorbar(hb) else: for i, projections in enumerate(projections_all): x, y = projections.parts() pyplot.scatter( x.as_numpy_array(), y.as_numpy_array(), c=colours[i], s=marker_size, edgecolors="none", ) if label_indices: for j, (hkl, proj) in enumerate(zip(label_indices, projections)): # hack to not write two labels on top of each other p1, p2 = (projections - proj).parts() if (flex.sqrt(flex.pow2(p1) + flex.pow2(p2)) < 1e-3).iselection()[0] != j: continue pyplot.text(proj[0], proj[1], str(hkl), fontsize=font_size) fig.axes[0].set_aspect("equal") pyplot.xlim(-1.1, 1.1) pyplot.ylim(-1.1, 1.1) if filename is not None: pyplot.savefig(filename, dpi=300)
def print_detailed_dynamics_stats(self): # Overall data print('\n', file=self.log) print( ' MC | Temperature (K) | Vscale | Etot = Ekin + Echem + wxExray', file=self.log) print( ' | (sys) (pro) (sol) | Fac T(K) | Ekin Echem wx Exray', file=self.log) print( ' ~E~ {0:5d} | {1:6.1f} {2:6.1f} {3:6.1f} | {4:4.1f} {5:5.1f} | {6:6.1f} {7:6.1f} {8:6.1f} {9:6.1f}' .format( self.er_data.macro_cycle, self.kt.temperature, self.non_solvent_kt.temperature, self.solvent_kt.temperature, self.v_factor, self.kt_vscale_remove.temperature, self.kt.kinetic_energy, self.stereochemistry_residuals.residual_sum, self.xray_target_weight, self.target_functor(compute_gradients=False).target_work() * self.fmodel_copy.f_calc_w().data().size(), ), file=self.log) print('\n', file=self.log) # Atomistic histograms # - Kinetic energy # - Xray grads # - Geo grads self.atomic_ke = 0.5 * self.weights * self.vxyz.dot() self.atomic_wxray_g = self.xray_gradient * self.xray_target_weight self.atomic_wchem_g = self.stereochemistry_residuals.gradients * self.chem_target_weight def show_histogram(data, n_slots=50, out=None, prefix=""): if (out is None): out = sys.stdout print('\n' + prefix, file=out) # Stats data_basic_stats = scitbx.math.basic_statistics(data) print('\n Number : %7d ' % (data_basic_stats.n), file=out) print(' Min : %7.4f ' % (data_basic_stats.min), file=out) print(' Max : %7.4f ' % (data_basic_stats.max), file=out) print(' Mean : %7.4f ' % (data_basic_stats.mean), file=out) print(' Stdev : %7.4f ' % (data_basic_stats.biased_standard_deviation), file=out) print(' Skew : %7.4f ' % (data_basic_stats.skew), file=out) print(' Sum : %7.4f ' % (data_basic_stats.sum), file=out) # Histo histogram = flex.histogram(data=data, n_slots=n_slots) low_cutoff = histogram.data_min() for i, n in enumerate(histogram.slots()): high_cutoff = histogram.data_min() + histogram.slot_width() * ( i + 1) print("%7.3f - %7.3f: %d" % (low_cutoff, high_cutoff, n), file=out) low_cutoff = high_cutoff out.flush() return histogram # Select for selection_type in ['System', 'Non_solvent', 'Solvent']: print('\n\n', file=self.log) if selection_type == 'System': selection = self.er_data.all_sel elif selection_type == 'Non_solvent': selection = ~self.er_data.solvent_sel elif selection_type == 'Solvent': selection = self.er_data.solvent_sel else: break # Data for histogram_type in ['Kinetic_energy', 'Xray_grad', 'Chem_grad']: if histogram_type == 'Kinetic_energy': data = self.atomic_ke.select(selection) elif histogram_type == 'Xray_grad': data = flex.sqrt( self.atomic_wxray_g.select(selection).dot()) elif histogram_type == 'Chem_grad': data = flex.sqrt( self.atomic_wchem_g.select(selection).dot()) else: break # Histogram show_histogram(data=data, out=self.log, prefix=str(self.er_data.macro_cycle) + '_' + selection_type + '_' + histogram_type)
def match_with_reference(self, other): ''' Match reflections with another set of reflections. :param other: The reflection table to match against :return: The matches ''' from collections import defaultdict import __builtin__ logger.info("Matching reference spots with predicted reflections") logger.info(' %d observed reflections input' % len(other)) logger.info(' %d reflections predicted' % len(self)) # Get the miller index, entering flag and turn number for # Both sets of reflections i1 = self['id'] h1 = self['miller_index'] e1 = self['entering'].as_int() x1, y1, z1 = self['xyzcal.px'].parts() p1 = self['panel'] i2 = other['id'] h2 = other['miller_index'] e2 = other['entering'].as_int() x2, y2, z2 = other['xyzcal.px'].parts() p2 = other['panel'] class Match(object): def __init__(self): self.a = [] self.b = [] # Create the match lookup lookup = defaultdict(Match) for i in range(len(self)): item = h1[i] + (e1[i], i1[i], p1[i]) lookup[item].a.append(i) # Add matches from input reflections for i in range(len(other)): item = h2[i] + (e2[i], i2[i], p2[i]) if item in lookup: lookup[item].b.append(i) # Create the list of matches match1 = [] match2 = [] for item, value in lookup.iteritems(): if len(value.b) == 0: continue elif len(value.a) == 1 and len(value.b) == 1: match1.append(value.a[0]) match2.append(value.b[0]) else: matched = {} for i in value.a: d = [] for j in value.b: dx = x1[i] - x2[j] dy = y1[i] - y2[j] dz = z1[i] - z2[j] d.append((i, j, dx**2 + dy**2 + dz**2)) i, j, d = __builtin__.min(d, key=lambda x: x[2]) if j not in matched: matched[j] = (i, d) elif d < matched[j][1]: matched[j] = (i, d) for key1, value1 in matched.iteritems(): match1.append(value1[0]) match2.append(key1) # Select everything which matches sind = flex.size_t(match1) oind = flex.size_t(match2) # Sort by self index sort_index = flex.size_t( __builtin__.sorted(range(len(sind)), key=lambda x: sind[x])) sind = sind.select(sort_index) oind = oind.select(sort_index) s2 = self.select(sind) o2 = other.select(oind) h1 = s2['miller_index'] h2 = o2['miller_index'] e1 = s2['entering'] e2 = o2['entering'] assert (h1 == h2).all_eq(True) assert (e1 == e2).all_eq(True) x1, y1, z1 = s2['xyzcal.px'].parts() x2, y2, z2 = o2['xyzcal.px'].parts() distance = flex.sqrt((x1 - x2)**2 + (y1 - y2)**2 + (z1 - z2)**2) mask = distance < 2 logger.info(' %d reflections matched' % len(o2)) logger.info(' %d reflections accepted' % mask.count(True)) self.set_flags(sind.select(mask), self.flags.reference_spot) self.set_flags(sind.select(o2.get_flags(self.flags.strong)), self.flags.strong) self.set_flags(sind.select(o2.get_flags(self.flags.indexed)), self.flags.indexed) self.set_flags( sind.select(o2.get_flags(self.flags.used_in_refinement)), self.flags.used_in_refinement) other_matched_indices = oind.select(mask) other_unmatched_mask = flex.bool(len(other), True) other_unmatched_mask.set_selected( other_matched_indices, flex.bool(len(other_matched_indices), False)) other_matched = other.select(other_matched_indices) other_unmatched = other.select(other_unmatched_mask) for key, column in self.select(sind.select(mask)).cols(): other_matched[key] = column mask2 = flex.bool(len(self), False) mask2.set_selected(sind.select(mask), True) return mask2, other_matched, other_unmatched
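# Aside (editor's sketch): when several predictions share one lookup key
# (hkl, entering, id, panel), the matcher above keeps, for each observed spot,
# the candidate with the smallest squared centroid distance. The tie-break in
# isolation:
def closest_pairs(candidates):
    # candidates: iterable of (i_pred, j_obs, dist_sq); keep the best i per j
    best = {}
    for i, j, d2 in candidates:
        if j not in best or d2 < best[j][1]:
            best[j] = (i, d2)
    return sorted((i, j) for j, (i, _) in best.items())

# usage: closest_pairs([(0, 5, 1.0), (1, 5, 0.2), (2, 6, 0.7)]) -> [(1, 5), (2, 6)]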
def fit_side_chain(self, clusters): rotamer_iterator = \ mmtbx.refinement.real_space.fit_residue.get_rotamer_iterator( mon_lib_srv = self.mon_lib_srv, residue = self.residue) if (rotamer_iterator is None): return #selection_rsr = flex.size_t(flatten(clusters[0].vector)) selection_clash = self.co.clash_eval_selection selection_rsr = self.co.rsr_eval_selection if (self.target_map is not None): start_target_value = self.get_target_value( sites_cart=self.residue.atoms().extract_xyz(), selection=selection_rsr) sites_cart_start = self.residue.atoms().extract_xyz() sites_cart_first_rotamer = list(rotamer_iterator)[0][1] self.residue.atoms().set_xyz(sites_cart_first_rotamer) axes = [] atr = [] for i, angle in enumerate(self.chi_angles[0]): cl = clusters[i] axes.append(flex.size_t(cl.axis)) atr.append(flex.size_t(cl.atoms_to_rotate)) sites = self.residue.atoms().extract_xyz() if (self.target_map is not None and self.xyzrad_bumpers is not None): # Get vdW radii radii = flex.double() atom_names = [] for a in self.residue.atoms(): atom_names.append(a.name.strip()) converter = iotbx.pdb.residue_name_plus_atom_names_interpreter( residue_name=self.residue.resname, atom_names=atom_names) mon_lib_names = converter.atom_name_interpretation.mon_lib_names() for n in mon_lib_names: try: radii.append(self.vdw_radii[n.strip()] - 0.25) except KeyError: radii.append(1.5) # XXX U, Uranium, OXT are problems! # xyzrad_residue = ext.xyzrad(sites_cart=sites, radii=radii) # ro = ext.fit(target_value=start_target_value, xyzrad_bumpers=self.xyzrad_bumpers, axes=axes, rotatable_points_indices=atr, angles_array=self.chi_angles, density_map=self.target_map, all_points=xyzrad_residue, unit_cell=self.unit_cell, selection_clash=selection_clash, selection_rsr=selection_rsr, sin_table=self.sin_cos_table.sin_table, cos_table=self.sin_cos_table.cos_table, step=self.sin_cos_table.step, n=self.sin_cos_table.n) elif (self.target_map is not None and self.xyzrad_bumpers is None): ro = ext.fit(target_value=start_target_value, axes=axes, rotatable_points_indices=atr, angles_array=self.chi_angles, density_map=self.target_map, all_points=sites, unit_cell=self.unit_cell, selection=selection_rsr, sin_table=self.sin_cos_table.sin_table, cos_table=self.sin_cos_table.cos_table, step=self.sin_cos_table.step, n=self.sin_cos_table.n) else: ro = ext.fit(sites_cart_start=sites_cart_start.deep_copy(), axes=axes, rotatable_points_indices=atr, angles_array=self.chi_angles, all_points=self.residue.atoms().extract_xyz(), sin_table=self.sin_cos_table.sin_table, cos_table=self.sin_cos_table.cos_table, step=self.sin_cos_table.step, n=self.sin_cos_table.n) sites_cart_result = ro.result() if (sites_cart_result.size() > 0): dist = None if (self.accept_only_if_max_shift_is_smaller_than is not None): dist = flex.max( flex.sqrt((sites_cart_start - sites_cart_result).dot())) if (dist is None): self.residue.atoms().set_xyz(sites_cart_result) else: if (dist is not None and dist < self.accept_only_if_max_shift_is_smaller_than): self.residue.atoms().set_xyz(sites_cart_result) else: self.residue.atoms().set_xyz(sites_cart_start) else: self.residue.atoms().set_xyz(sites_cart_start)
def exercise_2(mon_lib_srv, ener_lib): for use_reference in [True, False, None]: processed_pdb_file = pdb_interpretation.process( mon_lib_srv=mon_lib_srv, ener_lib=ener_lib, raw_records=flex.std_string(pdb_str_2.splitlines()), strict_conflict_handling=True, force_symmetry=True, log=None) grm = processed_pdb_file.geometry_restraints_manager() xrs2 = processed_pdb_file.xray_structure(show_summary=False) awl2 = processed_pdb_file.all_chain_proxies.pdb_hierarchy.atoms_with_labels() pdb_inp3 = iotbx.pdb.input(source_info=None, lines=pdb_str_3) xrs3 = pdb_inp3.xray_structure_simple() ph3 = pdb_inp3.construct_hierarchy() ph3.atoms().reset_i_seq() awl3 = ph3.atoms_with_labels() sites_cart_reference = flex.vec3_double() selection = flex.size_t() reference_names = ["CG", "CD", "NE", "CZ", "NH1", "NH2"] for a2, a3 in zip(tuple(awl2), tuple(awl3)): assert a2.resname == a3.resname assert a2.name == a3.name assert a2.i_seq == a3.i_seq if (a2.resname == "ARG" and a2.name.strip() in reference_names): selection.append(a2.i_seq) sites_cart_reference.append(a3.xyz) assert selection.size() == len(reference_names) selection_bool = flex.bool(xrs2.scatterers().size(), selection) if (use_reference): grm.adopt_reference_coordinate_restraints_in_place( reference.add_coordinate_restraints( sites_cart=sites_cart_reference, selection=selection, sigma=0.01)) elif (use_reference is None): grm.adopt_reference_coordinate_restraints_in_place( reference.add_coordinate_restraints( sites_cart=sites_cart_reference, selection=selection, sigma=0.01)) grm.remove_reference_coordinate_restraints_in_place( selection=selection) d1 = flex.mean( flex.sqrt((xrs2.sites_cart().select(selection) - xrs3.sites_cart().select(selection)).dot())) print "distance start (use_reference: %s): %6.4f" % ( str(use_reference), d1) assert d1 > 4.0 assert approx_equal( flex.max( flex.sqrt((xrs2.sites_cart().select(~selection_bool) - xrs3.sites_cart().select(~selection_bool)).dot())), 0) from cctbx import geometry_restraints import mmtbx.refinement.geometry_minimization import scitbx.lbfgs grf = geometry_restraints.flags.flags(default=True) sites_cart = xrs2.sites_cart() minimized = mmtbx.refinement.geometry_minimization.lbfgs( sites_cart=sites_cart, correct_special_position_tolerance=1.0, geometry_restraints_manager=grm, sites_cart_selection=flex.bool(sites_cart.size(), selection), geometry_restraints_flags=grf, lbfgs_termination_params=scitbx.lbfgs.termination_parameters( max_iterations=5000)) xrs2.set_sites_cart(sites_cart=sites_cart) d2 = flex.mean( flex.sqrt((xrs2.sites_cart().select(selection) - xrs3.sites_cart().select(selection)).dot())) print "distance final (use_reference: %s): %6.4f" % ( str(use_reference), d2) if (use_reference): assert d2 < 0.005, "failed: %f < 0.005" % d2 else: assert d2 > 4.0, d2 assert approx_equal( flex.max( flex.sqrt((xrs2.sites_cart().select(~selection_bool) - xrs3.sites_cart().select(~selection_bool)).dot())), 0)
def generate_view_data(self): from scitbx.array_family import flex from scitbx import graphics_utils settings = self.settings data_for_colors = data_for_radii = None data = self.data #self.work_array.data() if (isinstance(data, flex.double) and data.all_eq(0)): data = flex.double(data.size(), 1) if ((self.multiplicities is not None) and (settings.scale_colors_multiplicity)): data_for_colors = self.multiplicities.data().as_double() assert data_for_colors.size() == data.size() elif (settings.sqrt_scale_colors) and (isinstance(data, flex.double)): data_for_colors = flex.sqrt(data) else: data_for_colors = data.deep_copy() if ((self.multiplicities is not None) and (settings.scale_radii_multiplicity)): #data_for_radii = data.deep_copy() data_for_radii = self.multiplicities.data().as_double() assert data_for_radii.size() == data.size() elif (settings.sqrt_scale_radii) and (isinstance(data, flex.double)): data_for_radii = flex.sqrt(data) else: data_for_radii = data.deep_copy() if (settings.slice_mode): data = data.select(self.slice_selection) if (not settings.keep_constant_scale): data_for_radii = data_for_radii.select(self.slice_selection) data_for_colors = data_for_colors.select(self.slice_selection) if (settings.color_scheme in ["rainbow", "heatmap", "redblue"]): colors = graphics_utils.color_by_property( properties=data_for_colors, selection=flex.bool(data_for_colors.size(), True), color_all=False, gradient_type=settings.color_scheme) elif (settings.color_scheme == "grayscale"): colors = graphics_utils.grayscale_by_property( properties=data_for_colors, selection=flex.bool(data_for_colors.size(), True), shade_all=False, invert=settings.black_background) else: if (settings.black_background): base_color = (1.0, 1.0, 1.0) else: base_color = (0.0, 0.0, 0.0) colors = flex.vec3_double(data_for_colors.size(), base_color) if (settings.slice_mode) and (settings.keep_constant_scale): colors = colors.select(self.slice_selection) data_for_radii = data_for_radii.select(self.slice_selection) uc = self.work_array.unit_cell() abc = uc.parameters()[0:3] min_dist = min(uc.reciprocal_space_vector((1, 1, 1))) min_radius = 0.20 * min_dist max_radius = 40 * min_dist if (settings.sqrt_scale_radii) and ( not settings.scale_radii_multiplicity): data_for_radii = flex.sqrt(data_for_radii) if len(data_for_radii): max_value = flex.max(data_for_radii) scale = max_radius / max_value radii = data_for_radii * scale too_small = radii < min_radius if (too_small.count(True) > 0): radii.set_selected(too_small, flex.double(radii.size(), min_radius)) assert radii.size() == colors.size() else: radii = flex.double() max_radius = 0 self.radii = radii self.max_radius = max_radius self.colors = colors
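# Aside (editor's sketch): the radii above are a linear rescale of the chosen
# property with a floor, r = max(value * max_radius / max(values), min_radius).
# In isolation (assumes a non-empty flex.double with a positive maximum):
from cctbx.array_family import flex

def scale_radii(values, min_radius, max_radius):
    radii = values * (max_radius / flex.max(values))
    radii.set_selected((radii < min_radius).iselection(), min_radius)
    return radii

# usage: scale_radii(flex.double([0.1, 2.0, 40.0]), 0.2, 4.0)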
def _filter_reflections_based_on_centroid_distance( reflection_table, experiment, outlier_probability=0.975, max_separation=2, ): """ Filter reflections too far from predicted position """ # Compute the x and y residuals Xobs, Yobs, _ = reflection_table["xyzobs.px.value"].parts() Xcal, Ycal, _ = reflection_table["xyzcal.px"].parts() Xres = Xobs - Xcal Yres = Yobs - Ycal # Compute the epsilon residual s0_length = 1.0 / experiment.beam.get_wavelength() s1x, s1y, s1z = reflection_table["s2"].parts() s1_length = flex.sqrt(s1x**2 + s1y**2 + s1z**2) Eres = s1_length - s0_length # Initialise the fast_mcd outlier algorithm # fast_mcd = FastMCD((Xres, Yres, Eres)) fast_mcd = FastMCD((Xres, Yres)) # get location and MCD scatter estimate T, S = fast_mcd.get_corrected_T_and_S() # get squared Mahalanobis distances # d2s = maha_dist_sq((Xres, Yres, Eres), T, S) d2s = maha_dist_sq((Xres, Yres), T, S) # Compute the cutoff mahasq_cutoff = chisq_quantile(2, outlier_probability) # compare to the threshold and select reflections selection1 = d2s < mahasq_cutoff selection2 = flex.sqrt(Xres**2 + Yres**2) < max_separation selection = selection1 & selection2 reflection_table = reflection_table.select(selection) n_refl = reflection_table.size() # Print some stuff logger.info("-" * 80) logger.info("Centroid outlier rejection") logger.info( f" Using MCD algorithm with probability = {outlier_probability}") logger.info(" Max X residual: %f" % flex.max(flex.abs(Xres))) logger.info(" Max Y residual: %f" % flex.max(flex.abs(Yres))) logger.info(" Max E residual: %f" % flex.max(flex.abs(Eres))) logger.info(" Mean X RMSD: %f" % (sqrt(flex.sum(Xres**2) / len(Xres)))) logger.info(" Mean Y RMSD: %f" % (sqrt(flex.sum(Yres**2) / len(Yres)))) logger.info(" Mean E RMSD: %f" % (sqrt(flex.sum(Eres**2) / len(Eres)))) logger.info(" MCD location estimate: %.4f, %.4f" % tuple(T)) logger.info(""" MCD scatter estimate: %.7f, %.7f, %.7f, %.7f""" % tuple(S)) logger.info(" Number of outliers: %d" % selection1.count(False)) logger.info(" Number of reflections with residual > %0.2f pixels: %d" % (max_separation, selection2.count(False))) logger.info(f"Number of reflections selected for refinement: {n_refl}") logger.info("-" * 80) return reflection_table
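# Aside (editor's sketch): the cutoff above compares squared Mahalanobis
# distances d2 = (x - T)^T S^-1 (x - T) against a chi-squared quantile with
# 2 degrees of freedom. An equivalent numpy/scipy form (scipy assumed
# available here; maha_dist_sq/chisq_quantile are the in-tree equivalents):
import numpy as np
from scipy.stats import chi2

def mahalanobis_sq(X, T, S):
    # X: (n, 2) residuals; T: (2,) location; S: (2, 2) scatter matrix
    d = X - np.asarray(T)
    return np.einsum("ij,jk,ik->i", d, np.linalg.inv(S), d)

# keep points inside the 97.5% ellipse:
# mask = mahalanobis_sq(X, T, S) < chi2.ppf(0.975, df=2)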
def tst_ls_on_f(): tmp = rs.xray_structure(sgtbx.space_group_info('P4'), elements=['C'] * 310, n_scatterers=310) sfs = tmp.structure_factors(False, 3.5).f_calc() f_mod = xray.f_model_core_data(hkl=sfs.indices(), f_atoms=sfs.data(), f_mask=sfs.data(), unit_cell=sfs.unit_cell(), k_overall=1.0, u_star=(0, 0, 0, 0, 0, 0), k_sol=0.0, u_sol=0.0, f_part=sfs.data(), k_part=0.0, u_part=0.0) target_evaluator = xray.least_squares_hemihedral_twinning_on_f( hkl_obs=sfs.indices(), f_obs=flex.sqrt(flex.abs(sfs.data()) * flex.abs(sfs.data())) * 1.0, w_obs=None, hkl_calc=sfs.indices(), space_group=sfs.space_group(), anomalous_flag=False, alpha=0.0, twin_law=[-1, 0, 0, 0, 1, 0, 0, 0, -1]) target = target_evaluator.target(sfs.data()) # The target value should be zero assert approx_equal(target, 0, eps=1e-6) # the derivatives as well derivs_complex = target_evaluator.d_target_d_fmodel(sfs.data()) derivs_ab = target_evaluator.d_target_d_ab(sfs.data()) for cmplx, da, db in zip(derivs_complex, derivs_ab[0], derivs_ab[1]): assert approx_equal(cmplx.real, da, eps=1e-5) assert approx_equal(cmplx.imag, -db, eps=1e-5) for alpha in flex.double(range(50)) / 100.0: #---------------------------------------------------------------- # use fin diffs to check the derivatives to a and b old_target_evaluator = xray.least_squares_hemihedral_twinning_on_f( hkl_obs=sfs.indices(), f_obs=flex.sqrt(flex.abs(sfs.data()) * flex.abs(sfs.data())) * 1.1, w_obs=None, hkl_calc=sfs.indices(), space_group=sfs.space_group(), anomalous_flag=False, alpha=alpha, twin_law=[-1, 0, 0, 0, 1, 0, 0, 0, -1]) old_target_value = old_target_evaluator.target(sfs.data()) old_derivs = old_target_evaluator.d_target_d_ab(sfs.data()) new_data = sfs.data() h = 0.0001 checked = 0 for N_test in xrange(sfs.data().size()): ori = complex(sfs.data()[N_test]) #print "----------------" #print alpha #print sfs.indices()[N_test] #print sfs.data()[N_test] new_data[N_test] = ori + complex(h, 0) new_target_value = old_target_evaluator.target(new_data) fdif_real = float((new_target_value - old_target_value) / h) new_data[N_test] = ori + complex(0, h) new_target_value = old_target_evaluator.target(new_data) fdif_imag = float((new_target_value - old_target_value) / h) # only use 'large' first derivative if 1: #old_derivs[0][N_test]>0: #print "real", N_test, fdif_real,old_derivs[0][N_test], (fdif_real-old_derivs[0][N_test])/old_derivs[0][N_test] if old_derivs[0][N_test] > 1: checked += 1 assert approx_equal( (fdif_real - old_derivs[0][N_test]) / fdif_real, 0, eps=1e-3) if abs(old_derivs[1][N_test]) > 0: #print "Imag", N_test, fdif_imag,old_derivs[1][N_test], (fdif_imag-old_derivs[1][N_test])/old_derivs[1][N_test] if old_derivs[1][N_test] > 1: checked += 1 assert approx_equal( (fdif_imag - old_derivs[1][N_test]) / fdif_imag, 0, eps=1e-3) new_data[N_test] = ori assert checked > 0 #------------------------------------- # use fin diffs to test derivatives wrst alpha, the twin fraction h = 0.00001 target_evaluator = xray.least_squares_hemihedral_twinning_on_f( hkl_obs=sfs.indices(), f_obs=flex.sqrt(flex.abs(sfs.data()) * flex.abs(sfs.data())) * 1.0, w_obs=None, hkl_calc=sfs.indices(), space_group=sfs.space_group(), anomalous_flag=False, alpha=0, twin_law=[-1, 0, 0, 0, 1, 0, 0, 0, -1]) tst_alpha = [0.1, 0.2, 0.3, 0.4, 0.5] for ii in tst_alpha: target_evaluator.alpha(ii) old_target = target_evaluator.target(sfs.data() * 1.0) target_evaluator.alpha(ii + h) new_target = target_evaluator.target(sfs.data() * 1.0) fd = (new_target - old_target) / h target_evaluator.alpha(ii) an = target_evaluator.d_target_d_alpha(sfs.data() * 1.0) assert approx_equal(fd / an, 1.0, eps=1e-2)
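# Aside (editor's sketch): both derivative checks above follow the standard
# forward-difference pattern: perturb one parameter by a small h, re-evaluate
# the target, and compare (f(x + h) - f(x)) / h with the analytic gradient.
def forward_difference(f, x, h=1.0e-5):
    return (f(x + h) - f(x)) / h

# usage: assert abs(forward_difference(lambda t: t * t, 3.0) - 6.0) < 1.0e-3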
def plot_projections(projections, filename=None, show=None, colours=None, marker_size=3, font_size=6, gridsize=None, label_indices=False, epochs=None, colour_map=None): assert [filename, show].count(None) < 2 projections_all = projections try: import matplotlib if not show: # http://matplotlib.org/faq/howto_faq.html#generate-images-without-having-a-window-appear matplotlib.use('Agg') # use a non-interactive backend from matplotlib import pyplot from matplotlib import pylab except ImportError: raise Sorry("matplotlib must be installed to generate a plot.") if epochs is not None and colour_map is not None: epochs = flex.double(epochs) epochs -= flex.min(epochs) epochs /= flex.max(epochs) cmap = matplotlib.cm.get_cmap(colour_map) colours = [cmap(e) for e in epochs] elif colours is None or len(colours) == 0: colours = ['b'] * len(projections_all) elif len(colours) < len(projections_all): colours = colours * len(projections_all) fig = pyplot.figure() pyplot.scatter([0], [0], marker='+', c='0.75', s=100) cir = pylab.Circle((0, 0), radius=1.0, fill=False, color='0.75') pylab.gca().add_patch(cir) if gridsize is not None: x = flex.double() y = flex.double() for i, projections in enumerate(projections_all): x_, y_ = projections.parts() x.extend(x_) y.extend(y_) hb = pyplot.hexbin(x, y, gridsize=gridsize, linewidths=0.2) cb = pyplot.colorbar(hb) else: for i, projections in enumerate(projections_all): x, y = projections.parts() pyplot.scatter(x.as_numpy_array(), y.as_numpy_array(), c=colours[i], s=marker_size, edgecolors='none') if label_indices: for j, (hkl, proj) in enumerate(zip(label_indices, projections)): # hack to not write two labels on top of each other p1, p2 = (projections - proj).parts() if (flex.sqrt(flex.pow2(p1) + flex.pow2(p2)) < 1e-3).iselection()[0] != j: continue pyplot.text(proj[0], proj[1], str(hkl), fontsize=font_size) fig.axes[0].set_aspect('equal') pyplot.xlim(-1.1, 1.1) pyplot.ylim(-1.1, 1.1) if filename is not None: fig.set_size_inches(24, 18) pyplot.savefig(filename, dpi=300) if show: pyplot.show()
def add_miller_array(self, miller_array, column_root_label, column_types=None, label_decorator=None): assert column_types is None or isinstance(column_types, str) if (label_decorator is None): label_decorator = globals()["label_decorator"]() default_col_types = default_column_types(miller_array=miller_array) if (default_col_types is None): raise RuntimeError( "Conversion of given type of miller_array to MTZ format" " is not supported.") if (column_types is None): column_types = default_col_types elif (len(column_types) != len(default_col_types)): raise RuntimeError( "Invalid MTZ column_types for the given miller_array.") self.initialize_hkl_columns() if (not miller_array.anomalous_flag()): if (default_col_types in ["FQ", "JQ"]): self._add_observations( data_label=column_root_label, sigmas_label=label_decorator.sigmas(column_root_label), column_types=column_types, indices=miller_array.indices(), data=miller_array.data(), sigmas=miller_array.sigmas()) elif (default_col_types == "FP"): self._add_complex( amplitudes_label=column_root_label, phases_label=label_decorator.phases(column_root_label), column_types=column_types, indices=miller_array.indices(), data=miller_array.data()) elif (default_col_types in ["F", "J"]): self.add_column( label=column_root_label, type=column_types).set_reals( miller_indices=miller_array.indices(), data=miller_array.data()) elif (default_col_types == "I"): self.add_column( label=column_root_label, type=column_types).set_reals( miller_indices=miller_array.indices(), data=miller_array.data().as_double()) elif (default_col_types == "AAAA"): mtz_reflection_indices = self.add_column( label=label_decorator.hendrickson_lattman(column_root_label, 0), type=column_types[0]).set_reals( miller_indices=miller_array.indices(), data=miller_array.data().slice(0)) for i in range(1,4): self.add_column( label=label_decorator.hendrickson_lattman(column_root_label, i), type=column_types[i]).set_reals( mtz_reflection_indices=mtz_reflection_indices, data=miller_array.data().slice(i)) else: raise RuntimeError("Fatal programming error.") else: asu, matches = miller_array.match_bijvoet_mates() if (default_col_types == "FQDQY"): _ = matches.pairs_hemisphere_selection selpp = _("+") selpm = _("-") _ = matches.singles_hemisphere_selection selsp = _("+") selsm = _("-") _ = asu.data() fp = _.select(selpp) fm = _.select(selpm) fs = _.select(selsp) fs.extend(_.select(selsm)) # http://www.ccp4.ac.uk/dist/html/mtzMADmod.html f = 0.5 * (fp + fm) d = fp - fm _ = asu.sigmas() sp = _.select(selpp) sm = _.select(selpm) ss = _.select(selsp) ss.extend(_.select(selsm)) sd = flex.sqrt(sp**2 + sm**2) sf = 0.5 * sd f.extend(fs) sf.extend(ss) _ = asu.indices() hd = _.select(selpp) hf = hd.concatenate(_.select(selsp)) hf.extend(-_.select(selsm)) isym = flex.double(selpp.size(), 0) # both F+ and F- isym.resize(selpp.size()+selsp.size(), 1) # only F+ isym.resize(hf.size(), 2) # only F- isym.set_selected(miller_array.space_group().is_centric(hf) , 0) label_group = [ column_root_label, label_decorator.sigmas(column_root_label), label_decorator.delta_anomalous(column_root_label), label_decorator.delta_anomalous_sigmas(column_root_label), label_decorator.delta_anomalous_isym(column_root_label)] for i,(mi,data) in enumerate([(hf,f),(hf,sf),(hd,d),(hd,sd),(hf,isym)]): self.add_column( label=label_group[i], type=column_types[i]).set_reals(miller_indices=mi, data=data) else: for anomalous_sign in ("+","-"): sel = matches.pairs_hemisphere_selection(anomalous_sign) 
sel.extend(matches.singles_hemisphere_selection(anomalous_sign)) if (anomalous_sign == "+"): indices = asu.indices().select(sel) else: indices = -asu.indices().select(sel) data = asu.data().select(sel) if (default_col_types in ["GL", "KM"]): self._add_observations( data_label=label_decorator.anomalous( column_root_label, anomalous_sign), sigmas_label=label_decorator.sigmas( column_root_label, anomalous_sign), column_types=column_types, indices=indices, data=data, sigmas=asu.sigmas().select(sel)) elif (default_col_types == "GP"): self._add_complex( amplitudes_label=label_decorator.anomalous( column_root_label, anomalous_sign), phases_label=label_decorator.phases( column_root_label, anomalous_sign), column_types=column_types, indices=indices, data=data) elif (default_col_types in ["G", "K"]): self.add_column( label=label_decorator.anomalous( column_root_label, anomalous_sign), type=column_types).set_reals( miller_indices=indices, data=data) elif (default_col_types == "I"): self.add_column( label=label_decorator.anomalous( column_root_label, anomalous_sign), type=column_types).set_reals( miller_indices=indices, data=data.as_double()) elif (default_col_types == "AAAA"): mtz_reflection_indices = self.add_column( label=label_decorator.hendrickson_lattman( column_root_label, 0, anomalous_sign), type=column_types[0]).set_reals( miller_indices=indices, data=data.slice(0)) for i in range(1,4): self.add_column( label=label_decorator.hendrickson_lattman( column_root_label, i, anomalous_sign), type=column_types[i]).set_reals( mtz_reflection_indices=mtz_reflection_indices, data=data.slice(i)) else: raise RuntimeError("Fatal programming error.") return self
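# Aside (editor's sketch): the FQDQY branch above follows the mtzMADmod
# conventions linked in the code: F = (F+ + F-)/2, D = F+ - F-,
# sigma(D) = sqrt(sigma+^2 + sigma-^2) and sigma(F) = sigma(D)/2.
# The same arithmetic in isolation:
from cctbx.array_family import flex

def mean_and_anomalous_difference(fp, fm, sp, sm):
    # returns (F, sigma(F), D, sigma(D)) as flex arrays
    d = fp - fm
    sd = flex.sqrt(sp * sp + sm * sm)
    return 0.5 * (fp + fm), 0.5 * sd, d, sd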
def run(args, command_name="iotbx.pdb.as_xray_structure"): command_line = (option_parser( usage=command_name + " [options] pdb_file ...", description="Example: %s pdb1ab1.ent" % command_name ).enable_symmetry_comprehensive().option( None, "--weak_symmetry", action="store_true", default=False, help="symmetry on command line is weaker than symmetry found in files" ).option( None, "--ignore_occ_for_site_symmetry", action="store_true", default=False, help="disables non_unit_occupancy_implies_min_distance_sym_equiv_zero" ).option( "-v", "--verbose", action="store_true", default=False, help="show scatterers" ).option( None, "--pickle", action="store", type="string", help="write all data to FILE ('--pickle .' copies name of input file)", metavar="FILE").option( None, "--fake_f_obs_and_r_free_flags_d_min", action="store", type="float", help="write F-calc as F-obs, add random R-free flags (MTZ format)", metavar="FLOAT")).process(args=args) if (len(command_line.args) == 0): command_line.parser.show_help() co = command_line.options d_min = co.fake_f_obs_and_r_free_flags_d_min all_structures = [] for file_name in command_line.args: print "file_name:", file_name sys.stdout.flush() pdb_inp = pdb.input(file_name=file_name) structure = pdb_inp.xray_structure_simple( crystal_symmetry=command_line.symmetry, weak_symmetry=co.weak_symmetry, non_unit_occupancy_implies_min_distance_sym_equiv_zero=not co. ignore_occ_for_site_symmetry) structure.show_summary() if (structure.special_position_indices().size() != 0): structure.show_special_position_shifts( sites_cart_original=pdb_inp.atoms().extract_xyz()) structure.scattering_type_registry().show(show_gaussians=False) if (co.verbose): structure.show_scatterers() if (d_min is not None and d_min > 0): f_obs = abs( structure.structure_factors(d_min=d_min, anomalous_flag=False).f_calc()) f_obs = f_obs.customized_copy(sigmas=flex.sqrt(f_obs.data())) r_free_flags = f_obs.generate_r_free_flags(fraction=0.05, max_free=None) mtz_dataset = f_obs.as_mtz_dataset(column_root_label="F-obs") mtz_dataset.add_miller_array(miller_array=r_free_flags, column_root_label="R-free-flags") mtz_object = mtz_dataset.mtz_object() history = "%s %s" % (command_name, show_string(file_name)) lines = flex.std_string(["Fake F-obs, R-free-flags"]) while (len(history) != 0): lines.append(history[:77]) history = history[77:] mtz_object.add_history(lines=lines) mtz_object.show_summary() mtz_file_name = os.path.basename(file_name).replace(".","_") \ + "_fake.mtz" print "Writing file:", mtz_file_name mtz_object.write(file_name=mtz_file_name) all_structures.append(structure) print pickle_file_name = co.pickle if (pickle_file_name is not None and len(all_structures) > 0): if (pickle_file_name == "."): if (len(command_line.args) > 1): raise Sorry( "Ambiguous name for pickle file (more than one input file)." ) pickle_file_name = os.path.basename(command_line.args[0]) if (not pickle_file_name.lower().endswith(".pickle")): pickle_file_name += ".pickle" if (len(all_structures) == 1): all_structures = all_structures[0] else: print print "Writing all xray structures to file:", pickle_file_name easy_pickle.dump(pickle_file_name, all_structures) print
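# Aside (editor's note): the fake-data path above chains four standard hooks;
# in outline, for any amplitude array f_obs:
# flags = f_obs.generate_r_free_flags(fraction=0.05, max_free=None)
# mtz_dataset = f_obs.as_mtz_dataset(column_root_label="F-obs")
# mtz_dataset.add_miller_array(miller_array=flags, column_root_label="R-free-flags")
# mtz_dataset.mtz_object().write(file_name="fake.mtz")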
def __init__(self, miller_obs, miller_calc, r_free_flags, kernel_width_free_reflections=None, kernel_width_d_star_cubed=None, kernel_in_bin_centers=False, kernel_on_chebyshev_nodes=True, n_sampling_points=20, n_chebyshev_terms=10, use_sampling_sum_weights=False, make_checks_and_clean_up=True): assert [kernel_width_free_reflections, kernel_width_d_star_cubed].count(None) == 1 self.miller_obs = miller_obs self.miller_calc = abs(miller_calc) self.r_free_flags = r_free_flags self.kernel_width_free_reflections = kernel_width_free_reflections self.kernel_width_d_star_cubed = kernel_width_d_star_cubed self.n_chebyshev_terms = n_chebyshev_terms if make_checks_and_clean_up: self.miller_obs = self.miller_obs.map_to_asu() self.miller_calc = self.miller_calc.map_to_asu() self.r_free_flags = self.r_free_flags.map_to_asu() assert self.r_free_flags.indices().all_eq( self.miller_obs.indices()) self.miller_calc = self.miller_calc.common_set(self.miller_obs) assert self.r_free_flags.indices().all_eq( self.miller_calc.indices()) assert self.miller_obs.is_real_array() if self.miller_obs.is_xray_intensity_array(): self.miller_obs = self.miller_obs.f_sq_as_f() assert self.miller_obs.observation_type() is None or \ self.miller_obs.is_xray_amplitude_array() if self.miller_calc.observation_type() is None: self.miller_calc = self.miller_calc.set_observation_type( self.miller_obs) # get normalized data please self.normalized_obs_f = absolute_scaling.kernel_normalisation( self.miller_obs, auto_kernel=True) self.normalized_obs = self.normalized_obs_f.normalised_miller_dev_eps.f_sq_as_f() self.normalized_calc_f = absolute_scaling.kernel_normalisation( self.miller_calc, auto_kernel=True) self.normalized_calc = self.normalized_calc_f.normalised_miller_dev_eps.f_sq_as_f() # get the 'free data' if (self.r_free_flags.data().count(True) == 0): self.r_free_flags = self.r_free_flags.array( data=~self.r_free_flags.data()) self.free_norm_obs = self.normalized_obs.select( self.r_free_flags.data()) self.free_norm_calc = self.normalized_calc.select( self.r_free_flags.data()) if self.free_norm_obs.data().size() <= 0: raise RuntimeError("No free reflections.") if (self.kernel_width_d_star_cubed is None): self.kernel_width_d_star_cubed = sigmaa_estimator_kernel_width_d_star_cubed( r_free_flags=self.r_free_flags, kernel_width_free_reflections=self.kernel_width_free_reflections) self.sigma_target_functor = ext.sigmaa_estimator( e_obs=self.free_norm_obs.data(), e_calc=self.free_norm_calc.data(), centric=self.free_norm_obs.centric_flags().data(), d_star_cubed=self.free_norm_obs.d_star_cubed().data(), width=self.kernel_width_d_star_cubed) d_star_cubed_overall = self.miller_obs.d_star_cubed().data() self.min_h = flex.min(d_star_cubed_overall) self.max_h = flex.max(d_star_cubed_overall) self.h_array = None if (kernel_in_bin_centers): self.h_array = flex.double(xrange(1, n_sampling_points * 2, 2)) * ( self.max_h - self.min_h) / (n_sampling_points * 2) + self.min_h else: self.min_h *= 0.99 self.max_h *= 1.01 if kernel_on_chebyshev_nodes: self.h_array = chebyshev_lsq_fit.chebyshev_nodes( n=n_sampling_points, low=self.min_h, high=self.max_h, include_limits=True) else: self.h_array = flex.double(range(n_sampling_points)) * ( self.max_h - self.min_h) / float(n_sampling_points - 1.0) + self.min_h assert self.h_array.size() == n_sampling_points self.sigmaa_array = flex.double() self.sigmaa_array.reserve(self.h_array.size()) self.sum_weights = flex.double() self.sum_weights.reserve(self.h_array.size()) for h in self.h_array: estimator = sigmaa_point_estimator(self.sigma_target_functor, h) self.sigmaa_array.append(estimator.sigmaa) self.sum_weights.append( self.sigma_target_functor.sum_weights(d_star_cubed=h)) # fit a smooth function reparam_sa = -flex.log(1.0 / self.sigmaa_array - 1.0) if (use_sampling_sum_weights): w_obs = flex.sqrt(self.sum_weights) else: w_obs = None fit_lsq = chebyshev_lsq_fit.chebyshev_lsq_fit( n_terms=self.n_chebyshev_terms, x_obs=self.h_array, y_obs=reparam_sa, w_obs=w_obs) cheb_pol = chebyshev_polynome(self.n_chebyshev_terms, self.min_h, self.max_h, fit_lsq.coefs) def reverse_reparam(values): return 1.0 / (1.0 + flex.exp(-values)) self.sigmaa_fitted = reverse_reparam(cheb_pol.f(self.h_array)) self.sigmaa_miller_array = reverse_reparam( cheb_pol.f(d_star_cubed_overall)) assert flex.min(self.sigmaa_miller_array) >= 0 assert flex.max(self.sigmaa_miller_array) <= 1 self.sigmaa_miller_array = self.miller_obs.array( data=self.sigmaa_miller_array) self.alpha = None self.beta = None self.fom_array = None
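# Aside (editor's sketch): the chebyshev fit above runs on a logit-like
# reparameterization so the polynomial is unconstrained while sigmaa stays in
# (0, 1): forward y = -log(1/s - 1), back s = 1/(1 + exp(-y)). Round trip:
import math

def sigmaa_to_unconstrained(s):
    return -math.log(1.0 / s - 1.0)

def unconstrained_to_sigmaa(y):
    return 1.0 / (1.0 + math.exp(-y))

# usage: abs(unconstrained_to_sigmaa(sigmaa_to_unconstrained(0.7)) - 0.7) < 1e-12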
def collect(self, model, fmodel, step, wilson_b=None, rigid_body_shift_accumulator=None): global time_collect_and_process t1 = time.time() if (self.sites_cart_start is None): self.sites_cart_start = model.get_sites_cart() sites_cart_curr = model.get_sites_cart() if (sites_cart_curr.size() == self.sites_cart_start.size()): self.shifts.append( flex.mean( flex.sqrt( (self.sites_cart_start - sites_cart_curr).dot()))) else: self.shifts.append("n/a") if (wilson_b is not None): self.wilson_b = wilson_b self.steps.append(step) self.r_works.append(fmodel.r_work()) self.r_frees.append(fmodel.r_free()) use_amber = False if hasattr(self.params, "amber"): # loaded amber scope use_amber = self.params.amber.use_amber self.is_amber_monitor = use_amber use_afitt = False if hasattr(self.params, "afitt"): # loaded amber scope use_afitt = self.params.afitt.use_afitt general_selection = None if use_afitt: from mmtbx.geometry_restraints import afitt general_selection = afitt.get_non_afitt_selection( model.restraints_manager, model.get_sites_cart(), model.get_hd_selection(), None) geom = model.geometry_statistics(general_selection=general_selection) if (geom is not None): self.geom.bonds.append(geom.bond().mean) self.geom.angles.append(geom.angle().mean) hd_sel = None if (not self.neutron_refinement and not self.is_neutron_monitor): hd_sel = model.get_hd_selection() b_isos = model.get_xray_structure().extract_u_iso_or_u_equiv( ) * math.pi**2 * 8 if (hd_sel is not None): b_isos = b_isos.select(~hd_sel) self.bs_iso_max_a.append(flex.max_default(b_isos, 0)) self.bs_iso_min_a.append(flex.min_default(b_isos, 0)) self.bs_iso_ave_a.append(flex.mean_default(b_isos, 0)) self.n_solv.append(model.number_of_ordered_solvent_molecules()) if (len(self.geom.bonds) > 0): if ([self.bond_start, self.angle_start].count(None) == 2): if (len(self.geom.bonds) > 0): self.bond_start = self.geom.bonds[0] self.angle_start = self.geom.angles[0] if (len(self.geom.bonds) > 0): self.bond_final = self.geom.bonds[len(self.geom.bonds) - 1] self.angle_final = self.geom.angles[len(self.geom.angles) - 1] elif (len(self.geom) == 1): self.bond_final = self.geom.bonds[0] self.angle_final = self.geom.angles[0] if (rigid_body_shift_accumulator is not None): self.rigid_body_shift_accumulator = rigid_body_shift_accumulator t2 = time.time() time_collect_and_process += (t2 - t1) self.call_back(model, fmodel, method=step)
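# Aside (editor's sketch): the b_isos line above converts isotropic
# displacements u (A^2) to B factors via B = 8 * pi**2 * u; cctbx also ships
# adptbx.u_as_b for the same conversion.
import math

def u_as_b(u_iso):
    return 8.0 * math.pi ** 2 * u_iso

# usage: u_as_b(0.05) -> approx 3.948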
def residue_iteration(pdb_hierarchy, xray_structure, selection, target_map_data, model_map_data, residual_map_data, mon_lib_srv, rsr_manager, optimize_hd, params, log): mon_lib_srv = mmtbx.monomer_library.server.server() assert target_map_data.focus() == model_map_data.focus() assert target_map_data.all() == model_map_data.all() fmt1 = " |--------START--------| |-----FINAL----|" fmt2 = " residue map_cc 2mFo-DFc mFo-DFc 2mFo-DFc mFo-DFc" " rotamer n_rot max_moved" fmt3 = " %12s%7.4f %8.2f %7.2f %8.2f %7.2f %7s %5d %8.3f" print >> log, fmt1 print >> log, fmt2 unit_cell = xray_structure.unit_cell() map_selector = select_map(unit_cell=xray_structure.unit_cell(), target_map_data=target_map_data, model_map_data=model_map_data) map_selector.initialize_rotamers() get_class = iotbx.pdb.common_residue_names_get_class n_other_residues = 0 n_amino_acids_ignored = 0 n_amino_acids_scored = 0 sites_cart_start = xray_structure.sites_cart() result = [] for model in pdb_hierarchy.models(): for chain in model.chains(): for residue_group in chain.residue_groups(): conformers = residue_group.conformers() if (params.ignore_alt_conformers and len(conformers) > 1): continue for conformer in residue_group.conformers(): residue = conformer.only_residue() if (get_class(residue.resname) == "common_amino_acid"): residue_iselection = residue.atoms().extract_i_seq() sites_cart_residue = xray_structure.sites_cart().select(residue_iselection) residue.atoms().set_xyz(new_xyz=sites_cart_residue) max_moved_dist = 0 sites_cart_residue_start = sites_cart_residue.deep_copy() # XXX assume that "atoms" are the same in residue and residue_groups if (map_selector.is_refinement_needed( residue_group=residue_group, residue=residue, cc_limit=params.poor_cc_threshold, ignore_hd=optimize_hd)): residue_id_str = residue.id_str( suppress_segid=1)[-12:] rsel, rs = include_residue_selection( selection=selection, residue_iselection=residue_iselection) cc_start = map_selector.get_cc( sites_cart=sites_cart_residue, residue_iselection=residue_iselection) rotamer_id_best = None rev = rotamer_evaluator( sites_cart_start=sites_cart_residue, unit_cell=unit_cell, two_mfo_dfc_map=target_map_data, mfo_dfc_map=residual_map_data) residue_sites_best = sites_cart_residue.deep_copy() rm = residue_rsr_monitor( residue_id_str=residue_id_str, selection=residue_iselection.deep_copy(), sites_cart=sites_cart_residue.deep_copy(), twomfodfc=rev.t1_start, mfodfc=rev.t2_start, cc=cc_start) result.append(rm) axes_and_atoms_to_rotate = rotatable_bonds.axes_and_atoms_aa_specific( residue = residue, mon_lib_srv = mon_lib_srv, remove_clusters_with_all_h = optimize_hd, log = log) if (axes_and_atoms_to_rotate is not None and len(axes_and_atoms_to_rotate) > 0): # initialize criteria for first rotatable atom in each cluster rev_first_atoms = [] for i_aa, aa in enumerate( axes_and_atoms_to_rotate): if (i_aa == len(axes_and_atoms_to_rotate) - 1): sites_aa = flex.vec3_double() for aa_ in aa[1]: sites_aa.append( sites_cart_residue[aa_]) else: sites_aa = flex.vec3_double( [sites_cart_residue[aa[1][0]]]) rev_i = rotamer_evaluator( sites_cart_start=sites_aa, unit_cell=unit_cell, two_mfo_dfc_map=target_map_data, mfo_dfc_map=residual_map_data) rev_first_atoms.append(rev_i) # get rotamer iterator rotamer_iterator = lockit.get_rotamer_iterator( mon_lib_srv=mon_lib_srv, residue=residue, atom_selection_bool=None) if (rotamer_iterator is None): n_amino_acids_ignored += 1 n_rotamers = 0 print >> log, "No rotamers for: %s. Use torsion grid search." % residue_id_str residue_sites_best, rotamer_id_best = torsion_search( residue_evaluator=rev, cluster_evaluators=rev_first_atoms, axes_and_atoms_to_rotate=axes_and_atoms_to_rotate, rotamer_sites_cart=sites_cart_residue, rotamer_id_best=rotamer_id_best, residue_sites_best=residue_sites_best, rotamer_id=None, params=None) else: n_amino_acids_scored += 1 n_rotamers = 0 if (not params.use_rotamer_iterator): if (params.torsion_grid_search): residue_sites_best, rotamer_id_best = torsion_search( residue_evaluator=rev, cluster_evaluators=rev_first_atoms, axes_and_atoms_to_rotate=axes_and_atoms_to_rotate, rotamer_sites_cart=sites_cart_residue, rotamer_id_best=rotamer_id_best, residue_sites_best=residue_sites_best, rotamer_id=None, params=params.torsion_search) else: for rotamer, rotamer_sites_cart in rotamer_iterator: n_rotamers += 1 if (params.torsion_grid_search): residue_sites_best, rotamer_id_best = torsion_search( residue_evaluator=rev, cluster_evaluators=rev_first_atoms, axes_and_atoms_to_rotate=axes_and_atoms_to_rotate, rotamer_sites_cart=rotamer_sites_cart, rotamer_id_best=rotamer_id_best, residue_sites_best=residue_sites_best, rotamer_id=rotamer.id, params=params.torsion_search) else: if (rev.is_better( sites_cart=rotamer_sites_cart)): rotamer_id_best = rotamer.id residue_sites_best = rotamer_sites_cart.deep_copy() residue.atoms().set_xyz( new_xyz=residue_sites_best) max_moved_dist = flex.max( flex.sqrt((sites_cart_residue_start - residue_sites_best).dot())) if (not params.real_space_refine_rotamer): sites_cart_start = sites_cart_start.set_selected( residue_iselection, residue_sites_best) else: tmp = sites_cart_start.set_selected( residue_iselection, residue_sites_best) sites_cart_refined = rsr_manager.refine_restrained( tmp.select(rsel), rsel, rs) if (rev.is_better( sites_cart=sites_cart_refined)): sites_cart_start = sites_cart_start.set_selected( residue_iselection, sites_cart_refined) residue.atoms().set_xyz( new_xyz=sites_cart_refined) max_moved_dist = flex.max( flex.sqrt( (sites_cart_residue_start - sites_cart_refined).dot())) if (abs(rev.t1_best - rev.t1_start) > 0.01 and abs(rev.t2_best - rev.t2_start) > 0.01): print >> log, fmt3 % ( residue_id_str, cc_start, rev.t1_start, rev.t2_start, rev.t1_best, rev.t2_best, rotamer_id_best, n_rotamers, max_moved_dist) xray_structure.set_sites_cart(sites_cart_start) return result
def exercise(space_group_info, anomalous_flag, n_scatterers=8, d_min=2, verbose=0): structure = random_structure.xray_structure(space_group_info, elements=["const"] * n_scatterers) f_calc = structure.structure_factors( d_min=d_min, anomalous_flag=anomalous_flag).f_calc() f = abs(f_calc) fs = miller.array(miller_set=f, data=f.data(), sigmas=flex.sqrt(f.data())) assert fs.is_unique_set_under_symmetry() for a in (f, fs): for algorithm in ["gaussian", "shelx"]: m = a.merge_equivalents(algorithm=algorithm) m.show_summary(out=StringIO()) j = m.array().adopt_set(a) assert flex.linear_correlation(j.data(), a.data()).coefficient() > 1 - 1.e-6 if (a.sigmas() is not None): assert flex.linear_correlation( j.sigmas(), a.sigmas()).coefficient() > 1 - 1.e-6 redundancies = flex.size_t() for i in xrange(fs.indices().size()): redundancies.append(random.randrange(5) + 1) space_group = space_group_info.group() r_indices = flex.miller_index() r_data = flex.double() r_sigmas = flex.double() for i, n in enumerate(redundancies): h = fs.indices()[i] h_eq = miller.sym_equiv_indices(space_group, h).indices() for j in xrange(n): r_indices.append(h_eq[random.randrange(len(h_eq))].h()) r_data.append(fs.data()[i]) r_sigmas.append(fs.sigmas()[i]) r = miller.array(miller_set=miller.set(crystal_symmetry=fs, indices=r_indices, anomalous_flag=fs.anomalous_flag()), data=r_data, sigmas=r_sigmas) assert not r.is_unique_set_under_symmetry() noise = flex.random_double(size=r.indices().size()) r = r.sort(by_value=noise) for algorithm in ["gaussian", "shelx"]: m = r.merge_equivalents(algorithm=algorithm) m.show_summary(out=StringIO()) j = m.array().adopt_set(fs) assert j.is_unique_set_under_symmetry() assert flex.linear_correlation(j.data(), fs.data()).coefficient() > 1 - 1.e-6 fssr = fs.sigmas() / flex.sqrt(redundancies.as_double()) assert flex.linear_correlation(j.sigmas(), fssr).coefficient() > 1 - 1.e-6 # if (anomalous_flag): f_calc_ave = f_calc.average_bijvoet_mates() # uses merge_equivalents f_calc_com = f_calc.as_non_anomalous_array().common_set(f_calc_ave) assert f_calc_com.indices().all_eq(f_calc_ave.indices()) for part in [flex.real, flex.imag]: assert flex.linear_correlation(part( f_calc_com.data()), part( f_calc_ave.data())).coefficient() > 1 - 1.e-6 # test use_internal_variance=False m = r.merge_equivalents(algorithm="gaussian", use_internal_variance=False) j = m.array().adopt_set(fs) fssr = fs.sigmas() / flex.sqrt(redundancies.as_double()) assert flex.linear_correlation(j.sigmas(), fssr).coefficient() > 1 - 1.e-6
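# Aside (editor's sketch): the fssr reference values encode the expectation
# that merging n equivalent observations with equal sigmas shrinks the sigma
# of the mean by sqrt(n):
from cctbx.array_family import flex

def expected_merged_sigmas(sigmas, redundancies):
    # sigmas: flex.double; redundancies: flex.size_t of per-reflection counts
    return sigmas / flex.sqrt(redundancies.as_double())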
  exit()
  return hklin, hklout

if (__name__ == "__main__"):
  # 0. read input parameters and frames (pickle files)
  hklin, hklout = read_input(args=sys.argv[1:])
  reflection_file = reflection_file_reader.any_reflection_file(hklin)
  miller_arrays = reflection_file.as_miller_arrays()
  miller_array = miller_arrays[0]
  # naive element-wise sqrt conversion; sigmas are not properly propagated
  F_as_I = flex.sqrt(miller_array.data())
  sigF_as_sigI = flex.sqrt(miller_array.sigmas())
  miller_array_I = miller_array.customized_copy(
    data=F_as_I, sigmas=sigF_as_sigI)
  miller_array_I = miller_array_I.set_observation_type_xray_amplitude()
  for miller_index, d, F, sigF, I, sigI in zip(
      miller_array.indices(),
      miller_array.d_spacings().data(),
      miller_array.data(),
      miller_array.sigmas(),
      miller_array_I.data(),
      miller_array_I.sigmas()):
    print miller_index, d, F, sigF, I, sigI
  # write as mtz file
  mtz_dataset_out = miller_array_I.as_mtz_dataset(column_root_label="FOBS")
  mtz_dataset_out.mtz_object().write(file_name=hklout)
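# The element-wise sqrt above converts intensities to amplitudes but takes
# sqrt of the sigmas instead of propagating them. A hedged alternative,
# assuming the input array truly holds intensities, is cctbx's built-in
# conversion, which propagates sigmas as sigF ~ sigI/(2F):
def _example_i_to_f(intensity_array):
  return intensity_array.f_sq_as_f()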
def run(args=None, l=None):
  '''
  Create 4 PDB files, 2 P1, 2 SC, perfect or containing an offset
  Calculate (diffuse) scattering from all 4
  Run ensemble refinement for 8 combos (4 per "size", SC or P1)
  '''
  ###########################################################################
  #                             Start log file                              #
  ###########################################################################
  # init log file
  if l == None:
    l = Log(log_name='sc_er_setup_log.txt')
  l.title("er.sc_er_setup module")
  ###########################################################################
  #                          Process input Params                           #
  ###########################################################################
  l.process_message('Processing input...\n')
  # use phil to process input
  working_params = sc_er_setup_phil.phil_parse(args=args, log=l)
  p = working_params.sc_er_setup
  single_pdb_name = 'single_pdb.pdb'
  mtz_list, pdb_list = [], []
  ###########################################################################
  #                       Create simple dynamic model                       #
  ###########################################################################
  # Rigid Body motion simple test:
  if p.params.make_rb:
    l.process_message('RB module...')
    rigid_body_args = ['pdb_in={}'.format(p.input.rb_pdb_in),
                       'rb_type={}'.format(p.params.rb_type),
                       'pdb_out={}'.format(p.output.rb_pdb_out)]
    rigid_body.run(args=rigid_body_args, l=l)
    os.system("mv trans_rb.pdb rb_pdb_out.pdb")
  ###########################################################################
  #                           Prepare single PDB                            #
  ###########################################################################
  # Single PDB will be used for rb setups
  pdb_list.append(single_pdb_name)
  if p.params.prep_single:
    l.process_message('Prepping single_pdb')
    single_pdb = PDBClass(
      fname=p.input.single_pdb_in,
      selection="not (resname HOH) and not (resname CL) and not (resname NA)")
    # Set B-factors
    single_pdb.set_B_zero()
    # Set Occupancy
    single_pdb.set_occ()
    # Write PDB
    single_pdb.write_hierarchy_to_pdb(
      out_name=single_pdb_name,
      output_hierarchy=single_pdb.hierarchy)
  ###########################################################################
  #               Calculate perfect scattering from single_pdb              #
  ###########################################################################
  if p.params.single_sc:
    l.process_message('single_pdb supercell operation...')
    # use EXP class to calculate diffuse scattering
    ex = EXPClass()
    ex.calc_diffuse_and_map(pdb=single_pdb_name,
                            supercell_num=2,
                            size_h=p.params.size_h,
                            size_k=p.params.size_k,
                            size_l=p.params.size_l,
                            Ncpu=p.params.Ncpu,
                            write_pdb=True,
                            l=l)
    mtz_name = 'single_sc.mtz'
    pdb_name = 'single_sc.pdb'
    os.system('rm supercell_out_1.pdb')
    os.system('mv supercell_out_0.pdb {}'.format(pdb_name))
    os.system('mv single_pdb.mtz {}'.format(mtz_name))
    os.system('mv single_pdb_IDFF.map single_sc_IDFF.map')
    mtz_list.append(mtz_name)
    pdb_list.append(pdb_name)
  ###########################################################################
  #            Calculate perfect scattering from single_pdb in P1           #
  ###########################################################################
  if p.params.single_P1:
    l.process_message('single_pdb P1 operation...')
    # use EXP class to calculate diffuse scattering
    ex = EXPClass()
    ex.calc_diffuse_and_map(pdb=single_pdb_name,
                            supercell_num=2,
                            size_h=1, size_k=1, size_l=1,
                            Ncpu=p.params.Ncpu,
                            write_pdb=True,
                            l=l)
    mtz_name = 'single_P1.mtz'
    pdb_name = 'single_P1.pdb'
    os.system('rm supercell_out_1.pdb')
    os.system('mv supercell_out_0.pdb {}'.format(pdb_name))
    os.system('mv single_pdb.mtz {}'.format(mtz_name))
    os.system('mv single_pdb_IDFF.map single_P1_IDFF.map')
    mtz_list.append(mtz_name)
    pdb_list.append(pdb_name)
  ###########################################################################
  #                Calculate diffuse scattering from rb_pdb                 #
  ###########################################################################
  if p.params.rb_sc:
    l.process_message('rb_pdb supercell operation...')
    # use EXP class to calculate diffuse scattering
    ex = EXPClass()
    ex.calc_diffuse_and_map(pdb=p.output.rb_pdb_out,
                            supercell_num=100,
                            size_h=p.params.size_h,
                            size_k=p.params.size_k,
                            size_l=p.params.size_l,
                            Ncpu=p.params.Ncpu,
                            write_pdb=True,
                            l=l)
    mtz_name = 'rb_sc.mtz'
    pdb_name = 'rb_sc.pdb'
    os.system('mv supercell_out_0.pdb {}'.format(pdb_name))
    os.system('rm supercell_out_*.pdb')
    os.system('mv rb_pdb_out.mtz {}'.format(mtz_name))
    os.system('mv rb_pdb_out_IDFF.map rb_sc_IDFF.map')
    mtz_list.append(mtz_name)
    pdb_list.append(pdb_name)
  ###########################################################################
  #             Calculate diffuse scattering from rb_pdb in P1              #
  ###########################################################################
  if p.params.rb_P1:
    l.process_message('rb_pdb P1 operation...')
    # use EXP class to calculate diffuse scattering
    ex = EXPClass()
    ex.calc_diffuse_and_map(pdb=p.output.rb_pdb_out,
                            supercell_num=100,
                            size_h=1, size_k=1, size_l=1,
                            Ncpu=p.params.Ncpu,
                            write_pdb=True,
                            l=l)
    mtz_name = 'rb_P1.mtz'
    pdb_name = 'rb_P1.pdb'
    os.system('mv supercell_out_0.pdb {}'.format(pdb_name))
    os.system('rm supercell_out_*.pdb')
    os.system('mv rb_pdb_out.mtz {}'.format(mtz_name))
    os.system('mv rb_pdb_out_IDFF.map rb_P1_IDFF.map')
    mtz_list.append(mtz_name)
    pdb_list.append(pdb_name)
  ###########################################################################
  #                       Prep intensity files for er                       #
  ###########################################################################
  if p.params.add_sigma:
    # Modify .mtz files by adding a SIGI column; this will be sqrt(I)
    l.process_message('Adding sigmas (sqrt(I)) to mtz files...')
    if len(mtz_list) == 0:
      # Dev option for when fft steps are skipped
      mtz_list = ['single_sc.mtz', 'single_P1.mtz', 'rb_sc.mtz', 'rb_P1.mtz']
    for mtz_file in mtz_list:
      l.show_info('Processing mtz file: {}'.format(mtz_file))
      # Read mtz, create mtz_object
      mtz_object = mtz.object(mtz_file)
      # Extract miller arrays from mtz_object
      miller_arrays = mtz_object.as_miller_arrays()
      # create new miller array (IBRG) with added sigmas (sqrt(IBRG))
      ibrg_new = miller_arrays[0].customized_copy(
        data=miller_arrays[0].data(),
        sigmas=flex.sqrt(miller_arrays[0].data()))
      # Create new mtz dataset
      mtz_dataset = ibrg_new.as_mtz_dataset(column_root_label="IBRG")
      # create new miller array (ITOT) with added sigmas (sqrt(ITOT))
      itot_new = miller_arrays[1].customized_copy(
        data=miller_arrays[1].data(),
        sigmas=flex.sqrt(miller_arrays[1].data()))
      mtz_dataset.add_miller_array(itot_new, column_root_label="ITOT")
      # create new miller array (IDFF) with added sigmas (sqrt(IDFF))
      idff_new = miller_arrays[2].customized_copy(
        data=miller_arrays[2].data(),
        sigmas=flex.sqrt(miller_arrays[2].data()))
      mtz_dataset.add_miller_array(idff_new, column_root_label="IDFF")
      # Write new MTZ to file
      mtz_dataset.mtz_object().write("{}_SIG.mtz".format(mtz_file[:-4]))
  ###########################################################################
  #                      For all PDB's add B-fact noise                     #
  ###########################################################################
  # Add noise (0-0.1) in B-factor column, this allows for TLS fitting without
  # influencing the ensemble refinement
  if p.params.b_fact_noise:
    l.process_message('Adding noise to B-factor column')
    # Dev-option:
    if len(pdb_list) == 1:
      pdb_list = ['single_sc.pdb', 'single_P1.pdb', 'rb_sc.pdb', 'rb_P1.pdb']
    # Loop over all PDB's
    for pdb_file in pdb_list:
      l.show_info('Adding noise to {}'.format(pdb_file))
      # Read PDB
      pdb_f = PDBClass(
        fname=pdb_file,
        selection="not (resname HOH) and not (resname CL) and not (resname NA)")
      # Loop over all atoms
      for atom in pdb_f.hierarchy.models()[0].atoms():
        atom.b = random.uniform(0.0, 0.1)
      # Write PDB to file
      pdb_f.write_hierarchy_to_pdb(
        output_hierarchy=pdb_f.hierarchy,
        out_name='{}_B_noise.pdb'.format(pdb_file[:-4]))
  ###########################################################################
  #                   Prep input for ensemble refinement                    #
  ###########################################################################
  # P1
  single_P1_vs_single_P1 = 'phenix.ensemble_refinement single_P1_B_noise.pdb single_P1_SIG.mtz output_file_prefix=single_P1_vs_single_P1 params'
  single_P1_vs_rb_P1 = 'phenix.ensemble_refinement single_P1_B_noise.pdb rb_P1_SIG.mtz output_file_prefix=single_P1_vs_rb_P1 params'
  rb_P1_vs_single_P1 = 'phenix.ensemble_refinement rb_P1_B_noise.pdb single_P1_SIG.mtz output_file_prefix=rb_P1_vs_single_P1 params'
  rb_P1_vs_rb_P1 = 'phenix.ensemble_refinement rb_P1_B_noise.pdb rb_P1_SIG.mtz output_file_prefix=rb_P1_vs_rb_P1 params'
  # SC
  single_sc_vs_single_sc = 'phenix.ensemble_refinement single_sc_B_noise.pdb single_sc_SIG.mtz output_file_prefix=single_sc_vs_single_sc params'
  single_sc_vs_rb_sc = 'phenix.ensemble_refinement single_sc_B_noise.pdb rb_sc_SIG.mtz output_file_prefix=single_sc_vs_rb_sc params'
  rb_sc_vs_single_sc = 'phenix.ensemble_refinement rb_sc_B_noise.pdb single_sc_SIG.mtz output_file_prefix=rb_sc_vs_single_sc params'
  rb_sc_vs_rb_sc = 'phenix.ensemble_refinement rb_sc_B_noise.pdb rb_sc_SIG.mtz output_file_prefix=rb_sc_vs_rb_sc params'
  # Running parameters (right now for TESTING!!!) adjust tx for real runs!!!
  params_commands = '''
ensemble_refinement {
  max_ptls_cycles = 1
  tls_group_selections = all
  ptls = 0.0
  tx = 1.0
  equilibrium_n_tx = 2
  acquisition_block_n_tx = 4
  number_of_aquisition_periods = 5
  cartesian_dynamics.stop_cm_motion = False
  ordered_solvent_update = False
  ensemble_reduction = False
  output_running_kinetic_energy_in_occupancy_column = True
}
input.xray_data.labels = ITOT,SIGITOT
input.xray_data.r_free_flags.generate = True
'''
  # Write parameter file
  with open('params', 'w') as f:
    print >> f, params_commands
  ###########################################################################
  #                Start simulations (parallel and screened)                #
  ###########################################################################
  # P1
  com = 'screen -dmSL {} {}'.format('single_P1_vs_single_P1',
                                    single_P1_vs_single_P1)
  os.system(com)
  com = 'screen -dmSL {} {}'.format('single_P1_vs_rb_P1', single_P1_vs_rb_P1)
  os.system(com)
  com = 'screen -dmSL {} {}'.format('rb_P1_vs_single_P1', rb_P1_vs_single_P1)
  os.system(com)
  com = 'screen -dmSL {} {}'.format('rb_P1_vs_rb_P1', rb_P1_vs_rb_P1)
  os.system(com)
  # SC
  com = 'screen -dmSL {} {}'.format('single_sc_vs_single_sc',
                                    single_sc_vs_single_sc)
  os.system(com)
  com = 'screen -dmSL {} {}'.format('single_sc_vs_rb_sc', single_sc_vs_rb_sc)
  os.system(com)
  com = 'screen -dmSL {} {}'.format('rb_sc_vs_single_sc', rb_sc_vs_single_sc)
  os.system(com)
  com = 'screen -dmSL {} {}'.format('rb_sc_vs_rb_sc', rb_sc_vs_rb_sc)
  os.system(com)
  return l
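# Each refinement job above is launched in a detached GNU screen session
# (-d -m: start detached, -S: session name, -L: enable logging). A small
# helper sketch that would factor out the repeated launch pattern
# (hypothetical; not used by the code above):
def _launch_detached(name, command):
  import os
  # quoting is kept minimal; the command strings above contain no quotes
  os.system('screen -dmSL {} {}'.format(name, command))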
def generate_view_data(self):
  from scitbx.array_family import flex
  #from scitbx import graphics_utils
  settings = self.settings
  data_for_colors = data_for_radii = None
  foms_for_colours = None
  if not self.fullprocessarray:
    return
  data = self.data  #self.work_array.data()
  sigmas = self.sigmas
  if (isinstance(data, flex.double) and data.all_eq(0)):
    data = flex.double(data.size(), 1)
  if ((self.multiplicities is not None) and
      (settings.scale_colors_multiplicity)):
    data_for_colors = self.multiplicities.data().as_double()
    assert data_for_colors.size() == data.size()
  elif (settings.sqrt_scale_colors) and (isinstance(data, flex.double)):
    data_for_colors = flex.sqrt(flex.abs(data))
  elif isinstance(data, flex.complex_double):
    data_for_colors = self.phases
    foms_for_colours = self.foms
    # assuming last part of the labels indicates the phase label as in
    # ["FCALC","PHICALC"]
    self.colourlabel = "Phase of " + self.miller_array.info().label_string()
  elif (settings.sigma_color) and sigmas is not None:
    data_for_colors = sigmas.as_double()
    self.colourlabel = "Sigma of " + self.miller_array.info().label_string()
  else:
    data_for_colors = flex.abs(data.deep_copy())
  uc = self.work_array.unit_cell()
  self.min_dist = min(uc.reciprocal_space_vector((1, 1, 1))) * self.renderscale
  min_radius = 0.05 * self.min_dist
  max_radius = 0.5 * self.min_dist
  if ((self.multiplicities is not None) and
      (settings.scale_radii_multiplicity)):
    data_for_radii = self.multiplicities.data().as_double()
    if (settings.sigma_radius) and sigmas is not None:
      data_for_radii = sigmas * self.multiplicities.data().as_double()
    assert data_for_radii.size() == data.size()
  elif (settings.sigma_radius) and sigmas is not None:
    data_for_radii, self.nth_power_scale_radii = nth_power_scale(
      flex.abs(sigmas.as_double().deep_copy()),
      settings.nth_power_scale_radii)
  else:
    data_for_radii, self.nth_power_scale_radii = nth_power_scale(
      flex.abs(data.deep_copy()),
      settings.nth_power_scale_radii)
  if (settings.slice_mode):
    data = data.select(self.slice_selection)
    if (not settings.keep_constant_scale):
      data_for_radii = data_for_radii.select(self.slice_selection)
      data_for_colors = data_for_colors.select(self.slice_selection)
      if foms_for_colours is not None:
        foms_for_colours = foms_for_colours.select(self.slice_selection)
  # Computing rgb colours of each reflection is slow so make a small array
  # of precomputed colours to use as a lookup table for each reflection
  if isinstance(data, flex.complex_double):
    COL = MplColorHelper(settings.color_scheme, 0, 360)
    rgbcolarray = [COL.get_rgb(d)[0:3] for d in range(360)]
    if self.isUsingFOMs():
      colors = graphics_utils.map_to_rgb_colourmap(
        data_for_colors=data_for_colors,
        colormap=rgbcolarray,
        selection=flex.bool(data_for_colors.size(), True),
        attenuation=foms_for_colours,
        map_directly=True,
        color_all=False)
    else:
      colors = graphics_utils.map_to_rgb_colourmap(
        data_for_colors=data_for_colors,
        colormap=rgbcolarray,
        selection=flex.bool(data_for_colors.size(), True),
        attenuation=None,
        map_directly=True,
        color_all=False)
  else:
    # Use a colour gradient from matplotlib
    COL = MplColorHelper(settings.color_scheme, 0, 199)
    colorgradientarray = flex.vec3_double(
      [COL.get_rgb(d)[0:3] for d in range(200)])
    # Do the table lookup in C++ for speed improvement
    colors = graphics_utils.map_to_rgb_colourmap(
      data_for_colors=data_for_colors,
      colormap=colorgradientarray,
      selection=flex.bool(data_for_colors.size(), True),
      powscale=settings.color_powscale,
      attenuation=None,
      color_all=False)
  if (settings.slice_mode) and (settings.keep_constant_scale):
    colors = colors.select(self.slice_selection)
    data_for_radii = data_for_radii.select(self.slice_selection)
  #if (settings.sqrt_scale_radii) and (not settings.scale_radii_multiplicity):
  #  data_for_radii = flex.sqrt(flex.abs(data_for_radii))
  if len(data_for_radii):
    #dat2 = flex.abs(flex.double([e for e in data_for_radii if not math.isnan(e)]))
    dat2 = flex.abs(flex.double(graphics_utils.NoNansArray(data_for_radii, 0.1)))
    # don't divide by 0 if dealing with selection of Rfree array where all
    # values happen to be zero
    scale = max_radius / (flex.max(dat2) + 0.001)
    radii = data_for_radii * (self.settings.scale * scale)
    assert radii.size() == colors.size()
  else:
    radii = flex.double()
    max_radius = 0
  self.radii = radii
  self.max_radius = max_radius
  self.min_radius = min_radius
  self.colors = colors
  if isinstance(data, flex.complex_double):
    self.foms = foms_for_colours
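# The colour lookup above avoids per-reflection colour computation by
# indexing into a small precomputed table. A pure-Python sketch of the same
# idea (hypothetical two-colour table; the real code does the lookup in C++
# via graphics_utils.map_to_rgb_colourmap):
def _example_lookup_colour(value, table=((0.0, 0.0, 1.0), (1.0, 0.0, 0.0))):
  # value is assumed normalized to [0, 1]; clamp the index to the table
  idx = min(int(value * len(table)), len(table) - 1)
  return table[idx]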
def organize_input(self, observations_pickle, iparams, avg_mode,
                   pickle_filename=None):
  """Given the pickle file, extract and prepare observations object and
  the alpha angle (meridional to equatorial).
  """
  # get general parameters
  if iparams.isoform_name is not None:
    if "identified_isoform" not in observations_pickle:
      return None, "No identified isoform"
    if observations_pickle["identified_isoform"] != iparams.isoform_name:
      return None, "Identified isoform (%s) is not the requested isoform (%s)" % (
        observations_pickle["identified_isoform"], iparams.isoform_name)
  if iparams.flag_weak_anomalous:
    if avg_mode == 'final':
      target_anomalous_flag = iparams.target_anomalous_flag
    else:
      target_anomalous_flag = False
  else:
    target_anomalous_flag = iparams.target_anomalous_flag
  img_filename_only = ''
  if pickle_filename:
    img_filename_only = os.path.basename(pickle_filename)
  txt_exception = ' {0:40} ==> '.format(img_filename_only)
  # for dials integration pickles - also look for experimentxxx.json
  if "miller_index" in observations_pickle:
    from dxtbx.model.experiment_list import ExperimentListFactory
    exp_json_file = os.path.join(
      os.path.dirname(pickle_filename),
      img_filename_only.split('_')[0] + '_refined_experiments.json')
    if os.path.isfile(exp_json_file):
      experiments = ExperimentListFactory.from_json_file(exp_json_file)
      dials_crystal = experiments[0].crystal
      detector = experiments[0].detector
      beam = experiments[0].beam
      crystal_symmetry = crystal.symmetry(
        unit_cell=dials_crystal.get_unit_cell().parameters(),
        space_group_symbol=iparams.target_space_group)
      miller_set_all = miller.set(
        crystal_symmetry=crystal_symmetry,
        indices=observations_pickle['miller_index'],
        anomalous_flag=target_anomalous_flag)
      observations = miller_set_all.array(
        data=observations_pickle['intensity.sum.value'],
        sigmas=flex.sqrt(observations_pickle['intensity.sum.variance'])
        ).set_observation_type_xray_intensity()
      detector_distance_mm = detector[0].get_distance()
      alpha_angle_obs = flex.double([0]*len(observations.data()))
      wavelength = beam.get_wavelength()
      spot_pred_x_mm = observations_pickle['s1']  # a disguise of s1
      spot_pred_y_mm = flex.double([0]*len(observations.data()))
      # calculate the crystal orientation
      O = sqr(dials_crystal.get_unit_cell().orthogonalization_matrix()
              ).transpose()
      R = sqr(dials_crystal.get_U()).transpose()
      from cctbx.crystal_orientation import crystal_orientation, basis_type
      crystal_init_orientation = crystal_orientation(O*R, basis_type.direct)
    else:
      txt_exception += exp_json_file + ' not found'
      print txt_exception
      return None, txt_exception
  else:
    # for cctbx.xfel proceed as usual
    observations = observations_pickle["observations"][0]
    detector_distance_mm = observations_pickle['distance']
    mm_predictions = iparams.pixel_size_mm * (
      observations_pickle['mapped_predictions'][0])
    xbeam = observations_pickle["xbeam"]
    ybeam = observations_pickle["ybeam"]
    alpha_angle_obs = flex.double(
      [math.atan(abs(pred[0]-xbeam)/abs(pred[1]-ybeam))
       for pred in mm_predictions])
    spot_pred_x_mm = flex.double([pred[0]-xbeam for pred in mm_predictions])
    spot_pred_y_mm = flex.double([pred[1]-ybeam for pred in mm_predictions])
    wavelength = observations_pickle["wavelength"]
    crystal_init_orientation = observations_pickle["current_orientation"][0]
  # Polarization correction
  if iparams.flag_LP_correction and "observations" in observations_pickle:
    fx = 1 - iparams.polarization_horizontal_fraction
    fy = 1 - fx
    if fx > 1.0 or fx < 0:
      print 'Horizontal polarization fraction is not correct. The value must be >= 0 and <= 1'
      print 'No polarization correction. Continue with post-refinement'
    else:
      phi_angle_obs = flex.double(
        [math.atan2(pred[1]-ybeam, pred[0]-xbeam)
         for pred in mm_predictions])
      bragg_angle_obs = observations.two_theta(wavelength).data()
      P = (fx*((flex.sin(phi_angle_obs)**2)
               + ((flex.cos(phi_angle_obs)**2)*flex.cos(bragg_angle_obs)**2))
           + fy*((flex.cos(phi_angle_obs)**2)
                 + ((flex.sin(phi_angle_obs)**2)*flex.cos(bragg_angle_obs)**2)))
      I_prime = observations.data()/P
      sigI_prime = observations.sigmas()/P
      observations = observations.customized_copy(
        data=flex.double(I_prime),
        sigmas=flex.double(sigI_prime))
  # set observations with target space group - !!! required for correct
  # merging due to map_to_asu command.
  if iparams.target_crystal_system is not None:
    target_crystal_system = iparams.target_crystal_system
  else:
    target_crystal_system = observations.crystal_symmetry(
      ).space_group().crystal_system()
  lph = lbfgs_partiality_handler()
  if iparams.flag_override_unit_cell:
    uc_constrained_inp = lph.prep_input(
      iparams.target_unit_cell.parameters(), target_crystal_system)
  else:
    uc_constrained_inp = lph.prep_input(
      observations.unit_cell().parameters(), target_crystal_system)
  uc_constrained = list(
    lph.prep_output(uc_constrained_inp, target_crystal_system))
  try:
    # apply constraint using the crystal system
    miller_set = symmetry(
      unit_cell=uc_constrained,
      space_group_symbol=iparams.target_space_group
      ).build_miller_set(
        anomalous_flag=target_anomalous_flag,
        d_min=iparams.merge.d_min)
    observations = observations.customized_copy(
      anomalous_flag=target_anomalous_flag,
      crystal_symmetry=miller_set.crystal_symmetry())
  except Exception:
    a, b, c, alpha, beta, gamma = uc_constrained
    txt_exception += 'Mismatch spacegroup (%6.2f,%6.2f,%6.2f,%6.2f,%6.2f,%6.2f)' % (
      a, b, c, alpha, beta, gamma)
    print txt_exception
    return None, txt_exception
  # reset systematic absence
  sys_absent_negate_flags = flex.bool(
    [sys_absent_flag[1] == False
     for sys_absent_flag in observations.sys_absent_flags()])
  observations = observations.select(sys_absent_negate_flags)
  alpha_angle_obs = alpha_angle_obs.select(sys_absent_negate_flags)
  spot_pred_x_mm = spot_pred_x_mm.select(sys_absent_negate_flags)
  spot_pred_y_mm = spot_pred_y_mm.select(sys_absent_negate_flags)
  # remove observations from rejection list
  if iparams.rejections:
    if pickle_filename in iparams.rejections:
      miller_indices_ori_rejected = iparams.rejections[pickle_filename]
      i_sel_flag = flex.bool([True]*len(observations.data()))
      cnrej = 0
      for miller_index_ori_rejected in miller_indices_ori_rejected:
        for i_index_ori, miller_index_ori in enumerate(observations.indices()):
          if miller_index_ori_rejected == miller_index_ori:
            i_sel_flag[i_index_ori] = False
            cnrej += 1
      observations = observations.customized_copy(
        indices=observations.indices().select(i_sel_flag),
        data=observations.data().select(i_sel_flag),
        sigmas=observations.sigmas().select(i_sel_flag))
      alpha_angle_obs = alpha_angle_obs.select(i_sel_flag)
      spot_pred_x_mm = spot_pred_x_mm.select(i_sel_flag)
      spot_pred_y_mm = spot_pred_y_mm.select(i_sel_flag)
  # filter resolution
  i_sel_res = observations.resolution_filter_selection(
    d_max=iparams.merge.d_max, d_min=iparams.merge.d_min)
  observations = observations.select(i_sel_res)
  alpha_angle_obs = alpha_angle_obs.select(i_sel_res)
  spot_pred_x_mm = spot_pred_x_mm.select(i_sel_res)
  spot_pred_y_mm = spot_pred_y_mm.select(i_sel_res)
  # filter weak
  i_sel = (observations.data()/observations.sigmas()) > iparams.merge.sigma_min
  observations = observations.select(i_sel)
  alpha_angle_obs = alpha_angle_obs.select(i_sel)
  spot_pred_x_mm = spot_pred_x_mm.select(i_sel)
  spot_pred_y_mm = spot_pred_y_mm.select(i_sel)
  # filter icering (if on)
  if iparams.icering.flag_on:
    miller_indices = flex.miller_index()
    I_set = flex.double()
    sigI_set = flex.double()
    alpha_angle_obs_set = flex.double()
    spot_pred_x_mm_set = flex.double()
    spot_pred_y_mm_set = flex.double()
    for miller_index, d, I, sigI, alpha, spot_x, spot_y in zip(
        observations.indices(), observations.d_spacings().data(),
        observations.data(), observations.sigmas(),
        alpha_angle_obs, spot_pred_x_mm, spot_pred_y_mm):
      if d > iparams.icering.d_upper or d < iparams.icering.d_lower:
        miller_indices.append(miller_index)
        I_set.append(I)
        sigI_set.append(sigI)
        alpha_angle_obs_set.append(alpha)
        spot_pred_x_mm_set.append(spot_x)
        spot_pred_y_mm_set.append(spot_y)
    observations = observations.customized_copy(
      indices=miller_indices, data=I_set, sigmas=sigI_set)
    alpha_angle_obs = alpha_angle_obs_set[:]
    spot_pred_x_mm = spot_pred_x_mm_set[:]
    spot_pred_y_mm = spot_pred_y_mm_set[:]
  # replacing sigI (if set)
  if iparams.flag_replace_sigI:
    observations = observations.customized_copy(
      sigmas=flex.sqrt(observations.data()))
  inputs = (observations, alpha_angle_obs, spot_pred_x_mm, spot_pred_y_mm,
            detector_distance_mm, wavelength, crystal_init_orientation)
  return inputs, 'OK'
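# The polarization factor P applied in organize_input above has the standard
# form P = fx*(sin^2(phi) + cos^2(phi)*cos^2(2theta))
#        + fy*(cos^2(phi) + sin^2(phi)*cos^2(2theta)), with fy = 1 - fx,
# phi the azimuthal spot angle and 2theta the scattering angle. A scalar
# sketch of the same expression (the code above evaluates it on flex arrays):
def _example_polarization_factor(phi, two_theta, fx):
  import math
  fy = 1.0 - fx
  c2 = math.cos(two_theta)**2
  return (fx*(math.sin(phi)**2 + math.cos(phi)**2*c2)
          + fy*(math.cos(phi)**2 + math.sin(phi)**2*c2))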
def model_based_outliers(self, f_model, level=.01, return_data=False,
                         plot_out=None):
  assert self.r_free_flags is not None
  if (self.r_free_flags.data().count(True) == 0):
    self.r_free_flags = self.r_free_flags.array(
      data=~self.r_free_flags.data())
  sigmaa_estimator = sigmaa_estimation.sigmaa_estimator(
    miller_obs=self.miller_obs,
    miller_calc=f_model,
    r_free_flags=self.r_free_flags,
    kernel_width_free_reflections=200,
    n_sampling_points=20,
    n_chebyshev_terms=13)
  sigmaa_estimator.show(out=self.out)
  sigmaa = sigmaa_estimator.sigmaa()
  obs_norm = abs(sigmaa_estimator.normalized_obs)
  calc_norm = sigmaa_estimator.normalized_calc
  f_model_outlier_object = scaling.likelihood_ratio_outlier_test(
    f_obs=obs_norm.data(),
    sigma_obs=None,
    f_calc=calc_norm.data(),
    # the data is prenormalized, all epsilons are unity
    epsilon=flex.double(calc_norm.data().size(), 1.0),
    centric=obs_norm.centric_flags().data(),
    alpha=sigmaa.data(),
    beta=1.0 - sigmaa.data()*sigmaa.data())
  modes = f_model_outlier_object.posterior_mode()
  lik = f_model_outlier_object.log_likelihood()
  p_lik = f_model_outlier_object.posterior_mode_log_likelihood()
  s_der = f_model_outlier_object.posterior_mode_snd_der()
  ll_gain = f_model_outlier_object.standardized_likelihood()
  # The smallest value should be 0. Sometimes, due to numerical issues,
  # it comes out a wee bit negative; repair that.
  eps = 1.0e-10
  zeros = flex.bool(ll_gain < eps)
  p_values = ll_gain
  p_values = p_values.set_selected(zeros, eps)
  p_values = erf(flex.sqrt(p_values/2.0))
  p_values = 1.0 - flex.pow(p_values, float(p_values.size()))
  # select on p-values
  flags = flex.bool(p_values > level)
  flags = self.miller_obs.customized_copy(data=flags)
  ll_gain = self.miller_obs.customized_copy(data=ll_gain)
  p_values = self.miller_obs.customized_copy(data=p_values)
  log_message = """
Model based outlier rejection.
------------------------------
Calculated amplitudes and estimated values of alpha and beta
are used to compute the log-likelihood of the observed amplitude.
The method is inspired by Read, Acta Cryst. (1999). D55, 1759-1764.
Outliers are rejected on the basis of the assumption that a scaled
log likelihood difference 2(log[P(Fobs)]-log[P(Fmode)])/Q" is distributed
according to a Chi-square distribution (Q" is equal to the second
derivative of the log likelihood function of the mode of the
distribution). The outlier threshold of the p-value relates to the
p-value of the extreme value distribution of the chi-square distribution.
"""
  flags.map_to_asu()
  ll_gain.map_to_asu()
  p_values.map_to_asu()
  assert flags.indices().all_eq(self.miller_obs.indices())
  assert ll_gain.indices().all_eq(self.miller_obs.indices())
  assert p_values.indices().all_eq(self.miller_obs.indices())
  log_message = self.make_log_model(
    log_message, flags, ll_gain, p_values, obs_norm, calc_norm, sigmaa,
    plot_out)
  tmp_log = StringIO()
  print >> tmp_log, log_message
  # histogram of log likelihood gain values
  print >> tmp_log
  print >> tmp_log, "The histogram of scaled (LL-gain) values is shown below."
  print >> tmp_log, "  Note: scaled (LL-gain) is approximately Chi-square distributed."
  print >> tmp_log
  print >> tmp_log, "  scaled(LL-gain)  Frequency"
  histo = flex.histogram(ll_gain.data(), 15)
  histo.show(f=tmp_log, format_cutoffs='%7.3f')
  print >> self.out, tmp_log.getvalue()
  if not return_data:
    return flags
  else:
    assert flags.indices().all_eq(self.miller_obs.indices())
    return self.miller_obs.select(flags.data())
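# The p-value computation above combines the chi-square(1) CDF,
# erf(sqrt(x/2)), with an extreme-value correction 1 - CDF(x)**N over the
# N reflections tested. A scalar sketch of the same arithmetic:
def _example_extreme_value_p(x, n):
  import math
  # probability that the maximum of n independent chi-square(1) draws
  # exceeds x; small values flag x as an outlier
  cdf = math.erf(math.sqrt(x/2.0))
  return 1.0 - cdf**n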