def run(args, out=sys.stdout):
  """Command-line driver that validates ligands with the given residue codes.

  Any argument that is exactly three alphanumeric characters and does not
  name an existing path is rewritten as ``ligand_code=XXX`` before the
  arguments are passed to ``mmtbx.command_line.load_model_and_data``.

  Raises ``Sorry`` when no ligand code was given, or when
  ``validate_ligands`` returns None for a requested code.
  """
  usage_string = """\
mmtbx.validate_ligands model.pdb data.mtz LIGAND_CODE [...]

Print out basic statistics for residue(s) with the given code(s), including
electron density values/CC.
"""
  import mmtbx.validation.ligands
  import mmtbx.command_line

  def _as_phil_arg(token):
    # A bare three-character code (that is not a file) is a ligand code.
    is_code = (len(token) == 3) and token.isalnum() and \
      (not os.path.exists(token))
    return ("ligand_code=%s" % token) if is_code else token

  cmdline = mmtbx.command_line.load_model_and_data(
    args=[ _as_phil_arg(token) for token in args ],
    master_phil=master_phil(),
    process_pdb_file=False,
    usage_string=usage_string)
  params = cmdline.params
  codes = params.ligand_code
  if (codes is None) or (len(codes) == 0):
    raise Sorry("Ligand code required!")
  make_sub_header("Validating ligands", out=out)
  ligands_module = mmtbx.validation.ligands
  for ligand_code in params.ligand_code:
    validations = ligands_module.validate_ligands(
      pdb_hierarchy=cmdline.pdb_hierarchy,
      fmodel=cmdline.fmodel,
      ligand_code=ligand_code,
      reference_structure=params.reference_structure,
      only_segid=params.only_segid)
    if (validations is None):
      raise Sorry("No ligands named '%s' found." % ligand_code)
    ligands_module.show_validation_results(
      validations=validations,
      out=out,
      verbose=params.verbose)
def run(self):
  """Place and optimize hydrogens on the input model and write a PDB file.

  The model comes from the data manager.  Hydrogens are added with
  ``reduce_hydrogen.place_hydrogens`` and then optimized with
  ``reduce_hydrogen.optimize``.  The result is written to
  ``<base>_hydrogenate.pdb``, where ``<base>`` is
  ``params.output.file_name_prefix`` if set, otherwise the default model
  file name without directory and extension.
  """
  self.model = self.data_manager.get_model()
  #
  make_sub_header('Add H atoms', out=self.logger)
  reduce_add_h_obj = reduce_hydrogen.place_hydrogens(
    model=self.model,
    use_neutron_distances=self.params.use_neutron_distances,
    n_terminal_charge=self.params.n_terminal_charge)
  reduce_add_h_obj.run()
  self.model = reduce_add_h_obj.get_model()
  reduce_add_h_obj.show(log=self.logger)
  #
  make_sub_header('Optimize H atoms', out=self.logger)
  self.model = reduce_hydrogen.optimize(model=self.model)
  #
  if (self.params.output.file_name_prefix is not None):
    base = self.params.output.file_name_prefix
  else:
    fp = self.data_manager.get_default_model_name()
    base = os.path.splitext(os.path.basename(fp))[0]
  # Context manager guarantees the handle is closed even if writing fails.
  with open("%s_hydrogenate.pdb" % base, "w") as of:
    of.write(self.model.model_as_pdb())
def show(self, log=null_out(), show_clashscore=True):
  """
  Print all clashes in a table.

  Writes a short summary (number of clashes, number due to symmetry and,
  if ``show_clashscore`` is true, the clashscore) followed by one table row
  per entry of ``self._clashes_dict``.  Prints 'No clashes found' when the
  dictionary is empty.
  """
  make_sub_header(' Nonbonded overlaps', out=log)
  if not self._clashes_dict:
    print('No clashes found', file=log)
    return
  # General information
  results = self.get_results()
  result_str = '{:<18} : {:5d}'
  print(result_str.format(' Number of clashes', results.n_clashes), file=log)
  print(result_str.format(' Number of clashes due to symmetry',
    results.n_clashes_sym), file=log)
  result_str = '{:<18} : {:5.2f}'
  if show_clashscore:
    print(result_str.format(' Clashscore', results.clashscore), file=log)
  # print table with all overlaps
  labels = ["Overlapping residues info", "model distance", "overlap",
    "symmetry"]
  lbl_str = '{:^33}|{:^16}|{:^11}|{:^15}'
  table_str = '{:>16}|{:>16}|{:^16.2f}|{:^11.2}|{:^15}|'
  print('\n' + lbl_str.format(*labels), file=log)
  print('-'*78, file=log)
  atoms = self.model.get_atoms()
  # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
  for iseq_tuple, record in self._clashes_dict.items():
    i_seq, j_seq = iseq_tuple
    overlap = record[2]
    # record[4] holds the symmetry operator, or None when there is none
    symop = record[4] if record[4] is not None else ''
    i_id_str = atoms[i_seq].id_str().replace('pdb=','').replace('"','')
    j_id_str = atoms[j_seq].id_str().replace('pdb=','').replace('"','')
    line = [i_id_str, j_id_str, round(record[0], 2), round(overlap, 2),
      symop]
    print(table_str.format(*line), file=log)
  print('-'*78, file=log)
def show_cc_star(self, out=None):
  """Print the CC* table: a header, one row per bin, then the overall row."""
  make_sub_header("CC* and related statistics", out=out)
  print >> out, """\
d_max d_min n_uniq compl. <I/sI> cc_1/2 cc* cc_work cc_free r_work r_free"""
  for shell in self.bins:
    row = shell.format_for_model_cc()
    print >> out, row
  overall_row = self.overall.format_for_model_cc()
  print >> out, overall_row
def show_estimated_cutoffs(self, out=sys.stdout, prefix=""):
  """Print resolution cutoffs suggested by several data-quality criteria.

  Every criterion is evaluated with ``self.estimate_d_min`` before any of
  the estimates is printed.  A criterion that yields None is reported as
  "(use all data)".
  """
  def format_d_min(value):
    return "(use all data)" if (value is None) else "%7.3f" % value

  # (output template, estimate_d_min keywords); a template of None means the
  # estimate is computed but not reported.
  criteria = [
    (" based on CC(1/2) >= 0.33 : %s", dict(min_cc_one_half=0.33)),
    (" based on mean(I/sigma) >= 2.0 : %s", dict(min_i_over_sigma=2.0)),
    (" based on R-merge < 0.5 : %s", dict(max_r_merge=0.5)),
    (" based on R-meas < 0.5 : %s", dict(max_r_meas=0.5)),
    (None, dict(min_cc_anom=0.3)),
    (" based on completeness >= 90%% : %s", dict(min_completeness=0.9)),
    (" based on completeness >= 50%% : %s", dict(min_completeness=0.5)),
  ]
  print >> out, ""
  print >> out, ""
  make_sub_header("Resolution cutoff estimates", out=out)
  print >> out, prefix + (" resolution of all data : %7.3f" %
    self.overall.d_min)
  estimates = [ (template, self.estimate_d_min(**keywords))
                for template, keywords in criteria ]
  for template, d_min in estimates:
    if (template is None):
      continue
    print >> out, prefix + (template % format_d_min(d_min))
  print >> out, ""
  print >> out, "NOTE: we recommend using all data out to the CC(1/2) limit"
  print >> out, "for refinement."
def show(self, out=sys.stdout, prefix="", outliers_only=None, verbose=True):
  """Print each result object named in ``__slots__`` under its own header.

  When ``outliers_only`` is true, results without outliers are skipped.
  ``prefix`` and ``verbose`` are accepted but not used here.
  """
  for slot_name in self.__slots__:
    result = getattr(self, slot_name)
    if outliers_only and (not (result.n_outliers > 0)):
      continue
    make_sub_header(result.label, out=out)
    result.show(out=out)
def print_adps(self):
  """Print min/max/mean B-factor and iso/aniso counts for every ligand.

  ``self.items()`` yields ``(id_tuple, ligand_dict)`` pairs.  A ligand with a
  single entry is printed on one line without an altloc column; otherwise
  one line is printed per altloc.
  """
  def _adp_columns(adps):
    # Fixed-width columns shared by both output variants.
    return '%7s%7s%7s%7s%7s' % (
      round(adps.b_min, 1), round(adps.b_max, 1), round(adps.b_mean, 1),
      adps.n_iso, adps.n_aniso)

  make_sub_header(' ADPs ', out=self.log)
  pad1 = ' ' * 20
  print(pad1, "min max mean n_iso n_aniso", file=self.log)
  for id_tuple, ligand_dict in self.items():
    if len(ligand_dict) == 1:
      pad2 = ' ' * 4
      # dict.values() is a view on Python 3 and cannot be indexed
      lr = next(iter(ligand_dict.values()))
      print(lr.resname, lr.id_str, pad2, _adp_columns(lr.get_adps()),
        file=self.log)
    else:
      pad2 = ' ' * 2
      for altloc, lr in ligand_dict.items():
        print(lr.resname, lr.id_str, altloc, pad2,
          _adp_columns(lr.get_adps()), file=self.log)
def show_cc_star(self, out=None):
  """Write the CC* statistics table (per-bin rows followed by overall)."""
  make_sub_header("CC* and related statistics", out=out)
  column_header = """\
d_max d_min n_uniq compl. <I/sI> cc_1/2 cc* cc_work cc_free r_work r_free"""
  print >> out, column_header
  for bin_stats in self.bins:
    print >> out, bin_stats.format_for_model_cc()
  print >> out, self.overall.format_for_model_cc()
def warn(self, text):
  """Record ``text`` as a warning and write it to ``self.out`` under a banner.

  The banner is rendered into a temporary buffer so that trailing whitespace
  can be stripped from each of its lines before it is written.
  """
  self._warnings.append(text)
  banner = StringIO()
  make_sub_header("WARNING", out=banner, sep='*')
  banner_lines = banner.getvalue().splitlines()
  for banner_line in banner_lines:
    self.out.write("%s\n" % banner_line.rstrip())
  self.out.write(text)
def run(self):
  """Run ligand validation on the default model (and data, if present).

  If a default reflection file is set, an ``mmtbx.f_model.manager`` is built
  from the observed data and free-R flags.  ``validate_ligands.manager`` is
  then run and its ligand counts, occupancies and ADPs are printed.
  Finally, the older per-ligand-code validation is run when ligand codes
  were given and a reflection file is available.  It raises ``Sorry`` if a
  code yields no validations.
  """
  print('Using model file:', self.data_manager.get_default_model_name())
  print('Using reflection file:',
    self.data_manager.get_default_miller_array_name())

  cs = self.get_crystal_symmetry()
  model = self.data_manager.get_model()
  ph = model.get_hierarchy()
  xrs = model.get_xray_structure()

  if self.data_manager.get_default_miller_array_name():
    f_obs, r_free_flags = self.get_fobs_rfree(crystal_symmetry = cs)
    print('\nInput data...', file=self.logger)
    print(' Reflection data:', f_obs.info().labels, file=self.logger)
    if (r_free_flags is None):
      print(' Free-R flags: not present or not found', file=self.logger)
    else:
      print(' Free-R flags:', r_free_flags.info().labels, file=self.logger)
    fmodel = mmtbx.f_model.manager(
      f_obs          = f_obs,
      r_free_flags   = r_free_flags,
      xray_structure = xrs)
    # TODO: delete this keyword for production
    if self.params.update_scales:
      fmodel.update_all_scales()

  print('\nWorking crystal symmetry after inspecting all inputs:',
    file=self.logger)
  cs.show_summary(f=self.logger)

  # This is the new class, currently a stub but will be developed
  # winter 2018/spring 2019 by DL and NWM
  ligand_manager = validate_ligands.manager(
    model = model,
    nproc = self.params.nproc,
    log   = self.logger)
  ligand_manager.run()
  ligand_manager.print_ligand_counts()
  ligand_manager.print_ligand_occupancies()
  ligand_manager.print_adps()

  # TODO
  # DL: Eventually, delete "old" call below, but leave it for now to keep the
  # functionality alive, just in case
  if not (self.params.ligand_code and
          self.data_manager.get_default_miller_array_name() is not None):
    return
  if (self.params.ligand_code is None or
      self.params.ligand_code[0] is None):
    return
  make_sub_header("Validating ligands", out=self.logger)
  for ligand_code in self.params.ligand_code:
    validations = mmtbx.validation.ligands.validate_ligands(
      pdb_hierarchy       = ph,
      fmodel              = fmodel,
      ligand_code         = ligand_code,
      reference_structure = self.params.reference_structure,
      only_segid          = self.params.only_segid)
    if (validations is None):
      raise Sorry("No ligands named '%s' found." % ligand_code)
    mmtbx.validation.ligands.show_validation_results(
      validations = validations,
      out         = self.logger,
      verbose     = self.params.verbose)
def refine (self, title="Refining multi-conformer model",
    constrain_occupancies=Auto) :
  """
  Run one refinement cycle on the current model and adopt the result.

  Calls phenix.automation.refinement.refine_hires_simple, reports the
  elapsed time on self.out, then replaces self.pdb_hierarchy, self.fmodel
  and self.map_file with the refined versions and increments
  self.refine_cycle.

  :param title: sub-header text printed before refinement starts
  :param constrain_occupancies: when true, the
    refinement.constrain_correlated_occupancies parameter decides whether
    "constrain_correlated_3d_groups=True" is passed as an extra argument
  """
  make_sub_header(title, out=self.out)
  t1 = time.time()
  extra_args = []
  # NOTE(review): the nesting of this if/else was reconstructed from
  # whitespace-collapsed text; confirm which `if` the `else` belongs to.
  if constrain_occupancies :
    if (self.params.refinement.constrain_correlated_occupancies) :
      extra_args.append("constrain_correlated_3d_groups=True")
    else :
      print >> self.out, " Correlated occupancies will *not* be constrained"
  # imported here rather than at module level
  from phenix.automation import refinement
  refined = refinement.refine_hires_simple(
    pdb_hierarchy=self.pdb_hierarchy,
    crystal_symmetry=self.fmodel.xray_structure,
    fmodel=self.fmodel,
    params=self.params.refinement,
    cif_files=self.cif_files,
    cycle=self.refine_cycle,
    extra_args=extra_args,
    out=self.out) # TODO need a verbosity flag
  t2 = time.time()
  print >> self.out, " refinement time: %.3fs" % (t2-t1)
  print >> self.out, ""
  # adopt the refined model, data and map for subsequent steps
  self.pdb_hierarchy = refined.pdb_hierarchy
  self.fmodel = refined.fmodel
  self.fmodel.info().show_targets(out=self.out, text="refined model")
  self.map_file = refined.map_file
  self.refine_cycle += 1
def print_overall_results(self, overall_counts_hd, prefix='', log=None):
  """Print overall H/D atom counts and water statistics.

  The (label, value) tables are stored on ``self.hd_overall_values`` and
  ``self.hd_water_values`` and printed with ``self.formatted_print``.
  ``log`` defaults to ``self.log``.
  """
  if (log is None):
    log = self.log
  counts = overall_counts_hd

  make_sub_header('H/D atoms in the input model', out=log)
  self.hd_overall_values = [
    ('Total number of hydrogen atoms', counts.count_h),
    ('Total number of deuterium atoms', counts.count_d),
    ('Number of H atoms (protein)', counts.count_h_protein),
    ('Number of D atoms (protein)', counts.count_d_protein),
    ('Number of H atoms (water)', counts.count_h_water),
    ('Number of D atoms (water)', counts.count_d_water),
    ('Number of H atoms (other)', counts.count_h_other),
    ('Number of D atoms (other)', counts.count_d_other),
  ]
  self.formatted_print(prefix, self.hd_overall_values, log)

  make_sub_header('Water molecules', out=log)
  self.hd_water_values = [
    ('Number of water', counts.count_water),
    ('Number of water with 0 H (or D)', counts.count_water_0h),
    ('Number of water with 1 H (or D)', counts.count_water_1h),
    ('Number of water with 2 H (or D)', counts.count_water_2h),
    ('Number of water in alternative conformation',
      counts.count_water_altconf),
    ('Number of water without oxygen atom', counts.count_water_no_oxygen),
  ]
  self.formatted_print(prefix, self.hd_water_values, log)
def show_estimated_cutoffs(self, out=sys.stdout, prefix=""):
  """Report resolution cutoff estimates for several quality criteria.

  All estimates are obtained from ``self.estimate_d_min`` before the table
  is printed.  An estimate of None is shown as "(use all data)".
  """
  def format_d_min(value):
    if (value is not None):
      return "%7.3f" % value
    return "(use all data)"

  def emit(text):
    print >> out, text

  emit("")
  emit("")
  make_sub_header("Resolution cutoff estimates", out=out)
  emit(prefix + " resolution of all data : %7.3f" % self.overall.d_min)
  d_min_cc_half = self.estimate_d_min(min_cc_one_half=0.33)
  d_min_i_sigma = self.estimate_d_min(min_i_over_sigma=2.0)
  d_min_r_merge = self.estimate_d_min(max_r_merge=0.5)
  d_min_r_meas = self.estimate_d_min(max_r_meas=0.5)
  # evaluated as in the original code, but not included in the report
  cc_anom_cut = self.estimate_d_min(min_cc_anom=0.3)
  d_min_compl_90 = self.estimate_d_min(min_completeness=0.9)
  d_min_compl_50 = self.estimate_d_min(min_completeness=0.5)
  report = (
    (" based on CC(1/2) >= 0.33 : %s", d_min_cc_half),
    (" based on mean(I/sigma) >= 2.0 : %s", d_min_i_sigma),
    (" based on R-merge < 0.5 : %s", d_min_r_merge),
    (" based on R-meas < 0.5 : %s", d_min_r_meas),
    (" based on completeness >= 90%% : %s", d_min_compl_90),
    (" based on completeness >= 50%% : %s", d_min_compl_50),
  )
  for template, d_min in report:
    emit(prefix + template % format_d_min(d_min))
  emit("")
  emit("NOTE: we recommend using all data out to the CC(1/2) limit")
  emit("for refinement.")
def rejoin(self):
  """Re-join split conformers that are identical; return True if any changed.

  The operation runs on a deep copy of ``self.pdb_hierarchy``.  The copy
  replaces the current hierarchy only when at least one residue was
  modified, in which case the fmodel is updated and ``self.map_file`` is
  reset.  ``finalize_model`` is then applied to the current hierarchy.
  """
  make_sub_header("Re-joining identical conformers", out=self.out)
  working_copy = self.pdb_hierarchy.deep_copy()
  n_modified = alternate_conformations.rejoin_split_single_conformers(
    pdb_hierarchy=working_copy,
    crystal_symmetry=self.fmodel.xray_structure,
    model_error_ml=self.fmodel.model_error_ml(),
    params=self.params.merging,
    reset_occupancies=\
      self.params.refinement.constrain_correlated_occupancies,
    verbose=self.verbose,
    log=self.out)
  was_modified = (n_modified > 0)
  if was_modified:
    self.pdb_hierarchy = working_copy
    updated_xrs = self.pdb_hierarchy.extract_xray_structure(
      crystal_symmetry=self.fmodel.xray_structure)
    self.fmodel.update_xray_structure(updated_xrs)
    self.map_file = None
  final_xrs = self.pdb_hierarchy.extract_xray_structure(
    crystal_symmetry=self.fmodel.xray_structure)
  alternate_conformations.finalize_model(
    pdb_hierarchy=self.pdb_hierarchy,
    xray_structure=final_xrs,
    set_b_iso=None,
    convert_to_isotropic=False)
  return was_modified
def warn(self, text):
  """Store a warning message and echo it to ``self.out`` below a banner."""
  self._warnings.append(text)
  header_buffer = StringIO()
  make_sub_header("WARNING", out=header_buffer, sep='*')
  # write the banner line by line, without trailing whitespace
  for raw_line in header_buffer.getvalue().splitlines():
    stripped = raw_line.rstrip()
    self.out.write("%s\n" % stripped)
  self.out.write(text)
def print_missing_HD_atoms(self, missing_HD_atoms, prefix, log=None):
  """Print one line per entry listing its missing H or D atoms.

  Each entry is indexed as ``entry[0]`` (a label, of which the slice
  ``[8:-1]`` is shown), ``entry[1]`` (an iterable of atom names) and
  ``entry[2]`` (the conformer).  ``log`` defaults to ``self.log``.
  """
  if (log is None):
    log = self.log
  make_sub_header('MISSING H or D atoms', out=log)
  for entry in missing_HD_atoms:
    label = entry[0][8:-1]
    conformer = entry[2]
    atom_names = ", ".join(entry[1])
    print('%s%s conformer %s : %s ' % (prefix, label, conformer, atom_names),
      file=log)
def show(self, out=sys.stdout, prefix="", verbose=True):
  """Print every restraint type listed in ``__geo_types__`` under a header.

  For the "angles" entry a note is added when ``self._use_cdl`` is set.
  ``verbose`` is accepted but not used here.
  """
  cdl_note = " Using conformation-dependent library for mainchain " + \
    "bond angle targets"
  for geo_type in self.__geo_types__:
    restraints = getattr(self, geo_type)
    make_sub_header(restraints.restraint_label + "s", out=out)
    uses_cdl = (geo_type == "angles") and getattr(self, "_use_cdl", False)
    if uses_cdl:
      print >> out, cdl_note
      print >> out, ""
    restraints.show(out=out, prefix=prefix)
def show(self, out=sys.stdout, prefix="", verbose=True):
  """Show all geometry restraint results, one sub-header per type.

  The type names come from ``self.__geo_types__``.  When the conformation-
  dependent library flag (``self._use_cdl``) is set, a note is printed
  before the angle results.  ``verbose`` is not used in this method.
  """
  for type_name in self.__geo_types__:
    result = getattr(self, type_name)
    heading = result.restraint_label + "s"
    make_sub_header(heading, out=out)
    if (type_name == "angles") and getattr(self, "_use_cdl", False):
      print >> out, (" Using conformation-dependent library for mainchain "
        "bond angle targets")
      print >> out, ""
    result.show(out=out, prefix=prefix)
def print_results_hd_sites(self, count_exchanged_sites, hd_sites_analysis,
    overall_counts_hd, prefix='', log=None):
  """Print the summary of H/D exchanged sites and any inconsistent pairs.

  The summary table is stored on ``self.hd_exchange_values``.  Further
  sections are printed only for the lists of ``hd_sites_analysis`` that are
  non-empty: pairs at different positions, pairs with different ADPs, pairs
  whose occupancies do not sum to 1, and rotatable pairs with zero
  scattering occupancy sum.  ``log`` defaults to ``self.log``.
  """
  if (log is None):
    log = self.log

  def _short(id_string):
    # the listings show the slice [5:-1] of each atom id string
    return id_string[5:-1]

  analysis = hd_sites_analysis
  sites_different_xyz = analysis.sites_different_xyz
  sites_different_b = analysis.sites_different_b
  sites_sum_occ_not_1 = analysis.sites_sum_occ_not_1
  sites_occ_sum_no_scattering = analysis.sites_occ_sum_no_scattering

  make_sub_header('H/D EXCHANGED SITES', out=log)
  n_only_h = overall_counts_hd.count_h_protein - count_exchanged_sites
  n_only_d = overall_counts_hd.count_d_protein - count_exchanged_sites
  self.hd_exchange_values = [
    ('Number of H/D exchanged sites', count_exchanged_sites),
    ('Number of atoms modelled only as H', n_only_h),
    ('Number of atoms modelled only as D', n_only_d),
  ]
  self.formatted_print(prefix, self.hd_exchange_values, log)

  if sites_different_xyz:
    print('\n%sH/D pairs not at identical positions:' % prefix, file=log)
    for pair in sites_different_xyz:
      print('%s %s and %s at distance %.3f' %
        (prefix, _short(pair[0]), _short(pair[1]), pair[2]), file=log)

  if sites_different_b:
    print('\n%sH/D pairs without identical ADPs:' % prefix, file=log)
    for pair in sites_different_b:
      print('%s %s and %s ' % (prefix, _short(pair[0]), _short(pair[1])),
        file=log)

  if sites_sum_occ_not_1:
    print('\n%sH/D pairs with occupancy sum != 1:' % prefix, file=log)
    for pair in sites_sum_occ_not_1:
      print('%s %s and %s with occupancy sum %s' %
        (prefix, _short(pair[0]), _short(pair[1]), pair[2]), file=log)

  if sites_occ_sum_no_scattering:
    print('\n%sRotatable H/D pairs with zero scattering occupancy sum:'
      % prefix, file=log)
    for pair in sites_occ_sum_no_scattering:
      print('%s %s with occ %s and %s with occ %s' %
        (prefix, _short(pair[0]), pair[2], _short(pair[1]), pair[3]),
        file=log)
def print_renamed(self, renamed, prefix='', log=None):
  """Print ``old --> new`` for every renamed atom.

  Each entry is indexed as ``(id_str, new_name, old_name)``.  ``log``
  defaults to ``self.log``.
  """
  if (log is None):
    log = self.log
  make_sub_header('The following atoms were renamed:', out=log)
  for record in renamed:
    # entry layout: [0] atom id, [1] new name, [2] old name
    fields = (prefix, record[0], record[2], record[1])
    print('%s%s atom %s --> %s' % fields, file=log)
def __init__ (self, fmodel, pdb_hierarchy, params=None,
    processed_pdb_file=None, geometry_restraints_manager=None,
    cif_objects=(), cif_files=(), # XXX bug
    debug=None, verbose=True, out=sys.stdout) :
  """
  Build alternate conformers, refine, validate and report.

  All arguments are stored as attributes by adopt_init_args.  When params
  is None, self.params is taken from master_phil.extract().alt_confs.  Each
  macro-cycle builds residue conformers and, when any were built, refines,
  re-joins identical conformers and refines again.  Afterwards a MolProbity
  summary, the disorder analysis and the start/final R-factors are printed.

  Raises Sorry when the first macro-cycle finds no alternate conformations.
  """
  adopt_init_args(self, locals())
  if (self.params is None) :
    self.params = master_phil.extract().alt_confs
  self.extract_selection()
  self.refine_cycle = 1
  self.map_file = None
  self.r_work_start = fmodel.r_work()
  self.r_free_start = fmodel.r_free()
  t_start = time.time()
  # Use self.params here: the `params` argument may be None, in which case
  # the defaults extracted above must be used instead.
  for i_cycle in range(self.params.macro_cycles) :
    n_alts = self.build_residue_conformers(stop_if_none=(i_cycle==0))
    if (n_alts == 0) :
      if (i_cycle == 0) :
        raise Sorry("No alternate conformations found.")
    else :
      self.refine(constrain_occupancies=False)
      # NOTE(review): refine_again is assigned but never read afterwards.
      refine_again = self.params.refinement.constrain_correlated_occupancies
      if (self.rejoin()) :
        refine_again = True
      self.refine(title="Refining final model")
  make_header("Finished", out=out)
  from mmtbx.validation import molprobity
  validation = molprobity.molprobity(
    pdb_hierarchy=self.pdb_hierarchy,
    outliers_only=False)
  print >> self.out, ""
  validation.show_summary(out=self.out, prefix=" ")
  make_sub_header("Analyzing final model", out=out)
  analyze_model.process_pdb_hierarchy(
    pdb_hierarchy=self.pdb_hierarchy,
    validation=validation,
    log=self.out).show(out=out, verbose=self.verbose)
  print >> self.out, ""
  print >> self.out, "Start: r_work=%6.4f r_free=%6.4f" % \
    (self.r_work_start, self.r_free_start)
  print >> self.out, "Final: r_work=%6.4f r_free=%6.4f" % \
    (self.fmodel.r_work(), self.fmodel.r_free())
  t_end = time.time()
  print >> self.out, ""
  print >> self.out, "Total runtime: %d s" % int(t_end - t_start)
  print >> self.out, ""
def run(args=(), params=None, out=None):
  """Show B-factor (ADP) statistics for a model, optionally for a selection.

  When ``params`` is None the command-line ``args`` are parsed with
  ``iotbx.phil.process_command_line_with_files``.  The model is interpreted
  with ``pdb_interpretation.run`` and wrapped in an ``mmtbx.model.manager``.
  If an atom selection is given, the statistics cover only that selection.

  Raises ``Sorry`` when the selection matches no atoms.
  Returns the statistics object from ``model.adp_statistics()``.
  """
  out = sys.stdout if (out is None) else out
  if (params is None):
    import iotbx.phil
    usage = """\
phenix.b_factor_statistics model.pdb [restraints.cif] [selection=...]

Show statistics for atomic displacement parameters (ADPs) or B-factors,
including TLS contribution if present."""
    cmdline = iotbx.phil.process_command_line_with_files(
      args=args,
      master_phil_string=master_phil,
      pdb_file_def="adp_statistics.pdb_file",
      cif_file_def="adp_statistics.cif_file",
      usage_string=usage)
    params = cmdline.work.extract()
  validate_params(params)
  import mmtbx.model
  import mmtbx.restraints
  from mmtbx.monomer_library import pdb_interpretation
  input_files = [params.adp_statistics.pdb_file] + \
    params.adp_statistics.cif_file
  processed_pdb_file = pdb_interpretation.run(
    args=input_files,
    substitute_non_crystallographic_unit_cell_if_necessary=True,
    log=out)
  geometry = processed_pdb_file.geometry_restraints_manager(
    show_energies=True)
  restraints_manager = mmtbx.restraints.manager(
    geometry=geometry,
    normalization=True)
  model = mmtbx.model.manager(
    xray_structure=processed_pdb_file.xray_structure(),
    pdb_hierarchy=processed_pdb_file.all_chain_proxies.pdb_hierarchy,
    restraints_manager=restraints_manager,
    log=out)
  make_sub_header("Analyzing model B-factors", out=out)
  if (params.adp_statistics.selection is not None):
    sel_cache = model.pdb_hierarchy().atom_selection_cache()
    selection = sel_cache.selection(params.adp_statistics.selection)
    n_sel = selection.count(True)
    if (n_sel == 0):
      raise Sorry("No atoms in selection!")
    model = model.select(selection)
    print >> out, "Extracted %d atoms in selection:" % n_sel
    print >> out, " %s" % params.adp_statistics.selection
    print >> out, ""
  stats = model.adp_statistics()
  stats.file_name = params.adp_statistics.pdb_file
  stats.selection = params.adp_statistics.selection
  stats.show_1(out=out)
  return stats
def run(args=(), params=None, out=None):
  """Compute and print ADP (B-factor) statistics for a PDB model.

  ``params`` is extracted from the command-line ``args`` when not supplied.
  The PDB file and any restraint CIF files are processed with
  ``pdb_interpretation.run``.  The model is restricted to
  ``adp_statistics.selection`` if one is set.

  Raises ``Sorry`` if the selection is empty.  Returns the result of
  ``model.adp_statistics()``.
  """
  if (out is None):
    out = sys.stdout
  if (params is None):
    import iotbx.phil
    cmdline = iotbx.phil.process_command_line_with_files(
      args=args,
      master_phil_string=master_phil,
      pdb_file_def="adp_statistics.pdb_file",
      cif_file_def="adp_statistics.cif_file",
      usage_string="""\
phenix.b_factor_statistics model.pdb [restraints.cif] [selection=...]

Show statistics for atomic displacement parameters (ADPs) or B-factors,
including TLS contribution if present.""")
    params = cmdline.work.extract()
  validate_params(params)
  import mmtbx.model
  import mmtbx.restraints
  from mmtbx.monomer_library import pdb_interpretation
  adp_params = params.adp_statistics
  processed = pdb_interpretation.run(
    args=[adp_params.pdb_file] + adp_params.cif_file,
    substitute_non_crystallographic_unit_cell_if_necessary=True,
    log=out)
  grm = processed.geometry_restraints_manager(show_energies=True)
  model = mmtbx.model.manager(
    xray_structure=processed.xray_structure(),
    pdb_hierarchy=processed.all_chain_proxies.pdb_hierarchy,
    restraints_manager=mmtbx.restraints.manager(
      geometry=grm,
      normalization=True),
    log=out)
  make_sub_header("Analyzing model B-factors", out=out)
  selection_string = params.adp_statistics.selection
  if (selection_string is not None):
    cache = model.pdb_hierarchy().atom_selection_cache()
    selected = cache.selection(selection_string)
    n_selected = selected.count(True)
    if (n_selected == 0):
      raise Sorry("No atoms in selection!")
    model = model.select(selected)
    print >> out, "Extracted %d atoms in selection:" % n_selected
    print >> out, " %s" % params.adp_statistics.selection
    print >> out, ""
  stats = model.adp_statistics()
  stats.file_name = params.adp_statistics.pdb_file
  stats.selection = params.adp_statistics.selection
  stats.show_1(out=out)
  return stats
def print_atoms_occ_lt_1(self, hd_atoms_with_occ_0, single_hd_atoms_occ_lt_1,
    prefix='', log=None):
  """List H/D atoms with zero occupancy and those with occupancy below 1.

  A section is printed only when its input list is non-empty.  ``log``
  defaults to ``self.log``.
  """
  if (log is None):
    log = self.log
  if hd_atoms_with_occ_0:
    make_sub_header('H (or D) atoms with zero occupancy', out=log)
    for entry in hd_atoms_with_occ_0:
      atom_label = entry[0]
      print('%s%s' % (prefix, atom_label), file=log)
  if single_hd_atoms_occ_lt_1:
    make_sub_header('H (or D) atoms with occupancy < 1', out=log)
    for entry in single_hd_atoms_occ_lt_1:
      atom_label, occupancy = entry[0], entry[1]
      print('%s%s with occupancy %s' % (prefix, atom_label, occupancy),
        file=log)
def check_work_root_folder(self):
  '''
  Check that the current directory is ``self.work_root_folder``.

  Both paths are printed as absolute paths.  ``self.error`` is set to True
  when they differ; otherwise 'OK' is printed.
  '''
  make_sub_header('Check location')
  current_dir = os.path.abspath(".")
  expected_dir = os.path.abspath(self.work_root_folder)
  print('Current folder: ', current_dir)
  print('Folder where script should be executed: ', expected_dir)
  if current_dir == expected_dir:
    print('OK')
  else:
    print('Wrong folder location')
    self.error = True
def show_ligand_occupancies(self):
  """Print the occupancy of every ligand conformer to ``self.log``.

  A single value is printed when the minimum and maximum occupancy are
  equal; otherwise min, max and mean are printed.
  """
  make_sub_header(' Occupancies ', out=self.log)
  explanation = ('If three values: min, max, mean, otherwise the same '
    'occupancy for entire ligand.')
  print(explanation, file=self.log)
  for id_tuple, ligand_dict in self.items():
    for altloc, lr in ligand_dict.items():
      occs = lr.get_occupancies()
      label = lr.id_str.ljust(16)
      if (occs.occ_min != occs.occ_max):
        summary = '%s %s %s' % (occs.occ_min, occs.occ_max, occs.occ_mean)
        print(label, summary, file=self.log)
      else:
        print(label, occs.occ_min, file=self.log)
def print_outliers_bonds_angles(self, outliers_bonds, outliers_angles,
    prefix='', log=None):
  """Print bond and angle outliers, one line each, to ``log``.

  Each outlier is indexed as ``(id, atoms, observed, delta)``.  A section is
  printed only when its list is non-empty.  ``log`` defaults to
  ``self.log``.
  """
  if (log is None):
    log = self.log
  if outliers_bonds:
    make_sub_header('Bond outliers', out=log)
    for item in outliers_bonds:
      print('%s%s, Bond %s, observed: %.3f, delta from target: %.3f' %
        (prefix, item[0], item[1], item[2], item[3]), file=log)
  if outliers_angles:
    make_sub_header('Angle outliers', out=log)
    for item in outliers_angles:
      # write to the requested log, not unconditionally to self.log
      print('%s%s, Angle %s, observed: %.3f, delta from target: %.3f' %
        (prefix, item[0], item[1], item[2], item[3]), file=log)
def run(args, out=sys.stdout):
  """Load model and data from ``args`` and print the solvent analysis.

  Returns the ``waters.waters`` result object.
  """
  import mmtbx.command_line
  from mmtbx.validation import waters
  phil = mmtbx.command_line.generate_master_phil_with_inputs("")
  loaded = mmtbx.command_line.load_model_and_data(
    args=args,
    master_phil=phil,
    process_pdb_file=False,
    out=out)
  analysis = waters.waters(
    pdb_hierarchy=loaded.pdb_hierarchy,
    xray_structure=loaded.xray_structure,
    fmodel=loaded.fmodel,
    collect_all=True)
  make_sub_header("Solvent analysis", out=out)
  analysis.show(out=out)
  return analysis
def __init__(self, logger, folder, params):
  """Store the inputs, derive the entry codes and prepare the work area.

  ``self.prefix`` is the last path component of ``folder``.  Its first four
  characters become ``self.pdb_code`` and everything from index 5 onwards
  becomes ``self.map_code``.
  """
  self.logger, self.folder, self.params = logger, folder, params
  self.prefix = os.path.basename(os.path.normpath(folder))
  self.pdb_code, self.map_code = self.prefix[0:4], self.prefix[5:]
  self.success = True
  title = 'Model: %s (emdb %s)' % (self.pdb_code, self.map_code)
  make_header(title, out=self.logger)
  make_sub_header('Initializing', out=self.logger)
  self.prepare_directory()
  self.initialize_json()
def show_ccs(self):
  '''
  Print correlation coefficients and Fo-Fc statistics for every ligand.

  Nothing is printed when ``self.fmodel`` is None.  All values are rounded
  to two decimals.
  '''
  if self.fmodel is None:
    return
  make_sub_header(' Correlation coefficients ', out=self.log)
  # attributes of the get_ccs() result, in output column order
  columns = ('cc_two_fofc', 'cc_fofc', 'fofc_min', 'fofc_max', 'fofc_mean')
  for id_tuple, ligand_dict in self.items():
    for altloc, lr in ligand_dict.items():
      ccs = lr.get_ccs()
      values = [ round(getattr(ccs, name), 2) for name in columns ]
      print(lr.id_str.ljust(16), *values, file = self.log)
def show(self, log=null_out()):
  """
  Print all hbonds in a table.

  Writes the number of hydrogen bonds followed by one row per entry of
  ``self._hbonds_dict`` (donor X, hydrogen H, acceptor A, the H...A and
  X...A distances, the X-H...A angle and the symmetry operator).  Prints
  'No hbonds found' when the dictionary is empty.
  """
  make_sub_header(' Hydrogen bonds', out=log)
  if not self._hbonds_dict:
    print('No hbonds found', file=log)
    return
  # General information
  results = self.get_results()
  result_str = '{:<18} : {:5d}'
  print(result_str.format(' Number of H bonds', results.n_hbonds), file=log)
  # print table with all H-bonds
  title1 = ['donor', 'acceptor', 'distance', 'angle']
  title1_str = '{:^33}|{:^16}|{:^21}|{:^14}|'
  print('\n' + title1_str.format(*title1), file=log)
  title2 = ['X', 'H', 'A', 'H...A', 'X...A', 'X-H...A', 'symop']
  title2_str = '{:^16}|{:^16}|{:^16}|{:^10}|{:^10}|{:^14}|{:^15}|'
  print(title2_str.format(*title2), file=log)
  table_str = '{:>16}|{:>16}|{:^16}|{:^10.2f}|{:^10.2f}|{:^14.2f}|{:^15}|'
  print('-' * 99, file=log)
  atoms = self.model.get_atoms()
  # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
  for iseq_tuple, record in self._hbonds_dict.items():
    iseq_x, iseq_h, iseq_a = iseq_tuple
    # record[4] holds the symmetry operator, or None when there is none
    symop = record[4] if record[4] is not None else ''
    x_id_str = atoms[iseq_x].id_str().replace('pdb=', '').replace('"', '')
    h_id_str = atoms[iseq_h].id_str().replace('pdb=', '').replace('"', '')
    a_id_str = atoms[iseq_a].id_str().replace('pdb=', '').replace('"', '')
    line = [
      x_id_str, h_id_str, a_id_str,
      round(record[0], 2), round(record[1], 2), round(record[2], 2),
      symop
    ]
    print(table_str.format(*line), file=log)
  print('-' * 99, file=log)
def show_adps(self):
  '''
  Print ADP statistics of each ligand and, if available, of its neighbors.

  The neighbor line is printed only when ``b_mean_within`` is not None.
  '''
  make_sub_header(' ADPs ', out=self.log)
  header_pad = ' '*18
  print(header_pad, "min max mean n_iso n_aniso", file=self.log)
  for id_tuple, ligand_dict in self.items():
    for altloc, lr in ligand_dict.items():
      adps = lr.get_adps()
      ligand_columns = '%7s%7s%7s%7s%7s' % (
        round(adps.b_min,1), round(adps.b_max,1), round(adps.b_mean,1),
        adps.n_iso, adps.n_aniso)
      print(lr.id_str.ljust(14), ligand_columns, file = self.log)
      if (adps.b_mean_within is None):
        continue
      neighbor_columns = '%7s%7s%7s' % (
        round(adps.b_min_within,1),
        round(adps.b_max_within,1),
        round(adps.b_mean_within,1))
      print('neighbors'.ljust(14), neighbor_columns, file = self.log)
def get_folders_sorted_by_size(self):
  """Return the entry folder names under ``emdb`` ordered by map file size.

  An entry is included when its ``map`` sub-folder exists and the first
  directory entry in it is a regular file.  Entries with an empty ``map``
  folder are skipped.  The order is the one given by
  ``flex.sort_permutation`` on the file sizes.

  Returns None without doing anything when ``self.error`` is set.
  """
  if self.error:
    return
  make_sub_header('Get map folders')
  folders = []
  size = flex.double()
  for d in os.listdir(emdb):
    dm = os.path.join(emdb, d, "map")
    if (not os.path.isdir(dm)):
      continue
    entries = os.listdir(dm)
    if not entries:
      # an empty map folder has no file to measure; previously IndexError
      continue
    map_file = os.path.join(dm, entries[0])
    if (os.path.isfile(map_file)):
      folders.append(d)
      size.append(os.path.getsize(map_file))
  tmp = [ folders[i] for i in flex.sort_permutation(size) ]
  print('Number of folders with map files: ', len(tmp))
  return tmp
def run(args, out=sys.stdout, verbose=True):
  """Rebuild incomplete sidechains in a model and refine them.

  Input loading is silenced (sent to ``null_out()``) unless ``verbose`` is
  true.  When a sequence file is given, the model sequence is corrected
  first and, if anything changed, the X-ray structure and fmodel are
  updated.  Returns the result of ``extend_and_refine``.
  """
  import mmtbx.building.extend_sidechains
  import mmtbx.command_line
  input_out = out if verbose else null_out()
  cmdline = mmtbx.command_line.load_model_and_data(
    args=args,
    master_phil=get_master_phil(),
    process_pdb_file=False,
    out=input_out,
    usage_string="""\
mmtbx.extend_sidechains model.pdb data.mtz [restraints.cif] [options]

Rebuild sidechains with missing non-hydrogen atoms. Includes real-space
refinement (but needs work).""")
  params = cmdline.params
  model_file = params.input.pdb.file_name[0]
  prefix = os.path.splitext(os.path.basename(model_file))[0]
  pdb_hierarchy = cmdline.pdb_hierarchy
  xray_structure = cmdline.xray_structure
  builder = mmtbx.building.extend_sidechains
  if (cmdline.params.input.sequence is not None):
    from iotbx.bioinformatics import any_sequence_format
    sequences, nc = any_sequence_format(cmdline.params.input.sequence)
    make_sub_header("Correcting model sequence", out=out)
    n_changed = builder.correct_sequence(
      pdb_hierarchy=pdb_hierarchy,
      sequences=sequences,
      out=out)
    if (n_changed != 0):
      # the hierarchy was edited, so rebuild the structure for fmodel
      xray_structure = pdb_hierarchy.extract_xray_structure(
        crystal_symmetry=xray_structure.crystal_symmetry())
      cmdline.fmodel.update_xray_structure(xray_structure,
        update_f_calc=True)
    else:
      print >> out, " No modifications required."
  return builder.extend_and_refine(
    pdb_hierarchy=pdb_hierarchy,
    xray_structure=xray_structure,
    fmodel=cmdline.fmodel,
    params=params,
    prefix=prefix,
    cif_objects=[ co for fn, co in cmdline.cif_objects ],
    out=out,
    verbose=verbose,
    output_model=params.output_model,
    output_map_coeffs=params.output_map_coeffs)
def show(self, out=None, header=True):
  """Print the merging statistics: summary, redundancies and per-bin table.

  ``out`` defaults to ``sys.stdout``.  The sub-header is printed only when
  ``header`` is true.
  """
  out = sys.stdout if (out is None) else out
  if header:
    make_sub_header("Merging statistics", out=out)
  self.overall.show_summary(out)
  print >> out, ""
  print >> out, "Redundancies%s:" % self.anom_extra
  for multiplicity in sorted(self.overall.redundancies.keys()):
    count = self.overall.redundancies[multiplicity]
    print >> out, " %d : %d" % (multiplicity, count)
  print >> out, ""
  table_header = """\
Statistics by resolution bin:
d_max d_min #obs #uniq mult. %comp <I> <I/sI> r_mrg r_meas r_pim cc1/2 cc_ano"""
  print >> out, table_header
  for shell_stats in self.bins:
    print >> out, shell_stats.format()
  print >> out, self.overall.format()
def print_adps(self):
  """Print ADP statistics of each ligand and of its neighboring atoms.

  The neighbor line is skipped when ``b_mean_within`` is None, instead of
  failing in ``round(None)``.
  """
  make_sub_header(' ADPs ', out=self.log)
  pad1 = ' ' * 20
  print(pad1, "min max mean n_iso n_aniso", file=self.log)
  for id_tuple, ligand_dict in self.items():
    for altloc, lr in ligand_dict.items():
      adps = lr.get_adps()
      print(lr.id_str.ljust(14), '%7s%7s%7s%7s%7s' %
        (round(adps.b_min, 1), round(adps.b_max, 1), round(adps.b_mean, 1),
         adps.n_iso, adps.n_aniso), file=self.log)
      # neighbor statistics may be unavailable for a ligand
      if (adps.b_mean_within is not None):
        print('neighbors'.ljust(14), '%7s%7s%7s' %
          (round(adps.b_min_within, 1), round(adps.b_max_within, 1),
           round(adps.b_mean_within, 1)), file=self.log)
def run(args, out=sys.stdout):
  """Run the water validation on the inputs in ``args`` and print it.

  Returns the ``waters.waters`` result object.
  """
  from mmtbx.validation import waters
  import mmtbx.command_line
  cmdline = mmtbx.command_line.load_model_and_data(
    args=args,
    master_phil=mmtbx.command_line.generate_master_phil_with_inputs(""),
    process_pdb_file=False,
    out=out)
  solvent_result = waters.waters(
    pdb_hierarchy=cmdline.pdb_hierarchy,
    xray_structure=cmdline.xray_structure,
    fmodel=cmdline.fmodel,
    collect_all=True)
  make_sub_header("Solvent analysis", out=out)
  solvent_result.show(out=out)
  return solvent_result
def run(args, out=sys.stdout):
  """Analyze static disorder (alternate conformations) in a model.

  Runs MolProbity validation, processes the hierarchy with
  ``analyze_model.process_pdb_hierarchy`` and prints both results.  When
  ``params.pickle`` is set, the processed object is dumped to
  ``<model basename>.pkl``.  Returns the processed object.
  """
  from mmtbx.disorder import analyze_model
  import mmtbx.validation.molprobity
  import mmtbx.command_line
  cmdline = mmtbx.command_line.load_model_and_data(
    args=args,
    master_phil=master_phil(),
    require_data=False,
    create_fmodel=True,
    process_pdb_file=True,
    usage_string="mmtbx.analyze_static_disorder model.pdb",
    out=out)
  hierarchy, params = cmdline.pdb_hierarchy, cmdline.params
  validation = mmtbx.validation.molprobity.molprobity(
    pdb_hierarchy=hierarchy,
    xray_structure=cmdline.xray_structure,
    fmodel=cmdline.fmodel,
    crystal_symmetry=cmdline.crystal_symmetry,
    geometry_restraints_manager=cmdline.geometry,
    header_info=None,
    keep_hydrogens=False,
    outliers_only=False,
    nuclear=False)
  make_header("Analyzing model", out=out)
  ignore_occ = params.ignore_inconsistent_occupancy
  if ignore_occ:
    print >> out, "Discontinuous occupancies will be ignored."
  process = analyze_model.process_pdb_hierarchy(
    pdb_hierarchy=hierarchy,
    validation=validation,
    ignore_inconsistent_occupancy=params.ignore_inconsistent_occupancy,
    log=out)
  make_sub_header("MolProbity validation", out=out)
  validation.show_summary(out=out)
  make_sub_header("Disorder analysis", out=out)
  if (process.n_disordered != 0):
    process.show(out=out, verbose=params.verbose)
  else:
    print >> out, "No alternate conformations found."
  if (params.pickle):
    model_root = os.path.splitext(params.input.pdb.file_name[0])[0]
    file_name = os.path.basename(model_root) + ".pkl"
    easy_pickle.dump(file_name, process)
  return process
def run(args, out=sys.stdout):
  """
  Entry point for mmtbx.analyze_static_disorder.

  Reads the model (data optional), validates it with MolProbity, analyzes the
  alternate conformations and reports both to `out`. When params.pickle is
  true the analysis is additionally pickled to "<model basename>.pkl".

  Parameters
  ----------
  args : list of str
  out : file, optional

  Returns
  -------
  The object returned by analyze_model.process_pdb_hierarchy().
  """
  from mmtbx.disorder import analyze_model
  import mmtbx.validation.molprobity
  import mmtbx.command_line
  loaded = mmtbx.command_line.load_model_and_data(
    args=args,
    master_phil=master_phil(),
    require_data=False,
    create_fmodel=True,
    process_pdb_file=True,
    usage_string="mmtbx.analyze_static_disorder model.pdb",
    out=out)
  model_hierarchy = loaded.pdb_hierarchy
  settings = loaded.params
  molprobity_result = mmtbx.validation.molprobity.molprobity(
    pdb_hierarchy=model_hierarchy,
    xray_structure=loaded.xray_structure,
    fmodel=loaded.fmodel,
    crystal_symmetry=loaded.crystal_symmetry,
    geometry_restraints_manager=loaded.geometry,
    header_info=None,
    keep_hydrogens=False,
    outliers_only=False,
    nuclear=False)
  make_header("Analyzing model", out=out)
  if settings.ignore_inconsistent_occupancy:
    print("Discontinuous occupancies will be ignored.", file=out)
  disorder = analyze_model.process_pdb_hierarchy(
    pdb_hierarchy=model_hierarchy,
    validation=molprobity_result,
    ignore_inconsistent_occupancy=settings.ignore_inconsistent_occupancy,
    log=out)
  make_sub_header("MolProbity validation", out=out)
  molprobity_result.show_summary(out=out)
  make_sub_header("Disorder analysis", out=out)
  if (disorder.n_disordered != 0):
    disorder.show(out=out, verbose=settings.verbose)
  else:
    print("No alternate conformations found.", file=out)
  if settings.pickle:
    # Pickle is named after the first input model (no directory, no suffix).
    first_model = settings.input.pdb.file_name[0]
    stem = os.path.splitext(first_model)[0]
    pickle_name = os.path.basename(stem) + ".pkl"
    easy_pickle.dump(pickle_name, disorder)
  return disorder
def analyze_waters(self, out=sys.stdout, debug=True, candidates=Auto):
  """
  Uses a SVM to analyze all of a model's water sites and decide whether to
  re-assign them as ions.

  Parameters
  ----------
  out : file, optional
  debug : bool, optional
      If true, the full report of every prediction is printed.
  candidates : list of str, optional

  Returns
  -------
  list of svm_prediction
      Only predictions whose final_choice is not None. An empty list is
      returned when the model has no waters, so callers can always iterate
      over the result.
  """
  waters = self._extract_waters()
  print(" %d waters to analyze" % len(waters), file=out)
  print("", file=out)
  if (len(waters) == 0):
    # Previously returned None here, which broke callers that loop over the
    # result.
    return []
  #nproc = easy_mp.get_processes(self.nproc)
  predictions = []
  for i_seq in waters:
    prediction = self.analyze_water(
      i_seq=i_seq,
      debug=debug,
      candidates=candidates,
      filter_outputs=self.params.svm.filtered_outputs)
    if (prediction is not None):
      predictions.append(prediction)
  filtered = []
  for result in predictions:
    if (debug):
      result.show(out=out, prefix=" ")
      print("", file=out)
    if (result.final_choice is not None):
      filtered.append(result)
  if (len(filtered) == 0):
    print("", file=out)
    print(" No waters could be classified as possible ions.", file=out)
  else:
    make_sub_header("Predicted ions", out=out)
    for result in filtered:
      result.show_brief(out=out, prefix=" ")
  return filtered
def build_residue_conformers(self, stop_if_none=False):
  """
  Run one cycle of single-residue alternate-conformer building.

  Replaces self.pdb_hierarchy with the hierarchy returned by
  single_residue.build_cycle().

  Parameters
  ----------
  stop_if_none : bool, optional
      Raise Sorry instead of returning when no alternates were generated.

  Returns
  -------
  The number of alternates reported by single_residue.build_cycle().

  Raises
  ------
  Sorry
      If no new conformations were generated and stop_if_none is true.
  """
  self.extract_selection()
  print("", file=self.out)
  #self.fmodel.info().show_targets(out=self.out, text="starting model")
  make_sub_header("Fitting individual residues", out=self.out)
  params = self.params
  self.pdb_hierarchy, n_alternates = single_residue.build_cycle(
    pdb_hierarchy = self.pdb_hierarchy,
    fmodel = self.fmodel,
    geometry_restraints_manager = self.geometry_restraints_manager,
    params = params,
    cif_objects=self.cif_objects,
    selection=params.selection,
    nproc=params.nproc,
    verbose=self.verbose,
    debug=self.debug,
    out=self.out)
  if (n_alternates == 0) and (stop_if_none):
    raise Sorry("No new conformations generated.")
  return n_alternates
def rejoin(self):
  """
  Re-join split conformers that are effectively identical.

  The merge is attempted on a deep copy of self.pdb_hierarchy; the copy is
  adopted (and self.fmodel updated, self.map_file reset) only if
  rejoin_split_single_conformers() reports modifications. finalize_model()
  is then run on the current hierarchy.

  Returns
  -------
  bool
      True if at least one conformer was modified.
  """
  make_sub_header("Re-joining identical conformers", out=self.out)
  symmetry = self.fmodel.xray_structure
  trial_hierarchy = self.pdb_hierarchy.deep_copy()
  n_modified = alternate_conformations.rejoin_split_single_conformers(
    pdb_hierarchy=trial_hierarchy,
    crystal_symmetry=symmetry,
    model_error_ml=self.fmodel.model_error_ml(),
    params=self.params.merging,
    reset_occupancies=self.params.refinement.constrain_correlated_occupancies,
    verbose=self.verbose,
    log=self.out)
  was_modified = (n_modified > 0)
  if was_modified:
    # Adopt the edited copy and bring fmodel in sync with it.
    self.pdb_hierarchy = trial_hierarchy
    self.fmodel.update_xray_structure(
      self.pdb_hierarchy.extract_xray_structure(
        crystal_symmetry=self.fmodel.xray_structure))
    self.map_file = None
  final_xray_structure = self.pdb_hierarchy.extract_xray_structure(
    crystal_symmetry=self.fmodel.xray_structure)
  alternate_conformations.finalize_model(
    pdb_hierarchy=self.pdb_hierarchy,
    xray_structure=final_xray_structure,
    set_b_iso=None,
    convert_to_isotropic=False)
  return was_modified
def run(args, out=sys.stdout):
  """
  Command-line driver for mmtbx.validate_ligands.

  Bare three-character alphanumeric arguments that are not existing paths
  are treated as ligand codes. Each requested code is validated against the
  model and data, and the results are printed to `out`.

  Parameters
  ----------
  args : list of str
  out : file, optional

  Raises
  ------
  Sorry
      If no ligand code was given, or a code matches nothing in the model.
  """
  usage_string = """\
mmtbx.validate_ligands model.pdb data.mtz LIGAND_CODE [...]

Print out basic statistics for residue(s) with the given code(s), including
electron density values/CC.
"""
  import mmtbx.validation.ligands
  import mmtbx.command_line

  def _looks_like_ligand_code(arg):
    # Three alphanumeric characters that do not name a file on disk.
    return (len(arg) == 3) and arg.isalnum() and (not os.path.exists(arg))

  phil_args = [
    ("ligand_code=%s" % arg) if _looks_like_ligand_code(arg) else arg
    for arg in args]
  loaded = mmtbx.command_line.load_model_and_data(
    args=phil_args,
    master_phil=master_phil(),
    process_pdb_file=False,
    usage_string=usage_string)
  settings = loaded.params
  if (settings.ligand_code is None) or (len(settings.ligand_code) == 0):
    raise Sorry("Ligand code required!")
  make_sub_header("Validating ligands", out=out)
  for code in settings.ligand_code:
    results = mmtbx.validation.ligands.validate_ligands(
      pdb_hierarchy=loaded.pdb_hierarchy,
      fmodel=loaded.fmodel,
      ligand_code=code,
      reference_structure=settings.reference_structure,
      only_segid=settings.only_segid)
    if (results is None):
      raise Sorry("No ligands named '%s' found." % code)
    mmtbx.validation.ligands.show_validation_results(
      validations=results,
      out=out,
      verbose=settings.verbose)
def make_sub_header(text, out=None):
  """
  Print `text` as an 80-column sub-header via str_utils.make_sub_header.

  Parameters
  ----------
  text : str
  out : file, optional
      Defaults to the current sys.stdout, looked up at call time.
  """
  stream = sys.stdout if (out is None) else out
  str_utils.make_sub_header(text, out=stream, header_len=80)
def __init__(self,
    model_names,
    model_data,
    f_obs,
    r_free_flags,
    params=None,
    skip_twin_detection=False,
    nproc=1,
    log=sys.stdout):
  """
  Evaluate candidate starting models against the data and keep the best one.

  Models whose symmetry is missing, or whose space group or unit cell is
  incompatible with the data, are skipped. The remaining models are scored
  with self.evaluate_model; those with r_free <= params.max_r_free are
  stored in self.evaluations sorted by increasing r_free, and the first is
  recorded in the self.best_* attributes.

  Parameters
  ----------
  model_names : list of str
      Labels for the models; must be file names if model_data is None.
  model_data : list of (pdb_hierarchy, xray_structure), or None
      If None, each entry of model_names is read as a PDB file.
  f_obs : experimental amplitudes (truncated to params.d_min)
  r_free_flags : test-set flags (reduced to the set common with f_obs)
  params : phil scope extract, optional
      Defaults to master_phil.extract().
  skip_twin_detection : bool, optional
  nproc : int, optional
  log : file, optional

  Raises
  ------
  RuntimeError
      If model_data is None and an entry of model_names is not a file.
  """
  if (params is None):
    params = master_phil.extract()
  self.model_names = model_names
  if (model_data is None):
    from iotbx.file_reader import any_file
    model_data = []
    for file_name in model_names:
      if (not os.path.isfile(file_name)):
        raise RuntimeError("model_data is None, but %s is not a file." %
          file_name)
      model_in = any_file(file_name,
        force_type="pdb",
        raise_sorry_if_errors=True).file_object
      pdb_hierarchy = model_in.hierarchy
      xray_structure = model_in.xray_structure_simple()
      model_data.append((pdb_hierarchy, xray_structure))
  self.model_symmetries = []
  self.models_accepted = []
  self.model_r_frees = []
  self.f_obs = f_obs.resolution_filter(d_min=params.d_min)
  self.r_free_flags = r_free_flags.common_set(other=self.f_obs)
  self.skip_twin_detection = skip_twin_detection
  self.params = params
  self.evaluations = None
  self.best_xray_structure = None
  self.best_pdb_hierarchy = None
  self.best_result = None
  self.best_model_name = None
  from mmtbx.pdb_symmetry import rms_difference
  from iotbx import file_reader
  data_symmetry = f_obs.crystal_symmetry()
  data_space_group = data_symmetry.space_group()
  data_point_group = data_space_group.build_derived_point_group()
  data_unit_cell = data_symmetry.unit_cell()
  data_cell_edges = data_unit_cell.parameters()[0:3]
  data_cell_angles = data_unit_cell.parameters()[3:6]
  make_sub_header("Evaluating models", out=log)
  print("Experimental data:", file=log)
  print(" space group: %s" % data_space_group.info(), file=log)
  print(" unit cell: %s" % ucf(data_unit_cell), file=log)
  pdb_hierarchies = []
  # One entry per model; None marks a model rejected before scoring.
  xray_structures = []
  for k, file_name in enumerate(model_names):
    pdb_hierarchy, xray_structure = model_data[k]
    pdb_hierarchy.atoms().reset_i_seq()
    pdb_hierarchies.append(pdb_hierarchy)
    model_symmetry = xray_structure.crystal_symmetry()
    self.model_symmetries.append(model_symmetry)
    if (model_symmetry is None):
      print("Model %d is missing symmetry records:" % (k+1), file=log)
      print(" source: %s" % file_name, file=log)
      xray_structures.append(None)
      continue
    model_unit_cell = model_symmetry.unit_cell()
    model_space_group = model_symmetry.space_group()
    # Accept an identical space group, or one with the same point group.
    is_compatible_sg = False
    if (model_space_group == data_space_group):
      is_compatible_sg = True
    else:
      model_point_group = model_space_group.build_derived_point_group()
      if (data_point_group == model_point_group):
        is_compatible_sg = True
    if (not is_compatible_sg):
      print("Model %d has incompatible space group:" % (k+1), file=log)
      print(" source: %s" % file_name, file=log)
      print(" space group: %s" % model_space_group.info(), file=log)
      xray_structures.append(None)
      continue
    # Accept a similar cell, or one within the RMSD tolerances in params.
    is_similar_cell = False
    if (model_unit_cell.is_similar_to(data_unit_cell)):
      is_similar_cell = True
    else:
      model_cell_edges = model_unit_cell.parameters()[0:3]
      model_cell_angles = model_unit_cell.parameters()[3:6]
      cell_edge_rmsd = rms_difference(model_cell_edges, data_cell_edges)
      cell_angle_rmsd = rms_difference(model_cell_angles, data_cell_angles)
      if ((cell_edge_rmsd <= params.max_cell_edge_rmsd) and
          (cell_angle_rmsd <= params.max_cell_angle_rmsd)):
        is_similar_cell = True
    if (not is_similar_cell):
      # This branch reports a unit-cell mismatch; the message used to say
      # "space group", copied from the branch above.
      print("Model %d has incompatible unit cell:" % (k+1), file=log)
      print(" source: %s" % file_name, file=log)
      print(" model: %s" % ucf(model_unit_cell), file=log)
      xray_structures.append(None)
      continue
    else:
      xray_structures.append(xray_structure)
  if (xray_structures.count(None) != len(xray_structures)):
    print("", file=log)
    print("Calculating R-factors - will use %s processors." % nproc,
      file=log)
    # zip() is lazy on Python 3; pass a real list to the parallel map.
    evaluations = easy_mp.parallel_map(
      func=self.evaluate_model,
      iterable=list(zip(xray_structures, pdb_hierarchies)),
      processes=nproc)
    passed = []
    for k, result in enumerate(evaluations):
      if (result is not None):
        if (result.r_free <= params.max_r_free):
          passed.append((k, result))
    if (len(passed) > 0):
      # Lowest R-free first (key= replaces the Python-2-only cmp argument).
      passed.sort(key=lambda item: item[1].r_free)
      i_result, result = passed[0]
      self.evaluations = passed
      self.best_xray_structure = result.xray_structure
      self.best_pdb_hierarchy = pdb_hierarchies[i_result]
      self.best_result = result
      self.best_model_name = self.model_names[i_result]
  self.show(out=log, verbose=True)
def strip_model(
    pdb_hierarchy=None,
    xray_structure=None,
    file_name=None,
    params=None,
    remove_waters=True,
    remove_hydrogens=True,
    remove_alt_confs=True,
    convert_semet_to_met=True,
    convert_to_isotropic=True,
    reset_occupancies=True,
    remove_ligands=False,
    reset_hetatm_flag=False,
    preserve_remarks=False,
    preserve_symmetry=True,
    add_remarks=None,
    output_file=None,
    log=None):
  """
  Utility for removing extraneous records from a model intended for use in
  molecular replacement, etc., including waters, alternate conformations,
  and other features specific to a particular dataset.

  Either file_name, or both pdb_hierarchy and xray_structure, must be given;
  in the latter case copies are modified, not the inputs. If params is
  given, its attributes override the corresponding remove_*/convert_*/reset_*
  keyword arguments.

  Returns
  -------
  (pdb_hierarchy, xray_structure) after processing.

  Raises
  ------
  Sorry
      If the hierarchy contains more than one model.
  """
  if (params is not None):
    remove_waters = params.remove_waters
    remove_hydrogens = params.remove_hydrogens
    remove_alt_confs = params.remove_alt_confs
    convert_semet_to_met = params.convert_semet_to_met
    convert_to_isotropic = params.convert_to_isotropic
    reset_occupancies = params.reset_occupancies
    remove_ligands = params.remove_ligands
    reset_hetatm_flag = params.reset_hetatm_flag
  if (log is None):
    log = null_out()
  make_sub_header("Processing input model", out=log)
  from mmtbx import pdbtools
  remarks = None
  if (file_name is not None):
    print("Reading model from %s" % file_name, file=log)
    assert ([pdb_hierarchy, xray_structure] == [None, None])
    from iotbx import file_reader
    pdb_in = file_reader.any_file(file_name, force_type="pdb",
      raise_sorry_if_errors=True)
    pdb_in.check_file_type("pdb")
    remarks = pdb_in.file_object.input.remark_section()
    pdb_hierarchy = pdb_in.file_object.hierarchy
    xray_structure = pdb_in.file_object.xray_structure_simple()
  else:
    # XXX work with copies, not the original structure
    pdb_hierarchy = pdb_hierarchy.deep_copy()
    xray_structure = xray_structure.deep_copy_scatterers()
  pdb_hierarchy.atoms().reset_i_seq()
  if (len(pdb_hierarchy.models()) > 1):
    raise Sorry("Multiple models not supported.")
  if (remove_hydrogens):
    sele = ~(xray_structure.hd_selection())
    n_hd = sele.count(False)
    if (n_hd > 0):
      pdb_hierarchy = pdb_hierarchy.select(sele)
      xray_structure = xray_structure.select(sele)
      print(" removed %d hydrogens" % n_hd, file=log)
      pdb_hierarchy.atoms().reset_i_seq()
  if (remove_waters):
    sele = pdb_hierarchy.atom_selection_cache().selection("not (resname HOH)")
    n_wat = sele.count(False)
    if (n_wat > 0):
      pdb_hierarchy = pdb_hierarchy.select(sele)
      xray_structure = xray_structure.select(sele)
      print(" removed %d waters" % n_wat, file=log)
      pdb_hierarchy.atoms().reset_i_seq()
  assert_identical_id_str = True
  if (remove_alt_confs):
    n_atoms_start = xray_structure.scatterers().size()
    pdbtools.remove_alt_confs(pdb_hierarchy)
    i_seqs = pdb_hierarchy.atoms().extract_i_seq()
    n_atoms_end = i_seqs.size()
    if (n_atoms_end != n_atoms_start):
      # start - end: the old end - start printed a negative count.
      print(" removed %d atoms in alternate conformations" %
        (n_atoms_start - n_atoms_end), file=log)
      assert_identical_id_str = False
    xray_structure = xray_structure.select(i_seqs)
    pdb_hierarchy.atoms().reset_i_seq()
  if (convert_semet_to_met):
    # XXX need to start from a copy here because the atom-parent relationship
    # seems to be messed up otherwise. this is probably a bug.
    pdb_hierarchy = pdb_hierarchy.deep_copy()
    n_mse = pdbtools.convert_semet_to_met(
      pdb_hierarchy=pdb_hierarchy,
      xray_structure=xray_structure)
    if (n_mse > 0):
      print(" removed %d selenomethionine (MSE) residues" % n_mse, file=log)
      assert_identical_id_str = False
      # NOTE(review): looks like a debugging leftover; kept so behavior is
      # unchanged, but the handle is now closed deterministically.
      with open("tmp1.pdb", "w") as tmp_out:
        tmp_out.write(pdb_hierarchy.as_pdb_string())
      sel = pdb_hierarchy.atom_selection_cache().selection
      assert sel("resname MSE").count(True) == 0
  if (convert_to_isotropic):
    xray_structure.convert_to_isotropic()
    pdb_hierarchy.adopt_xray_structure(xray_structure,
      assert_identical_id_str=assert_identical_id_str)
    print(" converted all atoms to isotropic B-factors", file=log)
  if (reset_occupancies):
    assert (remove_alt_confs)
    xray_structure.adjust_occupancy(occ_max=1.0, occ_min=1.0)
    pdb_hierarchy.adopt_xray_structure(xray_structure,
      assert_identical_id_str=assert_identical_id_str)
    print(" reset occupancy to 1.0 for all atoms", file=log)
  if (reset_hetatm_flag):
    for atom in pdb_hierarchy.atoms():
      atom.hetero = False
  if (remove_ligands):
    pdb_hierarchy.atoms().reset_i_seq()
    model = pdb_hierarchy.only_model()
    for chain in model.chains():
      if (not chain.is_protein()) and (not chain.is_na()):
        print(" removing %d ligand atoms in chain '%s'" %
          (len(chain.atoms()), chain.id), file=log)
        model.remove_chain(chain)
    i_seqs = pdb_hierarchy.atoms().extract_i_seq()
    xray_structure = xray_structure.select(i_seqs)
    pdb_hierarchy.atoms().reset_i_seq()
  assert xray_structure.scatterers().size() == pdb_hierarchy.atoms_size()
  if (output_file is not None):
    symm = None
    if (preserve_symmetry):
      symm = xray_structure
    # Context manager guarantees the file is closed even if writing fails.
    with open(output_file, "w") as f:
      if (add_remarks is not None):
        f.write("\n".join(add_remarks))
        f.write("\n")
      if (preserve_remarks) and (remarks is not None):
        f.write("\n".join(remarks))
        f.write("\n")
      f.write(pdb_hierarchy.as_pdb_string(crystal_symmetry=symm))
    print(" wrote model to %s" % output_file, file=log)
  return pdb_hierarchy, xray_structure
def run(args, log=sys.stdout):
  """
  Command-line driver reporting map and model statistics for a model fitted
  to a CCP4 map.

  Prints, in order: histogram of map values, overall geometry statistics,
  histograms of bond/angle/non-bonded deviations and ADPs, the map-model
  FSC, and map-model correlation coefficients (overall, per chain, per
  residue and as a histogram).

  Parameters
  ----------
  args : list of str
      Must supply exactly one PDB file, crystal symmetry, a CCP4 map and a
      resolution.
  log : file, optional

  Raises
  ------
  Sorry
      If the resolution, the PDB file, the crystal symmetry or the map is
      missing, or if the model file holds more than one model.
  """
  print("-"*79, file=log)
  print(legend, file=log)
  print("-"*79, file=log)
  inputs = mmtbx.utils.process_command_line_args(args = args,
    master_params = master_params())
  params = inputs.params.extract()
  # estimate resolution
  d_min = params.resolution
  broadcast(m="Map resolution:", log=log)
  if(d_min is None):
    raise Sorry("Resolution is required.")
  print(" d_min: %6.4f"%d_min, file=log)
  # model
  broadcast(m="Input PDB:", log=log)
  file_names = inputs.pdb_file_names
  if(len(file_names) != 1): raise Sorry("PDB file has to given.")
  if(inputs.crystal_symmetry is None):
    raise Sorry("No crystal symmetry defined.")
  processed_pdb_file = monomer_library.pdb_interpretation.process(
    mon_lib_srv = monomer_library.server.server(),
    ener_lib = monomer_library.server.ener_lib(),
    file_name = file_names[0],
    crystal_symmetry = inputs.crystal_symmetry,
    force_symmetry = True,
    log = None)
  ph = processed_pdb_file.all_chain_proxies.pdb_hierarchy
  if(len(ph.models())>1):
    raise Sorry("Only one model allowed.")
  xrs = processed_pdb_file.xray_structure()
  xrs.scattering_type_registry(table = params.scattering_table)
  xrs.show_summary(f=log, prefix=" ")
  # restraints
  sctr_keys = xrs.scattering_type_registry().type_count_dict().keys()
  has_hd = "H" in sctr_keys or "D" in sctr_keys
  geometry = processed_pdb_file.geometry_restraints_manager(
    show_energies = False,
    assume_hydrogens_all_missing = not has_hd,
    plain_pairs_radius = 5.0)
  # map
  broadcast(m="Input map:", log=log)
  if(inputs.ccp4_map is None): raise Sorry("Map file has to given.")
  inputs.ccp4_map.show_summary(prefix=" ")
  map_data = inputs.ccp4_map.map_data()
  print(" Actual map (min,max,mean):",
    map_data.as_1d().min_max_mean().as_tuple(), file=log)
  make_sub_header("Histogram of map values", out=log)
  md = map_data.as_1d()
  show_histogram(data=md, n_slots=10, data_min=flex.min(md),
    data_max=flex.max(md), log=log)
  # shift origin if needed
  shift_needed = not \
    (map_data.focus_size_1d() > 0 and map_data.nd() == 3 and
     map_data.is_0_based())
  if(shift_needed):
    N = map_data.all()
    O=map_data.origin()
    map_data = map_data.shift_origin()
    # apply same shift to the model
    a,b,c = xrs.crystal_symmetry().unit_cell().parameters()[:3]
    sites_cart = xrs.sites_cart()
    sx,sy,sz = a/N[0]*O[0], b/N[1]*O[1], c/N[2]*O[2]
    sites_cart_shifted = sites_cart-\
      flex.vec3_double(sites_cart.size(), [sx,sy,sz])
    xrs.set_sites_cart(sites_cart_shifted)
  ####
  # Compute and show all stats
  ####
  broadcast(m="Model statistics:", log=log)
  make_sub_header("Overall", out=log)
  ms = model_statistics.geometry(
    pdb_hierarchy = ph,
    restraints_manager = geometry,
    molprobity_scores = True)
  ms.show()
  make_sub_header("Histogram of devations from ideal bonds", out=log)
  show_histogram(data=ms.bond_deltas, n_slots=10, data_min=0, data_max=0.2,
    log=log)
  #
  make_sub_header("Histogram of devations from ideal angles", out=log)
  show_histogram(data=ms.angle_deltas, n_slots=10, data_min=0, data_max=30.,
    log=log)
  #
  make_sub_header("Histogram of non-bonded distances", out=log)
  show_histogram(data=ms.nonbonded_distances, n_slots=10, data_min=0,
    data_max=5., log=log)
  #
  make_sub_header("Histogram of ADPs", out=log)
  bs = xrs.extract_u_iso_or_u_equiv()*adptbx.u_as_b(1.)
  show_histogram(data=bs, n_slots=10, data_min=flex.min(bs),
    data_max=flex.max(bs), log=log)
  #
  # Compute FSC(map, model)
  broadcast(m="Map-model FSC:", log=log)
  mmtbx.maps.correlation.fsc_model_map(
    xray_structure=xrs, map=map_data, d_min=d_min, log=log)
  #
  # various CC
  cc_calculator = mmtbx.maps.correlation.from_map_and_xray_structure_or_fmodel(
    xray_structure = xrs,
    map_data = map_data,
    d_min = d_min)
  broadcast(m="Map-model CC:", log=log)
  print("Overall (entire box): %6.4f"%cc_calculator.cc(), file=log)
  print("Around atoms (masked): %6.4f"%cc_calculator.cc(
    selection=flex.bool(xrs.scatterers().size(), True)), file=log)
  # per chain
  print("Per chain:", file=log)
  for chain in ph.chains():
    print(" chain %s: %6.4f"%(chain.id, cc_calculator.cc(
      selection=chain.atoms().extract_i_seq())), file=log)
  # per residue
  print("Per residue:", file=log)
  for rg in ph.residue_groups():
    cc = cc_calculator.cc(selection=rg.atoms().extract_i_seq())
    print(" chain id: %s resid %s: %6.4f"%(
      rg.parent().id, rg.resid(), cc), file=log)
  # per residue detailed counts
  print("Per residue (histogram):", file=log)
  crystal_gridding = maptbx.crystal_gridding(
    unit_cell = xrs.unit_cell(),
    space_group_info = xrs.space_group_info(),
    pre_determined_n_real = map_data.accessor().all())
  f_calc = xrs.structure_factors(d_min=d_min).f_calc()
  fft_map = miller.fft_map(
    crystal_gridding = crystal_gridding,
    fourier_coefficients = f_calc)
  fft_map.apply_sigma_scaling()
  map_model = fft_map.real_map_unpadded()
  sites_cart = xrs.sites_cart()
  cc_per_residue = flex.double()
  for rg in ph.residue_groups():
    cc = mmtbx.maps.correlation.from_map_map_atoms(
      map_1 = map_data,
      map_2 = map_model,
      sites_cart = sites_cart.select(rg.atoms().extract_i_seq()),
      unit_cell = xrs.unit_cell(),
      radius = 2.)
    cc_per_residue.append(cc)
  show_histogram(data=cc_per_residue, n_slots=10, data_min=-1., data_max=1.0,
    log=log)
def find_and_build_ions(
    manager,
    fmodels,
    model,
    wavelength,
    params,
    nproc=1,
    elements=Auto,
    out=None,
    run_ordered_solvent=False,
    occupancy_strategy_enabled=False,
    group_anomalous_strategy_enabled=False,
    use_svm=None):
  """
  Analyzes the water molecules in a structure and re-labels them as ions if
  they scatter and bind environments that we expect of that ion.

  Parameters
  ----------
  manager : mmtbx.ions.identity.manager
      If None, a new manager is created; otherwise the given one is updated
      with the current structure and maps.
  fmodels : mmtbx.fmodels
  model : mmtbx.model.manager
  wavelength : float
  params : libtbx.phil.scope_extract
  nproc : int, optional
  elements : list of str, optional
  out : file, optional
      Defaults to sys.stdout.
  run_ordered_solvent : bool, optional
  occupancy_strategy_enabled : bool, optional
  group_anomalous_strategy_enabled : bool, optional
  use_svm : bool, optional
      Defaults to params.use_svm if present, else False.

  Returns
  -------
  The manager that was used (created here if none was passed in).

  See Also
  --------
  mmtbx.ions.identify.manager.analyze_waters
  """
  import mmtbx.refinement.minimization
  from mmtbx.refinement.anomalous_scatterer_groups import \
    get_single_atom_selection_string
  from mmtbx.refinement import anomalous_scatterer_groups
  import mmtbx.ions.identify
  import mmtbx.ions.svm
  from cctbx.eltbx import sasaki
  from cctbx import crystal
  from cctbx import adptbx
  from cctbx import xray
  from scitbx.array_family import flex
  import scitbx.lbfgs
  if (use_svm is None):
    use_svm = getattr(params, "use_svm", False)
  assert (1.0 >= params.initial_occupancy >= 0)
  fmodel = fmodels.fmodel_xray()
  anomalous_flag = fmodel.f_obs().anomalous_flag()
  if (out is None):
    out = sys.stdout
  model.xray_structure = fmodel.xray_structure
  model.xray_structure.tidy_us()
  pdb_hierarchy = model.pdb_hierarchy(sync_with_xray_structure=True)
  pdb_atoms = pdb_hierarchy.atoms()
  pdb_atoms.reset_i_seq()
  # FIXME why does B for anisotropic waters end up negative?
  u_iso = model.xray_structure.extract_u_iso_or_u_equiv()
  for i_seq, atom in enumerate(pdb_atoms):
    labels = atom.fetch_labels()
    if (labels.resname == "HOH") and (atom.b < 0):
      assert (u_iso[i_seq] >= 0)
      atom.b = adptbx.u_as_b(u_iso[i_seq])
  if (manager is None):
    manager_class = None
    if (use_svm):
      manager_class = mmtbx.ions.svm.manager
      if params.svm.svm_name == "merged_high_res":
        params.find_anomalous_substructure = False
        params.use_phaser = False
    manager = mmtbx.ions.identify.create_manager(
      pdb_hierarchy=pdb_hierarchy,
      geometry_restraints_manager=model.restraints_manager.geometry,
      fmodel=fmodel,
      wavelength=wavelength,
      params=params,
      nproc=nproc,
      verbose=params.debug,
      log=out,
      manager_class=manager_class)
  else:
    grm = model.restraints_manager.geometry
    connectivity = grm.shell_sym_tables[0].full_simple_connectivity()
    manager.update_structure(
      pdb_hierarchy=pdb_hierarchy,
      xray_structure=fmodel.xray_structure,
      connectivity=connectivity,
      log=out)
    manager.update_maps()
  model.update_anomalous_groups(out=out)
  make_sub_header("Analyzing water molecules", out=out)
  manager.show_current_scattering_statistics(out=out)
  anomalous_groups = []
  # XXX somehow comma-separation of phil strings fields doesn't work
  if (isinstance(elements, list)) and (len(elements) == 1):
    elements = elements[0].split(",")
  water_ion_candidates = manager.analyze_waters(
    out=out,
    candidates=elements)
  modified_iselection = flex.size_t()
  default_b_iso = manager.get_initial_b_iso()
  # Build in the identified ions
  for_building = []
  if (use_svm):
    for result in water_ion_candidates:
      for_building.append((result.i_seq, result.final_choice))
  else:
    # Only unambiguous assignments (exactly one final choice) are built.
    for i_seq, final_choices, two_fofc in water_ion_candidates:
      if (len(final_choices) == 1):
        for_building.append((i_seq, final_choices[0]))
  skipped = []
  if (len(for_building) > 0):
    make_sub_header("Adding %d ions to model" % len(for_building), out)
    for k, (i_seq, final_choice) in enumerate(for_building):
      atom = manager.pdb_atoms[i_seq]
      skip = False
      # Do not place an ion too close to an already-accepted ion whose
      # charge has the same sign.
      for other_i_seq, other_ion in for_building[:k]:
        if (other_i_seq in skipped):
          continue
        if (((other_ion.charge > 0) and (final_choice.charge > 0)) or
            ((other_ion.charge < 0) and (final_choice.charge < 0))):
          other_atom = manager.pdb_atoms[other_i_seq]
          dxyz = atom.distance(other_atom)
          if (dxyz < params.max_distance_between_like_charges):
            print(
              " %s (%s%+d) is only %.3fA from %s (%s%+d), skipping for now" %
              (atom.id_str(), final_choice.element, final_choice.charge, dxyz,
               other_atom.id_str(), other_ion.element, other_ion.charge),
              file=out)
            skipped.append(i_seq)
            skip = True
            break
      if (skip):
        continue
      print(" %s becomes %s%+d" %
        (atom.id_str(), final_choice.element, final_choice.charge), file=out)
      refine_adp = params.refine_ion_adp
      if (refine_adp == "Auto"):
        if (fmodel.f_obs().d_min() <= 1.5):
          refine_adp = "anisotropic"
        elif (fmodel.f_obs().d_min() < 2.5):
          atomic_number = sasaki.table(final_choice.element).atomic_number()
          if (atomic_number >= 19):
            refine_adp = "anisotropic"
      # Modify the atom object - this is clumsy but they will be grouped into
      # a single chain at the end of refinement
      initial_b_iso = params.initial_b_iso
      if (initial_b_iso is Auto):
        initial_b_iso = manager.guess_b_iso_real(i_seq)
      element = final_choice.element
      if (element == "IOD"): # FIXME
        element = "I"
      modified_atom = model.convert_atom(
        i_seq=i_seq,
        scattering_type=final_choice.scattering_type(),
        atom_name=element,
        element=element,
        charge=final_choice.charge,
        residue_name=final_choice.element,
        initial_occupancy=params.initial_occupancy,
        initial_b_iso=initial_b_iso,
        chain_id=params.ion_chain_id,
        segid="ION",
        refine_adp=refine_adp,
        refine_occupancies=False) #params.refine_ion_occupancies)
      if (params.refine_anomalous) and (anomalous_flag):
        scatterer = model.xray_structure.scatterers()[i_seq]
        if (wavelength is not None):
          fp_fdp_info = sasaki.table(final_choice.element).at_angstrom(
            wavelength)
          scatterer.fp = fp_fdp_info.fp()
          scatterer.fdp = fp_fdp_info.fdp()
          print(" setting f'=%g, f''=%g" % (scatterer.fp, scatterer.fdp),
            file=out)
        group = xray.anomalous_scatterer_group(
          iselection=flex.size_t([i_seq]),
          f_prime=scatterer.fp,
          f_double_prime=scatterer.fdp,
          refine=["f_prime","f_double_prime"],
          selection_string=get_single_atom_selection_string(modified_atom),
          update_from_selection=True)
        anomalous_groups.append(group)
      modified_iselection.append(i_seq)
  if (len(modified_iselection) > 0):
    scatterers = model.xray_structure.scatterers()
    # FIXME not sure this is actually working as desired...
    site_symmetry_table = model.xray_structure.site_symmetry_table()
    for i_seq in site_symmetry_table.special_position_indices():
      scatterers[i_seq].site = crystal.correct_special_position(
        crystal_symmetry=model.xray_structure,
        special_op=site_symmetry_table.get(i_seq).special_op(),
        site_frac=scatterers[i_seq].site,
        site_label=scatterers[i_seq].label,
        tolerance=1.0)
    model.xray_structure.replace_scatterers(scatterers=scatterers)
    def show_r_factors():
      return "r_work=%6.4f r_free=%6.4f" % (fmodel.r_work(), fmodel.r_free())
    fmodel.update_xray_structure(
      xray_structure=model.xray_structure,
      update_f_calc=True,
      update_f_mask=True)
    n_anom = len(anomalous_groups)
    refine_anomalous = anomalous_flag and params.refine_anomalous and n_anom>0
    refine_occupancies = ((params.refine_ion_occupancies or refine_anomalous) and
      ((not occupancy_strategy_enabled) or
       (model.refinement_flags.s_occupancies is None) or
       (len(model.refinement_flags.s_occupancies) == 0)))
    if (refine_anomalous):
      # If the caller's anomalous-group strategy is active, hand the new
      # groups to it instead of refining them here.
      if ((model.anomalous_scatterer_groups is not None) and
          (group_anomalous_strategy_enabled)):
        model.anomalous_scatterer_groups.extend(anomalous_groups)
        refine_anomalous = False
    if (refine_occupancies) or (refine_anomalous):
      print("", file=out)
      print(" occupancy refinement (new ions only): start %s" %
        show_r_factors(), file=out)
      fmodel.xray_structure.scatterers().flags_set_grads(state = False)
      fmodel.xray_structure.scatterers().flags_set_grad_occupancy(
        iselection = modified_iselection)
      lbfgs_termination_params = scitbx.lbfgs.termination_parameters(
        max_iterations = 25)
      minimized = mmtbx.refinement.minimization.lbfgs(
        restraints_manager = None,
        fmodels = fmodels,
        model = model,
        is_neutron_scat_table = False,
        lbfgs_termination_params = lbfgs_termination_params)
      fmodel.xray_structure.adjust_occupancy(
        occ_max = 1.0,
        occ_min = 0,
        selection = modified_iselection)
      zero_occ = []
      for i_seq in modified_iselection:
        occ = fmodel.xray_structure.scatterers()[i_seq].occupancy
        if (occ == 0):
          zero_occ.append(i_seq)
      fmodel.update_xray_structure(
        update_f_calc=True,
        update_f_mask=True)
      print(" final %s" % show_r_factors(), file=out)
      if (len(zero_occ) > 0):
        # The count was never interpolated before, so a literal "%d" was
        # printed.
        print(" WARNING: occupancy dropped to zero for %d atoms:" %
          len(zero_occ), file=out)
        atoms = model.pdb_hierarchy().atoms()
        for i_seq in zero_occ:
          print(" %s" % atoms[i_seq].id_str(suppress_segid=True), file=out)
      print("", file=out)
    if (refine_anomalous):
      assert fmodel.f_obs().anomalous_flag()
      print(" anomalous refinement (new ions only): start %s" %
        show_r_factors(), file=out)
      fmodel.update(target_name="ls")
      anomalous_scatterer_groups.minimizer(
        fmodel=fmodel,
        groups=anomalous_groups)
      fmodel.update(target_name="ml")
      print(" final %s" % show_r_factors(), file=out)
      print("", file=out)
  return manager
def run_resolve(self):
  """
  Rebuild the selected fragment with RESOLVE, then refine it in the box.

  The selected hierarchy is passed to resolve_in_memory.run() (either loop
  building or rebuild-in-place, depending on self.params.build_new_loop),
  and the rebuilt coordinates are copied back into the box model. Selected
  atoms that RESOLVE did not return are geometry-minimized, and the box is
  then annealed or real-space refined. Density and CC before/after are
  printed to self.out.

  Writes resolve.pdb, box_resolve.pdb and box_refined.pdb (plus the output
  of self.box.write_ccp4_map()) in the current directory.

  Raises
  ------
  KeyError
      If RESOLVE returns an atom whose id_str is not in
      self.atom_id_mapping.
  """
  from solve_resolve.resolve_python import resolve_in_memory
  from iotbx import pdb
  from scitbx.array_family import flex
  make_sub_header("RESOLVE build", out=self.out)
  mean_density_start = self.mean_density_at_sites()
  cc_start = self.cc_model_map()
  sites_start = self.get_selected_sites(hydrogens=False)
  t1 = time.time()
  pdb_inp = self.box_selected_hierarchy.as_pdb_input()
  inp_hierarchy = pdb_inp.construct_hierarchy()
  chain = inp_hierarchy.only_model().only_chain()
  first_resseq = chain.residue_groups()[0].resseq_as_int()
  seq = "".join(chain.only_conformer().as_sequence(substitute_unknown='A'))
  resolve_args = [
    "start_chain 1 %d" % first_resseq,
    "extend_only",
    "skip_hetatm",
    "no_merge_ncs_copies",
    "no_optimize_ncs",
    "i_ran_seed %d" % int(time.time() % os.getpid()),
  ]
  if (self.params.build_new_loop): # XXX not really working...
    n_res = len(chain.residue_groups())
    assert (n_res >= 3)
    # Keep only the two terminal residues as anchors for the new loop.
    for residue_group in chain.residue_groups()[1:-1]:
      print(" removing residue group %s %s" %
        (chain.id, residue_group.resid()), file=self.out)
      chain.remove_residue_group(residue_group)
    resolve_args.extend([
      "loop_only",
      "build_outside_model",
      "no_sub_segments",
      "n_random_loop %d" % self.params.n_random_loop,
      "loop_length %d" % (n_res - 2),
      "rms_random_loop 0.3",
      "rho_min_main_low 0.5",
      "rho_min_main_base 0.5",
      "n_internal_start 0",
    ])
  else:
    resolve_args.extend([
      "rebuild_in_place",
      "replace_existing",
      "richardson_rotamers",
      "min_z_value_rho -3.0",
      "delta_phi 20.00",
      "dist_cut_base 3.0",
      "n_random_frag 0",
      "group_ca_length 4",
      "group_length 2",
    ])
  # RESOLVE's own output is shown only in debug mode.
  out = null_out()
  if (self.debug):
    out = self.out
  cmn = resolve_in_memory.run(
    map_coeffs=self.box_map_coeffs,
    pdb_inp=inp_hierarchy.as_pdb_input(),
    build=True,
    input_text="\n".join(resolve_args),
    chain_type="PROTEIN",
    seq_file_as_string=seq,
    out=out)
  new_pdb_input = pdb.input(
    source_info='string',
    lines=flex.split_lines(cmn.atom_db.pdb_out_as_string))
  new_hierarchy = new_pdb_input.construct_hierarchy()
  print(" %d atoms rebuilt" % len(new_hierarchy.atoms()), file=self.out)
  new_hierarchy.write_pdb_file("resolve.pdb")
  # Map every rebuilt atom back to its index in the selected hierarchy.
  selection_moved = flex.size_t()
  sites_new = flex.vec3_double()
  for atom in new_hierarchy.atoms():
    id_str = atom.id_str()
    if (id_str not in self.atom_id_mapping):
      raise KeyError("Atom ID %s not recognized in RESOLVE model." % id_str)
    i_seq = self.atom_id_mapping[id_str]
    selection_moved.append(i_seq)
    sites_new.append(atom.xyz)
  sites_cart_selected = self.box_selected_hierarchy.atoms().extract_xyz()
  sites_cart_selected.set_selected(selection_moved, sites_new)
  self.box_selected_hierarchy.atoms().set_xyz(sites_cart_selected)
  sites_cart_box = self.box.xray_structure_box.sites_cart()
  sites_cart_box.set_selected(self.selection_in_box, sites_cart_selected)
  self.box.xray_structure_box.set_sites_cart(sites_cart_box)
  self.box.pdb_hierarchy_box.atoms().set_xyz(sites_cart_box)
  t2 = time.time()
  print(" RESOLVE time: %.1fs" % (t2-t1), file=self.out)
  selection_rebuilt = self.selection_in_box.select(selection_moved)
  minimize_sel = flex.bool(self.n_sites_box, False).set_selected(
    self.selection_in_box, True).set_selected(selection_rebuilt, False)
  # atoms present in the selection but not in the RESOLVE model (usually
  # hydrogen atoms) need to be minimized to follow the rebuilt sites
  if (minimize_sel.count(True) > 0):
    print(" Performing geometry minimzation on unbuilt sites",
      file=self.out)
    self.geometry_minimization(
      selection=minimize_sel,
      nonbonded=False)
  self.box.write_pdb_file("box_resolve.pdb")
  # two alternatives here: restrain other atoms tightly, and minimize the
  # entire box, or restrain selected atoms loosely, and refine only those
  self.restrain_atoms(
    selection=self.others_in_box,
    reference_sigma=0.02)
  if (self.params.anneal):
    self.anneal(start_temperature=2500)
  else:
    self.real_space_refine(selection=self.selection_all_box)
  self.box.write_pdb_file("box_refined.pdb")
  self.box.write_ccp4_map()
  mean_density_end = self.mean_density_at_sites()
  cc_end = self.cc_model_map()
  print(" mean density level: start=%.2fsigma end=%.2fsigma"
    % (mean_density_start, mean_density_end), file=self.out)
  print(" model-map CC: start=%.3f end=%.3f" % (cc_start, cc_end),
    file=self.out)
  sites_final = self.get_selected_sites(hydrogens=False)
  print(" rmsd to starting model: %.3f Angstrom" %
    sites_final.rms_difference(sites_start), file=self.out)
  t3 = time.time()
  print(" Total build and refine time: %.1fs" % (t3-t1), file=self.out)
def assemble_constraint_groups_3d(
    xray_structure,
    pdb_atoms,
    constraint_groups,
    interaction_distance_cutoff=4.0,
    verbose=False,
    log=None):
  """
  Re-sort occupancy constraint groups so that conformers whose motion is
  correlated (i.e. they interact in 3D, without necessarily being part of the
  same fragment/molecule/ASU) are grouped together.

  As input, it expects the constraint groups output by
  mmtbx.utils.occupancy_selections(), which will already have connectivity
  taken into account.  The list is modified in place (groups are deleted and
  selections are extended) and is also returned.

  :param xray_structure: structure used to build the pair ASU table
  :param pdb_atoms: atom array; indexed with the i_seqs in the selections
  :param constraint_groups: list of groups, each a list of selections
    (one selection per conformer)
  :param interaction_distance_cutoff: distance cutoff passed to
    xray_structure.pair_asu_table()
  :param verbose: also print each interacting atom pair
  :param log: output stream; if None, regular output is discarded and the
    verbose diagnostics go to stdout
  :returns: the (modified) constraint_groups list
  :raises Sorry: if occupancies are not uniform within a selection, or differ
    between conformers that are about to be merged
  :raises RuntimeError: if a selection that must be merged starts with an
    atom that has a blank altloc
  """
  import sys
  # Verbose diagnostics used to be written with a bare "print", which
  # bypassed the log.  They now follow the caller's log when one is given and
  # only fall back to stdout when there is no log at all.
  verbose_out = log
  if (log is None):
    verbose_out = sys.stdout
    log = null_out()
  make_sub_header("Correlated occupancy grouping", out=log)
  bang_row = "!" * 76
  print >> log, " ".join([
    "",
    bang_row,
    "!! WARNING - EXPERIMENTAL FEATURE !!",
    "!! !!",
    "!! Grouping of occupancy constraints in 3D is experimental and not fully !!",
    "!! tested. Use at your own risk! For bug reports, etc. contact us by !!",
    "!! email at [email protected]. !!",
    bang_row,
    "",
  ])
  occupancies = pdb_atoms.extract_occ()
  pair_asu_table = xray_structure.pair_asu_table(
    distance_cutoff=interaction_distance_cutoff)
  pair_sym_table = pair_asu_table.extract_pair_sym_table()
  n_groups_start = len(constraint_groups)
  k = 0
  # constraint_groups shrinks while we iterate (merged groups are deleted),
  # hence the explicit index instead of a for loop.
  while (k < len(constraint_groups)):
    groups = constraint_groups[k]
    print >> log, "Constraint group %d: %d conformers" % (k+1, len(groups))
    merge_constraints = []
    for selection in groups:
      occ = occupancies.select(selection)
      altloc = pdb_atoms[selection[0]].fetch_labels().altloc
      print >> log, " conformer '%s': %d atoms" % (altloc, len(selection))
      if (not occ.all_eq(occ[0])):
        raise Sorry("At least one occupancy constraint group has "+
          "inconsistent occupancies for atoms in a single conformer. To use "+
          "the automatic 3D constraints, the starting occupancies must be "+
          "uniform within each selection.")
      for i_seq in selection:
        labels = pdb_atoms[i_seq].fetch_labels()
        if (labels.altloc.strip() == ''):
          continue
        pair_sym_dict = pair_sym_table[i_seq]
        if (verbose):
          print >> verbose_out, "%s (group %d):" % (
            pdb_atoms[i_seq].id_str(), k+1)
        for j_seq, _sym_ops in pair_sym_dict.items():
          # only later groups are candidates; earlier ones were already
          # processed as "groups" themselves
          kk = k + 1
          while (kk < len(constraint_groups)):
            combine_group = False
            for other_selection in constraint_groups[kk]:
              if (j_seq in other_selection):
                if (verbose):
                  print >> verbose_out, " %s (group %d)" % (
                    pdb_atoms[j_seq].id_str(), kk+1)
                merge_constraints.append(constraint_groups[kk])
                del constraint_groups[kk]
                combine_group = True
                break
            # after a deletion the next group has slid into position kk
            if (not combine_group):
              kk += 1
    if (len(merge_constraints) > 0):
      print >> log, "Merging %d constraint groups with group %d" % (
        len(merge_constraints), (k+1))
      for selection in groups:
        first_atom = pdb_atoms[selection[0]]
        altloc = first_atom.fetch_labels().altloc
        if (altloc.strip() == ''):
          raise RuntimeError(("Atom '%s' in occupancy constraint group has "+
            "blank altloc ID") % first_atom.id_str())
        for merge_groups in merge_constraints:
          kk = 0
          while (kk < len(merge_groups)):
            other_selection = merge_groups[kk]
            altloc2 = pdb_atoms[other_selection[0]].fetch_labels().altloc
            if (altloc2 == altloc):
              print >> log, " combining %d atoms with altloc %s" % \
                (len(other_selection), altloc)
              occ1 = occupancies.select(selection)
              occ2 = occupancies.select(other_selection)
              if (not occ1.all_eq(occ2[0])) or (not occ2.all_eq(occ1[0])):
                raise Sorry(
                  ("Inconsistent occupancies in spatially related groups "+
                   "(%.2f versus %.2f). To use automatic 3D occupancy "+
                   "restraints, the correlated conformers must start with "+
                   "the same initial occupancy.") % (occ1[0], occ2[0]))
              selection.extend(other_selection)
              del merge_groups[kk]
            else:
              kk += 1
      # whatever is left did not match any altloc of the current group; keep
      # it as an additional conformer rather than dropping it
      for merge_groups in merge_constraints:
        if (len(merge_groups) > 0):
          for other_selection in merge_groups:
            altloc = pdb_atoms[other_selection[0]].fetch_labels().altloc
            print >> log, (" warning: %d atoms with altloc %s do not "+
              "correspond to an existing group") % (len(other_selection),
              altloc)
            groups.append(other_selection)
    k += 1
  if (len(constraint_groups) != n_groups_start):
    print >> log, "New occupancy constraint groups:"
    for i_group, constraint_group in enumerate(constraint_groups):
      print >> log, " group %d:" % (i_group+1)
      for selection in constraint_group:
        resids = []
        altlocs = set()
        for i_seq in selection:
          atom_group = pdb_atoms[i_seq].parent()
          ag_id = atom_group.id_str()
          altlocs.add(atom_group.altloc)
          if (not ag_id in resids):
            resids.append(ag_id)
        assert len(altlocs) == 1
        print >> log, " conformer '%s' (%d atoms):" % (list(altlocs)[0],
          len(selection))
        for ag_id in resids:
          print >> log, " atom_group %s" % ag_id
  else:
    print >> log, "Occupancy constraint groups unmodified."
  print >> log, ""
  return constraint_groups
def filter_before_build(
    pdb_hierarchy,
    fmodel,
    geometry_restraints_manager,
    selection=None,
    params=None,
    verbose=True,
    log=sys.stdout):
  """
  Pick residues suitable for building alternate conformations - by default,
  this means no MolProbity/geometry outliers, good fit to map, no missing
  atoms, and no pre-existing alternates, but with significant difference
  density nearby.

  :param pdb_hierarchy: model hierarchy; must have as many atoms as
    fmodel.xray_structure has scatterers (i_seqs are reset here)
  :param fmodel: fmodel object used for validation and difference maps
  :param geometry_restraints_manager: passed to the MolProbity validation
  :param selection: boolean atom selection; residues not fully inside it are
    skipped.  Defaults to all atoms.
  :param params: filtering parameters; defaults are extracted from
    filter_params_str when None
  :param verbose: print the full validation and the "no difference density"
    messages
  :param log: output stream
  :returns: list of atom_group objects, one per accepted residue
  :raises Sorry: if no residue passes the filters
  """
  from mmtbx.validation import molprobity
  from mmtbx.rotamer import rotamer_eval
  import mmtbx.monomer_library.server
  from mmtbx import building
  from iotbx.pdb import common_residue_names_get_class
  from scitbx.array_family import flex
  if (selection is None):
    selection = flex.bool(fmodel.xray_structure.scatterers().size(), True)
  pdb_atoms = pdb_hierarchy.atoms()
  assert (pdb_atoms.size() == fmodel.xray_structure.scatterers().size())
  pdb_atoms.reset_i_seq()
  full_validation = molprobity.molprobity(
    pdb_hierarchy=pdb_hierarchy,
    fmodel=fmodel,
    geometry_restraints_manager=geometry_restraints_manager,
    outliers_only=False,
    rotamer_library="8000")
  if (verbose):
    full_validation.show(out=log)
  multi_criterion = full_validation.as_multi_criterion_view()
  if (params is None):
    params = libtbx.phil.parse(filter_params_str).extract()
  mon_lib_srv = mmtbx.monomer_library.server.server()
  two_fofc_map, fofc_map = building.get_difference_maps(fmodel=fmodel)
  residues = []
  make_sub_header("Identifying candidates for building", out=log)
  # TODO parallelize
  for chain in pdb_hierarchy.only_model().chains():
    if (not chain.is_protein()):
      continue
    for residue_group in chain.residue_groups():
      atom_groups = residue_group.atom_groups()
      id_str = residue_group.id_str()
      i_seqs = residue_group.atoms().extract_i_seq()
      residue_sel = selection.select(i_seqs)
      # the whole residue must be inside the user selection
      if (not residue_sel.all_eq(True)):
        continue
      if (len(atom_groups) > 1):
        print >> log, " %s is already multi-conformer" % id_str
        continue
      atom_group = atom_groups[0]
      res_class = common_residue_names_get_class(atom_group.resname)
      if (res_class != "common_amino_acid"):
        print >> log, " %s: non-standard residue" % id_str
        continue
      missing_atoms = rotamer_eval.eval_residue_completeness(
        residue=atom_group,
        mon_lib_srv=mon_lib_srv,
        ignore_hydrogens=True)
      if (len(missing_atoms) > 0):
        # residues modeled as pseudo-ALA are allowed by default; partially
        # missing sidechains are more problematic
        if ((building.is_stub_residue(atom_group)) and
            (not params.ignore_stub_residues)):
          pass
        else:
          # The format string previously had a single placeholder for two
          # arguments and raised TypeError instead of printing.
          print >> log, " %s: missing or incomplete sidechain (%d atoms)" % \
            (id_str, len(missing_atoms))
          continue
      validation = multi_criterion.get_residue_group_data(residue_group)
      is_outlier = is_validation_outlier(validation, params)
      if (is_outlier):
        print >> log, " %s" % str(validation)
        continue
      if (params.use_difference_map):
        i_seqs_no_hd = building.get_non_hydrogen_atom_indices(residue_group)
        map_stats = building.local_density_quality(
          fofc_map=fofc_map,
          two_fofc_map=two_fofc_map,
          atom_selection=i_seqs_no_hd,
          xray_structure=fmodel.xray_structure,
          radius=params.sampling_radius)
        if ((map_stats.number_of_atoms_below_fofc_map_level() == 0) and
            (map_stats.fraction_of_nearby_grid_points_above_cutoff()==0)):
          if (verbose):
            print >> log, " no difference density for %s" % id_str
          continue
      residues.append(residue_group.only_atom_group())
  if (len(residues) == 0):
    raise Sorry("No residues passed the filtering criteria.")
  print >> log, ""
  print >> log, "Alternate conformations will be tried for %d residue(s):" % \
    len(residues)
  building.show_chain_resseq_ranges(residues, out=log, prefix=" ")
  print >> log, ""
  return residues
def run(args=None, params=None, out=sys.stdout):
  """
  Compute merging and model statistics (CC*) from merged data, unmerged
  intensities and either a pre-calculated F(model) array or a PDB model.

  Exactly one of `args` and `params` must be given.

  :param args: command-line arguments, processed against master_phil
  :param params: already-extracted parameter object (used instead of args)
  :param out: output stream
  :returns: the mmtbx.validation.experimental.merging_and_model_statistics
    object
  :raises Usage: if args is empty or contains --help
  :raises Sorry: for missing or unusable input
  """
  assert [args, params].count(None) == 1
  if args is not None:
    if (len(args) == 0) or ("--help" in args):
      raise Usage((
        " phenix.cc_star model.pdb data.mtz unmerged_data=data.hkl "
        "[n_bins=X] [options] "
        "phenix.cc_star model_refine_001.mtz unmerged_data=data.hkl [...] "
        "Implementation of the method for assessing data and model quality "
        "described in: "
        "Karplus PA & Diederichs K (2012) Science 336:1030-3. "
        "Full parameters: %s ") %
        master_phil.as_str(prefix=" ", attributes_level=1))
    import iotbx.phil
    cmdline = iotbx.phil.process_command_line_with_files(
      args=args,
      master_phil=master_phil,
      pdb_file_def="model",
      reflection_file_def="data")
    params = cmdline.work.extract()
  import mmtbx.command_line
  import mmtbx.validation.experimental
  from iotbx import merging_statistics
  from iotbx import file_reader
  if (params.data is None):
    raise Sorry("Please specify a data file (usually MTZ format).")
  if (params.unmerged_data is None):
    raise Sorry("Please specify unmerged_data file")
  hkl_in = file_reader.any_file(params.data, force_type="hkl")
  hkl_in.check_file_type("hkl")
  f_model = f_obs = r_free_flags = None
  f_models = []
  f_model_labels = []
  if (params.f_model_labels is None):
    # No labels given: look for complex arrays that look like F(model).
    for array in hkl_in.file_server.miller_arrays:
      labels = array.info().label_string()
      if (array.is_complex_array()):
        if (labels.startswith("F-model") or labels.startswith("FMODEL")):
          f_models.append(array)
          f_model_labels.append(labels)
    if (len(f_models) > 1):
      # The message now names the parameter this function actually reads.
      raise Sorry(("Multiple F(model) arrays found:\n%s\nPlease specify the "+
        "'f_model_labels' parameter.") % "\n".join(f_model_labels))
    elif (len(f_models) == 1):
      f_model = f_models[0]
      if (f_model.anomalous_flag()):
        info = f_model.info()
        f_model = f_model.average_bijvoet_mates().set_info(info)
      print >> out, "F(model):"
      f_model.show_summary(f=out, prefix=" ")
    else:
      # No F(model) in the file: load experimental data; F(model) is
      # calculated from the PDB file further down.
      data_array = hkl_in.file_server.get_xray_data(
        file_name=params.data,
        labels=params.f_obs_labels,
        ignore_all_zeros=True,
        parameter_scope="")
      if (data_array.is_xray_intensity_array()):
        from cctbx import french_wilson
        f_obs = french_wilson.french_wilson_scale(
          miller_array=data_array,
          out=out)
      else:
        f_obs = data_array
  else:
    for array in hkl_in.file_server.miller_arrays:
      array_labels = array.info().label_string()
      if (array_labels == params.f_model_labels):
        if (array.is_complex_array()):
          f_model = array
          break
        else:
          raise Sorry("The data in %s are not of the required type." %
            array_labels)
    if (f_model is None):
      # Previously this fell through with both f_model and f_obs unset and
      # died later with an AttributeError on None.
      raise Sorry("No array with labels '%s' was found in %s." %
        (params.f_model_labels, params.data))
  if (f_model is not None):
    assert (f_obs is None)
    for array in hkl_in.file_server.miller_arrays:
      labels = array.info().label_string()
      if (labels == params.f_obs_labels):
        f_obs = array
        break
    else:
      try:
        # file_name/labels used to be passed as (f_obs_labels, None), i.e.
        # the label string was treated as a file name.  Use the same
        # convention as the get_xray_data() call above.
        f_obs = hkl_in.file_server.get_amplitudes(
          file_name=params.data,
          labels=params.f_obs_labels,
          convert_to_amplitudes_if_necessary=False,
          parameter_name="f_obs_labels",
          parameter_scope="",
          strict=True)
      except Sorry:
        raise Sorry("You must supply a file containing both F-obs and F-model "+
          "if you want to use a pre-calculated F-model array.")
  assert (f_obs.is_xray_amplitude_array())
  if (f_obs.anomalous_flag()):
    info = f_obs.info()
    f_obs = f_obs.average_bijvoet_mates().set_info(info)
  print >> out, "F(obs):"
  f_obs.show_summary(f=out, prefix=" ")
  print >> out, ""
  r_free_flags, test_flag_value = hkl_in.file_server.get_r_free_flags(
    file_name=params.data,
    label=params.r_free_flags.label,
    test_flag_value=params.r_free_flags.test_flag_value,
    disable_suitability_test=False,
    parameter_scope="")
  info = r_free_flags.info()
  # convert the flags to a boolean array (True = test set)
  r_free_flags = r_free_flags.customized_copy(
    data=r_free_flags.data()==test_flag_value).set_info(info)
  if (r_free_flags.anomalous_flag()):
    r_free_flags = r_free_flags.average_bijvoet_mates().set_info(info)
  print >> out, "R-free flags:"
  r_free_flags.show_summary(f=out, prefix=" ")
  print >> out, ""
  unmerged_i_obs = mmtbx.command_line.load_and_validate_unmerged_data(
    f_obs=f_obs,
    file_name=params.unmerged_data,
    data_labels=params.unmerged_labels,
    log=out)
  print >> out, "Unmerged intensities:"
  unmerged_i_obs.show_summary(f=out, prefix=" ")
  print >> out, ""
  if (f_model is None):
    assert (f_obs is not None)
    if (params.model is None):
      raise Sorry("A PDB file is required if F(model) is not pre-calculated.")
    make_sub_header("Calculating F(model)", out=out)
    pdb_in = file_reader.any_file(params.model, force_type="pdb")
    pdb_in.check_file_type("pdb")
    pdb_symm = pdb_in.file_object.crystal_symmetry()
    if (pdb_symm is None):
      pdb_symm = f_obs
    else:
      if (f_obs.crystal_symmetry() is None):
        f_obs = f_obs.customized_copy(crystal_symmetry=pdb_symm)
      elif (not pdb_symm.is_similar_symmetry(f_obs)):
        mmtbx.command_line.show_symmetry_error(
          file1="PDB file",
          file2="data file",
          symm1=pdb_symm,
          symm2=f_obs)
    xray_structure = pdb_in.file_object.xray_structure_simple(
      crystal_symmetry=pdb_symm)
    from mmtbx.utils import fmodel_simple
    # XXX this gets done anyway later, but they need to be consistent before
    # creating the fmodel manager
    if (f_obs.anomalous_flag()):
      f_obs = f_obs.average_bijvoet_mates()
    f_obs = f_obs.eliminate_sys_absent()
    f_obs, r_free_flags = f_obs.map_to_asu().common_sets(
      other=r_free_flags.map_to_asu())
    fmodel = fmodel_simple(
      f_obs=f_obs,
      r_free_flags=r_free_flags,
      xray_structures=[xray_structure],
      skip_twin_detection=True,
      scattering_table="n_gaussian")
    fmodel.show(log=out)
    f_model = fmodel.f_model()
    r_free_flags = f_model.customized_copy(data=fmodel.arrays.free_sel)
  else:
    if (f_model.anomalous_flag()):
      f_model = f_model.average_bijvoet_mates()
    f_model, r_free_flags = f_model.common_sets(other=r_free_flags)
  stats = mmtbx.validation.experimental.merging_and_model_statistics(
    f_model=f_model,
    f_obs=f_obs,
    r_free_flags=r_free_flags,
    unmerged_i_obs=unmerged_i_obs,
    n_bins=params.n_bins,
    sigma_filtering=params.sigma_filtering)
  stats.show_cc_star(out=out)
  if (params.loggraph):
    stats.show_loggraph(out=out)
  print >> out, ""
  print >> out, "Reference:"
  print >> out, " Karplus PA & Diederichs K (2012) Science 336:1030-3."
  print >> out, ""
  return stats
def show(self, out=sys.stdout, outliers_only=True, suppress_summary=False,
    show_percentiles=False):
  """
  Write every validation section that is present (not None) to `out`,
  followed by the summary unless `suppress_summary` is set.

  :param out: output stream
  :param outliers_only: forwarded to the Ramachandran, omega, rotamer,
    C-beta and RNA sections
  :param suppress_summary: skip the final "Summary" section
  :param show_percentiles: forwarded to show_summary()
  :returns: self
  """
  indent = " "
  if self.xtriage is not None:
    self.xtriage.summarize_issues().show(out=out)
  if self.data_stats is not None:
    make_header("Experimental data", out=out)
    self.data_stats.show(out=out, prefix=indent)
    # data-dependent sub-sections
    for attr_name, title in (
        ("real_space", "Residues with poor real-space CC"),
        ("waters", "Suspicious water molecules")):
      section = getattr(self, attr_name)
      if section is None:
        continue
      make_sub_header(title, out=out)
      section.show(out=out, prefix=indent)
  for attr_name, title in (
      ("model_stats", "Model properties"),
      ("restraints", "Geometry restraints")):
    section = getattr(self, attr_name)
    if section is None:
      continue
    make_header(title, out=out)
    section.show(out=out, prefix=indent)
  # this header is printed even if every MolProbity section is missing
  make_header("Molprobity validation", out=out)
  # sections whose show() understands outliers_only
  for attr_name, title in (
      ("ramalyze", "Ramachandran angles"),
      ("omegalyze", "Omegalyze analysis"),
      ("rotalyze", "Sidechain rotamers"),
      ("cbetadev", "C-beta deviations")):
    section = getattr(self, attr_name)
    if section is None:
      continue
    make_sub_header(title, out=out)
    section.show(out=out, prefix=indent, outliers_only=outliers_only)
  # sections shown without the outliers_only switch
  for attr_name, title in (
      ("clashes", "Bad clashes"),
      ("nqh_flips", "Asn/Gln/His flips")):
    section = getattr(self, attr_name)
    if section is None:
      continue
    make_sub_header(title, out=out)
    section.show(out=out, prefix=indent)
  if self.rna is not None:
    make_header("RNA validation", out=out)
    self.rna.show(out=out, prefix=indent, outliers_only=outliers_only)
  if not suppress_summary:
    make_header("Summary", out=out)
    self.show_summary(out=out, prefix=indent,
      show_percentiles=show_percentiles)
  return self
def __init__(self, args, master_phil,
    out=sys.stdout,
    process_pdb_file=True,
    require_data=True,
    create_fmodel=True,
    prefer_anomalous=None,
    force_non_anomalous=False,
    set_wavelength_from_model_header=False,
    set_inelastic_form_factors=None,
    usage_string=None,
    create_log_buffer=False,
    remove_unknown_scatterers=False,
    generate_input_phil=False):
  """
  Process command-line arguments, then load the model, reflection data,
  restraint CIFs, experimental phases, sequence and unmerged data they refer
  to, and optionally set up an fmodel object.  Results are stored as
  attributes (pdb_hierarchy, xray_structure, f_obs, r_free_flags, fmodel,
  params, ...).

  :param args: list of command-line arguments
  :param master_phil: master parameters, as a parsed object or a string
  :param out: output stream; replaced by null_out() for --quiet/quiet=True
  :param process_pdb_file: run full PDB interpretation (restraints) instead
    of just reading the model
  :param require_data: raise Sorry if no reflection file is given
  :param create_fmodel: build self.fmodel when data are available
  :param prefer_anomalous: passed to mmtbx.utils.determine_data_and_flags
  :param force_non_anomalous: passed to determine_data_and_flags; must not
    be combined with prefer_anomalous
  :param set_wavelength_from_model_header: take the wavelength from the PDB
    header if it was not given explicitly
  :param set_inelastic_form_factors: None, "sasaki" or "henke"
  :param usage_string: if given, raise Usage for empty args or --help
  :param create_log_buffer: log to both `out` and an in-memory buffer
  :param remove_unknown_scatterers: drop atoms of element X (only used when
    process_pdb_file is False)
  :param generate_input_phil: treat master_phil as a string to be expanded
    by generate_master_phil_with_inputs()
  :raises Usage: see usage_string
  :raises Sorry: for missing or inconsistent input
  """
  import mmtbx.monomer_library.pdb_interpretation
  import mmtbx.monomer_library.server
  import mmtbx.utils
  from iotbx import crystal_symmetry_from_any
  from iotbx import file_reader
  import iotbx.phil
  if generate_input_phil:
    assert isinstance(master_phil, basestring)
    master_phil = generate_master_phil_with_inputs(phil_string=master_phil)
  if isinstance(master_phil, str):
    master_phil = iotbx.phil.parse(master_phil)
  if (usage_string is not None):
    if (len(args) == 0) or ("--help" in args):
      raise Usage("""%s\n\nFull parameters:\n%s""" % (usage_string,
        master_phil.as_str(prefix=" ")))
  if (force_non_anomalous):
    assert (not prefer_anomalous)
  assert (set_inelastic_form_factors in [None, "sasaki", "henke"])
  self.args = args
  self.master_phil = master_phil
  self.processed_pdb_file = self.pdb_inp = None
  self.pdb_hierarchy = self.xray_structure = None
  self.geometry = None
  self.sequence = None
  self.fmodel = None
  self.f_obs = None
  self.r_free_flags = None
  self.intensity_flag = None
  self.raw_data = None
  self.raw_flags = None
  self.test_flag_value = None
  self.miller_arrays = None
  self.hl_coeffs = None
  self.cif_objects = []
  self.log = out
  if ("--quiet" in args) or ("quiet=True" in args):
    self.log = null_out()
  elif create_log_buffer:
    self.log = multi_out()
    self.log.register(label="stdout", file_object=out)
    self.log.register(label="log_buffer", file_object=StringIO())
  make_header("Collecting inputs", out=self.log)
  cmdline = iotbx.phil.process_command_line_with_files(
    args=args,
    master_phil=master_phil,
    pdb_file_def="input.pdb.file_name",
    reflection_file_def="input.xray_data.file_name",
    cif_file_def="input.monomers.file_name",
    seq_file_def="input.sequence")
  self.working_phil = cmdline.work
  params = self.working_phil.extract()
  if len(params.input.pdb.file_name) == 0:
    raise Sorry("At least one PDB file is required as input.")
  self.cif_file_names = params.input.monomers.file_name
  self.pdb_file_names = params.input.pdb.file_name
  # SYMMETRY HANDLING - PDB FILES
  # the first model file that carries symmetry wins
  self.crystal_symmetry = pdb_symm = None
  for pdb_file_name in params.input.pdb.file_name:
    pdb_symm = crystal_symmetry_from_any.extract_from(pdb_file_name)
    if (pdb_symm is not None):
      break
  # DATA INPUT
  data_and_flags = hkl_symm = hkl_in = None
  if (params.input.xray_data.file_name is None):
    if (require_data):
      raise Sorry("At least one reflections file is required as input.")
  else:
    # FIXME this may still require that the data file has full crystal
    # symmetry defined (although for MTZ input this will not be a problem)
    make_sub_header("Processing X-ray data", out=self.log)
    hkl_in = file_reader.any_file(params.input.xray_data.file_name)
    hkl_in.check_file_type("hkl")
    hkl_server = hkl_in.file_server
    symm = hkl_server.miller_arrays[0].crystal_symmetry()
    if ((symm is None) or
        (symm.space_group() is None) or
        (symm.unit_cell() is None)):
      if (pdb_symm is not None):
        from iotbx.reflection_file_utils import reflection_file_server
        print >> self.log, \
          "No symmetry in X-ray data file - using PDB symmetry:"
        # This summary used to be written to `out`, so it leaked past
        # --quiet and never reached the log buffer; it belongs with the
        # message above.
        pdb_symm.show_summary(f=self.log, prefix=" ")
        hkl_server = reflection_file_server(
          crystal_symmetry=pdb_symm,
          reflection_files=[hkl_in.file_object])
      else:
        raise Sorry("No crystal symmetry information found in input files.")
    data_and_flags = mmtbx.utils.determine_data_and_flags(
      reflection_file_server=hkl_server,
      parameters=params.input.xray_data,
      data_parameter_scope="input.xray_data",
      flags_parameter_scope="input.xray_data.r_free_flags",
      prefer_anomalous=prefer_anomalous,
      force_non_anomalous=force_non_anomalous,
      log=self.log)
    self.intensity_flag = data_and_flags.intensity_flag
    self.raw_data = data_and_flags.raw_data
    self.raw_flags = data_and_flags.raw_flags
    self.test_flag_value = data_and_flags.test_flag_value
    self.f_obs = data_and_flags.f_obs
    self.r_free_flags = data_and_flags.r_free_flags
    self.miller_arrays = hkl_in.file_server.miller_arrays
    hkl_symm = self.raw_data.crystal_symmetry()
  if len(self.cif_file_names) > 0:
    for file_name in self.cif_file_names:
      cif_obj = mmtbx.monomer_library.server.read_cif(file_name=file_name)
      self.cif_objects.append((file_name, cif_obj))
  # SYMMETRY HANDLING - COMBINED
  from iotbx.symmetry import combine_model_and_data_symmetry
  self.crystal_symmetry = combine_model_and_data_symmetry(
    model_symmetry=pdb_symm,
    data_symmetry=hkl_symm)
  if (self.crystal_symmetry is not None) and (self.f_obs is not None):
    self.f_obs = self.f_obs.customized_copy(
      crystal_symmetry=self.crystal_symmetry).eliminate_sys_absent().set_info(
        self.f_obs.info())
    self.r_free_flags = self.r_free_flags.customized_copy(
      crystal_symmetry=self.crystal_symmetry).eliminate_sys_absent().set_info(
        self.r_free_flags.info())
  # EXPERIMENTAL PHASES
  target_name = "ml"
  if hasattr(params.input, "experimental_phases"):
    flag = params.input.use_experimental_phases
    # NOTE(review): this branch dereferences hkl_in, self.crystal_symmetry
    # and self.f_obs, all of which are None when no reflection file was
    # given (require_data=False) - confirm callers never get here that way.
    if (flag in [True, Auto]):
      phases_file = params.input.experimental_phases.file_name
      if (phases_file is None):
        phases_file = params.input.xray_data.file_name
        phases_in = hkl_in
      else:
        phases_in = file_reader.any_file(phases_file)
        phases_in.check_file_type("hkl")
      phases_in.file_server.err = self.log # redirect error output
      space_group = self.crystal_symmetry.space_group()
      point_group = space_group.build_derived_point_group()
      hl_coeffs = mmtbx.utils.determine_experimental_phases(
        reflection_file_server = phases_in.file_server,
        parameters = params.input.experimental_phases,
        log = self.log,
        parameter_scope = "input.experimental_phases",
        working_point_group = point_group,
        symmetry_safety_check = True)
      if (hl_coeffs is not None):
        hl_coeffs = hl_coeffs.map_to_asu()
        # make the anomalous flag of the phases agree with that of f_obs
        if hl_coeffs.anomalous_flag():
          if (not self.f_obs.anomalous_flag()):
            hl_coeffs = hl_coeffs.average_bijvoet_mates()
        elif self.f_obs.anomalous_flag():
          hl_coeffs = hl_coeffs.generate_bijvoet_mates()
        self.hl_coeffs = hl_coeffs.matching_set(other=self.f_obs,
          data_substitute=(0,0,0,0))
        target_name = "mlhl"
  # PDB INPUT
  self.unknown_residues_flag = False
  self.unknown_residues_error_message = False
  if process_pdb_file:
    pdb_interp_params = getattr(params, "pdb_interpretation", None)
    if (pdb_interp_params is None):
      pdb_interp_params = \
        mmtbx.monomer_library.pdb_interpretation.master_params.extract()
    make_sub_header("Processing PDB file(s)", out=self.log)
    pdb_combined = mmtbx.utils.combine_unique_pdb_files(
      file_names=params.input.pdb.file_name,)
    pdb_combined.report_non_unique(out=self.log)
    pdb_raw_records = pdb_combined.raw_records
    processed_pdb_files_srv = mmtbx.utils.process_pdb_file_srv(
      cif_objects=self.cif_objects,
      pdb_interpretation_params=pdb_interp_params,
      crystal_symmetry=self.crystal_symmetry,
      use_neutron_distances=params.input.scattering_table=="neutron",
      stop_for_unknowns=getattr(pdb_interp_params, "stop_for_unknowns",False),
      log=self.log)
    self.processed_pdb_file, self.pdb_inp = \
      processed_pdb_files_srv.process_pdb_files(
        raw_records = pdb_raw_records,
        stop_if_duplicate_labels = False,
        allow_missing_symmetry=\
          (self.crystal_symmetry is None) and (not require_data))
    error_msg = self.processed_pdb_file.all_chain_proxies.\
      fatal_problems_message(
        ignore_unknown_scattering_types=False,
        ignore_unknown_nonbonded_energy_types=False)
    if (error_msg is not None):
      self.unknown_residues_flag = True
      self.unknown_residues_error_message = error_msg
    self.geometry = self.processed_pdb_file.geometry_restraints_manager(
      show_energies=False)
    assert (self.geometry is not None)
    self.xray_structure = self.processed_pdb_file.xray_structure()
    chain_proxies = self.processed_pdb_file.all_chain_proxies
    self.pdb_hierarchy = chain_proxies.pdb_hierarchy
  else:
    pdb_file_object = mmtbx.utils.pdb_file(
      pdb_file_names=params.input.pdb.file_name,
      cif_objects=self.cif_objects,
      crystal_symmetry=self.crystal_symmetry,
      log=self.log)
    self.pdb_inp = pdb_file_object.pdb_inp
    self.pdb_hierarchy = self.pdb_inp.construct_hierarchy()
    if (remove_unknown_scatterers):
      known_sel = self.pdb_hierarchy.atom_selection_cache().selection(
        "not element X")
      if (known_sel.count(True) != len(known_sel)):
        self.pdb_hierarchy = self.pdb_hierarchy.select(known_sel)
        self.xray_structure = self.pdb_hierarchy.extract_xray_structure(
          crystal_symmetry=self.crystal_symmetry)
    self.pdb_hierarchy.atoms().reset_i_seq()
    if (self.xray_structure is None):
      self.xray_structure = self.pdb_inp.xray_structure_simple(
        crystal_symmetry=self.crystal_symmetry)
  # wavelength
  if (params.input.energy is not None):
    if (params.input.wavelength is not None):
      raise Sorry("Both wavelength and energy have been specified!")
    params.input.wavelength = 12398.424468024265 / params.input.energy
  if (set_wavelength_from_model_header and params.input.wavelength is None):
    wavelength = self.pdb_inp.extract_wavelength()
    if (wavelength is not None):
      print >> self.log, ""
      print >> self.log, "Using wavelength = %g from PDB header" % wavelength
      params.input.wavelength = wavelength
  # set scattering table
  if (data_and_flags is not None):
    self.xray_structure.scattering_type_registry(
      d_min=self.f_obs.d_min(),
      table=params.input.scattering_table)
    if ((params.input.wavelength is not None) and
        (set_inelastic_form_factors is not None)):
      self.xray_structure.set_inelastic_form_factors(
        photon=params.input.wavelength,
        table=set_inelastic_form_factors)
    make_sub_header("xray_structure summary", out=self.log)
    self.xray_structure.scattering_type_registry().show(out = self.log)
    self.xray_structure.show_summary(f=self.log)
  # FMODEL SETUP
  if (create_fmodel) and (data_and_flags is not None):
    make_sub_header("F(model) initialization", out=self.log)
    skip_twin_detection = getattr(params.input, "skip_twin_detection", None)
    twin_law = getattr(params.input, "twin_law", None)
    if (twin_law is Auto):
      if (self.hl_coeffs is not None):
        raise Sorry("Automatic twin law determination not supported when "+
          "experimental phases are used.")
    elif (skip_twin_detection is not None):
      twin_law = Auto
    if (twin_law is Auto):
      print >> self.log, "Twinning will be detected automatically."
      self.fmodel = mmtbx.utils.fmodel_simple(
        xray_structures=[self.xray_structure],
        scattering_table=params.input.scattering_table,
        f_obs=self.f_obs,
        r_free_flags=self.r_free_flags,
        skip_twin_detection=skip_twin_detection,
        target_name=target_name,
        log=self.log)
    else:
      if ((twin_law is not None) and (self.hl_coeffs is not None)):
        raise Sorry("Automatic twin law determination not supported when "+
          "experimental phases are used.")
      # Use the local value: it equals params.input.twin_law whenever that
      # parameter exists, and stays None (instead of raising AttributeError)
      # when the parameter scope does not define twin_law.
      self.fmodel = mmtbx.utils.fmodel_manager(
        f_obs=self.f_obs,
        xray_structure=self.xray_structure,
        r_free_flags=self.r_free_flags,
        twin_law=twin_law,
        hl_coeff=self.hl_coeffs,
        target_name=target_name)
      self.fmodel.update_all_scales(
        params=None,
        log=self.log,
        optimize_mask=True,
        show=True)
    self.fmodel.info().show_rfactors_targets_scales_overall(out=self.log)
  # SEQUENCE
  if (params.input.sequence is not None):
    seq_file = file_reader.any_file(params.input.sequence,
      force_type="seq",
      raise_sorry_if_errors=True)
    self.sequence = seq_file.file_object
  # UNMERGED DATA
  self.unmerged_i_obs = None
  if hasattr(params.input, "unmerged_data"):
    if (params.input.unmerged_data.file_name is not None):
      self.unmerged_i_obs = load_and_validate_unmerged_data(
        f_obs=self.f_obs,
        file_name=params.input.unmerged_data.file_name,
        data_labels=params.input.unmerged_data.labels,
        log=self.log)
  self.params = params
  print >> self.log, ""
  print >> self.log, "End of input processing"
def run(args, out=sys.stdout):
  """
  Build a reference hierarchy from the first matching protein chain of the
  input model, find and superpose related models with
  make_library.extract_and_superpose(), and optionally write the resulting
  multi-model ensemble to params.output_file.

  :param args: command-line arguments
  :param out: output stream for progress messages
  :returns: the ensemble as a multi-model hierarchy
  :raises Sorry: if no model was given or no sequence could be determined
  """
  from mmtbx.building import make_library
  import iotbx.phil
  cmdline = iotbx.phil.process_command_line_with_files(
    args=args,
    master_phil_string=master_phil_str,
    pdb_file_def="model",
    seq_file_def="sequence",
    directory_def="search_directory",
    usage_string="mmtbx.get_related_ensemble [model.pdb] [seq.fa] [...]")
  params = cmdline.work.extract()
  sequence = None
  if (params.model is None):
    raise Sorry("No model (PDB or mmCIF file) was specified.")
  if (params.sequence is not None):
    seq_file = cmdline.get_file(params.sequence, force_type="seq")
    n_seqs = len(seq_file.file_object)
    if (n_seqs > 1):
      print >> out, "%d sequences in file - will only use the first" % n_seqs
    sequence = seq_file.file_object[0].sequence
  pdb_file = cmdline.get_file(params.model, force_type="pdb")
  hierarchy = pdb_file.file_object.hierarchy
  reference_hierarchy = iotbx.pdb.hierarchy.root()
  model = iotbx.pdb.hierarchy.model()
  reference_hierarchy.append_model(model)
  for chain in hierarchy.models()[0].chains():
    if (params.chain_id is None) or (chain.id == params.chain_id):
      if (not chain.is_protein()):
        if (chain.id == params.chain_id):
          print >> out, \
            "warning: matching chain '%s' is not protein, skipping" % \
            chain.id
        continue
      else:
        # TODO select based on sequence if provided
        new_chain = iotbx.pdb.hierarchy.chain(id=chain.id)
        model.append_chain(new_chain)
        # get rid of alternate conformations: keep only the first atom
        # group of each residue, and only if its altloc is blank or 'A'
        for residue_group in chain.residue_groups():
          atom_group = residue_group.atom_groups()[0]
          if (not atom_group.altloc.strip() in ['', 'A']):
            continue
          new_rg = iotbx.pdb.hierarchy.residue_group(
            resseq=residue_group.resseq,
            icode=residue_group.icode)
          new_ag = atom_group.detached_copy()
          new_ag.altloc = ''
          new_rg.append_atom_group(new_ag)
          new_chain.append_residue_group(new_rg)
        if (sequence is None):
          sequence = chain.as_padded_sequence(pad='X')
          print >> out, "Using sequence of chain '%s' (approx. %d residues)" % \
            (chain.id, len(sequence))
        # only the first matching protein chain is used
        break
  if (sequence is None):
    raise Sorry("No protein sequence could be extracted based on these inputs.")
  make_sub_header("Finding related models and generating ensemble", out=out)
  ensemble = make_library.extract_and_superpose(
    reference_hierarchy=reference_hierarchy,
    search_directory=params.search_directory,
    sequence=sequence,
    params=params,
    out=out)
  f = null_out()
  if (params.output_file is not None):
    f = open(params.output_file, "w")
  # Close the output file even if assembling or writing the ensemble fails;
  # previously an exception here leaked the open handle.
  try:
    print >> out, "Assembling moved models:"
    ensemble_hierarchy = ensemble.as_multi_model_hierarchy()
    for k in ensemble.selection_moved:
      source_info = ensemble.related_chains[k].source_info
      print >> out, " Model %d: %s:%s" % (k+1, source_info,
        ensemble.related_chains[k].chain_id)
      f.write("REMARK model %d is from %s\n" % (k+1, source_info))
    f.write(ensemble_hierarchy.as_pdb_string())
  finally:
    f.close()
  return ensemble_hierarchy
def build_cycle (pdb_hierarchy,
    fmodel,
    geometry_restraints_manager,
    params,
    selection=None,
    cif_objects=(),
    nproc=Auto,
    out=sys.stdout,
    verbose=False,
    debug=None,
    i_cycle=0) :
  """
  Run one cycle of alternate-conformation building: pre-filter residues, fit
  alternates, assemble them into the hierarchy, and optionally run
  real-space refinement.

  Hydrogen atoms are always removed first (from the hierarchy in place, and
  from the fmodel's X-ray structure and the geometry restraints by
  selection), and any non-blank segids are reset to blank.

  :param pdb_hierarchy: model hierarchy; modified in place
  :param fmodel: fmodel object; its xray_structure is updated when hydrogens
    are removed
  :param geometry_restraints_manager: geometry restraints for the model
  :param params: parameter object; the prefilter, residue_fitting, cleanup
    and nproc attributes are read here
  :param selection: optional atom selection; a string is converted through
    the hierarchy's atom selection cache
  :param cif_objects: passed through to real_space_refine
  :param nproc: NOTE(review): not referenced in this function - params.nproc
    is passed to the callees instead; confirm that is intended
  :param out: file-like object receiving log output
  :param verbose: passed through to the filtering/building/scoring steps
  :param debug: when not None and greater than zero, map coefficients for
    the starting model are written to cycle_<N>_start.mtz
  :param i_cycle: zero-based cycle index, used for the header and file name
  :returns: tuple (pdb_hierarchy, n_alternates)
  """
  from mmtbx import restraints
  # Not referenced below; presumably kept for its import side effects -
  # TODO confirm before removing.
  from scitbx.array_family import flex
  t_start = time.time()
  hd_sel = fmodel.xray_structure.hd_selection()
  n_hydrogen = hd_sel.count(True)
  # Hydrogen removal is unconditional (the params.building.delete_hydrogens
  # switch that once guarded it was disabled in the original code).
  if (n_hydrogen > 0) :
    print >> out, "WARNING: %d hydrogen atoms will be removed!" % n_hydrogen
    non_hd_sel = ~hd_sel
    # XXX it's better to do this in-place for the hierarchy, because calling
    # pdb_hierarchy.select(non_hd_sel) will not remove parent-child
    # relationships involving hydrogens, which causes problems when running
    # the MolProbity validation.
    pdb_hierarchy.remove_hd(reset_i_seq=True)
    xray_structure = fmodel.xray_structure.select(non_hd_sel)
    assert (pdb_hierarchy.atoms_size() == xray_structure.scatterers().size())
    fmodel.update_xray_structure(xray_structure)
    geometry_restraints_manager = geometry_restraints_manager.select(non_hd_sel)
  pdb_atoms = pdb_hierarchy.atoms()
  segids = pdb_atoms.extract_segid().strip()
  if (not segids.all_eq("")) :
    print >> out, "WARNING: resetting segids to blank"
    scatterers = fmodel.xray_structure.scatterers()
    for i_seq, atom in enumerate(pdb_atoms) :
      atom.segid = ""
      # keep the scatterer labels in sync with the modified atom IDs
      sc = scatterers[i_seq]
      sc.label = atom.id_str()
  if isinstance(selection, str) :
    sele_cache = pdb_hierarchy.atom_selection_cache()
    selection = sele_cache.selection(selection)
  make_header("Build cycle %d" % (i_cycle+1), out=out)
  fmodel.info().show_rfactors_targets_scales_overall(out=out)
  # debug defaults to None; test it explicitly instead of relying on how
  # None happens to order against an integer.
  if (debug is not None) and (debug > 0) :
    from mmtbx.maps.utils import get_maps_from_fmodel
    from iotbx.map_tools import write_map_coeffs
    two_fofc, fofc = get_maps_from_fmodel(fmodel,
      exclude_free_r_reflections=True)
    write_map_coeffs(
      fwt_coeffs=two_fofc,
      delfwt_coeffs=fofc,
      file_name="cycle_%d_start.mtz" % (i_cycle+1))
  candidate_residues = alt_confs.filter_before_build(
    pdb_hierarchy=pdb_hierarchy,
    fmodel=fmodel,
    geometry_restraints_manager=geometry_restraints_manager,
    selection=selection,
    params=params.prefilter,
    verbose=verbose,
    log=out)
  t1 = time.time()
  print >> out, "filtering: %.3fs" % (t1-t_start)
  restraints_manager = restraints.manager(
    geometry=geometry_restraints_manager,
    normalization=True)
  make_sub_header("Finding alternate conformations", out=out)
  building_trials = find_all_alternates(
    residues=candidate_residues,
    pdb_hierarchy=pdb_hierarchy,
    restraints_manager=restraints_manager,
    fmodel=fmodel,
    params=params.residue_fitting,
    nproc=params.nproc,
    verbose=verbose,
    debug=debug,
    log=out).results
  t2 = time.time()
  print >> out, " building: %.3fs" % (t2-t1)
  make_sub_header("Scoring and assembling alternates", out=out)
  n_alternates = process_results(
    pdb_hierarchy=pdb_hierarchy,
    fmodel=fmodel,
    residues_in=candidate_residues,
    building_trials=building_trials,
    params=params.residue_fitting,
    verbose=verbose,
    log=out)
  if (n_alternates > 0) :
    print >> out, ""
    print >> out, " %d disordered residues built" % n_alternates
    n_split = alt_confs.spread_alternates(pdb_hierarchy,
      new_occupancy=params.residue_fitting.expected_occupancy,
      split_all_adjacent=True,
      log=out)
    assert (n_split > 0)
    print >> out, " %d adjacent residues split" % n_split
  else :
    print >> out, "No alternates built this round."
  t3 = time.time()
  print >> out, " assembly: %.3fs" % (t3-t2)
  if (params.cleanup.rsr_after_build) :
    make_sub_header("Real-space refinement", out=out)
    print >> out, ""
    pdb_hierarchy = real_space_refine(
      pdb_hierarchy=pdb_hierarchy,
      fmodel=fmodel,
      cif_objects=cif_objects,
      params=params,
      nproc=params.nproc,
      remediate=True,
      out=out)
    t4 = time.time()
    print >> out, ""
    print >> out, "RSR: %.3fs" % (t4-t3)
  elif (n_alternates > 0) :
    print >> out, "Skipping final RSR step (rsr_after_build=False)."
  else :
    print >> out, "No refinement needs to be performed."
  fmodel.info().show_targets(out=out, text="Rebuilt model")
  alt_confs.finalize_model(
    pdb_hierarchy=pdb_hierarchy,
    xray_structure=pdb_hierarchy.extract_xray_structure(
      crystal_symmetry=fmodel.xray_structure),
    set_b_iso=params.cleanup.set_b_iso,
    convert_to_isotropic=params.cleanup.convert_to_isotropic,
    selection="altloc A or altloc B")
  # The reported total includes the finalize_model step above.
  t_end = time.time()
  print >> out, "Total runtime for cycle: %.3fs" % (t_end-t_start)
  return pdb_hierarchy, n_alternates
def set_rotamer_to_reference(self,
                             xray_structure,
                             mon_lib_srv=None,
                             log=None,
                             quiet=False):
  """
  Change rotamer outliers in the working model to match the reference
  model(s).

  Rotamer names (via rotalyze) and chi angles (via SidechainAngles) are
  collected for self.pdb_hierarchy and for every hierarchy in
  self.pdb_hierarchy_ref.  Each working-model residue that has a match in
  self.residue_match_hash is then passed to
  self.change_residue_rotamer_in_place when either

  * the model rotamer is 'OUTLIER' and the reference rotamer is not, or
  * self.params.strict_rotamer_matching is set, neither rotamer is
    'OUTLIER', and the two rotamer names differ.

  PRO residues and residue groups with more than one conformer are skipped.
  After every change the updated coordinates are written back into
  xray_structure.

  NOTE(review): the method returns early when the working model has no
  outliers at all, so strict_rotamer_matching has no effect in that case -
  confirm this is intended.

  :param xray_structure: supplies the starting sites_cart and receives the
    modified coordinates through set_sites_cart
  :param mon_lib_srv: monomer library server, used only when
    self.mon_lib_srv is still None
  :param log: file-like object for log output (defaults to sys.stdout)
  :param quiet: not referenced in this method
  :returns: None
  """
  if self.mon_lib_srv is None:
    self.mon_lib_srv = mon_lib_srv
  assert isinstance(self.mon_lib_srv, mmtbx.monomer_library.server.server)
  if log is None:
    log = sys.stdout
  make_sub_header(
    "Correcting rotamer outliers to match reference model",
    out=log)
  sa = SidechainAngles(False)

  def measure_chis(hierarchy):
    # Map residue id string -> chi angles for every atom group in hierarchy.
    chis_by_key = {}
    for model in hierarchy.models():
      for chain in model.chains():
        for residue_group in chain.residue_groups():
          all_dict = rotalyze.construct_complete_sidechain(residue_group)
          for atom_group in residue_group.atom_groups():
            # Deliberately best-effort: any failure here is reported as an
            # incomplete side chain and the atom group is skipped.
            try:
              atom_dict = all_dict.get(atom_group.altloc)
              chis = sa.measureChiAngles(atom_group, atom_dict)
              if chis is not None:
                key = utils.id_str(
                  chain_id=chain.id,
                  resseq=residue_group.resseq,
                  resname=atom_group.resname,
                  icode=residue_group.icode,
                  altloc=atom_group.altloc)
                chis_by_key[key] = chis
            except Exception:
              print >> log, \
                ' %s%5s %s is missing some sidechain atoms, **skipping**' % (
                  chain.id, residue_group.resid(),
                  atom_group.altloc+atom_group.resname)
    return chis_by_key

  # Rotamer names for the working model and for each reference model.
  r = rotalyze.rotalyze(pdb_hierarchy=self.pdb_hierarchy)
  rot_list_reference = {}
  for ref_name in self.pdb_hierarchy_ref.keys():
    rot_list_reference[ref_name] = rotalyze.rotalyze(
      pdb_hierarchy=self.pdb_hierarchy_ref[ref_name])
  model_hash = {}
  model_outliers = 0
  for rot in r.results:
    model_hash[rot.id_str()] = rot.rotamer_name
    if rot.rotamer_name == "OUTLIER":
      model_outliers += 1
  reference_hash = {}
  for ref_name in rot_list_reference.keys():
    reference_hash[ref_name] = {}
    for rot in rot_list_reference[ref_name].results:
      reference_hash[ref_name][rot.id_str()] = rot.rotamer_name

  print >> log, "** evaluating rotamers for working model **"
  model_chis = measure_chis(self.pdb_hierarchy)
  if model_outliers == 0:
    print >> log, "No rotamer outliers detected in working model"
    return
  print >> log, "Number of rotamer outliers: %d" % model_outliers

  print >> log, "\n** evaluating rotamers for reference model **"
  reference_chis = {}
  for ref_name in self.pdb_hierarchy_ref.keys():
    reference_chis[ref_name] = measure_chis(self.pdb_hierarchy_ref[ref_name])

  print >> log, "\n** fixing outliers **"
  sites_cart_start = xray_structure.sites_cart()
  for model in self.pdb_hierarchy.models():
    for chain in model.chains():
      # Single-character chain IDs are padded with a leading blank when
      # building the residue_match_hash key.
      if len(chain.id) == 1:
        chain_id = " "+chain.id
      else:
        chain_id = chain.id
      for residue_group in chain.residue_groups():
        conformers = residue_group.conformers()
        if len(conformers) > 1:
          print >> log, \
            " %s%5s %s has multiple conformations, **skipping**" % (
              chain.id, residue_group.resid(),
              " "+residue_group.atom_groups()[0].resname)
          continue
        for conformer in conformers:
          for residue in conformer.residues():
            if residue.resname == "PRO":
              continue
            key = utils.id_str(
              chain_id=chain.id,
              resseq=residue_group.resseq,
              resname=residue_group.atom_groups()[0].resname,
              icode=residue_group.icode,
              altloc=conformer.altloc)
            file_key = '%s%s%s' % (
              residue.resname, chain_id, residue_group.resid())
            file_key = file_key.strip()
            file_match = self.residue_match_hash.get(file_key)
            if file_match is None:
              continue
            # file_match[0] selects the reference model; file_match[1] is
            # converted into that model's residue key.
            ref_name = file_match[0]
            ref_key = self.one_key_to_another(file_match[1])
            model_rot = model_hash.get(key)
            reference_rot = reference_hash[ref_name].get(ref_key)
            m_chis = model_chis.get(key)
            r_chis = reference_chis[ref_name].get(ref_key)
            if (model_rot is None) or (reference_rot is None) or \
               (m_chis is None) or (r_chis is None):
              continue
            if model_rot == 'OUTLIER' and reference_rot != 'OUTLIER':
              self.change_residue_rotamer_in_place(
                sites_cart_start, residue, m_chis, r_chis, self.mon_lib_srv)
              xray_structure.set_sites_cart(sites_cart_start)
            elif self.params.strict_rotamer_matching and \
                 (model_rot != 'OUTLIER' and reference_rot != 'OUTLIER'):
              if model_rot != reference_rot:
                self.change_residue_rotamer_in_place(
                  sites_cart_start, residue, m_chis, r_chis,
                  self.mon_lib_srv)
                xray_structure.set_sites_cart(sites_cart_start)