def parse_workflow_commands():
    """Handle the 'info' and 'repeat' sub-commands given on the command line.

    Reads ``sys.argv`` directly.  Returns False when the first argument is
    not one of the two sub-commands (the caller then does its normal
    processing) and True when a sub-command was recognised and handled.
    Exits with status 1 if a repeated job raises JobError.
    """
    argv = sys.argv[1:]
    command = argv[0] if argv else None
    if command not in ('info', 'repeat'):
        return False
    if len(argv) == 1:
        sys.stderr.write("Specify output_dir.\n")
        return True

    prog = __package__ or os.path.basename(sys.argv[0])

    # it's handy to use "/my/path/05-cad.log" as "/my/path" "5"
    target = argv[1]
    suffix = os.path.splitext(target)[1]
    if os.path.isfile(target) and suffix in ('.log', '.err'):
        log_dir, log_name = os.path.split(target)
        argv[1:2] = [log_dir, log_name.split('-')[0]]

    output_dir = argv[1]
    wf = open_pickled_workflow(output_dir)
    steps = argv[2:]
    if not steps:
        # no steps given: summarise the whole workflow
        show_workflow_info(wf, dict(prog=prog, output_dir=output_dir))
        return True

    for job in parse_steps(steps, wf):
        if command == 'info':
            show_job_info(job)
            continue
        # the only other accepted command is 'repeat'
        try:
            job.data = {}  # reset data from parsing
            job.run()
            utils.comment("\n")
        except JobError as err:
            utils.put_error(err.msg, comment=err.note)
            sys.exit(1)
    return True
def check_contaminants_if_bad(wf, mtz):
    """Print contaminant information when the final refinement looks poor.

    Nothing is printed when the workflow has a final refinement job whose
    'free_r' value (1 if absent) does not exceed GOOD_FINAL_RFREE.
    Otherwise the MTZ metadata is passed to ``contaminants.get_info`` and
    any non-empty result is written out with ``comment``.
    """
    final_job = wf.get_final_refinement_job()
    if final_job and not (final_job.data.get('free_r', 1) > GOOD_FINAL_RFREE):
        return
    meta = wf.read_mtz_metadata(mtz)  # it's cached
    message = contaminants.get_info(meta)
    if message:
        comment('\n' + message)
def main(args):
    """Run dimple from parsed command-line arguments.

    Returns None when a workflow sub-command was handled instead,
    otherwise the process exit status: 0 on success, 1 when the pipeline
    stopped on a JobError or RuntimeError.
    """
    if workflow.parse_workflow_commands():
        return

    options = parse_dimple_commands(args)
    check_ccp4_envvars()
    out_dir = options.output_dir
    wf = workflow.Workflow(out_dir, from_job=options.from_step)
    utils.start_log(os.path.join(out_dir, "dimple.log"), output_dir=out_dir)
    utils.log_value("version", __version__)
    utils.start_log_screen(os.path.join(out_dir, "screen.log"))

    # pessimistic default; cleared only when the whole pipeline succeeds
    exit_status = 1
    try:
        dimple(wf=wf, opt=options)
        check_contaminants_if_bad(wf, mtz=options.mtz)
        exit_status = 0
    except workflow.JobError as err:
        put_error(err.msg, comment=err.note)
        # the disk space report can be interrupted by the user
        try:
            utils.report_disk_space([wf.output_dir, os.getenv("CCP4_SCR")])
        except KeyboardInterrupt:
            comment("\nok, exiting...")
    except RuntimeError as err:
        put_error(err)
    finally:
        comment("\n")

    if options.cleanup:
        wf.delete_files(wf.temporary_files)
    # the options are stored in the pickled workflow
    wf.options = options
    wf.dump_pickle()
    return exit_status
def _comment_summary_line(name, meta):
    """Print a one-line summary (symmetry and unit cell) for a file.

    ``meta`` supplies symmetry, a, b, c, alpha, beta and gamma.  When it
    is falsy only the name followed by '???' is printed.  Angles equal to
    90 are shown as '90'; other angles use their ``str()`` form.
    """
    if not meta:
        comment('\n%-21s ???' % name)
        return
    head = (name, meta.symmetry, meta.a, meta.b, meta.c)
    angles = tuple('90' if value == 90. else str(value)
                   for value in (meta.alpha, meta.beta, meta.gamma))
    comment('\n%-21s %-12s (%.2f, %.2f, %.2f, %s, %s, %s)' % (head + angles))
def check_freerflags_column(free_mtz, expected_symmetry):
    """Return the name of the free-R flag column found in ``free_mtz``.

    free_mtz: path of the MTZ file holding the free-R flags.
    expected_symmetry: metadata object (with a ``symmetry`` attribute) the
        reference file is compared against; may be None/falsy, in which
        case the symmetry comparison is skipped.

    A warning is printed when the symmetries do not match.  The first of
    'FreeR_flag' and 'FREE' present in the file is type-checked and
    returned.  If neither is present an error is reported and the program
    exits with status 1.
    """
    names = ['FreeR_flag', 'FREE']
    rfree_meta = read_metadata(free_mtz)
    # Without the truthiness guard the warning below would dereference
    # expected_symmetry.symmetry on None.
    if expected_symmetry and not match_symmetry(rfree_meta,
                                                expected_symmetry):
        comment("\nWARNING: R-free flag reference file is %s not %s." %
                (rfree_meta.symmetry, expected_symmetry.symmetry))
    for name in names:
        if name in rfree_meta.columns:
            rfree_meta.check_col_type(name, 'I')
            return name
    put_error("free-R column not found in %s" % free_mtz)
    sys.exit(1)
def find_path():
    """Return the path of the coot launcher, or None if WinCoot is missing.

    On non-Windows systems the result of ``utils.syspath("coot")`` is
    returned.  On Windows the first existing candidate is returned; when
    none exists a note is printed and None is returned.
    """
    if os.name != 'nt':
        return utils.syspath("coot")
    candidates = [
        "C:/WinCoot/wincoot.bat",  # since WinCoot 0.8.8
        "C:/WinCoot/runwincoot.bat",  # WinCoot prior to 0.8.8
        utils.cbin("coot.bat"),  # CCP4 script added in 2018
    ]
    found = next((p for p in candidates if os.path.exists(p)), None)
    if found is None:
        utils.comment("\nNote: WinCoot not found.\n")
    return found
def dls_name_filter(pdbs):
    """Keep only the PDB files whose names occur in the current directory path.

    Filename matching used in Diamond synchrotron.  PDB filenames
    are matched against the current (!) directory.
    It's more relaxed than in solve_o_matic's select_pdb.py:
    case-insensitive and ignoring non-alphanumeric characters.

    Returns the list of matching entries of ``pdbs`` (original order); a
    comment is printed when some entries were dropped.
    """
    def only_alnum(text, keep=''):
        return ''.join(ch for ch in text.lower()
                       if ch.isalnum() or (keep and ch == keep))

    # slashes are kept in the directory pattern, everything else
    # non-alphanumeric is dropped
    cwd_pattern = only_alnum(os.getcwd(), keep='/')

    matched_pdbs = []
    for candidate in pdbs:
        # the part of the file name before the first dot
        stem = os.path.basename(candidate).split('.')[0]
        if only_alnum(stem) in cwd_pattern:
            matched_pdbs.append(candidate)

    if matched_pdbs != pdbs:
        comment("\n%d of %d PDBs have filenames matching data directory" %
                (len(matched_pdbs), len(pdbs)))
    return matched_pdbs
def check_freerflags_column(free_mtz, expected_symmetry, column):
    """Return the name of the free-R flag column to use from ``free_mtz``.

    A warning is printed when ``expected_symmetry`` is given and does not
    match the symmetry of the file.  When ``column`` is not None it is
    type-checked and returned (the program exits with status 1 if the
    check fails).  Otherwise the first of DEFAULT_FREE_COLS present in the
    file is type-checked and returned; if none is present an error is
    reported and the program exits with status 1.
    """
    rfree_meta = read_metadata(free_mtz)
    symmetry_ok = (not expected_symmetry or
                   match_symmetry(rfree_meta, expected_symmetry))
    if not symmetry_ok:
        comment("\nWARNING: R-free flag reference file is %s not %s." %
                (rfree_meta.symmetry, expected_symmetry.symmetry))

    if column is None:
        # fall back to the conventional column names
        found = next((candidate for candidate in DEFAULT_FREE_COLS
                      if candidate in rfree_meta.columns), None)
        if found is None:
            put_error("free-R column not found in %s" % free_mtz)
            sys.exit(1)
        rfree_meta.check_col_type(found, 'I')
        return found

    # explicitly requested column: a failed type check is fatal
    if rfree_meta.check_col_type(column, 'I'):
        return column
    sys.exit(1)
def check_freerflags_column(free_mtz, expected_symmetry, column):
    """Return the name of the free-R flag column to use from ``free_mtz``.

    free_mtz: path of the MTZ file holding the free-R flags.
    expected_symmetry: metadata object (with a ``symmetry`` attribute) the
        reference file is compared against; may be None/falsy, in which
        case the symmetry comparison is skipped.
    column: explicitly requested column name, or None to pick the first of
        DEFAULT_FREE_COLS present in the file.

    A warning is printed on symmetry mismatch.  The program exits with
    status 1 if the requested column fails the type check or if no free-R
    column can be found.
    """
    rfree_meta = read_metadata(free_mtz)
    # Without the truthiness guard the warning below would dereference
    # expected_symmetry.symmetry on None.
    if expected_symmetry and not match_symmetry(rfree_meta,
                                                expected_symmetry):
        comment("\nWARNING: R-free flag reference file is %s not %s." %
                (rfree_meta.symmetry, expected_symmetry.symmetry))
    if column is not None:
        if not rfree_meta.check_col_type(column, 'I'):
            sys.exit(1)
        return column
    for name in DEFAULT_FREE_COLS:
        if name in rfree_meta.columns:
            rfree_meta.check_col_type(name, 'I')
            return name
    put_error("free-R column not found in %s" % free_mtz)
    sys.exit(1)
def _after_phaser_comments(phaser_job, sg_in): phaser_data = phaser_job.data if 'error' in phaser_data: comment('\n' + phaser_data['error']) if (phaser_job.exit_status != 0 or phaser_data['info'] == 'Sorry - No solution'): comment('\nGiving up.') return False solu_set = phaser_data.get('status', '') if phaser_data['info'].endswith('...'): comment('\n...' + solu_set[len(phaser_data['info']) - 3:]) if phaser_data.get('partial_solution'): # counting TF*0 or TFZ=number, but not TFZ==number n_comp = (solu_set.count('TF') - solu_set.count('TFZ==') + solu_set.count('+TNCS')) comment('\nSolution found with %d components.' % n_comp) if phaser_data['SG'] != sg_in: comment('\nSpacegroup changed to %s' % phaser_data['SG']) return True
def _after_phaser_comments(phaser_job, sg_in): phaser_data = phaser_job.data if 'error' in phaser_data: comment("\n" + phaser_data['error']) if (phaser_job.exit_status != 0 or phaser_data['info'] == 'Sorry - No solution'): comment("\nGiving up.") return False solu_set = phaser_data.get('status', '') if phaser_data['info'].endswith('...'): comment("\n..." + solu_set[len(phaser_data['info'])-3:]) if phaser_data.get('partial_solution'): # counting TF*0 or TFZ=number, but not TFZ==number n_comp = (solu_set.count('TF') - solu_set.count('TFZ==') + solu_set.count('+TNCS')) comment("\nSolution found with %d components." % n_comp) if phaser_data['SG'] != sg_in: comment("\nSpacegroup changed to %s" % phaser_data['SG']) return True
def guess_number_of_molecules(mtz_meta, rw_data, vol_ratio):
    """Estimate how many copies of the model to search for.

    mtz_meta: provides ``asu_volume()`` of the data.
    rw_data: dict with 'weight' and, optionally, 'solvent_percent'.
    vol_ratio: ratio of data and model asu volumes, or None.

    Returns an int number of molecules, or a float (``vol_ratio`` or a
    volume-based estimate) when the model is too big to fit.
    """
    asu_vol = mtz_meta.asu_volume()
    mol_weight = rw_data['weight']

    # if the number of molecules seems to be 1 or 2, don't go into Matthews
    if vol_ratio and rw_data.get('solvent_percent', 100) < HIGH_SOLVENT_PCT:
        if 0.7 < vol_ratio < 1.33:
            return 1
        if 1.8 < vol_ratio < 2.2:
            return 2

    # Vm = Va/(n*M)
    # Vs = 1 - 1.23/Vm  =>  Vs = 1 - n * 1.23*M/Va
    def solvent_pct(count):
        return 100 * (1 - count * 1.23 * mol_weight / asu_vol)

    # For our purpose, it's better to overestimate the number of molecules,
    # because we can use "partial solution" from Phaser.
    # OTOH the search with overestimated n is slower and more likely to fail.
    # We also have preference for even numbers because they are more frequent
    # and Phaser can make use of tNCS if it's present.
    # Let's pick the largest n that gives solvent content (Vs) at least 30%.
    # An odd n is decreased by one when n-1 molecules still give Vs < 45%.
    # Vm = Va/(n*M)  =>  n = Va/(Vm*M)
    # 1-1.23/Vm=30%  =>  Vm=1.76
    n = max(int(asu_vol / (1.76 * mol_weight)), 1)
    if n % 2 == 1 and solvent_pct(n-1) < 45:
        n -= 1

    solvent_n = solvent_pct(n)
    if solvent_n < 10:  # model too big, won't fit
        return float(vol_ratio or asu_vol / (2.4 * mol_weight))

    if n <= 1:
        comment("\n%.0f%% solvent for single component." % solvent_n)
        return n
    # 1-1.23/Vm=50%  =>  Vm=2.46
    fewer = min(int(round(asu_vol / (2.46 * mol_weight))), n-1)
    comment("\n%.0f%% solvent for %d, %.0f%% for %d components."
            % (solvent_pct(fewer), fewer, solvent_n, n))
    return n
def download_pdb(pdb_id, output_dir):
    """Fetch a PDB entry from RCSB into ``output_dir`` and return its path.

    pdb_id: PDB code; it is upper-cased for the file name and the URL.
    output_dir: directory for the file, created if it does not exist.

    An already existing file is reused without any network access.
    On a download or write failure an error is reported and the program
    exits with status 1.
    """
    filename = pdb_id.upper() + '.pdb'
    path = os.path.join(output_dir, filename)
    if os.path.exists(path):
        comment('%s: using existing file %s\n' % (pdb_id, filename))
        return path

    comment('Downloading %s from RCSB... ' % pdb_id)
    # NOTE(review): legacy RCSB endpoint - confirm it is still served.
    url = ('http://www.rcsb.org/pdb/download/downloadFile.do'
           '?fileFormat=pdb&compression=NO&structureId=' + pdb_id.upper())
    try:
        response = urlopen(url)
        try:
            content = response.read()
        finally:
            # release the connection even if reading fails
            response.close()
    except (HTTPError, IOError) as e:
        # IOError also covers URLError (no network, unknown host) and
        # errors raised while reading the body
        put_error(str(e))
        sys.exit(1)

    try:
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        with open(path, 'wb') as f:
            f.write(content)
        comment('done.\n')
    except (IOError, OSError) as e:
        # os.makedirs raises OSError, which is not an IOError on Python 2
        put_error('Failed to save downloaded file on disk',
                  comment=str(e))
        sys.exit(1)
    return path
def download_pdb(pdb_id, output_dir):
    """Fetch a PDB entry from RCSB into ``output_dir`` and return its path.

    pdb_id: PDB code; it is upper-cased for the file name and the URL.
    output_dir: directory for the file, created if it does not exist.

    An already existing file is reused without any network access.
    On a download or write failure an error is reported and the program
    exits with status 1.
    """
    filename = pdb_id.upper()+'.pdb'
    path = os.path.join(output_dir, filename)
    if os.path.exists(path):
        comment('%s: using existing file %s\n' % (pdb_id, filename))
        return path

    comment('Downloading %s from RCSB... ' % pdb_id)
    # NOTE(review): legacy RCSB endpoint - confirm it is still served.
    url = ('http://www.rcsb.org/pdb/download/downloadFile.do'
           '?fileFormat=pdb&compression=NO&structureId=' + pdb_id.upper())
    try:
        response = urllib2.urlopen(url)
        try:
            content = response.read()
        finally:
            # release the connection even if reading fails
            response.close()
    except (urllib2.URLError, IOError) as e:
        # URLError is the base class of HTTPError and also covers
        # connection-level failures; IOError covers errors while reading
        put_error(str(e))
        sys.exit(1)

    try:
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        with open(path, 'wb') as f:
            f.write(content)
        comment('done.\n')
    except (IOError, OSError) as e:
        # os.makedirs raises OSError, which is not an IOError on Python 2
        put_error('Failed to save downloaded file on disk',
                  comment=str(e))
        sys.exit(1)
    return path
def main(args):
    """Run dimple from parsed command-line arguments.

    Returns None when a workflow sub-command was handled instead,
    otherwise the exit status: 0 on success, 1 when the pipeline stopped
    on a JobError or RuntimeError.  Exits with status 1 if $CCP4 or
    $CCP4_SCR is not set.
    """
    if workflow.parse_workflow_commands():
        return

    options = parse_dimple_commands(args)

    # both variables must be set; the first missing one is reported
    missing = [var for var in ("CCP4", "CCP4_SCR") if var not in os.environ]
    if missing:
        put_error('$%s not found, giving up' % missing[0])
        sys.exit(1)
    scratch_dir = os.environ["CCP4_SCR"]
    if not os.path.isdir(scratch_dir):
        # only reported, the run continues
        put_error('No such directory: $CCP4_SCR, refmac shall not work!')

    out_dir = options.output_dir
    wf = workflow.Workflow(out_dir, from_job=options.from_step)
    utils.start_log(os.path.join(out_dir, "dimple.log"), output_dir=out_dir)
    utils.log_value("version", __version__)
    utils.start_log_screen(os.path.join(out_dir, "screen.log"))

    # pessimistic default; cleared only when the whole pipeline succeeds
    exit_status = 1
    try:
        dimple(wf=wf, opt=options)
        exit_status = 0
    except workflow.JobError as err:
        put_error(err.msg, comment=err.note)
        # the disk space report can be interrupted by the user
        try:
            utils.report_disk_space([wf.output_dir, os.getenv("CCP4_SCR")])
        except KeyboardInterrupt:
            comment("\nok, exiting...")
    except RuntimeError as err:
        put_error(err)
    finally:
        comment("\n")

    if options.cleanup:
        wf.delete_files(wf.temporary_files)
    # the options are stored in the pickled workflow
    wf.options = options
    wf.dump_pickle()
    return exit_status
def main(args):
    """Run dimple from parsed command-line arguments.

    Returns None when a workflow sub-command was handled instead,
    otherwise the exit status: 0 on success, 1 when the pipeline stopped
    on a JobError, RuntimeError, IOError or OSError, or when the workflow
    could not be pickled.
    """
    if workflow.parse_workflow_commands():
        return

    options = parse_dimple_commands(args)
    check_ccp4_envvars()
    # Bound before the try block: the Workflow constructor is inside it, so
    # after a handled failure there the name would otherwise be undefined
    # in the clean-up code below.
    wf = None
    try:
        wf = workflow.Workflow(options.output_dir, from_job=options.from_step)
        utils.start_log(os.path.join(options.output_dir, 'dimple.log'),
                        output_dir=options.output_dir)
        utils.log_value('version', __version__)
        utils.start_log_screen(os.path.join(options.output_dir, 'screen.log'))
        dimple(wf=wf, opt=options)
        check_contaminants_if_bad(wf, mtz=options.mtz)
        exit_status = 0
    except workflow.JobError as e:
        put_error(e.msg, comment=e.note)
        out_dir = wf.output_dir if wf is not None else options.output_dir
        # the disk space report can be interrupted by the user
        try:
            utils.report_disk_space([out_dir, os.getenv('CCP4_SCR')])
        except KeyboardInterrupt:
            comment('\nok, exiting...')
        exit_status = 1
    except (RuntimeError, IOError, OSError) as e:
        put_error(e)
        exit_status = 1
    finally:
        comment('\n')

    if wf is None:
        # the workflow was never created: nothing to clean up or pickle
        return exit_status
    if options.cleanup:
        wf.delete_files(wf.temporary_files)
    # the options are stored in the pickled workflow
    wf.options = options
    try:
        wf.dump_pickle()
    except IOError as e:
        put_error(e)
        exit_status = 1
    return exit_status
def guess_number_of_molecules(mtz_meta, rw_data, vol_ratio):
    """Estimate how many copies of the model to search for.

    mtz_meta: provides ``asu_volume()`` of the data.
    rw_data: dict with 'weight' and, optionally, 'solvent_percent'.
    vol_ratio: ratio of data and model asu volumes, or None.

    Returns an int number of molecules, or a float (``vol_ratio`` or a
    volume-based estimate) when the solvent content is so low that the
    model should be split.
    """
    # if the number of molecules seems to be 1 or 2, don't go into Matthews
    if vol_ratio and rw_data.get('solvent_percent', 100) < HIGH_SOLVENT_PCT:
        if 0.7 < vol_ratio < 1.33:
            return 1
        if 1.8 < vol_ratio < 2.2:
            return 2

    asu_vol = mtz_meta.asu_volume()
    mol_weight = rw_data['weight']

    # Vm = Va/(n*M)
    # Vs = 1 - 1.23/Vm  =>  Vs = 1 - n * 1.23*M/Va
    def solvent_pct(count):
        return 100 * (1 - count * 1.23 * mol_weight / asu_vol)

    # For our purpose, it's better to overestimate the number of molecules,
    # because we can use "partial solution" from Phaser.
    # OTOH the search with overestimated n is slower and more likely to fail.
    # We also have preference for even numbers because they are more frequent
    # and Phaser can make use of tNCS if it's present.
    # Let's pick the largest n that gives solvent content (Vs) at least 30%.
    # An odd n is decreased by one when n-1 molecules still give Vs < 45%.
    # Vm = Va/(n*M)  =>  n = Va/(Vm*M)
    # 1-1.23/Vm=30%  =>  Vm=1.76
    n = max(int(asu_vol / (1.76 * mol_weight)), 1)
    if n % 2 == 1 and solvent_pct(n - 1) < 45:
        n -= 1
    solvent_n = solvent_pct(n)

    if n > 1:
        # 1-1.23/Vm=50%  =>  Vm=2.46
        fewer = min(int(round(asu_vol / (2.46 * mol_weight))), n - 1)
        message = ('\n%.0f%% solvent for %d, %.0f%% for %d components.'
                   % (solvent_pct(fewer), fewer, solvent_n, n))
    elif solvent_n > 0:
        message = '\n%.0f%% solvent for single component.' % solvent_n
    else:
        message = '\nModel too big to fit in the unit cell.'
    comment(message)

    # if model is too big we will try to split it
    needs_split = (solvent_n < VERY_LOW_SOLVENT_PCT or
                   (solvent_n < LOW_SOLVENT_PCT and vol_ratio))
    if not needs_split:
        return n
    comment(' Let us try to split the model.')
    return float(vol_ratio or asu_vol / (2.4 * mol_weight))
def dimple(wf, opt):
    """Run the dimple pipeline for one data file and a set of models.

    The stages, in order, are: selection of the model most similar to the
    data, optional removal of HETATM records, rwcontents, optional
    reindexing with pointless, conversion of intensities to amplitudes
    with (c)truncate when needed, rigid-body refinement with refmac5,
    molecular replacement (phaser or molrep) when rigid-body refinement
    was skipped, failed or gave too high R, preparation of free-R flags,
    restrained refinement and, finally, blob search with script/picture
    generation.

    wf: workflow object; jobs are created through it and run immediately.
    opt: parsed options.  Some attributes are modified in place
        (pdbs, fcolumn, sigfcolumn).

    Returns None.  The function returns early when no usable PDB file is
    given, when MR would be needed but is disabled (mr_when_r >= 1), or
    when phaser does not give a solution.
    """
    comment(" ### Dimple v%s. Problems and suggestions:"
            " ccp4.github.io/dimple ###" % __version__)
    mtz_meta = wf.read_mtz_metadata(opt.mtz)
    _comment_summary_line("MTZ (%.1fA)" % mtz_meta.dmax, mtz_meta)
    if opt.dls_naming:
        opt.pdbs = dls_name_filter(opt.pdbs)
    opt.pdbs = utils.filter_out_duplicate_files(opt.pdbs, relto=opt.output_dir)
    if not opt.pdbs:
        comment("\nNo non-empty pdb files given. Nothing to do.")
        return
    for p in opt.pdbs:
        wf.read_pdb_metadata(p, print_errors=(len(opt.pdbs) > 1))
    if len(opt.pdbs) > 1:
        comment("\nPDBs in order of similarity (using the first one):")
        opt.pdbs.sort(key=lambda x: calculate_difference(wf.file_info[x],
                                                         mtz_meta))
    utils.log_value("data_file", opt.mtz)
    utils.log_value("pdb_files", opt.pdbs)
    for p in opt.pdbs:
        _comment_summary_line(os.path.basename(p), wf.file_info[p])
    # only the first (most similar) model is used from here on
    ini_pdb = "ini.pdb"
    wf.copy_uncompressed(opt.pdbs[0], ini_pdb)
    pdb_meta = wf.file_info[opt.pdbs[0]]
    if pdb_meta is None:
        put_error("PDB file missing CRYST1 record, starting from MR")
    if opt.no_hetatm or check_hetatm_x(wf.path(ini_pdb), pdb_meta):
        if not opt.no_hetatm:
            comment("\nHETATM marked as element X would choke many programs.")
        rb_xyzin = "prepared.pdb"
        wf.temporary_files.add(rb_xyzin)
        n_het = wf.remove_hetatm(xyzin=ini_pdb, xyzout=rb_xyzin,
                                 remove_all=opt.no_hetatm)
        comment("\nRemoved %d HETATM atoms" % n_het)
    else:
        rb_xyzin = ini_pdb
    # run rwcontents even without CRYST1 - it will show mol. weight only
    wf.rwcontents(xyzin=rb_xyzin).run()
    rw_data = wf.jobs[-1].data
    if pdb_meta is None:
        pass  # we already had a warning message
    elif rw_data.get('solvent_percent') is None:
        put_error("rwcontents could not interpret %s" % rb_xyzin)
    elif rw_data['solvent_percent'] > HIGH_SOLVENT_PCT:
        comment("\nHmm... %.1f%% of solvent or incomplete model"
                % rw_data['solvent_percent'])
        if abs(wf.jobs[-1].data.get('volume', 0)
               - pdb_meta.get_volume()) > 10:
            comment("\ndebug: problem when calculating volume?")

    ####### pointless - reindexing #######
    # reindexing is attempted only for matching symmetry and similar
    # unit cell volumes, and not when MR is requested unconditionally
    if match_symmetry(mtz_meta, pdb_meta) and opt.mr_when_r > 0 and (
            0.7 < mtz_meta.get_volume() / pdb_meta.get_volume() < 1.4):
        reindexed_mtz = "pointless.mtz"
        wf.temporary_files.add(reindexed_mtz)
        wf.pointless(hklin=opt.mtz, xyzin=rb_xyzin, hklout=reindexed_mtz,
                     keys="TOLERANCE 5").run(may_fail=True)
        alt_reindex = wf.jobs[-1].data.get('alt_reindex')
        if wf.jobs[-1].exit_status == 0 and alt_reindex:
            for ar in alt_reindex:
                comment("\n %-10s CC: %-8.3f cell diff: %.1fA" % (
                    ar['op'], ar['cc'], ar['cell_deviat']))
        else:
            # until recently (2015) pointless didn't print CC for non-ambiguous
            # spacegroups (e.g. C2), but now it always prints
            comment("\n no good indexing")
            reindexed_mtz = opt.mtz
    else:
        reindexed_mtz = opt.mtz
    reindexed_mtz_meta = wf.read_mtz_metadata(reindexed_mtz)
    if reindexed_mtz_meta.symmetry != mtz_meta.symmetry:
        _comment_summary_line('reindexed MTZ', reindexed_mtz_meta)

    ####### (c)truncate - calculate amplitudes if needed #######
    if not opt.fcolumn:
        opt.fcolumn = 'F' if 'F' in mtz_meta.columns else 'FP'
    elif opt.icolumn or opt.ItoF_prog:
        put_error('Ignoring options --fcolumn/--sigfcolumn')
    opt.sigfcolumn = opt.sigfcolumn.replace('<FCOL>', opt.fcolumn)
    if (opt.ItoF_prog or opt.icolumn or opt.fcolumn not in mtz_meta.columns
            or opt.sigfcolumn not in mtz_meta.columns):
        mtz_meta.check_col_type(opt.icolumn or 'IMEAN', 'J')
        mtz_meta.check_col_type(opt.sigicolumn, 'Q')
        f_mtz = "amplit.mtz"
        wf.temporary_files.add(f_mtz)
        i_sigi_cols = (opt.icolumn or 'IMEAN', opt.sigicolumn)
        if opt.ItoF_prog == 'ctruncate' or (opt.ItoF_prog is None
                                            and opt.slow):
            wf.ctruncate(hklin=reindexed_mtz, hklout=f_mtz,
                         colin="/*/*/[%s,%s]" % i_sigi_cols).run()
        else:
            wf.truncate(hklin=reindexed_mtz, hklout=f_mtz,
                        labin="IMEAN=%s SIGIMEAN=%s" % i_sigi_cols,
                        labout="F=F SIGF=SIGF").run()
        # from now on use the columns written by (c)truncate
        opt.fcolumn = 'F'
        opt.sigfcolumn = 'SIGF'
    else:
        f_mtz = reindexed_mtz

    ####### rigid body - check if model is good for refinement? #######
    refmac_labin_nofree = "FP=%s SIGFP=%s" % (opt.fcolumn, opt.sigfcolumn)
    # refmac_xyzin stays None when MR is needed
    refmac_xyzin = None
    cell_diff = calculate_difference(pdb_meta, reindexed_mtz_meta)
    if pdb_meta is None:
        pass  # the error message was already printed
    elif opt.mr_when_r <= 0:
        comment("\nMR requested unconditionally.")
    elif cell_diff > 0.1 and opt.mr_when_r < 1:
        comment("\nDifferent unit cells.")
    elif pdb_meta.symmetry != reindexed_mtz_meta.symmetry:
        comment("\nDifferent space groups.")
    else:
        comment("\nRigid-body refinement with resolution 3.5 A, 10 cycles.")
        if 'aa_count' in rw_data and 'water_count' in rw_data:
            # guard against a model without amino-acid residues, which
            # would otherwise raise ZeroDivisionError
            if rw_data['aa_count'] != 0:
                comment(" %.1f waters/aa." % (rw_data['water_count'] /
                                              rw_data['aa_count']))
            else:
                comment(' %d/0 waters/aa.' % rw_data['water_count'])
        wf.temporary_files |= {"refmacRB.pdb", "refmacRB.mtz"}
        # it may fail because of "Disagreement between mtz and pdb"
        wf.refmac5(hklin=f_mtz, xyzin=rb_xyzin,
                   hklout="refmacRB.mtz", xyzout="refmacRB.pdb",
                   labin=refmac_labin_nofree,
                   libin=None,
                   keys="""refinement type rigidbody resolution 15 3.5
                           rigidbody ncycle 10""").run(may_fail=True)
        # if the error is caused by mtz/pdb disagreement, continue with MR
        if wf.jobs[-1].exit_status != 0:
            comment("\nTry MR.")
        elif not wf.jobs[-1].data.get("overall_r"):
            comment("\nWARNING: unknown R factor, something went wrong.\n")
            refmac_xyzin = "refmacRB.pdb"
        elif wf.jobs[-1].data["overall_r"] > opt.mr_when_r:
            comment("\nRun MR for R > %g." % opt.mr_when_r)
        else:
            comment("\nNo MR for R < %g." % opt.mr_when_r)
            refmac_xyzin = "refmacRB.pdb"

    ####### phaser/molrep - molecular replacement #######
    if refmac_xyzin is None:
        vol_ratio = None
        if pdb_meta:
            # num_mol accounts for strict NCS (MTRIX without iGiven)
            vol_ratio = (mtz_meta.asu_volume() /
                         pdb_meta.asu_volume(rw_data['num_mol']))
            comment(" Volume of asu: %.1f%% of model asu."
                    % (100 * vol_ratio))
        if opt.mr_when_r >= 1:
            comment("\nWould try MR, but it is disabled.")
            return
        if opt.mr_num:
            mr_num = opt.mr_num
        else:
            mr_num = guess_number_of_molecules(mtz_meta, rw_data, vol_ratio)
        mw = rw_data.get('weight')
        # a float mr_num means the model is to be split with ensembler;
        # it is then scaled by the number of resulting models
        if isinstance(mr_num, float):
            wf.ensembler(pdbin=rb_xyzin, root='ens').run()
            n_models = len(wf.jobs[-1].data['models'])
            mw = None
            rb_xyzin = "ens_merged.pdb"
            mr_num = max(int(round(mr_num * n_models)), 1)
        # phaser is used by default if number of searched molecules is known
        if opt.mr_prog == 'molrep':
            wf.temporary_files |= {"molrep.pdb", "molrep_dimer.pdb",
                                   "molrep.crd"}
            wf.molrep(f=f_mtz, m=rb_xyzin).run()
            refmac_xyzin = "molrep.pdb"
        else:
            wf.temporary_files |= {"phaser.1.pdb", "phaser.1.mtz"}
            wf.phaser_auto(hklin=f_mtz,
                           labin="F=%s SIGF=%s" % (opt.fcolumn,
                                                   opt.sigfcolumn),
                           model=dict(pdb=rb_xyzin, identity=100, num=mr_num,
                                      mw=mw),
                           sg_alt="ALL", opt=opt,
                           root='phaser').run(may_fail=True)
            if not _after_phaser_comments(wf.jobs[-1],
                                          sg_in=reindexed_mtz_meta.symmetry):
                return
            refmac_xyzin = "phaser.1.pdb"
            f_mtz = "phaser.1.mtz"

    # disabled code path, kept as in the original
    if False:
        wf.findwaters(pdbin=refmac_xyzin, hklin=f_mtz,
                      f="FC", phi="PHIC", pdbout="prepared_wat.pdb", sigma=2)
        refmac_xyzin = "prepared_wat.pdb"

    ####### adding free-R flags #######
    f_mtz_meta = wf.read_mtz_metadata(f_mtz)
    cad_reso = opt.reso or (f_mtz_meta.dmax - MtzMeta.d_eps)
    if opt.free_r_flags:
        free_mtz = opt.free_r_flags
        free_col = check_freerflags_column(wf.path(free_mtz),
                                           expected_symmetry=pdb_meta,
                                           column=opt.freecolumn)
        comment("\nFree-R flags from the %s file, column %s." %
                (("reference" if free_mtz != opt.mtz else 'input'), free_col))
    else:
        free_col = DEFAULT_FREE_COLS[0]
        if free_col in f_mtz_meta.columns:
            comment("\nReplace free-R flags")
        else:
            comment("\nGenerate free-R flags")
        free_mtz = "free.mtz"
        wf.temporary_files |= {"unique.mtz", free_mtz}
        if opt.seed_freerflag or cell_diff > 1e3:  # i.e. different SG
            wf.unique(hklout="unique.mtz", ref=f_mtz_meta,
                      resolution=cad_reso).run()
        else:
            comment(" (repeatably)")
            # Here we'd like to have always the same set of free-r flags
            # for given PDB file. That's why we don't use information
            # from the data file (mtz).
            wf.unique(hklout="unique.mtz", ref=pdb_meta,
                      resolution=1.0).run()
        # CCP4 freerflag uses always the same pseudo-random sequence by default
        wf.freerflag(hklin="unique.mtz", hklout=free_mtz,
                     keys=("SEED" if opt.seed_freerflag else "")).run()

    if free_mtz == opt.mtz and opt.reso is None:
        prepared_mtz = f_mtz
    else:
        prepared_mtz = "prepared.mtz"
        wf.temporary_files.add(prepared_mtz)
        # merge data columns (without any old free-R column) with the flags
        wf.cad(data_in=[(f_mtz, [c for c in f_mtz_meta.columns
                                 if c != free_col]),
                        (free_mtz, [free_col])],
               hklout=prepared_mtz,
               keys=["sysab_keep",  # does it matter?
                     "reso overall 1000.0 %g" % cad_reso]).run()
    freerflag_missing = wf.count_mtz_missing(prepared_mtz, free_col)
    if freerflag_missing:
        wf.freerflag(hklin=prepared_mtz, hklout="prepared2.mtz",
                     keys="COMPLETE FREE="+free_col,
                     parser=" (again, for %d refl. more)" % freerflag_missing
                     ).run()
        prepared_mtz = "prepared2.mtz"
        wf.temporary_files.add(prepared_mtz)

    ####### refinement #######
    if opt.weight:
        refmac_weight = "matrix %f" % opt.weight
    else:
        refmac_weight = "auto"
    restr_ref_keys = """\
     make newligand continue
     refinement type restrained
     weight %s
     """ % refmac_weight
    if opt.freecolumn_val:
        restr_ref_keys += "free %s\n" % opt.freecolumn_val
    refmac_labin = "%s FREE=%s" % (refmac_labin_nofree, free_col)
    comment("\nRestrained refinement, %d+%d cycles."
            % (opt.jelly, opt.restr_cycles))
    if opt.jelly:
        wf.temporary_files |= {"jelly.pdb", "jelly.mtz"}
        wf.refmac5(hklin=prepared_mtz, xyzin=refmac_xyzin,
                   hklout="jelly.mtz", xyzout="jelly.pdb",
                   labin=refmac_labin, libin=opt.libin,
                   keys=restr_ref_keys+"ridge distance sigma 0.01\n"
                                       "make hydrogen no\n"
                                       "ncycle %d" % opt.jelly).run()
        comment(_refmac_rms_line(wf.jobs[-1].data))
        refmac_xyzin = "jelly.pdb"
    restr_job = wf.refmac5(hklin=prepared_mtz, xyzin=refmac_xyzin,
                           hklout=opt.hklout, xyzout=opt.xyzout,
                           labin=refmac_labin, libin=opt.libin,
                           keys=restr_ref_keys+("ncycle %d" %
                                                opt.restr_cycles)).run()
    comment(_refmac_rms_line(restr_job.data))
    # if that run is repeated with --from-step it's useful to compare Rfree
    if wf.from_job > 0 and wf.from_job <= len(wf.jobs):  # from_job is 1-based
        prev = [j for j in wf.repl_jobs if j.name == restr_job.name]
        if prev and prev[0].data and "free_r" in prev[0].data:
            comment("\nPreviously: R/Rfree %.4f/%.4f Rfree change: %+.4f" % (
                prev[0].data["overall_r"], prev[0].data["free_r"],
                restr_job.data["free_r"] - prev[0].data["free_r"]))

    ####### check blobs and finish #######
    if restr_job.data["free_r"] <= BAD_FINAL_RFREE:
        fb_job = wf.find_blobs(opt.hklout, opt.xyzout, sigma=0.8).run()
        coot_script = _generate_scripts_and_pictures(wf, opt, fb_job.data)
        if coot_script:
            comment("\nTo see it in Coot run %s" % coot_script)
    else:
        comment("\nGiving up (Rfree > %g). No blob search." % BAD_FINAL_RFREE)
        _generate_scripts_and_pictures(wf, opt, None)
def dimple(wf, opt):
    """Run the dimple pipeline for one data file and a set of models.

    The stages, in order, are: selection of the model most similar to the
    data, optional removal of HETATM records, rwcontents, optional
    reindexing with pointless, conversion of intensities to amplitudes
    with (c)truncate when needed, rigid-body refinement with refmac5,
    molecular replacement (phaser or molrep) when rigid-body refinement
    was skipped, failed or gave too high R, preparation of free-R flags,
    restrained refinement and, finally, blob search with script/picture
    generation.

    wf: workflow object; jobs are created through it and run immediately.
    opt: parsed options.  Some attributes are modified in place
        (pdbs, fcolumn, sigfcolumn).

    Returns None.  The function returns early when no usable PDB file is
    given, when MR would be needed but is disabled (mr_when_r >= 1), or
    when phaser does not give a solution.
    """
    comment(" ### Dimple v%s. Problems and suggestions:"
            " ccp4.github.io/dimple ###" % __version__)
    mtz_meta = wf.read_mtz_metadata(opt.mtz)
    _comment_summary_line("MTZ (%.1fA)" % mtz_meta.dmax, mtz_meta)
    if opt.dls_naming:
        opt.pdbs = dls_name_filter(opt.pdbs)
    opt.pdbs = utils.filter_out_duplicate_files(opt.pdbs, relto=opt.output_dir)
    if not opt.pdbs:
        comment("\nNo non-empty pdb files given. Nothing to do.")
        return
    for p in opt.pdbs:
        wf.read_pdb_metadata(p, print_errors=(len(opt.pdbs) > 1))
    if len(opt.pdbs) > 1:
        comment("\nPDBs in order of similarity (using the first one):")
        opt.pdbs.sort(key=lambda x: calculate_difference(wf.file_info[x],
                                                         mtz_meta))
    utils.log_value("data_file", opt.mtz)
    utils.log_value("pdb_files", opt.pdbs)
    for p in opt.pdbs:
        _comment_summary_line(os.path.basename(p), wf.file_info[p])
    # only the first (most similar) model is used from here on
    ini_pdb = "ini.pdb"
    wf.copy_uncompressed(opt.pdbs[0], ini_pdb)
    pdb_meta = wf.file_info[opt.pdbs[0]]
    if pdb_meta is None:
        put_error("PDB file missing CRYST1 record, starting from MR")
    if opt.no_hetatm or check_hetatm_x(wf.path(ini_pdb), pdb_meta):
        if not opt.no_hetatm:
            comment("\nHETATM marked as element X would choke many programs.")
        rb_xyzin = "prepared.pdb"
        wf.temporary_files.add(rb_xyzin)
        n_het = wf.remove_hetatm(xyzin=ini_pdb, xyzout=rb_xyzin,
                                 remove_all=opt.no_hetatm)
        comment("\nRemoved %d HETATM atoms" % n_het)
    else:
        rb_xyzin = ini_pdb
    # run rwcontents even without CRYST1 - it will show mol. weight only
    wf.rwcontents(xyzin=rb_xyzin).run()
    rw_data = wf.jobs[-1].data
    if pdb_meta is None:
        pass  # we already had a warning message
    elif rw_data.get('solvent_percent') is None:
        put_error("rwcontents could not interpret %s" % rb_xyzin)
    elif rw_data['solvent_percent'] > HIGH_SOLVENT_PCT:
        comment("\nHmm... %.1f%% of solvent or incomplete model"
                % rw_data['solvent_percent'])
        if abs(wf.jobs[-1].data.get('volume', 0)
               - pdb_meta.get_volume()) > 10:
            comment("\ndebug: problem when calculating volume?")

    ####### pointless - reindexing #######
    # reindexing is attempted only for matching symmetry and similar
    # unit cell volumes, and not when MR is requested unconditionally
    if match_symmetry(mtz_meta, pdb_meta) and opt.mr_when_r > 0 and (
            0.7 < mtz_meta.get_volume() / pdb_meta.get_volume() < 1.4):
        reindexed_mtz = "pointless.mtz"
        wf.temporary_files.add(reindexed_mtz)
        wf.pointless(hklin=opt.mtz, xyzin=rb_xyzin, hklout=reindexed_mtz,
                     keys="TOLERANCE 5").run(may_fail=True)
        alt_reindex = wf.jobs[-1].data.get('alt_reindex')
        if wf.jobs[-1].exit_status == 0 and alt_reindex:
            for ar in alt_reindex:
                comment("\n %-10s CC: %-8.3f cell diff: %.1fA" % (
                    ar['op'], ar['cc'], ar['cell_deviat']))
        else:
            # until recently (2015) pointless didn't print CC for non-ambiguous
            # spacegroups (e.g. C2), but now it always prints
            comment("\n no good indexing")
            reindexed_mtz = opt.mtz
    else:
        reindexed_mtz = opt.mtz
    reindexed_mtz_meta = wf.read_mtz_metadata(reindexed_mtz)
    if reindexed_mtz_meta.symmetry != mtz_meta.symmetry:
        _comment_summary_line('reindexed MTZ', reindexed_mtz_meta)

    ####### (c)truncate - calculate amplitudes if needed #######
    if not opt.fcolumn:
        opt.fcolumn = 'F' if 'F' in mtz_meta.columns else 'FP'
    elif opt.icolumn or opt.ItoF_prog:
        put_error('Ignoring options --fcolumn/--sigfcolumn')
    opt.sigfcolumn = opt.sigfcolumn.replace('<FCOL>', opt.fcolumn)
    if (opt.ItoF_prog or opt.icolumn or opt.fcolumn not in mtz_meta.columns
            or opt.sigfcolumn not in mtz_meta.columns):
        f_mtz = "amplit.mtz"
        wf.temporary_files.add(f_mtz)
        # the result is used below as a pair for "%s"-formatting
        i_sigi_cols = _find_i_sigi_columns(mtz_meta, opt)
        if opt.ItoF_prog == 'ctruncate' or (opt.ItoF_prog is None
                                            and opt.slow):
            wf.ctruncate(hklin=reindexed_mtz, hklout=f_mtz,
                         colin="/*/*/[%s,%s]" % i_sigi_cols).run()
        else:
            wf.truncate(hklin=reindexed_mtz, hklout=f_mtz,
                        labin="IMEAN=%s SIGIMEAN=%s" % i_sigi_cols,
                        labout="F=F SIGF=SIGF").run()
        # from now on use the columns written by (c)truncate
        opt.fcolumn = 'F'
        opt.sigfcolumn = 'SIGF'
    else:
        f_mtz = reindexed_mtz

    ####### rigid body - check if model is good for refinement? #######
    refmac_labin_nofree = "FP=%s SIGFP=%s" % (opt.fcolumn, opt.sigfcolumn)
    # refmac_xyzin stays None when MR is needed
    refmac_xyzin = None
    cell_diff = calculate_difference(pdb_meta, reindexed_mtz_meta)
    if pdb_meta is None:
        pass  # the error message was already printed
    elif opt.mr_when_r <= 0:
        comment("\nMR requested unconditionally.")
    elif cell_diff > 0.1 and opt.mr_when_r < 1:
        comment("\nDifferent unit cells.")
    elif pdb_meta.symmetry != reindexed_mtz_meta.symmetry:
        comment("\nDifferent space groups.")
    else:
        comment("\nRigid-body refinement with resolution 3.5 A, 10 cycles.")
        if 'aa_count' in rw_data and 'water_count' in rw_data:
            # avoid division by zero for a model without amino acids
            if rw_data['aa_count'] != 0:
                comment(" %.1f waters/aa." % (rw_data['water_count'] /
                                              rw_data['aa_count']))
            else:
                comment(' %d/0 waters/aa.' % rw_data['water_count'])
        wf.temporary_files |= {"refmacRB.pdb", "refmacRB.mtz"}
        # it may fail because of "Disagreement between mtz and pdb"
        wf.refmac5(hklin=f_mtz, xyzin=rb_xyzin,
                   hklout="refmacRB.mtz", xyzout="refmacRB.pdb",
                   labin=refmac_labin_nofree,
                   libin=None,
                   keys="""refinement type rigidbody resolution 15 3.5
                           rigidbody ncycle 10""").run(may_fail=True)
        # if the error is caused by mtz/pdb disagreement, continue with MR
        if wf.jobs[-1].exit_status != 0:
            comment("\nTry MR.")
        elif not wf.jobs[-1].data.get("overall_r"):
            comment("\nWARNING: unknown R factor, something went wrong.\n")
            refmac_xyzin = "refmacRB.pdb"
        elif wf.jobs[-1].data["overall_r"] > opt.mr_when_r:
            comment("\nRun MR for R > %g." % opt.mr_when_r)
        else:
            comment("\nNo MR for R < %g." % opt.mr_when_r)
            refmac_xyzin = "refmacRB.pdb"

    ####### phaser/molrep - molecular replacement #######
    if refmac_xyzin is None:
        vol_ratio = None
        if pdb_meta:
            # num_mol accounts for strict NCS (MTRIX without iGiven)
            vol_ratio = (mtz_meta.asu_volume() /
                         pdb_meta.asu_volume(rw_data['num_mol']))
            comment(" Volume of asu: %.1f%% of model asu."
                    % (100 * vol_ratio))
        if opt.mr_when_r >= 1:
            comment("\nWould try MR, but it is disabled.")
            return
        if opt.mr_num:
            mr_num = opt.mr_num
        else:
            mr_num = guess_number_of_molecules(mtz_meta, rw_data, vol_ratio)
        mw = rw_data.get('weight')
        # a float mr_num means the model is to be split with ensembler;
        # it is then scaled by the number of resulting models
        if isinstance(mr_num, float):
            wf.ensembler(pdbin=rb_xyzin, root='ens').run()
            n_models = len(wf.jobs[-1].data['models'])
            mw = None
            rb_xyzin = "ens_merged.pdb"
            mr_num = max(int(round(mr_num * n_models)), 1)
        # phaser is used by default if number of searched molecules is known
        if opt.mr_prog == 'molrep':
            wf.temporary_files |= {"molrep.pdb", "molrep_dimer.pdb",
                                   "molrep.crd"}
            wf.molrep(f=f_mtz, m=rb_xyzin).run()
            refmac_xyzin = "molrep.pdb"
        else:
            wf.temporary_files |= {"phaser.1.pdb", "phaser.1.mtz"}
            wf.phaser_auto(hklin=f_mtz,
                           labin="F=%s SIGF=%s" % (opt.fcolumn,
                                                   opt.sigfcolumn),
                           model=dict(pdb=rb_xyzin, identity=100, num=mr_num,
                                      mw=mw),
                           sg_alt="ALL", opt=opt,
                           root='phaser').run(may_fail=True)
            if not _after_phaser_comments(wf.jobs[-1],
                                          sg_in=reindexed_mtz_meta.symmetry):
                return
            refmac_xyzin = "phaser.1.pdb"
            f_mtz = "phaser.1.mtz"

    # disabled code path, never executed
    if False:
        wf.findwaters(pdbin=refmac_xyzin, hklin=f_mtz,
                      f="FC", phi="PHIC", pdbout="prepared_wat.pdb", sigma=2)
        refmac_xyzin = "prepared_wat.pdb"

    ####### adding free-R flags #######
    f_mtz_meta = wf.read_mtz_metadata(f_mtz)
    cad_reso = opt.reso or (f_mtz_meta.dmax - MtzMeta.d_eps)
    if opt.free_r_flags:
        free_mtz = opt.free_r_flags
        free_col = check_freerflags_column(wf.path(free_mtz),
                                           expected_symmetry=pdb_meta,
                                           column=opt.freecolumn)
        comment("\nFree-R flags from the %s file, column %s." %
                (("reference" if free_mtz != opt.mtz else 'input'), free_col))
    else:
        free_col = DEFAULT_FREE_COLS[0]
        if free_col in f_mtz_meta.columns:
            comment("\nReplace free-R flags")
        else:
            comment("\nGenerate free-R flags")
        free_mtz = "free.mtz"
        wf.temporary_files |= {"unique.mtz", free_mtz}
        if opt.seed_freerflag or cell_diff > 1e3:  # i.e. different SG
            wf.unique(hklout="unique.mtz", ref=f_mtz_meta,
                      resolution=cad_reso).run()
        else:
            comment(" (repeatably)")
            # Here we'd like to have always the same set of free-r flags
            # for given PDB file. That's why we don't use information
            # from the data file (mtz).
            wf.unique(hklout="unique.mtz", ref=pdb_meta,
                      resolution=1.0).run()
        # CCP4 freerflag uses always the same pseudo-random sequence by default
        wf.freerflag(hklin="unique.mtz", hklout=free_mtz,
                     keys=("SEED" if opt.seed_freerflag else "")).run()

    if free_mtz == opt.mtz and opt.reso is None:
        prepared_mtz = f_mtz
    else:
        prepared_mtz = "prepared.mtz"
        wf.temporary_files.add(prepared_mtz)
        # merge data columns (without any old free-R column) with the flags
        wf.cad(data_in=[(f_mtz, [c for c in f_mtz_meta.columns
                                 if c != free_col]),
                        (free_mtz, [free_col])],
               hklout=prepared_mtz,
               keys=["sysab_keep",  # does it matter?
                     "reso overall 1000.0 %g" % cad_reso]).run()
    freerflag_missing = wf.count_mtz_missing(prepared_mtz, free_col)
    if freerflag_missing:
        wf.freerflag(hklin=prepared_mtz, hklout="prepared2.mtz",
                     keys="COMPLETE FREE="+free_col,
                     parser=" (again, for %d refl. more)" % freerflag_missing
                     ).run()
        prepared_mtz = "prepared2.mtz"
        wf.temporary_files.add(prepared_mtz)

    ####### refinement #######
    if opt.weight:
        refmac_weight = "matrix %f" % opt.weight
    else:
        refmac_weight = "auto"
    restr_ref_keys = """\
     make newligand continue
     refinement type restrained
     weight %s
     """ % refmac_weight
    if opt.freecolumn_val:
        restr_ref_keys += "free %s\n" % opt.freecolumn_val
    refmac_labin = "%s FREE=%s" % (refmac_labin_nofree, free_col)
    comment("\nRestrained refinement, %d+%d cycles."
            % (opt.jelly, opt.restr_cycles))
    # NOTE(review): opt.extra_ref_keys is appended right after "ncycle %d"
    # with no separator - presumably it starts with a newline; confirm
    # where the option is defined.
    if opt.jelly:
        wf.temporary_files |= {"jelly.pdb", "jelly.mtz"}
        wf.refmac5(hklin=prepared_mtz, xyzin=refmac_xyzin,
                   hklout="jelly.mtz", xyzout="jelly.pdb",
                   labin=refmac_labin, libin=opt.libin,
                   keys=restr_ref_keys+"ridge distance sigma 0.01\n"
                                       "make hydrogen no\n"
                                       "ncycle %d" % opt.jelly
                                       +opt.extra_ref_keys).run()
        comment(_refmac_rms_line(wf.jobs[-1].data))
        refmac_xyzin = "jelly.pdb"
    restr_job = wf.refmac5(hklin=prepared_mtz, xyzin=refmac_xyzin,
                           hklout=opt.hklout, xyzout=opt.xyzout,
                           labin=refmac_labin, libin=opt.libin,
                           keys=restr_ref_keys+("ncycle %d" %
                                                opt.restr_cycles)
                                +opt.extra_ref_keys).run()
    comment(_refmac_rms_line(restr_job.data))
    # if that run is repeated with --from-step it's useful to compare Rfree
    if wf.from_job > 0 and wf.from_job <= len(wf.jobs):  # from_job is 1-based
        prev = [j for j in wf.repl_jobs if j.name == restr_job.name]
        if prev and prev[0].data and "free_r" in prev[0].data:
            comment("\nPreviously: R/Rfree %.4f/%.4f Rfree change: %+.4f" % (
                prev[0].data["overall_r"], prev[0].data["free_r"],
                restr_job.data["free_r"] - prev[0].data["free_r"]))

    ####### check blobs and finish #######
    if restr_job.data["free_r"] <= BAD_FINAL_RFREE:
        fb_job = wf.find_blobs(opt.hklout, opt.xyzout, sigma=0.8).run()
        coot_script = _generate_scripts_and_pictures(wf, opt, fb_job.data)
        if coot_script:
            comment("\nTo see it in Coot run %s" % coot_script)
    else:
        comment("\nGiving up (Rfree > %g). No blob search." % BAD_FINAL_RFREE)
        _generate_scripts_and_pictures(wf, opt, None)
def _generate_scripts_and_pictures(wf, opt, data, pha=None):
    """Write Coot helper scripts and, if possible, render blob pictures.

    Args:
        wf: workflow object; its ``output_dir``, ``coot_py``, ``render_r3d``
            and ``delete_files`` members are used.
        opt: parsed options. ``img_format``, ``xyzout``, ``hklout`` and
            ``white_bg`` are read; ``img_format`` is reset to ``None`` when
            coot or Raster3D turn out to be unusable.
        data: mapping with a ``'blobs'`` list and optionally a ``'center'``
            entry, or a falsy value when there is nothing to show.
        pha: optional reflection file used instead of ``opt.hklout``; when
            given, all generated file names get the ``anom-`` prefix.

    Returns:
        Path of the generated ``coot.sh`` convenience script.

    Raises:
        workflow.JobError: when the coot job that prepares the pictures
            fails. Failures of the rendering step itself are only reported.
    """
    blobs = data['blobs'] if data else []
    coot_path = coots.find_path()
    if not blobs:
        comment('\nUnmodelled blobs not found.')
    elif opt.img_format:
        # Pictures need a working coot with Python support and Raster3D.
        if coot_path:
            coot_ver = coots.find_version(coot_path)
            if coot_ver is None:
                put_error('coot not working(?), no pictures')
                opt.img_format = None
            elif 'with python' not in coot_ver:
                put_error('coot with Python support is needed')
                opt.img_format = None
        else:
            put_error('No coot, no pictures')
            opt.img_format = None
        if not utils.syspath('render'):
            put_error('No Raster3d, no pictures')
            opt.img_format = None
        if opt.img_format:
            if len(blobs) == 1:
                comment('\nRendering density blob at (%.1f, %.1f, %.1f)'
                        % blobs[0])
            else:
                comment('\nRendering 2 largest blobs: at (%.1f, %.1f, %.1f) '
                        'and at (%.1f, %.1f, %.1f)' % (blobs[0] + blobs[1]))
    com = data and data.get('center')
    if pha:
        normal_map = False
        refl = pha
        prefix = 'anom-'
    else:
        normal_map = True
        refl = opt.hklout
        prefix = ''

    # run-coot.py centers on the biggest blob. It uses relative paths -
    # it can be run only from the output directory, but is not affected
    # by moving that directory to different location.
    # There are blobN-coot.py scripts generated below with absolute paths.
    # write coot script (apart from pictures) that centers on the biggest blob
    script_path = os.path.join(wf.output_dir, prefix + 'run-coot.py')
    script = coots.basic_script(pdb=opt.xyzout, refl=refl,
                                normal_map=normal_map,
                                center=(blobs and blobs[0]), toward=com,
                                white_bg=opt.white_bg)
    _write_script(script_path, script, executable=True)

    # blob images, for now for not more than two blobs
    d = os.path.abspath(wf.output_dir)
    for n, b in enumerate(blobs[:2]):
        py_path = os.path.join(wf.output_dir,
                               '%sblob%d-coot.py' % (prefix, n + 1))
        content = coots.basic_script(pdb=os.path.join(d, opt.xyzout),
                                     refl=os.path.join(d, refl),
                                     normal_map=normal_map,
                                     center=blobs[n], toward=com,
                                     white_bg=opt.white_bg)
        _write_script(py_path, content)

    # coot.sh - one-line script for convenience
    if blobs:
        coot_sh_text = '{coot} --no-guano {out}/%sblob1-coot.py\n' % prefix
    else:
        # Point at the files refinement actually wrote rather than at the
        # hard-coded final.mtz/final.pdb names.
        coot_sh_text = '{coot} --no-guano {out}/{mtz} {out}/{pdb}\n'
    coot_sh_path = os.path.join(wf.output_dir, prefix + 'coot.sh')
    _write_script(coot_sh_path,
                  coot_sh_text.format(coot=coot_path or 'coot',
                                      out=wf.output_dir,
                                      mtz=opt.hklout, pdb=opt.xyzout),
                  executable=True)

    if opt.img_format and blobs:
        script = ''
        basenames = []
        # as a workaround for buggy coot the maps are reloaded for each blob
        for n, b in enumerate(blobs[:2]):
            script += coots.basic_script(pdb=opt.xyzout, refl=refl,
                                         normal_map=normal_map,
                                         center=b, toward=com,
                                         white_bg=opt.white_bg)
            rs, names = coots.r3d_script(center=b, toward=com,
                                         blobname='%sblob%s' % (prefix, n + 1))
            script += rs
            basenames += names
        coot_job = wf.coot_py(script)
        try:
            coot_job.run()
        except workflow.JobError:
            # check for a possible cause to hint the user
            # (possible workaround: change $HOME to non-existing directory)
            if utils.silently_run(coot_job.args, cwd=wf.output_dir)[0] != 0:
                put_error('coot fails with options: --no-graphics --python',
                          comment='It happens when scripts in .coot or '
                                  '.coot-preferences are not compatible\n'
                                  'with the --no-graphics mode.')
            raise
        for n, basename in enumerate(basenames):
            try:
                job = wf.render_r3d(basename, img_format=opt.img_format)
                if n % 3 == 0:
                    job.run()
                else:  # minimal output
                    job.run(show_progress=False, new_line=False)
                wf.delete_files([basename + '.r3d'])
            except workflow.JobError as e:
                # Raster3D may fail saying "increase MAXDET and recompile".
                # This is not critical, so Dimple doesn't stop.
                # The note is presumably optional (TODO confirm against
                # JobError); guard so a missing note cannot raise TypeError.
                put_error('Rendering failed, no picture',
                          comment=' ' + (e.note or ''))
    return coot_sh_path
def dimple(wf, opt):
    """Run the whole pipeline on workflow *wf* as configured by *opt*.

    The stages, in order: read MTZ/PDB metadata and pick the model, run
    rwcontents, optionally reindex with pointless, convert intensities to
    amplitudes if needed, try rigid-body refinement, fall back to molecular
    replacement (molrep or phaser), prepare free-R flags, run restrained
    refinement, optionally search for blobs and optionally run AnoDe.

    Several attributes of *opt* are updated in place (``pdbs``, ``fcolumn``,
    ``sigfcolumn``) and intermediate file names are registered in
    ``wf.temporary_files``.

    Returns:
        None. The function returns early when no usable PDB file is given,
        when MR would be needed but is disabled, or when AnoDe is requested
        but the expected anomalous columns are not found.

    Raises:
        RuntimeError: when phaser does not give a solution.
    """
    comment(' ### Dimple v%s. Problems and suggestions:'
            ' ccp4.github.io/dimple ###' % __version__)
    mtz_meta = wf.read_mtz_metadata(opt.mtz)
    _comment_summary_line('MTZ (%.1fA)' % mtz_meta.dmax, mtz_meta)
    if opt.dls_naming:
        opt.pdbs = dls_name_filter(opt.pdbs)
    opt.pdbs = utils.filter_out_duplicate_files(opt.pdbs, relto=opt.output_dir)
    if not opt.pdbs:
        comment('\nNo non-empty pdb files given. Nothing to do.')
        return
    for p in opt.pdbs:
        wf.read_pdb_metadata(p, print_errors=(len(opt.pdbs) > 1))
    if len(opt.pdbs) > 1:
        # the model most similar to the data comes first and is the one used
        comment('\nPDBs in order of similarity (using the first one):')
        opt.pdbs.sort(
            key=lambda x: calculate_difference(wf.file_info[x], mtz_meta))
    utils.log_value('data_file', opt.mtz)
    utils.log_value('pdb_files', opt.pdbs)
    for p in opt.pdbs:
        _comment_summary_line(os.path.basename(p), wf.file_info[p])
    ini_pdb = 'ini.pdb'
    wf.copy_uncompressed(opt.pdbs[0], ini_pdb)
    pdb_meta = wf.file_info[opt.pdbs[0]]
    if pdb_meta is None:
        put_error('PDB file missing CRYST1 record, starting from MR')
    if opt.no_hetatm or check_hetatm_x(wf.path(ini_pdb), pdb_meta):
        if not opt.no_hetatm:
            comment('\nHETATM marked as element X would choke many programs.')
        rb_xyzin = 'prepared.pdb'
        wf.temporary_files.add(rb_xyzin)
        n_het = wf.remove_hetatm(xyzin=ini_pdb, xyzout=rb_xyzin,
                                 remove_all=opt.no_hetatm)
        comment('\nRemoved %d HETATM atoms' % n_het)
    else:
        rb_xyzin = ini_pdb
    # run rwcontents even without CRYST1 - it will show mol. weight only
    wf.rwcontents(xyzin=rb_xyzin).run()
    rw_data = wf.jobs[-1].data
    if pdb_meta is None:
        pass  # we already had a warning message
    elif rw_data.get('solvent_percent') is None:
        put_error('rwcontents could not interpret %s' % rb_xyzin)
    elif rw_data['solvent_percent'] > HIGH_SOLVENT_PCT:
        comment('\nHmm... %.1f%% of solvent or incomplete model'
                % rw_data['solvent_percent'])
        if abs(wf.jobs[-1].data.get('volume', 0) - pdb_meta.get_volume()) > 10:
            comment('\ndebug: problem when calculating volume?')

    ####### pointless - reindexing #######
    # Attempted only when symmetry matches, MR is not forced
    # (mr_when_r > 0) and the data/model volume ratio is within (0.7, 1.4).
    if match_symmetry(mtz_meta, pdb_meta) and opt.mr_when_r > 0 and (
            0.7 < mtz_meta.get_volume() / pdb_meta.get_volume() < 1.4):
        reindexed_mtz = 'pointless.mtz'
        wf.temporary_files.add(reindexed_mtz)
        wf.pointless(hklin=opt.mtz, xyzin=rb_xyzin, hklout=reindexed_mtz,
                     keys='TOLERANCE 5').run(may_fail=True)
        alt_reindex = wf.jobs[-1].data.get('alt_reindex')
        if wf.jobs[-1].exit_status == 0 and alt_reindex:
            for ar in alt_reindex:
                comment('\n %-10s CC: %-8.3f cell diff: %.1fA'
                        % (ar['op'], ar['cc'], ar['cell_deviat']))
        else:
            # until recently (2015) pointless didn't print CC for non-ambiguous
            # spacegroups (e.g. C2), but now it always prints
            comment('\n no good indexing')
            reindexed_mtz = opt.mtz
    else:
        reindexed_mtz = opt.mtz
    reindexed_mtz_meta = wf.read_mtz_metadata(reindexed_mtz)
    if reindexed_mtz_meta.symmetry != mtz_meta.symmetry:
        _comment_summary_line('reindexed MTZ', reindexed_mtz_meta)

    ####### (c)truncate - calculate amplitudes if needed #######
    if not opt.fcolumn:
        opt.fcolumn = 'F' if 'F' in mtz_meta.columns else 'FP'
    elif opt.icolumn or opt.ItoF_prog:
        put_error('Ignoring options --fcolumn/--sigfcolumn')
    opt.sigfcolumn = opt.sigfcolumn.replace('<FCOL>', opt.fcolumn)
    if (opt.ItoF_prog or opt.icolumn
            or opt.fcolumn not in mtz_meta.columns
            or opt.sigfcolumn not in mtz_meta.columns):
        f_mtz = 'amplit.mtz'
        wf.temporary_files.add(f_mtz)
        i_sigi_cols = _find_i_sigi_columns(mtz_meta, opt)
        if opt.ItoF_prog == 'ctruncate' or (opt.ItoF_prog is None
                                            and opt.slow):
            colano = None
            # anomalous columns are passed on only when AnoDe is requested
            # and all four of them are present
            if opt.anode and all(
                    col in mtz_meta.columns
                    for col in ['I(+)', 'SIGI(+)', 'I(-)', 'SIGI(-)']):
                colano = '/*/*/[I(+),SIGI(+),I(-),SIGI(-)]'
            wf.ctruncate(hklin=reindexed_mtz, hklout=f_mtz,
                         colin='/*/*/[%s,%s]' % i_sigi_cols,
                         colano=colano).run()
        else:
            wf.truncate(hklin=reindexed_mtz, hklout=f_mtz,
                        labin='IMEAN=%s SIGIMEAN=%s' % i_sigi_cols,
                        labout='F=F SIGF=SIGF').run()
        # from now on use the freshly calculated amplitude columns
        opt.fcolumn = 'F'
        opt.sigfcolumn = 'SIGF'
    else:
        f_mtz = reindexed_mtz

    ####### rigid body - check if model is good for refinement? #######
    refmac_labin_nofree = 'FP=%s SIGFP=%s' % (opt.fcolumn, opt.sigfcolumn)
    # refmac_xyzin stays None when the model has to go through MR
    refmac_xyzin = None
    cell_diff = calculate_difference(pdb_meta, reindexed_mtz_meta)
    if pdb_meta is None:
        pass  # the error message was already printed
    elif opt.mr_when_r <= 0:
        comment('\nMR requested unconditionally.')
    elif cell_diff > 0.1 and opt.mr_when_r < 1:
        comment('\nDifferent unit cells.')
    elif pdb_meta.symmetry != reindexed_mtz_meta.symmetry:
        comment('\nDifferent space groups.')
    else:
        comment('\nRigid-body refinement with resolution 3.5 A, %d cycles.'
                % opt.rigid_cycles)
        if 'aa_count' in rw_data and 'water_count' in rw_data:
            if rw_data['aa_count'] != 0:
                comment(' %.1f waters/aa.' % (rw_data['water_count']
                                              / rw_data['aa_count']))
            else:
                comment(' %d/0 waters/aa.' % rw_data['water_count'])
        wf.temporary_files |= {'refmacRB.pdb', 'refmacRB.mtz'}
        # it may fail because of "Disagreement between mtz and pdb"
        # NOTE(review): the line break inside the keyword literal below was
        # reconstructed from whitespace-collapsed text - confirm.
        wf.refmac5(hklin=f_mtz, xyzin=rb_xyzin,
                   hklout='refmacRB.mtz', xyzout='refmacRB.pdb',
                   labin=refmac_labin_nofree,
                   libin=None,
                   keys="""refinement type rigidbody resolution 15 3.5
                           rigidbody ncycle %d""" % opt.rigid_cycles).run(may_fail=True)
        # if the error is caused by mtz/pdb disagreement, continue with MR
        if wf.jobs[-1].exit_status != 0:
            comment('\nTry MR.')
        elif not wf.jobs[-1].data.get('overall_r'):
            comment('\nWARNING: unknown R factor, something went wrong.\n')
            refmac_xyzin = 'refmacRB.pdb'
        elif wf.jobs[-1].data['overall_r'] > opt.mr_when_r:
            comment('\nRun MR for R > %g.' % opt.mr_when_r)
        else:
            comment('\nNo MR for R < %g.' % opt.mr_when_r)
            refmac_xyzin = 'refmacRB.pdb'

    ####### phaser/molrep - molecular replacement #######
    if refmac_xyzin is None:
        vol_ratio = None
        if pdb_meta:
            # num_mol accounts for strict NCS (MTRIX without iGiven)
            vol_ratio = (mtz_meta.asu_volume()
                         / pdb_meta.asu_volume(rw_data['num_mol']))
            comment(' Volume of asu: %.1f%% of model asu.' % (100 * vol_ratio))
        if opt.mr_when_r >= 1:
            comment('\nWould try MR, but it is disabled.')
            return
        if opt.mr_num:
            mr_num = opt.mr_num
        else:
            mr_num = guess_number_of_molecules(mtz_meta, rw_data, vol_ratio)
        mw = rw_data.get('weight')
        if isinstance(mr_num, float):
            # a float mr_num is scaled by the number of ensembler models
            # and the merged ensemble becomes the search model
            wf.ensembler(pdbin=rb_xyzin, root='ens').run()
            n_models = len(wf.jobs[-1].data['models'])
            mw = None
            rb_xyzin = 'ens_merged.pdb'
            mr_num = max(int(round(mr_num * n_models)), 1)
        # phaser is used by default if number of searched molecules is known
        if opt.mr_prog == 'molrep':
            wf.temporary_files |= {
                'molrep.pdb', 'molrep_dimer.pdb', 'molrep.crd'
            }
            wf.molrep(f=f_mtz, m=rb_xyzin).run()
            refmac_xyzin = 'molrep.pdb'
        else:
            wf.temporary_files |= {'phaser.1.pdb', 'phaser.1.mtz'}
            wf.phaser_auto(hklin=f_mtz,
                           labin='F=%s SIGF=%s' % (opt.fcolumn,
                                                   opt.sigfcolumn),
                           model=dict(pdb=rb_xyzin, identity=100,
                                      num=mr_num, mw=mw),
                           sg_alt='ALL', opt=opt,
                           root='phaser').run(may_fail=True)
            if not _after_phaser_comments(wf.jobs[-1],
                                          sg_in=reindexed_mtz_meta.symmetry):
                raise RuntimeError('No phaser solution.')
            refmac_xyzin = 'phaser.1.pdb'
            f_mtz = 'phaser.1.mtz'

    # disabled step, kept in the source but never executed
    if False:
        wf.findwaters(pdbin=refmac_xyzin, hklin=f_mtz,
                      f='FC', phi='PHIC', pdbout='prepared_wat.pdb', sigma=2)
        refmac_xyzin = 'prepared_wat.pdb'

    ####### adding free-R flags #######
    f_mtz_meta = wf.read_mtz_metadata(f_mtz)
    cad_reso = opt.reso or (f_mtz_meta.dmax - MtzMeta.d_eps)
    if opt.free_r_flags:
        free_mtz = opt.free_r_flags
        free_col = check_freerflags_column(wf.path(free_mtz),
                                           expected_symmetry=pdb_meta,
                                           column=opt.freecolumn)
        comment('\nFree-R flags from the %s file, column %s.'
                % (('reference' if free_mtz != opt.mtz else 'input'),
                   free_col))
    else:
        free_col = DEFAULT_FREE_COLS[0]
        if free_col in f_mtz_meta.columns:
            comment('\nReplace free-R flags')
        else:
            comment('\nGenerate free-R flags')
        free_mtz = 'free.mtz'
        wf.temporary_files |= {'unique.mtz', free_mtz}
        if opt.seed_freerflag or cell_diff > 1e3:  # i.e. different SG
            wf.unique(hklout='unique.mtz', ref=f_mtz_meta,
                      resolution=cad_reso).run()
        else:
            comment(' (repeatably)')
            # Here we'd like to have always the same set of free-r flags
            # for given PDB file. That's why we don't use information
            # from the data file (mtz).
            wf.unique(hklout='unique.mtz', ref=pdb_meta,
                      resolution=1.0).run()
        # CCP4 freerflag uses always the same pseudo-random sequence by default
        wf.freerflag(hklin='unique.mtz', hklout=free_mtz,
                     keys=('SEED' if opt.seed_freerflag else '')).run()

    if free_mtz == opt.mtz and opt.reso is None:
        prepared_mtz = f_mtz
    else:
        # merge the data columns with the free-R column into one file
        prepared_mtz = 'prepared.mtz'
        wf.temporary_files.add(prepared_mtz)
        wf.cad(
            data_in=[(f_mtz,
                      [c for c in f_mtz_meta.columns if c != free_col]),
                     (free_mtz, [free_col])],
            hklout=prepared_mtz,
            keys=[
                'sysab_keep',  # does it matter?
                'reso overall 1000.0 %g' % cad_reso
            ]).run()
    freerflag_missing = wf.count_mtz_missing(prepared_mtz, free_col)
    if freerflag_missing:
        # some reflections have no flag yet - complete the column
        wf.freerflag(hklin=prepared_mtz, hklout='prepared2.mtz',
                     keys='COMPLETE FREE=' + free_col,
                     parser=' (again, for %d refl. more)'
                            % freerflag_missing).run()
        prepared_mtz = 'prepared2.mtz'
        wf.temporary_files.add(prepared_mtz)

    ####### refinement #######
    if opt.weight:
        refmac_weight = 'matrix %f' % opt.weight
    else:
        refmac_weight = 'auto'
    # NOTE(review): the line breaks inside the keyword literal below were
    # reconstructed from whitespace-collapsed text - confirm.
    restr_ref_keys = """\
make newligand continue
refinement type restrained
weight %s
""" % refmac_weight
    if opt.freecolumn_val:
        restr_ref_keys += 'free %s\n' % opt.freecolumn_val
    refmac_labin = '%s FREE=%s' % (refmac_labin_nofree, free_col)
    comment('\nRestrained refinement, %d+%d cycles.'
            % (opt.jelly, opt.restr_cycles))
    if opt.jelly:
        # optional first pass with extra "ridge distance" restraints;
        # its output model feeds the final refinement below
        wf.temporary_files |= {'jelly.pdb', 'jelly.mtz'}
        wf.refmac5(hklin=prepared_mtz, xyzin=refmac_xyzin,
                   hklout='jelly.mtz', xyzout='jelly.pdb',
                   labin=refmac_labin, libin=opt.libin,
                   keys=restr_ref_keys + 'ridge distance sigma 0.01\n'
                                         'make hydrogen no\n'
                                         'ncycle %d' % opt.jelly
                        + opt.extra_ref_keys).run()
        comment(_refmac_rms_line(wf.jobs[-1].data))
        refmac_xyzin = 'jelly.pdb'
    restr_job = wf.refmac5(
        hklin=prepared_mtz, xyzin=refmac_xyzin,
        hklout=opt.hklout, xyzout=opt.xyzout,
        labin=refmac_labin, libin=opt.libin,
        keys=(restr_ref_keys + 'ncycle %d' % opt.restr_cycles
              + opt.extra_ref_keys)).run()
    comment(_refmac_rms_line(restr_job.data))
    # if that run is repeated with --from-step it's useful to compare Rfree
    if wf.from_job > 0 and wf.from_job <= len(wf.jobs):  # from_job is 1-based
        prev = [j for j in wf.repl_jobs if j.name == restr_job.name]
        if prev and prev[0].data and 'free_r' in prev[0].data:
            comment('\nPreviously: R/Rfree %.4f/%.4f Rfree change: %+.4f'
                    % (prev[0].data['overall_r'], prev[0].data['free_r'],
                       restr_job.data['free_r'] - prev[0].data['free_r']))

    ####### check blobs #######
    if opt.blob_search:
        if restr_job.data['free_r'] <= BAD_FINAL_RFREE:
            fb_job = wf.find_blobs(opt.hklout, opt.xyzout, sigma=0.8).run()
            coot_script = _generate_scripts_and_pictures(wf, opt, fb_job.data)
            if coot_script:
                comment('\nTo see it in Coot run %s' % coot_script)
        else:
            comment('\nNo blob search for Rfree > %g.' % BAD_FINAL_RFREE)
            # scripts are still written, just without blob data
            _generate_scripts_and_pictures(wf, opt, None)

    if opt.anode:
        # check if mtz contains I+/- and SIGI+/-
        # NOTE(review): with `and` the step is skipped only when BOTH counts
        # differ from 2; `or` may have been intended - confirm.
        column_types = list(reindexed_mtz_meta.columns.values())
        if column_types.count('K') != 2 and column_types.count('M') != 2:
            comment('\nColumns I+/- and SIG+/- not found. Skipping AnoDe.')
            return
        anode_name = 'anode'
        # convert to sca for input to shelxc
        scaout = anode_name + '.sca'
        wf.mtz2sca(prepared_mtz, scaout).run()
        wf.shelxc(scaout, reindexed_mtz_meta.cell,
                  reindexed_mtz_meta.symmetry).run()
        wf.copy_uncompressed(opt.xyzout, anode_name + '.pdb')
        anode_job = wf.anode(anode_name).run()
        wf.temporary_files |= {
            scaout, anode_name + '.pdb', anode_name + '.hkl',
            anode_name + '.pha', anode_name + '_sad.cif',
            anode_name + '_fa.hkl'
        }
        cell = Cell(reindexed_mtz_meta.cell, reindexed_mtz_meta.symmetry)
        # need orthogonal not fractional coordinates to generate coot script
        anode_job.data['blobs'] = cell.orthogonalize(anode_job.data['xyz'])
        comment(_anode_anom_peak_lines(anode_job.data))
        # the returned script path is not used afterwards
        coot_script = _generate_scripts_and_pictures(wf, opt, anode_job.data,
                                                     pha=anode_name + '.pha')
def _comment_summary_line(name, meta):
    """Print one summary row: *name* left-justified in 21 columns, then *meta*.

    A falsy *meta* (for example ``None``) is shown as ``???``.
    """
    shown = meta if meta else '???'
    row = '\n%-21s %s' % (name, shown)
    comment(row)
def _generate_scripts_and_pictures(wf, opt, data):
    """Write Coot helper scripts and, if possible, render blob pictures.

    Args:
        wf: workflow object; its ``output_dir``, ``coot_py``, ``render_r3d``
            and ``delete_files`` members are used.
        opt: parsed options; ``img_format``, ``xyzout`` and ``hklout``
            are read.
        data: mapping with ``"blobs"`` and ``"center"`` entries, or a falsy
            value when there is nothing to show.

    Returns:
        Path of the generated ``coot.sh`` convenience script.

    Raises:
        workflow.JobError: when the coot or rendering job fails.
    """
    blobs = data["blobs"] if data else []
    # Pictures are made only when they were requested, there is a blob to
    # show and the external tools passed the check. Previously the check
    # result only gated the message, so rendering was attempted anyway.
    make_pictures = False
    if not blobs:
        comment("\nUnmodelled blobs not found.")
    elif opt.img_format and _check_picture_tools():
        make_pictures = True
        if len(blobs) == 1:
            comment("\nRendering density blob at (%.1f, %.1f, %.1f)"
                    % blobs[0])
        else:
            comment("\nRendering 2 largest blobs: at (%.1f, %.1f, %.1f) "
                    "and at (%.1f, %.1f, %.1f)" % (blobs[0]+blobs[1]))
    com = data and data["center"]

    # run-coot.py centers on the biggest blob. It uses relative paths -
    # it can be run only from the output directory, but is not affected
    # by moving that directory to different location.
    # There are blobN-coot.py scripts generated below with absolute paths.
    # write coot script (apart from pictures) that centers on the biggest blob
    script_path = os.path.join(wf.output_dir, "run-coot.py")
    script = coots.basic_script(pdb=opt.xyzout, mtz=opt.hklout,
                                center=(blobs and blobs[0]), toward=com)
    _write_script(script_path, script, executable=True)

    # blob images, for now for not more than two blobs
    d = os.path.abspath(wf.output_dir)
    for n, b in enumerate(blobs[:2]):
        py_path = os.path.join(wf.output_dir, "blob%d-coot.py" % (n+1))
        content = coots.basic_script(pdb=os.path.join(d, opt.xyzout),
                                     mtz=os.path.join(d, opt.hklout),
                                     center=blobs[n], toward=com)
        _write_script(py_path, content)

    # coot.sh - one-line script for convenience
    if blobs:
        coot_sh_text = '{coot} --no-guano {out}/blob1-coot.py\n'
    else:
        # Point at the files refinement actually wrote rather than at the
        # hard-coded final.mtz/final.pdb names.
        coot_sh_text = '{coot} --no-guano {out}/{mtz} {out}/{pdb}\n'
    coot_sh_path = os.path.join(wf.output_dir, "coot.sh")
    # Fall back to plain "coot" so that a missing executable does not put
    # the literal text "None" into the script.
    _write_script(coot_sh_path,
                  coot_sh_text.format(coot=coots.find_path() or 'coot',
                                      out=wf.output_dir,
                                      mtz=opt.hklout, pdb=opt.xyzout),
                  executable=True)

    if make_pictures:
        script = ''
        basenames = []
        # as a workaround for buggy coot the maps are reloaded for each blob
        for n, b in enumerate(blobs[:2]):
            script += coots.basic_script(pdb=opt.xyzout, mtz=opt.hklout,
                                         center=b, toward=com)
            rs, names = coots.r3d_script(b, com, blobname="blob%s"%(n+1))
            script += rs
            basenames += names
        coot_job = wf.coot_py(script)
        try:
            coot_job.run()
        except workflow.JobError:
            # check for a possible cause to hint the user
            # (possible workaround: change $HOME to non-existing directory)
            if utils.silently_run(coot_job.args, cwd=wf.output_dir)[0] != 0:
                put_error("coot fails with options: --no-graphics --python",
                          comment="It happens when scripts in .coot or "
                                  ".coot-preferences are not compatible\n"
                                  "with the --no-graphics mode.")
            raise
        for n, basename in enumerate(basenames):
            job = wf.render_r3d(basename, img_format=opt.img_format)
            if n % 3 == 0:
                job.run()
            else:  # minimal output
                job.run(show_progress=False, new_line=False)
        wf.delete_files([name+".r3d" for name in basenames])
    return coot_sh_path
def _generate_scripts_and_pictures(wf, opt, data):
    """Write Coot helper scripts and, if possible, render blob pictures.

    Args:
        wf: workflow object; its ``output_dir``, ``coot_py``, ``render_r3d``
            and ``delete_files`` members are used.
        opt: parsed options. ``img_format``, ``xyzout``, ``hklout`` and
            ``white_bg`` are read; ``img_format`` is reset to ``None`` when
            coot or Raster3D turn out to be unusable.
        data: mapping with ``"blobs"`` and ``"center"`` entries, or a falsy
            value when there is nothing to show.

    Returns:
        Path of the generated ``coot.sh`` convenience script.

    Raises:
        workflow.JobError: when the coot or rendering job fails.
    """
    blobs = data["blobs"] if data else []
    coot_path = coots.find_path()
    if not blobs:
        comment("\nUnmodelled blobs not found.")
    elif opt.img_format:
        # Pictures need a working coot with Python support and Raster3D.
        if coot_path:
            coot_ver = coots.find_version(coot_path)
            if coot_ver is None:
                put_error("coot not working(?), no pictures")
                opt.img_format = None
            elif "with python" not in coot_ver:
                put_error("coot with Python support is needed")
                opt.img_format = None
        else:
            put_error("No coot, no pictures")
            opt.img_format = None
        if not utils.syspath("render"):
            put_error("No Raster3d, no pictures")
            opt.img_format = None
        if opt.img_format:
            if len(blobs) == 1:
                comment("\nRendering density blob at (%.1f, %.1f, %.1f)"
                        % blobs[0])
            else:
                comment("\nRendering 2 largest blobs: at (%.1f, %.1f, %.1f) "
                        "and at (%.1f, %.1f, %.1f)" % (blobs[0]+blobs[1]))
    com = data and data["center"]

    # run-coot.py centers on the biggest blob. It uses relative paths -
    # it can be run only from the output directory, but is not affected
    # by moving that directory to different location.
    # There are blobN-coot.py scripts generated below with absolute paths.
    # write coot script (apart from pictures) that centers on the biggest blob
    script_path = os.path.join(wf.output_dir, "run-coot.py")
    script = coots.basic_script(pdb=opt.xyzout, mtz=opt.hklout,
                                center=(blobs and blobs[0]), toward=com,
                                white_bg=opt.white_bg)
    _write_script(script_path, script, executable=True)

    # blob images, for now for not more than two blobs
    d = os.path.abspath(wf.output_dir)
    for n, b in enumerate(blobs[:2]):
        py_path = os.path.join(wf.output_dir, "blob%d-coot.py" % (n+1))
        content = coots.basic_script(pdb=os.path.join(d, opt.xyzout),
                                     mtz=os.path.join(d, opt.hklout),
                                     center=blobs[n], toward=com,
                                     white_bg=opt.white_bg)
        _write_script(py_path, content)

    # coot.sh - one-line script for convenience
    if blobs:
        coot_sh_text = '{coot} --no-guano {out}/blob1-coot.py\n'
    else:
        # Point at the files refinement actually wrote rather than at the
        # hard-coded final.mtz/final.pdb names.
        coot_sh_text = '{coot} --no-guano {out}/{mtz} {out}/{pdb}\n'
    coot_sh_path = os.path.join(wf.output_dir, "coot.sh")
    _write_script(coot_sh_path,
                  coot_sh_text.format(coot=coot_path or 'coot',
                                      out=wf.output_dir,
                                      mtz=opt.hklout, pdb=opt.xyzout),
                  executable=True)

    if opt.img_format and blobs:
        script = ''
        basenames = []
        # as a workaround for buggy coot the maps are reloaded for each blob
        for n, b in enumerate(blobs[:2]):
            script += coots.basic_script(pdb=opt.xyzout, mtz=opt.hklout,
                                         center=b, toward=com,
                                         white_bg=opt.white_bg)
            rs, names = coots.r3d_script(center=b, toward=com,
                                         blobname="blob%s" % (n+1))
            script += rs
            basenames += names
        coot_job = wf.coot_py(script)
        try:
            coot_job.run()
        except workflow.JobError:
            # check for a possible cause to hint the user
            # (possible workaround: change $HOME to non-existing directory)
            if utils.silently_run(coot_job.args, cwd=wf.output_dir)[0] != 0:
                put_error("coot fails with options: --no-graphics --python",
                          comment="It happens when scripts in .coot or "
                                  ".coot-preferences are not compatible\n"
                                  "with the --no-graphics mode.")
            raise
        for n, basename in enumerate(basenames):
            job = wf.render_r3d(basename, img_format=opt.img_format)
            if n % 3 == 0:
                job.run()
            else:  # minimal output
                job.run(show_progress=False, new_line=False)
        wf.delete_files([name+".r3d" for name in basenames])
    return coot_sh_path