def add_update_job(
    jobs_set: JobsSet, hpc, project: Project, tool, dataset, dataset_batch
):
    batch = hpc.new_batch_file(
        f"update results for {dataset.name}",
        "./manage.py",
        project_syslog_path(project, "update_dataset_results-%j.stdout"),
        project_syslog_path(project, "update_dataset_results-%j.stderr"),
    )

    jobs_set.add_job(
        batch,
        ["update", f"{project.id}", tool, f"{dataset.id}"],
        run_after=[dataset_batch],
        run_on=JobsSet.Destination.LOCAL,
    )

    return batch
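
# Illustrative sketch (job names hypothetical): add_update_job returns the
# update batch so callers can chain further jobs behind it via 'run_after',
# e.g. to make a second update run only once the first has finished:
#
#   first = add_update_job(jobs, hpc, project, "rhofit", dataset, batch)
#   add_update_job(jobs, hpc, project, "ligandfit", dataset, first)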

def pandda_worker(project: Project, method, methodshort, options, cif_method):
    rn = str(randint(10000, 99999))
    prepare_scripts = []

    proc_tool, refine_tool = method.split("_")
    refine_results = _get_best_results(project, proc_tool, refine_tool)

    selection = PanddaSelectedDatasets()

    for refine_result in refine_results:
        res_dir = project.get_refine_result_dir(refine_result)
        final_pdb = Path(res_dir, "final.pdb")
        final_mtz = Path(res_dir, "final.mtz")

        selection.add(refine_result.dataset.name, final_pdb)

        res_high, free_r_flag, native_f, sigma_fp = read_info(project, str(final_mtz))

        script = _write_prepare_script(
            project,
            rn,
            method,
            refine_result.dataset,
            final_pdb,
            final_mtz,
            res_high,
            free_r_flag,
            native_f,
            sigma_fp,
            cif_method,
        )

        prepare_scripts.append(script)

    pandda_dir = Path(project.pandda_dir, method)
    pandda_dir.mkdir(parents=True, exist_ok=True)

    selection.save(Path(pandda_dir))

    main_script = _write_main_script(project, pandda_dir, method, methodshort, options)

    #
    # submit all PanDDA scripts to the HPC
    #
    jobs = JobsSet("PanDDa")

    for prep_script in prepare_scripts:
        jobs.add_job(prep_script)

    jobs.add_job(main_script, run_after=prepare_scripts)

    jobs.submit()

def run_dials(proj, filters, options):
    # Modules list for HPC env
    softwares = ["gopresto", versions.DIALS_MOD]

    jobs = JobsSet("XIA2/DIALS")
    hpc = SITE.get_hpc_runner()
    epoch = str(round(time.time()))

    for num, dset in enumerate(get_proc_datasets(proj, filters, "dials")):
        outdir, image_file = _get_dataset_params(proj, dset)

        spg = get_space_group_argument(Tools.DIALS, options["spacegroup"])

        if options["cellparam"] != "":
            # strip any parentheses around the comma-separated cell parameters
            cellpar = options["cellparam"].replace("(", "").replace(")", "")
            unit_cell = f"unit_cell={cellpar}"
        else:
            unit_cell = ""

        customdials = options["customdials"]

        if options["friedel_law"] == "true":
            friedel = "atom=X"
        else:
            friedel = ""

        dials_commands, cpus = get_xia_dials_commands(
            spg, unit_cell, customdials, friedel, image_file, dset.images
        )

        batch = hpc.new_batch_file(
            "DIALS",
            project_script(proj, f"dials_fragmax_part{num}.sh"),
            project_log_path(proj, f"multi_xia2DIALS_{epoch}_%j_out.txt"),
            project_log_path(proj, f"multi_xia2DIALS_{epoch}_%j_err.txt"),
            cpus,
        )
        batch.set_options(
            time=Duration(hours=168),
            exclusive=True,
            nodes=1,
        )

        batch.purge_modules()
        batch.load_modules(softwares)

        batch.add_commands(
            f"mkdir -p {outdir}/dials",
            f"cd {outdir}/dials",
            *dials_commands,
        )

        batch.save()
        jobs.add_job(batch)

        add_update_job(jobs, hpc, proj, "dials", dset, batch)

    jobs.submit()
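
# Example of the 'options' mapping the run_* functions expect (values are
# illustrative, not defaults); 'cellparam' may include parentheses, which are
# stripped before the unit cell is passed to the processing tool:
#
#   options = {
#       "spacegroup": "P212121",
#       "cellparam": "(77.5, 77.5, 86.1, 90.0, 90.0, 90.0)",
#       "customdials": "",
#       "friedel_law": "true",
#   }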

def run_xdsapp(project, filters, options):
    # Modules list for HPC env
    softwares = ["gopresto", versions.XDSAPP_MOD]

    jobs = JobsSet("XDSAPP")
    hpc = SITE.get_hpc_runner()
    epoch = str(round(time.time()))

    for num, dset in enumerate(get_proc_datasets(project, filters, "xdsapp")):
        outdir, image_file = _get_dataset_params(project, dset)

        if options["spacegroup"] is not None:
            cellpar = " ".join(options["cellparam"].split(","))
            spacegroup = get_space_group_argument(Tools.XDSAPP, options["spacegroup"])
            spg = f"--spacegroup='{spacegroup} {cellpar}'"
        else:
            spg = ""

        customxdsapp = options["customxdsapp"]
        if options["friedel_law"] == "true":
            friedel = "--fried=True"
        else:
            friedel = "--fried=False"

        xdsapp_command, cpus = get_xdsapp_command(
            outdir, spg, customxdsapp, friedel, image_file, dset.images
        )

        batch = hpc.new_batch_file(
            "XDSAPP",
            project_script(project, f"xdsapp_fragmax_part{num}.sh"),
            project_log_path(project, f"multi_xdsapp_{epoch}_%j_out.txt"),
            project_log_path(project, f"multi_xdsapp_{epoch}_%j_err.txt"),
            cpus,
        )
        batch.set_options(
            time=Duration(hours=168),
            exclusive=True,
            nodes=1,
        )

        batch.purge_modules()
        batch.load_modules(softwares)

        batch.add_commands(
            f"mkdir -p {outdir}/xdsapp",
            f"cd {outdir}/xdsapp",
            xdsapp_command,
        )

        batch.save()
        jobs.add_job(batch)

        add_update_job(jobs, hpc, project, "xdsapp", dset, batch)

    jobs.submit()

def _save_pdb(project: Project, pdb_id, filename, pdb_data):
    name = path.splitext(filename)[0]
    nohet_filename = f"{name}_noHETATM.pdb"
    noanisou_filename = f"{name}_noANISOU.pdb"
    nohetanisou_filename = f"{name}_noANISOU_noHETATM.pdb"
    txc_filename = f"{name}_txc.pdb"

    orig_pdb = _add_pdb_entry(project, filename, pdb_id)
    nohet_pdb = _add_pdb_entry(project, nohet_filename, pdb_id)
    noanisou_pdb = _add_pdb_entry(project, noanisou_filename, pdb_id)
    nohetnoanisou_pdb = _add_pdb_entry(project, nohetanisou_filename, pdb_id)

    # write original pdb file 'as-is' to models folder
    with open_proj_file(project, project.get_pdb_file(orig_pdb)) as dest:
        dest.write(pdb_data)

    # drop both HETATM and ANISOU records and write the result as
    # *_noANISOU_noHETATM.pdb (the conditions must be combined with 'and';
    # 'or' would always be true and keep every line)
    with open_proj_file(project, project.get_pdb_file(nohetnoanisou_pdb)) as dest:
        for line in pdb_data.splitlines(keepends=True):
            if not line.startswith(b"HETATM") and not line.startswith(b"ANISOU"):
                dest.write(line)

    # drop HETATM records and write the result as *_noHETATM.pdb
    with open_proj_file(project, project.get_pdb_file(nohet_pdb)) as dest:
        for line in pdb_data.splitlines(keepends=True):
            if not line.startswith(b"HETATM"):
                dest.write(line)

    # drop ANISOU records and write the result as *_noANISOU.pdb
    with open_proj_file(project, project.get_pdb_file(noanisou_pdb)) as dest:
        for line in pdb_data.splitlines(keepends=True):
            if not line.startswith(b"ANISOU"):
                dest.write(line)

    n_chains = pdb_chains(pdb_data.splitlines(keepends=True))
    if n_chains > 1:
        txc_pdb = _add_pdb_entry(project, txc_filename, pdb_id)
        input_pdb_name = path.join(project.models_dir, f"{name}.pdb")

        jobs = JobsSet("phenix ensembler")
        batch = SITE.get_hpc_runner().new_batch_file(
            "phenix ensembler",
            project_script(project, "phenix_ensembler.sh"),
            project_syslog_path(project, "phenix_ensembler_%j.out"),
            project_syslog_path(project, "phenix_ensembler_%j.err"),
        )
        batch.load_modules(["gopresto", PHENIX_MOD])
        batch.add_commands(
            f"cd {project.models_dir}",
            f"phenix.ensembler {input_pdb_name} trim=TRUE output.location='{project.models_dir}'",
            f"mv {project.models_dir}/ensemble_merged.pdb {project.get_pdb_file(txc_pdb)}",
        )
        batch.save()
        jobs.add_job(batch)
        jobs.submit()
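
# Minimal sketch of the record filter used above (hypothetical helper, not
# part of the module): bytes.startswith accepts a tuple of prefixes, so the
# three filtering loops could share a single predicate.
def _keep_pdb_line(line: bytes, dropped_records: tuple) -> bool:
    # keep the line only if it starts with none of the dropped record names,
    # e.g. dropped_records=(b"HETATM", b"ANISOU")
    return not line.startswith(dropped_records)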

def run_autoproc(proj, filters, options):
    # Modules list for HPC env
    softwares = [
        "gopresto",
        versions.CCP4_MOD,
        versions.AUTOPROC_MOD,
        versions.DURIN_MOD,
    ]

    jobs = JobsSet("autoPROC")
    hpc = SITE.get_hpc_runner()
    epoch = str(round(time.time()))

    for num, dset in enumerate(get_proc_datasets(proj, filters, "autoproc")):
        batch = hpc.new_batch_file(
            "autoPROC",
            project_script(proj, f"autoproc_fragmax_part{num}.sh"),
            project_log_path(proj, f"multi_autoproc_{epoch}_%j_out.txt"),
            project_log_path(proj, f"multi_autoproc_{epoch}_%j_err.txt"),
        )
        batch.set_options(
            time=Duration(hours=168),
            exclusive=True,
            nodes=1,
            cpus_per_task=64,
            memory=DataSize(gigabyte=300),
        )

        batch.purge_modules()
        batch.load_modules(softwares)

        outdir, h5master, sample, num_images = _get_dataset_params(proj, dset)

        if options["spacegroup"] != "":
            spacegroup = options["spacegroup"]
            spg = f"symm='{spacegroup}'"
        else:
            spg = ""

        if options["cellparam"] != "":
            # convert comma-separated cell parameters to space-separated,
            # stripping any parentheses
            cellpar = " ".join(options["cellparam"].split(","))
            cellpar = cellpar.replace("(", "").replace(")", "")
            unit_cell = f"cell='{cellpar}'"
        else:
            unit_cell = ""

        customautoproc = options["customautoproc"]

        if options["friedel_law"] == "true":
            friedel = "-ANO"
        else:
            friedel = "-noANO"

        batch.add_commands(
            f"rm -rf {outdir}/autoproc",
            f"mkdir -p {outdir}",
            f"cd {outdir}",
            get_autoproc_command(
                outdir, spg, unit_cell, customautoproc, friedel, h5master, num_images
            ),
        )

        batch.save()
        jobs.add_job(batch)

        add_update_job(jobs, hpc, proj, "autoproc", dset, batch)

    jobs.submit()

def launch_refine_jobs(
    project: Project,
    filters,
    pdb_file,
    space_group,
    run_aimless,
    refine_tool,
    refine_tool_commands,
    cpus,
):
    epoch = round(time.time())
    jobs = JobsSet("Refine")
    hpc = SITE.get_hpc_runner()

    for dset in get_refine_datasets(project, filters, refine_tool):
        for tool, input_mtz in _find_input_mtzs(project, dset):
            batch = hpc.new_batch_file(
                f"refine {tool} {dset.name}",
                project_script(project, f"refine_{tool}_{refine_tool}_{dset.name}.sh"),
                project_log_path(project, f"refine_{tool}_{dset.name}_{epoch}_%j_out.txt"),
                project_log_path(project, f"refine_{tool}_{dset.name}_{epoch}_%j_err.txt"),
                cpus,
            )
            batch.set_options(
                time=Duration(hours=12),
                nodes=1,
                mem_per_cpu=DataSize(gigabyte=5),
            )

            batch.add_commands(crypt_shell.crypt_cmd(project))

            # fetch the model and input reflections into a scratch work dir
            batch.assign_variable("WORK_DIR", "`mktemp -d`")
            batch.add_commands(
                "cd $WORK_DIR",
                crypt_shell.fetch_file(project, pdb_file, "model.pdb"),
                crypt_shell.fetch_file(project, input_mtz, "input.mtz"),
            )

            # TODO: load tool specific modules?
            batch.load_modules(HPC_MODULES)

            if run_aimless:
                batch.add_commands(_aimless_cmd(space_group.short_name, "input.mtz"))

            results_dir = Path(project.get_dataset_results_dir(dset), tool)

            # run the refinement, upload results, then clean up the work dir
            batch.add_commands(
                *refine_tool_commands,
                _upload_result_cmd(project, results_dir),
                "cd",
                "rm -rf $WORK_DIR",
            )

            batch.save()
            jobs.add_job(batch)

            add_update_job(jobs, hpc, project, refine_tool, dset, batch)

    jobs.submit()

def auto_ligand_fit(
    project, useLigFit, useRhoFit, filters, cifMethod, custom_ligfit, custom_rhofit
):
    # Modules for HPC env
    softwares = ["gopresto", versions.BUSTER_MOD, versions.PHENIX_MOD]

    jobs = JobsSet("Ligand Fit")
    hpc = SITE.get_hpc_runner()

    refine_results = _get_refine_results(project, filters, useLigFit, useRhoFit)

    for num, result in enumerate(refine_results):
        dataset = result.dataset
        if dataset.crystal.is_apo():
            # don't try to fit ligand to an apo crystal
            continue

        fragment = get_crystals_fragment(dataset.crystal)
        result_dir = project.get_refine_result_dir(result)
        pdb = Path(result_dir, "final.pdb")

        clear_tmp_cmd = ""
        cif_out = Path(result_dir, fragment.code)
        if cifMethod == "elbow":
            cif_cmd = f"phenix.elbow --smiles='{fragment.smiles}' --output={cif_out}\n"
        elif cifMethod == "acedrg":
            cif_cmd = f"acedrg -i '{fragment.smiles}' -o {cif_out}\n"
            clear_tmp_cmd = f"rm -rf {cif_out}_TMP/\n"
        elif cifMethod == "grade":
            cif_cmd = (
                f"rm -f {cif_out}.cif {cif_out}.pdb\n"
                f"grade '{fragment.smiles}' -ocif {cif_out}.cif -opdb {cif_out}.pdb -nomogul\n"
            )
        else:
            cif_cmd = ""

        rhofit_cmd = ""
        ligfit_cmd = ""

        ligCIF = f"{cif_out}.cif"
        ligPDB = f"{cif_out}.pdb"

        rhofit_outdir = Path(result_dir, "rhofit")
        ligfit_outdir = Path(result_dir, "ligfit")
        mtz_input = Path(result_dir, "final.mtz")

        if useRhoFit:
            if rhofit_outdir.exists():
                rhofit_cmd += f"rm -rf {rhofit_outdir}\n"
            rhofit_cmd += f"rhofit -l {ligCIF} -m {mtz_input} -p {pdb} -d {rhofit_outdir} {custom_rhofit}\n"

        if useLigFit:
            if ligfit_outdir.exists():
                ligfit_cmd += f"rm -rf {ligfit_outdir}\n"
            ligfit_cmd += f"mkdir -p {ligfit_outdir}\n"
            ligfit_cmd += f"cd {ligfit_outdir}\n"
            ligfit_cmd += (
                f"phenix.ligandfit data={mtz_input} model={pdb} ligand={ligPDB} "
                f"fill=True clean_up=True {custom_ligfit}\n"
            )

        batch = hpc.new_batch_file(
            "autoLigfit",
            project_script(project, f"autoligand_{dataset.name}_{num}.sh"),
            project_log_path(project, "auto_ligfit_%j_out.txt"),
            project_log_path(project, "auto_ligfit_%j_err.txt"),
            cpus=1,
        )
        batch.set_options(time=Duration(hours=1))

        batch.purge_modules()
        batch.load_modules(softwares)

        batch.add_commands(
            cif_cmd,
            rhofit_cmd,
            ligfit_cmd,
        )

        batch.add_commands(clear_tmp_cmd)

        batch.save()
        jobs.add_job(batch)

        # NOTE: all the update jobs need to be chained to run one after another,
        # due to limitations (bugs!) in jobsd's handling of 'run_after'
        # dependencies. Currently it does not work to specify that multiple
        # jobs should run after a single job has finished.
        if useRhoFit:
            batch = add_update_job(jobs, hpc, project, "rhofit", dataset, batch)
        if useLigFit:
            add_update_job(jobs, hpc, project, "ligandfit", dataset, batch)

    jobs.submit()
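
# Resulting dependency chain for one dataset when both fitters are enabled
# (job names illustrative): because 'batch' is rebound to the rhofit update
# job before the ligandfit update is added, the jobs run strictly in sequence:
#
#   autoLigfit  ->  update rhofit  ->  update ligandfit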