def show(request, dataset_id):
    """Render the details page for one dataset of the current project."""
    project = current_project(request)
    dataset = get_dataset_by_id(project, dataset_id)

    # the template iterates over these stats several times, so we need a
    # real list here rather than a one-shot generator
    processing_stats = list(_get_processing_info(project, dataset))

    refine_results = wrap_refine_results(
        project.get_datasets_refine_results(dataset)
    )

    context = {
        "dataset": DatasetInfo(dataset),
        "processing_stats": processing_stats,
        "refine_results": refine_results,
        # NOTE(review): 12.4 / wavelength looks like the keV<->Å conversion
        # (12.4 ~ hc in keV*Å) — confirm intended precision
        "energy": 12.4 / dataset.wavelength,
        "total_exposure": dataset.exposure_time * dataset.images,
        "total_rotation": dataset.images * dataset.angle_increment,
        "corner_resolution": dataset.resolution * 0.75625,
        "proc_logs": _get_processing_logs(project, dataset),
        "refine_logs": _get_refine_logs(project, dataset),
        "site": SITE,
        "beamline": SITE.get_beamline_info(),
    }

    return render(request, "dataset_info.html", context)
def run_dials(proj, filters, options):
    """Queue XIA2/DIALS processing jobs on the HPC for the selected datasets.

    proj: current Project
    filters: dataset selection filter
    options: form options dict; reads 'spacegroup', 'cellparam',
        'customdials' and 'friedel_law'
    """
    # Modules list for HPC env
    softwares = ["gopresto", versions.DIALS_MOD]

    jobs = JobsSet("XIA2/DIALS")
    hpc = SITE.get_hpc_runner()
    epoch = str(round(time.time()))

    for num, dset in enumerate(get_proc_datasets(proj, filters, "dials")):
        outdir, image_file = _get_dataset_params(proj, dset)

        spg = get_space_group_argument(Tools.DIALS, options["spacegroup"])

        if options["cellparam"] != "":
            # strip optional surrounding parentheses; DIALS takes the cell as a
            # plain comma-separated list.
            # (FIX: dropped the previous ",".join(....split(",")) round-trip,
            # which was an identity no-op)
            cellpar = options["cellparam"].replace("(", "").replace(")", "")
            unit_cell = f"unit_cell={cellpar}"
        else:
            unit_cell = ""

        customdials = options["customdials"]

        # NOTE(review): 'atom=X' presumably switches on anomalous handling
        # when Friedel's law is set — confirm against xia2 docs
        if options["friedel_law"] == "true":
            friedel = "atom=X"
        else:
            friedel = ""

        dials_commands, cpus = get_xia_dials_commands(
            spg, unit_cell, customdials, friedel, image_file, dset.images
        )

        batch = hpc.new_batch_file(
            "DIALS",
            project_script(proj, f"dials_fragmax_part{num}.sh"),
            project_log_path(proj, f"multi_xia2DIALS_{epoch}_%j_out.txt"),
            project_log_path(proj, f"multi_xia2DIALS_{epoch}_%j_err.txt"),
            cpus,
        )
        batch.set_options(
            time=Duration(hours=168),
            exclusive=True,
            nodes=1,
        )

        batch.purge_modules()
        batch.load_modules(softwares)

        batch.add_commands(
            f"mkdir -p {outdir}/dials",
            f"cd {outdir}/dials",
            *dials_commands,
        )
        batch.save()
        jobs.add_job(batch)

        add_update_job(jobs, hpc, proj, "dials", dset, batch)

    jobs.submit()
def processing_form(request):
    """Render the data-analysis form (pipelines, datasets, ligand tools)."""
    project = current_project(request)
    default_ligand_tool, ligand_tools = SITE.get_supported_ligand_tools()

    # present datasets in a stable, alphabetical-by-name order
    datasets = sorted(project.get_datasets(), key=lambda dataset: dataset.name)

    context = {
        "pipelines": SITE.get_supported_pipelines(),
        "datasets": datasets,
        "default_ligand_tool": default_ligand_tool,
        "ligand_tools": ligand_tools,
        "space_group_systems": by_system(),
    }

    return render(request, "data_analysis.html", context)
def run_xdsapp(project, filters, options):
    """Queue XDSAPP processing jobs on the HPC for the selected datasets."""
    # Modules list for HPC env
    softwares = ["gopresto", versions.XDSAPP_MOD]

    jobs = JobsSet("XDSAPP")
    hpc = SITE.get_hpc_runner()
    epoch = str(round(time.time()))

    for num, dset in enumerate(get_proc_datasets(project, filters, "xdsapp")):
        outdir, image_file = _get_dataset_params(project, dset)

        if options["spacegroup"] is None:
            spg = ""
        else:
            # XDSAPP wants space-separated cell parameters together with
            # the space group in a single quoted argument
            cellpar = " ".join(options["cellparam"].split(","))
            spacegroup = get_space_group_argument(Tools.XDSAPP, options["spacegroup"])
            spg = f"--spacegroup='{spacegroup} {cellpar}'"

        customxdsapp = options["customxdsapp"]
        friedel = "--fried=True" if options["friedel_law"] == "true" else "--fried=False"

        xdsapp_command, cpus = get_xdsapp_command(
            outdir, spg, customxdsapp, friedel, image_file, dset.images
        )

        batch = hpc.new_batch_file(
            "XDSAPP",
            project_script(project, f"xdsapp_fragmax_part{num}.sh"),
            project_log_path(project, f"multi_xdsapp_{epoch}_%j_out.txt"),
            project_log_path(project, f"multi_xdsapp_{epoch}_%j_err.txt"),
            cpus,
        )
        batch.set_options(
            time=Duration(hours=168),
            exclusive=True,
            nodes=1,
        )

        batch.purge_modules()
        batch.load_modules(softwares)

        batch.add_commands(
            f"mkdir -p {outdir}/xdsapp",
            f"cd {outdir}/xdsapp",
            xdsapp_command,
        )
        batch.save()
        jobs.add_job(batch)

        add_update_job(jobs, hpc, project, "xdsapp", dset, batch)

    jobs.submit()
def _save_pdb(project: Project, pdb_id, filename, pdb_data):
    """Store an uploaded PDB file plus derived filtered variants.

    Writes the file as-is, and three filtered copies: without HETATM
    records, without ANISOU records, and without both. For multi-chain
    models, additionally queues a 'phenix.ensembler' HPC job that writes
    a trimmed ensemble (*_txc.pdb).

    project: project the PDB belongs to
    pdb_id: external PDB entry ID
    filename: original upload file name
    pdb_data: raw file contents (bytes)
    """
    name = path.splitext(filename)[0]
    nohet_filename = f"{name}_noHETATM.pdb"
    noanisou_filename = f"{name}_noANISOU.pdb"
    nohetanisou_filename = f"{name}_noANISOU_noHETATM.pdb"
    txc_filename = f"{name}_txc.pdb"

    orig_pdb = _add_pdb_entry(project, filename, pdb_id)
    nohet_pdb = _add_pdb_entry(project, nohet_filename, pdb_id)
    noanisou_pdb = _add_pdb_entry(project, noanisou_filename, pdb_id)
    nohetnoanisou_pdb = _add_pdb_entry(project, nohetanisou_filename, pdb_id)

    # write original pdb file 'as-is' to models folder
    with open_proj_file(project, project.get_pdb_file(orig_pdb)) as dest:
        dest.write(pdb_data)

    # drop both HETATM and ANISOU records and write it as *_noANISOU_noHETATM.pdb
    #
    # BUG FIX: this previously used 'or', which is true for every line
    # (no line starts with both prefixes), so nothing was ever filtered.
    with open_proj_file(project, project.get_pdb_file(nohetnoanisou_pdb)) as dest:
        for line in pdb_data.splitlines(keepends=True):
            if not line.startswith(b"HETATM") and not line.startswith(b"ANISOU"):
                dest.write(line)

    # filter out all HETATM records and write it as *_noHETATM.pdb
    with open_proj_file(project, project.get_pdb_file(nohet_pdb)) as dest:
        for line in pdb_data.splitlines(keepends=True):
            if not line.startswith(b"HETATM"):
                dest.write(line)

    # filter out all ANISOU records and write it as *_noANISOU.pdb
    with open_proj_file(project, project.get_pdb_file(noanisou_pdb)) as dest:
        for line in pdb_data.splitlines(keepends=True):
            if not line.startswith(b"ANISOU"):
                dest.write(line)

    n_chains = pdb_chains(pdb_data.splitlines(keepends=True))
    if n_chains > 1:
        # multi-chain model: generate a trimmed ensemble on the HPC
        txc_pdb = _add_pdb_entry(project, txc_filename, pdb_id)
        input_pdb_name = path.join(project.models_dir, f"{name}.pdb")

        jobs = JobsSet("phenix ensembler")
        batch = SITE.get_hpc_runner().new_batch_file(
            "phenix ensembler",
            project_script(project, "phenix_ensembler.sh"),
            project_syslog_path(project, "phenix_ensembler_%j.out"),
            project_syslog_path(project, "phenix_ensembler_%j.err"),
        )
        batch.load_modules(["gopresto", PHENIX_MOD])
        batch.add_commands(
            f"cd {project.models_dir}",
            f"phenix.ensembler {input_pdb_name} trim=TRUE output.location='{project.models_dir}'",
            f"mv {project.models_dir}/ensemble_merged.pdb {project.get_pdb_file(txc_pdb)}",
        )
        batch.save()
        jobs.add_job(batch)
        jobs.submit()
def _setup_project_folders(project: Project):
    """Create the project directory tree with proposal-group ownership.

    The project root gets its owner group set to the proposal group and
    the SETGID bit set, so all files created under the project folder
    stay accessible to every user in the proposal group.
    """
    project_dir = project.project_dir

    # make the root directory
    project_dir.mkdir()

    # look-up proposal group ID and hand group ownership of the root to it
    proposal_group = grp.getgrnam(SITE.get_group_name(project))
    os.chown(project_dir, -1, proposal_group.gr_gid)

    # rwx for user and group, plus SETGID so new entries inherit the group
    mode = (
        stat.S_IRUSR
        | stat.S_IWUSR
        | stat.S_IXUSR
        | stat.S_ISGID
        | stat.S_IRGRP
        | stat.S_IWGRP
        | stat.S_IXGRP
    )
    os.chmod(project_dir, mode)

    # create misc project subdirectories
    for subdir in (
        project.logs_dir,
        project.system_logs_dir,
        project.scripts_dir,
        project.process_dir,
        project.results_dir,
        project.models_dir,
    ):
        subdir.mkdir()

    # copy our help scripts to project's directory
    _copy_scripts(project)
def run_autoproc(proj, filters, options):
    """Queue autoPROC processing jobs on the HPC for the selected datasets."""
    # Modules list for HPC env
    softwares = [
        "gopresto",
        versions.CCP4_MOD,
        versions.AUTOPROC_MOD,
        versions.DURIN_MOD,
    ]

    jobs = JobsSet("autoPROC")
    hpc = SITE.get_hpc_runner()
    epoch = str(round(time.time()))

    for num, dset in enumerate(get_proc_datasets(proj, filters, "autoproc")):
        batch = hpc.new_batch_file(
            "autoPROC",
            project_script(proj, f"autoproc_fragmax_part{num}.sh"),
            project_log_path(proj, f"multi_autoproc_{epoch}_%j_out.txt"),
            project_log_path(proj, f"multi_autoproc_{epoch}_%j_err.txt"),
        )
        batch.set_options(
            time=Duration(hours=168),
            exclusive=True,
            nodes=1,
            cpus_per_task=64,
            memory=DataSize(gigabyte=300),
        )

        batch.purge_modules()
        batch.load_modules(softwares)

        outdir, h5master, sample, num_images = _get_dataset_params(proj, dset)

        spacegroup = options["spacegroup"]
        spg = f"symm='{spacegroup}'" if spacegroup != "" else ""

        if options["cellparam"] == "":
            unit_cell = ""
        else:
            # autoPROC wants a space-separated cell, with parentheses stripped
            cellpar = " ".join(options["cellparam"].split(","))
            cellpar = cellpar.replace("(", "").replace(")", "")
            unit_cell = f"cell='{cellpar}'"

        customautoproc = options["customautoproc"]
        friedel = "-ANO" if options["friedel_law"] == "true" else "-noANO"

        batch.add_commands(
            f"rm -rf {outdir}/autoproc",
            f"mkdir -p {outdir}",
            f"cd {outdir}",
            get_autoproc_command(
                outdir, spg, unit_cell, customautoproc, friedel, h5master, num_images
            ),
        )
        batch.save()
        jobs.add_job(batch)

        add_update_job(jobs, hpc, proj, "autoproc", dset, batch)

    jobs.submit()
def _get_pipe_cmds():
    """Fetch the site-specific pipeline command definitions."""
    commands = SITE.get_pipeline_commands()
    return commands
def launch_refine_jobs(
    project: Project,
    filters,
    pdb_file,
    space_group,
    run_aimless,
    refine_tool,
    refine_tool_commands,
    cpus,
):
    """Queue refinement jobs on the HPC for the selected datasets.

    For every matching dataset and each of its input MTZ files, builds a
    batch job that fetches model and data into a scratch dir (via the
    project's crypt shell), optionally reruns aimless, runs the given
    refine tool commands and uploads the result.

    project: current Project
    filters: dataset selection filter
    pdb_file: path of the starting model, fetched as 'model.pdb'
    space_group: space group object; short_name is passed to aimless
    run_aimless: if truthy, re-merge the input MTZ with aimless first
    refine_tool: name of the refinement tool (also used for job bookkeeping)
    refine_tool_commands: shell commands that perform the refinement
    cpus: CPUs to request per batch job
    """
    epoch = round(time.time())
    jobs = JobsSet("Refine")
    hpc = SITE.get_hpc_runner()

    for dset in get_refine_datasets(project, filters, refine_tool):
        # a dataset can have several processed MTZs, one per processing tool
        for tool, input_mtz in _find_input_mtzs(project, dset):
            batch = hpc.new_batch_file(
                f"refine {tool} {dset.name}",
                project_script(project, f"refine_{tool}_{refine_tool}_{dset.name}.sh"),
                project_log_path(
                    project, f"refine_{tool}_{dset.name}_{epoch}_%j_out.txt"),
                project_log_path(
                    project, f"refine_{tool}_{dset.name}_{epoch}_%j_err.txt"),
                cpus,
            )
            batch.set_options(
                time=Duration(hours=12),
                nodes=1,
                mem_per_cpu=DataSize(gigabyte=5),
            )

            # work in a throw-away scratch dir; inputs are fetched through
            # the crypt shell so encrypted projects are handled transparently
            batch.add_commands(crypt_shell.crypt_cmd(project))
            batch.assign_variable("WORK_DIR", "`mktemp -d`")
            batch.add_commands(
                "cd $WORK_DIR",
                crypt_shell.fetch_file(project, pdb_file, "model.pdb"),
                crypt_shell.fetch_file(project, input_mtz, "input.mtz"),
            )

            # TODO: load tool specific modules?
            batch.load_modules(HPC_MODULES)

            if run_aimless:
                batch.add_commands(
                    _aimless_cmd(space_group.short_name, "input.mtz"))

            results_dir = Path(project.get_dataset_results_dir(dset), tool)

            # run the refinement, upload results, then clean up the scratch dir
            batch.add_commands(
                *refine_tool_commands,
                _upload_result_cmd(project, results_dir),
                "cd",
                "rm -rf $WORK_DIR",
            )

            batch.save()
            jobs.add_job(batch)

            add_update_job(jobs, hpc, project, refine_tool, dset, batch)

    jobs.submit()
def _write_main_script(
    project: Project, pandda_dir: Path, method, methodshort, options
):
    """Write and return the HPC batch file that runs the main PanDDA analysis.

    project: current Project (must not be encrypted, see below)
    pandda_dir: directory where the PanDDA analysis is run
    methodshort: short method label, used in the HPC job name
    options: PanDDA options; only 'reprocessZmap' is read here, the full
        dict is passed on (stringified) to the worker script

    Raises NotImplementedError for encrypted projects.
    """
    epoch = round(time.time())
    log_prefix = project_log_path(project, f"PanDDA_{method}_{epoch}_%j_")
    pandda_script = project_script(project, PANDDA_WORKER)
    giant_cluster = "/mxn/groups/biomax/wmxsoft/pandda/bin/giant.datasets.cluster"

    # when only re-processing Z-maps, the dataset clustering step is skipped
    if options["reprocessZmap"]:
        pandda_cluster = ""
    else:
        pandda_cluster = f"{giant_cluster} ./*/final.pdb pdb_label=foldername"

    hpc = SITE.get_hpc_runner()
    batch = hpc.new_batch_file(
        f"PDD{methodshort}",
        project_script(project, f"pandda_{method}.sh"),
        f"{log_prefix}out.txt",
        f"{log_prefix}err.txt",
        cpus=40,
    )
    batch.set_options(
        time=Duration(hours=99),
        exclusive=True,
        nodes=1,
    )

    if project.encrypted:
        # TODO: implement this?
        raise NotImplementedError("pandda for encrypted projects")
        # batch.add_command(crypt_shell.crypt_cmd(project))
        # batch.assign_variable("WORK_DIR", "`mktemp -d`")
        # batch.add_commands(
        #     "cd $WORK_DIR", crypt_shell.fetch_dir(project, data_dir, ".")
        # )
        #
        # batch.load_modules(["gopresto", versions.CCP4_MOD, versions.PYMOL_MOD])
        # batch.add_commands(
        #     pandda_cluster,
        #     f'python {pandda_script} . {project.protein} "{options}"',
        #     crypt_shell.upload_dir(
        #         project, "$WORK_DIR/pandda", path.join(data_dir, "pandda")
        #     ),
        #     crypt_shell.upload_dir(
        #         project,
        #         "$WORK_DIR/clustered-datasets",
        #         path.join(data_dir, "clustered-datasets"),
        #     ),
        # )
    else:
        batch.add_command(f"cd {pandda_dir}")
        add_pandda_init_commands(batch)
        batch.add_commands(
            pandda_cluster,
            f'python {pandda_script} {pandda_dir} {project.protein} "{options}"',
            f"chmod -R 777 {project.pandda_dir}",
        )

    # add commands to fix symlinks:
    # hard-link the resolved target over each matching symlink
    # ('ln -f "$(readlink -m ...)"'), run via find -exec bash -c
    ln_command = '\'ln -f "$(readlink -m "$0")" "$0"\' {} \\;'
    batch.add_commands(
        f"cd {project.pandda_dir}; find -type l -iname *-pandda-input.* -exec bash -c {ln_command}",
        f"cd {project.pandda_dir}; find -type l -iname *pandda-model.pdb -exec bash -c {ln_command}",
    )

    batch.save()
    return batch
def _write_prepare_script(
    project: Project,
    rn,
    method,
    dataset,
    pdb,
    mtz,
    resHigh,
    free_r_flag,
    native_f,
    sigma_fp,
    cif_method,
):
    """Write and return an HPC batch file preparing one dataset for PanDDA.

    The job fetches the refined pdb/mtz into a scratch dir, optionally
    generates ligand restraints (elbow or grade), regenerates free-R flags
    and map coefficients, then uploads everything to the dataset's PanDDA
    method directory.

    rn: short label used in the HPC job name
    resHigh: high-resolution limit applied by cad
    free_r_flag, native_f, sigma_fp: MTZ column labels to use
    cif_method: 'elbow' or 'grade' (asserted) — ligand restraint generator
    """
    epoch = round(time.time())
    output_dir = Path(project.pandda_method_dir(method), dataset.name)

    hpc = SITE.get_hpc_runner()
    batch = hpc.new_batch_file(
        f"PnD{rn}",
        project_script(project, f"pandda_prepare_{method}_{dataset.name}.sh"),
        project_log_path(project, f"{dataset.name}_PanDDA_{epoch}_%j_out.txt"),
        project_log_path(project, f"{dataset.name}_PanDDA_{epoch}_%j_err.txt"),
        cpus=1,
    )
    batch.set_options(time=Duration(minutes=15), memory=DataSize(gigabyte=5))

    # crypt shell handles (possibly encrypted) project file access
    batch.add_command(crypt_shell.crypt_cmd(project))

    # work in a throw-away scratch dir, upload to DEST_DIR at the end
    batch.assign_variable("DEST_DIR", output_dir)
    batch.assign_variable("WORK_DIR", "`mktemp -d`")
    batch.add_commands(
        "cd $WORK_DIR",
        crypt_shell.fetch_file(project, pdb, "final.pdb"),
        crypt_shell.fetch_file(project, mtz, "final.mtz"),
    )

    batch.purge_modules()
    batch.load_modules(
        ["gopresto", versions.PHENIX_MOD, versions.CCP4_MOD, versions.BUSTER_MOD]
    )

    if not dataset.crystal.is_apo():
        fragment = get_crystals_fragment(dataset.crystal)
        # non-apo crystal should have a fragment
        assert fragment
        if cif_method == "elbow":
            cif_cmd = f"phenix.elbow --smiles='{fragment.smiles}' --output=$WORK_DIR/{fragment.code} --opt\n"
        else:
            assert cif_method == "grade"
            cif_cmd = (
                f"grade '{fragment.smiles}' -ocif $WORK_DIR/{fragment.code}.cif "
                f"-opdb $WORK_DIR/{fragment.code}.pdb -nomogul\n"
            )

        batch.add_command(cif_cmd)

    batch.add_commands(
        # cap the resolution at resHigh with cad
        f'printf "monitor BRIEF\\n labin file 1 -\\n ALL\\n resolution file 1 999.0 {resHigh}\\n" | \\\n'
        " cad hklin1 $WORK_DIR/final.mtz hklout $WORK_DIR/final.mtz",
        # regenerate a complete free-R set
        "uniqueify -f FreeR_flag $WORK_DIR/final.mtz $WORK_DIR/final.mtz",
        f'printf "COMPLETE FREE={free_r_flag} \\nEND\\n" | \\\n'
        " freerflag hklin $WORK_DIR/final.mtz hklout $WORK_DIR/final_rfill.mtz",
        # compute map coefficients and make them the 'final.mtz' PanDDA reads
        f"phenix.maps final_rfill.mtz final.pdb maps.input.reflection_data.labels='{native_f},{sigma_fp}'",
        "mv final.mtz final_original.mtz",
        "mv final_map_coeffs.mtz final.mtz",
        # replace any previous result, upload, then clean up scratch
        "rm -rf $DEST_DIR",
        crypt_shell.upload_dir(project, "$WORK_DIR", "$DEST_DIR"),
        "rm -rf $WORK_DIR",
    )

    batch.save()
    return batch
def auto_ligand_fit(project, useLigFit, useRhoFit, filters, cifMethod, custom_ligfit, custom_rhofit):
    """Queue ligand-fitting HPC jobs for the selected refine results.

    For each (non-apo) refine result, generates ligand restraints with the
    chosen CIF tool and runs rhofit and/or phenix.ligandfit against the
    refined model and data.

    useLigFit / useRhoFit: which fitting tool(s) to run
    filters: refine-result selection filter
    cifMethod: 'elbow', 'acedrg' or 'grade'; anything else skips CIF generation
    custom_ligfit / custom_rhofit: extra command-line arguments appended
        to the respective tool invocations
    """
    # Modules for HPC env
    softwares = ["gopresto", versions.BUSTER_MOD, versions.PHENIX_MOD]

    jobs = JobsSet("Ligand Fit")
    hpc = SITE.get_hpc_runner()

    refine_results = _get_refine_results(project, filters, useLigFit, useRhoFit)

    for num, result in enumerate(refine_results):
        dataset = result.dataset
        if dataset.crystal.is_apo():
            # don't try to fit ligand to an apo crystal
            continue

        fragment = get_crystals_fragment(dataset.crystal)
        result_dir = project.get_refine_result_dir(result)
        pdb = Path(result_dir, "final.pdb")

        clear_tmp_cmd = ""
        cif_out = Path(result_dir, fragment.code)
        # build the restraint-generation command for the chosen tool
        if cifMethod == "elbow":
            cif_cmd = f"phenix.elbow --smiles='{fragment.smiles}' --output={cif_out}\n"
        elif cifMethod == "acedrg":
            cif_cmd = f"acedrg -i '{fragment.smiles}' -o {cif_out}\n"
            # acedrg leaves a *_TMP/ scratch dir behind; remove it afterwards
            clear_tmp_cmd = f"rm -rf {cif_out}_TMP/\n"
        elif cifMethod == "grade":
            cif_cmd = (
                f"rm -f {cif_out}.cif {cif_out}.pdb\n"
                f"grade '{fragment.smiles}' -ocif {cif_out}.cif -opdb {cif_out}.pdb -nomogul\n"
            )
        else:
            cif_cmd = ""

        rhofit_cmd = ""
        ligfit_cmd = ""

        ligCIF = f"{cif_out}.cif"
        ligPDB = f"{cif_out}.pdb"

        rhofit_outdir = Path(result_dir, "rhofit")
        ligfit_outdir = Path(result_dir, "ligfit")
        mtz_input = Path(result_dir, "final.mtz")

        if useRhoFit:
            # rhofit refuses to run into an existing dir; clear previous output
            if rhofit_outdir.exists():
                rhofit_cmd += f"rm -rf {rhofit_outdir}\n"
            rhofit_cmd += f"rhofit -l {ligCIF} -m {mtz_input} -p {pdb} -d {rhofit_outdir} {custom_rhofit}\n"

        if useLigFit:
            if ligfit_outdir.exists():
                ligfit_cmd += f"rm -rf {ligfit_outdir}\n"
            ligfit_cmd += f"mkdir -p {ligfit_outdir}\n"
            ligfit_cmd += f"cd {ligfit_outdir} \n"
            ligfit_cmd += (
                f"phenix.ligandfit data={mtz_input} model={pdb} ligand={ligPDB} "
                f"fill=True clean_up=True {custom_ligfit}\n")

        batch = hpc.new_batch_file(
            "autoLigfit",
            project_script(project, f"autoligand_{dataset.name}_{num}.sh"),
            project_log_path(project, "auto_ligfit_%j_out.txt"),
            project_log_path(project, "auto_ligfit_%j_err.txt"),
            cpus=1,
        )
        batch.set_options(time=Duration(hours=1))

        batch.purge_modules()
        batch.load_modules(softwares)

        batch.add_commands(
            cif_cmd,
            rhofit_cmd,
            ligfit_cmd,
        )

        batch.add_commands(clear_tmp_cmd)
        batch.save()
        jobs.add_job(batch)

        # NOTE: all the update commands needs to be chained to run after each other,
        # due to limitations (bugs!) in jobsd handling of 'run_after' dependencies.
        # Currently it does not work to specify that multiple jobs should be run after
        # a job is finished.
        #
        if useRhoFit:
            batch = add_update_job(jobs, hpc, project, "rhofit", dataset, batch)
        if useLigFit:
            add_update_job(jobs, hpc, project, "ligandfit", dataset, batch)

    jobs.submit()
def run_sbatch(sbatch_script, sbatch_options=None):
    """Hand a batch script (with optional scheduler options) to the site HPC runner."""
    runner = SITE.get_hpc_runner()
    runner.run_batch(sbatch_script, sbatch_options)