def preface_predict_workflow(
    configs: dict,
    sample_id: str,
    afterok: int,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
) -> int:
    """Run the PREFACE predict workflow"""
    LOG.info("Running the preface predict workflow")
    out_dir = configs["out"]
    singularity = singularity_base(
        configs["singularity"], configs["out"], configs["project"], configs["singularity_bind"]
    )
    preface_predict_cmd = get_preface_predict_cmd(
        singularity=singularity,
        out_dir=out_dir,
        model_dir=configs["preface"]["model_dir"],
        sample_id=sample_id,
    )
    jobid = slurm_api.run_job(
        name=f"preface_predict-{sample_id}",
        command=preface_predict_cmd,
        afterok=[afterok],
        dry_run=dry_run,
    )
    return jobid

def real_slurm_api_fixture(configs, out_dir):
    """Return a real slurm API"""
    _api = SlurmAPI(
        slurm_settings=configs["slurm"],
        out_dir=out_dir,
    )
    return _api

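# A minimal sketch of how this fixture might be wired up, assuming pytest and
# that `configs` and `out_dir` fixtures exist elsewhere in conftest.py; the
# fixture name and the test below are illustrative, not taken from the repo.
import pytest

real_slurm_api = pytest.fixture(name="real_slurm_api")(real_slurm_api_fixture)


def test_real_slurm_api(real_slurm_api):
    # SlurmAPI sets up log and script directories on construction (see test_init)
    assert real_slurm_api.log_dir
    assert real_slurm_api.scripts_dir
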
def cleanup_workflow(
    configs: dict,
    sample_outdir: Path,
    sample_id: str,
    afterok: list,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
) -> int:
    """Run the workflow to compress an analysis folder"""
    out_dir = configs["out"]
    cleanup_cmd = get_cleanup_cmd(
        out_dir=out_dir,
        sample_outdir=sample_outdir,
        sample_id=sample_id,
    )
    singularity = singularity_base(
        configs["singularity"], configs["out"], configs["project"], configs["singularity_bind"]
    )
    multiqc_cmd = get_multiqc_cmd(
        singularity=singularity, input_dir=sample_outdir, out_dir=sample_outdir
    )
    jobid = slurm_api.run_job(
        name=f"cleanup-{sample_id}",
        command="\n".join([multiqc_cmd, cleanup_cmd]),
        afterok=afterok,
        dry_run=dry_run,
    )
    return jobid

def align_individual(
    configs: dict,
    sample: dict,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
) -> int:
    """Align an individual with bwa on slurm"""
    out_dir = configs["out"]
    sample_id = sample["sample_id"]
    LOG.info("Aligning reads for %s", sample_id)
    single_end = sample["single_end"]
    fastq = sample["fastq"]
    if single_end:
        run_bwa = align_and_convert_single_end(
            config=configs, fastq=fastq[0], out=out_dir, sample_id=sample_id
        )
    else:
        run_bwa = align_and_convert_paired_end(
            config=configs, fastq=fastq, out=out_dir, sample_id=sample_id
        )
    align_jobid = slurm_api.run_job(
        name=f"bwaAln-{sample_id}", command=run_bwa, dry_run=dry_run
    )
    return align_jobid

def test_init():
    """Test initializing a slurm api"""
    # GIVEN an account, a time and an out dir
    account = "my_account"
    time = "5:00:00"
    out_dir = Path("tests/fixtures/output")
    # WHEN instantiating a slurm api
    api = SlurmAPI(slurm_settings={"account": account, "time": time}, out_dir=out_dir)
    # THEN assert it is set up correctly
    assert api.account == account
    assert api.log_dir
    assert api.scripts_dir

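# A hedged follow-up sketch exercising run_job in dry-run mode; only the call
# signature is taken from this module, and the assumption that a dry run still
# returns a (placeholder) job id is labelled as such.
def test_run_job_dry_run():
    # GIVEN a slurm api
    api = SlurmAPI(
        slurm_settings={"account": "my_account", "time": "5:00:00"},
        out_dir=Path("tests/fixtures/output"),
    )
    # WHEN submitting a job without touching the scheduler
    jobid = api.run_job(name="dry-test", command="echo hello", dry_run=True)
    # THEN assume a job id (possibly a dummy value) is returned
    assert jobid is not None
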
def make_reference(
    samples: Iterator[dict],
    configs: dict,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
) -> int:
    """Create a reference based on some samples"""
    out_dir = configs["out"]
    jobids = []
    for sample in samples:
        sample_id = sample["sample_id"]
        sample_outdir = configs["out"] / sample_id
        # This will fail if the directory already exists
        if not dry_run:
            sample_outdir.mkdir(parents=True)
        slurm_api.slurm_settings["ntasks"] = configs["align"]["ntasks"]
        slurm_api.slurm_settings["mem"] = configs["align"]["mem"]
        align_jobid = align_individual(
            configs=configs, sample=sample, slurm_api=slurm_api, dry_run=dry_run
        )
        jobids.append(align_jobid)
    # Restore the default slurm settings for the reference-building job
    slurm_api.slurm_settings["ntasks"] = configs["slurm"]["ntasks"]
    slurm_api.slurm_settings["mem"] = configs["slurm"]["mem"]
    singularity = singularity_base(
        configs["singularity"], configs["out"], configs["project"], configs["singularity_bind"]
    )
    mkref_cmd = get_mkref_cmd(
        singularity=singularity,
        out=str(out_dir),
        testbinsize=configs["wisecondorx"]["testbinsize"],
        prefacebinsize=configs["wisecondorx"]["prefacebinsize"],
    )
    jobid = slurm_api.run_job(
        name="wcxmkref",
        command=mkref_cmd,
        afterok=jobids,
        dry_run=dry_run,
    )
    slurm_api.slurm_settings["time"] = "1:00:00"
    pipe_complete(configs=configs, afterok=jobid, slurm_api=slurm_api, dry_run=dry_run)
    pipe_fail(configs=configs, slurm_api=slurm_api, dry_run=dry_run, afternotok=jobid)
    return jobid

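# A minimal usage sketch, assuming `configs`, `samples` and `slurm_api` have
# been set up as in base_command() below; dry_run=True skips directory creation
# and, presumably, job submission:
#
#     slurm_api = SlurmAPI(slurm_settings=configs["slurm"], out_dir=configs["out"])
#     mkref_jobid = make_reference(
#         samples=samples, configs=configs, slurm_api=slurm_api, dry_run=True
#     )
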
def pipe_fail(
    configs: dict,
    slurm_api: SlurmAPI,
    afternotok: int,
    dry_run: bool = False,
) -> int:
    """Run sed to update the analysis run flag"""
    out_dir = configs["out"]
    sed_replace_inplace_cmd = sed_replace_inplace(
        in_filename=str(out_dir / "analysis_status.json"),
        project_dir=str(out_dir),
        flag="FAIL",
        find_str=' "running"',
        replace_str=' "fail"',
    )
    jobid = slurm_api.run_job(
        name="fluffy-fail",
        command=sed_replace_inplace_cmd,
        afternotok=[afternotok],
        dry_run=dry_run,
    )
    return jobid

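# Illustrative only: sed_replace_inplace() is defined elsewhere, but given the
# arguments above the generated command presumably boils down to something like
#
#     sed -i 's/ "running"/ "fail"/' <out_dir>/analysis_status.json
#
# i.e. flipping the run flag in analysis_status.json when the watched job fails.
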
def wisecondor_xtest_workflow(
    configs: dict,
    sample_id: str,
    afterok: int,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
) -> int:
    """Run the WisecondorX test workflow"""
    out_dir = configs["out"]
    run_wcx_pipe = wisecondor_x_test(configs=configs, out_dir=out_dir, sample_id=sample_id)
    jobid = slurm_api.run_job(
        name=f"wcx-{sample_id}",
        command=run_wcx_pipe,
        afterok=[afterok],
        dry_run=dry_run,
    )
    return jobid

def estimate_ffy(
    configs: dict,
    sample_id: str,
    afterok: int,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
) -> int:
    """Estimate the fetal fraction with AMYCNE"""
    LOG.info("Running the fetal fraction estimation workflow with AMYCNE")
    out_dir = configs["out"]
    fetal_fraction_pipe = amycne_ffy(configs=configs, out_dir=out_dir, sample_id=sample_id)
    jobid = slurm_api.run_job(
        name=f"amycne-{sample_id}",
        command=fetal_fraction_pipe,
        afterok=[afterok],
        dry_run=dry_run,
    )
    return jobid

def picard_qc_workflow(
    configs: dict,
    sample_id: str,
    afterok: int,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
) -> int:
    """Run the Picard tools QC workflow"""
    LOG.info("Running the picard tools QC workflow")
    out_dir = configs["out"]
    picard_qc_pipe = picard_qc(configs=configs, out_dir=out_dir, sample_id=sample_id)
    jobid = slurm_api.run_job(
        name=f"picard_qc-{sample_id}",
        command=picard_qc_pipe,
        afterok=[afterok],
        dry_run=dry_run,
    )
    return jobid

def summarize_workflow(
    configs: dict,
    afterok: list,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
    batch_ref: bool = False,
    two_pass: bool = False,
) -> int:
    """Run the workflow to summarize an analysis"""
    LOG.info("Run the summarize workflow")
    out_dir = configs["out"]
    project_id = configs["project_id"]
    singularity = singularity_base(
        configs["singularity"], configs["out"], configs["project"], configs["singularity_bind"]
    )
    wd = os.path.dirname(os.path.realpath(__file__)).replace("fluffy/workflows", "fluffy/scripts")

    # The summarize command is identical in all modes except for the outfile name
    if two_pass:
        outfile = out_dir / f"{project_id}.1pass.csv"
    elif batch_ref:
        outfile = out_dir / f"{project_id}.2pass.csv"
    else:
        outfile = out_dir / f"{project_id}.csv"
    summarize_cmd = get_summarize_cmd(
        singularity=singularity,
        out_dir=out_dir,
        outfile=outfile,
        project_id=configs["project_id"],
        sample_sheet=configs["sample_sheet"],
        zscore=configs["summary"]["zscore"],
        mincnv=configs["summary"]["mincnv"],
        maxgcd=configs["summary"]["maxGCD"],
        maxatd=configs["summary"]["maxATD"],
        maxbin2bin=configs["summary"]["maxbin2bin"],
        maxdup=configs["summary"]["maxdup"],
        minreads=configs["summary"]["minreads"],
    )

    if not two_pass:
        multiqc_cmd = get_multiqc_cmd(singularity=singularity, input_dir=out_dir, out_dir=out_dir)
        if batch_ref:
            merge_cmd = get_merge_cmd(out_dir, configs["project_id"], wd)
            command_str = f"{multiqc_cmd}\n{summarize_cmd}\n{merge_cmd}"
        else:
            command_str = f"{multiqc_cmd}\n{summarize_cmd}"
    else:
        build_two_pass_ref = get_two_pass_ref_cmd(
            singularity,
            out_dir,
            configs["project_id"],
            wd,
            configs["wisecondorx"]["testbinsize"],
            configs["wisecondorx"]["prefacebinsize"],
        )
        command_str = f"{summarize_cmd}\n{build_two_pass_ref}"

    jobid = slurm_api.run_job(
        name="summarize_batch",
        command=command_str,
        afterok=afterok,
        dry_run=dry_run,
    )
    return jobid

def base_command():
    """Entry point for the fluffy CLI"""
    args, parser = base_arguments(sys.argv[1:])
    if args.version:
        print("Fluffy-{}".format(__version__))
        sys.exit()
    coloredlogs.install("INFO")
    ctx = {}
    sample = args.sample
    out = pathlib.Path(args.out)
    LOG.info("Create outdir %s (if it does not exist)", out)
    out.mkdir(parents=True, exist_ok=True)
    config = pathlib.Path(args.config)
    configs = get_configs(config)
    configs["out"] = out
    configs["name"] = config.name
    configs["config_path"] = config
    ctx["configs"] = configs
    new_config = out / config.name
    project_dir = pathlib.Path(args.project)
    ctx["project"] = project_dir
    configs["project"] = project_dir
    sacct_dir = out / "sacct"
    sacct_dir.mkdir(parents=True, exist_ok=True)
    with open(sample, "r") as samplesheet:
        ctx["samples"] = list(read_samplesheet(samplesheet, project_dir))
    ctx["sample_sheet"] = sample
    if args.slurm_params:
        # Override slurm settings given as "key:value" on the command line
        for param in args.slurm_params:
            configs["slurm"][param.split(":")[0]] = param.split(":")[-1]
    ctx["slurm_api"] = SlurmAPI(
        slurm_settings=configs["slurm"],
        out_dir=out,
    )
    if args.reference:
        reference(args, ctx, args.dry_run)
    elif args.rerun:
        parser.add_argument(
            "--batch-ref",
            help="Build a wisecondorX reference from the input batch (overrides refpreface and reftest)",
            required=False,
            action="store_true",
        )
        parser.add_argument(
            "--skip-preface",
            help="Skip preface fetal fraction estimation",
            required=False,
            action="store_true",
        )
        args, unknown = parser.parse_known_args()
        rerun(args, ctx, args.skip_preface, args.dry_run)
    elif args.analyse:
        parser.add_argument(
            "--batch-ref",
            help="Build a wisecondorX reference from the input batch (overrides refpreface and reftest)",
            required=False,
            action="store_true",
        )
        parser.add_argument(
            "--skip-preface",
            help="Skip preface fetal fraction estimation",
            required=False,
            action="store_true",
        )
        args, unknown = parser.parse_known_args()
        analyse(ctx, args.skip_preface, args.dry_run, args.batch_ref)
    else:
        parser.print_help()

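# A hedged CLI sketch; the exact flag spellings live in base_arguments(), which
# is not shown here, so treat these as illustrative rather than authoritative:
#
#     fluffy --config config.json --sample SampleSheet.csv \
#            --out /path/to/out --project /path/to/fastq-dir \
#            analyse --skip-preface --dry-run
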
def analyse_workflow(
    samples: Iterator[dict],
    configs: dict,
    slurm_api: SlurmAPI,
    skip_preface: bool = False,
    dry_run: bool = False,
    batch_ref: bool = True,
) -> int:
    """Run the wisecondor analysis"""
    jobids = []
    sample_jobids = {}
    for sample in samples:
        sample_id = sample["sample_id"]
        sample_jobids[sample_id] = []
        sample_outdir = configs["out"] / sample_id
        # This will fail if the directory already exists
        if not dry_run:
            sample_outdir.mkdir(parents=True)
        slurm_api.slurm_settings["ntasks"] = configs["align"]["ntasks"]
        slurm_api.slurm_settings["mem"] = configs["align"]["mem"]
        align_jobid = align_individual(
            configs=configs,
            sample=sample,
            slurm_api=slurm_api,
            dry_run=dry_run,
        )
        jobids.append(align_jobid)
        sample_jobids[sample_id].append(align_jobid)

    if batch_ref:
        binsize_test = configs["wisecondorx"]["testbinsize"]
        binsize_preface = configs["wisecondorx"]["prefacebinsize"]
        out_dir = configs["out"]
        configs["wisecondorx"]["reftest"] = f"{str(out_dir).rstrip('/')}.wcxref.{binsize_test}.npz"
        configs["wisecondorx"]["refpreface"] = f"{str(out_dir).rstrip('/')}.wcxref.{binsize_preface}.npz"
        singularity = singularity_base(
            configs["singularity"], configs["out"], configs["project"], configs["singularity_bind"]
        )
        mkref_cmd = get_mkref_cmd(
            singularity=singularity,
            out=str(out_dir),
            testbinsize=configs["wisecondorx"]["testbinsize"],
            prefacebinsize=configs["wisecondorx"]["prefacebinsize"],
        )
        make_ref_jobid = slurm_api.run_job(
            name="wcxmkref",
            command=mkref_cmd,
            afterok=jobids,
            dry_run=dry_run,
        )
        for sample in samples:
            sample_id = sample["sample_id"]
            sample_jobids[sample_id].append(make_ref_jobid)
        first_pass_jobid, jobids, slurm_api = run_analysis(
            samples=samples,
            sample_jobids=sample_jobids,
            configs=configs,
            slurm_api=slurm_api,
            skip_preface=skip_preface,
            dry_run=dry_run,
            batch_ref=batch_ref,
            jobids=jobids,
            two_pass=True,
        )
        for sample in samples:
            sample_id = sample["sample_id"]
            sample_jobids[sample_id].append(first_pass_jobid)

    summarize_jobid, jobids, slurm_api = run_analysis(
        samples=samples,
        sample_jobids=sample_jobids,
        configs=configs,
        slurm_api=slurm_api,
        skip_preface=skip_preface,
        dry_run=dry_run,
        batch_ref=batch_ref,
        jobids=jobids,
        two_pass=False,
    )
    slurm_api.print_submitted_jobs()
    slurm_api.slurm_settings["time"] = "1:00:00"
    pipe_complete(configs=configs, afterok=summarize_jobid, slurm_api=slurm_api, dry_run=dry_run)
    pipe_fail(configs=configs, slurm_api=slurm_api, dry_run=dry_run, afternotok=summarize_jobid)
    return summarize_jobid

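# The dependency graph wired up above, per batch (grounded in the calls in this
# module): every bwaAln job gates wcxmkref (batch_ref mode only); each sample's
# latest job then gates its amycne, picard_qc and wcx jobs; wcx gates
# preface_predict unless skip_preface; all of a sample's jobs gate its cleanup
# job (final pass only); and everything gates summarize_batch, whose outcome
# triggers pipe_complete (afterok) or pipe_fail (afternotok).
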
def run_analysis(
    samples: Iterator[dict],
    sample_jobids: dict,
    configs: dict,
    slurm_api: SlurmAPI,
    skip_preface: bool,
    dry_run: bool,
    batch_ref: bool,
    jobids: list,
    two_pass: bool,
):
    """Submit the per-sample analysis jobs and the batch summary"""
    for sample in samples:
        sample_id = sample["sample_id"]
        sample_outdir = configs["out"] / sample_id
        slurm_api.slurm_settings["ntasks"] = configs["slurm"]["ntasks"]
        slurm_api.slurm_settings["mem"] = configs["slurm"]["mem"]
        # The most recent job this sample depends on: the alignment job, or the
        # mkref/first-pass job appended by analyse_workflow in batch_ref mode
        align_jobid = sample_jobids[sample_id][-1]
        ffy_jobid = estimate_ffy(
            configs=configs,
            sample_id=sample_id,
            afterok=align_jobid,
            slurm_api=slurm_api,
            dry_run=dry_run,
        )
        jobids.append(ffy_jobid)
        sample_jobids[sample_id].append(ffy_jobid)
        picard_jobid = picard_qc_workflow(
            configs=configs,
            sample_id=sample_id,
            afterok=align_jobid,
            slurm_api=slurm_api,
            dry_run=dry_run,
        )
        jobids.append(picard_jobid)
        sample_jobids[sample_id].append(picard_jobid)
        wcx_test_jobid = wisecondor_xtest_workflow(
            configs=configs,
            sample_id=sample_id,
            afterok=align_jobid,
            slurm_api=slurm_api,
            dry_run=dry_run,
        )
        jobids.append(wcx_test_jobid)
        sample_jobids[sample_id].append(wcx_test_jobid)
        if not skip_preface:
            preface_predict_jobid = preface_predict_workflow(
                configs=configs,
                sample_id=sample_id,
                afterok=wcx_test_jobid,
                slurm_api=slurm_api,
                dry_run=dry_run,
            )
            jobids.append(preface_predict_jobid)
            sample_jobids[sample_id].append(preface_predict_jobid)
        if not two_pass:
            cleanup_jobid = cleanup_workflow(
                configs=configs,
                sample_outdir=sample_outdir,
                sample_id=sample_id,
                afterok=sample_jobids[sample_id],
                slurm_api=slurm_api,
                dry_run=dry_run,
            )
            jobids.append(cleanup_jobid)
    summarize_jobid = summarize_workflow(
        configs=configs,
        afterok=jobids,
        slurm_api=slurm_api,
        dry_run=dry_run,
        batch_ref=batch_ref,
        two_pass=two_pass,
    )
    return summarize_jobid, jobids, slurm_api

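# A minimal single-pass invocation sketch (illustrative; it mirrors the final
# call in analyse_workflow above), assuming each sample's align job id is
# already recorded in sample_jobids:
#
#     summarize_jobid, jobids, slurm_api = run_analysis(
#         samples=samples, sample_jobids=sample_jobids, configs=configs,
#         slurm_api=slurm_api, skip_preface=False, dry_run=True,
#         batch_ref=False, jobids=jobids, two_pass=False,
#     )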