def cleanup_workflow(
    configs: dict,
    sample_outdir: Path,
    sample_id: str,
    afterok: list,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
) -> int:
    """Run the workflow to compress an analysis folder"""
    out_dir = configs["out"]
    cleanup_cmd = get_cleanup_cmd(
        out_dir=out_dir,
        sample_outdir=sample_outdir,
        sample_id=sample_id,
    )
    singularity = singularity_base(
        configs["singularity"],
        configs["out"],
        configs["project"],
        configs["singularity_bind"],
    )
    multiqc_cmd = get_multiqc_cmd(
        singularity=singularity, input_dir=sample_outdir, out_dir=sample_outdir
    )
    jobid = slurm_api.run_job(
        name=f"cleanup-{sample_id}",
        command="\n".join([multiqc_cmd, cleanup_cmd]),
        afterok=afterok,
        dry_run=dry_run,
    )
    return jobid


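# Every workflow in this module prepends its shell commands with the string
# returned by singularity_base(). Its implementation is not shown here; a
# minimal sketch consistent with how it is called might look as follows.
# It is kept commented out so it does not shadow the real helper, and the
# exact flags and bind-path joining are assumptions, not the real code:
#
# def singularity_base(
#     singularity: str, out_dir: Path, project_dir: Path, singularity_bind: str
# ) -> str:
#     """Hypothetical sketch: assemble a `singularity exec` command prefix."""
#     bind_paths = ",".join(str(p) for p in (out_dir, project_dir, singularity_bind))
#     return f"singularity exec --bind {bind_paths} {singularity}"

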
def preface_predict_workflow(
    configs: dict,
    sample_id: str,
    afterok: int,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
):
    """Run the preface predict workflow"""
    LOG.info("Running the preface predict workflow")
    out_dir = configs["out"]
    singularity = singularity_base(
        configs["singularity"],
        configs["out"],
        configs["project"],
        configs["singularity_bind"],
    )
    preface_predict_cmd = get_preface_predict_cmd(
        singularity=singularity,
        out_dir=out_dir,
        model_dir=configs["preface"]["model_dir"],
        sample_id=sample_id,
    )
    jobid = slurm_api.run_job(
        name=f"preface_predict-{sample_id}",
        command=preface_predict_cmd,
        afterok=[afterok],
        dry_run=dry_run,
    )
    return jobid


def make_reference(
    samples: Iterator[dict],
    configs: dict,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
) -> int:
    """Create a reference based on some samples"""
    out_dir = configs["out"]
    jobids = []
    for sample in samples:
        sample_id = sample["sample_id"]
        sample_outdir = configs["out"] / sample_id
        # This will fail if the directory already exists
        if not dry_run:
            sample_outdir.mkdir(parents=True)
        slurm_api.slurm_settings["ntasks"] = configs["align"]["ntasks"]
        slurm_api.slurm_settings["mem"] = configs["align"]["mem"]
        align_jobid = align_individual(
            configs=configs, sample=sample, slurm_api=slurm_api, dry_run=dry_run
        )
        jobids.append(align_jobid)
    slurm_api.slurm_settings["ntasks"] = configs["slurm"]["ntasks"]
    slurm_api.slurm_settings["mem"] = configs["slurm"]["mem"]
    singularity = singularity_base(
        configs["singularity"],
        configs["out"],
        configs["project"],
        configs["singularity_bind"],
    )
    mkref_cmd = get_mkref_cmd(
        singularity=singularity,
        out=str(out_dir),
        testbinsize=configs["wisecondorx"]["testbinsize"],
        prefacebinsize=configs["wisecondorx"]["prefacebinsize"],
    )
    jobid = slurm_api.run_job(
        name="wcxmkref",
        command=mkref_cmd,
        afterok=jobids,
        dry_run=dry_run,
    )
    slurm_api.slurm_settings["time"] = "1:00:00"
    pipe_complete(configs=configs, afterok=jobid, slurm_api=slurm_api, dry_run=dry_run)
    pipe_fail(configs=configs, slurm_api=slurm_api, dry_run=dry_run, afternotok=jobid)
    return jobid


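# Illustration (hypothetical values): given out = Path("/data/run1"), the
# batch-reference mode in analyse_workflow below expects the references
# written by get_mkref_cmd to be named
#
#   /data/run1.wcxref.<testbinsize>.npz
#   /data/run1.wcxref.<prefacebinsize>.npz
#
# i.e. the naming scheme used for the reftest/refpreface assignments there.

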
def align_and_convert_paired_end(config: dict, fastq: list, out: Path, sample_id: str) -> str:
    """Create a command for running bwa and WisecondorX convert (paired end)"""
    singularity = singularity_base(
        config["singularity"],
        config["out"],
        config["project"],
        config["singularity_bind"],
    )
    out_prefix = get_outprefix(out, sample_id)
    aln_r1 = get_align_command(
        singularity=singularity,
        sample_id=sample_id,
        tmp_dir=config["align"]["tmpdir"],
        threads=config["align"]["ntasks"],
        reference=config["reference"],
        fastq=fastq[0],
        out_dir=str(out),
        out_prefix=out_prefix,
        read="r1",
    )
    aln_r2 = get_align_command(
        singularity=singularity,
        sample_id=sample_id,
        tmp_dir=config["align"]["tmpdir"],
        reference=config["reference"],
        threads=config["align"]["ntasks"],
        fastq=fastq[1],
        out_dir=str(out),
        out_prefix=out_prefix,
        read="r2",
    )
    sampe_cmd = get_sampe_command(
        singularity=singularity,
        reference=config["reference"],
        threads=config["align"]["ntasks"],
        fastq1=fastq[0],
        fastq2=fastq[1],
        out_prefix=out_prefix,
    )
    bamsormadup_cmd = get_bamsormadup_command(
        singularity=singularity,
        tmp_dir=config["align"]["tmpdir"],
        out_prefix=out_prefix,
    )
    sampe = " \n ".join([sampe_cmd, bamsormadup_cmd])
    convert = get_convert_cmd(singularity=singularity, out_prefix=out_prefix)
    return "\n".join([aln_r1, aln_r2, sampe, convert])


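# `get_outprefix` is imported from elsewhere; judging by how the other
# workflows in this file build their prefixes (out_dir / sample_id /
# sample_id), a minimal sketch could look like the following. It is kept
# commented out so it does not shadow the imported helper, and the exact
# layout is an assumption:
#
# def get_outprefix(out: Path, sample_id: str) -> Path:
#     """Hypothetical helper: per-sample output prefix, matching the
#     out_dir / sample_id / sample_id pattern used elsewhere in this file."""
#     return out / sample_id / sample_id

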
def wisecondor_x_test(configs: dict, out_dir: Path, sample_id: str) -> str:
    """Get the commands for running the wisecondor chromosome X test"""
    out_prefix = out_dir / sample_id / sample_id
    singularity = singularity_base(
        configs["singularity"],
        configs["out"],
        configs["project"],
        configs["singularity_bind"],
    )
    blacklist = configs["wisecondorx"]["blacklist"]
    zscore = str(configs["wisecondorx"]["zscore"])
    wisecondor_test_cmd = get_predict_cmd(
        singularity=singularity,
        out_prefix=out_prefix,
        reference=configs["wisecondorx"]["reftest"],
        blacklist=blacklist,
        zscore=zscore,
    )
    wisecondor_preface_cmd = get_predict_cmd(
        singularity=singularity,
        out_prefix=out_prefix,
        reference=configs["wisecondorx"]["refpreface"],
        blacklist=blacklist,
        zscore=zscore,
        preface=True,
    )
    wisecondor_gender_cmd = get_gender_cmd(
        singularity=singularity,
        out_prefix=out_prefix,
        reference=configs["wisecondorx"]["reftest"],
    )
    return "\n".join(
        [wisecondor_test_cmd, wisecondor_preface_cmd, wisecondor_gender_cmd]
    )


def picard_qc(configs: dict, out_dir: Path, sample_id: str) -> str:
    """Get a string with pipeline steps to run picard qc"""
    out_prefix = out_dir / sample_id / sample_id
    reference = configs["reference"]
    javasettings = configs["picard"]["javasettings"]
    singularity = singularity_base(
        configs["singularity"],
        configs["out"],
        configs["project"],
        configs["singularity_bind"],
    )
    gc_bias_cmd = get_collect_gc_bias_cmd(
        singularity=singularity,
        out_prefix=str(out_prefix),
        reference=reference,
        javasettings=javasettings,
        tmp_dir=str(configs["align"]["tmpdir"]),
    )
    insert_size_cmd = get_collect_insert_size_cmd(
        singularity=singularity,
        out_prefix=str(out_prefix),
        javasettings=javasettings,
        tmp_dir=str(configs["align"]["tmpdir"]),
    )
    estimate_complexity_cmd = get_estimate_complexity_cmd(
        singularity=singularity,
        out_prefix=str(out_prefix),
        javasettings=javasettings,
        tmp_dir=str(configs["align"]["tmpdir"]),
    )
    return "\n".join([gc_bias_cmd, insert_size_cmd, estimate_complexity_cmd])


def amycne_ffy(configs: dict, out_dir: Path, sample_id: str) -> str:
    """Fetal fraction estimation using TIDDIT and AMYCNE"""
    out_prefix = out_dir / sample_id / sample_id
    path_gc_tab = out_dir / sample_id / ".".join([sample_id, "gc.tab"])
    singularity = singularity_base(
        configs["singularity"],
        configs["out"],
        configs["project"],
        configs["singularity_bind"],
    )
    # Calculate coverage bins with tiddit
    tiddit_cmd = get_tiddit_cmd(
        singularity=singularity,
        out_prefix=str(out_prefix),
        binsize=configs["tiddit"]["binsize"],
    )
    # Calculate bins with GC and quality filtering
    gc_tab_cmd = get_gctab_cmd(
        singularity=singularity,
        reference=configs["reference"],
        binsize=configs["tiddit"]["binsize"],
        path_gc_tab=str(path_gc_tab),
    )
    # Estimate the fetal fraction from the binned coverage
    amycne_cmd = run_amycne_cmd(
        singularity=singularity,
        out_prefix=str(out_prefix),
        path_gc_tab=str(path_gc_tab),
        minq=configs["amycne"]["minq"],
        slope=configs["amycne"]["coefficient"],
        intercept=configs["amycne"]["intercept"],
    )
    return "\n".join([tiddit_cmd, gc_tab_cmd, amycne_cmd])


def summarize_workflow(
    configs: dict,
    afterok: list,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
    batch_ref: bool = False,
    two_pass: bool = False,
) -> int:
    """Run the workflow to summarize an analysis"""
    LOG.info("Run the summarize workflow")
    out_dir = configs["out"]
    project_id = configs["project_id"]
    singularity = singularity_base(
        configs["singularity"],
        configs["out"],
        configs["project"],
        configs["singularity_bind"],
    )
    wd = os.path.dirname(os.path.realpath(__file__)).replace(
        "fluffy/workflows", "fluffy/scripts"
    )

    # The three modes differ only in the summary file name and in the extra
    # commands chained around the summary step
    if two_pass:
        outfile = out_dir / f"{project_id}.1pass.csv"
    elif batch_ref:
        outfile = out_dir / f"{project_id}.2pass.csv"
    else:
        outfile = out_dir / f"{project_id}.csv"

    summarize_cmd = get_summarize_cmd(
        singularity=singularity,
        out_dir=out_dir,
        outfile=outfile,
        project_id=configs["project_id"],
        sample_sheet=configs["sample_sheet"],
        zscore=configs["summary"]["zscore"],
        mincnv=configs["summary"]["mincnv"],
        maxgcd=configs["summary"]["maxGCD"],
        maxatd=configs["summary"]["maxATD"],
        maxbin2bin=configs["summary"]["maxbin2bin"],
        maxdup=configs["summary"]["maxdup"],
        minreads=configs["summary"]["minreads"],
    )

    if two_pass:
        build_two_pass_ref = get_two_pass_ref_cmd(
            singularity,
            out_dir,
            configs["project_id"],
            wd,
            configs["wisecondorx"]["testbinsize"],
            configs["wisecondorx"]["prefacebinsize"],
        )
        command_str = f"{summarize_cmd}\n{build_two_pass_ref}"
    else:
        multiqc_cmd = get_multiqc_cmd(
            singularity=singularity, input_dir=out_dir, out_dir=out_dir
        )
        if batch_ref:
            merge_cmd = get_merge_cmd(out_dir, configs["project_id"], wd)
            command_str = f"{multiqc_cmd}\n{summarize_cmd}\n{merge_cmd}"
        else:
            command_str = f"{multiqc_cmd}\n{summarize_cmd}"

    jobid = slurm_api.run_job(
        name="summarize_batch",
        command=command_str,
        afterok=afterok,
        dry_run=dry_run,
    )
    return jobid


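# For reference, the `summary` config block consumed above maps one-to-one
# onto the get_summarize_cmd keyword arguments. The values below are
# illustrative placeholders only, and the per-key glosses are best-effort
# readings, not documented semantics:
EXAMPLE_SUMMARY_CONFIG = {
    "zscore": 5,             # z-score cutoff for calling aberrations
    "mincnv": 10_000_000,    # minimum CNV size to report
    "maxGCD": 0.01,          # maximum GC dropout (QC gate)
    "maxATD": 0.01,          # maximum AT dropout (QC gate)
    "maxbin2bin": 0.0008,    # maximum bin-to-bin variance (QC gate)
    "maxdup": 0.15,          # maximum duplication rate (QC gate)
    "minreads": 20_000_000,  # minimum mapped reads per sample (QC gate)
}

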
def analyse_workflow(
    samples: Iterator[dict],
    configs: dict,
    slurm_api: SlurmAPI,
    skip_preface: bool = False,
    dry_run: bool = False,
    batch_ref: bool = True,
) -> int:
    """Run the wisecondor analysis"""
    # The sample collection is traversed several times below, so materialize
    # it in case an exhaustible iterator was passed in
    samples = list(samples)
    jobids = []
    sample_jobids = {}
    for sample in samples:
        sample_id = sample["sample_id"]
        sample_jobids[sample_id] = []
        sample_outdir = configs["out"] / sample_id
        # This will fail if the directory already exists
        if not dry_run:
            sample_outdir.mkdir(parents=True)
        slurm_api.slurm_settings["ntasks"] = configs["align"]["ntasks"]
        slurm_api.slurm_settings["mem"] = configs["align"]["mem"]
        align_jobid = align_individual(
            configs=configs,
            sample=sample,
            slurm_api=slurm_api,
            dry_run=dry_run,
        )
        jobids.append(align_jobid)
        sample_jobids[sample_id].append(align_jobid)

    if batch_ref:
        binsize_test = configs["wisecondorx"]["testbinsize"]
        binsize_preface = configs["wisecondorx"]["prefacebinsize"]
        out_dir = configs["out"]
        configs["wisecondorx"]["reftest"] = (
            f"{str(out_dir).rstrip('/')}.wcxref.{binsize_test}.npz"
        )
        configs["wisecondorx"]["refpreface"] = (
            f"{str(out_dir).rstrip('/')}.wcxref.{binsize_preface}.npz"
        )
        singularity = singularity_base(
            configs["singularity"],
            configs["out"],
            configs["project"],
            configs["singularity_bind"],
        )
        mkref_cmd = get_mkref_cmd(
            singularity=singularity,
            out=str(out_dir),
            testbinsize=configs["wisecondorx"]["testbinsize"],
            prefacebinsize=configs["wisecondorx"]["prefacebinsize"],
        )
        make_ref_jobid = slurm_api.run_job(
            name="wcxmkref",
            command=mkref_cmd,
            afterok=jobids,
            dry_run=dry_run,
        )
        for sample in samples:
            sample_id = sample["sample_id"]
            sample_jobids[sample_id].append(make_ref_jobid)

        first_pass_jobid, jobids, slurm_api = run_analysis(
            samples=samples,
            sample_jobids=sample_jobids,
            configs=configs,
            slurm_api=slurm_api,
            skip_preface=skip_preface,
            dry_run=dry_run,
            batch_ref=batch_ref,
            jobids=jobids,
            two_pass=True,
        )
        for sample in samples:
            sample_id = sample["sample_id"]
            sample_jobids[sample_id].append(first_pass_jobid)

    summarize_jobid, jobids, slurm_api = run_analysis(
        samples=samples,
        sample_jobids=sample_jobids,
        configs=configs,
        slurm_api=slurm_api,
        skip_preface=skip_preface,
        dry_run=dry_run,
        batch_ref=batch_ref,
        jobids=jobids,
        two_pass=False,
    )

    slurm_api.print_submitted_jobs()
    slurm_api.slurm_settings["time"] = "1:00:00"
    pipe_complete(
        configs=configs,
        afterok=summarize_jobid,
        slurm_api=slurm_api,
        dry_run=dry_run,
    )
    pipe_fail(
        configs=configs,
        slurm_api=slurm_api,
        dry_run=dry_run,
        afternotok=summarize_jobid,
    )
    return summarize_jobid


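# A minimal dry-run sketch of how analyse_workflow might be driven. The stub
# below mimics only the SlurmAPI surface this module actually touches
# (`slurm_settings`, `run_job`, `print_submitted_jobs`); the real SlurmAPI
# class, its job-id format, and the example invocation are assumptions.
class _DryRunSlurmAPI:
    """Hypothetical stand-in that records jobs instead of calling sbatch."""

    def __init__(self):
        self.slurm_settings = {"ntasks": 1, "mem": "4G", "time": "4:00:00"}
        self.jobs = []

    def run_job(self, name, command, afterok=None, afternotok=None, dry_run=False):
        # Record the job name and hand back a fake, monotonically increasing id
        self.jobs.append(name)
        return len(self.jobs)

    def print_submitted_jobs(self):
        for jobid, name in enumerate(self.jobs, start=1):
            print(f"{jobid}\t{name}")


# Example (not executed here):
# samples = [{"sample_id": "sample1", ...}]  # parsed from the sample sheet
# analyse_workflow(samples=samples, configs=configs,
#                  slurm_api=_DryRunSlurmAPI(), dry_run=True)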