示例#1
0
def preface_predict_workflow(
    configs: dict,
    sample_id: str,
    afterok: int,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
):
    """Submit the PREFACE prediction step for one sample.

    The job is made dependent (afterok) on the given upstream job id.
    Returns the slurm job id of the submitted job.
    """
    LOG.info("Running the preface predict workflow")

    base = singularity_base(
        configs["singularity"],
        configs["out"],
        configs["project"],
        configs["singularity_bind"],
    )

    predict_cmd = get_preface_predict_cmd(
        singularity=base,
        out_dir=configs["out"],
        model_dir=configs["preface"]["model_dir"],
        sample_id=sample_id,
    )

    return slurm_api.run_job(
        name=f"preface_predict-{sample_id}",
        command=predict_cmd,
        afterok=[afterok],
        dry_run=dry_run,
    )
示例#2
0
def real_slurm_api_fixture(configs, out_dir):
    """Build and return a concrete SlurmAPI configured from *configs*."""
    return SlurmAPI(slurm_settings=configs["slurm"], out_dir=out_dir)
示例#3
0
def cleanup_workflow(
    configs: dict,
    sample_outdir: Path,
    sample_id: str,
    afterok: list,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
) -> int:
    """Submit a job that runs MultiQC on a sample dir and then compresses it.

    The job waits for every id in *afterok*; returns the slurm job id.
    """
    compress_cmd = get_cleanup_cmd(
        out_dir=configs["out"],
        sample_outdir=sample_outdir,
        sample_id=sample_id,
    )

    base = singularity_base(
        configs["singularity"],
        configs["out"],
        configs["project"],
        configs["singularity_bind"],
    )

    qc_cmd = get_multiqc_cmd(
        singularity=base,
        input_dir=sample_outdir,
        out_dir=sample_outdir,
    )

    # MultiQC must run before the folder is compressed.
    return slurm_api.run_job(
        name=f"cleanup-{sample_id}",
        command="\n".join([qc_cmd, compress_cmd]),
        afterok=afterok,
        dry_run=dry_run,
    )
示例#4
0
def align_individual(
    configs: dict,
    sample: dict,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
):
    """Submit a bwa alignment job for one individual.

    Picks the single-end or paired-end command depending on the sample
    and returns the slurm job id of the alignment job.
    """
    sample_id = sample["sample_id"]
    LOG.info("Aligning reads for %s", sample_id)

    fastq = sample["fastq"]
    if sample["single_end"]:
        align_cmd = align_and_convert_single_end(
            config=configs,
            fastq=fastq[0],
            out=configs["out"],
            sample_id=sample_id,
        )
    else:
        align_cmd = align_and_convert_paired_end(
            config=configs,
            fastq=fastq,
            out=configs["out"],
            sample_id=sample_id,
        )

    return slurm_api.run_job(
        name=f"bwaAln-{sample_id}",
        command=align_cmd,
        dry_run=dry_run,
    )
def test_init():
    """Test to initialize a slurm api"""
    # GIVEN an account, a time limit and an output directory
    account = "my_account"
    time = "5:00:00"
    out_dir = Path("tests/fixtures/output")

    # WHEN instantiating a slurm api with those settings
    api = SlurmAPI(
        slurm_settings={"account": account, "time": time},
        out_dir=out_dir,
    )

    # THEN the api exposes the account and has created its directories
    assert api.account == account
    assert api.log_dir
    assert api.scripts_dir
示例#6
0
def make_reference(samples: Iterator[dict],
                   configs: dict,
                   slurm_api: SlurmAPI,
                   dry_run: bool = False) -> int:
    """Create a WisecondorX reference based on a batch of samples.

    Aligns every sample, then submits a ``wcxmkref`` job depending on all
    alignment jobs, and finally attaches the pipe-complete / pipe-fail
    watcher jobs to the reference job.

    Args:
        samples: sample dicts (must provide at least ``sample_id``).
        configs: parsed pipeline configuration.
        slurm_api: API used to submit jobs; its settings are mutated.
        dry_run: when truthy, skip directory creation and job submission.
            Fixed: previously defaulted to ``None`` despite the ``bool``
            annotation; ``False`` is behaviorally identical here since the
            value is only truth-tested.

    Returns:
        The slurm job id of the reference-building job.
    """
    out_dir = configs["out"]
    jobids = []
    for sample in samples:

        sample_id = sample["sample_id"]
        sample_outdir = configs["out"] / sample_id

        # This will fail if dir already exists — protects against
        # accidentally clobbering a previous run.
        if not dry_run:
            sample_outdir.mkdir(parents=True)

        # Alignment gets its own resource settings; the generic slurm
        # settings are restored right after submission.
        slurm_api.slurm_settings["ntasks"] = configs["align"]["ntasks"]
        slurm_api.slurm_settings["mem"] = configs["align"]["mem"]

        align_jobid = align_individual(configs=configs,
                                       sample=sample,
                                       slurm_api=slurm_api,
                                       dry_run=dry_run)
        jobids.append(align_jobid)

        slurm_api.slurm_settings["ntasks"] = configs["slurm"]["ntasks"]
        slurm_api.slurm_settings["mem"] = configs["slurm"]["mem"]

    singularity = singularity_base(configs["singularity"], configs["out"],
                                   configs["project"],
                                   configs["singularity_bind"])

    mkref_cmd = get_mkref_cmd(
        singularity=singularity,
        out=str(out_dir),
        testbinsize=configs["wisecondorx"]["testbinsize"],
        prefacebinsize=configs["wisecondorx"]["prefacebinsize"],
    )

    # The reference job only starts once every alignment succeeded.
    jobid = slurm_api.run_job(
        name="wcxmkref",
        command=mkref_cmd,
        afterok=jobids,
        dry_run=dry_run,
    )

    # Status-watcher jobs are cheap; cap their wall time.
    slurm_api.slurm_settings["time"] = "1:00:00"
    pipe_complete(configs=configs,
                  afterok=jobid,
                  slurm_api=slurm_api,
                  dry_run=dry_run)
    pipe_fail(configs=configs,
              slurm_api=slurm_api,
              dry_run=dry_run,
              afternotok=jobid)

    return jobid
示例#7
0
def pipe_fail(
    configs: dict,
    slurm_api: SlurmAPI,
    afternotok: int,
    dry_run: bool = False,
) -> int:
    """Submit a job that flags the analysis status as failed.

    The job only runs if the watched job fails (afternotok dependency).
    Returns the slurm job id.
    """
    out_dir = configs["out"]

    # Rewrite "running" -> "fail" inside the status file in place.
    fail_cmd = sed_replace_inplace(
        in_filename=str(out_dir / "analysis_status.json"),
        project_dir=str(out_dir),
        flag="FAIL",
        find_str=" \"running\"",
        replace_str=" \"fail\"",
    )

    return slurm_api.run_job(
        name="fluffy-fail",
        command=fail_cmd,
        afternotok=[afternotok],
        dry_run=dry_run,
    )
示例#8
0
def wisecondor_xtest_workflow(
    configs: dict,
    sample_id: str,
    afterok: int,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
):
    """Submit the WisecondorX test step for one sample.

    Depends on the given upstream job id; returns the slurm job id.
    """
    wcx_cmd = wisecondor_x_test(
        configs=configs,
        out_dir=configs["out"],
        sample_id=sample_id,
    )

    return slurm_api.run_job(
        name=f"wcx-{sample_id}",
        command=wcx_cmd,
        afterok=[afterok],
        dry_run=dry_run,
    )
示例#9
0
def estimate_ffy(
    configs: dict,
    sample_id: str,
    afterok: int,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
) -> int:
    """Submit the AMYCNE fetal-fraction estimation for one sample.

    Depends on the given upstream job id; returns the slurm job id.
    """
    LOG.info("Running the estimate fetal fraction with AMYCNE workflow")

    ffy_cmd = amycne_ffy(
        configs=configs,
        out_dir=configs["out"],
        sample_id=sample_id,
    )

    return slurm_api.run_job(
        name=f"amycne-{sample_id}",
        command=ffy_cmd,
        afterok=[afterok],
        dry_run=dry_run,
    )
示例#10
0
def picard_qc_workflow(
    configs: dict,
    sample_id: str,
    afterok: int,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
):
    """Submit the Picard tools QC step for one sample.

    Depends on the given upstream job id; returns the slurm job id.
    """
    LOG.info("Running the picard tools QC workflow")

    qc_cmd = picard_qc(
        configs=configs,
        out_dir=configs["out"],
        sample_id=sample_id,
    )

    return slurm_api.run_job(
        name=f"picard_qc-{sample_id}",
        command=qc_cmd,
        afterok=[afterok],
        dry_run=dry_run,
    )
示例#11
0
def _batch_summarize_cmd(singularity, out_dir, outfile, configs: dict) -> str:
    """Build the summarize command with the config-driven QC thresholds."""
    return get_summarize_cmd(
        singularity=singularity,
        out_dir=out_dir,
        outfile=outfile,
        project_id=configs["project_id"],
        sample_sheet=configs["sample_sheet"],
        zscore=configs["summary"]["zscore"],
        mincnv=configs["summary"]["mincnv"],
        maxgcd=configs["summary"]["maxGCD"],
        maxatd=configs["summary"]["maxATD"],
        maxbin2bin=configs["summary"]["maxbin2bin"],
        maxdup=configs["summary"]["maxdup"],
        minreads=configs["summary"]["minreads"])


def summarize_workflow(
    configs: dict,
    afterok: list,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
    batch_ref: bool = False,
    two_pass: bool = False,
) -> int:
    """Run the workflow to summarize an analysis.

    Builds a summary CSV (name depends on the pass/batch-ref mode), runs
    MultiQC on the final pass, and — on the first pass of a two-pass
    batch-reference run — builds the second-pass reference instead.

    Args:
        configs: parsed pipeline configuration.
        afterok: job ids the summary job must wait for.
        slurm_api: API used to submit the job.
        dry_run: when truthy, the job is not actually submitted.
        batch_ref: analysis used a batch-built reference (2pass output name).
        two_pass: this is the first pass of a two-pass run.

    Returns:
        The slurm job id of the summary job.
    """
    LOG.info("Run the summarize workflow")
    out_dir = configs["out"]
    project_id = configs["project_id"]

    singularity = singularity_base(configs["singularity"], configs["out"],
                                   configs["project"],
                                   configs["singularity_bind"])

    # Locate the sibling scripts directory relative to this module.
    wd = os.path.dirname(os.path.realpath(__file__)).replace(
        "fluffy/workflows", "fluffy/scripts")

    if not two_pass:
        # Final pass: always run MultiQC over the whole project.
        multiqc_cmd = get_multiqc_cmd(singularity=singularity,
                                      input_dir=out_dir,
                                      out_dir=out_dir)
        if batch_ref:
            outfile = out_dir / f"{project_id}.2pass.csv"
            summarize_cmd = _batch_summarize_cmd(singularity, out_dir,
                                                 outfile, configs)
            merge_cmd = get_merge_cmd(out_dir, configs["project_id"], wd)
            command_str = f"{multiqc_cmd}\n{summarize_cmd}\n{merge_cmd}"
        else:
            outfile = out_dir / f"{project_id}.csv"
            summarize_cmd = _batch_summarize_cmd(singularity, out_dir,
                                                 outfile, configs)
            command_str = f"{multiqc_cmd}\n{summarize_cmd}"

    else:
        # First pass of a two-pass run: summarize, then build the
        # second-pass reference from this batch.
        outfile = out_dir / f"{project_id}.1pass.csv"
        summarize_cmd = _batch_summarize_cmd(singularity, out_dir,
                                             outfile, configs)
        build_two_pass_ref = get_two_pass_ref_cmd(
            singularity, out_dir, configs["project_id"], wd,
            configs["wisecondorx"]["testbinsize"],
            configs["wisecondorx"]["prefacebinsize"])
        command_str = f"{summarize_cmd}\n{build_two_pass_ref}"

    jobid = slurm_api.run_job(
        name="summarize_batch",  # was an f-string with no placeholders
        command=command_str,
        afterok=afterok,
        dry_run=dry_run,
    )

    return jobid
示例#12
0
def _add_batch_arguments(parser):
    """Add the arguments shared by the rerun and analyse sub-commands."""
    parser.add_argument(
        "--batch-ref",
        help=
        "Build a wisecondorX reference from the input batch (overrides refpreface and reftest)",
        required=False,
        action="store_true")
    parser.add_argument("--skip-preface",
                        help="Skip preface fetal fraction estimation",
                        required=False,
                        action="store_true")


def base_command():
    """Entry point of the fluffy command line interface.

    Parses the base arguments, builds the shared context (configs,
    samples, slurm api) and dispatches to the reference / rerun /
    analyse sub-commands.
    """
    args, parser = base_arguments(sys.argv[1:])

    if args.version:
        print("Fluffy-{}".format(__version__))
        # sys.exit over quit(): quit() is meant for interactive use.
        sys.exit()

    coloredlogs.install("INFO")
    ctx = {}

    sample = args.sample
    out = pathlib.Path(args.out)
    LOG.info("Create outdir %s (if not exist)", out)

    out.mkdir(parents=True, exist_ok=True)
    config = pathlib.Path(args.config)
    configs = get_configs(config)
    configs["out"] = out
    configs["name"] = config.name
    configs["config_path"] = config
    ctx["configs"] = configs

    project_dir = pathlib.Path(args.project)
    ctx["project"] = project_dir
    configs["project"] = project_dir

    # Directory for sacct accounting output of submitted jobs.
    sacct_dir = out / "sacct"
    sacct_dir.mkdir(parents=True, exist_ok=True)

    with open(sample, "r") as samplesheet:
        ctx["samples"] = list(read_samplesheet(samplesheet, project_dir))

    ctx["sample_sheet"] = sample
    # Allow overriding individual slurm settings as "key:value" pairs.
    if args.slurm_params:
        for param in args.slurm_params:
            configs["slurm"][param.split(":")[0]] = param.split(":")[-1]

    ctx["slurm_api"] = SlurmAPI(
        slurm_settings=configs["slurm"],
        out_dir=out,
    )

    if args.reference:
        reference(args, ctx, args.dry_run)

    elif args.rerun:
        _add_batch_arguments(parser)
        args, unknown = parser.parse_known_args()

        rerun(args, ctx, args.skip_preface, args.dry_run)

    elif args.analyse:
        _add_batch_arguments(parser)
        args, unknown = parser.parse_known_args()

        analyse(ctx, args.skip_preface, args.dry_run, args.batch_ref)

    else:
        parser.print_help()
示例#13
0
def analyse_workflow(
    samples: Iterator[dict],
    configs: dict,
    slurm_api: SlurmAPI,
    skip_preface: bool = False,
    dry_run: bool = False,
    batch_ref: bool = True,
) -> int:
    """Run the wisecondor analysis.

    Aligns each sample, optionally builds a batch reference (two-pass
    mode), runs the per-sample analysis and submits a final summary job
    plus the pipe-complete / pipe-fail watchers.

    Args:
        samples: sample dicts; consumed into a list up-front (see below).
        configs: parsed pipeline configuration.
        slurm_api: API used to submit jobs; its settings are mutated.
        skip_preface: skip the PREFACE fetal-fraction prediction step.
        dry_run: when truthy, no directories are made and no jobs run.
        batch_ref: build the wisecondorX reference from this batch.

    Returns:
        The slurm job id of the summary job.
    """
    # Bug fix: *samples* is typed as an Iterator but is iterated several
    # times below (and again inside run_analysis). A genuine one-shot
    # iterator would be exhausted after the first loop, silently skipping
    # every downstream job. Materialize it once.
    samples = list(samples)

    jobids = []
    sample_jobids = {}

    for sample in samples:
        sample_id = sample["sample_id"]
        sample_jobids[sample_id] = []
        sample_outdir = configs["out"] / sample_id
        # This will fail if dir already exists
        if not dry_run:
            sample_outdir.mkdir(parents=True)

        # Alignment uses its own resource settings.
        slurm_api.slurm_settings["ntasks"] = configs["align"]["ntasks"]
        slurm_api.slurm_settings["mem"] = configs["align"]["mem"]

        align_jobid = align_individual(
            configs=configs,
            sample=sample,
            slurm_api=slurm_api,
            dry_run=dry_run,
        )
        jobids.append(align_jobid)
        sample_jobids[sample_id].append(align_jobid)

    if batch_ref:
        binsize_test = configs["wisecondorx"]["testbinsize"]
        binsize_preface = configs["wisecondorx"]["prefacebinsize"]
        out_dir = configs["out"]

        # Point the config at the reference files the mkref job will make.
        configs["wisecondorx"][
            "reftest"] = f"{str(out_dir).rstrip('/')}.wcxref.{binsize_test}.npz"
        configs["wisecondorx"][
            "refpreface"] = f"{str(out_dir).rstrip('/')}.wcxref.{binsize_preface}.npz"

        singularity = singularity_base(configs["singularity"], configs["out"],
                                       configs["project"],
                                       configs["singularity_bind"])

        mkref_cmd = get_mkref_cmd(
            singularity=singularity,
            out=str(out_dir),
            testbinsize=configs["wisecondorx"]["testbinsize"],
            prefacebinsize=configs["wisecondorx"]["prefacebinsize"],
        )

        make_ref_jobid = slurm_api.run_job(
            name="wcxmkref",
            command=mkref_cmd,
            afterok=jobids,
            dry_run=dry_run,
        )

        # Every sample now also depends on the reference being built.
        for sample in samples:
            sample_id = sample["sample_id"]
            sample_jobids[sample_id].append(make_ref_jobid)

        first_pass_jobid, jobids, slurm_api = run_analysis(
            samples=samples,
            sample_jobids=sample_jobids,
            configs=configs,
            slurm_api=slurm_api,
            skip_preface=skip_preface,
            dry_run=dry_run,
            batch_ref=batch_ref,
            jobids=jobids,
            two_pass=True)

        for sample in samples:
            sample_id = sample["sample_id"]
            sample_jobids[sample_id].append(first_pass_jobid)

    summarize_jobid, jobids, slurm_api = run_analysis(
        samples=samples,
        sample_jobids=sample_jobids,
        configs=configs,
        slurm_api=slurm_api,
        skip_preface=skip_preface,
        dry_run=dry_run,
        batch_ref=batch_ref,
        jobids=jobids,
        two_pass=False)

    slurm_api.print_submitted_jobs()

    # Status-watcher jobs are cheap; cap their wall time.
    slurm_api.slurm_settings["time"] = "1:00:00"
    pipe_complete(configs=configs,
                  afterok=summarize_jobid,
                  slurm_api=slurm_api,
                  dry_run=dry_run)

    pipe_fail(configs=configs,
              slurm_api=slurm_api,
              dry_run=dry_run,
              afternotok=summarize_jobid)

    return summarize_jobid
示例#14
0
def run_analysis(
    samples: Iterator[dict],
    sample_jobids: dict,
    configs: dict,
    slurm_api: SlurmAPI,
    skip_preface: bool,
    dry_run: bool,
    batch_ref: bool,
    jobids: list,
    two_pass: bool,
):
    """Submit the per-sample analysis jobs plus the batch summary job.

    Mutates *jobids* and *sample_jobids* in place. Returns the summary
    job id, the updated job id list, and the slurm api.
    """
    for sample in samples:
        sid = sample["sample_id"]
        sample_dir = configs["out"] / sid

        # Per-sample jobs run with the generic slurm resource settings.
        slurm_api.slurm_settings["ntasks"] = configs["slurm"]["ntasks"]
        slurm_api.slurm_settings["mem"] = configs["slurm"]["mem"]

        # Most recently recorded job for this sample is the dependency.
        upstream = sample_jobids[sid][-1]

        def _track(jobid):
            """Record a job id both globally and for this sample."""
            jobids.append(jobid)
            sample_jobids[sid].append(jobid)

        _track(
            estimate_ffy(
                configs=configs,
                sample_id=sid,
                afterok=upstream,
                slurm_api=slurm_api,
                dry_run=dry_run,
            ))

        _track(
            picard_qc_workflow(
                configs=configs,
                sample_id=sid,
                afterok=upstream,
                slurm_api=slurm_api,
                dry_run=dry_run,
            ))

        wcx_jobid = wisecondor_xtest_workflow(
            configs=configs,
            sample_id=sid,
            afterok=upstream,
            slurm_api=slurm_api,
            dry_run=dry_run,
        )
        _track(wcx_jobid)

        if not skip_preface:
            # PREFACE prediction depends on the wisecondorX test output.
            _track(
                preface_predict_workflow(
                    configs=configs,
                    sample_id=sid,
                    afterok=wcx_jobid,
                    slurm_api=slurm_api,
                    dry_run=dry_run,
                ))

        if not two_pass:
            # Cleanup waits on everything recorded for this sample; it is
            # tracked globally only, not per sample.
            jobids.append(
                cleanup_workflow(
                    configs=configs,
                    sample_outdir=sample_dir,
                    sample_id=sid,
                    afterok=sample_jobids[sid],
                    slurm_api=slurm_api,
                    dry_run=dry_run,
                ))

    summarize_jobid = summarize_workflow(
        configs=configs,
        afterok=jobids,
        slurm_api=slurm_api,
        dry_run=dry_run,
        batch_ref=batch_ref,
        two_pass=two_pass,
    )
    return summarize_jobid, jobids, slurm_api