Example #1
def _get_files_project(sample, upload_config):
    """Retrieve output files associated with an entire analysis project.
    """
    out = [{"path": sample["provenance"]["programs"]}]
    for fname in ["bcbio-nextgen.log", "bcbio-nextgen-commands.log"]:
        if os.path.exists(os.path.join(log.get_log_dir(sample["config"]), fname)):
            out.append({"path": os.path.join(log.get_log_dir(sample["config"]), fname),
                        "type": "external_command_log",
                        "ext": ""})

    if "summary" in sample and sample["summary"].get("project"):
        out.append({"path": sample["summary"]["project"]})
    mixup_check = tz.get_in(["summary", "mixup_check"], sample)
    if mixup_check:
        out.append({"path": sample["summary"]["mixup_check"],
                    "type": "directory", "ext": "mixup_check"})

    for x in sample.get("variants", []):
        if "pop_db" in x:
            out.append({"path": x["pop_db"],
                        "type": "sqlite",
                        "variantcaller": x["variantcaller"]})
    for x in sample.get("variants", []):
        if "population" in x:
            pop_db = tz.get_in(["population", "db"], x)
            if pop_db:
                out.append({"path": pop_db,
                            "type": "sqlite",
                            "variantcaller": x["variantcaller"]})
            out.extend(_get_variant_file(x, ("population", "vcf")))
    for x in sample.get("variants", []):
        if x.get("validate") and x["validate"].get("grading_summary"):
            out.append({"path": x["validate"]["grading_summary"]})
            break
    if "coverage" in sample:
        cov_db = tz.get_in(["coverage", "summary"], sample)
        if cov_db:
            out.append({"path": cov_db, "type": "sqlite", "ext": "coverage"})

    if "combined_counts" in sample:
        out.append({"path": sample["combined_counts"]})
    if "annotated_combined_counts" in sample:
        out.append({"path": sample["annotated_combined_counts"]})
    if "combined_fpkm" in sample:
        out.append({"path": sample["combined_fpkm"]})
    if "combined_fpkm_isoform" in sample:
        out.append({"path": sample["combined_fpkm_isoform"]})
    if "assembled_gtf" in sample:
        out.append({"path": sample["assembled_gtf"]})
    if dd.get_dexseq_counts(sample):
        out.append({"path": dd.get_dexseq_counts(sample)})
    if dd.get_express_counts(sample):
        out.append({"path": dd.get_express_counts(sample)})
    if dd.get_express_fpkm(sample):
        out.append({"path": dd.get_express_fpkm(sample)})
    if dd.get_express_tpm(sample):
        out.append({"path": dd.get_express_tpm(sample)})

    return _add_meta(out, config=upload_config)
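
The repeated tz.get_in(...) calls in these examples are toolz's safe nested lookup: it walks a key path through nested dicts and returns a default (None) instead of raising KeyError when any key along the path is missing, which is why the results can be used directly as truthiness guards. A minimal, self-contained sketch of the pattern, using a hypothetical trimmed-down sample dict:

import toolz as tz

# Hypothetical, trimmed-down stand-in for the pipeline's nested sample dict.
sample = {"summary": {"project": "project-summary.yaml"},
          "coverage": {"summary": "coverage.db"}}

print(tz.get_in(["summary", "project"], sample))      # project-summary.yaml
print(tz.get_in(["summary", "mixup_check"], sample))  # None, so the guard is skipped
print(tz.get_in(["coverage", "summary"], sample))     # coverage.db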
Example #2
def _get_files_project(sample, upload_config):
    """Retrieve output files associated with an entire analysis project.
    """
    out = [{"path": sample["provenance"]["programs"]}]
    for fname in ["bcbio-nextgen.log", "bcbio-nextgen-commands.log"]:
        if os.path.exists(os.path.join(log.get_log_dir(sample["config"]), fname)):
            out.append({"path": os.path.join(log.get_log_dir(sample["config"]), fname),
                        "type": "external_command_log",
                        "ext": ""})

    if "summary" in sample and sample["summary"].get("project"):
        out.append({"path": sample["summary"]["project"]})
    mixup_check = tz.get_in(["summary", "mixup_check"], sample)
    if mixup_check:
        out.append({"path": sample["summary"]["mixup_check"],
                    "type": "directory", "ext": "mixup_check"})

    for x in sample.get("variants", []):
        if "pop_db" in x:
            out.append({"path": x["pop_db"],
                        "type": "sqlite",
                        "variantcaller": x["variantcaller"]})
    for x in sample.get("variants", []):
        if "population" in x:
            pop_db = tz.get_in(["population", "db"], x)
            if pop_db:
                out.append({"path": pop_db,
                            "type": "sqlite",
                            "variantcaller": x["variantcaller"]})
            out.extend(_get_variant_file(x, ("population", "vcf")))
    for x in sample.get("variants", []):
        if x.get("validate") and x["validate"].get("grading_summary"):
            out.append({"path": x["validate"]["grading_summary"]})
            break

    if "combined_counts" in sample:
        out.append({"path": sample["combined_counts"]})
    if "annotated_combined_counts" in sample:
        out.append({"path": sample["annotated_combined_counts"]})
    if "combined_fpkm" in sample:
        out.append({"path": sample["combined_fpkm"]})
    if "combined_fpkm_isoform" in sample:
        out.append({"path": sample["combined_fpkm_isoform"]})
    if "assembled_gtf" in sample:
        out.append({"path": sample["assembled_gtf"]})
    if dd.get_dexseq_counts(sample):
        out.append({"path": dd.get_dexseq_counts(sample)})

    return _add_meta(out, config=upload_config)
Example #3
def create(parallel, dirs, config):
    """Create a cluster based on the provided parallel arguments.

    Returns an IPython view on the cluster, enabling parallel processing of jobs.

    Adds a mincores specification if we have machines with a larger
    number of cores, to allow jobs to be batched together for shared
    memory usage.
    """
    profile_dir = utils.safe_makedir(os.path.join(dirs["work"], get_log_dir(config), "ipython"))
    has_mincores = any(x.startswith("mincores=") for x in parallel["resources"])
    cores = min(_get_common_cores(config["resources"]), parallel["system_cores"])
    if cores > 1 and not has_mincores:
        adj_cores = max(1, int(math.floor(cores * float(parallel.get("mem_pct", 1.0)))))
        # if we have less scheduled cores than per machine, use the scheduled count
        if cores > parallel["cores"]:
            cores = parallel["cores"]
        # if we have less total cores required for the entire process, use that
        elif adj_cores > parallel["num_jobs"] * parallel["cores_per_job"]:
            cores = parallel["num_jobs"] * parallel["cores_per_job"]
        else:
            cores = adj_cores
            cores = per_machine_target_cores(cores, parallel["num_jobs"] // cores)
        parallel["resources"].append("mincores=%s" % cores)
    return ipython_cluster.cluster_view(parallel["scheduler"].lower(), parallel["queue"],
                                        parallel["num_jobs"], parallel["cores_per_job"],
                                        profile=profile_dir, start_wait=parallel["timeout"],
                                        extra_params={"resources": parallel["resources"],
                                                      "mem": parallel["mem"],
                                                      "tag": parallel.get("tag"),
                                                      "run_local": parallel.get("run_local")},
                                        retries=parallel.get("retries"))
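
The core selection above reduces to simple arithmetic: start from the smaller of the common machine cores and the scheduler's system cores, scale by mem_pct to get a memory-adjusted target, then clamp against the scheduled per-job and whole-run core counts. A standalone restatement of that branch logic (pick_mincores is a hypothetical helper; the real code additionally rounds the final case down with per_machine_target_cores):

import math

def pick_mincores(parallel, system_cores, common_cores):
    # Hypothetical helper restating the branch logic from create() above.
    cores = min(common_cores, system_cores)
    if cores <= 1:
        return None  # nothing worth batching
    adj_cores = max(1, int(math.floor(cores * float(parallel.get("mem_pct", 1.0)))))
    if cores > parallel["cores"]:
        # fewer cores scheduled per job than a machine offers: use the scheduled count
        return parallel["cores"]
    if adj_cores > parallel["num_jobs"] * parallel["cores_per_job"]:
        # the entire run needs fewer cores than the memory-adjusted target
        return parallel["num_jobs"] * parallel["cores_per_job"]
    return adj_cores  # real code: per_machine_target_cores(adj_cores, ...)

print(pick_mincores({"cores": 8, "num_jobs": 4, "cores_per_job": 2, "mem_pct": 0.5},
                    system_cores=16, common_cores=16))  # -> 8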
Example #4
def create(parallel, dirs, config):
    """Create a cluster based on the provided parallel arguments.

    Returns an IPython view on the cluster, enabling parallel processing of jobs.

    Adds a mincores specification if we have machines with a larger
    number of cores, to allow jobs to be batched together for shared
    memory usage.
    """
    profile_dir = utils.safe_makedir(
        os.path.join(dirs["work"], get_log_dir(config), "ipython"))
    has_mincores = any(
        x.startswith("mincores=") for x in parallel["resources"])
    if parallel["system_cores"] > 3 and not has_mincores:
        cores = parallel["system_cores"]
        # if we have larger number of cores, leave room for standard batch script and controller
        if parallel["system_cores"] > 30:
            cores = cores - 2
        elif parallel["system_cores"] > 15:
            cores = cores - 1
        parallel["resources"].append("mincores=%s" % cores)
    return ipython_cluster.cluster_view(parallel["scheduler"].lower(),
                                        parallel["queue"],
                                        parallel["num_jobs"],
                                        parallel["cores_per_job"],
                                        profile=profile_dir,
                                        start_wait=parallel["timeout"],
                                        extra_params={
                                            "resources": parallel["resources"],
                                            "mem": parallel["mem"],
                                            "tag": parallel.get("tag"),
                                            "run_local":
                                            parallel.get("run_local")
                                        },
                                        retries=parallel.get("retries"))
Example #5
def create(parallel, dirs, config):
    """Create a cluster based on the provided parallel arguments.

    Returns an IPython view on the cluster, enabling parallel processing of jobs.

    Adds a mincores specification if we have machines with a larger
    number of cores, to allow jobs to be batched together for shared
    memory usage.
    """
    profile_dir = utils.safe_makedir(os.path.join(dirs["work"], get_log_dir(config), "ipython"))
    has_mincores = any(x.startswith("mincores=") for x in parallel["resources"])
    if parallel["system_cores"] > 3 and not has_mincores:
        cores = parallel["system_cores"]
        # if we have larger number of cores, leave room for standard batch script and controller
        if parallel["system_cores"] > 30:
            cores = cores - 2
        elif parallel["system_cores"] > 15:
            cores = cores - 1
        parallel["resources"].append("mincores=%s" % cores)
    return ipython_cluster.cluster_view(parallel["scheduler"].lower(), parallel["queue"],
                                        parallel["num_jobs"], parallel["cores_per_job"],
                                        profile=profile_dir, start_wait=parallel["timeout"],
                                        extra_params={"resources": parallel["resources"],
                                                      "mem": parallel["mem"],
                                                      "tag": parallel.get("tag"),
                                                      "run_local": parallel.get("run_local")},
                                        retries=parallel.get("retries"))
Example #6
def _view_from_parallel(parallel, work_dir, config):
    """Translate parallel map into options for a cluster view.
    """
    profile_dir = utils.safe_makedir(os.path.join(work_dir, get_log_dir(config), "ipython"))
    return ipython_cluster.cluster_view(parallel["scheduler"].lower(), parallel["queue"],
                                        parallel["num_jobs"], parallel["cores_per_job"],
                                        profile=profile_dir, start_wait=parallel["timeout"],
                                        extra_params={"resources": parallel["resources"]},
                                        retries=parallel.get("retries"))
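
Nearly all of the cluster_view arguments come straight out of the parallel dict. A hypothetical example of the fields this function reads (values are illustrative; the real dict is assembled from bcbio's command line and resource configuration):

# Hypothetical parallel dict covering the fields _view_from_parallel reads.
parallel = {"scheduler": "SLURM",   # lowercased before being handed to cluster_view
            "queue": "general",
            "num_jobs": 16,
            "cores_per_job": 1,
            "timeout": 15,          # passed as start_wait
            "resources": [],        # e.g. ["mincores=8"] once create() has run
            "retries": None}        # read with .get(), so a missing key is tolerated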
Example #7
def _get_files_project(sample, upload_config):
    """Retrieve output files associated with an entire analysis project.
    """
    out = [{"path": sample["provenance"]["programs"]}]
    for fname in ["bcbio-nextgen.log", "bcbio-nextgen-commands.log"]:
        if os.path.exists(os.path.join(log.get_log_dir(sample["config"]), fname)):
            out.append({"path": os.path.join(log.get_log_dir(sample["config"]), fname),
                        "type": "external_command_log",
                        "ext": ""})

    if "summary" in sample and sample["summary"].get("project"):
        out.append({"path": sample["summary"]["project"]})

    for x in sample.get("variants", []):
        if "pop_db" in x:
            out.append({"path": x["pop_db"],
                        "type": "sqlite",
                        "variantcaller": x["variantcaller"]})
    for x in sample.get("variants", []):
        if "population" in x:
            pop_db = x["population"].get("db")
            if pop_db:
                out.append({"path": pop_db,
                            "type": "sqlite",
                            "variantcaller": x["variantcaller"]})
            out.extend(_get_variant_file(x, ("population", "vcf")))
    for x in sample.get("variants", []):
        if x.get("validate") and x["validate"].get("grading_summary"):
            out.append({"path": x["validate"]["grading_summary"]})
            break

    if "combined_counts" in sample:
        out.append({"path": sample["combined_counts"]})
    if "annotated_combined_counts" in sample:
        out.append({"path": sample["annotated_combined_counts"]})
    if "combined_fpkm" in sample:
        out.append({"path": sample["combined_fpkm"]})

    return _add_meta(out, config=upload_config)
Example #8
def create(parallel, dirs, config):
    """Create a cluster based on the provided parallel arguments.

    Returns an IPython view on the cluster, enabling parallel processing of jobs.
    """
    profile_dir = utils.safe_makedir(os.path.join(dirs["work"], get_log_dir(config), "ipython"))
    return ipython_cluster.cluster_view(parallel["scheduler"].lower(), parallel["queue"],
                                        parallel["num_jobs"], parallel["cores_per_job"],
                                        profile=profile_dir, start_wait=parallel["timeout"],
                                        extra_params={"resources": parallel["resources"],
                                                      "mem": parallel["mem"],
                                                      "tag": parallel.get("tag"),
                                                      "run_local": parallel.get("run_local")},
                                        retries=parallel.get("retries"))
Example #9
def _view_from_parallel(parallel, work_dir, config):
    """Translate parallel map into options for a cluster view.
    """
    profile_dir = utils.safe_makedir(
        os.path.join(work_dir, get_log_dir(config), "ipython"))
    return ipython_cluster.cluster_view(
        parallel["scheduler"].lower(),
        parallel["queue"],
        parallel["num_jobs"],
        parallel["cores_per_job"],
        profile=profile_dir,
        start_wait=parallel["timeout"],
        extra_params={"resources": parallel["resources"]},
        retries=parallel.get("retries"))
Example #10
def create(parallel, dirs, config):
    """Create a cluster based on the provided parallel arguments.

    Returns an IPython view on the cluster, enabling parallel processing of jobs.

    Adds a mincores specification if we have machines with a larger
    number of cores, to allow jobs to be batched together for shared
    memory usage.
    """
    profile_dir = utils.safe_makedir(
        os.path.join(dirs["work"], get_log_dir(config), "ipython"))
    has_mincores = any(
        x.startswith("mincores=") for x in parallel["resources"])
    cores = min(_get_common_cores(config["resources"]),
                parallel["system_cores"])
    if cores > 1 and not has_mincores:
        adj_cores = max(
            1, int(math.floor(cores * float(parallel.get("mem_pct", 1.0)))))
        # if we have less scheduled cores than per machine, use the scheduled count
        if cores > parallel["cores"]:
            cores = parallel["cores"]
        # if we have less total cores required for the entire process, use that
        elif adj_cores > parallel["num_jobs"] * parallel["cores_per_job"]:
            cores = parallel["num_jobs"] * parallel["cores_per_job"]
        else:
            cores = adj_cores
            cores = per_machine_target_cores(cores,
                                             parallel["num_jobs"] // cores)
        parallel["resources"].append("mincores=%s" % cores)
    return ipython_cluster.cluster_view(parallel["scheduler"].lower(),
                                        parallel["queue"],
                                        parallel["num_jobs"],
                                        parallel["cores_per_job"],
                                        profile=profile_dir,
                                        start_wait=parallel["timeout"],
                                        extra_params={"resources": parallel["resources"],
                                                      "mem": parallel["mem"],
                                                      "tag": parallel.get("tag"),
                                                      "run_local": parallel.get("run_local"),
                                                      "local_controller": parallel.get("local_controller")},
                                        retries=parallel.get("retries"))
Example #11
def _get_files_project(sample, upload_config):
    """Retrieve output files associated with an entire analysis project.
    """
    out = [{"path": sample["provenance"]["programs"]}]
    if os.path.exists(tz.get_in(["provenance", "data"], sample) or ""):
        out.append({"path": sample["provenance"]["data"]})
    for fname in ["bcbio-nextgen.log", "bcbio-nextgen-commands.log"]:
        if os.path.exists(
                os.path.join(log.get_log_dir(sample["config"]), fname)):
            out.append({"path": os.path.join(log.get_log_dir(sample["config"]), fname),
                        "type": "external_command_log",
                        "ext": ""})

    if "summary" in sample and sample["summary"].get("project"):
        out.append({"path": sample["summary"]["project"]})
    if "summary" in sample and sample["summary"].get("metadata"):
        out.append({"path": sample["summary"]["metadata"]})
    mixup_check = tz.get_in(["summary", "mixup_check"], sample)
    if mixup_check:
        out.append({
            "path": sample["summary"]["mixup_check"],
            "type": "directory",
            "ext": "mixup_check"
        })

    report = os.path.join(dd.get_work_dir(sample), "report")
    if utils.file_exists(report):
        out.append({"path": report, "type": "directory", "ext": "report"})

    multiqc = tz.get_in(["summary", "multiqc"], sample)
    if multiqc:
        out.extend(_flatten_file_with_secondary(multiqc, "multiqc"))

    if sample.get("seqcluster", {}):
        out.append({
            "path": sample["seqcluster"].get("out_dir"),
            "type": "directory",
            "ext": "seqcluster"
        })

    if sample.get("mirge", {}):
        for fn in sample["mirge"]:
            out.append({"path": fn, "dir": "mirge"})

    if sample.get("report", None):
        out.append({
            "path": os.path.dirname(sample["report"]),
            "type": "directory",
            "ext": "seqclusterViz"
        })

    for x in sample.get("variants", []):
        if "pop_db" in x:
            out.append({
                "path": x["pop_db"],
                "type": "sqlite",
                "variantcaller": x["variantcaller"]
            })
    for x in sample.get("variants", []):
        if "population" in x:
            pop_db = tz.get_in(["population", "db"], x)
            if pop_db:
                out.append({
                    "path": pop_db,
                    "type": "sqlite",
                    "variantcaller": x["variantcaller"]
                })
            suffix = "-annotated-decomposed" if tz.get_in(
                ("population", "decomposed"), x) else "-annotated"
            vcfs = _get_project_vcf(x, suffix)
            out.extend([_add_batch(f, sample) for f in vcfs])
    for x in sample.get("variants", []):
        if x.get("validate") and x["validate"].get("grading_summary"):
            out.append({"path": x["validate"]["grading_summary"]})
            break
    sv_project = set([])
    for svcall in sample.get("sv", []):
        if svcall.get("variantcaller") == "seq2c":
            if svcall.get("calls_all") and svcall["calls_all"] not in sv_project:
                out.append({
                    "path": svcall["coverage_all"],
                    "batch": "seq2c",
                    "ext": "coverage",
                    "type": "tsv"
                })
                out.append({
                    "path": svcall["read_mapping"],
                    "batch": "seq2c",
                    "ext": "read_mapping",
                    "type": "txt"
                })
                out.append({
                    "path": svcall["calls_all"],
                    "batch": "seq2c",
                    "ext": "calls",
                    "type": "tsv"
                })
                sv_project.add(svcall["calls_all"])
    if "coverage" in sample:
        cov_db = tz.get_in(["coverage", "summary"], sample)
        if cov_db:
            out.append({"path": cov_db, "type": "sqlite", "ext": "coverage"})
        all_coverage = tz.get_in(["coverage", "all"], sample)
        if all_coverage:
            out.append({
                "path": all_coverage,
                "type": "bed",
                "ext": "coverage"
            })

    if dd.get_mirna_counts(sample):
        out.append({"path": dd.get_mirna_counts(sample)})
    if dd.get_isomir_counts(sample):
        out.append({"path": dd.get_isomir_counts(sample)})
    if dd.get_novel_mirna_counts(sample):
        out.append({"path": dd.get_novel_mirna_counts(sample)})
    if dd.get_novel_isomir_counts(sample):
        out.append({"path": dd.get_novel_isomir_counts(sample)})
    if dd.get_combined_counts(sample):
        count_file = dd.get_combined_counts(sample)
        if sample["analysis"].lower() == "scrna-seq":
            out.append({"path": count_file, "type": "mtx"})
            out.append({"path": count_file + ".rownames", "type": "rownames"})
            out.append({"path": count_file + ".colnames", "type": "colnames"})
            out.append({"path": count_file + ".metadata", "type": "metadata"})
            umi_file = os.path.splitext(count_file)[0] + "-dupes.mtx"
            if utils.file_exists(umi_file):
                out.append({"path": umi_file, "type": "mtx"})
                out.append({
                    "path": umi_file + ".rownames",
                    "type": "rownames"
                })
                out.append({
                    "path": umi_file + ".colnames",
                    "type": "colnames"
                })
            if dd.get_combined_histogram(sample):
                out.append({
                    "path": dd.get_combined_histogram(sample),
                    "type": "txt"
                })
            rda = os.path.join(os.path.dirname(count_file), "se.rda")
            if utils.file_exists(rda):
                out.append({"path": rda, "type": "rda"})
        else:
            out.append({"path": dd.get_combined_counts(sample)})
    if dd.get_tximport(sample):
        out.append({"path": dd.get_tximport(sample)["gene_tpm"], "dir": "tpm"})
        out.append({
            "path": dd.get_tximport(sample)["gene_counts"],
            "dir": "counts"
        })
    if dd.get_annotated_combined_counts(sample):
        out.append({"path": dd.get_annotated_combined_counts(sample)})
    if dd.get_combined_fpkm(sample):
        out.append({"path": dd.get_combined_fpkm(sample)})
    if dd.get_combined_fpkm_isoform(sample):
        out.append({"path": dd.get_combined_fpkm_isoform(sample)})
    if dd.get_transcript_assembler(sample):
        out.append({"path": dd.get_merged_gtf(sample)})
    if dd.get_dexseq_counts(sample):
        out.append({"path": dd.get_dexseq_counts(sample)})
        out.append({"path": "%s.ann" % dd.get_dexseq_counts(sample)})
    if dd.get_express_counts(sample):
        out.append({"path": dd.get_express_counts(sample)})
    if dd.get_express_fpkm(sample):
        out.append({"path": dd.get_express_fpkm(sample)})
    if dd.get_express_tpm(sample):
        out.append({"path": dd.get_express_tpm(sample)})
    if dd.get_isoform_to_gene(sample):
        out.append({"path": dd.get_isoform_to_gene(sample)})
    if dd.get_square_vcf(sample):
        out.append({"path": dd.get_square_vcf(sample)})
    if dd.get_sailfish_transcript_tpm(sample):
        out.append({"path": dd.get_sailfish_transcript_tpm(sample)})
    if dd.get_sailfish_gene_tpm(sample):
        out.append({"path": dd.get_sailfish_gene_tpm(sample)})
    if dd.get_tx2gene(sample):
        out.append({"path": dd.get_tx2gene(sample)})
    if dd.get_spikein_counts(sample):
        out.append({"path": dd.get_spikein_counts(sample)})
    if tz.get_in(("peaks_files", "consensus", "main"), sample):
        out.append({"path": tz.get_in(("peaks_files", "consensus", "main"), sample),
                    "dir": "consensus"})
    if tz.get_in(("peak_counts", "peaktable"), sample):
        out.append({
            "path": tz.get_in(("peak_counts", "peaktable"), sample),
            "dir": "consensus"
        })

    transcriptome_dir = os.path.join(dd.get_work_dir(sample), "inputs",
                                     "transcriptome")
    if os.path.exists(transcriptome_dir):
        out.append({
            "path": transcriptome_dir,
            "type": "directory",
            "ext": "transcriptome"
        })
    return _add_meta(out, config=upload_config)
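
The scRNA-seq branch above pairs the count matrix with sidecar files that share its path plus an extension (.rownames, .colnames, .metadata), registering each alongside the matrix. A hypothetical helper, with_sidecars, capturing that pattern in isolation (here each sidecar is existence-checked, the way the example checks the -dupes.mtx file):

import os

def with_sidecars(count_file, exts=(".rownames", ".colnames", ".metadata")):
    # Hypothetical helper mirroring the scRNA-seq branch: register the
    # matrix plus whichever sidecar files exist alongside it.
    out = [{"path": count_file, "type": "mtx"}]
    for ext in exts:
        sidecar = count_file + ext
        if os.path.exists(sidecar):
            out.append({"path": sidecar, "type": ext.lstrip(".")})
    return out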
Example #12
def _get_files_project(sample, upload_config):
    """Retrieve output files associated with an entire analysis project.
    """
    out = [{"path": sample["provenance"]["programs"]}]
    if os.path.exists(tz.get_in(["provenance", "data"], sample) or ""):
        out.append({"path": sample["provenance"]["data"]})
    for fname in ["bcbio-nextgen.log", "bcbio-nextgen-commands.log"]:
        if os.path.exists(os.path.join(log.get_log_dir(sample["config"]), fname)):
            out.append({"path": os.path.join(log.get_log_dir(sample["config"]), fname),
                        "type": "external_command_log",
                        "ext": ""})

    if "summary" in sample and sample["summary"].get("project"):
        out.append({"path": sample["summary"]["project"]})
    mixup_check = tz.get_in(["summary", "mixup_check"], sample)
    if mixup_check:
        out.append({"path": sample["summary"]["mixup_check"],
                    "type": "directory", "ext": "mixup_check"})

    report = os.path.join(dd.get_work_dir(sample), "report")
    if utils.file_exists(report):
        out.append({"path": report,
                    "type": "directory", "ext": "report"})

    multiqc = tz.get_in(["summary", "multiqc"], sample)
    if multiqc:
        out.extend(_flatten_file_with_secondary(multiqc, "multiqc"))

    if sample.get("seqcluster", None):
        out.append({"path": sample["seqcluster"],
                    "type": "directory", "ext": "seqcluster"})

    if sample.get("report", None):
        out.append({"path": os.path.dirname(sample["report"]),
                    "type": "directory", "ext": "seqclusterViz"})

    for x in sample.get("variants", []):
        if "pop_db" in x:
            out.append({"path": x["pop_db"],
                        "type": "sqlite",
                        "variantcaller": x["variantcaller"]})
    for x in sample.get("variants", []):
        if "population" in x:
            pop_db = tz.get_in(["population", "db"], x)
            if pop_db:
                out.append({"path": pop_db,
                            "type": "sqlite",
                            "variantcaller": x["variantcaller"]})
            out.extend(_get_variant_file(x, ("population", "vcf")))
    for x in sample.get("variants", []):
        if x.get("validate") and x["validate"].get("grading_summary"):
            out.append({"path": x["validate"]["grading_summary"]})
            break
    if "coverage" in sample:
        cov_db = tz.get_in(["coverage", "summary"], sample)
        if cov_db:
            out.append({"path": cov_db, "type": "sqlite", "ext": "coverage"})
        all_coverage = tz.get_in(["coverage", "all"], sample)
        if all_coverage:
            out.append({"path": all_coverage, "type": "bed", "ext": "coverage"})

    if dd.get_mirna_counts(sample):
        out.append({"path": dd.get_mirna_counts(sample)})
    if dd.get_isomir_counts(sample):
        out.append({"path": dd.get_isomir_counts(sample)})
    if dd.get_novel_mirna_counts(sample):
        out.append({"path": dd.get_novel_mirna_counts(sample)})
    if dd.get_novel_isomir_counts(sample):
        out.append({"path": dd.get_novel_isomir_counts(sample)})
    if dd.get_combined_counts(sample):
        out.append({"path": dd.get_combined_counts(sample)})
    if dd.get_annotated_combined_counts(sample):
        out.append({"path": dd.get_annotated_combined_counts(sample)})
    if dd.get_combined_fpkm(sample):
        out.append({"path": dd.get_combined_fpkm(sample)})
    if dd.get_combined_fpkm_isoform(sample):
        out.append({"path": dd.get_combined_fpkm_isoform(sample)})
    if dd.get_transcript_assembler(sample):
        out.append({"path": dd.get_merged_gtf(sample)})
    if dd.get_dexseq_counts(sample):
        out.append({"path": dd.get_dexseq_counts(sample)})
    if dd.get_express_counts(sample):
        out.append({"path": dd.get_express_counts(sample)})
    if dd.get_express_fpkm(sample):
        out.append({"path": dd.get_express_fpkm(sample)})
    if dd.get_express_tpm(sample):
        out.append({"path": dd.get_express_tpm(sample)})
    if dd.get_isoform_to_gene(sample):
        out.append({"path": dd.get_isoform_to_gene(sample)})
    if dd.get_square_vcf(sample):
        out.append({"path": dd.get_square_vcf(sample)})
    if dd.get_sailfish_tidy(sample):
        out.append({"path": dd.get_sailfish_tidy(sample)})
    if dd.get_sailfish_transcript_tpm(sample):
        out.append({"path": dd.get_sailfish_transcript_tpm(sample)})
    if dd.get_sailfish_gene_tpm(sample):
        out.append({"path": dd.get_sailfish_gene_tpm(sample)})
    if dd.get_tx2gene(sample):
        out.append({"path": dd.get_tx2gene(sample)})
    return _add_meta(out, config=upload_config)
Example #13
def _get_files_project(sample, upload_config):
    """Retrieve output files associated with an entire analysis project.
    """
    out = [{"path": sample["provenance"]["programs"]}]
    if os.path.exists(tz.get_in(["provenance", "data"], sample) or ""):
        out.append({"path": sample["provenance"]["data"]})
    for fname in ["bcbio-nextgen.log", "bcbio-nextgen-commands.log"]:
        if os.path.exists(os.path.join(log.get_log_dir(sample["config"]), fname)):
            out.append({"path": os.path.join(log.get_log_dir(sample["config"]), fname),
                        "type": "external_command_log",
                        "ext": ""})

    if "summary" in sample and sample["summary"].get("project"):
        out.append({"path": sample["summary"]["project"]})
    mixup_check = tz.get_in(["summary", "mixup_check"], sample)
    if mixup_check:
        out.append({"path": sample["summary"]["mixup_check"],
                    "type": "directory", "ext": "mixup_check"})

    report = os.path.join(dd.get_work_dir(sample), "report")
    if utils.file_exists(report):
        out.append({"path": report,
                    "type": "directory", "ext": "report"})

    multiqc = tz.get_in(["summary", "multiqc"], sample)
    if multiqc:
        out.extend(_flatten_file_with_secondary(multiqc, "multiqc"))

    if sample.get("seqcluster", {}):
        out.append({"path": sample["seqcluster"].get("out_dir"),
                    "type": "directory", "ext": "seqcluster"})

    if sample.get("report", None):
        out.append({"path": os.path.dirname(sample["report"]),
                    "type": "directory", "ext": "seqclusterViz"})

    for x in sample.get("variants", []):
        if "pop_db" in x:
            out.append({"path": x["pop_db"],
                        "type": "sqlite",
                        "variantcaller": x["variantcaller"]})
    for x in sample.get("variants", []):
        if "population" in x:
            pop_db = tz.get_in(["population", "db"], x)
            if pop_db:
                out.append({"path": pop_db,
                            "type": "sqlite",
                            "variantcaller": x["variantcaller"]})
            suffix = "-annotated-decomposed" if tz.get_in(("population", "decomposed"), x) else "-annotated"
            out.extend([_add_batch(f, sample)
                        for f in _get_variant_file(x, ("population", "vcf"), suffix=suffix)])
    for x in sample.get("variants", []):
        if x.get("validate") and x["validate"].get("grading_summary"):
            out.append({"path": x["validate"]["grading_summary"]})
            break
    if "coverage" in sample:
        cov_db = tz.get_in(["coverage", "summary"], sample)
        if cov_db:
            out.append({"path": cov_db, "type": "sqlite", "ext": "coverage"})
        all_coverage = tz.get_in(["coverage", "all"], sample)
        if all_coverage:
            out.append({"path": all_coverage, "type": "bed", "ext": "coverage"})

    if dd.get_mirna_counts(sample):
        out.append({"path": dd.get_mirna_counts(sample)})
    if dd.get_isomir_counts(sample):
        out.append({"path": dd.get_isomir_counts(sample)})
    if dd.get_novel_mirna_counts(sample):
        out.append({"path": dd.get_novel_mirna_counts(sample)})
    if dd.get_novel_isomir_counts(sample):
        out.append({"path": dd.get_novel_isomir_counts(sample)})
    if dd.get_combined_counts(sample):
        count_file = dd.get_combined_counts(sample)
        if sample["analysis"].lower() == "scrna-seq":
            out.append({"path": count_file,
                    "type": "mtx"})
            out.append({"path": count_file + ".rownames",
                    "type": "rownames"})
            out.append({"path": count_file + ".colnames",
                    "type": "colnames"})
        else:
            out.append({"path": dd.get_combined_counts(sample)})
    if dd.get_annotated_combined_counts(sample):
        out.append({"path": dd.get_annotated_combined_counts(sample)})
    if dd.get_combined_fpkm(sample):
        out.append({"path": dd.get_combined_fpkm(sample)})
    if dd.get_combined_fpkm_isoform(sample):
        out.append({"path": dd.get_combined_fpkm_isoform(sample)})
    if dd.get_transcript_assembler(sample):
        out.append({"path": dd.get_merged_gtf(sample)})
    if dd.get_dexseq_counts(sample):
        out.append({"path": dd.get_dexseq_counts(sample)})
    if dd.get_express_counts(sample):
        out.append({"path": dd.get_express_counts(sample)})
    if dd.get_express_fpkm(sample):
        out.append({"path": dd.get_express_fpkm(sample)})
    if dd.get_express_tpm(sample):
        out.append({"path": dd.get_express_tpm(sample)})
    if dd.get_isoform_to_gene(sample):
        out.append({"path": dd.get_isoform_to_gene(sample)})
    if dd.get_square_vcf(sample):
        out.append({"path": dd.get_square_vcf(sample)})
    if dd.get_sailfish_tidy(sample):
        out.append({"path": dd.get_sailfish_tidy(sample)})
    if dd.get_sailfish_transcript_tpm(sample):
        out.append({"path": dd.get_sailfish_transcript_tpm(sample)})
    if dd.get_sailfish_gene_tpm(sample):
        out.append({"path": dd.get_sailfish_gene_tpm(sample)})
    if dd.get_tx2gene(sample):
        out.append({"path": dd.get_tx2gene(sample)})
    if dd.get_spikein_counts(sample):
        out.append({"path": dd.get_spikein_counts(sample)})
    return _add_meta(out, config=upload_config)
Example #14
def _get_files_project(sample, upload_config):
    """Retrieve output files associated with an entire analysis project.
    """
    out = [{"path": sample["provenance"]["programs"]}]
    for fname in ["bcbio-nextgen.log", "bcbio-nextgen-commands.log"]:
        if os.path.exists(os.path.join(log.get_log_dir(sample["config"]), fname)):
            out.append({"path": os.path.join(log.get_log_dir(sample["config"]), fname),
                        "type": "external_command_log",
                        "ext": ""})

    if "summary" in sample and sample["summary"].get("project"):
        out.append({"path": sample["summary"]["project"]})
    mixup_check = tz.get_in(["summary", "mixup_check"], sample)
    if mixup_check:
        out.append({"path": sample["summary"]["mixup_check"],
                    "type": "directory", "ext": "mixup_check"})

    report = os.path.join(dd.get_work_dir(sample), "report")
    if utils.file_exists(report):
        out.append({"path": report,
            "type": "directory", "ext": "report"})

    if sample.get("seqcluster", None):
        out.append({"path": sample["seqcluster"],
                    "type": "directory", "ext": "seqcluster"})

    for x in sample.get("variants", []):
        if "pop_db" in x:
            out.append({"path": x["pop_db"],
                        "type": "sqlite",
                        "variantcaller": x["variantcaller"]})
    for x in sample.get("variants", []):
        if "population" in x:
            pop_db = tz.get_in(["population", "db"], x)
            if pop_db:
                out.append({"path": pop_db,
                            "type": "sqlite",
                            "variantcaller": x["variantcaller"]})
            out.extend(_get_variant_file(x, ("population", "vcf")))
    for x in sample.get("variants", []):
        if x.get("validate") and x["validate"].get("grading_summary"):
            out.append({"path": x["validate"]["grading_summary"]})
            break
    if "coverage" in sample:
        cov_db = tz.get_in(["coverage", "summary"], sample)
        if cov_db:
            out.append({"path": cov_db, "type": "sqlite", "ext": "coverage"})
        all_coverage = tz.get_in(["coverage", "all"], sample)
        if all_coverage:
            out.append({"path": all_coverage, "type": "bed", "ext": "coverage"})

    if dd.get_mirna_counts(sample):
        out.append({"path": dd.get_mirna_counts(sample)})
    if dd.get_isomir_counts(sample):
        out.append({"path": dd.get_isomir_counts(sample)})
    if dd.get_combined_counts(sample):
        out.append({"path": dd.get_combined_counts(sample)})
    if dd.get_annotated_combined_counts(sample):
        out.append({"path": dd.get_annotated_combined_counts(sample)})
    if dd.get_combined_fpkm(sample):
        out.append({"path": dd.get_combined_fpkm(sample)})
    if dd.get_combined_fpkm_isoform(sample):
        out.append({"path": dd.get_combined_fpkm_isoform(sample)})
    if dd.get_assembled_gtf(sample):
        out.append({"path": dd.get_assembled_gtf(sample)})
    if dd.get_dexseq_counts(sample):
        out.append({"path": dd.get_dexseq_counts(sample)})
    if dd.get_express_counts(sample):
        out.append({"path": dd.get_express_counts(sample)})
    if dd.get_express_fpkm(sample):
        out.append({"path": dd.get_express_fpkm(sample)})
    if dd.get_express_tpm(sample):
        out.append({"path": dd.get_express_tpm(sample)})
    if dd.get_isoform_to_gene(sample):
        out.append({"path": dd.get_isoform_to_gene(sample)})
    if dd.get_square_vcf(sample):
        out.append({"path": dd.get_square_vcf(sample)})
    if dd.get_sailfish_tidy(sample):
        out.append({"path": dd.get_sailfish_tidy(sample)})
    if dd.get_sailfish_transcript_tpm(sample):
        out.append({"path": dd.get_sailfish_transcript_tpm(sample)})
    if dd.get_sailfish_gene_tpm(sample):
        out.append({"path": dd.get_sailfish_gene_tpm(sample)})
    return _add_meta(out, config=upload_config)
Example #15
def _get_files_project(sample, upload_config):
    """Retrieve output files associated with an entire analysis project.
    """
    out = [{"path": sample["provenance"]["programs"]}]
    if os.path.exists(tz.get_in(["provenance", "data"], sample) or ""):
        out.append({"path": sample["provenance"]["data"]})
    for fname in ["bcbio-nextgen.log", "bcbio-nextgen-commands.log"]:
        if os.path.exists(os.path.join(log.get_log_dir(sample["config"]), fname)):
            out.append({"path": os.path.join(log.get_log_dir(sample["config"]), fname),
                        "type": "external_command_log",
                        "ext": ""})

    if "summary" in sample and sample["summary"].get("project"):
        out.append({"path": sample["summary"]["project"]})
    if "summary" in sample and sample["summary"].get("metadata"):
        out.append({"path": sample["summary"]["metadata"]})
    mixup_check = tz.get_in(["summary", "mixup_check"], sample)
    if mixup_check:
        out.append({"path": sample["summary"]["mixup_check"],
                    "type": "directory", "ext": "mixup_check"})

    report = os.path.join(dd.get_work_dir(sample), "report")
    if utils.file_exists(report):
        out.append({"path": report,
                    "type": "directory", "ext": "report"})

    multiqc = tz.get_in(["summary", "multiqc"], sample)
    if multiqc:
        out.extend(_flatten_file_with_secondary(multiqc, "multiqc"))

    if sample.get("seqcluster", {}):
        out.append({"path": sample["seqcluster"].get("out_dir"),
                    "type": "directory", "ext": "seqcluster"})

    if sample.get("mirge", {}):
        for fn in sample["mirge"]:
            out.append({"path": fn,
                        "dir": "mirge"})

    if sample.get("report", None):
        out.append({"path": os.path.dirname(sample["report"]),
                    "type": "directory", "ext": "seqclusterViz"})

    for x in sample.get("variants", []):
        if "pop_db" in x:
            out.append({"path": x["pop_db"],
                        "type": "sqlite",
                        "variantcaller": x["variantcaller"]})
    for x in sample.get("variants", []):
        if "population" in x:
            pop_db = tz.get_in(["population", "db"], x)
            if pop_db:
                out.append({"path": pop_db,
                            "type": "sqlite",
                            "variantcaller": x["variantcaller"]})
            suffix = "-annotated-decomposed" if tz.get_in(("population", "decomposed"), x) else "-annotated"
            vcfs = _get_project_vcf(x, suffix)
            out.extend([_add_batch(f, sample) for f in vcfs])
    for x in sample.get("variants", []):
        if x.get("validate") and x["validate"].get("grading_summary"):
            out.append({"path": x["validate"]["grading_summary"]})
            break
    sv_project = set([])
    for svcall in sample.get("sv", []):
        if svcall.get("variantcaller") == "seq2c":
            if svcall.get("calls_all") and svcall["calls_all"] not in sv_project:
                out.append({"path": svcall["coverage_all"], "batch": "seq2c", "ext": "coverage", "type": "tsv"})
                out.append({"path": svcall["read_mapping"], "batch": "seq2c", "ext": "read_mapping", "type": "txt"})
                out.append({"path": svcall["calls_all"], "batch": "seq2c", "ext": "calls", "type": "tsv"})
                sv_project.add(svcall["calls_all"])
    if "coverage" in sample:
        cov_db = tz.get_in(["coverage", "summary"], sample)
        if cov_db:
            out.append({"path": cov_db, "type": "sqlite", "ext": "coverage"})
        all_coverage = tz.get_in(["coverage", "all"], sample)
        if all_coverage:
            out.append({"path": all_coverage, "type": "bed", "ext": "coverage"})

    if dd.get_mirna_counts(sample):
        out.append({"path": dd.get_mirna_counts(sample)})
    if dd.get_isomir_counts(sample):
        out.append({"path": dd.get_isomir_counts(sample)})
    if dd.get_novel_mirna_counts(sample):
        out.append({"path": dd.get_novel_mirna_counts(sample)})
    if dd.get_novel_isomir_counts(sample):
        out.append({"path": dd.get_novel_isomir_counts(sample)})
    if dd.get_combined_counts(sample):
        count_file = dd.get_combined_counts(sample)
        if sample["analysis"].lower() == "scrna-seq":
            out.append({"path": count_file,
                        "type": "mtx"})
            out.append({"path": count_file + ".rownames",
                        "type": "rownames"})
            out.append({"path": count_file + ".colnames",
                        "type": "colnames"})
            out.append({"path": count_file + ".metadata",
                        "type": "metadata"})
            umi_file = os.path.splitext(count_file)[0] + "-dupes.mtx"
            if utils.file_exists(umi_file):
                out.append({"path": umi_file,
                            "type": "mtx"})
                out.append({"path": umi_file + ".rownames",
                            "type": "rownames"})
                out.append({"path": umi_file + ".colnames",
                            "type": "colnames"})
            if dd.get_combined_histogram(sample):
                out.append({"path": dd.get_combined_histogram(sample),
                            "type": "txt"})
            rda = os.path.join(os.path.dirname(count_file), "se.rda")
            if utils.file_exists(rda):
                out.append({"path": rda,
                            "type": "rda"})
        else:
            out.append({"path": dd.get_combined_counts(sample)})
    if dd.get_annotated_combined_counts(sample):
        out.append({"path": dd.get_annotated_combined_counts(sample)})
    if dd.get_combined_fpkm(sample):
        out.append({"path": dd.get_combined_fpkm(sample)})
    if dd.get_combined_fpkm_isoform(sample):
        out.append({"path": dd.get_combined_fpkm_isoform(sample)})
    if dd.get_transcript_assembler(sample):
        out.append({"path": dd.get_merged_gtf(sample)})
    if dd.get_dexseq_counts(sample):
        out.append({"path": dd.get_dexseq_counts(sample)})
        out.append({"path": "%s.ann" % dd.get_dexseq_counts(sample)})
    if dd.get_express_counts(sample):
        out.append({"path": dd.get_express_counts(sample)})
    if dd.get_express_fpkm(sample):
        out.append({"path": dd.get_express_fpkm(sample)})
    if dd.get_express_tpm(sample):
        out.append({"path": dd.get_express_tpm(sample)})
    if dd.get_isoform_to_gene(sample):
        out.append({"path": dd.get_isoform_to_gene(sample)})
    if dd.get_square_vcf(sample):
        out.append({"path": dd.get_square_vcf(sample)})
    if dd.get_sailfish_transcript_tpm(sample):
        out.append({"path": dd.get_sailfish_transcript_tpm(sample)})
    if dd.get_sailfish_gene_tpm(sample):
        out.append({"path": dd.get_sailfish_gene_tpm(sample)})
    if dd.get_tx2gene(sample):
        out.append({"path": dd.get_tx2gene(sample)})
    if dd.get_spikein_counts(sample):
        out.append({"path": dd.get_spikein_counts(sample)})
    transcriptome_dir = os.path.join(dd.get_work_dir(sample), "inputs",
                                     "transcriptome")
    if os.path.exists(transcriptome_dir):
        out.append({"path": transcriptome_dir, "type": "directory",
                    "ext": "transcriptome"})
    return _add_meta(out, config=upload_config)