Exemplo n.º 1
0
    def run(self, config, config_file, parallel, dirs, samples):
        """Run every pipeline stage over ``samples`` and return the result.

        Stages execute in order: adapter trimming, alignment, disambiguation,
        transcript assembly, expression estimation and quality control.  Each
        group of stages runs inside its own ``prun.start`` context, which
        provides the ``run_parallel`` callable used by that group.

        ``config_file`` is part of the signature but is not read here.
        """
        # Adapter trimming.
        trim_resources = _wres(parallel, ["picard", "AlienTrimmer"])
        with prun.start(trim_resources, samples, config, dirs,
                        "trimming") as run_parallel:
            with profile.report("adapter trimming", dirs):
                for task in ("prepare_sample", "trim_sample"):
                    samples = run_parallel(task, samples)

        # Alignment; the multiplier is derived from the trimmed samples.
        align_resources = _wres(parallel, ["aligner", "picard"],
                                ensure_mem={"tophat": 8, "tophat2": 8, "star": 40})
        align_multiplier = alignprep.parallel_multiplier(samples)
        with prun.start(align_resources, samples, config, dirs, "multicore",
                        multiplier=align_multiplier) as run_parallel:
            with profile.report("alignment", dirs):
                samples = disambiguate.split(samples)
                samples = run_parallel("process_alignment", samples)

        # Disambiguation, transcript assembly and expression estimation.
        count_resources = _wres(parallel, ["samtools", "cufflinks"])
        with prun.start(count_resources, samples, config, dirs,
                        "rnaseqcount") as run_parallel:
            with profile.report("disambiguation", dirs):
                samples = disambiguate.resolve(samples, run_parallel)
            with profile.report("transcript assembly", dirs):
                samples = rnaseq.assemble_transcripts(run_parallel, samples)
            with profile.report("estimate expression", dirs):
                samples = rnaseq.estimate_expression(samples, run_parallel)

        # Quality control.
        qc_resources = _wres(parallel, ["picard", "fastqc", "rnaseqc", "kraken"])
        with prun.start(qc_resources, samples, config, dirs,
                        "persample") as run_parallel:
            with profile.report("quality control", dirs):
                samples = qcsummary.generate_parallel(samples, run_parallel)

        logger.info("Timing: finished")
        return samples
Exemplo n.º 2
0
    def run(self, config, config_file, parallel, dirs, samples):
        """Trim, align, count, combine counts and run QC for ``samples``.

        Each stage group runs inside its own ``prun.start`` context, which
        provides the ``run_parallel`` callable for that group.  Between the
        counting and QC stages the per-sample count files are combined and
        annotated, and the resulting values are stored on every sample.

        ``config_file`` is part of the signature but is not read here.
        Returns the sample list produced by the QC stage.
        """
        # Lane processing and trimming.
        trim_resources = _wres(parallel, ["picard"])
        with prun.start(trim_resources, samples, config, dirs,
                        "trimming") as run_parallel:
            for task in ("process_lane", "trim_lane"):
                samples = run_parallel(task, samples)

        # Alignment followed by disambiguation.
        align_resources = _wres(parallel, ["aligner"],
                                ensure_mem={"tophat": 8, "tophat2": 8, "star": 30})
        align_multiplier = alignprep.parallel_multiplier(samples)
        with prun.start(align_resources, samples, config, dirs, "multicore",
                        multiplier=align_multiplier) as run_parallel:
            samples = disambiguate.split(samples)
            samples = run_parallel("process_alignment", samples)
            samples = disambiguate.resolve(samples, run_parallel)

        # Expression estimation (the rnaseq.detect_fusion step is disabled).
        count_resources = _wres(parallel, ["samtools", "cufflinks"])
        with prun.start(count_resources, samples, config, dirs,
                        "rnaseqcount") as run_parallel:
            samples = rnaseq.estimate_expression(samples, run_parallel)

        # Combine the per-sample count files and annotate them using the
        # transcripts entry looked up on the first sample.
        count_files = [x[0].get("count_file") for x in samples]
        combined = combine_count_files(count_files)
        transcripts_key = ('genome_resources', 'rnaseq', 'transcripts')
        gtf_file = utils.get_in(samples[0][0], transcripts_key, None)
        annotated = annotate_combined_count_file(combined, gtf_file)
        for sample in samples:
            info = sample[0]
            info["combined_counts"] = combined
            if annotated:
                info["annotated_combined_counts"] = annotated

        # Quality control.
        qc_resources = _wres(parallel, ["picard", "fastqc", "rnaseqc"])
        with prun.start(qc_resources, samples, config, dirs,
                        "persample") as run_parallel:
            samples = qcsummary.generate_parallel(samples, run_parallel)
        return samples
Exemplo n.º 3
0
    def run(self, config, config_file, parallel, dirs, samples):
        """Trim, align, count, combine counts and run QC for ``samples``.

        Each stage group runs inside its own ``prun.start`` context, which
        provides the ``run_parallel`` callable for that group.  After the
        counting stage the per-sample count files are combined and annotated,
        and both values are stored on every sample.

        ``config_file`` is part of the signature but is not read here.
        Returns the sample list produced by the QC stage.
        """
        # Trimming uses the unmodified ``parallel`` settings.
        with prun.start(parallel, samples, config, dirs, "trimming") as run_parallel:
            samples = run_parallel("trim_lane", samples)

        # Alignment followed by disambiguation.
        align_progs = _wprogs(parallel, ["aligner"], {"tophat": 8, "tophat2": 8, "star": 30})
        align_multiplier = alignprep.parallel_multiplier(samples)
        with prun.start(align_progs, samples, config, dirs, "multicore",
                        multiplier=align_multiplier) as run_parallel:
            samples = disambiguate.split(samples)
            samples = run_parallel("process_alignment", samples)
            samples = disambiguate.resolve(samples, run_parallel)

        # Expression estimation (the rnaseq.detect_fusion step is disabled).
        count_progs = _wprogs(parallel, ["samtools", "gatk", "cufflinks"])
        with prun.start(count_progs, samples, config, dirs, "rnaseqcount") as run_parallel:
            samples = rnaseq.estimate_expression(samples, run_parallel)

        # Combine the per-sample count files and annotate them using the
        # ensembl alias looked up on the first sample.
        count_files = [x[0].get("count_file") for x in samples]
        combined = combine_count_files(count_files)
        ensembl_key = ("genome_resources", "aliases", "ensembl")
        organism = utils.get_in(samples[0][0], ensembl_key, None)
        annotated = annotate_combined_count_file(combined, organism)
        for sample in samples:
            info = sample[0]
            info["combined_counts"] = combined
            info["annotated_combined_counts"] = annotated

        # Quality control.
        qc_progs = _wprogs(parallel, ["picard", "fastqc", "rnaseqc"])
        with prun.start(qc_progs, samples, config, dirs, "persample") as run_parallel:
            samples = qcsummary.generate_parallel(samples, run_parallel)
        return samples
Exemplo n.º 4
0
def create_combined_tx2gene(data):
    """Create a single tx2gene CSV covering every genome split out of ``data``.

    A ``<genome_build>-tx2gene.csv`` file is produced (or reused when it
    already exists) for each item returned by ``disambiguate.split([data])``,
    preferring the transcriptome GTF and falling back to the regular GTF.
    The per-genome files are then concatenated with ``cat`` into
    ``tx2gene.csv`` under ``<work_dir>/inputs/transcriptome``.

    Returns the path of the combined file; an existing combined file is
    returned without being rebuilt.
    """
    out_dir = os.path.join(dd.get_work_dir(data), "inputs", "transcriptome")
    tx2gene_files = []
    for item in disambiguate.split([data]):
        odata = item[0]
        gtf_file = dd.get_transcriptome_gtf(odata) or dd.get_gtf_file(odata)
        out_file = os.path.join(out_dir,
                                dd.get_genome_build(odata) + "-tx2gene.csv")
        if not file_exists(out_file):
            out_file = gtf.tx2genefile(gtf_file, out_file, tsv=False)
        tx2gene_files.append(out_file)

    combined_file = os.path.join(out_dir, "tx2gene.csv")
    if not file_exists(combined_file):
        joined_inputs = " ".join(tx2gene_files)
        cmd = "cat {tx2gene_file_string} > {tx_out_file}"
        with file_transaction(data, combined_file) as tx_out_file:
            do.run(cmd.format(tx2gene_file_string=joined_inputs,
                              tx_out_file=tx_out_file),
                   "Combining tx2gene CSV files.")
    return combined_file
Exemplo n.º 5
0
    def run(self, config, config_file, parallel, dirs, samples):
        """Run alignment, variant calling and finalization over ``samples``.

        The work is split into four groups, each executed inside its own
        ``prun.start`` context that provides the ``run_parallel`` callable:
        alignment and preparation ("multicore"), region-based variant calling
        ("full"), per-sample variant finalization ("persample") and final
        BAM/population/QC/archive steps ("multicore2").

        ``config_file`` is part of the signature but is not read here.
        Returns the sample list produced by the last stage.
        """
        ## Alignment and preparation requiring the entire input file (multicore cluster)
        align_resources = _wres(parallel, ["aligner", "samtools", "sambamba"],
                                (["reference", "fasta"], ["reference", "aligner"], ["files"]))
        align_multiplier = alignprep.parallel_multiplier(samples)
        with prun.start(align_resources, samples, config, dirs, "multicore",
                        multiplier=align_multiplier) as run_parallel:
            with profile.report("alignment preparation", dirs):
                samples = run_parallel("prep_align_inputs", samples)
                samples = disambiguate.split(samples)
            with profile.report("alignment", dirs):
                samples = run_parallel("process_alignment", samples)
                samples = alignprep.merge_split_alignments(samples, run_parallel)
                samples = disambiguate.resolve(samples, run_parallel)
            with profile.report("callable regions", dirs):
                samples = run_parallel("postprocess_alignment", samples)
                # The whole sample list is passed as a single argument here.
                samples = run_parallel("combine_sample_regions", [samples])
                samples = region.clean_sample_data(samples)
            with profile.report("coverage", dirs):
                samples = coverage.summarize_samples(samples, run_parallel)

        ## Variant calling on sub-regions of the input file (full cluster)
        calling_resources = _wres(parallel, ["gatk", "picard", "variantcaller"])
        region_multiplier = region.get_max_counts(samples)
        with prun.start(calling_resources, samples, config, dirs, "full",
                        multiplier=region_multiplier, max_multicore=1) as run_parallel:
            with profile.report("alignment post-processing", dirs):
                samples = region.parallel_prep_region(samples, run_parallel)
            with profile.report("variant calling", dirs):
                samples = genotype.parallel_variantcall_region(samples, run_parallel)

        ## Finalize variants (per-sample cluster)
        finalize_resources = _wres(parallel, ["gatk", "gatk-vqsr", "snpeff", "bcbio_variation"])
        with prun.start(finalize_resources, samples, config, dirs,
                        "persample") as run_parallel:
            with profile.report("joint squaring off/backfilling", dirs):
                samples = joint.square_off(samples, run_parallel)
            with profile.report("variant post-processing", dirs):
                for task in ("postprocess_variants", "split_variants_by_sample"):
                    samples = run_parallel(task, samples)
            with profile.report("validation", dirs):
                samples = run_parallel("compare_to_rm", samples)
                samples = genotype.combine_multiple_callers(samples)

        ## Finalizing BAMs and population databases, handle multicore computation
        final_resources = _wres(parallel, ["gemini", "samtools", "fastqc", "bamtools",
                                           "bcbio_variation", "bcbio-variation-recall"])
        with prun.start(final_resources, samples, config, dirs,
                        "multicore2") as run_parallel:
            with profile.report("prepped BAM merging", dirs):
                samples = region.delayed_bamprep_merge(samples, run_parallel)
            with profile.report("ensemble calling", dirs):
                samples = ensemble.combine_calls_parallel(samples, run_parallel)
            with profile.report("validation summary", dirs):
                samples = validate.summarize_grading(samples)
            with profile.report("structural variation", dirs):
                samples = structural.run(samples, run_parallel)
            with profile.report("population database", dirs):
                samples = population.prep_db_parallel(samples, run_parallel)
            with profile.report("quality control", dirs):
                samples = qcsummary.generate_parallel(samples, run_parallel)
            with profile.report("archive", dirs):
                samples = archive.compress(samples, run_parallel)

        logger.info("Timing: finished")
        return samples
Exemplo n.º 6
0
def _create_combined_fasta(data, out_dir):
    """Build one transcript FASTA covering every genome split out of ``data``.

    A ``<genome_build>.fa`` file is produced in ``out_dir`` (or reused when
    it already exists) for each item returned by
    ``disambiguate.split([data])``.  The per-genome files are then
    concatenated with ``cat`` into a combined FASTA named after the genome
    build of ``data``, with any disambiguation entries appended using "-".

    Returns the path of the combined file; an existing combined file is
    returned without being rebuilt.
    """
    fasta_files = []
    for item in disambiguate.split([data]):
        odata = item[0]
        gtf_file = dd.get_gtf_file(odata)
        ref_file = dd.get_ref_file(odata)
        out_file = os.path.join(out_dir, dd.get_genome_build(odata) + ".fa")
        if not file_exists(out_file):
            out_file = _gtf_to_fasta(gtf_file, ref_file, out_file)
            # The cleaned FASTA is written over the same path.
            out_file = _clean_gtf_fa(out_file, out_file)
        fasta_files.append(out_file)

    name_parts = [os.path.join(out_dir, dd.get_genome_build(data))]
    extra_genomes = dd.get_disambiguate(data)
    if extra_genomes:
        name_parts = name_parts + extra_genomes
    combined_file = "-".join(name_parts) + ".fa"

    if not file_exists(combined_file):
        joined_inputs = " ".join(fasta_files)
        cmd = "cat {fasta_file_string} > {tx_out_file}"
        with file_transaction(combined_file) as tx_out_file:
            do.run(cmd.format(fasta_file_string=joined_inputs,
                              tx_out_file=tx_out_file),
                   "Combining transcriptome FASTA files.")
    return combined_file
Exemplo n.º 7
0
def _create_combined_fasta(data, out_dir):
    """Build one transcript FASTA covering every genome split out of ``data``.

    A ``<genome_build>.fa`` file is produced in ``out_dir`` (or reused when
    it already exists) for each item returned by
    ``disambiguate.split([data])``.  The per-genome files are then
    concatenated with ``cat`` into a combined FASTA named after the genome
    build of ``data``, with any disambiguation entries appended using "-".

    Returns the path of the combined file; an existing combined file is
    returned without being rebuilt.
    """
    fasta_files = []
    for item in disambiguate.split([data]):
        odata = item[0]
        gtf_file = dd.get_gtf_file(odata)
        ref_file = dd.get_ref_file(odata)
        out_file = os.path.join(out_dir, dd.get_genome_build(odata) + ".fa")
        if not file_exists(out_file):
            out_file = _gtf_to_fasta(gtf_file, ref_file, out_file)
            # The cleaned FASTA is written over the same path.
            out_file = _clean_gtf_fa(out_file, out_file)
        fasta_files.append(out_file)

    name_parts = [os.path.join(out_dir, dd.get_genome_build(data))]
    extra_genomes = dd.get_disambiguate(data)
    if extra_genomes:
        name_parts = name_parts + extra_genomes
    combined_file = "-".join(name_parts) + ".fa"

    if not file_exists(combined_file):
        joined_inputs = " ".join(fasta_files)
        cmd = "cat {fasta_file_string} > {tx_out_file}"
        with file_transaction(combined_file) as tx_out_file:
            do.run(cmd.format(fasta_file_string=joined_inputs,
                              tx_out_file=tx_out_file),
                   "Combining transcriptome FASTA files.")
    return combined_file
Exemplo n.º 8
0
 def run(self, config, config_file, run_parallel, parallel, dirs, lane_items):
     """Trim, align, clean and summarize ``lane_items`` with ``run_parallel``.

     Only ``run_parallel`` and ``lane_items`` are read here; the remaining
     parameters are part of the signature but unused in this method.
     Returns the result of ``qcsummary.generate_parallel``.
     """
     trimmed = run_parallel("trim_lane", lane_items)
     samples = disambiguate.split(trimmed)
     for task in ("process_alignment", "clean_chipseq_alignment"):
         samples = run_parallel(task, samples)
     return qcsummary.generate_parallel(samples, run_parallel)
Exemplo n.º 9
0
 def run(self, config, run_info_yaml, parallel, dirs, samples):
     """Organize, trim, align, quantify and QC ``samples``.

     Stages execute in order: sample organization, adapter trimming,
     alignment, disambiguation, transcript assembly, expression estimation
     and quality control.  Each group runs inside its own ``prun.start``
     context, which provides the ``run_parallel`` callable for that group.

     Returns the sample list produced by the QC stage.
     """
     # Sample organization; only the first sample is handed to prun.start.
     organize_resources = _wres(parallel, ["aligner"],
                                ensure_mem={"tophat": 8, "tophat2": 8, "star": 2})
     with prun.start(organize_resources, [samples[0]], config, dirs,
                     "organize_samples") as run_parallel:
         with profile.report("organize samples", dirs):
             descriptions = [x[0]["description"] for x in samples]
             organize_args = [[dirs, config, run_info_yaml, descriptions]]
             samples = run_parallel("organize_samples", organize_args)

     # Adapter trimming.
     trim_resources = _wres(parallel, ["picard", "cutadapt"])
     with prun.start(trim_resources, samples, config, dirs,
                     "trimming") as run_parallel:
         with profile.report("adapter trimming", dirs):
             for task in ("prepare_sample", "trim_sample"):
                 samples = run_parallel(task, samples)

     # Alignment; the multiplier is derived from the trimmed samples.
     align_resources = _wres(parallel, ["aligner", "picard"],
                             ensure_mem={"tophat": 8, "tophat2": 8, "star": 2})
     align_multiplier = alignprep.parallel_multiplier(samples)
     with prun.start(align_resources, samples, config, dirs, "alignment",
                     multiplier=align_multiplier) as run_parallel:
         with profile.report("alignment", dirs):
             samples = disambiguate.split(samples)
             samples = run_parallel("process_alignment", samples)

     # Disambiguation, transcript assembly and expression estimation.
     count_resources = _wres(parallel, ["samtools", "cufflinks"])
     with prun.start(count_resources, samples, config, dirs,
                     "rnaseqcount") as run_parallel:
         with profile.report("disambiguation", dirs):
             samples = disambiguate.resolve(samples, run_parallel)
         with profile.report("transcript assembly", dirs):
             samples = rnaseq.assemble_transcripts(run_parallel, samples)
         with profile.report("estimate expression", dirs):
             samples = rnaseq.estimate_expression(samples, run_parallel)

     # Quality control.
     qc_resources = _wres(parallel, ["picard", "fastqc", "rnaseqc", "kraken"])
     with prun.start(qc_resources, samples, config, dirs, "qc") as run_parallel:
         with profile.report("quality control", dirs):
             samples = qcsummary.generate_parallel(samples, run_parallel)

     logger.info("Timing: finished")
     return samples
Exemplo n.º 10
0
    def run(self, config, config_file, parallel, dirs, samples):
        """Run alignment, variant calling and finalization over ``samples``.

        The work is split into four groups, each executed inside its own
        ``prun.start`` context that provides the ``run_parallel`` callable:
        alignment and preparation ("multicore"), region-based variant calling
        ("full"), per-sample variant finalization ("persample") and final
        BAM/population/QC/archive steps ("multicore2").

        ``config_file`` is part of the signature but is not read here.
        Returns the sample list produced by the last stage.
        """
        ## Alignment and preparation requiring the entire input file (multicore cluster)
        align_resources = _wres(parallel, ["aligner", "samtools", "sambamba"],
                                (["reference", "fasta"], ["reference", "aligner"], ["files"]))
        align_multiplier = alignprep.parallel_multiplier(samples)
        with prun.start(align_resources, samples, config, dirs, "multicore",
                        multiplier=align_multiplier) as run_parallel:
            with profile.report("alignment preparation", dirs):
                samples = run_parallel("prep_align_inputs", samples)
                samples = disambiguate.split(samples)
            with profile.report("alignment", dirs):
                samples = run_parallel("process_alignment", samples)
                samples = alignprep.merge_split_alignments(samples, run_parallel)
                samples = disambiguate.resolve(samples, run_parallel)
            with profile.report("callable regions", dirs):
                samples = run_parallel("postprocess_alignment", samples)
                # The whole sample list is passed as a single argument here.
                samples = run_parallel("combine_sample_regions", [samples])
                samples = region.clean_sample_data(samples)
            with profile.report("coverage", dirs):
                samples = coverage.summarize_samples(samples, run_parallel)

        ## Variant calling on sub-regions of the input file (full cluster)
        calling_resources = _wres(parallel, ["gatk", "picard", "variantcaller"])
        region_multiplier = region.get_max_counts(samples)
        with prun.start(calling_resources, samples, config, dirs, "full",
                        multiplier=region_multiplier, max_multicore=1) as run_parallel:
            with profile.report("alignment post-processing", dirs):
                samples = region.parallel_prep_region(samples, run_parallel)
            with profile.report("variant calling", dirs):
                samples = genotype.parallel_variantcall_region(samples, run_parallel)

        ## Finalize variants (per-sample cluster)
        finalize_resources = _wres(parallel, ["gatk", "gatk-vqsr", "snpeff", "bcbio_variation"])
        with prun.start(finalize_resources, samples, config, dirs,
                        "persample") as run_parallel:
            with profile.report("variant post-processing", dirs):
                for task in ("postprocess_variants", "split_variants_by_sample"):
                    samples = run_parallel(task, samples)
            with profile.report("validation", dirs):
                samples = run_parallel("compare_to_rm", samples)
                samples = genotype.combine_multiple_callers(samples)

        ## Finalizing BAMs and population databases, handle multicore computation
        final_resources = _wres(parallel, ["gemini", "samtools", "fastqc", "bamtools",
                                           "bcbio_variation", "bcbio-variation-recall"])
        with prun.start(final_resources, samples, config, dirs,
                        "multicore2") as run_parallel:
            with profile.report("prepped BAM merging", dirs):
                samples = region.delayed_bamprep_merge(samples, run_parallel)
            with profile.report("ensemble calling", dirs):
                samples = ensemble.combine_calls_parallel(samples, run_parallel)
            with profile.report("validation summary", dirs):
                samples = validate.summarize_grading(samples)
            with profile.report("structural variation", dirs):
                samples = structural.run(samples, run_parallel)
            with profile.report("population database", dirs):
                samples = population.prep_db_parallel(samples, run_parallel)
            with profile.report("quality control", dirs):
                samples = qcsummary.generate_parallel(samples, run_parallel)
            with profile.report("archive", dirs):
                samples = archive.compress(samples, run_parallel)

        logger.info("Timing: finished")
        return samples
Exemplo n.º 11
0
    def run(self, config, config_file, run_parallel, parallel, dirs, samples):
        """Run alignment, variant calling and finalization over ``samples``.

        The work is split into four ``global_parallel`` contexts
        ("multicore", "full", "persample", "multicore2").  Each context
        rebinds ``parallel`` through its ``as`` target and builds a fresh
        ``run_parallel`` with ``parallel_runner``, so the ``run_parallel``
        argument passed in is never used.  ``config_file`` is not read here.

        Returns the sample list produced by the last stage.
        """
        ## Alignment and preparation requiring the entire input file (multicore cluster)
        with global_parallel(parallel, "multicore", ["process_alignment", "postprocess_alignment"],
                             samples, dirs, config,
                             multiplier=alignprep.parallel_multiplier(samples)) as parallel:
            # Replaces the run_parallel argument with a runner for this context.
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: alignment")
            samples = run_parallel("prep_align_inputs", samples)
            samples = disambiguate.split(samples)
            samples = run_parallel("process_alignment", samples)
            samples = alignprep.merge_split_alignments(samples, run_parallel)
            samples = disambiguate.resolve(samples, run_parallel)
            samples = run_parallel("postprocess_alignment", samples)
            # ``regions`` is reused by the variant calling stage below.
            regions = callable.combine_sample_regions(samples)
            samples = region.add_region_info(samples, regions)
            samples = region.clean_sample_data(samples)
            logger.info("Timing: coverage")
            samples = coverage.summarize_samples(samples, run_parallel)

        ## Variant calling on sub-regions of the input file (full cluster)
        # The multiplier is the number of entries in regions["analysis"].
        with global_parallel(parallel, "full", ["piped_bamprep", "variantcall_sample"],
                             samples, dirs, config,
                             multiplier=len(regions["analysis"]), max_multicore=1) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: alignment post-processing")
            samples = region.parallel_prep_region(samples, regions, run_parallel)
            logger.info("Timing: variant calling")
            samples = region.parallel_variantcall_region(samples, run_parallel)

        ## Finalize variants (per-sample cluster)
        with global_parallel(parallel, "persample", ["postprocess_variants"],
                             samples, dirs, config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: variant post-processing")
            samples = run_parallel("postprocess_variants", samples)
            logger.info("Timing: validation")
            samples = run_parallel("compare_to_rm", samples)
            samples = combine_multiple_callers(samples)
            logger.info("Timing: ensemble calling")
            samples = ensemble.combine_calls_parallel(samples, run_parallel)
            samples = validate.summarize_grading(samples)
        ## Finalizing BAMs and population databases, handle multicore computation
        with global_parallel(parallel, "multicore2", ["prep_gemini_db", "delayed_bam_merge"],
                             samples, dirs, config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: prepped BAM merging")
            samples = region.delayed_bamprep_merge(samples, run_parallel)
            logger.info("Timing: structural variation")
            samples = structural.run(samples, run_parallel)
            logger.info("Timing: population database")
            samples = population.prep_db_parallel(samples, run_parallel)
            logger.info("Timing: quality control")
            samples = qcsummary.generate_parallel(samples, run_parallel)
        logger.info("Timing: finished")
        return samples
Exemplo n.º 12
0
 def run(self, config, config_file, parallel, dirs, samples):
     """Process, trim, align, clean and summarize ``samples``.

     Two ``prun.start`` contexts are used: "multicore" for lane processing,
     trimming and alignment, then "persample" for alignment cleaning and QC.
     ``config_file`` is part of the signature but is not read here.

     Returns the sample list produced by the QC stage.
     """
     align_resources = _wres(parallel, ["aligner", "picard"])
     align_multiplier = alignprep.parallel_multiplier(samples)
     with prun.start(align_resources, samples, config, dirs, "multicore",
                     multiplier=align_multiplier) as run_parallel:
         for task in ("process_lane", "trim_lane"):
             samples = run_parallel(task, samples)
         samples = disambiguate.split(samples)
         samples = run_parallel("process_alignment", samples)

     qc_resources = _wres(parallel, ["picard", "fastqc"])
     with prun.start(qc_resources, samples, config, dirs,
                     "persample") as run_parallel:
         samples = run_parallel("clean_chipseq_alignment", samples)
         samples = qcsummary.generate_parallel(samples, run_parallel)
     return samples
Exemplo n.º 13
0
    def run(self, config, config_file, run_parallel, parallel, dirs, lane_items):
        """Trim, align, count and summarize ``lane_items`` with ``run_parallel``.

        After transcript counting, the per-sample count files are combined
        and the combined value is stored on every sample under
        ``"combined_counts"``.  Only ``run_parallel`` and ``lane_items`` are
        read here; the other parameters are unused in this method.

        Returns the result of ``qcsummary.generate_parallel``.
        """
        trimmed = run_parallel("trim_lane", lane_items)
        samples = disambiguate.split(trimmed)
        samples = run_parallel("process_alignment", samples)
        samples = disambiguate.resolve(samples, run_parallel)
        samples = run_parallel("generate_transcript_counts", samples)

        count_files = [x[0].get("count_file") for x in samples]
        combined = combine_count_files(count_files)
        for sample in samples:
            sample[0]["combined_counts"] = combined

        # The "generate_bigwig" step is disabled.
        return qcsummary.generate_parallel(samples, run_parallel)
Exemplo n.º 14
0
 def run(self, config, config_file, parallel, dirs, samples):
     """Process, trim, align, clean and summarize ``samples``.

     Two ``prun.start`` contexts are used: "multicore" for lane processing,
     trimming and alignment, then "persample" for alignment cleaning and QC.
     ``config_file`` is part of the signature but is not read here.

     Returns the sample list produced by the QC stage.
     """
     align_resources = _wres(parallel, ["aligner", "picard"])
     align_multiplier = alignprep.parallel_multiplier(samples)
     with prun.start(align_resources, samples, config, dirs, "multicore",
                     multiplier=align_multiplier) as run_parallel:
         for task in ("process_lane", "trim_lane"):
             samples = run_parallel(task, samples)
         samples = disambiguate.split(samples)
         samples = run_parallel("process_alignment", samples)

     qc_resources = _wres(parallel, ["picard", "fastqc"])
     with prun.start(qc_resources, samples, config, dirs,
                     "persample") as run_parallel:
         samples = run_parallel("clean_chipseq_alignment", samples)
         samples = qcsummary.generate_parallel(samples, run_parallel)
     return samples
Exemplo n.º 15
0
 def run(self, config, run_info_yaml, parallel, dirs, samples):
     """Organize, trim, align, clean and summarize ``samples``.

     Two ``prun.start`` contexts are used: "multicore" for sample
     organization, preparation, trimming and alignment, then "persample"
     for alignment cleaning and QC.

     Returns the sample list produced by the QC stage.
     """
     align_resources = _wres(parallel, ["aligner", "picard"])
     align_multiplier = alignprep.parallel_multiplier(samples)
     with prun.start(align_resources, samples, config, dirs, "multicore",
                     multiplier=align_multiplier) as run_parallel:
         with profile.report("organize samples", dirs):
             descriptions = [x[0]["description"] for x in samples]
             organize_args = [[dirs, config, run_info_yaml, descriptions]]
             samples = run_parallel("organize_samples", organize_args)
         for task in ("prepare_sample", "trim_sample"):
             samples = run_parallel(task, samples)
         samples = disambiguate.split(samples)
         samples = run_parallel("process_alignment", samples)

     qc_resources = _wres(parallel, ["picard", "fastqc"])
     with prun.start(qc_resources, samples, config, dirs,
                     "persample") as run_parallel:
         samples = run_parallel("clean_chipseq_alignment", samples)
         samples = qcsummary.generate_parallel(samples, run_parallel)
     return samples
Exemplo n.º 16
0
    def run(self, config, config_file, parallel, dirs, samples):
        """Trim, align, count, combine counts and run QC for ``samples``.

        Each stage group runs inside its own ``prun.start`` context, which
        provides the ``run_parallel`` callable for that group.  Between the
        counting and QC stages the per-sample count files are combined and
        annotated, and the resulting values are stored on every sample.

        ``config_file`` is part of the signature but is not read here.
        Returns the sample list produced by the QC stage.
        """
        # Adapter trimming.
        trim_resources = _wres(parallel, ["picard", "AlienTrimmer"])
        with prun.start(trim_resources, samples, config, dirs,
                        "trimming") as run_parallel:
            with profile.report("adapter trimming", dirs):
                for task in ("process_lane", "trim_lane"):
                    samples = run_parallel(task, samples)

        # Alignment; the multiplier is derived from the trimmed samples.
        align_resources = _wres(parallel, ["aligner", "picard"],
                                ensure_mem={"tophat": 8, "tophat2": 8, "star": 40})
        align_multiplier = alignprep.parallel_multiplier(samples)
        with prun.start(align_resources, samples, config, dirs, "multicore",
                        multiplier=align_multiplier) as run_parallel:
            with profile.report("alignment", dirs):
                samples = disambiguate.split(samples)
                samples = run_parallel("process_alignment", samples)

        # Disambiguation and expression estimation.
        count_resources = _wres(parallel, ["samtools", "cufflinks"])
        with prun.start(count_resources, samples, config, dirs,
                        "rnaseqcount") as run_parallel:
            with profile.report("disambiguation", dirs):
                samples = disambiguate.resolve(samples, run_parallel)
            with profile.report("estimate expression", dirs):
                samples = rnaseq.estimate_expression(samples, run_parallel)

        # Combine the per-sample count files and annotate them using the
        # transcripts entry looked up on the first sample.
        count_files = [x[0].get("count_file") for x in samples]
        combined = combine_count_files(count_files)
        transcripts_key = ('genome_resources', 'rnaseq', 'transcripts')
        gtf_file = utils.get_in(samples[0][0], transcripts_key, None)
        annotated = annotate_combined_count_file(combined, gtf_file)
        for sample in samples:
            info = sample[0]
            info["combined_counts"] = combined
            if annotated:
                info["annotated_combined_counts"] = annotated

        # Quality control.
        qc_resources = _wres(parallel, ["picard", "fastqc", "rnaseqc"])
        with prun.start(qc_resources, samples, config, dirs,
                        "persample") as run_parallel:
            with profile.report("quality control", dirs):
                samples = qcsummary.generate_parallel(samples, run_parallel)

        logger.info("Timing: finished")
        return samples
Exemplo n.º 17
0
    def run(self, config, config_file, run_parallel, parallel, dirs, lane_items):
        """Trim, align, quantify and summarize ``lane_items`` with ``run_parallel``.

        After expression estimation, the per-sample count files are combined
        and annotated, and both values are stored on every sample.  Only
        ``run_parallel`` and ``lane_items`` are read here; the other
        parameters are unused in this method.

        Returns the result of ``qcsummary.generate_parallel``.
        """
        trimmed = run_parallel("trim_lane", lane_items)
        samples = disambiguate.split(trimmed)
        samples = run_parallel("process_alignment", samples)
        samples = disambiguate.resolve(samples, run_parallel)
        # The rnaseq.detect_fusion step is disabled.
        samples = rnaseq.estimate_expression(samples, run_parallel)

        # Combine the count files and annotate them using the ensembl alias
        # looked up on the first sample.
        count_files = [x[0].get("count_file") for x in samples]
        combined = combine_count_files(count_files)
        ensembl_key = ('genome_resources', 'aliases', 'ensembl')
        organism = utils.get_in(samples[0][0], ensembl_key, None)
        annotated = annotate_combined_count_file(combined, organism)
        for sample in samples:
            info = sample[0]
            info["combined_counts"] = combined
            info["annotated_combined_counts"] = annotated

        # The "generate_bigwig" step is disabled.
        return qcsummary.generate_parallel(samples, run_parallel)
Exemplo n.º 18
0
def create_combined_tx2gene(data):
    """Create a single tx2gene CSV covering every genome split out of ``data``.

    A ``<genome_build>-tx2gene.csv`` file is produced (or reused when it
    already exists) for each item returned by ``disambiguate.split([data])``.
    The per-genome files are then concatenated with ``cat`` into
    ``tx2gene.csv`` under ``<work_dir>/inputs/transcriptome``.

    Returns the path of the combined file; an existing combined file is
    returned without being rebuilt.
    """
    out_dir = os.path.join(dd.get_work_dir(data), "inputs", "transcriptome")
    tx2gene_files = []
    for item in disambiguate.split([data]):
        odata = item[0]
        gtf_file = dd.get_gtf_file(odata)
        out_file = os.path.join(out_dir, dd.get_genome_build(odata) + "-tx2gene.csv")
        if not file_exists(out_file):
            out_file = gtf.tx2genefile(gtf_file, out_file, tsv=False)
        tx2gene_files.append(out_file)

    combined_file = os.path.join(out_dir, "tx2gene.csv")
    if not file_exists(combined_file):
        joined_inputs = " ".join(tx2gene_files)
        cmd = "cat {tx2gene_file_string} > {tx_out_file}"
        with file_transaction(data, combined_file) as tx_out_file:
            do.run(cmd.format(tx2gene_file_string=joined_inputs,
                              tx_out_file=tx_out_file),
                   "Combining tx2gene CSV files.")
    return combined_file
Exemplo n.º 19
0
def disambiguate_split(*args):
    """Forward all positional arguments to ``disambiguate.split``.

    Returns whatever ``disambiguate.split`` returns.
    """
    split_result = disambiguate.split(*args)
    return split_result
Exemplo n.º 20
0
def disambiguate_split(*args):
    """Forward all positional arguments to ``disambiguate.split``.

    Returns whatever ``disambiguate.split`` returns.
    """
    split_result = disambiguate.split(*args)
    return split_result