Пример #1
0
def generate_transcript_counts(data):
    """Generate counts per transcript and per exon from an alignment.

    Stores the featureCounts result under ``data["count_file"]`` and then,
    depending on the sample configuration read through ``dd``:

    * runs oncofuse when ``fusion_mode`` is enabled and records its output,
    * records DEXSeq counts when a DEXSeq GFF is configured,
    * records the transcriptome BAM path when the aligner is STAR and RSEM
      is enabled.

    Args:
        data: sample data dictionary, read and updated via the ``dd``
            accessors.

    Returns:
        The updated sample wrapped as ``[[data]]``.
    """
    data["count_file"] = featureCounts.count(data)
    if dd.get_fusion_mode(data, False):
        oncofuse_file = oncofuse.run(data)
        if oncofuse_file:
            # Rebind ``data`` to the setter's return value, as the other
            # ``dd.set_*`` calls in this function do; discarding it would
            # lose the oncofuse path if the setter returns a new dict.
            data = dd.set_oncofuse_file(data, oncofuse_file)
    if dd.get_dexseq_gff(data, None):
        data = dd.set_dexseq_counts(data, dexseq.bcbio_run(data))
    # if RSEM was run, stick the transcriptome BAM file into the datadict
    if dd.get_aligner(data).lower() == "star" and dd.get_rsem(data):
        # The transcriptome BAM path is the work BAM path with
        # ".transcriptome" inserted before the extension.
        base, ext = os.path.splitext(dd.get_work_bam(data))
        data = dd.set_transcriptome_bam(data, base + ".transcriptome" + ext)
    return [[data]]
Пример #2
0
def detect_fusions(data):
    """Run the fusion callers enabled for a sample and record their outputs.

    The sample is first normalised with ``to_single_data``. The legacy
    ``fusion_mode`` flag is translated into the ``fusion_caller`` list
    ``["oncofuse", "pizzly"]`` with a deprecation warning. Each caller named
    in ``fusion_caller`` is then run in the order oncofuse, pizzly,
    ericscript.

    Args:
        data: sample data, in whatever form ``to_single_data`` accepts.

    Returns:
        The updated sample wrapped as ``[[data]]``.
    """
    data = to_single_data(data)
    # Legacy configuration: ``fusion_mode`` switches on oncofuse and pizzly.
    if dd.get_fusion_mode(data, False):
        data = dd.set_fusion_caller(data, ["oncofuse", "pizzly"])
        deprecation_note = (
            "``fusion_mode`` is deprecated in favor of turning on "
            "callers with ``fusion_caller``. It will run pizzly and "
            "oncofuse for now, but will eventually have support "
            "dropped.")
        logger.warning(deprecation_note)
    enabled_callers = dd.get_fusion_caller(data, [])
    if "oncofuse" in enabled_callers:
        oncofuse_out = oncofuse.run(data)
        if oncofuse_out:
            data = dd.set_oncofuse_file(data, oncofuse_out)
    if "pizzly" in enabled_callers:
        pizzly_out = pizzly.run_pizzly(data)
        if pizzly_out:
            data = dd.set_pizzly_dir(data, pizzly_out)
            # Both output paths are derived from the sample name inside
            # the pizzly output directory.
            fasta_name = "%s.fusions.fasta" % dd.get_sample_name(data)
            fasta_path = os.path.join(pizzly_out, fasta_name)
            json_name = "%s.json" % dd.get_sample_name(data)
            json_path = os.path.join(pizzly_out, json_name)
            data["fusion"] = {"fasta": fasta_path, "json": json_path}
    if "ericscript" in enabled_callers:
        # The return value is not recorded on the sample.
        ericscript.run(data)
    return [[data]]
Пример #3
0
def generate_transcript_counts(data):
    """Count reads with featureCounts and ensure a transcriptome BAM exists.

    Stores the featureCounts result under ``data["count_file"]``, runs
    oncofuse when ``fusion_mode`` is enabled, and aligns to the transcriptome
    with bowtie2 when transcriptome alignment is requested but no
    transcriptome BAM is recorded yet.

    Args:
        data: sample data dictionary, read and updated via the ``dd``
            accessors.

    Returns:
        The updated sample wrapped as ``[[data]]``.
    """
    data["count_file"] = featureCounts.count(data)

    if dd.get_fusion_mode(data, False):
        fusion_report = oncofuse.run(data)
        if fusion_report:
            data = dd.set_oncofuse_file(data, fusion_report)

    needs_alignment = (dd.get_transcriptome_align(data)
                       and not dd.get_transcriptome_bam(data))
    if needs_alignment:
        if dd.get_disambiguate(data):
            # With disambiguation, the inputs come from the work BAM via
            # alignprep rather than from the original sequence files.
            source_bam = data["work_bam"]
            fastqs = alignprep._bgzip_from_bam(
                source_bam, data["dirs"], data["config"],
                is_retry=False, output_infix='-transcriptome')
            # Two paths are treated as a pair; otherwise only the first
            # path is used and there is no mate file.
            read1, read2 = fastqs if len(fastqs) == 2 else (fastqs[0], None)
        else:
            read1, read2 = dd.get_input_sequence_files(data)

        reference = dd.get_ref_file(data)
        logger.info("Transcriptome alignment was flagged to run, but the "
                    "transcriptome BAM file was not found. Aligning to the "
                    "transcriptome with bowtie2.")
        data = bowtie2.align_transcriptome(read1, read2, reference, data)
    return [[data]]
Пример #4
0
def generate_transcript_counts(data):
    """Count reads with featureCounts and ensure a transcriptome BAM exists.

    Stores the featureCounts result under ``data["count_file"]`` and runs
    oncofuse when ``fusion_mode`` is enabled. If transcriptome alignment is
    requested and no transcriptome BAM is recorded, the reads are aligned to
    the transcriptome with bowtie2.

    Args:
        data: sample data dictionary, read and updated via the ``dd``
            accessors.

    Returns:
        The updated sample wrapped as ``[[data]]``.
    """
    data["count_file"] = featureCounts.count(data)

    if dd.get_fusion_mode(data, False):
        oncofuse_result = oncofuse.run(data)
        if oncofuse_result:
            data = dd.set_oncofuse_file(data, oncofuse_result)

    # Nothing more to do unless alignment was requested and the BAM is
    # still missing.
    if not dd.get_transcriptome_align(data) or dd.get_transcriptome_bam(data):
        return [[data]]

    if dd.get_disambiguate(data):
        work_bam = data["work_bam"]
        extracted = alignprep._bgzip_from_bam(work_bam,
                                              data["dirs"],
                                              data["config"],
                                              is_retry=False,
                                              output_infix='-transcriptome')
        if len(extracted) == 2:
            first_reads, second_reads = extracted
        else:
            # Anything other than a pair: use only the first path.
            first_reads = extracted[0]
            second_reads = None
    else:
        first_reads, second_reads = dd.get_input_sequence_files(data)

    transcriptome_ref = dd.get_ref_file(data)
    logger.info("Transcriptome alignment was flagged to run, but the "
                "transcriptome BAM file was not found. Aligning to the "
                "transcriptome with bowtie2.")
    data = bowtie2.align_transcriptome(first_reads, second_reads,
                                       transcriptome_ref, data)
    return [[data]]
Пример #5
0
def detect_fusions(data):
    """Run oncofuse and pizzly when ``fusion_mode`` is enabled.

    Each tool's output is recorded on the sample only when the tool returns
    a truthy result.

    Args:
        data: sample data dictionary, read and updated via the ``dd``
            accessors.

    Returns:
        The (possibly updated) sample wrapped as ``[[data]]``.
    """
    # Fusion detection is opt-in; leave the sample untouched otherwise.
    if not dd.get_fusion_mode(data, False):
        return [[data]]

    oncofuse_out = oncofuse.run(data)
    if oncofuse_out:
        data = dd.set_oncofuse_file(data, oncofuse_out)

    pizzly_out = pizzly.run_pizzly(data)
    if pizzly_out:
        data = dd.set_pizzly_dir(data, pizzly_out)

    return [[data]]
Пример #6
0
def generate_transcript_counts(data):
    """Count reads with featureCounts and prepare the BAM that RSEM needs.

    Stores the featureCounts result under ``data["count_file"]`` and runs
    oncofuse when ``fusion_mode`` is enabled. When RSEM is enabled and no
    transcriptome BAM is recorded, the input reads are aligned to the
    transcriptome with bowtie2.

    Args:
        data: sample data dictionary, read and updated via the ``dd``
            accessors.

    Returns:
        The updated sample wrapped as ``[[data]]``.
    """
    data["count_file"] = featureCounts.count(data)

    if dd.get_fusion_mode(data, False):
        fusion_output = oncofuse.run(data)
        if fusion_output:
            data = dd.set_oncofuse_file(data, fusion_output)

    # Only continue when RSEM is set to run and the aligner did not
    # create the transcriptome BAM file.
    if not dd.get_rsem(data) or dd.get_transcriptome_bam(data):
        return [[data]]

    # Make the missing transcriptome BAM with bowtie2.
    reads_1, reads_2 = dd.get_input_sequence_files(data)
    reference = dd.get_ref_file(data)
    logger.info("RSEM was flagged to run, but the transcriptome BAM file "
                "was not found. Aligning to the transcriptome with bowtie2.")
    data = bowtie2.align_transcriptome(reads_1, reads_2, reference, data)
    return [[data]]
Пример #7
0
def generate_transcript_counts(data):
    """Count reads with featureCounts and prepare the BAM that RSEM needs.

    The featureCounts result is stored under ``data["count_file"]``.
    Oncofuse runs when ``fusion_mode`` is enabled. When RSEM is enabled and
    no transcriptome BAM is recorded, bowtie2 aligns the input reads to the
    transcriptome.

    Args:
        data: sample data dictionary, read and updated via the ``dd``
            accessors.

    Returns:
        The updated sample wrapped as ``[[data]]``.
    """
    data["count_file"] = featureCounts.count(data)
    if dd.get_fusion_mode(data, False):
        oncofuse_report = oncofuse.run(data)
        if oncofuse_report:
            data = dd.set_oncofuse_file(data, oncofuse_report)
    # RSEM is set to run but the aligner left no transcriptome BAM behind:
    # build one with bowtie2.
    rsem_missing_bam = dd.get_rsem(data) and not dd.get_transcriptome_bam(data)
    if rsem_missing_bam:
        fastq_1, fastq_2 = dd.get_input_sequence_files(data)
        genome_ref = dd.get_ref_file(data)
        notice = ("RSEM was flagged to run, but the transcriptome BAM file "
                  "was not found. Aligning to the transcriptome with bowtie2.")
        logger.info(notice)
        data = bowtie2.align_transcriptome(fastq_1, fastq_2, genome_ref, data)
    return [[data]]
Пример #8
0
def detect_fusions(data):
    """Run the fusion callers enabled for a sample and record their outputs.

    The legacy ``fusion_mode`` flag is translated into the ``fusion_caller``
    list ``["oncofuse", "pizzly"]`` with a deprecation warning. Oncofuse and
    pizzly then run if they are named in ``fusion_caller``; each output is
    recorded only when it is truthy.

    Args:
        data: sample data dictionary, read and updated via the ``dd``
            accessors.

    Returns:
        The (possibly updated) sample wrapped as ``[[data]]``.
    """
    # Backwards compatibility with the old ``fusion_mode`` switch.
    if dd.get_fusion_mode(data, False):
        data = dd.set_fusion_caller(data, ["oncofuse", "pizzly"])
        deprecation_note = (
            "``fusion_mode`` is deprecated in favor of turning on "
            "callers with ``fusion_caller``. It will run pizzly and "
            "oncofuse for now, but will eventually have support "
            "dropped.")
        logger.warning(deprecation_note)

    # The caller list is looked up again before each tool, as ``data`` may
    # have been rebound in between.
    if "oncofuse" in dd.get_fusion_caller(data, []):
        oncofuse_out = oncofuse.run(data)
        if oncofuse_out:
            data = dd.set_oncofuse_file(data, oncofuse_out)

    if "pizzly" in dd.get_fusion_caller(data, []):
        pizzly_out = pizzly.run_pizzly(data)
        if pizzly_out:
            data = dd.set_pizzly_dir(data, pizzly_out)

    return [[data]]
Пример #9
0
def generate_transcript_counts(data):
    """Count reads with featureCounts and prepare the BAM that RSEM needs.

    Stores the featureCounts result under ``data["count_file"]`` and runs
    oncofuse when ``fusion_mode`` is enabled. For disambiguation samples the
    RSEM preparation is skipped with a log message. Otherwise, when RSEM is
    enabled and no transcriptome BAM is recorded, the input reads are aligned
    to the transcriptome with bowtie2.

    Args:
        data: sample data dictionary, read and updated via the ``dd``
            accessors.

    Returns:
        The updated sample wrapped as ``[[data]]``.
    """
    data["count_file"] = featureCounts.count(data)

    if dd.get_fusion_mode(data, False):
        fusion_output = oncofuse.run(data)
        if fusion_output:
            data = dd.set_oncofuse_file(data, fusion_output)

    if dd.get_disambiguate(data):
        # Disambiguation samples never reach the RSEM alignment step.
        logger.info("RSEM is not supported yet for disambiguation protocols. "
                    "See https://github.com/chapmanb/bcbio-nextgen/issues/859")
    elif dd.get_rsem(data) and not dd.get_transcriptome_bam(data):
        # RSEM is set to run, but the aligner didn't create the
        # transcriptome BAM file: make one with bowtie2.
        reads_1, reads_2 = dd.get_input_sequence_files(data)
        reference = dd.get_ref_file(data)
        logger.info("RSEM was flagged to run, but the transcriptome BAM file "
                    "was not found. Aligning to the transcriptome with bowtie2.")
        data = bowtie2.align_transcriptome(reads_1, reads_2, reference, data)

    return [[data]]
Пример #10
0
def detect_fusions(data):
    """Run the fusion callers enabled for a sample and record their outputs.

    The sample is first normalised with ``to_single_data``. The legacy
    ``fusion_mode`` flag is translated into the ``fusion_caller`` list
    ``["oncofuse", "pizzly"]`` with a deprecation warning. Oncofuse, pizzly
    and ericscript then run, in that order, if named in ``fusion_caller``.

    Args:
        data: sample data, in whatever form ``to_single_data`` accepts.

    Returns:
        The updated sample wrapped as ``[[data]]``.
    """
    data = to_single_data(data)

    # support the old style of fusion mode calling
    legacy_mode = dd.get_fusion_mode(data, False)
    if legacy_mode:
        data = dd.set_fusion_caller(data, ["oncofuse", "pizzly"])
        logger.warning(
            "``fusion_mode`` is deprecated in favor of turning on "
            "callers with ``fusion_caller``. It will run pizzly and "
            "oncofuse for now, but will eventually have support "
            "dropped."
        )

    callers = dd.get_fusion_caller(data, [])

    if "oncofuse" in callers:
        oncofuse_result = oncofuse.run(data)
        if oncofuse_result:
            data = dd.set_oncofuse_file(data, oncofuse_result)

    if "pizzly" in callers:
        pizzly_result = pizzly.run_pizzly(data)
        if pizzly_result:
            data = dd.set_pizzly_dir(data, pizzly_result)
            # Output files are named after the sample and live in the
            # pizzly directory; build them in a fixed order.
            fusion_outputs = {}
            for key, template in (("fasta", "%s.fusions.fasta"),
                                  ("json", "%s.json")):
                file_name = template % dd.get_sample_name(data)
                fusion_outputs[key] = os.path.join(pizzly_result, file_name)
            data["fusion"] = fusion_outputs

    if "ericscript" in callers:
        # The return value is not recorded on the sample.
        ericscript.run(data)

    return [[data]]