Пример #1
0
 def test_htseq_count(self):
     """Check that htseq_count returns a path accepted by file_exists."""
     sample = {
         "work_bam": self.in_bam,
         "sam_ref": os.path.join(self.data_dir, "foo"),
         "dirs": {"work": self.out_dir},
         "config": {"algorithm": {"transcripts": self.in_gtf}},
     }
     counts_path = count.htseq_count(sample)
     self.assertTrue(file_exists(counts_path))
Пример #2
0
 def test_htseq_count(self):
     """Run htseq_count on the fixture inputs and assert its output exists."""
     # Minimal sample description built from the fixture attributes on self.
     fixture = {
         "work_bam": self.in_bam,
         "sam_ref": os.path.join(self.data_dir, "foo"),
         "dirs": {"work": self.out_dir},
         "config": {"algorithm": {"transcripts": self.in_gtf}},
     }
     produced = count.htseq_count(fixture)
     self.assertTrue(file_exists(produced))
Пример #3
0
def count(data):
    """
    Count reads mapping to genes with featureCounts.

    Returns the path of the sample's ".counts" file inside the
    "htseq-count" directory under the work directory. If file_exists
    already reports that file, it is returned without running anything.
    When the featureCounts program cannot be located, the call is handed
    off to htseq_count and its result is returned instead.
    """
    bam_path = dd.get_work_bam(data)
    annotation_gtf = dd.get_gtf_file(data)
    counts_dir = os.path.join(dd.get_work_dir(data), "htseq-count")
    safe_makedir(counts_dir)
    final_counts = os.path.join(counts_dir, dd.get_sample_name(data)) + ".counts"
    if file_exists(final_counts):
        return final_counts

    settings = data["config"]
    try:
        executable = config_utils.get_program("featureCounts", settings)
    except config_utils.CmdNotFound:
        fallback_note = (
            "featureCounts not found, falling back to htseq-count "
            "for feature counting. You can upgrade the tools to "
            "install featureCount with bcbio_nextgen.py upgrade "
            "--tools."
        )
        logger.info(fallback_note)
        return htseq_count(data)

    pairing = _paired_flag(bam_path)
    strandedness = _strand_flag(settings)

    command_template = (
        "{featureCounts} -a {gtf_file} -o {tx_count_file} -s {strand_flag} "
        "{paired_flag} {in_bam}"
    )
    message_template = (
        "Count reads in {tx_count_file} mapping to {gtf_file} using "
        "featureCounts"
    )
    with file_transaction(final_counts) as tx_counts:
        # Fill the template placeholders explicitly instead of via locals().
        fields = {
            "featureCounts": executable,
            "gtf_file": annotation_gtf,
            "tx_count_file": tx_counts,
            "strand_flag": strandedness,
            "paired_flag": pairing,
            "in_bam": bam_path,
        }
        do.run(command_template.format(**fields), message_template.format(**fields))

    # The reformatted output replaces the raw counts file under the same name.
    reformatted = _format_count_file(final_counts)
    os.rename(reformatted, final_counts)
    return final_counts
Пример #4
0
def count(data):
    """
    Count reads mapping to genes with featureCounts.

    Reads the BAM from data["work_bam"] and the annotation from
    data["genome_resources"]["rnaseq"]["transcripts"]. Output goes to an
    "htseq-count" directory under data["dirs"]["work"] (default "work").
    If file_exists already reports the counts file, its path is returned
    right away. When the featureCounts program cannot be located, the
    result of htseq_count(data) is returned instead.
    """
    bam_path = data["work_bam"]
    annotation_gtf = data["genome_resources"]["rnaseq"]["transcripts"]
    counts_dir = os.path.join(data["dirs"].get("work", "work"), "htseq-count")
    safe_makedir(counts_dir)
    final_counts = _count_file_name(bam_path, counts_dir)
    if file_exists(final_counts):
        return final_counts

    settings = data["config"]
    try:
        executable = config_utils.get_program("featureCounts", settings)
    except config_utils.CmdNotFound:
        logger.info(
            "featureCounts not found, falling back to htseq-count "
            "for feature counting. You can upgrade the tools to "
            "install featureCount with bcbio_nextgen.py upgrade "
            "--tools."
        )
        return htseq_count(data)

    pairing = _paired_flag(bam_path)
    strandedness = _strand_flag(settings)

    command_template = (
        "{featureCounts} -a {gtf_file} -o {tx_count_file} -s {strand_flag} "
        "{paired_flag} {in_bam}"
    )
    message_template = (
        "Count reads in {tx_count_file} mapping to {gtf_file} using "
        "featureCounts"
    )
    with file_transaction(final_counts) as tx_counts:
        # Placeholders are supplied by keyword rather than through locals().
        shell_command = command_template.format(
            featureCounts=executable,
            gtf_file=annotation_gtf,
            tx_count_file=tx_counts,
            strand_flag=strandedness,
            paired_flag=pairing,
            in_bam=bam_path,
        )
        description = message_template.format(
            tx_count_file=tx_counts,
            gtf_file=annotation_gtf,
        )
        do.run(shell_command, description)

    # The reformatted output replaces the raw counts file under the same name.
    reformatted = _format_count_file(final_counts)
    os.rename(reformatted, final_counts)
    return final_counts
Пример #5
0
def generate_transcript_counts(data):
    """Generate counts per transcript from an alignment.

    Stores the result of count.htseq_count under data["count_file"]
    (mutating ``data`` in place) and returns it wrapped as [[data]].
    """
    counts_path = count.htseq_count(data)
    data["count_file"] = counts_path
    return [[data]]
Пример #6
0
def generate_transcript_counts(data):
    """Generate counts per transcript from an alignment.

    Stores the result of count.htseq_count under data["count_file"]. When
    the config -> algorithm -> fusion_mode setting is truthy, also stores
    the result of oncofuse.run under data["oncofuse_file"]. ``data`` is
    mutated in place and returned wrapped as [[data]].
    """
    data["count_file"] = count.htseq_count(data)
    fusion_enabled = get_in(data, ("config", "algorithm", "fusion_mode"), False)
    if fusion_enabled:
        data["oncofuse_file"] = oncofuse.run(data)
    return [[data]]