def test_htseq_count(self):
    """Run ``count.htseq_count`` on the fixture inputs and check its output exists."""
    # Assemble the sample description piece by piece, in the same order the
    # fixture attributes are read when building it as a single literal.
    bam_path = self.in_bam
    reference = os.path.join(self.data_dir, "foo")
    directories = {"work": self.out_dir}
    configuration = {"algorithm": {"transcripts": self.in_gtf}}
    data = {
        "work_bam": bam_path,
        "sam_ref": reference,
        "dirs": directories,
        "config": configuration,
    }
    produced = count.htseq_count(data)
    # The returned path must point at a file accepted by file_exists.
    self.assertTrue(file_exists(produced))
def count(data): """ count reads mapping to genes using featureCounts falls back on htseq_count method if featureCounts is not found """ in_bam = dd.get_work_bam(data) gtf_file = dd.get_gtf_file(data) work_dir = dd.get_work_dir(data) out_dir = os.path.join(work_dir, "htseq-count") safe_makedir(out_dir) count_file = os.path.join(out_dir, dd.get_sample_name(data)) + ".counts" if file_exists(count_file): return count_file config = data["config"] try: featureCounts = config_utils.get_program("featureCounts", config) except config_utils.CmdNotFound: logger.info( "featureCounts not found, falling back to htseq-count " "for feature counting. You can upgrade the tools to " "install featureCount with bcbio_nextgen.py upgrade " "--tools." ) return htseq_count(data) paired_flag = _paired_flag(in_bam) strand_flag = _strand_flag(config) cmd = "{featureCounts} -a {gtf_file} -o {tx_count_file} -s {strand_flag} " "{paired_flag} {in_bam}" message = "Count reads in {tx_count_file} mapping to {gtf_file} using " "featureCounts" with file_transaction(count_file) as tx_count_file: do.run(cmd.format(**locals()), message.format(**locals())) fixed_count_file = _format_count_file(count_file) os.rename(fixed_count_file, count_file) return count_file
def count(data): """ count reads mapping to genes using featureCounts falls back on htseq_count method if featureCounts is not found """ in_bam = data["work_bam"] gtf_file = data["genome_resources"]["rnaseq"]["transcripts"] work_dir = data["dirs"].get("work", "work") out_dir = os.path.join(work_dir, "htseq-count") safe_makedir(out_dir) count_file = _count_file_name(in_bam, out_dir) if file_exists(count_file): return count_file config = data["config"] try: featureCounts = config_utils.get_program("featureCounts", config) except config_utils.CmdNotFound: logger.info("featureCounts not found, falling back to htseq-count " "for feature counting. You can upgrade the tools to " "install featureCount with bcbio_nextgen.py upgrade " "--tools.") return htseq_count(data) paired_flag = _paired_flag(in_bam) strand_flag = _strand_flag(config) cmd = ("{featureCounts} -a {gtf_file} -o {tx_count_file} -s {strand_flag} " "{paired_flag} {in_bam}") message = ("Count reads in {tx_count_file} mapping to {gtf_file} using " "featureCounts") with file_transaction(count_file) as tx_count_file: do.run(cmd.format(**locals()), message.format(**locals())) fixed_count_file = _format_count_file(count_file) os.rename(fixed_count_file, count_file) return count_file
def generate_transcript_counts(data):
    """Generate counts per transcript from an alignment.

    Stores the result of ``count.htseq_count`` under ``data["count_file"]``
    and returns the updated ``data`` wrapped in a nested list.
    """
    counts_path = count.htseq_count(data)
    data["count_file"] = counts_path
    wrapped = [[data]]
    return wrapped
def generate_transcript_counts(data):
    """Generate counts per transcript from an alignment.

    Stores the result of ``count.htseq_count`` under ``data["count_file"]``.
    When ``config -> algorithm -> fusion_mode`` is truthy, additionally stores
    the result of ``oncofuse.run`` under ``data["oncofuse_file"]``. Returns
    the updated ``data`` wrapped in a nested list.
    """
    counts_path = count.htseq_count(data)
    data["count_file"] = counts_path

    # A missing fusion_mode setting is treated as disabled.
    fusion_enabled = get_in(data, ("config", "algorithm", "fusion_mode"), False)
    if fusion_enabled:
        data["oncofuse_file"] = oncofuse.run(data)

    return [[data]]