def run(self):
    """Map the sample's reads onto the contigs and compute coverage.

    Steps, in order: build any missing bowtie2/samtools index for the
    contigs FASTA, run bowtie2, convert the SAM output to a sorted and
    indexed BAM, call ``self.remove_duplicates()``, sort and index the
    resulting BAM, write the ``genomeCoverageBed`` output to
    ``self.p.map_smds_coverage`` and finally delete the intermediate
    SAM/BAM files.
    """
    # Build whichever index of the contigs FASTA is still missing #
    contigs = self.contigs_fasta
    if not os.path.exists(contigs + '.1.bt2'):
        contigs.index_bowtie()
    if not os.path.exists(contigs + '.fai'):
        contigs.index_samtools()

    # Assemble the bowtie2 command line #
    # NOTE(review): the index checks above use self.contigs_fasta while
    # bowtie2 is pointed at self.assembly.results.contigs_fasta --
    # presumably the same file; confirm.
    bowtie_args = [
        '-p', nr_threads,
        '-x', self.assembly.results.contigs_fasta,
        '-1', self.sample.fwd_path,
        '-2', self.sample.rev_path,
        '-S', self.p.map_sam,
    ]
    # bowtie2 needs '-f' when the reads are FASTA rather than FASTQ #
    if self.sample.format == 'fasta':
        bowtie_args.append('-f')

    # Run the mapping #
    sh.bowtie2(*bowtie_args)

    # SAM -> BAM, then sort and index the BAM #
    samtools = sh.samtools
    fai_index = self.contigs_fasta + '.fai'
    samtools('view', '-bt', fai_index, self.p.map_sam, '-o', self.p.map_bam)
    samtools('sort', self.p.map_bam, self.p.map_s_bam.prefix_path)
    samtools('index', self.p.map_s_bam)

    # Drop PCR duplicates (presumably produces self.p.map_smd_bam) #
    self.remove_duplicates()

    # Sort and index the duplicate-free BAM #
    samtools('sort', self.p.map_smd_bam, self.p.map_smds_bam.prefix_path)
    samtools('index', self.p.map_smds_bam)

    # Coverage is written straight to the coverage file #
    coverage_file = str(self.p.map_smds_coverage)
    sh.genomeCoverageBed('-ibam', self.p.map_smds_bam, _out=coverage_file)

    # Delete the intermediates that are no longer needed #
    for leftover in (self.p.map_sam, self.p.map_bam, self.p.map_smd_bam):
        os.remove(leftover)
def map(self, threads=nr_threads):
    """Map the reads of ``self.pool`` onto ``self.contigs`` with bowtie2.

    Both index files must already exist next to ``self.contigs``;
    otherwise an ``Exception`` asking to run ``index()`` first is raised.
    After the mapping the SAM output is converted to a sorted, indexed
    BAM, ``self.remove_duplicates()`` is called (MarkDuplicates, per the
    original description), the result is sorted and indexed again, and
    the ``genomeCoverageBed`` output is written to
    ``self.p.map_smds_coverage``. Intermediate files are deleted at the
    end.

    ``threads`` is handed to bowtie2 as its ``-p`` value.
    """
    # Refuse to run when either index is missing #
    required_indexes = (
        ('.1.bt2', 'Bowtie2 index file not created, run index() first'),
        ('.fai', 'Samtools index file not created, run index() first'),
    )
    for suffix, complaint in required_indexes:
        if not os.path.exists(self.contigs + suffix):
            raise Exception(complaint)

    # Run the mapping #
    bowtie_args = [
        '-p', threads,
        '-x', self.contigs,
        '-1', self.pool.fwd,
        '-2', self.pool.rev,
        '-S', self.p.map_sam,
    ]
    sh.bowtie2(*bowtie_args)

    # SAM -> BAM, then sort and index the BAM #
    samtools = sh.samtools
    fai_index = self.contigs + '.fai'
    samtools('view', '-bt', fai_index, self.p.map_sam, '-o', self.p.map_bam)
    samtools('sort', self.p.map_bam, self.p.map_s_bam.prefix_path)
    samtools('index', self.p.map_s_bam)

    # Drop PCR duplicates #
    self.remove_duplicates()

    # Sort and index the duplicate-free BAM #
    samtools('sort', self.p.map_smd_bam, self.p.map_smds_bam.prefix_path)
    samtools('index', self.p.map_smds_bam)

    # Coverage goes to the coverage file #
    sh.genomeCoverageBed('-ibam', self.p.map_smds_bam,
                         _out=self.p.map_smds_coverage)

    # Delete the intermediates #
    for leftover in (self.p.map_sam, self.p.map_bam, self.p.map_smd_bam):
        os.remove(leftover)
def index_fasta(fasta, compress):
    """Index and optionally compress a fasta file.

    \b
    Examples:
        bionorm index_fasta Medicago_truncatula/jemalong_A17.gnm5.ann1.FAKE/medtr.jemalong_A17.gnm5.FAKE.genome_main.fna

    """
    # NOTE(review): the docstring above looks like command-line help text
    # (the backspace marker line), so its wording is left untouched.
    #
    # fasta:    path of the FASTA file to index.
    # compress: when truthy, the file is bgzip-compressed (with a bgzip
    #           index) before ``samtools faidx`` is run.
    # Returns the pathlib.Path that was indexed: the original path, or
    # the ``<name>.gz`` path next to it when compression was requested.
    # Exits the process with status 1 when the path has no extension, a
    # compressed extension, or an unrecognized FASTA extension.
    from sh import bgzip  # isort:skip
    from sh import samtools  # isort:skip

    target = Path(fasta)
    if not target.suffixes:
        error_message = f"Target {target} does not have a file extension."
        logger.error(error_message)
        sys.exit(1)

    # Only the last extension decides between compressed / FASTA / other.
    extension = target.suffix.lstrip(".")
    if extension in COMPRESSED_TYPES:
        logger.error(f"Uncompress {target} before indexing.")
        sys.exit(1)
    if extension not in FASTA_TYPES:
        logger.error(
            f"File {target} does not have a recognized FASTA extension.")
        sys.exit(1)

    if compress:
        # Compress first; the index is then built on the .gz file that
        # is expected to appear alongside the original.
        bgzip(["-f", "--index", str(target)])
        target = target.parent / f"{target.name}.gz"
    samtools(["faidx", str(target)])
    return target
def main():
    '''
    Call `samtools view` on the input file and pass its text output, along
    with the output directory, to get_seqs_by_ctg (which, per the original
    description, splits it into fastqs by RNAME column).

    The output directory is created when it does not exist. Returns 0.
    '''
    cli_args = docopt(__doc__)
    validator = Schema({'<samfile>': str, '--outdir': str})
    options = validator.validate(cli_args)

    target_dir = options['--outdir']
    if not os.path.exists(target_dir):
        os.mkdir(target_dir)

    sam_path = options['<samfile>']
    # Only inputs named *.sam get the extra S=True keyword on the view call
    view_kwargs = {'S': True} if sam_path.endswith('.sam') else {}
    view = str(sh.samtools('view', sam_path, **view_kwargs))

    get_seqs_by_ctg(target_dir, view)
    return 0
def _merge_condition(in_files, condition):
    """Merge, sort and index all BAM files belonging to one condition.

    The files are merged together as recommended in the MACS manual.

    Args:
        in_files: iterable of BAM file paths.
        condition: string identifying the condition; every path in
            ``in_files`` that contains it is selected.

    Returns:
        The path of the sorted, merged BAM file, or ``None`` when no
        path in ``in_files`` contains ``condition``. When the sorted
        file already exists it is returned without re-running samtools.
    """
    condition_files = [filename for filename in in_files
                       if condition in filename]
    if not condition_files:
        return None

    # The merged file is placed next to the first matching input.
    # (Indexing with 1 here raised IndexError whenever exactly one file
    # matched the condition.)
    output_dir = os.path.dirname(condition_files[0])
    condition_filename = os.path.join(output_dir, condition + "_merged.bam")
    sorted_prefix = remove_suffix(condition_filename) + ".sorted"
    sorted_filename = sorted_prefix + ".bam"

    # Reuse the result of an earlier run.
    if file_exists(sorted_filename):
        return sorted_filename

    sh.samtools("merge", condition_filename, condition_files)
    sh.samtools("sort", condition_filename, sorted_prefix)
    sh.samtools("index", sorted_filename)
    return sorted_filename
def _merge_condition(in_files, condition):
    """Merge, sort and index all BAM files belonging to one condition.

    The files are merged together as recommended in the MACS manual.

    Args:
        in_files: iterable of BAM file paths.
        condition: string identifying the condition; every path in
            ``in_files`` that contains it is selected.

    Returns:
        The path of the sorted, merged BAM file, or ``None`` when no
        path in ``in_files`` contains ``condition``. When the sorted
        file already exists it is returned without re-running samtools.
    """
    condition_files = [
        filename for filename in in_files if condition in filename
    ]
    if not condition_files:
        return None

    # Anchor the output on the first match: the previous index of 1
    # failed with IndexError when only a single file matched.
    output_dir = os.path.dirname(condition_files[0])
    condition_filename = os.path.join(output_dir, condition + "_merged.bam")
    sorted_prefix = remove_suffix(condition_filename) + ".sorted"
    sorted_filename = sorted_prefix + ".bam"

    # A sorted file from an earlier run is returned as is.
    if file_exists(sorted_filename):
        return sorted_filename

    sh.samtools("merge", condition_filename, condition_files)
    sh.samtools("sort", condition_filename, sorted_prefix)
    sh.samtools("index", sorted_filename)
    return sorted_filename
def index_samtools(self):
    """Run ``samtools faidx`` on this fasta file.

    Returns a FilePath for ``<self.path>.fai``, the index expected next
    to the fasta file.
    """
    fasta_path = self.path
    sh.samtools('faidx', fasta_path)
    index_path = fasta_path + '.fai'
    return FilePath(index_path)
def index(self):
    """Build the bowtie2 and the samtools index of the contigs fasta file."""
    contigs = self.contigs_fasta
    # The fasta path is passed twice to bowtie2-build: once as the input
    # and once more as its second argument.
    sh.bowtie2_build(contigs, contigs)
    sh.samtools('faidx', contigs)
def zero_coverage_positions(bam_file, ref_file):
    """Return the second column of every ``samtools mpileup`` line as int.

    ``samtools mpileup`` is run on ``bam_file`` with ``f=ref_file`` and
    its output is consumed line by line (``_iter=True``). Each line is
    split on whitespace and its second field is converted to ``int``.

    NOTE(review): the second mpileup column is presumably the position;
    how callers derive *zero-coverage* positions from these values is
    not visible here -- confirm against the callers.

    Returns:
        The result of ``map`` over the pileup lines: a list on Python 2,
        a lazy iterator on Python 3.
    """
    def _second_field_as_int(line):
        # ``unicode`` exists only on Python 2; calling ``split`` on the
        # line itself works for both ``str`` and ``unicode`` lines.
        return int(line.split()[1])

    pileup = sh.samtools('mpileup', bam_file, f=ref_file, _iter=True)
    return map(_second_field_as_int, pileup)
def index_samtools(self):
    """Index the fasta file with ``samtools faidx``.

    Returns a FilePath pointing at ``<self.path>.fai``.
    """
    fasta_path = self.path
    sh.samtools('faidx', fasta_path)
    fai_path = fasta_path + '.fai'
    return FilePath(fai_path)