def index(self):
    """ Sort out indexing.

    Symlinks the reference into the runtime directory (self.rts.base),
    registers the link with self.rts and points self.ref at it. When
    self.has_index is true the six bowtie2 index files found next to the
    original reference are symlinked as well; otherwise "bowtie2-build" is
    run on the linked reference. In both cases the six index file names are
    registered with self.rts.
    """
    # A symlink stores its target verbatim and a relative target is resolved
    # against the directory containing the link, not the current working
    # directory. Anchor relative references to the working directory so the
    # links created below do not dangle. Absolute references are used as is.
    old_ref = self.ref
    if not os.path.isabs(old_ref):
        old_ref = os.path.join(os.getcwd(), old_ref)

    # Symlink reference:
    new_ref = os.path.join(self.rts.base, os.path.basename(self.ref))
    os.symlink(old_ref, new_ref)
    # Register reference:
    self.rts.register(new_ref)
    self.ref = new_ref

    ind_suf = [
        ".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2", ".rev.1.bt2", ".rev.2.bt2"
    ]
    if self.has_index:
        # Symlink index:
        for suf in ind_suf:
            os.symlink(old_ref + suf, new_ref + suf)
    else:
        # Run indexing (the linked reference doubles as the index base name):
        cmd = u.Cmd(self.log, prog="bowtie2-build", opts=self.index_opts,
                    post_args=[self.ref, self.ref], sink_err=self.sink_err)
        cmd.comm()
    # Register index files:
    for suf in ind_suf:
        self.rts.register(self.ref + suf)
def sort(self, inp=None, outp=None, opts=None):
    """ Sort BAM file.

    inp  -- input handed to the command object unchanged; when it is a
            string and no outp is given, the output name is derived from it.
    outp -- output name (string). File objects are rejected because the
            sort command is given an output name, not a stream.
    opts -- dictionary of options for the "sort" sub-command (default: none).

    Returns the output name with ".bam" appended, joined onto self.rts.base.
    """
    # Use a fresh dictionary per call: a literal {} default would be one
    # object shared by every call of this method.
    if opts is None:
        opts = {}

    out_name = str()
    # NOTE: `file` is the Python 2 built-in file type.
    if isinstance(outp, file):
        self.log.fatal("Sort does not work on pipe output")
    elif isinstance(outp, str):
        out_name = self.rts.tempfile(outp)
    elif outp is None and isinstance(inp, str):
        # Derive "<input base name>_sort" from the input file name.
        out_name = os.path.basename(inp).split(".bam")[0] + "_sort"
    else:
        self.log.fatal("No output available!")

    cmd = u.Cmd(log=self.log, prog=self.prog, pre_args=["sort"], opts=opts,
                post_args=["-", out_name], inp=inp, outp=None,
                path=self.path, sink_err=self.sink_err, cwd=self.rts.base)
    cmd.comm()

    # The command received the name without extension; the registered and
    # returned name carries ".bam".
    out_name += ".bam"
    self.rts.register(out_name)
    return os.path.join(self.rts.base, out_name)
def mk_index(cls, ref, log, index_opts=None):
    """ Make a standalone index.

    Runs "bowtie2-build" with ref as both the input and the index base name.

    ref        -- reference passed twice as positional argument.
    log        -- logger handed to the command object.
    index_opts -- dictionary of options for bowtie2-build (default: none).
    """
    # Use a fresh dictionary per call: a literal {} default would be one
    # object shared by every call of this method.
    if index_opts is None:
        index_opts = {}
    cmd = u.Cmd(log, prog="bowtie2-build", opts=index_opts,
                post_args=[ref, ref], sink_err=True)
    cmd.comm()
def flagstat(self, bam):
    """ Get BAM statistics.

    Runs the "flagstat" sub-command on bam and parses its text output.

    Returns a dictionary mapping statistic names (e.g. "nr_mapped",
    "percent_mapped") to floats. Output lines matching none of the known
    patterns are ignored, so the corresponding keys are simply absent.
    """
    if not os.path.exists(bam):
        self.log.fatal("BAM file does not exists.")
    cmd = u.Cmd(log=self.log, prog=self.prog, pre_args=["flagstat"], opts={},
                post_args=[bam], path=self.path, sink_err=self.sink_err)
    data = cmd.comm()

    # One entry per recognised output line: the pattern and the result keys
    # for its capture groups, in group order. Raw literals keep the regex
    # escapes (\d, \s, ...) away from Python's string escape processing.
    feat_map = (
        (re.compile(r"(\d+)\s\+\s\d+\sin total \(QC-passed reads \+ QC-failed reads\)"),
         ("qc_fail",)),
        (re.compile(r"(\d+)\s\+\s\d+\sduplicates"),
         ("duplicates",)),
        (re.compile(r"(\d+)\s\+\s\d+\smapped\s\((\S+)%\s\+\s\S+%\)"),
         ("nr_mapped", "percent_mapped")),
        (re.compile(r"(\d+)\s\+\s\d+\spaired in sequencing"),
         ("seq_paired",)),
        (re.compile(r"(\d+)\s\+\s\d+\sproperly paired \((\S+)%\s\+\s\S+%\)"),
         ("nr_proper_pairs", "percent_proper_pairs")),
        (re.compile(r"(\d+)\s\+\s\d+\swith itself and mate mapped"),
         ("nr_proper_with_mate",)),
        (re.compile(r"(\d+)\s\+\s\d+\ssingletons\s\((\S+)%\s\+\s\S+%\)"),
         ("singletons", "percent_singletons")),
        (re.compile(r"(\d+)\s\+\s\d+\sread1"),
         ("nr_read1",)),
        (re.compile(r"(\d+)\s\+\s\d+\sread2"),
         ("nr_read2",)),
        (re.compile(r"(\d+)\s\+\s\d+\swith mate mapped to a different chr \(mapQ.+\)\Z"),
         ("chr_mismatch_q5",)),
        (re.compile(r"(\d+)\s\+\s\d+\swith mate mapped to a different chr\Z"),
         ("chr_mismatch",)),
    )

    stats = {}
    for line in data.split("\n"):
        for pattern, names in feat_map:
            m = pattern.match(line)
            if m is None:
                continue
            groups = m.groups()
            if len(groups) != len(names):
                self.log.fatal("Name/group mismatch: %s %s" % (groups, names))
            for name, value in zip(names, groups):
                stats[name] = float(value)
    return stats
def index(self, bam):
    """ Index BAM file.

    Runs the "index" sub-command on bam inside self.rts.base and registers
    the resulting index file with self.rts.
    """
    cmd = u.Cmd(log=self.log, prog=self.prog, pre_args=["index"], opts={},
                post_args=[bam], inp=None, outp=None, path=self.path,
                sink_err=self.sink_err, cwd=self.rts.base)
    cmd.comm()
    # The BAM index is written next to the input as "<bam>.bai"; the ".sai"
    # suffix registered previously never named that file.
    # NOTE(review): assumes self.prog is samtools - confirm against the class.
    self.rts.register(bam + ".bai")
def flagstat_text(self, bam):
    """ Get BAM statistics as unparsed text.

    Runs the "flagstat" sub-command on bam and returns whatever the command
    object's comm() call yields, without any parsing.
    """
    bam_missing = not os.path.exists(bam)
    if bam_missing:
        self.log.fatal("BAM file does not exists.")

    flagstat_cmd = u.Cmd(
        log=self.log,
        prog=self.prog,
        pre_args=["flagstat"],
        opts={},
        post_args=[bam],
        path=self.path,
        sink_err=self.sink_err,
    )
    return flagstat_cmd.comm()
def view(self, inp=None, outp=None, opts=None):
    """ View SAM/BAM files.

    inp  -- input handed to the command object unchanged.
    outp -- output file name relative to self.rts.base, or None.
    opts -- dictionary of options for the "view" sub-command; defaults to
            {"-S": None, "-b": None}.

    Returns the command object's output_fh attribute.
    """
    # Build the default per call: a dictionary literal in the signature
    # would be one object shared by every call of this method.
    if opts is None:
        opts = {"-S": None, "-b": None}

    if outp is not None:
        outp = os.path.join(self.rts.base, outp)

    cmd = u.Cmd(log=self.log, prog=self.prog, pre_args=["view"], opts=opts,
                post_args=["-"], inp=inp, outp=outp, path=self.path,
                sink_err=self.sink_err)
    cmd.comm()

    # Register output file.
    if isinstance(outp, str):
        self.rts.register(outp)
    return cmd.output_fh
def sam(self):
    """ Generate sam file.

    Runs "bowtie2" on self.reads against self.ref, writing to
    "<base>.sam" inside self.rts.base, where <base> is the name of the first
    reads file cut at the first match of the ".fq<digit>" pattern.

    Returns the path of the SAM file, which is registered with self.rts
    before the command runs.
    """
    # Check input:
    if not self.reads:
        self.log.fatal("No fastq files specified")

    base = os.path.basename(self.reads[0])
    # Raw literal so that \d reaches the regex engine untouched. The leading
    # "." is an unescaped regex dot, i.e. it matches any character.
    base = re.compile(r".fq\d").split(base)[0]

    # Generate SAM file:
    sam = os.path.join(self.rts.base, base + ".sam")
    # Register sam file:
    self.rts.register(sam)

    # Construct post args, starting with the bwt base:
    inp_flags = ["-1", "-2"]
    inf = [self.ref]
    for i, reads_file in enumerate(self.reads):
        # Only two mate flags exist; a third reads file raises IndexError.
        # NOTE(review): a single reads file is passed with "-1" - confirm
        # this is what the bowtie2 version in use expects.
        inf.append(inp_flags[i])
        inf.append(reads_file)

    # Construct command object:
    cmd = u.Cmd(self.log, prog="bowtie2", opts=self.aln_opts, post_args=inf,
                outp=sam, cwd=self.rts.base, sink_err=self.sink_err)
    cmd.comm()
    return sam