def __init__(self, args, header):
    """Load the amplicon design, record it in the header and index it by tid.

    Args:
        args: Parsed options. The attributes read here are ``amps`` (handed
            to ``amplicon.load_amplicons``), ``clip``, ``exclude_offtarget``
            and ``input``; ``input`` must expose ``nreferences`` and
            ``gettid``.
        header: Mutable header mapping. One JSON-encoded comment per loaded
            amplicon is appended to its ``'CO'`` entry (created if absent).
    """
    import warnings

    self.args = args
    self.stats = stats.Stats('')
    self.amplicons = amplicon.load_amplicons(args.amps, self.stats, args)
    self.clip = args.clip
    self.exclude_offtarget = args.exclude_offtarget

    # One JSON comment line per amplicon: full and trimmed coordinates.
    descriptions = [
        json.dumps({
            'type': 'ea',
            'id': amp.external_id,
            'ac': '%s:%s-%s' % (amp.chr, amp.start, amp.end),
            'tc': '%s:%s-%s' % (amp.chr, amp.trim_start, amp.trim_end),
            'st': str(amp.strand),
        })
        for amp in self.amplicons
    ]
    header['CO'] = header.get('CO', []) + descriptions

    # List of amplicon lists, indexed by the reference id (tid) of the input.
    self._amps_by_chr = [[] for _ in range(args.input.nreferences)]
    for amp in self.amplicons:
        tid = args.input.gettid(amp.chr)
        if tid < 0:
            # gettid() reports an unknown reference name as -1; indexing with
            # it would silently file the amplicon under the *last* reference.
            warnings.warn(
                'amplicon %s is on reference %r which is not in the input '
                'header; ignoring it' % (amp.external_id, amp.chr))
            continue
        self._amps_by_chr[tid].append(amp)
def do_mark(self, subcmd, opts, bamfile, amplicons):
    """${cmd_name}: Mark reads matching amplicons and optionally clip.

    Walk a BAM file and mark any matching amplicons using the AM tag.
    Outputs a modified BAM. Use 'clip' if you want only reads matching
    amplicons in the output.

    ${cmd_usage}
    BAMFILE: input reads (use - for stdin)
    AMPLICONS: a file listing amplicons and trim locations.

    ${cmd_option_list}
    """
    # NOTE(review): the docstring above looks like a command-help template
    # (``${...}`` placeholders), so its wording is deliberately left unchanged.
    #
    # subcmd    -- not used by this method.
    # opts      -- options object; ``opts.outfile`` is the output BAM path and
    #              the whole object is forwarded to ``load_amplicons``.
    # bamfile   -- path of the input BAM.
    # amplicons -- path of the amplicon design file.
    samfile = pysam.Samfile(bamfile, "rb")
    reader = None
    outfile = None
    try:
        stats = Stats(" ".join(sys.argv))
        # The design file is the AMPLICONS argument of this command.
        loaded = load_amplicons(amplicons, stats, opts, samfile=samfile)
        outfile = pysam.Samfile(opts.outfile, "wb", template=samfile)

        # We need to reopen the file here to get sequential access after
        # computing the pileups.
        reader = pysam.Samfile(bamfile, "rb")
        for read in reader:
            # TODO: optimisation of the list of amplicons that are considered
            for amp in loaded:
                if amp.matches(read):
                    amp.clip(read)
                    amp.mark(read)
            # Every input read is written, whether or not it matched.
            outfile.write(read)
    finally:
        # Close the output first so it is flushed even if reading failed.
        # Both input handles are released only now because the loaded
        # amplicons were given ``samfile`` and may still refer to it.
        for handle in (outfile, reader, samfile):
            if handle is not None:
                handle.close()
    stats.report(sys.stderr)
def __init__(self, args, header):
    """Load the amplicon design, record it in the header and index it by tid.

    Args:
        args: Parsed options. The attributes read here are ``amps`` (handed
            to ``amplicon.load_amplicons``), ``clip``, ``exclude_offtarget``
            and ``input``; ``input`` must expose ``nreferences`` and
            ``gettid``.
        header: Mutable header mapping. One JSON-encoded comment per loaded
            amplicon is appended to its ``'CO'`` entry (created if absent).
    """
    import warnings

    self.args = args
    self.stats = stats.Stats('')
    self.amplicons = amplicon.load_amplicons(args.amps, self.stats, args)
    self.clip = args.clip
    self.exclude_offtarget = args.exclude_offtarget

    # One JSON comment line per amplicon: full and trimmed coordinates.
    descriptions = [
        json.dumps({
            'type': 'ea',
            'id': amp.external_id,
            'ac': '%s:%s-%s' % (amp.chr, amp.start, amp.end),
            'tc': '%s:%s-%s' % (amp.chr, amp.trim_start, amp.trim_end),
            'st': str(amp.strand),
        })
        for amp in self.amplicons
    ]
    header['CO'] = header.get('CO', []) + descriptions

    # List of amplicon lists, indexed by the reference id (tid) of the input.
    self._amps_by_chr = [[] for _ in range(args.input.nreferences)]
    for amp in self.amplicons:
        tid = args.input.gettid(amp.chr)
        if tid < 0:
            # gettid() reports an unknown reference name as -1; indexing with
            # it would silently file the amplicon under the *last* reference.
            warnings.warn(
                'amplicon %s is on reference %r which is not in the input '
                'header; ignoring it' % (amp.external_id, amp.chr))
            continue
        self._amps_by_chr[tid].append(amp)
def do_clip(self, subcmd, opts, bamfile, amplicons):
    """${cmd_name}: Find and clip reads matching amplicons.

    Find reads from amplicons in the input and write clipped reads to
    output. Writes the AM tag for matches. Use 'mark' if you want all input
    reads (including non matching) in the output.

    ${cmd_usage}
    BAMFILE: input reads (use - for stdin)
    AMPLICONS: a file listing amplicons and trim locations.

    ${cmd_option_list}
    """
    # NOTE(review): the docstring above looks like a command-help template
    # (``${...}`` placeholders), so its wording is deliberately left unchanged.
    #
    # subcmd    -- not used by this method.
    # opts      -- options object; ``opts.outfile`` is the output BAM path and
    #              the whole object is forwarded to ``load_amplicons``.
    # bamfile   -- path of the input BAM.
    # amplicons -- path of the amplicon design file.
    stats = Stats(" ".join(sys.argv))
    # NOTE(review): clip is forced off before loading the design; presumably
    # load_amplicons consults it -- confirm against that function.
    opts.clip = False
    samfile = pysam.Samfile(bamfile, "rb")
    outfile = None
    try:
        # The design file is the AMPLICONS argument of this command.
        loaded = load_amplicons(amplicons, stats, opts)
        outfile = pysam.Samfile(opts.outfile, "wb", template=samfile)
        for amp in loaded:
            # An explicit loop rather than map(): on Python 3 map() is lazy
            # and would never call outfile.write at all.
            for read in amp.clipped_reads(samfile, mark=True):
                outfile.write(read)
    finally:
        # Close the output first so it is flushed even if reading failed.
        if outfile is not None:
            outfile.close()
        samfile.close()
    stats.report(sys.stderr)