Exemplo n.º 1
0
    def __init__(self, args, header):
        """Load the amplicon design, record it in the header and index it by tid.

        Args:
            args: parsed options. This method reads ``args.amps``,
                ``args.clip``, ``args.exclude_offtarget`` and ``args.input``;
                the latter must provide ``nreferences`` and ``gettid``
                (presumably an open pysam alignment file -- confirm with the
                caller).
            header: mutable mapping describing the output header. Its ``'CO'``
                entry is replaced by the existing comments followed by one
                JSON-encoded comment per loaded amplicon.
        """
        import warnings

        self.args = args
        self.stats = stats.Stats('')
        self.amplicons = amplicon.load_amplicons(args.amps, self.stats, args)
        self.clip = args.clip
        self.exclude_offtarget = args.exclude_offtarget

        # One JSON comment per amplicon: its id, full coordinates ('ac'),
        # trimmed coordinates ('tc') and strand ('st').
        amplicon_comments = [
            json.dumps({
                'type': 'ea',
                'id': amp.external_id,
                'ac': '%s:%s-%s' % (amp.chr, amp.start, amp.end),
                'tc': '%s:%s-%s' % (amp.chr, amp.trim_start, amp.trim_end),
                'st': str(amp.strand)
                })
            for amp in self.amplicons
        ]

        # Build a new list so a caller-owned 'CO' list is not mutated in place.
        header['CO'] = header.get('CO', []) + amplicon_comments

        # One bucket of amplicons per reference, indexed by tid.
        self._amps_by_chr = [[] for _ in range(args.input.nreferences)]

        for amp in self.amplicons:
            tid = args.input.gettid(amp.chr)
            if tid < 0:
                # A negative tid means the reference name is unknown to the
                # input. Indexing with it would silently file the amplicon
                # under the *last* reference, so leave it out of the index.
                warnings.warn(
                    'amplicon %r is on reference %r, which is not present in '
                    'the input; it will not be indexed'
                    % (amp.external_id, amp.chr))
                continue
            self._amps_by_chr[tid].append(amp)
Exemplo n.º 2
0
    def do_mark(self, subcmd, opts, bamfile, amplicons):
        """${cmd_name}: Mark reads matching amplicons and optionally clip.

            Walk a BAM file and mark any matching amplicons using the AM tag.
            Outputs a modified BAM.  Use 'clip' if you want only reads matching
            amplicons in the output.

            ${cmd_usage}
            BAMFILE: input reads (use - for stdin)
            AMPLICONS: a file listing amplicons and trim locations.

            ${cmd_option_list}
        """
        # The docstring above contains ${...} placeholders and reads as a
        # help-text template, so its wording is deliberately left unchanged.
        #
        # Parameters: `opts` supplies `outfile` (and is forwarded to
        # load_amplicons), `bamfile` is the input path, and `amplicons` is the
        # path of the amplicon design file.
        samfile = pysam.Samfile(bamfile, "rb")
        stats = Stats(" ".join(sys.argv))
        outfile = None
        reads = None
        try:
            # The design file is the `amplicons` argument; the previous code
            # referenced an undefined name (`design`) here.
            loaded_amplicons = load_amplicons(amplicons, stats, opts,
                                              samfile=samfile)
            outfile = pysam.Samfile(opts.outfile, "wb", template=samfile)

            # We need to reopen the file here to get sequential access after
            # computing the pileups.
            reads = pysam.Samfile(bamfile, "rb")
            for read in reads:

                # TODO: optimisation of the list of amplicons that are considered
                for amp in loaded_amplicons:
                    if amp.matches(read):
                        amp.clip(read)
                        amp.mark(read)
                # Every read is written, whether or not it matched.
                outfile.write(read)
        finally:
            # Close explicitly so the output BAM is flushed and finalised and
            # the input handles are released even if processing fails.
            for handle in (reads, outfile, samfile):
                if handle is not None:
                    handle.close()

        stats.report(sys.stderr)
Exemplo n.º 3
0
    def __init__(self, args, header):
        """Load the amplicon design, record it in the header and index it by tid.

        Args:
            args: parsed options. This method reads ``args.amps``,
                ``args.clip``, ``args.exclude_offtarget`` and ``args.input``;
                the latter must provide ``nreferences`` and ``gettid``
                (presumably an open pysam alignment file -- confirm with the
                caller).
            header: mutable mapping describing the output header. Its ``'CO'``
                entry is replaced by the existing comments followed by one
                JSON-encoded comment per loaded amplicon.
        """
        import warnings

        self.args = args
        self.stats = stats.Stats('')
        self.amplicons = amplicon.load_amplicons(args.amps, self.stats, args)
        self.clip = args.clip
        self.exclude_offtarget = args.exclude_offtarget

        # One JSON comment per amplicon: its id, full coordinates ('ac'),
        # trimmed coordinates ('tc') and strand ('st').
        amplicon_comments = [
            json.dumps({
                'type': 'ea',
                'id': amp.external_id,
                'ac': '%s:%s-%s' % (amp.chr, amp.start, amp.end),
                'tc': '%s:%s-%s' % (amp.chr, amp.trim_start, amp.trim_end),
                'st': str(amp.strand),
            })
            for amp in self.amplicons
        ]

        # Build a new list so a caller-owned 'CO' list is not mutated in place.
        header['CO'] = header.get('CO', []) + amplicon_comments

        # One bucket of amplicons per reference, indexed by tid.
        self._amps_by_chr = [[] for _ in range(args.input.nreferences)]

        for amp in self.amplicons:
            tid = args.input.gettid(amp.chr)
            if tid < 0:
                # A negative tid means the reference name is unknown to the
                # input. Indexing with it would silently file the amplicon
                # under the *last* reference, so leave it out of the index.
                warnings.warn(
                    'amplicon %r is on reference %r, which is not present in '
                    'the input; it will not be indexed'
                    % (amp.external_id, amp.chr))
                continue
            self._amps_by_chr[tid].append(amp)
Exemplo n.º 4
0
    def do_clip(self, subcmd, opts, bamfile, amplicons):
        """${cmd_name}: Find and clip reads matching amplicons.

            Find reads from amplicons in the input and write clipped reads to
            output.  Writes the AM tag for matches.  Use 'mark' if you want all
            input reads (including non matching) in the output.

            ${cmd_usage}
            BAMFILE: input reads (use - for stdin)
            AMPLICONS: a file listing amplicons and trim locations.

            ${cmd_option_list}
        """
        # The docstring above contains ${...} placeholders and reads as a
        # help-text template, so its wording is deliberately left unchanged.
        #
        # Parameters: `opts` supplies `outfile` (and is forwarded to
        # load_amplicons), `bamfile` is the input path, and `amplicons` is the
        # path of the amplicon design file.
        stats = Stats(" ".join(sys.argv))
        # NOTE(review): the clip flag is forced off before loading the design;
        # presumably clipped_reads() does the clipping itself -- confirm.
        opts.clip = False
        samfile = pysam.Samfile(bamfile, "rb")
        outfile = None
        try:
            # The design file is the `amplicons` argument; the previous code
            # referenced an undefined name (`design`) here.
            loaded_amplicons = load_amplicons(amplicons, stats, opts)
            outfile = pysam.Samfile(opts.outfile, "wb", template=samfile)

            for amp in loaded_amplicons:
                # Explicit loop instead of map(): on Python 3 map() is lazy,
                # so the writes would never actually be executed.
                for read in amp.clipped_reads(samfile, mark=True):
                    outfile.write(read)
        finally:
            # Close explicitly so the output BAM is flushed and finalised and
            # the input handle is released even if processing fails.
            for handle in (outfile, samfile):
                if handle is not None:
                    handle.close()

        stats.report(sys.stderr)