示例#1
0
def action(args):
    """Assign FASTQ reads to barcodes and write encoded sequences plus a map.

    Attributes read from ``args``:

    * ``barcodes`` -- passed to ``get_barcodes``; the result is treated as a
      sequence of ``(barcode_sequence, label)`` pairs.
    * ``fastq`` -- input path; opened with ``bz2.BZ2File`` when it ends in
      ``.bz2``, otherwise with ``open``.
    * ``mapfile`` -- path of the bz2-compressed CSV map that is written.
    * ``min_length`` / ``max_length`` -- inclusive bounds on read length;
      reads outside them are counted as ``fail_len`` and skipped.
    * ``search_window`` -- forwarded to ``find_barcode``.
    * ``limit`` -- processing stops once this many reads have matched.
    * ``outfile`` -- writable handle receiving FASTA records of matched reads.
    * ``unmatched`` -- optional writable handle for reads with no barcode.
    * ``stats`` -- writable handle receiving the per-label CSV summary.
    """
    barcodes = get_barcodes(args.barcodes)
    # Labels are only needed through bc_dict; unpacking still verifies that
    # every entry is a pair.
    bc_seqs, _labels = zip(*barcodes)
    bc_dict = dict(barcodes)
    bc_rexp = re.compile('|'.join(bc_seqs))

    opener = bz2.BZ2File if args.fastq.endswith('.bz2') else open
    count = Counter()
    with opener(args.fastq) as f, bz2.BZ2File(args.mapfile, 'w') as mapout:
        writer = csv.writer(mapout)
        writer.writerow([
            'name', 'label', 'barcode', 'ratio', 'bc_start', 'bc_stop', 'rle'
        ])

        for seq in SeqIO.parse(f, 'fastq'):
            if not args.min_length <= len(seq) <= args.max_length:
                count['fail_len'] += 1
                continue

            barcode, ratio, bc_start, bc_stop = find_barcode(
                seq,
                bc_rexp,
                bc_seqs,
                exact=False,
                bc_len=10,
                search_window=args.search_window)
            # None when the barcode is falsy or not a known key.
            sample = bc_dict.get(barcode)

            name = seq.id.replace(':', '_')
            if barcode:
                # The limit is checked before counting, so at most
                # args.limit matched reads are ever written.
                if count['matched'] >= args.limit:
                    break
                count['matched'] += 1
                count[barcode] += 1
                # presumably a run-length (homopolymer) encoding, given the
                # 'rle' column name -- confirm against homoencode
                encoded, counts = homoencode(seq.seq)
                args.outfile.write('>%s %s\n%s\n' % (name, sample, encoded))
                writer.writerow([
                    name, sample, barcode, ratio, bc_start, bc_stop,
                    to_ascii(counts)
                ])
            elif args.unmatched:
                # NOTE(review): reads without a barcode are only counted when
                # an unmatched handle was supplied, so 'no_match' in the stats
                # is 0 otherwise -- confirm this is intended.
                count['no_match'] += 1
                args.unmatched.write('>%s\n%s\n' % (name, seq.seq))
                # Keep the row aligned with the header columns; an unmatched
                # read has no encoding, so the trailing 'rle' cell is empty.
                writer.writerow(
                    [name, sample, barcode, ratio, bc_start, bc_stop, None])

    stats = csv.writer(args.stats)
    # Only barcode keys are reported per label; the bookkeeping keys
    # ('matched', 'no_match', 'fail_len') are written explicitly below.
    rows = sorted((bc_dict[k], v) for k, v in count.items() if k in bc_dict)
    stats.writerows(rows)
    stats.writerow(['total_matched', count['matched']])
    stats.writerow(['no_match', count['no_match']])
    stats.writerow(['fail_len', count['fail_len']])
示例#2
0
def action(args):
    """Split FASTQ reads by barcode, writing encoded reads, a map and stats.

    ``args`` supplies:

    * ``barcodes`` -- handed to ``get_barcodes``, whose result is used as a
      sequence of ``(barcode_sequence, label)`` pairs.
    * ``fastq`` -- input path (read through ``bz2.BZ2File`` if it ends with
      ``.bz2``, else through ``open``).
    * ``mapfile`` -- output path for the bz2-compressed CSV read map.
    * ``min_length`` / ``max_length`` -- inclusive read-length bounds; reads
      outside them increment ``fail_len`` and are skipped.
    * ``search_window`` -- passed through to ``find_barcode``.
    * ``limit`` -- the loop ends once this many reads have matched.
    * ``outfile`` -- handle that receives matched reads as FASTA.
    * ``unmatched`` -- optional handle that receives reads with no barcode.
    * ``stats`` -- handle that receives the CSV count summary.
    """
    barcodes = get_barcodes(args.barcodes)
    # Only the sequences are needed here; labels are looked up via bc_dict.
    bc_seqs, _labels = zip(*barcodes)
    bc_dict = dict(barcodes)
    bc_rexp = re.compile('|'.join(bc_seqs))

    opener = bz2.BZ2File if args.fastq.endswith('.bz2') else open
    count = Counter()
    with opener(args.fastq) as f, bz2.BZ2File(args.mapfile, 'w') as mapout:
        writer = csv.writer(mapout)
        writer.writerow(['name', 'label', 'barcode', 'ratio', 'bc_start', 'bc_stop', 'rle'])

        for seq in SeqIO.parse(f, 'fastq'):
            if not args.min_length <= len(seq) <= args.max_length:
                count['fail_len'] += 1
                continue

            barcode, ratio, bc_start, bc_stop = find_barcode(
                seq, bc_rexp, bc_seqs,
                exact=False,
                bc_len=10,
                search_window=args.search_window)
            # bc_dict.get yields None for a falsy or unknown barcode.
            sample = bc_dict.get(barcode)

            name = seq.id.replace(':', '_')
            if barcode:
                # Checked before incrementing, so no more than args.limit
                # matched reads are emitted.
                if count['matched'] >= args.limit:
                    break
                count['matched'] += 1
                count[barcode] += 1
                # presumably homopolymer run-length encoding (the map column
                # is named 'rle') -- confirm against homoencode
                encoded, counts = homoencode(seq.seq)
                args.outfile.write('>%s %s\n%s\n' % (name, sample, encoded))
                writer.writerow([name, sample, barcode, ratio, bc_start, bc_stop, to_ascii(counts)])
            elif args.unmatched:
                # NOTE(review): 'no_match' is only incremented when an
                # unmatched handle exists; without one the stats report 0 --
                # confirm this is intended.
                count['no_match'] += 1
                args.unmatched.write('>%s\n%s\n' % (name, seq.seq))
                # Row follows the header order; 'rle' stays empty because the
                # read was never encoded.
                writer.writerow([name, sample, barcode, ratio, bc_start, bc_stop, None])

    stats = csv.writer(args.stats)
    # Per-label rows come only from barcode keys; the summary counters are
    # appended explicitly afterwards.
    rows = sorted((bc_dict[k], v) for k, v in count.items() if k in bc_dict)
    stats.writerows(rows)
    stats.writerow(['total_matched', count['matched']])
    stats.writerow(['no_match', count['no_match']])
    stats.writerow(['fail_len', count['fail_len']])
示例#3
0
文件: rlencode.py 项目: crosenth/bioy
def seq_and_homoencode(seq):
    """Return ``(seq, homoencode(seq.seq))`` for a single record.

    The record itself is passed back unchanged alongside whatever
    ``homoencode`` produces for its ``seq`` attribute.
    """
    encoded = homoencode(seq.seq)
    return seq, encoded
示例#4
0
文件: rlencode.py 项目: nhoffman/bioy
def seq_and_homoencode(seq):
    """Pair a record with the ``homoencode`` result of its ``seq`` attribute.

    Returns a 2-tuple: the original record first, the encoding second.
    """
    result = (seq, homoencode(seq.seq))
    return result
示例#5
0
 def test02(self):
     """homoencode collapses each run of repeated bases and reports run lengths."""
     rle, counts = sequtils.homoencode('GCTTCAAACATA')
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed in Python 3.12.
     self.assertEqual(rle, 'GCTCACATA')
     self.assertEqual(counts, [1, 1, 2, 1, 3, 1, 1, 1, 1])
示例#6
0
 def test01(self):
     """A sequence made of one repeated base encodes to that base with one count."""
     rle, counts = sequtils.homoencode('AAA')
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed in Python 3.12.
     self.assertEqual(rle, 'A')
     self.assertEqual(counts, [3])
示例#7
0
    def test01(self):
        """homodecode applied to homoencode's output reproduces the input."""
        seq = 'TCTGGACCGTGTCTTTCAGTTCCAAAGTGTGACTGATCCATCCTCTCAGACC'

        e, c = sequtils.homoencode(seq)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(seq, sequtils.homodecode(e, c))