Exemplo n.º 1
0
    def tearDown(self):
        """Verify the errors itemized from ``self.ref`` vs ``self.query``.

        Each test is expected to set ``self.ref``, ``self.query`` and
        ``self.expected`` before this runs. ``self.expected`` is indexed by
        the ``'i'`` value of each itemized error and each of its values
        provides the expected ``'ref'`` and ``'query'`` entries.
        """
        # Materialize once: the result is logged, scanned for indices and
        # then iterated again position by position.
        errors = list(sequtils.itemize_errors(self.ref, self.query))
        log.debug(sequtils.show_errors(errors))

        log.debug('\n' + pprint.pformat(self.expected))
        log.debug('\n' + pprint.pformat(errors))

        # indices into ref are as expected
        # (assertEqual replaces the deprecated assertEquals alias, which
        # no longer exists in recent unittest releases)
        self.assertEqual(set(e['i'] for e in errors),
                         set(self.expected.keys()))

        # individual positions are as expected
        for pos in errors:
            wanted = self.expected[pos['i']]
            self.assertEqual(pos['ref'], wanted['ref'])
            self.assertEqual(pos['query'], wanted['query'])
Exemplo n.º 2
0
def action(args):
    """Tally alignment errors per alignment and, optionally, a homopolymer matrix.

    Attributes read from ``args``:

    - ``aligns``: input handed to ``DictReader``; each row must provide
      ``t_name``, ``q_name``, ``t_seq`` and ``q_seq``.
    - ``out``: output handed to ``DictWriter``; one row of error counts is
      written per alignment.
    - ``extra_fields``: mapping whose keys become additional output columns
      and whose values are copied into every row.
    - ``output_alignment``: when true, an ``alignment`` column holding
      ``show_errors(...)`` is added.
    - ``homopolymer_max``: run lengths at or above this value are pooled
      into a single ``'geq<N>'`` bin of the homopolymer matrix.
    - ``step``: when true, wait for a line on stdin after each alignment.
    - ``matrix``: when true-ish, an object with ``writerow`` that receives
      the homopolymer matrices (reference length in rows, query length in
      columns), one per base plus ``'total'``.
    """
    fieldnames = ['t_name', 'q_name', 'length', 'snp', 'indel']
    fieldnames += ['homoindel', 'compound', 'total', 'sw_zscore']
    fieldnames += args.extra_fields.keys()

    if args.output_alignment:
        fieldnames += ['alignment']

    aligns = DictReader(args.aligns)

    tallies = DictWriter(args.out,
                         fieldnames=fieldnames,
                         extrasaction='ignore')
    tallies.writeheader()

    homopolymers = defaultdict(Counter)
    gtceil = 'geq{}'.format(args.homopolymer_max)

    # output error counts:
    for a in aligns:
        # Materialize the errors: they are traversed several times below
        # (counting, rendering, homopolymer tally), which would silently see
        # nothing after the first pass if itemize_errors yields lazily.
        errors = list(itemize_errors(a['t_seq'], a['q_seq']))

        # start every count column at zero, then overlay the input row and
        # the observed error counts
        row = {k: 0 for k in fieldnames[2:]}
        row['q_name'], row['t_name'] = a['q_name'], a['t_name']
        row.update(a)
        row.update(error_count(errors))

        # create total count to output
        row['total'] = (row['snp'] + row['indel'] +
                        row['homoindel'] + row['compound'])

        row.update(args.extra_fields)

        if args.output_alignment:
            row['alignment'] = show_errors(errors)

        tallies.writerow(row)

        log.debug(a['q_name'])
        log.debug('\n' + sequtils.format_alignment(a['t_seq'], a['q_seq']))
        log.debug(a['q_seq'].replace('-', '').replace('=', ''))
        log.debug(show_errors(errors))

        if args.step:
            # pause until the user presses Enter
            raw_input()

        # create homopolymer matrix
        for e in errors:
            r, q = e['ref'].strip('=-'), e['query'].strip('=-')
            # count indels or homoindels; exclude compound errors and snps
            base = ''.join(set(r + q))
            if len(base) == 1:
                # Strictly-less-than: a run of exactly homopolymer_max
                # belongs to the 'geq' bin. With '<=' it was stored under an
                # integer key that the matrix below never prints.
                ref_count = len(r) if len(r) < args.homopolymer_max else gtceil
                query_count = len(q) if len(q) < args.homopolymer_max else gtceil
                homopolymers['total'][(ref_count, query_count)] += 1
                homopolymers[base][(ref_count, query_count)] += 1

    # output homopolymer matrix if specified
    if args.matrix:
        # reference counts in rows, query in column
        ii = list(range(args.homopolymer_max))
        margins = ii + [gtceil]
        header = ['q{}'.format(i) for i in ii] + [gtceil]
        for base in sorted(homopolymers):
            args.matrix.writerow([base] + header)
            for i_ref in margins:
                cols = [homopolymers[base][i_ref, i_query] for i_query in margins]
                args.matrix.writerow(['r{}'.format(i_ref)] + cols)
            # separator row between matrices; NOTE(review): fixed at 10 cells
            # regardless of homopolymer_max
            args.matrix.writerow([''] * 10)
Exemplo n.º 3
0
def action(args):
    """Tally alignment errors per alignment and, optionally, a homopolymer matrix.

    Attributes read from ``args``:

    - ``aligns``: input handed to ``DictReader``; each row must provide
      ``t_name``, ``q_name``, ``t_seq`` and ``q_seq``.
    - ``out``: output handed to ``DictWriter``; one row of error counts is
      written per alignment.
    - ``extra_fields``: mapping whose keys become additional output columns
      and whose values are copied into every row.
    - ``output_alignment``: when true, an ``alignment`` column holding
      ``show_errors(...)`` is added.
    - ``homopolymer_max``: run lengths at or above this value are pooled
      into a single ``'geq<N>'`` bin of the homopolymer matrix.
    - ``step``: when true, wait for a line on stdin after each alignment.
    - ``matrix``: when true-ish, an object with ``writerow`` that receives
      the homopolymer matrices (reference length in rows, query length in
      columns), one per base plus ``'total'``.
    """
    fieldnames = ['t_name', 'q_name', 'length', 'snp', 'indel']
    fieldnames += ['homoindel', 'compound', 'total', 'sw_zscore']
    fieldnames += args.extra_fields.keys()

    if args.output_alignment:
        fieldnames += ['alignment']

    aligns = DictReader(args.aligns)

    tallies = DictWriter(args.out,
                         fieldnames=fieldnames,
                         extrasaction='ignore')
    tallies.writeheader()

    homopolymers = defaultdict(Counter)
    gtceil = 'geq{}'.format(args.homopolymer_max)

    # output error counts:
    for a in aligns:
        # Materialize the errors: they are traversed several times below
        # (counting, rendering, homopolymer tally), which would silently see
        # nothing after the first pass if itemize_errors yields lazily.
        errors = list(itemize_errors(a['t_seq'], a['q_seq']))

        # start every count column at zero, then overlay the input row and
        # the observed error counts
        row = {k: 0 for k in fieldnames[2:]}
        row['q_name'], row['t_name'] = a['q_name'], a['t_name']
        row.update(a)
        row.update(error_count(errors))

        # create total count to output
        row['total'] = (row['snp'] + row['indel'] +
                        row['homoindel'] + row['compound'])

        row.update(args.extra_fields)

        if args.output_alignment:
            row['alignment'] = show_errors(errors)

        tallies.writerow(row)

        log.debug(a['q_name'])
        log.debug('\n' + sequtils.format_alignment(a['t_seq'], a['q_seq']))
        log.debug(a['q_seq'].replace('-', '').replace('=', ''))
        log.debug(show_errors(errors))

        if args.step:
            # pause until the user presses Enter
            raw_input()

        # create homopolymer matrix
        for e in errors:
            r, q = e['ref'].strip('=-'), e['query'].strip('=-')
            # count indels or homoindels; exclude compound errors and snps
            base = ''.join(set(r + q))
            if len(base) == 1:
                # Strictly-less-than: a run of exactly homopolymer_max
                # belongs to the 'geq' bin. With '<=' it was stored under an
                # integer key that the matrix below never prints.
                ref_count = (
                    len(r) if len(r) < args.homopolymer_max else gtceil)
                query_count = (
                    len(q) if len(q) < args.homopolymer_max else gtceil)
                homopolymers['total'][(ref_count, query_count)] += 1
                homopolymers[base][(ref_count, query_count)] += 1

    # output homopolymer matrix if specified
    if args.matrix:
        # reference counts in rows, query in column
        ii = list(range(args.homopolymer_max))
        margins = ii + [gtceil]
        header = ['q{}'.format(i) for i in ii] + [gtceil]
        for base in sorted(homopolymers):
            args.matrix.writerow([base] + header)
            for i_ref in margins:
                cols = [
                    homopolymers[base][i_ref, i_query] for i_query in margins
                ]
                args.matrix.writerow(['r{}'.format(i_ref)] + cols)
            # separator row between matrices; NOTE(review): fixed at 10 cells
            # regardless of homopolymer_max
            args.matrix.writerow([''] * 10)