Exemplo n.º 1
0
def action(args):
    """Compute all pairwise scores for a FASTA file and write them as CSV.

    The sequences read from ``args.fasta`` are passed to ``all_pairwise``,
    whose results are consumed as ``(query, target, identity)`` triples.
    When ``args.distance`` is set, each identity is replaced by
    ``1 - identity``.

    Two output modes exist:

    * If both ``args.split_info`` and ``args.matrix_out`` are given, a
      square matrix is written to ``args.matrix_out``.  Sequences are
      mapped to a group through the ``args.primary_group`` column of the
      split-info CSV (falling back to ``args.secondary_group`` when that
      is set and the primary value is empty).  Each cell is the median
      score between two groups, rounded up to two decimals; row and
      column labels are the ``tax_name`` of the group.
    * Otherwise the raw triples are written to ``args.out`` under the
      header ``query,target,identity``.

    Args:
        args: namespace providing ``fasta``, ``distance``, ``split_info``,
            ``matrix_out``, ``primary_group``, ``secondary_group`` and
            ``out``.  The split-info CSV must have ``seqname``,
            ``tax_id`` and ``tax_name`` columns plus the group columns.

    Raises:
        KeyError: if a sequence name is missing from the split-info file,
            or a group value has no matching ``tax_id`` row.
    """
    seqs = fastalite(args.fasta)
    pairs = list(all_pairwise(seqs))

    if args.distance:
        # report distances instead of identities
        pairs = [(q, t, 1 - i) for q, t, i in pairs]

    if not (args.split_info and args.matrix_out):
        # plain long-format output: one row per pair
        writer = csv.writer(args.out)
        writer.writerow(['query', 'target', 'identity'])
        writer.writerows(pairs)
        return

    primary, secondary = args.primary_group, args.secondary_group
    split_info = list(csv.DictReader(args.split_info))
    info = {r['seqname']: r for r in split_info if r['seqname']}
    tax = {r['tax_id']: r for r in split_info}

    # Add the mirrored (target, query, score) triples so the matrix is
    # symmetric.  The mirrored triples are built as a complete list first:
    # extending a list from a lazy iterator over that same list never
    # terminates on Python 3.
    swap = itemgetter(1, 0, 2)
    pairs += [swap(p) for p in pairs]

    def group(seqname):
        """Return the group value for *seqname* from the split info."""
        record = info[seqname]
        if secondary:
            # fall back to the secondary column when the primary is empty
            return record[primary] or record[secondary]
        return record[primary]

    pairs = ((group(left), group(right), score)
             for left, right, score in pairs)

    # sort and group rows
    pairs = list(groupbyl(pairs, key=itemgetter(0)))

    matrix_out = csv.writer(args.matrix_out)

    # This is the tax_id order we will be using for columns.  It must be a
    # real list because it is walked once for the header and again for
    # every row; a one-shot iterator would be empty after the header.
    tax_ids = [row[0] for row in pairs]

    # get the species names to output as first row
    matrix_out.writerow([''] + [tax[t]['tax_name'] for t in tax_ids])

    # iterate through the sorted rows
    for row_id, columns in pairs:
        # sort and group columns
        columns = dict(groupbyl(columns, key=itemgetter(1)))

        # get the species name
        row = [tax[row_id]['tax_name']]

        for t in tax_ids:
            if t not in columns:
                # No pair exists for this cell, which means only one
                # sequence represents the group, therefore the median
                # distance is 0.
                # NOTE(review): 0 is the natural value for distances; if
                # the matrix is ever produced without args.distance the
                # scores are identities -- confirm 0 is still intended.
                med = 0
            else:
                # hand median() a list rather than a lazy iterator
                scores = [itemgetter(2)(entry) for entry in columns[t]]
                med = median(scores)
                # round up to two decimal places
                med = math.ceil(med * 100) / 100

            row.append(med)

        matrix_out.writerow(row)
Exemplo n.º 2
0
 def test02(self):
     """Check pair count and name recovery for the 'two.fasta' fixture.

     The number of pairs must equal n * (n - 1) / 2 for n sequences, and
     the names recovered from the pairs must match the sequence ids in
     their original order.
     """
     with open(self.data('two.fasta')) as handle:
         records = list(sequtils.fastalite(handle))
         pairwise = list(sequtils.all_pairwise(records))

     total = len(records)
     expected_pairs = (total * (total - 1)) / 2
     self.assertEqual(len(pairwise), expected_pairs)

     expected_ids = [record.id for record in records]
     recovered_ids = list(sequtils.names_from_pairs(pairwise))
     self.assertEqual(expected_ids, recovered_ids)