def make_most_abundant(seqs):
    """Build a chooser that picks the id belonging to the largest group.

    ``seqs`` is passed to ``unique_id_map`` to obtain an
    ``{orig_id: unique_rep_id}`` mapping, which is then inverted with
    ``invert_dict`` (presumably giving ``{unique_rep_id: [orig_id, ...]}``
    -- confirm against ``invert_dict``).  Both mappings are captured by the
    returned closure.

    Returns:
        A function ``most_abundant(ids, seqs='ignored')``.
    """
    rep_of = unique_id_map(seqs)
    members_of = invert_dict(rep_of)

    def most_abundant(ids, seqs='ignored'):
        """Return the element of ``ids`` whose group has the most members.

        The ``seqs`` argument is accepted but never read.  Which id wins on
        a tie is decided by ``argmax``.
        """
        group_sizes = []
        for seq_id in ids:
            representative = rep_of[seq_id]
            group_sizes.append(len(members_of[representative]))
        winner = argmax(group_sizes)
        return ids[winner]

    return most_abundant
def unique_id_map(seqs):
    """Map every original id to the representative id of its group.

    ``seqs`` is grouped with ``invert_dict`` (presumably yielding
    ``{seq: [id, ...]}`` for ids that share the same sequence -- confirm
    against ``invert_dict``).  The first id of each group acts as the
    representative for all ids in that group, itself included.

    Returns:
        dict of ``{orig_id: unique_rep_id}``.
    """
    id_groups = invert_dict(seqs)
    return {
        member: members[0]
        for members in id_groups.values()
        for member in members
    }