def setUp(self):
    """setUp: build the structures and sequence shared by the tests"""
    true_pairs = [
        (0, 40), (1, 39), (2, 38), (3, 37), (10, 20),
        (11, 19), (12, 18), (13, 17), (26, 33), (27, 32),
    ]
    predicted_pairs = [
        (0, 40), (1, 39), (2, 38), (3, 37), (4, 36),
        (5, 35), (10, 22), (11, 20), (14, 29), (15, 28),
    ]
    self.true = Pairs(true_pairs)
    self.predicted = Pairs(predicted_pairs)
    # label line followed by the sequence line
    self.seq = ['>seq1\n', 'agguugaaggggauccgauccacuccccggcuggucaaccu']
def test_compare_pairs(self):
    """compare_pairs: should work on simple case"""
    four_pairs = [(3, 10), (4, 9), (5, 8), (20, 24)]

    # identical structures
    first = Pairs(list(four_pairs))
    second = Pairs(list(four_pairs))
    self.assertEqual(compare_pairs(first, second), 1)

    # no pair in common
    first = Pairs(list(four_pairs))
    second = Pairs([(1, 2), (3, 4), (5, 6)])
    self.assertEqual(compare_pairs(first, second), 0)

    # second structure empty
    first = Pairs(list(four_pairs))
    second = Pairs([])
    self.assertEqual(compare_pairs(first, second), 0)

    # two shared pairs, two private pairs on each side
    first = Pairs([(1, 2), (3, 4), (5, 6), (7, 8)])
    second = Pairs([(1, 2), (3, 4), (9, 10), (11, 12)])
    self.assertFloatEqual(compare_pairs(first, second), .33333333333333333)

    # two shared pairs, one private pair on each side
    first = Pairs([(1, 2), (3, 4), (5, 6)])
    second = Pairs([(1, 2), (3, 4), (9, 10)])
    self.assertFloatEqual(compare_pairs(first, second), .5)
def test_get_counts(self):
    """get_counts: should work with all parameters"""
    # these two sequences are constructed but not passed to get_counts
    seq = RnaSequence('UCAG-NAUGU')
    seq2 = RnaSequence('UAAG-CACGC')
    reference = Pairs([(1, 8), (2, 7)])
    prediction = Pairs([(1, 8), (2, 6), (3, 6), (4, 9)])

    # default call: the FP_* entries stay at zero
    expected = dict(TP=1, TN=0, FN=1, FP=3,
                    FP_INCONS=0, FP_CONTRA=0, FP_COMP=0)
    self.assertEqual(get_counts(reference, prediction), expected)

    # split_fp=True: the three false positives are classified
    expected = dict(TP=1, TN=0, FN=1, FP=3,
                    FP_INCONS=1, FP_CONTRA=1, FP_COMP=1)
    observed = get_counts(reference, prediction, split_fp=True)
    self.assertEqual(observed, expected)

    # with a sequence and min_dist the TN entry is filled in as well
    seq = RnaSequence('UCAG-NACGU')
    expected = dict(TP=1, TN=7, FN=1, FP=3,
                    FP_INCONS=1, FP_CONTRA=1, FP_COMP=1)
    observed = get_counts(reference, prediction, split_fp=True,
                          sequences=[seq], min_dist=2)
    self.assertEqual(observed, expected)

    # check against compare_ct.pm
    expected = dict(TP=4, TN=266, FN=6, FP=6,
                    FP_INCONS=2, FP_CONTRA=2, FP_COMP=2)
    seq = 'agguugaaggggauccgauccacuccccggcuggucaaccu'.upper()
    observed = get_counts(self.true, self.predicted, split_fp=True,
                          sequences=[seq], min_dist=4)
    self.assertEqual(observed, expected)
def test_selectivity_dupl(self):
    """selectivity: duplicates and Nones shouldn't influence the calc."""
    # reference holds a flipped duplicate (6, 1) and two pairs with None
    reference = Pairs(
        [(1, 6), (2, 5), (10, 13), (6, 1), (7, None), (None, None)])
    prediction = Pairs([(6, 1), (3, 4), (10, 12)])
    observed = selectivity(reference, prediction)
    self.assertFloatEqual(observed, 0.5)
def test_sensitivity_dupl(self):
    """sensitivity: should handle duplicates, pseudo, None"""
    reference = Pairs(
        [(1, 6), (2, 5), (3, 10), (7, None), (None, None), (5, 2), (4, 9)])

    prediction = Pairs([(6, 1), (10, 11), (3, 12)])
    self.assertFloatEqual(sensitivity(reference, prediction), 0.25)

    # same prediction plus None-containing pairs and a flipped duplicate
    prediction = Pairs(
        [(6, 1), (10, 11), (3, 12), (20, None), (None, None), (1, 6)])
    self.assertFloatEqual(sensitivity(reference, prediction), 0.25)
def test_adjust_base_None(self):
    """adjust_base: should keep Nones or duplicates, ignore conflicts"""
    # None entries stay None; the conflicting (2, 10) is shifted like the rest
    original = Pairs([(2, 8), (3, 7), (6, None), (None, None), (2, 10)])
    shifted_down = Pairs([(1, 7), (2, 6), (5, None), (None, None), (1, 9)])
    self.assertEqual(adjust_base(original, -1), shifted_down)

    # duplicates and flipped pairs are all kept
    repeated = Pairs([(1, 2), (2, 1), (1, 2), (2, None)])
    observed = adjust_base(repeated, 1)
    self.assertEqual(observed, [(2, 3), (3, 2), (2, 3), (3, None)])
def test_ungapped_to_gapped(self):
    """ungapped_to_gapped: Sequence, ModelSequence, old_cogent, string"""
    expected_pairs = Pairs([(0, 6), (1, 5), (3, 9)])
    input_pairs = Pairs([(0, 5), (1, 4), (3, 7)])
    # every sequence fixture must yield the same pairs (second item of
    # the returned value)
    for fixture_name in ('rna1', 'm1', 's1'):
        sequence = getattr(self, fixture_name)
        converted = ungapped_to_gapped(sequence, input_pairs)
        self.assertEqual(converted[1], expected_pairs)
def test_sensitivity_empty(self):
    """sensitivity: should work on empty Pairs"""
    # both arguments empty
    observed = sensitivity(Pairs(), Pairs())
    self.assertFloatEqual(observed, 1)

    filled = Pairs([(6, 1), (10, 11), (3, 12), (13, 20), (14, 19), (15, 18)])

    # first argument (the reference) empty
    observed = sensitivity(Pairs(), filled)
    self.assertFloatEqual(observed, 0)

    # second argument (the prediction) empty
    observed = sensitivity(filled, Pairs())
    self.assertFloatEqual(observed, 0)
def test_selectivity_general(self):
    """selectivity: should work in general"""
    reference = Pairs([(1, 6), (2, 5), (10, 13)])

    # one pair of the prediction, (6, 1), matches the reference
    prediction = Pairs([(6, 1), (3, 4), (10, 12)])
    observed = selectivity(reference, prediction)
    self.assertFloatEqual(observed, 0.5)

    # a larger prediction that still shares only (6, 1) with the reference
    prediction = Pairs(
        [(6, 1), (10, 11), (3, 12), (13, 20), (14, 19), (15, 18)])
    observed = selectivity(reference, prediction)
    self.assertFloatEqual(observed, 0.25)
def test_symmetric(self):
    """Pairs symmetric() should add (down,up) for each (up,down)"""
    self.assertEqual(self.Empty.symmetric(), [])
    self.assertEqualItems(self.OneTuple.symmetric(), [(2, 1), (1, 2)])

    # a duplicated pair contributes only once
    duplicated = Pairs([(1, 2), (1, 2)])
    self.assertEqualItems(duplicated.symmetric(), [(1, 2), (2, 1)])

    two_pairs = Pairs([(1, 2), (3, 4)])
    self.assertEqualItems(two_pairs.symmetric(),
                          [(1, 2), (2, 1), (3, 4), (4, 3)])

    # a pair with a None partner produces nothing
    half_open = Pairs([(1, None)])
    self.assertEqualItems(half_open.symmetric(), [])
def test_selectivity_empty(self):
    """selectivity: should handle empty reference/predicted structure"""
    # both arguments empty
    observed = selectivity(Pairs(), Pairs())
    self.assertFloatEqual(observed, 1)

    filled = Pairs([(6, 1), (10, 11), (3, 12), (13, 20), (14, 19), (15, 18)])

    # first argument (the reference) empty
    observed = selectivity(Pairs(), filled)
    self.assertFloatEqual(observed, 0)

    # second argument (the prediction) empty
    observed = selectivity(filled, Pairs())
    self.assertFloatEqual(observed, 0)
def test_delete_gaps_from_pairs_weird(self):
    """delete_gaps_from_pairs: should ignore conflicts etc"""
    gap_positions = [0, 1, 4, 5, 7, 9]

    plain = Pairs([(2, 6), (3, 8)])
    observed = delete_gaps_from_pairs(plain, gap_positions)
    self.assertEqualItems(observed, [(0, 2), (1, 3)])

    # conflicts, flipped pairs, duplicates and Nones are all carried over
    messy = Pairs([(2, 6), (3, 8), (3, None), (6, 2), (3, 8), (None, None)])
    observed = delete_gaps_from_pairs(messy, gap_positions)
    self.assertEqualItems(
        observed,
        [(0, 2), (1, 3), (1, None), (2, 0), (1, 3), (None, None)])
def test_directed(self):
    """Pairs directed() should change all pairs so that a<b in (a,b)"""
    self.assertEqual(self.Empty.directed(), [])

    # sort before comparing so the order of the result doesn't matter
    directed = self.Undirected.directed()
    directed.sort()
    self.assertEqual(directed, Pairs([(1, 2), (1, 7), (3, 8), (4, 6)]))

    self.assertEqual(self.UndirectedNone.directed(), Pairs([]))
    self.assertEqual(self.UndirectedDouble.directed(), Pairs([(1, 2)]))
def test_get_counts_pseudo(self):
    """get_counts: should work when pseudo in ref -> classification off"""
    # the single false positive, (3, 6), is expected to count as
    # contradicting here rather than compatible
    reference = Pairs([(0, 8), (1, 7), (4, 10)])
    prediction = Pairs([(0, 8), (3, 6), (4, 10)])
    sequence = 'GACUGUGUCAU'
    expected = dict(TP=2, TN=13 - 2 - 1, FN=1, FP=1,
                    FP_INCONS=0, FP_CONTRA=1, FP_COMP=0)
    observed = get_counts(reference, prediction, split_fp=True,
                          sequences=[sequence], min_dist=4)
    self.assertEqual(observed, expected)
def pairs_union(one, other):
    """Returns the union of one and other

    one: list of tuples or Pairs object
    other: list of tuples or Pairs object

    Both inputs are passed through Pairs.directed() and converted to sets
    before they are combined, so every pair occurs only once in the result.
    The result is a Pairs object built from a set, so its order is arbitrary.

    one and other should map onto a sequence of the same length.
    """
    pairs1 = frozenset(Pairs(one).directed())  # set conversion removes duplicates
    pairs2 = frozenset(Pairs(other).directed())
    return Pairs(pairs1 | pairs2)
def get_counts(ref, predicted, split_fp=False, sequences=None, min_dist=4):
    """Return TP, TN, FN, FP (and optionally classified FP) counts

    ref: list of tuples or Pairs object, the reference structure
    predicted: list of tuples or Pairs object, the predicted structure
    split_fp: if True, every false positive (x, y) is additionally put in
        exactly one of three classes:
        FP_INCONS -- x or y is paired in the reference
        FP_CONTRA -- some position in x..y is paired in the reference
            with a partner outside x..y
        FP_COMP -- neither of the above
    sequences: passed on to get_all_pairs together with min_dist to obtain
        the number of possible pairs. TN is only computed when sequences
        is given (and non-empty); otherwise it stays 0.
    min_dist: passed on to get_all_pairs

    Returns a dict with the keys TP, TN, FN, FP, FP_INCONS, FP_CONTRA and
    FP_COMP. The three FP_* entries stay 0 unless split_fp is True.
    TN = possible pairs - TP - FP_INCONS - FP_CONTRA.
    """
    result = dict.fromkeys(
        ['TP', 'TN', 'FN', 'FP', 'FP_INCONS', 'FP_CONTRA', 'FP_COMP'], 0)

    ref_set = frozenset(Pairs(ref).directed())
    pred_set = frozenset(Pairs(predicted).directed())
    # position -> partner lookup for the reference; wrapping in Pairs keeps
    # plain lists of tuples working here, as they do for the sets above
    ref_dict = dict(Pairs(ref).symmetric())

    fp_pairs = pred_set.difference(ref_set)
    result['TP'] = len(ref_set.intersection(pred_set))
    result['FN'] = len(ref_set.difference(pred_set))
    result['FP'] = len(fp_pairs)

    if split_fp:
        for x, y in fp_pairs:
            if x in ref_dict or y in ref_dict:
                result['FP_INCONS'] += 1
            elif any(idx in ref_dict and
                     (ref_dict[idx] < x or ref_dict[idx] > y)
                     for idx in range(x, y + 1)):
                # a reference pair has one partner inside and one outside
                # the span of the predicted pair
                result['FP_CONTRA'] += 1
            else:
                result['FP_COMP'] += 1

    if sequences:
        num_possible_pairs = get_all_pairs(sequences, min_dist)
        result['TN'] = (num_possible_pairs - result['TP']
                        - result['FP_INCONS'] - result['FP_CONTRA'])

    return result
def test_all_metrics_pseudo(self):
    """all_metrics: pseudoknot in ref, check against compare_ct.pm"""
    reference = Pairs([(0, 8), (1, 7), (4, 10)])
    prediction = Pairs([(0, 8), (3, 6), (4, 10)])
    sequence = 'GACUGUGUCAU'
    expected = {
        'SENSITIVITY': 0.6666667,
        'SELECTIVITY': 0.6666667,
        'AC': 0.6666667,
        'CC': 0.57575758,
        'MCC': 0.57575758,
    }
    observed = all_metrics(reference, prediction, seqs=[sequence],
                           min_dist=4)
    # same set of metric names, then each value within float tolerance
    self.assertEqualItems(observed.keys(), expected.keys())
    for metric in expected:
        self.assertFloatEqual(observed[metric], expected[metric])
def test_pairs_intersection_duplicates(self):
    """pairs_intersection: should work on flipped pairs and duplicates"""
    first = Pairs([(3, 10), (4, 9), (5, 8), (20, 24)])
    second = Pairs([(10, 3), (4, 9), (5, 8), (9, 4), (4, 9), (23, 30)])
    shared = pairs_intersection(first, second)
    self.assertEqualItems(shared, [(3, 10), (4, 9), (5, 8)])

    # Conflicts, duplicates, None, pseudoknots
    first = Pairs(
        [(3, 10), (4, 9), (5, 8), (20, 24), (22, 26), (3, 2), (9, 4),
         (6, None)])
    second = Pairs([(1, 12), (4, 9), (5, 8)])
    shared = pairs_intersection(first, second)
    self.assertEqualItems(shared, [(4, 9), (5, 8)])
def test_compare_random_to_correct(self):
    """compare_random_to_correct: should return correct fraction"""
    correct = Pairs([(1, 8), (2, 7), (3, 6), (4, 5)])
    single = Pairs([(1, 8)])
    subset = Pairs([(1, 8), (2, 7), (4, 5)])
    half_right = Pairs([(1, 8), (2, 7), (9, 10), (11, 12)])

    # (one, other, expected fraction of one that is found in other)
    cases = [
        (single, correct, 1),
        (subset, correct, 1),
        (half_right, correct, 0.5),
        ([], correct, 0),
        (single, [], 0),
        ([], [], 1),
    ]
    for one, other, expected in cases:
        observed = compare_random_to_correct(one, other)
        self.assertFloatEqual(observed, expected)
def test_adjust_pairs_from_mapping_confl(self):
    """adjust_pairs_from_mapping: should handle conflicts, pseudo, dupl"""
    # every position is in the mapping: all pairs are translated
    pairs = Pairs(
        [(0, 6), (1, 5), (2, None), (None, None), (1, 4), (3, 7), (6, 0)])
    mapping = {0: 1, 1: 3, 2: 6, 3: 7, 4: 8, 5: 10, 6: 11, 7: 12}
    expected = Pairs(
        [(1, 11), (3, 10), (6, None), (None, None), (3, 8), (7, 12),
         (11, 1)])
    observed = adjust_pairs_from_mapping(pairs, mapping)
    self.assertEqual(observed, expected)

    # position 5 is missing from the mapping, so (5, 8) is left out
    pairs = Pairs(
        [(1, 11), (3, 10), (7, 12), (6, None), (None, None), (5, 8)])
    mapping = {1: 0, 3: 1, 6: 2, 7: 3, 8: 4, 10: 5, 11: 6, 12: 7}
    expected = Pairs([(0, 6), (1, 5), (3, 7), (2, None), (None, None)])
    observed = adjust_pairs_from_mapping(pairs, mapping)
    self.assertEqual(observed, expected)
def test_insert_gaps_in_pairs(self):
    """insert_gaps_in_pairs: should work with normal and conflicts"""
    pairs = Pairs([(0, 3), (1, 2), (1, 4), (3, None)])
    observed = insert_gaps_in_pairs(pairs, [0, 1, 4, 5, 7])
    self.assertEqual(observed, [(2, 8), (3, 6), (3, 9), (8, None)])

    # one structure, two different gap lists
    pairs = Pairs([(0, 6), (1, 5), (2, None), (3, 7), (0, 1), (5, 1)])

    observed = insert_gaps_in_pairs(pairs, [0, 2, 6, 9])
    self.assertEqual(
        observed, [(1, 10), (3, 8), (4, None), (5, 11), (1, 3), (8, 3)])

    observed = insert_gaps_in_pairs(pairs, [2, 3, 4, 9])
    self.assertEqual(
        observed, [(0, 10), (1, 8), (5, None), (6, 11), (0, 1), (8, 1)])
def _ct_record(seq, struct, energy):
    """Return [seq, pairs] or [seq, pairs, energy] for one parsed structure"""
    # structs are one(1) based, adjust to zero based
    pairs = Pairs(adjust_base(struct, -1)).directed()
    pairs.sort()
    if energy is not None:
        return [seq, pairs, energy]
    return [seq, pairs]


def ct_parser(lines=None):
    """Ct format parser

    Takes lines from a ct file as input (any iterable of strings; the
    default of None cannot be iterated and raises TypeError).
    Returns a list containing sequence, structure and, if available, the
    energy:
    [[seq1,[struct1],energy1],[seq2,[struct2],energy2],...]

    The first line, and every later line for which new_struct(line) is
    true, is treated as the header of a structure. If the header contains
    the token 'dG' or 'ENERGY' the energy is read from its fourth field; if
    it contains the token 'Structure' the energy is read from its third
    field. On every other line the second field is appended to the
    sequence, and the first and fifth fields form a pair unless the fifth
    field is 0.

    The pairs of each structure are shifted to zero-based positions,
    passed through Pairs.directed() and sorted.
    """
    energy = None
    seq_parts = []
    struct = []
    result = []
    for count, line in enumerate(lines, 1):
        sline = line.split(None, 6)  # sline = split line
        if count == 1 or new_struct(line):
            # a header after the first line closes the previous structure
            if count > 1:
                result.append(_ct_record(''.join(seq_parts), struct, energy))
                energy = None
                struct = []
                seq_parts = []
            # checks if energy for predicted struct is given
            if 'dG' in sline or 'ENERGY' in sline:
                energy = float(sline[3])
            if 'Structure' in sline:
                energy = float(sline[2])
        else:
            seq_parts.append(sline[1])
            partner = int(sline[4])
            if partner != 0:  # a partner of 0 marks an unpaired base
                struct.append((int(sline[0]), partner))
    # the last structure has no header following it
    result.append(_ct_record(''.join(seq_parts), struct, energy))
    return result
def test_delete_gaps_from_pairs(self):
    """delete_gaps_from_pairs: should work on standard input"""
    # empty list
    self.assertEqual(delete_gaps_from_pairs(Pairs([]), [1, 2, 3]), [])

    # normal lists: the third pair of the last two inputs, (4, 9) and
    # (4, 10), is absent from the expected result
    gap_positions = [0, 1, 4, 5, 7, 9]
    inputs = (
        [(2, 8), (3, 6)],
        [(2, 8), (3, 6), (4, 9)],
        [(2, 8), (3, 6), (4, 10)],
    )
    for raw_pairs in inputs:
        observed = delete_gaps_from_pairs(Pairs(raw_pairs), gap_positions)
        self.assertEqualItems(observed, [(0, 3), (1, 2)])
def test_tuples(self):
    """Pairs tuples() should transform the elements of list to tuples"""
    # tuples() works in place, so each object is checked after the call
    empty = Pairs([])
    empty.tuples()
    assert empty == []

    from_lists = Pairs([[1, 2], [3, 4]])
    from_lists.tuples()
    assert from_lists == [(1, 2), (3, 4)]

    from_tuples = Pairs([(1, 2), (3, 4)])
    from_tuples.tuples()
    assert from_tuples == [(1, 2), (3, 4)]
    assert from_tuples != [[1, 2], [3, 4]]
def test_mismatches(self):
    """Pairs mismatches() should return #pairs that can't be formed"""
    # with plain string: (allowed pairs, expected number of mismatches)
    single_pair_cases = (
        ({}, 1),
        ({('A', 'C'): None}, 0),
        ({('A', 'G'): None}, 1),
    )
    for allowed, expected in single_pair_cases:
        observed = Pairs([(0, 1)]).mismatches('AC', allowed)
        self.assertEqual(observed, expected)

    three_pairs = Pairs([(0, 1), (2, 3), (3, 1)])
    observed = three_pairs.mismatches('ACGU', {('A', 'U'): None})
    self.assertEqual(observed, 3)

    # using sequence with alphabet
    sequence = Rna('ACGUA')
    observed = Pairs([(0, 1), (0, 4), (0, 3)]).mismatches(sequence)
    self.assertEqual(observed, 2)
def compare_random_to_correct(one, other):
    """Returns fraction of bp in one that is in other (correct)

    one: list of tuples or Pairs object
    other: list of tuples or Pairs object

    Note: the second structure is the one compared against (the correct
    structure)

    Both structures are passed through Pairs.directed() and de-duplicated
    before counting. Returns 1.0 when both structures are empty and 0.0
    when only one of them is. The result is always a float.
    """
    if not one and not other:
        return 1.0
    if not one or not other:
        return 0.0

    pairs1 = frozenset(Pairs(one).directed())  # set conversion removes duplicates
    pairs2 = frozenset(Pairs(other).directed())
    if not pairs1:
        # directed() may discard pairs, so a non-empty input can still
        # leave nothing to count; apply the empty-input rules from above
        # instead of dividing by zero
        return 0.0 if pairs2 else 1.0
    # float() keeps the fraction exact even where "/" is integer division
    return float(len(pairs1 & pairs2)) / len(pairs1)
def adjust_base(pairs, offset):
    """Returns new Pairs with every position shifted by offset

    pairs: Pairs object or list of tuples
    offset: integer

    Raises PairsAdjustmentError if offset is not an integer.

    The result is not validated: negative positions can come out, which is
    the caller's responsibility. All pairs are treated alike, so the
    result has exactly as many pairs as the input, including duplicates
    and pairs that contain None (None is left as None).

    Example: adjust_base(Pairs([(2,8),(4,None)]), 2) --> [(4,10),(6,None)]
    """
    if not isinstance(offset, int):
        raise PairsAdjustmentError("adjust_base: offset should be integer")

    def shifted(position):
        # None marks a missing partner and is passed through unchanged
        return position if position is None else position + offset

    adjusted = Pairs()
    for upstream, downstream in pairs:
        adjusted.append((shifted(upstream), shifted(downstream)))

    assert len(adjusted) == len(pairs)
    return adjusted
def adjust_pairs_from_mapping(pairs, mapping):
    """Returns new Pairs object with positions translated through mapping

    pairs: list of tuples or Pairs object
    mapping: dictionary that maps positions in one state to positions in
        the other (e.g. ungapped to gapped)

    For example:
    {0: 0, 1: 1, 2: 3, 3: 4, 4: 6, 5: 7, 6: 9, 7: 10, 8: 12}

    To insert gaps into pairs that belong to an ungapped sequence, pass a
    mapping from ungapped to gapped. To degap pairs that belong to a gapped
    sequence, pass a mapping from gapped to ungapped.

    None positions are kept as None. A pair is left out of the result when
    one of its (non-None) positions is not a key of mapping.
    """
    translated = Pairs()
    for upstream, downstream in pairs:
        upstream_missing = upstream is not None and upstream not in mapping
        if upstream_missing:
            continue
        downstream_missing = (downstream is not None and
                              downstream not in mapping)
        if downstream_missing:
            continue
        new_upstream = None if upstream is None else mapping[upstream]
        new_downstream = None if downstream is None else mapping[downstream]
        translated.append((new_upstream, new_downstream))
    return translated
def test_pairs_intersection(self):
    """pairs_intersection: should work on simple case"""
    first_raw = [(3, 10), (4, 9), (5, 8), (20, 24)]
    second_raw = [(1, 12), (4, 9), (5, 8)]

    # two Pairs objects
    shared = pairs_intersection(Pairs(list(first_raw)),
                                Pairs(list(second_raw)))
    self.assertEqualItems(shared, [(4, 9), (5, 8)])

    # works when one is empty
    shared = pairs_intersection(Pairs(list(first_raw)), Pairs([]))
    self.assertEqualItems(shared, [])

    # works also on lists (not Pairs)
    shared = pairs_intersection(list(first_raw), list(second_raw))
    self.assertEqualItems(shared, [(4, 9), (5, 8)])
def test_adjust_base(self):
    """adjust_base: should work for pairs object or list of pairs"""
    # empty Pairs object
    self.assertEqual(adjust_base(Pairs(), 10), [])

    # plain list of tuples, shifted down by one
    before = [(1, 21), (2, 15), (3, 13), (4, 11), (5, 10), (6, 9)]
    after = [(0, 20), (1, 14), (2, 12), (3, 10), (4, 9), (5, 8)]
    self.assertEqual(adjust_base(before, -1), after)

    # Pairs object; negative positions are not rejected
    structure = Pairs([(0, 10), (1, 9)])
    self.assertEqual(adjust_base(structure, -1), Pairs([(-1, 9), (0, 8)]))
    self.assertEqual(adjust_base(structure, 5), Pairs([(5, 15), (6, 14)]))

    # a non-integer offset is refused
    self.assertRaises(PairsAdjustmentError, adjust_base, structure, 3.5)