Пример #1
0
 def setUp(self):
     """setUp: build the structures and sequence shared by the tests"""
     reference_pairs = [
         (0, 40), (1, 39), (2, 38), (3, 37), (10, 20),
         (11, 19), (12, 18), (13, 17), (26, 33), (27, 32),
     ]
     self.true = Pairs(reference_pairs)

     predicted_pairs = [
         (0, 40), (1, 39), (2, 38), (3, 37), (4, 36),
         (5, 35), (10, 22), (11, 20), (14, 29), (15, 28),
     ]
     self.predicted = Pairs(predicted_pairs)

     # header line followed by the sequence string
     header = '>seq1\n'
     residues = 'agguugaaggggauccgauccacuccccggcuggucaaccu'
     self.seq = [header, residues]
Пример #2
0
    def test_compare_pairs(self):
        """compare_pairs: identical, disjoint, empty and overlapping input"""
        # identical structures
        first = Pairs([(3, 10), (4, 9), (5, 8), (20, 24)])
        second = Pairs([(3, 10), (4, 9), (5, 8), (20, 24)])
        score = compare_pairs(first, second)
        self.assertEqual(score, 1)

        # no pair in common
        first = Pairs([(3, 10), (4, 9), (5, 8), (20, 24)])
        second = Pairs([(1, 2), (3, 4), (5, 6)])
        score = compare_pairs(first, second)
        self.assertEqual(score, 0)

        # second structure has no pairs at all
        first = Pairs([(3, 10), (4, 9), (5, 8), (20, 24)])
        second = Pairs([])
        score = compare_pairs(first, second)
        self.assertEqual(score, 0)

        # two shared pairs, two private pairs on each side
        first = Pairs([(1, 2), (3, 4), (5, 6), (7, 8)])
        second = Pairs([(1, 2), (3, 4), (9, 10), (11, 12)])
        score = compare_pairs(first, second)
        self.assertFloatEqual(score, .33333333333333333)

        # two shared pairs, one private pair on each side
        first = Pairs([(1, 2), (3, 4), (5, 6)])
        second = Pairs([(1, 2), (3, 4), (9, 10)])
        score = compare_pairs(first, second)
        self.assertFloatEqual(score, .5)
Пример #3
0
 def test_get_counts(self):
     """get_counts: default call, split_fp, sequences and min_dist"""
     # these two sequences are constructed but not handed to get_counts
     rna = RnaSequence('UCAG-NAUGU')
     rna_alt = RnaSequence('UAAG-CACGC')
     ref = Pairs([(1, 8), (2, 7)])
     pred = Pairs([(1, 8), (2, 6), (3, 6), (4, 9)])

     # plain call: the FP sub-categories are expected to stay at zero
     expected = {'TP': 1, 'TN': 0, 'FN': 1, 'FP': 3,
                 'FP_INCONS': 0, 'FP_CONTRA': 0, 'FP_COMP': 0}
     observed = get_counts(ref, pred)
     self.assertEqual(observed, expected)

     # split_fp=True: the three false positives get classified
     expected = {'TP': 1, 'TN': 0, 'FN': 1, 'FP': 3,
                 'FP_INCONS': 1, 'FP_CONTRA': 1, 'FP_COMP': 1}
     observed = get_counts(ref, pred, split_fp=True)
     self.assertEqual(observed, expected)

     # with a sequence and min_dist a non-zero TN is expected
     rna = RnaSequence('UCAG-NACGU')
     expected = {'TP': 1, 'TN': 7, 'FN': 1, 'FP': 3,
                 'FP_INCONS': 1, 'FP_CONTRA': 1, 'FP_COMP': 1}
     observed = get_counts(ref, pred, split_fp=True,
                           sequences=[rna], min_dist=2)
     self.assertEqual(observed, expected)

     # check against compare_ct.pm
     expected = {'TP': 4, 'TN': 266, 'FN': 6, 'FP': 6,
                 'FP_INCONS': 2, 'FP_CONTRA': 2, 'FP_COMP': 2}
     upper_seq = 'agguugaaggggauccgauccacuccccggcuggucaaccu'.upper()
     observed = get_counts(self.true, self.predicted, split_fp=True,
                           sequences=[upper_seq], min_dist=4)
     self.assertEqual(observed, expected)
Пример #4
0
 def test_selectivity_dupl(self):
     """selectivity: duplicates and Nones must not change the result"""
     # reference holds (1, 6) in both orientations plus None-containing pairs
     reference = Pairs([
         (1, 6), (2, 5), (10, 13), (6, 1), (7, None), (None, None),
     ])
     prediction = Pairs([(6, 1), (3, 4), (10, 12)])
     observed = selectivity(reference, prediction)
     self.assertFloatEqual(observed, 0.5)
Пример #5
0
    def test_sensitivity_dupl(self):
        """sensitivity: duplicates, pseudoknots and None must be handled"""
        reference = Pairs([
            (1, 6), (2, 5), (3, 10), (7, None), (None, None), (5, 2), (4, 9),
        ])
        prediction = Pairs([(6, 1), (10, 11), (3, 12)])
        observed = sensitivity(reference, prediction)
        self.assertFloatEqual(observed, 0.25)

        # same prediction extended with None-containing pairs and (1, 6);
        # the expected value is unchanged
        prediction = Pairs([
            (6, 1), (10, 11), (3, 12), (20, None), (None, None), (1, 6),
        ])
        observed = sensitivity(reference, prediction)
        self.assertFloatEqual(observed, 0.25)
Пример #6
0
    def test_adjust_base_None(self):
        """adjust_base: Nones, duplicates and conflicts are all kept"""
        original = Pairs([(2, 8), (3, 7), (6, None), (None, None), (2, 10)])
        shifted_down = Pairs(
            [(1, 7), (2, 6), (5, None), (None, None), (1, 9)])
        self.assertEqual(adjust_base(original, -1), shifted_down)

        # repeated and flipped pairs, compared against a plain list
        repeated = Pairs([(1, 2), (2, 1), (1, 2), (2, None)])
        shifted_up = [(2, 3), (3, 2), (2, 3), (3, None)]
        self.assertEqual(adjust_base(repeated, 1), shifted_up)
Пример #7
0
 def test_ungapped_to_gapped(self):
     """ungapped_to_gapped: same pairs expected for every sequence fixture"""
     gapped = Pairs([(0, 6), (1, 5), (3, 9)])
     ungapped = Pairs([(0, 5), (1, 4), (3, 7)])
     convert = ungapped_to_gapped
     # fixtures are looked up one at a time, in the original order;
     # item 1 of each result is compared with the gapped pairs
     for fixture_name in ('rna1', 'm1', 's1'):
         sequence = getattr(self, fixture_name)
         converted = convert(sequence, ungapped)
         self.assertEqual(converted[1], gapped)
Пример #8
0
 def test_sensitivity_empty(self):
     """sensitivity: should work on empty Pairs"""
     # neither argument contains pairs
     observed = sensitivity(Pairs(), Pairs())
     self.assertFloatEqual(observed, 1)

     some_pairs = Pairs(
         [(6, 1), (10, 11), (3, 12), (13, 20), (14, 19), (15, 18)])
     # first argument empty
     observed = sensitivity(Pairs(), some_pairs)
     self.assertFloatEqual(observed, 0)
     # second argument empty
     observed = sensitivity(some_pairs, Pairs())
     self.assertFloatEqual(observed, 0)
Пример #9
0
 def test_selectivity_general(self):
     """selectivity: should work in general"""
     reference = Pairs([(1, 6), (2, 5), (10, 13)])

     # (6, 1) is the only predicted pair also present in the reference
     prediction = Pairs([(6, 1), (3, 4), (10, 12)])
     observed = selectivity(reference, prediction)
     self.assertFloatEqual(observed, 0.5)

     # larger prediction, still only (6, 1) in common; 0.25 is expected
     prediction = Pairs(
         [(6, 1), (10, 11), (3, 12), (13, 20), (14, 19), (15, 18)])
     observed = selectivity(reference, prediction)
     self.assertFloatEqual(observed, 0.25)
Пример #10
0
 def test_symmetric(self):
     """Pairs symmetric() should yield every pair in both orientations"""
     self.assertEqual(self.Empty.symmetric(), [])

     one_tuple_result = self.OneTuple.symmetric()
     self.assertEqualItems(one_tuple_result, [(2, 1), (1, 2)])

     # a repeated pair is expected only once per orientation
     doubled = Pairs([(1, 2), (1, 2)])
     self.assertEqualItems(doubled.symmetric(), [(1, 2), (2, 1)])

     two_pairs = Pairs([(1, 2), (3, 4)])
     self.assertEqualItems(
         two_pairs.symmetric(), [(1, 2), (2, 1), (3, 4), (4, 3)])

     # a pair containing None is expected to be left out
     with_none = Pairs([(1, None)])
     self.assertEqualItems(with_none.symmetric(), [])
Пример #11
0
 def test_selectivity_empty(self):
     """selectivity: should handle empty reference/predicted structure"""
     # neither argument contains pairs
     observed = selectivity(Pairs(), Pairs())
     self.assertFloatEqual(observed, 1)

     some_pairs = Pairs(
         [(6, 1), (10, 11), (3, 12), (13, 20), (14, 19), (15, 18)])
     # first argument empty
     observed = selectivity(Pairs(), some_pairs)
     self.assertFloatEqual(observed, 0)
     # second argument empty
     observed = selectivity(some_pairs, Pairs())
     self.assertFloatEqual(observed, 0)
Пример #12
0
 def test_delete_gaps_from_pairs_weird(self):
     """delete_gaps_from_pairs: conflicts, duplicates and None are kept"""
     degap = delete_gaps_from_pairs
     gap_positions = [0, 1, 4, 5, 7, 9]

     crossing = Pairs([(2, 6), (3, 8)])
     observed = degap(crossing, gap_positions)
     self.assertEqualItems(observed, [(0, 2), (1, 3)])

     # same pairs plus a flipped pair, a duplicate and None entries
     messy = Pairs(
         [(2, 6), (3, 8), (3, None), (6, 2), (3, 8), (None, None)])
     observed = degap(messy, gap_positions)
     expected = [(0, 2), (1, 3), (1, None), (2, 0), (1, 3), (None, None)]
     self.assertEqualItems(observed, expected)
Пример #13
0
 def test_directed(self):
     """Pairs directed() should return pairs (a,b) with a<b"""
     self.assertEqual(self.Empty.directed(), [])

     # sorted in place so the comparison does not depend on result order
     directed = self.Undirected.directed()
     directed.sort()
     expected = Pairs([(1, 2), (1, 7), (3, 8), (4, 6)])
     self.assertEqual(directed, expected)

     directed = self.UndirectedNone.directed()
     self.assertEqual(directed, Pairs([]))

     directed = self.UndirectedDouble.directed()
     self.assertEqual(directed, Pairs([(1, 2)]))
Пример #14
0
 def test_get_counts_pseudo(self):
     """get_counts: pseudoknot in the reference changes FP classification"""
     # (3, 6) is the only false positive; it is expected to be counted
     # as contradicting rather than compatible
     reference = Pairs([(0, 8), (1, 7), (4, 10)])
     prediction = Pairs([(0, 8), (3, 6), (4, 10)])
     sequence = 'GACUGUGUCAU'
     expected = {'TP': 2, 'TN': 13 - 2 - 1, 'FN': 1, 'FP': 1,
                 'FP_INCONS': 0, 'FP_CONTRA': 1, 'FP_COMP': 0}
     observed = get_counts(reference, prediction, split_fp=True,
                           sequences=[sequence], min_dist=4)
     self.assertEqual(observed, expected)
Пример #15
0
def pairs_union(one, other):
    """Returns the union of one and other

    one: list of tuples or Pairs object
    other: list of tuples or Pairs object

    one and other should map onto a sequence of the same length.

    Both inputs are passed through Pairs.directed() and collected in
        frozensets (which removes duplicates) before they are combined.
    """
    first = frozenset(Pairs(one).directed())
    second = frozenset(Pairs(other).directed())
    combined = first | second
    return Pairs(combined)
Пример #16
0
def get_counts(ref, predicted, split_fp=False, sequences=None, min_dist=4):
    """Return a dict with TP, TN, FN and FP counts for a prediction

    ref: list of tuples or Pairs object (the reference structure)
    predicted: list of tuples or Pairs object (the predicted structure)
    split_fp: if True, every false positive is also counted in exactly
        one of FP_INCONS, FP_CONTRA or FP_COMP
    sequences: handed to get_all_pairs together with min_dist; TN is
        only calculated when this is given (non-empty)
    min_dist: handed to get_all_pairs

    The result has the keys 'TP', 'TN', 'FN', 'FP', 'FP_INCONS',
        'FP_CONTRA' and 'FP_COMP'; counts that are not calculated stay 0.

    Classification of a false positive (x,y) when split_fp is True:
        FP_INCONS: x or y is paired in the reference
        FP_CONTRA: some position in x..y is paired in the reference
            with a partner outside x..y
        FP_COMP: none of the above

    Note: TN = possible pairs - TP - FP_INCONS - FP_CONTRA. The last two
        are 0 unless split_fp is True.
    """
    result = dict.fromkeys(['TP', 'TN', 'FN', 'FP',
        'FP_INCONS', 'FP_CONTRA', 'FP_COMP'], 0)

    # wrap once so plain lists of tuples work for every call below
    ref_pairs = Pairs(ref)
    pred_pairs = Pairs(predicted)

    ref_set = frozenset(ref_pairs.directed())
    pred_set = frozenset(pred_pairs.directed())

    # position -> partner, in both directions, for the reference
    ref_dict = dict(ref_pairs.symmetric())

    fp_pairs = pred_set.difference(ref_set)
    result['TP'] = len(ref_set.intersection(pred_set))
    result['FN'] = len(ref_set.difference(pred_set))
    result['FP'] = len(fp_pairs)

    if split_fp:
        # every false positive lands in exactly one of the three counters
        for x, y in fp_pairs:
            if x in ref_dict or y in ref_dict:
                result['FP_INCONS'] += 1
            elif any(idx in ref_dict and
                     (ref_dict[idx] < x or ref_dict[idx] > y)
                     for idx in range(x, y + 1)):
                result['FP_CONTRA'] += 1
            else:
                result['FP_COMP'] += 1

    if sequences:
        num_possible_pairs = get_all_pairs(sequences, min_dist)
        result['TN'] = num_possible_pairs - result['TP'] -\
            result['FP_INCONS'] - result['FP_CONTRA']

    return result
Пример #17
0
 def test_all_metrics_pseudo(self):
     """all_metrics: pseudoknot in ref, values checked against compare_ct.pm"""
     reference = Pairs([(0, 8), (1, 7), (4, 10)])
     prediction = Pairs([(0, 8), (3, 6), (4, 10)])
     sequence = 'GACUGUGUCAU'
     expected = {
         'SENSITIVITY': 0.6666667,
         'SELECTIVITY': 0.6666667,
         'AC': 0.6666667,
         'CC': 0.57575758,
         'MCC': 0.57575758,
     }
     observed = all_metrics(reference, prediction, seqs=[sequence],
                            min_dist=4)
     # same set of metric names first, then the individual values
     self.assertEqualItems(observed.keys(), expected.keys())
     for metric, value in expected.items():
         self.assertFloatEqual(observed[metric], value)
Пример #18
0
    def test_pairs_intersection_duplicates(self):
        """pairs_intersection: flipped pairs and duplicates are handled"""
        first = Pairs([(3, 10), (4, 9), (5, 8), (20, 24)])
        # second holds (3, 10) flipped and (4, 9) three times
        second = Pairs([(10, 3), (4, 9), (5, 8), (9, 4), (4, 9), (23, 30)])
        shared = pairs_intersection(first, second)
        self.assertEqualItems(shared, [(3, 10), (4, 9), (5, 8)])

        # Conflicts, duplicates, None, pseudoknots
        first = Pairs([
            (3, 10), (4, 9), (5, 8), (20, 24),
            (22, 26), (3, 2), (9, 4), (6, None),
        ])
        second = Pairs([(1, 12), (4, 9), (5, 8)])
        shared = pairs_intersection(first, second)
        self.assertEqualItems(shared, [(4, 9), (5, 8)])
Пример #19
0
 def test_compare_random_to_correct(self):
     """compare_random_to_correct: should return correct fraction"""
     correct = Pairs([(1, 8), (2, 7), (3, 6), (4, 5)])
     single = Pairs([(1, 8)])
     subset = Pairs([(1, 8), (2, 7), (4, 5)])
     half_right = Pairs([(1, 8), (2, 7), (9, 10), (11, 12)])

     # (first argument, second argument, expected fraction)
     cases = [
         (single, correct, 1),
         (subset, correct, 1),
         (half_right, correct, 0.5),
         ([], correct, 0),
         (single, [], 0),
         ([], [], 1),
     ]
     for one, other, expected in cases:
         observed = compare_random_to_correct(one, other)
         self.assertFloatEqual(observed, expected)
Пример #20
0
    def test_adjust_pairs_from_mapping_confl(self):
        """adjust_pairs_from_mapping: conflicts, pseudoknots, duplicates"""
        remap = adjust_pairs_from_mapping

        # every position is a key of the mapping, so no pair is dropped
        pairs = Pairs([
            (0, 6), (1, 5), (2, None), (None, None), (1, 4), (3, 7), (6, 0),
        ])
        mapping = {0: 1, 1: 3, 2: 6, 3: 7, 4: 8, 5: 10, 6: 11, 7: 12}
        expected = Pairs([
            (1, 11), (3, 10), (6, None), (None, None),
            (3, 8), (7, 12), (11, 1),
        ])
        self.assertEqual(remap(pairs, mapping), expected)

        # 5 is not a key of the mapping, so (5, 8) is expected to vanish
        pairs = Pairs([
            (1, 11), (3, 10), (7, 12), (6, None), (None, None), (5, 8),
        ])
        mapping = {1: 0, 3: 1, 6: 2, 7: 3, 8: 4, 10: 5, 11: 6, 12: 7}
        expected = Pairs([(0, 6), (1, 5), (3, 7), (2, None), (None, None)])
        self.assertEqual(remap(pairs, mapping), expected)
Пример #21
0
 def test_insert_gaps_in_pairs(self):
     """insert_gaps_in_pairs: normal input and conflicting pairs"""
     pairs = Pairs([(0, 3), (1, 2), (1, 4), (3, None)])
     gap_positions = [0, 1, 4, 5, 7]
     observed = insert_gaps_in_pairs(pairs, gap_positions)
     self.assertEqual(observed, [(2, 8), (3, 6), (3, 9), (8, None)])

     # the same pairs are checked against two different gap lists
     pairs = Pairs([(0, 6), (1, 5), (2, None), (3, 7), (0, 1), (5, 1)])
     gap_positions = [0, 2, 6, 9]
     observed = insert_gaps_in_pairs(pairs, gap_positions)
     self.assertEqual(
         observed,
         [(1, 10), (3, 8), (4, None), (5, 11), (1, 3), (8, 3)])

     gap_positions = [2, 3, 4, 9]
     observed = insert_gaps_in_pairs(pairs, gap_positions)
     self.assertEqual(
         observed,
         [(0, 10), (1, 8), (5, None), (6, 11), (0, 1), (8, 1)])
Пример #22
0
def _ct_record(seq, struct, energy):
    """Returns one ct_parser result entry: [seq, pairs] or [seq, pairs, energy]
    """
    # structs are one(1) based, adjust to zero based
    pairs = Pairs(adjust_base(struct, -1)).directed()
    pairs.sort()
    if energy is not None:
        return [seq, pairs, energy]
    return [seq, pairs]


def ct_parser(lines=None):
    """Ct format parser

    Takes lines from a ct file as input

    Returns a list containing sequence,structure and if available the energy.
    [[seq1,[struct1],energy1],[seq2,[struct2],energy2],...]

    The first line, and every line for which new_struct(line) is true,
        is treated as a header that starts a new structure. If a header
        contains the field 'dG' or 'ENERGY' its fourth field is read as
        the energy; if it contains 'Structure' the third field is used.
    On all other lines the second field is appended to the sequence and
        the first and fifth fields form a pair, unless the fifth field
        is 0.
    Pairs are returned zero based, directed and sorted.
    """
    result = []
    seq_parts = []
    struct = []
    energy = None

    for count, line in enumerate(lines, 1):
        sline = line.split(None, 6)  # sline = split line
        if count == 1 or new_struct(line):  # first line or new struct line
            if count > 1:
                # close the structure collected so far; this used to
                # append an undefined name when no energy was present
                result.append(_ct_record(''.join(seq_parts), struct, energy))
                seq_parts = []
                struct = []
                energy = None
            # checks if energy for predicted struct is given
            if 'dG' in sline or 'ENERGY' in sline:
                energy = atof(sline[3])
            if 'Structure' in sline:
                energy = atof(sline[2])
        else:
            seq_parts.append(sline[1])
            partner = int(sline[4])
            if partner != 0:  # 0 means no partner is listed for this base
                struct.append((int(sline[0]), partner))

    # the last structure has no following header line to close it
    result.append(_ct_record(''.join(seq_parts), struct, energy))
    return result
Пример #23
0
 def test_delete_gaps_from_pairs(self):
     """delete_gaps_from_pairs: should work on standard input"""
     degap = delete_gaps_from_pairs

     # no pairs at all
     self.assertEqual(degap(Pairs([]), [1, 2, 3]), [])

     # pairs whose positions are all outside the gap list
     gap_positions = [0, 1, 4, 5, 7, 9]
     nested = Pairs([(2, 8), (3, 6)])
     observed = degap(nested, gap_positions)
     self.assertEqualItems(observed, [(0, 3), (1, 2)])

     # extra pair (4, 9): both positions are in the gap list
     both_gapped = Pairs([(2, 8), (3, 6), (4, 9)])
     observed = degap(both_gapped, gap_positions)
     self.assertEqualItems(observed, [(0, 3), (1, 2)])

     # extra pair (4, 10): only position 4 is in the gap list
     one_gapped = Pairs([(2, 8), (3, 6), (4, 10)])
     observed = degap(one_gapped, gap_positions)
     self.assertEqualItems(observed, [(0, 3), (1, 2)])
Пример #24
0
    def test_tuples(self):
        """Pairs tuples() should turn every element of the list into a tuple"""
        # empty Pairs
        empty = Pairs([])
        empty.tuples()
        assert empty == []

        # elements given as lists
        from_lists = Pairs([[1, 2], [3, 4]])
        from_lists.tuples()
        assert from_lists == [(1, 2), (3, 4)]

        # elements that are tuples already
        from_tuples = Pairs([(1, 2), (3, 4)])
        from_tuples.tuples()
        assert from_tuples == [(1, 2), (3, 4)]
        assert from_tuples != [[1, 2], [3, 4]]
Пример #25
0
    def test_mismatches(self):
        """Pairs mismatches() should count the pairs that can't be formed"""
        # with plain string and an explicit dict of allowed pairs
        one_pair = Pairs([(0, 1)])
        self.assertEqual(one_pair.mismatches('AC', {}), 1)

        one_pair = Pairs([(0, 1)])
        allowed = {('A', 'C'): None}
        self.assertEqual(one_pair.mismatches('AC', allowed), 0)

        one_pair = Pairs([(0, 1)])
        allowed = {('A', 'G'): None}
        self.assertEqual(one_pair.mismatches('AC', allowed), 1)

        several = Pairs([(0, 1), (2, 3), (3, 1)])
        allowed = {('A', 'U'): None}
        self.assertEqual(several.mismatches('ACGU', allowed), 3)

        # using sequence with alphabet, no dict of allowed pairs passed
        sequence = Rna('ACGUA')
        several = Pairs([(0, 1), (0, 4), (0, 3)])
        self.assertEqual(several.mismatches(sequence), 2)
Пример #26
0
def compare_random_to_correct(one, other):
    """Returns fraction of bp in one that is in other (correct)

    one: list of tuples or Pairs object
    other: list of tuples or Pairs object

    Note: the second structure is the one compared against (the correct
        structure)

    Returns a float: 1.0 if both structures are empty, 0.0 if exactly one
        of them is empty. The same rule is applied when a structure turns
        out empty after Pairs.directed().
    """
    if not one and not other:
        return 1.0
    if not one or not other:
        return 0.0
    pairs1 = frozenset(Pairs(one).directed())  #removes duplicates
    pairs2 = frozenset(Pairs(other).directed())
    if not pairs1:
        # nothing left to divide by; apply the same rule as for empty input
        if pairs2:
            return 0.0
        return 1.0
    # float() keeps this a true division even without
    # "from __future__ import division"
    return float(len(pairs1 & pairs2)) / len(pairs1)
Пример #27
0
def adjust_base(pairs, offset):
    """Returns new Pairs in which every position is moved by offset

    pairs: Pairs object or list of tuples
    offset: integer

    Raises PairsAdjustmentError if offset is not an integer.

    The result is not validated: negative positions can come out of
        this function, which is the caller's responsibility.
    Every input pair produces exactly one output pair, so pairs
        containing None and duplicated pairs are kept. None is passed
        through unchanged.

    Example: adjust_base(Pairs([(2,8),(4,None)]), 2) --> [(4,10),(6,None)]
    """
    if not isinstance(offset, int):
        raise PairsAdjustmentError("adjust_base: offset should be integer")

    def shift(position):
        # None has no position to move
        return position if position is None else position + offset

    shifted = Pairs()
    for up, down in pairs:
        shifted.append((shift(up), shift(down)))
    assert len(shifted) == len(pairs)
    return shifted
Пример #28
0
def adjust_pairs_from_mapping(pairs, mapping):
    """Returns new Pairs object with positions translated through mapping

    pairs: list of tuples or Pairs object
    mapping: dictionary that maps positions in one state onto positions
        in the other (e.g. ungapped to gapped)
         For example:
        {0: 0, 1: 1, 2: 3, 3: 4, 4: 6, 5: 7, 6: 9, 7: 10, 8: 12}

    To insert gaps into Pairs that belong to an ungapped sequence, use
        a mapping from ungapped to gapped.
    To degap Pairs that belong to a gapped sequence, use a mapping from
        gapped to ungapped.

    None is kept as None. A pair is left out of the result when one of
        its positions is neither None nor a key of the mapping.
    """
    adjusted = Pairs()
    for up, down in pairs:
        if up is not None:
            if up not in mapping:
                continue  # no image for this position: drop the pair
            up = mapping[up]
        if down is not None:
            if down not in mapping:
                continue  # no image for this position: drop the pair
            down = mapping[down]
        adjusted.append((up, down))

    return adjusted
Пример #29
0
    def test_pairs_intersection(self):
        """pairs_intersection: simple case, empty input and plain lists"""
        first = Pairs([(3, 10), (4, 9), (5, 8), (20, 24)])
        second = Pairs([(1, 12), (4, 9), (5, 8)])
        shared = pairs_intersection(first, second)
        self.assertEqualItems(shared, [(4, 9), (5, 8)])

        # one structure without any pairs
        first = Pairs([(3, 10), (4, 9), (5, 8), (20, 24)])
        second = Pairs([])
        shared = pairs_intersection(first, second)
        self.assertEqualItems(shared, [])

        # plain lists of tuples instead of Pairs objects
        first = [(3, 10), (4, 9), (5, 8), (20, 24)]
        second = [(1, 12), (4, 9), (5, 8)]
        shared = pairs_intersection(first, second)
        self.assertEqualItems(shared, [(4, 9), (5, 8)])
Пример #30
0
    def test_adjust_base(self):
        """adjust_base: works on a Pairs object or a plain list of pairs"""
        # empty Pairs
        empty = Pairs()
        self.assertEqual(adjust_base(empty, 10), [])

        # plain list, shifted down by one
        one_based = [(1, 21), (2, 15), (3, 13), (4, 11), (5, 10), (6, 9)]
        shift = -1
        zero_based = [(0, 20), (1, 14), (2, 12), (3, 10), (4, 9), (5, 8)]
        observed = adjust_base(one_based, shift)
        self.assertEqual(observed, zero_based)

        # Pairs object; a negative position is expected in the first result
        small = Pairs([(0, 10), (1, 9)])
        self.assertEqual(adjust_base(small, -1), Pairs([(-1, 9), (0, 8)]))
        self.assertEqual(adjust_base(small, 5), Pairs([(5, 15), (6, 14)]))

        # a float offset is rejected
        self.assertRaises(PairsAdjustmentError, adjust_base, small, 3.5)