Пример #1
0
 def setUp(self):
     """Build the reference/predicted structures and the sequence fixture."""
     reference_pairs = [
         (0, 40), (1, 39), (2, 38), (3, 37), (10, 20),
         (11, 19), (12, 18), (13, 17), (26, 33), (27, 32),
     ]
     predicted_pairs = [
         (0, 40), (1, 39), (2, 38), (3, 37), (4, 36),
         (5, 35), (10, 22), (11, 20), (14, 29), (15, 28),
     ]
     self.true = BasePairs(reference_pairs)
     self.predicted = BasePairs(predicted_pairs)
     # FASTA-like fixture: a header line followed by the sequence line
     self.seq = ['>seq1\n', 'agguugaaggggauccgauccacuccccggcuggucaaccu']
Пример #2
0
 def test_hasConflicts(self):
     """BasePairs.hasConflicts should return True if conflicts exist"""
     # no position occurs in more than one pair
     for conflict_free in ([], [(1, 2), (3, 4)]):
         self.assertFalse(BasePairs(conflict_free).hasConflicts())
     # position 2 occurs in two different pairs
     for conflicting in ([(1, 2), (2, 3)], [(1, 2), (2, None)]):
         self.assertTrue(BasePairs(conflicting).hasConflicts())
     self.assertTrue(self.bplist_with_conflicts.hasConflicts())
Пример #3
0
 def test_selectivity_dupl(self):
     """selectivity: duplicates and Nones shouldn't influence the calc."""
     # (6, 1) repeats (1, 6) reversed; the last two entries contain None
     reference = BasePairs([
         (1, 6), (2, 5), (10, 13), (6, 1), (7, None), (None, None),
     ])
     predicted = BasePairs([(6, 1), (3, 4), (10, 12)])
     observed = selectivity(reference, predicted)
     self.assertFloatEqual(observed, 0.5)
Пример #4
0
 def test_get_counts(self):
     """get_counts: should work with all parameters"""
     # NOTE(review): this first sequence is reassigned before it is used
     seq = RnaSequence('UCAG-NAUGU')
     reference = BasePairs([(1, 8), (2, 7)])
     predicted = BasePairs([(1, 8), (2, 6), (3, 6), (4, 9)])

     # third positional argument False: all FP_* sub-counts expected 0
     expected = {'TP': 1, 'TN': 0, 'FN': 1, 'FP': 3,
                 'FP_INCONS': 0, 'FP_CONTRA': 0, 'FP_COMP': 0}
     self.assertEqual(get_counts(reference, predicted, False), expected)

     # split_fp=True: the three false positives fall into three classes
     expected = {'TP': 1, 'TN': 0, 'FN': 1, 'FP': 3,
                 'FP_INCONS': 1, 'FP_CONTRA': 1, 'FP_COMP': 1}
     self.assertEqual(
         get_counts(reference, predicted, split_fp=True), expected)

     # with a sequence and min_dist the TN count becomes non-zero
     seq = RnaSequence('UCAG-NACGU')
     expected = {'TP': 1, 'TN': 7, 'FN': 1, 'FP': 3,
                 'FP_INCONS': 1, 'FP_CONTRA': 1, 'FP_COMP': 1}
     observed = get_counts(reference, predicted, split_fp=True,
                           sequences=[seq], min_dist=2)
     self.assertEqual(observed, expected)

     # check against compare_ct.pm
     expected = {'TP': 4, 'TN': 266, 'FN': 6, 'FP': 6,
                 'FP_INCONS': 2, 'FP_CONTRA': 2, 'FP_COMP': 2}
     seq = 'agguugaaggggauccgauccacuccccggcuggucaaccu'.upper()
     observed = get_counts(self.true, self.predicted, split_fp=True,
                           sequences=[seq], min_dist=4)
     self.assertEqual(observed, expected)
Пример #5
0
    def test_seq_simple(self):
        """get_bps_for_aligned_seq should work for simple case"""
        # the only two non-gap columns are the two ends of the pair
        alignment_row = "--U--------------A"
        predicted = BasePairs([(2, 17)])

        mapped = get_bps_for_aligned_seq(alignment_row, predicted)
        expected = BasePairs([(0, 1)])
        self.assertEqual(mapped, expected)
Пример #6
0
    def test_simple_offset_ok(self):
        """get_bps_for_aligned_seq should work when first_index=1, part 2"""
        alignment_row = "-U--------------A-"
        predicted = BasePairs([(2, 17)])

        # third positional argument is first_index
        mapped = get_bps_for_aligned_seq(alignment_row, predicted, 1)
        expected = BasePairs([(1, 2)])
        self.assertEqual(mapped, expected)
Пример #7
0
    def test_seq_conflict(self):
        """get_bps_for_aligned_seq should work for conflicted case"""
        alignment_row = "--U--A-----------A"
        # column 2 takes part in two pairs
        predicted = BasePairs([(2, 5), (2, 17)])

        mapped = get_bps_for_aligned_seq(alignment_row, predicted)
        expected = BasePairs([(0, 1), (0, 2)])
        self.assertEqual(mapped, expected)
Пример #8
0
    def test_seq_conflict_offset(self):
        """get_bps_for_aligned_seq should conflict first_index=1, part 1"""
        alignment_row = "--U--A-----------A"
        predicted = BasePairs([(2, 5), (2, 17)])

        # with first_index=1 no pair is expected to survive the mapping
        mapped = get_bps_for_aligned_seq(alignment_row, predicted, 1)
        expected = BasePairs([])
        self.assertEqual(mapped, expected)
Пример #9
0
    def test_aligned_seq_skip(self):
        """get_bps_for_aligned_seq should work when some base pairs are skipped"""
        alignment_row = "ACUAGCUG-----ACUGA"
        # column 10 is a gap, so only the (2, 17) pair is expected back
        predicted = BasePairs([(2, 10), (2, 17)])

        mapped = get_bps_for_aligned_seq(alignment_row, predicted)
        expected = BasePairs([(2, 12)])
        self.assertEqual(mapped, expected)
Пример #10
0
    def test_toVienna_toPairs(self):
        """BasePairs.toVienna.toPairs() should generate the same BasePairs"""
        # (pairs, length passed to toVienna)
        round_trips = (
            (((1, 3), (4, 5), (7, 12)), 15),
            (((1, 3), (4, 5), (7, 121)), 150),
        )
        for pair_tuples, length in round_trips:
            bps = BasePairs(pair_tuples)
            self.assertEqual(bps.toVienna(length).toPairs(), bps)
Пример #11
0
    def test_sensitivity_dupl(self):
        """sensitivity: should handle duplicates, pseudo, None"""
        reference = BasePairs([
            (1, 6), (2, 5), (3, 10), (7, None), (None, None), (5, 2), (4, 9),
        ])
        predicted = BasePairs([(6, 1), (10, 11), (3, 12)])
        self.assertFloatEqual(sensitivity(reference, predicted), 0.25)

        # same prediction plus None entries and a reversed duplicate
        predicted = BasePairs([
            (6, 1), (10, 11), (3, 12), (20, None), (None, None), (1, 6),
        ])
        self.assertFloatEqual(sensitivity(reference, predicted), 0.25)
Пример #12
0
 def test_sensitivity_empty(self):
     """sensitivity: should work on empty BasePairs"""
     # both arguments empty
     self.assertFloatEqual(sensitivity(BasePairs([]), BasePairs([])), 1)
     non_empty = BasePairs([
         (6, 1), (10, 11), (3, 12), (13, 20), (14, 19), (15, 18),
     ])
     # first argument empty (the reference slot in the other tests)
     self.assertFloatEqual(sensitivity(BasePairs([]), non_empty), 0)
     # second argument empty (the prediction slot in the other tests)
     self.assertFloatEqual(sensitivity(non_empty, BasePairs([])), 0)
Пример #13
0
 def test_selectivity_empty(self):
     """selectivity: should handle empty reference/predicted structure"""
     # both arguments empty
     self.assertFloatEqual(selectivity(BasePairs([]), BasePairs([])), 1)
     non_empty = BasePairs([
         (6, 1), (10, 11), (3, 12), (13, 20), (14, 19), (15, 18),
     ])
     # first argument empty (the reference slot in the other tests)
     self.assertFloatEqual(selectivity(BasePairs([]), non_empty), 0)
     # second argument empty (the prediction slot in the other tests)
     self.assertFloatEqual(selectivity(non_empty, BasePairs([])), 0)
Пример #14
0
 def test_selectivity_general(self):
     """selectivity: should work in general"""
     reference = BasePairs([(1, 6), (2, 5), (10, 13)])

     # one good prediction
     predicted = BasePairs([(6, 1), (3, 4), (10, 12)])
     self.assertFloatEqual(selectivity(reference, predicted), 0.5)

     # over-prediction not penalized
     predicted = BasePairs([
         (6, 1), (10, 11), (3, 12), (13, 20), (14, 19), (15, 18),
     ])
     self.assertFloatEqual(selectivity(reference, predicted), 0.25)
Пример #15
0
 def test_get_counts_pseudo(self):
     """get_counts: should work when pseudo in ref -> classification off"""
     # pairs that would normally be compatible, are now contradicting
     reference = BasePairs([(0, 8), (1, 7), (4, 10)])
     predicted = BasePairs([(0, 8), (3, 6), (4, 10)])
     seq = 'GACUGUGUCAU'
     expected = {
         'TP': 2, 'TN': 13 - 2 - 1, 'FN': 1, 'FP': 1,
         'FP_INCONS': 0, 'FP_CONTRA': 1, 'FP_COMP': 0,
     }
     observed = get_counts(reference, predicted, split_fp=True,
                           sequences=[seq], min_dist=4)
     self.assertEqual(observed, expected)
Пример #16
0
 def test_symmetric(self):
     """BasePairs.symmetric should add (down,up) for each (up,down)"""
     self.assertEqual(BasePairs([]).symmetric(), [])

     # (input pairs, items allowed to appear in the symmetric result)
     cases = [
         ([(1, 2)], [(2, 1), (1, 2)]),
         ([(1, 2), (1, 2)], [(1, 2), (2, 1)]),
         ([(1, 2), (3, 4)], [(1, 2), (2, 1), (3, 4), (4, 3)]),
         # an empty allowed list means any returned item fails the test
         ([(1, None)], []),
     ]
     for pairs, allowed in cases:
         for item in BasePairs(pairs).symmetric():
             # assertTrue replaces the deprecated assert_ alias
             self.assertTrue(item in allowed)
Пример #17
0
    def test_many_gaps_seq(self):
        """get_bps_for_aligned_seq test on seq with many gaps"""
        alignment_row = "ACUAGCUG-----ACU-A---------CGCGC---A"
        predicted = BasePairs([(2, 10), (2, 17), (4, 36)])

        # same input mapped with two different first_index values
        mapped = get_bps_for_aligned_seq(alignment_row, predicted, 1)
        self.assertEqual(mapped, BasePairs([(4, 18)]))

        mapped = get_bps_for_aligned_seq(alignment_row, predicted, 2)
        self.assertEqual(mapped, BasePairs([(2, 12)]))
Пример #18
0
    def test_conflicts(self):
        """all metrics should raise error when conflicts in one of the structs"""
        # position 1 is paired with both 6 and 12 -> conflicting structure
        conflicting = BasePairs([
            (1, 6), (2, 5), (3, 10), (7, None), (None, None), (5, 2), (1, 12),
        ])
        clean = BasePairs([(6, 1), (10, 11), (3, 12)])

        # the conflict must be detected in either argument position
        for metric in (sensitivity, selectivity):
            self.assertRaises(ValueError, metric, conflicting, clean)
            self.assertRaises(ValueError, metric, clean, conflicting)
        self.assertRaises(ValueError, mcc, conflicting, clean, self.seq)
        self.assertRaises(ValueError, mcc, clean, conflicting, self.seq)
Пример #19
0
    def test_mismatches(self):
        """BasePairs.mismatches should return base pairs that can't be made"""
        # with plain string
        observed = BasePairs([(0, 1)]).mismatches('AC', {})
        self.assertEqual(observed, 1)

        observed = BasePairs([(0, 1)]).mismatches('AC', {('A', 'C'): None})
        self.assertEqual(observed, 0)

        observed = BasePairs([(0, 1)]).mismatches('AC', {('A', 'G'): None})
        self.assertEqual(observed, 1)

        three_pairs = BasePairs([(0, 1), (2, 3), (3, 1)])
        observed = three_pairs.mismatches('ACGU', {('A', 'U'): None})
        self.assertEqual(observed, 3)

        # using sequence with alphabet
        with_alphabet = BasePairs([(0, 1), (0, 4), (0, 3)])
        self.assertEqual(with_alphabet.mismatches(Rna('ACGUA')), 2)
Пример #20
0
def parse_pknots(fh):
    """Returns a list of base pairs and their corresponding energy parsed
        from a pknots format line iterator.

        fh: a file open for reading, or any other iterable of lines

        Returns a tuple (BasePairs, energy). Every parsed index is
        incremented by one before it is stored. energy is None when no
        line starting with 'energy' is found.

        Raises StopIteration if the input ends directly after an index
        line, i.e. the partner line is missing.
    """
    energy = None
    last_number = '-1'
    pairs_str = []
    seen = set()  # mirrors pairs_str for O(1) reverse-duplicate checks
    # One shared iterator, so the partner line can be consumed with next()
    # while looping; this works for files and plain lists alike.
    lines = iter(fh)
    for line in lines:
        stripped = line.strip()
        # An index line starts with '0' or continues from the last index
        # seen on the previous index line.
        if stripped.startswith('0') or \
                stripped.startswith(str(int(last_number) + 1)):

            first_line = stripped.split()
            # the line directly below holds the partner of each index
            second_line = next(lines).strip().split()
            for op, cl in zip(first_line, second_line):
                if cl == '.':
                    # '.' marks a position without a partner
                    continue
                # skip a pair already recorded from its other end
                if (cl, op) not in seen:
                    seen.add((op, cl))
                    pairs_str.append((op, cl))
            last_number = first_line[-1]

        if stripped.startswith('energy'):
            # the energy value is the last whitespace-separated field
            energy = float(line.split()[-1].strip())
            break

    return (BasePairs([(int(x) + 1, int(y) + 1)
                       for x, y in pairs_str]), energy)
Пример #21
0
 def test_make_non_conflicting_viennas_no_conflicts(self):
     """BasePairs.make_non_conflicting_viennas - no conflicts in input"""
     conflict_free = BasePairs([(1, 2), (3, 4), (5, 6)])
     viennas = conflict_free.make_non_conflicting_viennas(10)
     # a single structure string is expected for conflict-free input
     self.assertEqual(len(viennas), 1)
     self.assertEqual(viennas[0], '()()()....')
Пример #22
0
    def _parse(self):
        """Parse the CT file at self.ct_path.

        Lines starting with '#' are collected into a '>'-prefixed
        definition line (joined with '|', with '#' removed); all other
        lines are passed to ct_parser.

        Returns a tuple (defline, sequence, vienna, pairs). vienna is None
        when toVienna raises PseudoknotTokensError. Returns four Nones when
        the sequence fails self._is_valid_sequence.
        """
        tmp_ct = []
        defline = ">"
        # context manager guarantees the file handle is closed
        with open(self.ct_path) as ct_file:
            for line in ct_file:
                if line.startswith('#'):
                    defline += line.strip() + '|'
                else:
                    tmp_ct.append(line)
        defline = defline.replace('\n', '').replace('#', '')

        # just one entry per file
        result = ct_parser(tmp_ct)[0]
        sequence = result[0]

        # IMPORTANT!
        # ct_parser starts numbering of base pairs from 0, not from 1, as
        # elsewhere in nucleic.secstruc. To make things consistent, here
        # we'll add 1 to each index in base pairs list!
        result_1 = [(pair[0] + 1, pair[1] + 1) for pair in result[1]]
        pairs = BasePairs(result_1)

        if self._is_valid_sequence(sequence):
            sequence = Sequence(sequence.upper()).seq_without_modifications
            # Second check, because ModeRNA might also mix a bit at this stage
            # including for example X in sequences without modifications!
            if self._is_valid_sequence(sequence):
                try:
                    vienna = pairs.toVienna(len(sequence))
                except PseudoknotTokensError:
                    vienna = None
                # Returning after the try block (not inside a finally) lets
                # unexpected errors propagate unchanged, instead of being
                # replaced by an UnboundLocalError on `vienna`.
                return defline, sequence, vienna, pairs

        return None, None, None, None
Пример #23
0
 def test_toVienna_conflict(self):
     """BasePairs.toVienna should raise ConflictInBasePairsError"""
     conflicted = self.bplist_with_conflicts
     self.assertRaises(ConflictInBasePairsError, conflicted.toVienna, 100)
     # same call with an additional positional argument
     self.assertRaises(
         ConflictInBasePairsError, conflicted.toVienna, 100, -2)
     # position 2 is shared by both pairs
     self.assertRaises(
         ConflictInBasePairsError, BasePairs([(1, 2), (2, 3)]).toVienna, 4)
Пример #24
0
def parse_sfold(data):
    """Returns a list of base pairs parsed from an Sfold format line iterator.

    data: iterable of lines

    Only lines that end with two whitespace-separated integers (optionally
    followed by newlines) are considered. Such a line must consist of
    exactly two fields; otherwise the tuple unpacking raises ValueError.
    """
    bpairs = []
    for line in data:
        # raw string: \d, \s and \Z are regex escapes, not string escapes
        if re.search(r"\d+\s+\d+\n*\Z", line):
            a, b = line.strip().split()
            bpairs.append((int(a), int(b)))
    return BasePairs(bpairs)
Пример #25
0
 def test_directed(self):
     """BasePairs.directed should change all pairs so that a<b in (a,b)"""
     self.assertEqual(BasePairs([]).directed(), [])

     # reversed pairs come back with the smaller index first
     unordered = BasePairs([(2, 1), (6, 4), (1, 7), (8, 3)])
     self.assertEqual(
         unordered.directed(), BasePairs([(1, 2), (1, 7), (3, 8), (4, 6)]))

     # pairs containing None are expected to disappear
     with_none = BasePairs([(5, None), (None, 3)])
     self.assertEqual(with_none.directed(), BasePairs([]))

     # a pair and its reverse are expected to collapse into one entry
     mirrored = BasePairs([(2, 1), (1, 2)])
     self.assertEqual(mirrored.directed(), BasePairs([(1, 2)]))
Пример #26
0
def is_bp_a_pseudoknot(bp0, bp1, bplist):
    """Tell whether the pair (bp0, bp1) crosses any pair of bplist.

    bp0: int, number of the first base
    bp1: int, number of the second base
    bplist: a list of base pair indices

    Returns True when some directed pair (a, b) built from bplist
    interleaves with the given pair, i.e. bp0 < a < bp1 < b or
    a < bp0 < b < bp1; False otherwise.
    """
    def crosses(other):
        # exactly one end of `other` lies strictly between bp0 and bp1
        start, end = other[0], other[1]
        return bp0 < start < bp1 < end or start < bp0 < end < bp1

    return any(crosses(pair) for pair in BasePairs(bplist).directed())
Пример #27
0
 def setUp(self):
     """Create conflicting structures and their expected decompositions."""
     # pairs that make up the first expected combination
     first_layer = [
         (1, 41), (2, 40), (3, 39), (4, 38), (5, 37), (7, 35), (8, 34),
         (9, 33), (11, 31), (12, 30), (14, 28), (15, 27), (16, 24),
         (17, 23),
     ]
     # pairs reusing positions 23, 33, 34 and 35 of the first layer
     second_layer = [(20, 35), (21, 34), (22, 33), (23, 32)]

     self.conflicting_bps = BasePairs(first_layer + second_layer)

     # two lists should be created
     self.ref_combinations = [
         list(first_layer),
         [(21, 34), (22, 33), (23, 32), (20, 35)],
     ]

     # one extra pair forces a third combination
     self.even_more_conflicting_bps = \
         BasePairs(self.conflicting_bps + [(22, 35)])
     self.even_more_ref_combinations = [
         list(first_layer),
         [(22, 35), (21, 34), (23, 32)],
         [(22, 33), (20, 35)],
     ]
Пример #28
0
    def test_toPartners(self):
        """BasePairs.toPartners should return a Partners object"""
        a = BasePairs([(1, 5), (3, 4), (6, 9), (7, 8)])  # normal
        b = BasePairs([(0, 4), (2, 6)])  # pseudoknot
        c = BasePairs([(1, 6), (3, 6), (4, 5)])  # conflict

        self.assertEqual(
            a.toPartners(10),
            [None, (5,), None, (4,), (3,), (1,), (9,), (8,), (7,), (6,)])
        # the second argument shifts every index by 3 in the expected result
        self.assertEqual(
            a.toPartners(13, 3),
            [None, None, None, None, (8,), None, (7,), (6,), (4,), (12,),
             (11,), (10,), (9,)])
        # unittest assertion instead of a bare assert, which is stripped
        # when Python runs with -O
        self.assertTrue(isinstance(a.toPartners(10), Partners))
        self.assertEqual(
            b.toPartners(7),
            [(4,), None, (6,), None, (0,), None, (2,)])
        # NOTE(review): the strict-mode check below was already disabled
        #self.assertRaises(ValueError,c.toPartners,7, strict=True)
        # position 6 has two partners in the conflicting structure
        self.assertEqual(
            c.toPartners(7),
            [None, (6,), None, (6,), (5,), (4,), (1, 3)])

        #raises an error when try to insert something at non-existing indices
        self.assertRaises(IndexError, c.toPartners, 0)
Пример #29
0
def parse_afold(data):
    """Returns a tuple consisting of a list of base pairs parsed from an Afold
    format line iterator and an energy for that folding.

    data: iterable of lines

    Blank lines are skipped. On every other line each '.' is replaced by
    a space, and the second and third resulting fields are read as the
    two integer indices of a pair. A line containing 'Multidomain' also
    sets the energy: the first field after the first '='.

    Returns (BasePairs, energy); energy is None if no 'Multidomain' line
    was seen. Raises IndexError/ValueError on lines that do not provide
    two integer fields at those positions.
    """
    bpairs = []
    energy = None
    for line in data:
        if 'Multidomain' in line:
            energy = float(line.split('=')[1].split()[0].strip())
        if not line.strip():
            continue
        # NOTE(review): a 'Multidomain' line is also parsed as a pair line
        # below - confirm this matches the Afold output format.
        tokens = line.replace('.', ' ').split()
        bpairs.append((int(tokens[1]), int(tokens[2])))
    return (BasePairs(bpairs), energy)
Пример #30
0
def get_bps_for_aligned_seq(aligned_seq, bps, first_index=0):
    """Extracts from an aligned sequence base pairs which map to that sequence.

        Pairs in bps refer to columns of the aligned sequence. A pair is
        kept only when both of its columns hold a non-gap character; its
        indices are then reduced by the number of '-' characters counted
        in front of them.

        e.g.
        ACUAGCUG-----ACUGA
        BasePairs([(2, 17)])

        will return BasePairs mapped to the unaligned sequence:
        BasePairs([(2, 12)])

        aligned_seq: str, aligned sequence e.g. 'ACUGACUAGC---ACGUACGU'
        bps: BasePairs instance
        first_index: int, sets the starting number i.e. usually 0 or 1

        Returns the result of BasePairs(...).directed() on the mapped pairs.
        Raises AssertionError when aligned_seq is not a str or bps is not
        a BasePairs instance.
    """
    assert isinstance(aligned_seq, str)
    assert isinstance(bps, BasePairs)
    
    # partners[k] holds the partner positions of position k, or None
    partners = bps.toPartners(len(aligned_seq) + first_index)
    # a set, so a pair reached more than once is stored only once
    new_base_pairs = set()
    
    # walk position number, character and partner entry in lockstep
    for i, nt, pos in zip(range(first_index, len(aligned_seq) + first_index),
                          aligned_seq, partners[first_index:]):
        # skip gap columns and unpaired positions
        if nt == '-' or pos is None:
            continue
        
        for partner in pos:
            # string index of the partner column
            partner_updated_pos = partner - first_index
            # keep the pair only if the partner points back to i and the
            # partner column is not a gap
            if i in partners[partner] and \
            aligned_seq[partner_updated_pos] != '-':
                
                # NOTE(review): these slices use i itself, not
                # i - first_index - confirm intended for first_index > 1
                nr_gaps_before_i = aligned_seq[:i].count('-')
                nr_gaps_from_i_to_partner = \
                    aligned_seq[i:partner_updated_pos].count('-')
                
                # each pair is emitted only from its lower-numbered end
                if i > partner:
                    continue
                
                total_gaps_to_partner = \
                    nr_gaps_before_i + nr_gaps_from_i_to_partner
                
                # shift both ends left by the gaps counted before them
                new_base_pairs.add(tuple(
                    sorted((i - nr_gaps_before_i,
                            partner - total_gaps_to_partner))))
                
    return BasePairs(new_base_pairs).directed()