def setUp(self):
    """Create the reference structure, predicted structure and sequence
    record that the tests below share."""
    reference_pairs = [
        (0, 40), (1, 39), (2, 38), (3, 37), (10, 20),
        (11, 19), (12, 18), (13, 17), (26, 33), (27, 32),
    ]
    predicted_pairs = [
        (0, 40), (1, 39), (2, 38), (3, 37), (4, 36),
        (5, 35), (10, 22), (11, 20), (14, 29), (15, 28),
    ]
    self.true = BasePairs(reference_pairs)
    self.predicted = BasePairs(predicted_pairs)
    # Two-element record: a '>'-prefixed header line, then the sequence.
    self.seq = ['>seq1\n', 'agguugaaggggauccgauccacuccccggcuggucaaccu']
def test_hasConflicts(self):
    """BasePairs.hasConflicts should report whether conflicts are present"""
    # Pair lists for which no conflict is expected.
    for pair_list in ([], [(1, 2), (3, 4)]):
        self.assertFalse(BasePairs(pair_list).hasConflicts())
    # Pair lists in which index 2 appears in two different pairs.
    for pair_list in ([(1, 2), (2, 3)], [(1, 2), (2, None)]):
        self.assertTrue(BasePairs(pair_list).hasConflicts())
    self.assertTrue(self.bplist_with_conflicts.hasConflicts())
def test_selectivity_dupl(self):
    """selectivity: duplicated pairs and None entries must not change the
    computed value"""
    reference = BasePairs(
        [(1, 6), (2, 5), (10, 13), (6, 1), (7, None), (None, None)])
    prediction = BasePairs([(6, 1), (3, 4), (10, 12)])
    observed = selectivity(reference, prediction)
    self.assertFloatEqual(observed, 0.5)
def test_get_counts(self):
    """get_counts: check the returned counts for each parameter combination"""
    seq = RnaSequence('UCAG-NAUGU')
    reference = BasePairs([(1, 8), (2, 7)])
    prediction = BasePairs([(1, 8), (2, 6), (3, 6), (4, 9)])

    # Third positional argument False: all FP_* sub-counts stay at zero.
    expected = {
        'TP': 1, 'TN': 0, 'FN': 1, 'FP': 3,
        'FP_INCONS': 0, 'FP_CONTRA': 0, 'FP_COMP': 0,
    }
    self.assertEqual(get_counts(reference, prediction, False), expected)

    # split_fp=True: the three false positives are split over the sub-counts.
    expected = {
        'TP': 1, 'TN': 0, 'FN': 1, 'FP': 3,
        'FP_INCONS': 1, 'FP_CONTRA': 1, 'FP_COMP': 1,
    }
    self.assertEqual(
        get_counts(reference, prediction, split_fp=True), expected)

    # Supplying a sequence and min_dist yields a non-zero TN.
    seq = RnaSequence('UCAG-NACGU')
    expected = {
        'TP': 1, 'TN': 7, 'FN': 1, 'FP': 3,
        'FP_INCONS': 1, 'FP_CONTRA': 1, 'FP_COMP': 1,
    }
    observed = get_counts(
        reference, prediction, split_fp=True, sequences=[seq], min_dist=2)
    self.assertEqual(observed, expected)

    # check against compare_ct.pm
    expected = {
        'TP': 4, 'TN': 266, 'FN': 6, 'FP': 6,
        'FP_INCONS': 2, 'FP_CONTRA': 2, 'FP_COMP': 2,
    }
    seq = 'agguugaaggggauccgauccacuccccggcuggucaaccu'.upper()
    observed = get_counts(
        self.true, self.predicted, split_fp=True,
        sequences=[seq], min_dist=4)
    self.assertEqual(observed, expected)
def test_seq_simple(self):
    """get_bps_for_aligned_seq: one pair, default first_index"""
    alignment_row = "--U--------------A"
    predicted = BasePairs([(2, 17)])
    mapped = get_bps_for_aligned_seq(alignment_row, predicted)
    # Only two residues remain once the gaps are dropped.
    expected = BasePairs([(0, 1)])
    self.assertEqual(mapped, expected)
def test_simple_offset_ok(self):
    """get_bps_for_aligned_seq: one pair with first_index=1 (part 2)"""
    alignment_row = "-U--------------A-"
    predicted = BasePairs([(2, 17)])
    mapped = get_bps_for_aligned_seq(alignment_row, predicted, 1)
    expected = BasePairs([(1, 2)])
    self.assertEqual(mapped, expected)
def test_seq_conflict(self):
    """get_bps_for_aligned_seq: input in which one position has two partners"""
    alignment_row = "--U--A-----------A"
    predicted = BasePairs([(2, 5), (2, 17)])
    mapped = get_bps_for_aligned_seq(alignment_row, predicted)
    expected = BasePairs([(0, 1), (0, 2)])
    self.assertEqual(mapped, expected)
def test_seq_conflict_offset(self):
    """get_bps_for_aligned_seq: conflicting input with first_index=1 (part 1)"""
    alignment_row = "--U--A-----------A"
    predicted = BasePairs([(2, 5), (2, 17)])
    mapped = get_bps_for_aligned_seq(alignment_row, predicted, 1)
    # No pair is expected to survive the mapping here.
    expected = BasePairs([])
    self.assertEqual(mapped, expected)
def test_aligned_seq_skip(self):
    """get_bps_for_aligned_seq: pairs that cannot be mapped are left out"""
    alignment_row = "ACUAGCUG-----ACUGA"
    predicted = BasePairs([(2, 10), (2, 17)])
    mapped = get_bps_for_aligned_seq(alignment_row, predicted)
    # Column 10 is a gap, so only the (2, 17) pair is expected back.
    expected = BasePairs([(2, 12)])
    self.assertEqual(mapped, expected)
def test_toVienna_toPairs(self):
    """BasePairs.toVienna(...).toPairs() should round-trip to the same pairs"""
    cases = (
        (((1, 3), (4, 5), (7, 12)), 15),
        (((1, 3), (4, 5), (7, 121)), 150),
    )
    for pair_tuple, length in cases:
        bps = BasePairs(pair_tuple)
        round_tripped = bps.toVienna(length).toPairs()
        self.assertEqual(round_tripped, bps)
def test_sensitivity_dupl(self):
    """sensitivity: duplicates, pseudoknots and None entries are handled"""
    reference = BasePairs(
        [(1, 6), (2, 5), (3, 10), (7, None), (None, None), (5, 2), (4, 9)])

    prediction = BasePairs([(6, 1), (10, 11), (3, 12)])
    self.assertFloatEqual(sensitivity(reference, prediction), 0.25)

    # Same prediction plus None entries and a duplicate: value is unchanged.
    prediction = BasePairs(
        [(6, 1), (10, 11), (3, 12), (20, None), (None, None), (1, 6)])
    self.assertFloatEqual(sensitivity(reference, prediction), 0.25)
def test_sensitivity_empty(self):
    """sensitivity: should work on empty BasePairs"""
    # Both structures empty.
    self.assertFloatEqual(sensitivity(BasePairs([]), BasePairs([])), 1)

    non_empty = BasePairs(
        [(6, 1), (10, 11), (3, 12), (13, 20), (14, 19), (15, 18)])
    # Empty structure passed as the first argument.
    self.assertFloatEqual(sensitivity(BasePairs([]), non_empty), 0)
    # Empty structure passed as the second argument.
    self.assertFloatEqual(sensitivity(non_empty, BasePairs([])), 0)
def test_selectivity_empty(self):
    """selectivity: should handle empty reference/predicted structure"""
    # Both structures empty.
    self.assertFloatEqual(selectivity(BasePairs([]), BasePairs([])), 1)

    non_empty = BasePairs(
        [(6, 1), (10, 11), (3, 12), (13, 20), (14, 19), (15, 18)])
    # Empty structure passed as the first argument.
    self.assertFloatEqual(selectivity(BasePairs([]), non_empty), 0)
    # Empty structure passed as the second argument.
    self.assertFloatEqual(selectivity(non_empty, BasePairs([])), 0)
def test_selectivity_general(self):
    """selectivity: general behaviour on small hand-made structures"""
    reference = BasePairs([(1, 6), (2, 5), (10, 13)])

    # one good prediction
    prediction = BasePairs([(6, 1), (3, 4), (10, 12)])
    self.assertFloatEqual(selectivity(reference, prediction), 0.5)

    # over-prediction not penalized
    prediction = BasePairs(
        [(6, 1), (10, 11), (3, 12), (13, 20), (14, 19), (15, 18)])
    self.assertFloatEqual(selectivity(reference, prediction), 0.25)
def test_get_counts_pseudo(self):
    """get_counts: pseudoknot in the reference changes the FP classification"""
    # pairs that would normally be compatible, are now contradicting
    reference = BasePairs([(0, 8), (1, 7), (4, 10)])
    prediction = BasePairs([(0, 8), (3, 6), (4, 10)])
    seq = 'GACUGUGUCAU'
    expected = {
        'TP': 2, 'TN': 13 - 2 - 1, 'FN': 1, 'FP': 1,
        'FP_INCONS': 0, 'FP_CONTRA': 1, 'FP_COMP': 0,
    }
    observed = get_counts(
        reference, prediction, split_fp=True, sequences=[seq], min_dist=4)
    self.assertEqual(observed, expected)
def test_symmetric(self):
    """BasePairs.symmetric should add (down,up) for each (up,down)

    Each item returned by symmetric() must be one of the allowed tuples
    for that input. Uses assertTrue rather than the deprecated assert_
    alias, which newer unittest versions no longer provide.
    """
    self.assertEqual(BasePairs([]).symmetric(), [])

    # (input pairs, tuples that symmetric() is allowed to return)
    cases = (
        ([(1, 2)], [(2, 1), (1, 2)]),
        ([(1, 2), (1, 2)], [(1, 2), (2, 1)]),
        ([(1, 2), (3, 4)], [(1, 2), (2, 1), (3, 4), (4, 3)]),
        # A pair with a None partner: any returned item fails the check.
        ([(1, None)], []),
    )
    for pair_list, allowed in cases:
        for item in BasePairs(pair_list).symmetric():
            self.assertTrue(item in allowed)
def test_many_gaps_seq(self):
    """get_bps_for_aligned_seq: heavily gapped row with first_index 1 and 2"""
    alignment_row = "ACUAGCUG-----ACU-A---------CGCGC---A"
    predicted = BasePairs([(2, 10), (2, 17), (4, 36)])

    mapped_from_one = get_bps_for_aligned_seq(alignment_row, predicted, 1)
    self.assertEqual(mapped_from_one, BasePairs([(4, 18)]))

    mapped_from_two = get_bps_for_aligned_seq(alignment_row, predicted, 2)
    self.assertEqual(mapped_from_two, BasePairs([(2, 12)]))
def test_conflicts(self):
    """every metric should raise ValueError if either structure has conflicts"""
    # Index 1 is paired with both 6 and 12 in this structure.
    conflicted = BasePairs(
        [(1, 6), (2, 5), (3, 10), (7, None), (None, None), (5, 2), (1, 12)])
    clean = BasePairs([(6, 1), (10, 11), (3, 12)])

    # (metric, extra positional arguments); each is tried in both orders.
    metrics = (
        (sensitivity, ()),
        (selectivity, ()),
        (mcc, (self.seq,)),
    )
    for metric, extra in metrics:
        self.assertRaises(ValueError, metric, conflicted, clean, *extra)
        self.assertRaises(ValueError, metric, clean, conflicted, *extra)
def test_mismatches(self):
    """BasePairs.mismatches should count the base pairs that can't be made"""
    # with plain string
    count = BasePairs([(0, 1)]).mismatches('AC', {})
    self.assertEqual(count, 1)

    count = BasePairs([(0, 1)]).mismatches('AC', {('A', 'C'): None})
    self.assertEqual(count, 0)

    count = BasePairs([(0, 1)]).mismatches('AC', {('A', 'G'): None})
    self.assertEqual(count, 1)

    count = BasePairs([(0, 1), (2, 3), (3, 1)]).mismatches(
        'ACGU', {('A', 'U'): None})
    self.assertEqual(count, 3)

    # using sequence with alphabet
    count = BasePairs([(0, 1), (0, 4), (0, 3)]).mismatches(Rna('ACGUA'))
    self.assertEqual(count, 2)
def parse_pknots(fh):
    """Returns base pairs and their energy parsed from pknots output.

    fh: an iterable of lines, e.g. a file open for reading or a list of
        strings.

    Returns a tuple (BasePairs, energy). Every parsed index is increased
    by 1 before being stored. energy is None when no line starting with
    'energy' is found.

    A row is treated as an index row when it starts with '0' or with the
    number following the last index of the previous index row; the line
    after it is read as the matching partner row. If an index row is the
    last line of the input, the StopIteration from next() propagates.
    """
    # A single explicit iterator lets the partner row be pulled with
    # next() for files and plain lists alike, instead of the
    # Python-2-only fh.next().
    lines = iter(fh)
    energy = None
    last_number = '-1'
    pairs_str = []
    # Mirror of pairs_str for O(1) "is the reverse pair already stored?"
    seen = set()
    for line in lines:
        stripped = line.strip()
        if stripped.startswith('0') or \
                stripped.startswith(str(int(last_number) + 1)):
            first_line = stripped.split()
            second_line = next(lines).strip().split()
            for op, cl in zip(first_line, second_line):
                # '.' in the partner row marks a position with no partner.
                if cl == '.':
                    continue
                # Each pair is listed from both ends; keep one of them.
                if (cl, op) not in seen:
                    pairs_str.append((op, cl))
                    seen.add((op, cl))
            last_number = first_line[-1]
        if stripped.startswith('energy'):
            energy = float(line.split()[-1].strip())
            break
    return (BasePairs([(int(x) + 1, int(y) + 1) for x, y in pairs_str]),
            energy)
def test_make_non_conflicting_viennas_no_conflicts(self):
    """BasePairs.make_non_conflicting_viennas: conflict-free input gives a
    single Vienna string"""
    conflict_free = BasePairs([(1, 2), (3, 4), (5, 6)])
    viennas = conflict_free.make_non_conflicting_viennas(10)
    self.assertEqual(len(viennas), 1)
    self.assertEqual(viennas[0], '()()()....')
def _parse(self):
    """Parse the CT file at self.ct_path.

    Returns a tuple (defline, sequence, vienna, pairs):
      defline:  '>' followed by the '#'-prefixed header lines of the
                file, each stripped and terminated by '|', with '#' and
                newline characters removed
      sequence: the sequence after Sequence(...).seq_without_modifications
      vienna:   pairs.toVienna(len(sequence)), or None when that raises
                PseudoknotTokensError
      pairs:    BasePairs with every index from ct_parser increased by 1

    Returns (None, None, None, None) when the sequence fails
    self._is_valid_sequence before or after the modification stripping.

    Any other exception raised by toVienna propagates unchanged.
    """
    tmp_ct = []
    defline = ">"
    # The context manager guarantees the file handle is closed.
    with open(self.ct_path) as ct_file:
        for line in ct_file:
            if line.startswith('#'):
                defline += line.strip() + '|'
            else:
                tmp_ct.append(line)
    defline = defline.replace('\n', '').replace('#', '')

    # just one entry per file
    result = ct_parser(tmp_ct)[0]
    sequence = result[0]

    # IMPORTANT!
    # ct_parser starts numbering of base pairs from 0, not from 1, as
    # elsewhere in nucleic.secstruc. To make things consistent, here
    # we'll add 1 to each index in base pairs list!
    result_1 = [(pair[0] + 1, pair[1] + 1) for pair in result[1]]
    pairs = BasePairs(result_1)

    if not self._is_valid_sequence(sequence):
        return None, None, None, None

    sequence = Sequence(sequence.upper()).seq_without_modifications
    # Second check, because ModeRNA might also mix a bit at this stage,
    # including for example X in sequences without modifications!
    if not self._is_valid_sequence(sequence):
        return None, None, None, None

    try:
        vienna = pairs.toVienna(len(sequence))
    except PseudoknotTokensError:
        vienna = None
    # Returning here rather than from a `finally` clause keeps unexpected
    # errors visible instead of replacing them with an UnboundLocalError
    # on `vienna`.
    return defline, sequence, vienna, pairs
def test_toVienna_conflict(self):
    """BasePairs.toVienna should raise ConflictInBasePairsError on conflicts"""
    conflicted = self.bplist_with_conflicts
    self.assertRaises(ConflictInBasePairsError, conflicted.toVienna, 100)
    self.assertRaises(
        ConflictInBasePairsError, conflicted.toVienna, 100, -2)

    # Smallest conflicting case: index 2 belongs to two pairs.
    minimal = BasePairs([(1, 2), (2, 3)])
    self.assertRaises(ConflictInBasePairsError, minimal.toVienna, 4)
def parse_sfold(data):
    """Returns a list of base pairs parsed from an Sfold format line
    iterator.

    data: an iterable of lines.

    A line contributes a pair only when it consists of exactly two
    unsigned integers separated by whitespace; leading and trailing
    whitespace (including '\\r\\n' line endings) is ignored. All other
    lines are skipped.
    """
    # Raw string avoids invalid-escape warnings. Anchoring both ends and
    # capturing the numbers means a line with extra tokens is skipped
    # rather than failing on a two-value unpack.
    pair_line = re.compile(r"\s*(\d+)\s+(\d+)\s*\Z")
    bpairs = []
    for line in data:
        found = pair_line.match(line)
        if found:
            bpairs.append((int(found.group(1)), int(found.group(2))))
    return BasePairs(bpairs)
def test_directed(self):
    """BasePairs.directed should order every pair so that a<b in (a,b)"""
    self.assertEqual(BasePairs([]).directed(), [])

    # Reversed pairs are flipped and the result comes back sorted.
    directed = BasePairs([(2, 1), (6, 4), (1, 7), (8, 3)]).directed()
    self.assertEqual(directed, BasePairs([(1, 2), (1, 7), (3, 8), (4, 6)]))

    # Pairs with a None member are dropped.
    directed = BasePairs([(5, None), (None, 3)]).directed()
    self.assertEqual(directed, BasePairs([]))

    # (2,1) and (1,2) collapse to a single pair.
    directed = BasePairs([(2, 1), (1, 2)]).directed()
    self.assertEqual(directed, BasePairs([(1, 2)]))
def is_bp_a_pseudoknot(bp0, bp1, bplist):
    """Tell whether the base pair (bp0, bp1) crosses any pair in bplist.

    bp0: int, number of the first base
    bp1: int, number of the second base
    bplist: a list of base pair indices

    Returns True as soon as one directed pair from bplist interleaves
    with (bp0, bp1), i.e. exactly one of its ends lies strictly between
    bp0 and bp1 in the order tested below; otherwise returns False.
    """
    return any(
        bp0 < other[0] < bp1 < other[1] or other[0] < bp0 < other[1] < bp1
        for other in BasePairs(bplist).directed()
    )
def setUp(self):
    """Build conflicting base-pair fixtures and their expected splits into
    conflict-free groups."""
    # The 14 pairs that form the first expected group in both fixtures.
    first_group = [
        (1, 41), (2, 40), (3, 39), (4, 38), (5, 37), (7, 35), (8, 34),
        (9, 33), (11, 31), (12, 30), (14, 28), (15, 27), (16, 24),
        (17, 23),
    ]
    # Pairs that reuse positions already taken by first_group.
    clashing = [(20, 35), (21, 34), (22, 33), (23, 32)]

    self.conflicting_bps = BasePairs(first_group + clashing)
    # two lists should be created
    self.ref_combinations = [
        list(first_group),
        [(21, 34), (22, 33), (23, 32), (20, 35)],
    ]

    self.even_more_conflicting_bps = \
        BasePairs(self.conflicting_bps + [(22, 35)])
    # With (22, 35) added, three groups are expected.
    self.even_more_ref_combinations = [
        list(first_group),
        [(22, 35), (21, 34), (23, 32)],
        [(22, 33), (20, 35)],
    ]
def test_toPartners(self):
    """BasePairs.toPartners should return a Partners object

    Covers a nested structure, a pseudoknot and a structure in which one
    position has two partners, plus the offset argument and the
    IndexError raised for a too-short length.
    """
    a = BasePairs([(1, 5), (3, 4), (6, 9), (7, 8)])  # normal
    b = BasePairs([(0, 4), (2, 6)])  # pseudoknot
    c = BasePairs([(1, 6), (3, 6), (4, 5)])  # conflict

    self.assertEqual(
        a.toPartners(10),
        [None, (5,), None, (4,), (3,), (1,), (9,), (8,), (7,), (6,)])
    # Second argument 3: every index and partner value is shifted by 3.
    self.assertEqual(
        a.toPartners(13, 3),
        [None, None, None, None, (8,), None, (7,), (6,), (4,),
         (12,), (11,), (10,), (9,)])
    # Use a unittest assertion rather than a bare assert so the check
    # still runs under `python -O`.
    self.assertTrue(isinstance(a.toPartners(10), Partners))

    self.assertEqual(
        b.toPartners(7),
        [(4,), None, (6,), None, (0,), None, (2,)])

    #self.assertRaises(ValueError,c.toPartners,7, strict=True)
    # Position 6 keeps both of its partners.
    self.assertEqual(
        c.toPartners(7),
        [None, (6,), None, (6,), (5,), (4,), (1, 3)])

    #raises an error when try to insert something at non-existing indices
    self.assertRaises(IndexError, c.toPartners, 0)
def parse_afold(data):
    """Returns a tuple consisting of a list of base pairs parsed from an
    Afold format line iterator and an energy for that folding.

    data: an iterable of lines.

    For a line containing 'Multidomain', the energy is read as the first
    whitespace-separated token after the first '='. Blank lines are
    skipped. For every other line, '.' characters are replaced by spaces
    and the second and third resulting tokens are stored as an integer
    pair.

    Raises ValueError or IndexError when a non-blank line does not have
    integer second and third tokens.
    """
    bpairs = []
    energy = None
    for line in data:
        if 'Multidomain' in line:
            energy = float(line.split('=')[1].split()[0].strip())
            # NOTE(review): a 'Multidomain' line also falls through to the
            # pair parsing below, as in the original statement order --
            # confirm against real Afold output that this is intended.
        if line.strip() == '':
            continue
        tokens = line.replace('.', ' ').split()
        bpairs.append((int(tokens[1]), int(tokens[2])))
    return (BasePairs(bpairs), energy)
def get_bps_for_aligned_seq(aligned_seq, bps, first_index=0):
    """Extracts from an aligned sequence base pairs which map to that
    sequence.

    e.g. ACUAGCUG-----ACUGA with BasePairs((2, 17)) will return the pair
    mapped to the unaligned sequence: BasePairs((2, 12))

    aligned_seq: str, aligned sequence e.g. 'ACUGACUAGC---ACGUACGU'
    bps: BasePairs instance, numbered along the alignment columns
    first_index: int, sets the starting number i.e. usually 0 or 1

    Returns the directed BasePairs of all pairs whose two columns both
    hold a residue, renumbered (still starting at first_index) along the
    sequence with gaps removed. Pairs touching a gap column are dropped.
    """
    assert isinstance(aligned_seq, str)
    assert isinstance(bps, BasePairs)

    partners = bps.toPartners(len(aligned_seq) + first_index)

    # gaps_before[k] is the number of '-' characters in aligned_seq[:k].
    # Counting by string index, not by first_index-shifted position,
    # keeps the mapping correct for any first_index.
    gaps_before = [0]
    for nt in aligned_seq:
        gaps_before.append(gaps_before[-1] + (1 if nt == '-' else 0))

    new_base_pairs = set()
    for idx, nt in enumerate(aligned_seq):
        i = idx + first_index  # position in the caller's numbering
        pos = partners[i]
        if nt == '-' or pos is None:
            continue
        for partner in pos:
            # Each pair is handled once, from its lower-numbered end.
            if i > partner:
                continue
            partner_idx = partner - first_index
            if i in partners[partner] and aligned_seq[partner_idx] != '-':
                new_base_pairs.add(tuple(sorted(
                    (i - gaps_before[idx],
                     partner - gaps_before[partner_idx]))))
    return BasePairs(new_base_pairs).directed()