def test_toProbs(self):
    """Counts toProbs should return valid prob matrix.

    The four runs of four counts below total 10, 8, 2 and 1, so each
    expected probability is a count divided by the total of its own run.
    """
    raw_counts = [
        1, 2, 3, 4,
        2, 2, 2, 2,
        0.2, 0.4, 0.6, 0.8,
        1, 0, 0, 0,
    ]
    wanted = [
        0.1, 0.2, 0.3, 0.4,
        0.25, 0.25, 0.25, 0.25,
        0.1, 0.2, 0.3, 0.4,
        1.0, 0.0, 0.0, 0.0,
    ]
    probs = Counts(raw_counts, RnaPairs).toProbs()
    assert isinstance(probs, Probs)
    self.assertEqual(probs, Probs(wanted, RnaPairs))
    # spot-check single cells addressed by a pair of symbols
    spot_checks = (('U', 'U', 0.1), ('G', 'U', 1.0), ('G', 'G', 0.0))
    for first, second, value in spot_checks:
        self.assertEqual(probs[first, second], value)
def test_toProbs(self):
    """Counts toProbs should return valid prob matrix."""
    count_rows = [
        [1, 2, 3, 4],
        [2, 2, 2, 2],
        [0.2, 0.4, 0.6, 0.8],
        [1, 0, 0, 0],
    ]
    prob_rows = [
        [0.1, 0.2, 0.3, 0.4],
        [0.25, 0.25, 0.25, 0.25],
        [0.1, 0.2, 0.3, 0.4],
        [1.0, 0.0, 0.0, 0.0],
    ]
    # both constructors are handed flat 16-element lists, as before
    flat_counts = [cell for row in count_rows for cell in row]
    flat_probs = [cell for row in prob_rows for cell in row]

    counts = Counts(flat_counts, RnaPairs)
    result = counts.toProbs()
    assert isinstance(result, Probs)

    expected = Probs(flat_probs, RnaPairs)
    self.assertEqual(result, expected)

    # individual cells looked up by symbol pair
    uu = result['U', 'U']
    gu = result['G', 'U']
    gg = result['G', 'G']
    self.assertEqual(uu, 0.1)
    self.assertEqual(gu, 1.0)
    self.assertEqual(gg, 0.0)
def test_toCounts(self):
    """Probs toCounts should return counts object w/ right numbers"""
    pair_alphabet = Alphabet('abc') ** 2
    probabilities = [
        0.5, 0.25, 0.25,
        0.1, 0.8, 0.1,
        0.3, 0.6, 0.1,
    ]
    observed = Probs(probabilities, pair_alphabet).toCounts(30)
    assert isinstance(observed, Counts)
    # the expected numbers are the probabilities times ten; they total 30
    expected_cells = [5., 2.5, 2.5, 1, 8, 1, 3, 6, 1]
    self.assertEqual(observed, Counts([expected_cells], pair_alphabet))
def test_fromPair(self):
    """Counts fromPair should return correct counts."""
    bases = 'UCAG'

    def check_cells(counts, rows):
        """Compare each cell of counts with rows (rows and columns in UCAG order)."""
        for row_base, row in zip(bases, rows):
            for col_base, want in zip(bases, row):
                self.assertFloatEqual(counts[row_base, col_base], want)

    first = RnaSequence('UCCGAUCGAUUAUCGGGUACGUA')
    second = RnaSequence('GUCGAGUAUAGCGUACGGCUACG')
    pair_counts = Counts.fromPair(first, second, RnaPairs)
    assert isinstance(pair_counts, Counts)
    check_cells(pair_counts, [
        [0, 2.5, 1, 2.5],
        [2.5, 1, 1, 0.5],
        [1, 1, 1, 2],
        [2.5, 0.5, 2, 2],
    ])

    # check that it works for big seqs
    big_first = RnaSequence('UCAG' * 1000)
    big_second = RnaSequence('UGAG' * 1000)
    pair_counts = Counts.fromPair(big_first, big_second, RnaPairs)
    assert isinstance(pair_counts, Counts)
    check_cells(pair_counts, [
        [1000, 0, 0, 0],
        [0, 0, 0, 500],
        [0, 0, 1000, 0],
        [0, 500, 0, 1000],
    ])

    # check that it works for codon seqs
    codons_one = ModelRnaCodonSequence('UUCGCG')
    codons_two = ModelRnaCodonSequence('UUUGGG')
    codon_counts = Counts.fromPair(
        codons_one, codons_two, RNA.Alphabet.Triples ** 2)
    self.assertEqual(codon_counts._data.sum(), 2)
    for row, col in ((0, 1), (1, 0), (55, 63), (63, 55)):
        self.assertEqual(codon_counts._data[row, col], 0.5)
def test_counts_to_probs(self):
    """counts_to_probs should skip cases with zero rows"""
    matrices = [
        ((0, 1), [[0, 1], [1, 0]]),
        ((1, 2), [[0, 0], [1, 0]]),            # bad row
        ((0, 3), [[0, 0], [0, 0]]),            # bad row
        ((0, 4), [[0.0, 0.0], [0.0, 0.0]]),    # bad row
        ((0, 5), [[0.1, 0.3], [0.0, 0.0]]),    # bad row
        ((3, 4), [[0.1, 0.3], [0.4, 0.1]]),
        ((2, 1), [[0, 5], [1, 0]]),
    ]
    counts = dict(
        (key, Counts(array(rows), ABPairs)) for key, rows in matrices
    )

    probs = counts_to_probs(counts)

    # only the three entries not marked "bad row" should remain
    self.assertEqual(len(probs), 3)
    survivors = [
        ((0, 1), [[0, 1], [1, 0]]),
        ((3, 4), [[0.25, 0.75], [0.8, 0.2]]),
        ((2, 1), [[0, 1], [1, 0]]),
    ]
    for key, rows in survivors:
        self.assertFloatEqual(probs[key]._data, array(rows))
def test_fromTriple(self):
    """Counts fromTriple should return correct counts."""
    from_triple = Counts.fromTriple
    seq = RnaSequence
    bases = 'UCAG'

    def check_cells(counts, rows):
        """Compare each cell of counts with rows (rows and columns in UCAG order)."""
        for row_base, row in zip(bases, rows):
            for col_base, want in zip(bases, row):
                self.assertFloatEqual(counts[row_base, col_base], want)

    one_a, one_c, one_g, one_u = [seq(base) for base in 'ACGU']
    # counts if different from both the other groups
    result = from_triple(one_a, one_c, one_c, RnaPairs)
    assert isinstance(result, Counts)
    self.assertEqual(result['C', 'A'], 1)
    self.assertEqual(result['A', 'C'], 0)
    self.assertEqual(result['C', 'C'], 0)

    # try it with longer sequences
    three_a, three_c = [seq(text) for text in ('AAA', 'CCC')]
    result = from_triple(three_a, three_c, three_c, RnaPairs)
    self.assertEqual(result['C', 'A'], 3)
    self.assertEqual(result['A', 'C'], 0)

    # doesn't count if all three differ
    acg, cga, gac = [seq(text) for text in ('ACG', 'CGA', 'GAC')]
    result = from_triple(acg, cga, gac, RnaPairs)
    self.assertEqual(result['C', 'A'], 0)
    self.assertEqual(result['A', 'C'], 0)
    self.assertEqual(result, Counts.empty(RnaPairs))

    # counts as no change if same as other sequence...
    result = from_triple(three_a, three_a, three_c, RnaPairs)
    self.assertEqual(result['A', 'A'], 3)
    self.assertEqual(result['A', 'C'], 0)
    # ...or same as the outgroup
    result = from_triple(three_a, three_c, three_a, RnaPairs)
    self.assertEqual(result['A', 'A'], 3)
    self.assertEqual(result['A', 'C'], 0)

    # spot-check a mixed example
    mixed = ('AUCGCUAGCAUACGUCA', 'AAGCUGCGUAGCGCAUA', 'GCGCAUAUGACGAUAGC')
    mixed_rows = [
        [1, 0, 0, 0],
        [0, 0, 0, 1],
        [1, 0, 4, 0],
        [0, 1, 0, 1],
    ]
    first, second, third = [seq(text) for text in mixed]
    result = from_triple(first, second, third, RnaPairs)
    check_cells(result, mixed_rows)

    def repeated():
        """Build fresh copies of the mixed sequences repeated 1000 times."""
        return [seq(text * 1000) for text in mixed]

    # repeating the input 1000 times scales every expected cell by 1000
    long_rows = [[1000 * cell for cell in row] for row in mixed_rows]

    # check a long sequence
    first, second, third = repeated()
    result = from_triple(first, second, third, RnaPairs)
    check_cells(result, long_rows)

    # check that it works when forced to use both variants of fromTriple
    first, second, third = repeated()
    # threshold=0 forces "large" method
    result = from_triple(first, second, third, RnaPairs, threshold=0)
    check_cells(result, long_rows)

    first, second, third = repeated()
    # threshold=1e12 forces "small" method
    result = from_triple(first, second, third, RnaPairs, threshold=1e12)
    check_cells(result, long_rows)

    # check that it works for codon seqs
    codons_one = ModelRnaCodonSequence('UUCGCG')
    codons_two = ModelRnaCodonSequence('UUUGGG')
    # the same object serves as both second sequence and outgroup
    codon_counts = Counts.fromTriple(
        codons_one, codons_two, codons_two, RNA.Alphabet.Triples ** 2)
    self.assertEqual(codon_counts._data.sum(), 2)
    self.assertEqual(codon_counts._data[0, 1], 1)
    self.assertEqual(codon_counts._data[63, 55], 1)
def test_fromTriple(self):
    """Counts fromTriple should return correct counts."""
    make = RnaSequence
    triple = Counts.fromTriple
    # every (row, column) symbol pair, in the order the expectations are listed
    pairs = [(x, y) for x in 'UCAG' for y in 'UCAG']

    base_a, base_c, base_g, base_u = map(make, 'ACGU')

    # counts if different from both the other groups
    tally = triple(base_a, base_c, base_c, RnaPairs)
    assert isinstance(tally, Counts)
    for key, want in ((('C', 'A'), 1), (('A', 'C'), 0), (('C', 'C'), 0)):
        self.assertEqual(tally[key[0], key[1]], want)

    # try it with longer sequences
    run_a, run_c = map(make, ['AAA', 'CCC'])
    tally = triple(run_a, run_c, run_c, RnaPairs)
    self.assertEqual(tally['C', 'A'], 3)
    self.assertEqual(tally['A', 'C'], 0)

    # doesn't count if all three differ
    rot_one, rot_two, rot_three = map(make, ['ACG', 'CGA', 'GAC'])
    tally = triple(rot_one, rot_two, rot_three, RnaPairs)
    self.assertEqual(tally['C', 'A'], 0)
    self.assertEqual(tally['A', 'C'], 0)
    self.assertEqual(tally, Counts.empty(RnaPairs))

    # counts as no change if same as other sequence...
    tally = triple(run_a, run_a, run_c, RnaPairs)
    self.assertEqual(tally['A', 'A'], 3)
    self.assertEqual(tally['A', 'C'], 0)
    # ...or same as the outgroup
    tally = triple(run_a, run_c, run_a, RnaPairs)
    self.assertEqual(tally['A', 'A'], 3)
    self.assertEqual(tally['A', 'C'], 0)

    # spot-check a mixed example
    text_one = 'AUCGCUAGCAUACGUCA'
    text_two = 'AAGCUGCGUAGCGCAUA'
    text_out = 'GCGCAUAUGACGAUAGC'
    tally = triple(make(text_one), make(text_two), make(text_out), RnaPairs)
    short_expected = [
        1, 0, 0, 0,
        0, 0, 0, 1,
        1, 0, 4, 0,
        0, 1, 0, 1,
    ]
    for (x, y), want in zip(pairs, short_expected):
        self.assertFloatEqual(tally[x, y], want)

    long_expected = [
        1000, 0, 0, 0,
        0, 0, 0, 1000,
        1000, 0, 4000, 0,
        0, 1000, 0, 1000,
    ]
    # check a long sequence with the default settings, then check that it
    # works when forced to use both variants of fromTriple:
    # threshold=0 forces the "large" method, threshold=1e12 the "small" one
    for options in ({}, {'threshold': 0}, {'threshold': 1e12}):
        tally = triple(
            make(text_one * 1000),
            make(text_two * 1000),
            make(text_out * 1000),
            RnaPairs,
            **options
        )
        for (x, y), want in zip(pairs, long_expected):
            self.assertFloatEqual(tally[x, y], want)

    # check that it works for codon seqs
    codon_a = ModelRnaCodonSequence('UUCGCG')
    codon_b = ModelRnaCodonSequence('UUUGGG')
    codon_out = codon_b
    codon_tally = Counts.fromTriple(
        codon_a, codon_b, codon_out, RNA.Alphabet.Triples ** 2)
    self.assertEqual(codon_tally._data.sum(), 2)
    for row, col in ((0, 1), (63, 55)):
        self.assertEqual(codon_tally._data[row, col], 1)
def test_fromPair(self):
    """Counts fromPair should return correct counts."""
    # every (row, column) symbol pair, in the order the expectations are listed
    pairs = [(x, y) for x in 'UCAG' for y in 'UCAG']

    tally = Counts.fromPair(
        RnaSequence('UCCGAUCGAUUAUCGGGUACGUA'),
        RnaSequence('GUCGAGUAUAGCGUACGGCUACG'),
        RnaPairs,
    )
    assert isinstance(tally, Counts)
    short_expected = [
        0, 2.5, 1, 2.5,
        2.5, 1, 1, 0.5,
        1, 1, 1, 2,
        2.5, 0.5, 2, 2,
    ]
    for (x, y), want in zip(pairs, short_expected):
        self.assertFloatEqual(tally[x, y], want)

    # check that it works for big seqs
    tally = Counts.fromPair(
        RnaSequence('UCAG' * 1000),
        RnaSequence('UGAG' * 1000),
        RnaPairs,
    )
    assert isinstance(tally, Counts)
    big_expected = [
        1000, 0, 0, 0,
        0, 0, 0, 500,
        0, 0, 1000, 0,
        0, 500, 0, 1000,
    ]
    for (x, y), want in zip(pairs, big_expected):
        self.assertFloatEqual(tally[x, y], want)

    # check that it works for codon seqs
    left = ModelRnaCodonSequence('UUCGCG')
    right = ModelRnaCodonSequence('UUUGGG')
    codon_tally = Counts.fromPair(left, right, RNA.Alphabet.Triples ** 2)
    cells = codon_tally._data
    self.assertEqual(cells.sum(), 2)
    self.assertEqual(cells[0, 1], 0.5)
    self.assertEqual(cells[1, 0], 0.5)
    self.assertEqual(cells[55, 63], 0.5)
    self.assertEqual(cells[63, 55], 0.5)