def test_subset_positions_DenseAlignment(self):
    """takePositions and getSubAlignment(pos=...) should agree on columns."""
    degen_gapped = RNA.Alphabets.DegenGapped
    # Reference alignment holding only the columns that get selected below.
    model1 = ModelSequence('UCG', Name='rna1', Alphabet=degen_gapped)
    model2 = ModelSequence('YCG', Name='rna2', Alphabet=degen_gapped)
    model3 = ModelSequence('CAR', Name='rna3', Alphabet=degen_gapped)
    sub_da = DenseAlignment(
        [model1, model2, model3],
        MolType=RNA,
        Alphabet=degen_gapped,
    )

    # Expected index arrays: one row per sequence, one column per position.
    # sub_data holds columns 0, 1 and 5 of full_data.
    full_data = array([
        [0, 1, 2, 3, 3, 3],
        [15, 1, 0, 4, 12, 3],
        [1, 2, 2, 4, 10, 12],
    ])
    sub_data = array([
        [0, 1, 3],
        [15, 1, 3],
        [1, 2, 12],
    ])

    # Sanity-check the fixtures before exercising the subsetting methods.
    self.assertEqual(self.da.ArraySeqs, full_data)
    self.assertEqual(self.da.ArrayPositions, transpose(full_data))
    self.assertEqual(sub_da.ArraySeqs, sub_data)
    self.assertEqual(sub_da.ArrayPositions, transpose(sub_data))

    wanted_positions = [0, 1, 5]
    obs_sub_da_TP = self.da.takePositions(wanted_positions)
    obs_sub_da_SA = self.da.getSubAlignment(pos=wanted_positions)

    # getSubAlignment: both the alignment and its underlying arrays match.
    self.assertEqual(obs_sub_da_SA, sub_da)
    self.assertNotEqual(obs_sub_da_SA, self.da)
    self.assertEqual(obs_sub_da_SA.ArraySeqs, sub_data)
    self.assertEqual(obs_sub_da_SA.ArrayPositions, transpose(sub_data))

    # takePositions: alignment equality alone is not enough, so the
    # underlying arrays are checked explicitly as well.
    self.assertEqual(obs_sub_da_TP, sub_da)
    self.assertNotEqual(obs_sub_da_TP, self.da)
    self.assertEqual(obs_sub_da_TP.ArraySeqs, sub_data)
    self.assertEqual(obs_sub_da_TP.ArrayPositions, transpose(sub_data))
def test_seq_equality(self):
    """ModelSequences built from identical arguments should compare equal."""
    shared_kwargs = dict(Name='rna1', Alphabet=RNA.Alphabets.DegenGapped)
    first = ModelSequence('UCG', **shared_kwargs)
    second = ModelSequence('UCG', **shared_kwargs)

    # Object-level equality is the behaviour under question here ...
    self.assertEqual(first, second)
    # ... while the string forms are expected to match regardless.
    self.assertEqual(str(first), str(second))
def test_DenseAlignment_without_moltype(self):
    """DenseAlignment built without MolType should take it from its seqs."""
    degen_gapped = RNA.Alphabets.DegenGapped
    seqs = [
        ModelSequence('UCAG', Alphabet=degen_gapped, Name='rna1'),
        ModelSequence('CCCR', Alphabet=degen_gapped, Name='rna2'),
    ]
    # No MolType/Alphabet passed on purpose.
    da = DenseAlignment(seqs)

    # FASTA-style output: every line, including the last, ends in a newline.
    fasta_lines = ('>rna1', 'UCAG', '>rna2', 'CCCR')
    expected = ''.join(line + '\n' for line in fasta_lines)
    self.assertEqual(str(da), expected)
def test_seq_ungapping(self):
    """degap should give 'UCAG' for both RnaSequence and ModelSequence."""
    gapped = 'U-C-A-G-'
    ungapped = 'UCAG'
    rna1 = RnaSequence(gapped, Name='rna1')
    model1 = ModelSequence(
        gapped,
        Name='rna1',
        Alphabet=RNA.Alphabets.DegenGapped,
    )

    # Plain sequence object: round-trips and degaps as expected.
    self.assertEqual(rna1, gapped)
    self.assertEqual(rna1.degap(), ungapped)

    # Check the model sequence produces the right string from the beginning,
    # and that its index data matches the expected encoding.
    self.assertEqual(str(model1), gapped)
    self.assertEqual(model1._data, [0, 4, 1, 4, 2, 4, 3, 4])

    # ModelSequence should maybe have the same degap method as a normal
    # sequence; here only the string form of the result is compared.
    self.assertEqual(str(model1.degap()), ungapped)
def test_seq_ungapping(self):
    """Gapped RNA should degap to 'UCAG' as a sequence and as a model."""
    with_gaps, without_gaps = 'U-C-A-G-', 'UCAG'
    degen_gapped = RNA.Alphabets.DegenGapped

    rna1 = RnaSequence(with_gaps, Name='rna1')
    model1 = ModelSequence(with_gaps, Name='rna1', Alphabet=degen_gapped)

    # Regular sequence: equal to its source string, gaps removed by degap().
    self.assertEqual(rna1, with_gaps)
    self.assertEqual(rna1.degap(), without_gaps)

    # Model sequence: verify the string form and the raw index data first.
    self.assertEqual(str(model1), with_gaps)
    expected_indices = [0, 4, 1, 4, 2, 4, 3, 4]
    self.assertEqual(model1._data, expected_indices)

    # ModelSequence should maybe have the same degap method as a normal
    # sequence; the result is compared via str().
    self.assertEqual(str(model1.degap()), without_gaps)
def setUp(self):
    """setUp: provide one RNA as a plain string, RnaSequence and ModelSequence."""
    raw = 'UCAG-RYN-N'
    self.s1 = raw
    self.rna1 = RnaSequence(raw, Name='rna1')
    self.m1 = ModelSequence(
        raw,
        Name='rna1',
        Alphabet=RNA.Alphabets.DegenGapped,
    )
def test_extract_seqs(self):
    """extract_seqs: should return plain strings for every input format"""
    degen_gapped = RNA.Alphabets.DegenGapped
    expected = ['ACGUAGC', 'GGUAGCG']

    fasta_string = ">seq1\nACGUAGC\n>seq2\nGGUAGCG"
    fasta_lines = [">seq1", "ACGUAGC", ">seq2", "GGUAGCG"]
    bare_strings = ['ACGUAGC', 'GGUAGCG']
    rna_objects = [RnaSequence('ACGUAGC'), RnaSequence('GGUAGCG')]
    model_objects = [
        ModelSequence('ACGUAGC', Name='rna1', Alphabet=degen_gapped),
        ModelSequence('GGUAGCG', Name='rna2', Alphabet=degen_gapped),
    ]

    # Every representation must yield the same two sequence strings.
    all_inputs = (
        fasta_string,
        fasta_lines,
        bare_strings,
        rna_objects,
        model_objects,
    )
    for seqs in all_inputs:
        self.assertEqual(extract_seqs(seqs), expected)
def test_subset_seqs_DenseAlignment(self):
    """takeSeqs by name should match getSubAlignment by sequence index.

    Reads ``self.da``, the DenseAlignment fixture the test case is expected
    to provide. The previously constructed ``model1``..``model3`` and
    ``sub_da`` locals were never referenced by any assertion and have been
    removed.
    """
    # Select the same single sequence two ways: by its name and by its index.
    # NOTE(review): assumes 'rna1' is the first sequence in self.da -- confirm
    # against the fixture's setUp.
    obs_sub_da_TS = self.da.takeSeqs(['rna1'])
    obs_sub_da_SA = self.da.getSubAlignment(seqs=[0])

    # Both routes must give equal alignments and identical string output
    # (the mapping of key to char array is what this guards).
    self.assertEqual(obs_sub_da_TS, obs_sub_da_SA)
    self.assertEqual(str(obs_sub_da_TS), str(obs_sub_da_SA))
def test_score_sequence_object(self):
    """score: should accept sequence objects, not just strings, as input"""
    expected_scores = [6, 2, -3, 0]

    # DnaSequence object
    dna_seq = DNA.Sequence("ATTCAC")
    dna_scores = self.score1.score(dna_seq, offset=0)
    self.assertEqual(dna_scores, expected_scores)

    # ModelSequence object
    model_seq = ModelSequence("ATTCAC", Alphabet=DNA.Alphabet)
    model_scores = self.score1.score(model_seq, offset=0)
    self.assertEqual(model_scores, expected_scores)
def setUp(self):
    """Build named and unnamed RNA fixtures as sequences and alignments."""
    degen_gapped = RNA.Alphabets.DegenGapped

    # Named sequences: regular RnaSequence objects ...
    self.rna1 = RnaSequence('UCAGGG', Name='rna1')
    self.rna2 = RnaSequence('YCU-RG', Name='rna2')
    self.rna3 = RnaSequence('CAA-NR', Name='rna3')
    # ... and their ModelSequence counterparts.
    self.model1 = ModelSequence('UCAGGG', Name='rna1', Alphabet=degen_gapped)
    self.model2 = ModelSequence('YCU-RG', Name='rna2', Alphabet=degen_gapped)
    self.model3 = ModelSequence('CAA-NR', Name='rna3', Alphabet=degen_gapped)
    self.aln = Alignment([self.rna1, self.rna2, self.rna3], MolType=RNA)
    self.da = DenseAlignment(
        [self.model1, self.model2, self.model3],
        MolType=RNA,
        Alphabet=degen_gapped,
    )

    # The same sequences again, this time without names.
    self.nn_rna1 = RnaSequence('UCAGGG')
    self.nn_rna2 = RnaSequence('YCU-RG')
    self.nn_rna3 = RnaSequence('CAA-NR')
    self.nn_model1 = ModelSequence('UCAGGG', Alphabet=degen_gapped)
    self.nn_model2 = ModelSequence('YCU-RG', Alphabet=degen_gapped)
    self.nn_model3 = ModelSequence('CAA-NR', Alphabet=degen_gapped)
    self.nn_aln = Alignment(
        [self.nn_rna1, self.nn_rna2, self.nn_rna3],
        MolType=RNA,
    )
    self.nn_da = DenseAlignment(
        [self.nn_model1, self.nn_model2, self.nn_model3],
        MolType=RNA,
        Alphabet=degen_gapped,
    )