def test_subset_positions_Alignment(self):
    """take_positions on an Alignment returns just the requested columns"""
    expected_seqs = [
        RnaSequence("UCG", name="rna1"),
        RnaSequence("YCG", name="rna2"),
        RnaSequence("CAR", name="rna3"),
    ]
    expected = Alignment(expected_seqs, moltype=RNA)
    observed = self.aln.take_positions([0, 1, 5])

    self.assertEqual(observed, expected)
    # the subset must no longer compare equal to the full alignment
    self.assertNotEqual(observed, self.aln)
    # The string forms should agree as well. This was noted as failing
    # when sequence order is not maintained; see the separate test.
    self.assertEqual(str(observed), str(expected))
def test_seq_ungapping(self):
    """degap removes the gap characters from RnaSequence and ArraySequence"""
    gapped = "U-C-A-G-"
    ungapped = "UCAG"
    plain_seq = RnaSequence(gapped, name="rna1")
    array_seq = ArraySequence(
        gapped, name="rna1", alphabet=RNA.alphabets.degen_gapped
    )

    self.assertEqual(plain_seq, gapped)
    self.assertEqual(plain_seq.degap(), ungapped)

    # the array-backed sequence must reproduce the input string as given
    self.assertEqual(str(array_seq), gapped)
    # expected index encoding: 4 sits at every "-" position of the input
    self.assertEqual(array_seq._data, [0, 4, 1, 4, 2, 4, 3, 4])
    # ArraySequence should maybe have the same degap method as normal seq
    self.assertEqual(str(array_seq.degap()), ungapped)
def test_full(self):
    """RdbParser: full data, valid and invalid"""

    def check_record(got, expected):
        # a record matches when the sequence, its string form and its
        # info all agree with the expected sequence
        self.assertEqual(got, expected)
        self.assertEqual(str(got), str(expected))
        self.assertEqual(got.info, expected.info)

    first = RnaSequence(
        "-??GG-UGAA--CGCU---ACGU-N???---",
        info=Info(
            {
                "Species": "unidentified Thermus OPB AF027020",
                "Refs": {"rRNA": ["AF027020"]},
                "OriginalSeq": "-o[oGG-U{G}AA--C^GC]U---ACGU-Nooo---",
            }
        ),
    )
    second = RnaSequence(
        "---CGAUCG--UAUACG-N???-",
        info=Info(
            {
                "Species": "Thermus silvanus X84211",
                "Refs": {"rRNA": ["X84211"]},
                "OriginalSeq": "---CGAU[C(G){--UA}U]ACG-Nooo-",
            }
        ),
    )

    # with only good records the outcome should not depend on strict
    records = list(RdbParser(RDB_LINES_ONLY_GOOD.split("\n"), strict=True))
    self.assertEqual(len(records), 2)
    check_record(records[0], first)
    check_record(records[1], second)

    records = list(RdbParser(RDB_LINES_ONLY_GOOD.split("\n"), strict=False))
    self.assertEqual(len(records), 2)
    check_record(records[0], first)

    # strict parsing must raise once the invalid record is reached
    strict_parser = RdbParser(RDB_LINES_GOOD_BAD.split("\n"), strict=True)
    with self.assertRaises(RecordError):
        list(strict_parser)

    # non-strict parsing skips the invalid record and keeps the good ones
    records = list(RdbParser(RDB_LINES_GOOD_BAD.split("\n"), strict=False))
    self.assertEqual(len(records), 2)
    check_record(records[0], first)
    check_record(records[1], second)
def test_subset_seqs_Alignment(self):
    """take_seqs on an Alignment returns the sequences selected by name"""
    named = (("UCG", "rna1"), ("YCG", "rna2"), ("CAR", "rna3"))
    seqs = [RnaSequence(raw, name=label) for raw, label in named]

    # expected result holds rna2 and rna3 only
    expected = Alignment(seqs[1:], moltype=RNA)
    full = Alignment(seqs, moltype=RNA)
    observed = full.take_seqs(["rna2", "rna3"])
    self.assertEqual(observed, expected)
    self.assertEqual(str(observed), str(expected))

    # Selected sequences should be in specified order?
    swapped = self.aln.take_seqs(["rna3", "rna2"])
    in_order = self.aln.take_seqs(["rna2", "rna3"])
    self.assertNotEqual(str(swapped), str(in_order))
def test_feature_copy_annotations_to(self):
    """test correct copy of annotations"""
    source_seq = DnaSequence("TTTTTTTTTTAAAA", name="Orig")
    feature = source_seq.add_annotation(Feature, "exon", "fred", [(0, 14)])
    target_seq = RnaSequence("UUUUUUUUUUAAAA", name="Test")
    copied = feature.copy_annotations_to(target_seq)

    self.assertEqual(len(source_seq.annotations), len(copied.annotations))
    pairs = zip(source_seq.annotations, copied.annotations)
    for original, duplicate in pairs:
        # each copy keeps the coordinates and type, but hangs off the target
        self.assertEqual(original.get_coordinates(), duplicate.get_coordinates())
        self.assertIsInstance(original, duplicate.__class__)
        self.assertIs(duplicate.parent, target_seq)

    # copying onto a sequence two positions shorter must be rejected
    with self.assertRaises(AssertionError):
        _ = feature.copy_annotations_to(target_seq[:-2])
def setUp(self):
    """Create the named and unnamed sequences and alignments used by the tests"""
    gapped_alphabet = RNA.alphabets.degen_gapped
    raw_seqs = ("UCAGGG", "YCU-RG", "CAA-NR")
    labels = ("rna1", "rna2", "rna3")

    # named sequences
    self.rna1, self.rna2, self.rna3 = [
        RnaSequence(raw, name=label) for raw, label in zip(raw_seqs, labels)
    ]
    self.model1, self.model2, self.model3 = [
        ArraySequence(raw, name=label, alphabet=gapped_alphabet)
        for raw, label in zip(raw_seqs, labels)
    ]
    self.aln = Alignment([self.rna1, self.rna2, self.rna3], moltype=RNA)
    self.da = ArrayAlignment(
        [self.model1, self.model2, self.model3],
        moltype=RNA,
        alphabet=gapped_alphabet,
    )

    # seqs no name
    self.nn_rna1, self.nn_rna2, self.nn_rna3 = [
        RnaSequence(raw) for raw in raw_seqs
    ]
    self.nn_model1, self.nn_model2, self.nn_model3 = [
        ArraySequence(raw, alphabet=gapped_alphabet) for raw in raw_seqs
    ]
    self.nn_aln = Alignment(
        [self.nn_rna1, self.nn_rna2, self.nn_rna3], moltype=RNA
    )
    self.nn_da = ArrayAlignment(
        [self.nn_model1, self.nn_model2, self.nn_model3],
        moltype=RNA,
        alphabet=gapped_alphabet,
    )