示例#1
0
 def test_seq_equality(self):
     """ArraySequences built from identical arguments should compare equal."""
     degen_gapped = RNA.alphabets.degen_gapped
     first = ArraySequence("UCG", name="rna1", alphabet=degen_gapped)
     second = ArraySequence("UCG", name="rna1", alphabet=degen_gapped)
     # the two objects themselves are expected to be equal ...
     self.assertEqual(first, second)
     # ... and so are their string representations
     self.assertEqual(str(first), str(second))
示例#2
0
    def test_ArrayAlignment_without_moltype(self):
        """ArrayAlignment built with no moltype argument still renders its seqs.

        No ``moltype`` is passed to the constructor; the alignment is expected
        to pick it up from the sequences it is given.
        """
        degen_gapped = RNA.alphabets.degen_gapped
        seqs = [
            ArraySequence("UCAG", alphabet=degen_gapped, name="rna1"),
            ArraySequence("CCCR", alphabet=degen_gapped, name="rna2"),
        ]
        aln = ArrayAlignment(seqs)
        # one ">name" line followed by one residue line per sequence,
        # terminated by a trailing newline
        expected = ">rna1\nUCAG\n>rna2\nCCCR\n"
        self.assertEqual(str(aln), expected)
示例#3
0
    def test_subset_positions_ArrayAlignment(self):
        """Columns 0, 1 and 5 of self.da should match a hand-built alignment.

        Both ``take_positions`` and ``get_sub_alignment`` are checked against
        the same expected alignment and the same expected index arrays.
        """
        degen_gapped = RNA.alphabets.degen_gapped
        # The indices of the gap and ambiguity characters are looked up from
        # the alphabet object instead of being hard-coded.
        # The full data comes from these seqs:
        #   'UCAGGG'
        #   'YCU-RG'
        #   'CAA-NR'
        gap, n_code, r_code, y_code = [degen_gapped.index(char) for char in "-NRY"]
        expected_full = array(
            [
                [0, 1, 2, 3, 3, 3],
                [y_code, 1, 0, gap, r_code, 3],
                [1, 2, 2, gap, n_code, r_code],
            ]
        )

        # the alignment we expect after keeping positions 0, 1 and 5
        expected_aln = ArrayAlignment(
            [
                ArraySequence(residues, name=name, alphabet=degen_gapped)
                for name, residues in (("rna1", "UCG"), ("rna2", "YCG"), ("rna3", "CAR"))
            ],
            moltype=RNA,
            alphabet=degen_gapped,
        )
        expected_sub = array([[0, 1, 3], [y_code, 1, 3], [1, 2, r_code]])

        # sanity-check the fixture and the hand-built alignment first
        self.assertEqual(self.da.array_seqs, expected_full)
        self.assertEqual(self.da.array_positions, transpose(expected_full))
        self.assertEqual(expected_aln.array_seqs, expected_sub)
        self.assertEqual(expected_aln.array_positions, transpose(expected_sub))

        columns = [0, 1, 5]
        via_take_positions = self.da.take_positions(columns)
        via_sub_alignment = self.da.get_sub_alignment(pos=columns)

        # get_sub_alignment is verified first, then take_positions; each must
        # equal the expected alignment, differ from the full one, and expose
        # the expected underlying arrays
        for observed in (via_sub_alignment, via_take_positions):
            self.assertEqual(observed, expected_aln)
            self.assertNotEqual(observed, self.da)
            self.assertEqual(observed.array_seqs, expected_sub)
            self.assertEqual(observed.array_positions, transpose(expected_sub))
示例#4
0
    def test_seq_ungapping(self):
        """Gapped RnaSequence and ArraySequence should both degap to "UCAG"."""
        gapped = "U-C-A-G-"
        plain_seq = RnaSequence(gapped, name="rna1")
        array_seq = ArraySequence(
            gapped, name="rna1", alphabet=RNA.alphabets.degen_gapped
        )

        self.assertEqual(plain_seq, gapped)
        self.assertEqual(plain_seq.degap(), "UCAG")

        # check it produces the right string from the beginning
        self.assertEqual(str(array_seq), gapped)
        self.assertEqual(array_seq._data, [0, 4, 1, 4, 2, 4, 3, 4])
        # the ArraySequence is expected to offer the same degap method as
        # the normal sequence
        self.assertEqual(str(array_seq.degap()), "UCAG")
示例#5
0
    def test_subset_seqs_ArrayAlignment(self):
        """take_seqs by name and get_sub_alignment by index should agree.

        Selecting ``"rna1"`` by name and selecting sequence index 0 from
        ``self.da`` are expected to give equal alignments with equal string
        forms.
        """
        # NOTE: the original version built three ArraySequences and an
        # ArrayAlignment here that no assertion ever used; that dead setup
        # has been removed.
        by_name = self.da.take_seqs(["rna1"])
        by_index = self.da.get_sub_alignment(seqs=[0])

        # Compare both the objects and their rendered text.
        self.assertEqual(by_name, by_index)
        self.assertEqual(str(by_name), str(by_index))
示例#6
0
    def setUp(self):
        """Create the named and unnamed RNA fixtures used by the tests.

        For both flavours the same three residue strings are wrapped as
        RnaSequence objects (collected into an Alignment) and as
        ArraySequence objects (collected into an ArrayAlignment).
        """
        degen_gapped = RNA.alphabets.degen_gapped
        residues = ("UCAGGG", "YCU-RG", "CAA-NR")
        labelled = list(zip(residues, ("rna1", "rna2", "rna3")))

        # named sequences
        self.rna1, self.rna2, self.rna3 = [
            RnaSequence(seq, name=label) for seq, label in labelled
        ]
        self.model1, self.model2, self.model3 = [
            ArraySequence(seq, name=label, alphabet=degen_gapped)
            for seq, label in labelled
        ]

        named_seqs = [self.rna1, self.rna2, self.rna3]
        named_models = [self.model1, self.model2, self.model3]
        self.aln = Alignment(named_seqs, moltype=RNA)
        self.da = ArrayAlignment(named_models, moltype=RNA, alphabet=degen_gapped)

        # seqs no name
        self.nn_rna1, self.nn_rna2, self.nn_rna3 = [
            RnaSequence(seq) for seq in residues
        ]
        self.nn_model1, self.nn_model2, self.nn_model3 = [
            ArraySequence(seq, alphabet=degen_gapped) for seq in residues
        ]

        unnamed_seqs = [self.nn_rna1, self.nn_rna2, self.nn_rna3]
        unnamed_models = [self.nn_model1, self.nn_model2, self.nn_model3]
        self.nn_aln = Alignment(unnamed_seqs, moltype=RNA)
        self.nn_da = ArrayAlignment(
            unnamed_models, moltype=RNA, alphabet=degen_gapped
        )