Пример #1
0
    def test_to_file(self):
        """Write a fully annotated alignment to a file and check the text.

        The alignment carries GF, GS, GR and GC metadata; what ``to_file``
        writes to the temporary file is read back and compared with the
        expected Stockholm text.
        """
        expected = ("# STOCKHOLM 1.0\n"
                    "#=GF AC RF00360\n"
                    "#=GF BM cmbuild  -F CM SEED\n"
                    "#=GF BM cmsearch  -Z 274931 -E 1000000\n"
                    "#=GF SQ 9\n"
                    "#=GF RN [1]\n"
                    "#=GF RM 11469857\n"
                    "#=GF RT TITLE1\n"
                    "#=GF RA Auth1;\n"
                    "#=GF RL J Mol Biol\n"
                    "#=GF RN [2]\n"
                    "#=GF RM 12007400\n"
                    "#=GF RT TITLE2\n"
                    "#=GF RA Auth2;\n"
                    "#=GF RL Cell\n"
                    "#=GS seq1 AC 111\n"
                    "#=GS seq2 AC 222\n"
                    "seq1          ACC-G-GGTA\n"
                    "#=GR seq1 SS  1110101111\n"
                    "seq2          TCC-G-GGCA\n"
                    "#=GR seq2 SS  0110101110\n"
                    "#=GC SS_cons  (((....)))\n//")
        alignment = StockholmAlignment(self.seqs, gc=self.GC, gf=self.GF,
                                       gs=self.GS, gr=self.GR)

        with tempfile.NamedTemporaryFile("r+") as handle:
            alignment.to_file(handle)
            # Flush buffered output and rewind so the same handle can be
            # used to read back what was just written.
            handle.flush()
            handle.seek(0)
            observed = handle.read()

        self.assertEqual(observed, expected)
Пример #2
0
    def test_to_file(self):
        """Check that ``to_file`` emits the expected Stockholm text.

        An alignment with GF, GS, GR and GC metadata is written to a
        temporary file, and the file contents are compared with the
        expected output.
        """
        full_sto = StockholmAlignment(self.seqs, gc=self.GC, gf=self.GF,
                                      gs=self.GS, gr=self.GR)
        expected_text = ("# STOCKHOLM 1.0\n"
                         "#=GF AC RF00360\n"
                         "#=GF BM cmbuild  -F CM SEED\n"
                         "#=GF BM cmsearch  -Z 274931 -E 1000000\n"
                         "#=GF SQ 9\n"
                         "#=GF RN [1]\n"
                         "#=GF RM 11469857\n"
                         "#=GF RT TITLE1\n"
                         "#=GF RA Auth1;\n"
                         "#=GF RL J Mol Biol\n"
                         "#=GF RN [2]\n"
                         "#=GF RM 12007400\n"
                         "#=GF RT TITLE2\n"
                         "#=GF RA Auth2;\n"
                         "#=GF RL Cell\n"
                         "#=GS seq1 AC 111\n"
                         "#=GS seq2 AC 222\n"
                         "seq1          ACC-G-GGTA\n"
                         "#=GR seq1 SS  1110101111\n"
                         "seq2          TCC-G-GGCA\n"
                         "#=GR seq2 SS  0110101110\n"
                         "#=GC SS_cons  (((....)))\n//")

        with tempfile.NamedTemporaryFile("r+") as tmp:
            full_sto.to_file(tmp)
            # Rewind after flushing so the read starts at the first byte.
            tmp.flush()
            tmp.seek(0)
            written_text = tmp.read()

        self.assertEqual(written_text, expected_text)
Пример #3
0
 def test_from_file_GS(self):
     """Parse a file containing #=GS lines and compare with self.GS."""
     handle = StringIO("# STOCKHOLM 1.0\n"
                       "#=GS seq2 AC 222\n"
                       "#=GS seq1 AC 111\n"
                       "seq1          ACC-G-GGTA\n"
                       "seq2          TCC-G-GGCA\n//")
     # from_file is consumed as an iterator; only the first alignment is
     # needed here.
     observed = next(StockholmAlignment.from_file(handle, DNA))
     expected = StockholmAlignment(self.seqs, {}, self.GS, {}, {})
     self.assertEqual(observed, expected)
Пример #4
0
 def test_from_file_GC(self):
     """Parse a file containing a #=GC line and compare with self.GC."""
     handle = StringIO("# STOCKHOLM 1.0\n"
                       "seq1         ACC-G-GGTA\n"
                       "seq2         TCC-G-GGCA\n"
                       "#=GC SS_cons (((....)))\n//")
     # from_file is consumed as an iterator; only the first alignment is
     # needed here.
     observed = next(StockholmAlignment.from_file(handle, DNA))
     expected = StockholmAlignment(self.seqs, {}, {}, {}, self.GC)
     self.assertEqual(observed, expected)
Пример #5
0
 def test_from_file_GR(self):
     """Parse #=GR lines from an input split into two blocks.

     The sequences and their #=GR rows are given in two chunks separated
     by a blank line; the parsed alignment is compared with one built
     from self.seqs and self.GR.
     """
     handle = StringIO("# STOCKHOLM 1.0\n"
                       "seq1          ACC-G\n"
                       "#=GR seq1 SS  11101\n"
                       "seq2          TCC-G\n"
                       "#=GR seq2 SS  01101\n"
                       "\n"
                       "seq1          -GGTA\n"
                       "#=GR seq1 SS  01111\n"
                       "seq2          -GGCA\n"
                       "#=GR seq2 SS  01110\n//")
     observed = next(StockholmAlignment.from_file(handle, DNA))
     expected = StockholmAlignment(self.seqs, {}, {}, self.GR, {})
     self.assertEqual(observed, expected)
Пример #6
0
 def test_from_file_alignment(self):
     """Parse a basic sto file whose alignment is interleaved."""
     interleaved = ("# STOCKHOLM 1.0\n"
                    "seq1      ACC-G\n"
                    "seq2      TCC-G\n\n"
                    "seq1      -GGTA\n"
                    "seq2      -GGCA\n//")
     handle = StringIO(interleaved)
     observed = next(StockholmAlignment.from_file(handle, DNA))
     # No metadata in the input, so the expected alignment is built from
     # the sequences alone.
     expected = StockholmAlignment(self.seqs)
     self.assertEqual(observed, expected)
Пример #7
0
 def test_from_file_GF(self):
     """Parse a file containing #=GF lines and compare with self.GF."""
     # Drop RN from the expected metadata; per the original note, the
     # intent is to make sure the RN lines are added automatically.
     del self.GF["RN"]
     handle = StringIO("# STOCKHOLM 1.0\n"
                       "#=GF RN [1]\n"
                       "#=GF RM 11469857\n"
                       "#=GF RT TITLE1\n"
                       "#=GF RA Auth1;\n"
                       "#=GF RL J Mol Biol\n"
                       "#=GF RN [2]\n"
                       "#=GF RM 12007400\n"
                       "#=GF RT TITLE2\n"
                       "#=GF RA Auth2;\n"
                       "#=GF RL Cell\n"
                       "#=GF AC RF00360\n"
                       "#=GF BM cmbuild  -F CM SEED\n"
                       "#=GF BM cmsearch  -Z 274931 -E 1000000\n"
                       "#=GF SQ 9\n"
                       "seq1         ACC-G-GGTA\n"
                       "seq2         TCC-G-GGCA\n//")
     observed = next(StockholmAlignment.from_file(handle, DNA))
     expected = StockholmAlignment(self.seqs, self.GF, {}, {}, {})
     self.assertEqual(observed, expected)
Пример #8
0
 def test_str(self):
     """Check str() of an alignment carrying GF, GS, GR and GC metadata."""
     alignment = StockholmAlignment(self.seqs, gc=self.GC, gf=self.GF,
                                    gs=self.GS, gr=self.GR)
     observed = str(alignment)
     expected = ("# STOCKHOLM 1.0\n"
                 "#=GF AC RF00360\n"
                 "#=GF BM cmbuild  -F CM SEED\n"
                 "#=GF BM cmsearch  -Z 274931 -E 1000000\n"
                 "#=GF SQ 9\n"
                 "#=GF RN [1]\n"
                 "#=GF RM 11469857\n"
                 "#=GF RT TITLE1\n"
                 "#=GF RA Auth1;\n"
                 "#=GF RL J Mol Biol\n"
                 "#=GF RN [2]\n"
                 "#=GF RM 12007400\n"
                 "#=GF RT TITLE2\n"
                 "#=GF RA Auth2;\n"
                 "#=GF RL Cell\n"
                 "#=GS seq1 AC 111\n"
                 "#=GS seq2 AC 222\n"
                 "seq1          ACC-G-GGTA\n"
                 "#=GR seq1 SS  1110101111\n"
                 "seq2          TCC-G-GGCA\n"
                 "#=GR seq2 SS  0110101110\n"
                 "#=GC SS_cons  (((....)))\n//")
     self.assertEqual(observed, expected)
Пример #9
0
 def test_from_file_GC(self):
     """Check that a #=GC line in the input ends up as GC metadata."""
     sto_text = ("# STOCKHOLM 1.0\n"
                 "seq1         ACC-G-GGTA\n"
                 "seq2         TCC-G-GGCA\n"
                 "#=GC SS_cons (((....)))\n//")
     parsed = next(StockholmAlignment.from_file(StringIO(sto_text), DNA))
     # Only the last positional argument is populated, with self.GC.
     wanted = StockholmAlignment(self.seqs, {}, {}, {}, self.GC)
     self.assertEqual(parsed, wanted)
Пример #10
0
 def test_from_file_GS(self):
     """Check that #=GS lines in the input end up as GS metadata."""
     sto_text = ("# STOCKHOLM 1.0\n"
                 "#=GS seq2 AC 222\n"
                 "#=GS seq1 AC 111\n"
                 "seq1          ACC-G-GGTA\n"
                 "seq2          TCC-G-GGCA\n//")
     parsed = next(StockholmAlignment.from_file(StringIO(sto_text), DNA))
     wanted = StockholmAlignment(self.seqs, {}, self.GS, {}, {})
     self.assertEqual(parsed, wanted)
Пример #11
0
 def test_str_gf(self):
     """Check str() of an alignment that carries only GF metadata."""
     alignment = StockholmAlignment(self.seqs, gc=None, gf=self.GF,
                                    gs=None, gr=None)
     observed = str(alignment)
     expected = ("# STOCKHOLM 1.0\n"
                 "#=GF AC RF00360\n"
                 "#=GF BM cmbuild  -F CM SEED\n"
                 "#=GF BM cmsearch  -Z 274931 -E 1000000\n"
                 "#=GF SQ 9\n"
                 "#=GF RN [1]\n"
                 "#=GF RM 11469857\n"
                 "#=GF RT TITLE1\n"
                 "#=GF RA Auth1;\n"
                 "#=GF RL J Mol Biol\n"
                 "#=GF RN [2]\n"
                 "#=GF RM 12007400\n"
                 "#=GF RT TITLE2\n"
                 "#=GF RA Auth2;\n"
                 "#=GF RL Cell\n"
                 "seq1          ACC-G-GGTA\n"
                 "seq2          TCC-G-GGCA\n//")
     self.assertEqual(observed, expected)
Пример #12
0
 def test_from_file_alignment(self):
     """Check that a basic interleaved sto file can be parsed."""
     handle = StringIO("# STOCKHOLM 1.0\n"
                       "seq1      ACC-G\n"
                       "seq2      TCC-G\n\n"
                       "seq1      -GGTA\n"
                       "seq2      -GGCA\n//")
     parsed = next(StockholmAlignment.from_file(handle, DNA))
     # The input has no markup lines, so only the sequences are expected.
     wanted = StockholmAlignment(self.seqs)
     self.assertEqual(parsed, wanted)
Пример #13
0
 def test_from_file_GR(self):
     """Check that #=GR rows given in two chunks are parsed correctly."""
     sto_text = ("# STOCKHOLM 1.0\n"
                 "seq1          ACC-G\n"
                 "#=GR seq1 SS  11101\n"
                 "seq2          TCC-G\n"
                 "#=GR seq2 SS  01101\n"
                 "\n"
                 "seq1          -GGTA\n"
                 "#=GR seq1 SS  01111\n"
                 "seq2          -GGCA\n"
                 "#=GR seq2 SS  01110\n//")
     parsed = next(StockholmAlignment.from_file(StringIO(sto_text), DNA))
     wanted = StockholmAlignment(self.seqs, {}, {}, self.GR, {})
     self.assertEqual(parsed, wanted)
Пример #14
0
 def test_from_file_multi(self):
     """Parse a file holding two alignments and check both, and only both.

     The first record carries #=GS lines and the second #=GR lines. The
     parser output is materialized into a list so that yielding fewer
     (or more) than two alignments fails the test instead of passing
     silently.
     """
     sto = StringIO("# STOCKHOLM 1.0\n"
                    "#=GS seq2 AC 222\n"
                    "#=GS seq1 AC 111\n"
                    "seq1          ACC-G-GGTA\n"
                    "seq2          TCC-G-GGCA\n//\n"
                    "# STOCKHOLM 1.0\n"
                    "seq1          ACC-G-GGTA\n"
                    "#=GR seq1 SS  1110101111\n"
                    "seq2          TCC-G-GGCA\n"
                    "#=GR seq2 SS  0110101110\n//")
     parsed = list(StockholmAlignment.from_file(sto, DNA))
     expected = [StockholmAlignment(self.seqs, {}, self.GS, {}, {}),
                 StockholmAlignment(self.seqs, {}, {}, self.GR, {})]
     # Check the count first: zip() below would otherwise hide a short
     # or long result.
     self.assertEqual(len(parsed), len(expected),
                      "Expected exactly 2 sto alignments to be parsed!")
     for obs, exp in zip(parsed, expected):
         self.assertEqual(obs, exp)
Пример #15
0
 def test_str_gc(self):
     """Check str() of an alignment that carries only GC metadata."""
     alignment = StockholmAlignment(self.seqs, gc=self.GC, gf=None,
                                    gs=None, gr=None)
     observed = str(alignment)
     expected = ("# STOCKHOLM 1.0\n"
                 "seq1          ACC-G-GGTA\n"
                 "seq2          TCC-G-GGCA\n"
                 "#=GC SS_cons  (((....)))\n//")
     self.assertEqual(observed, expected)
Пример #16
0
 def test_str_gr(self):
     """Check str() of an alignment that carries only GR metadata."""
     alignment = StockholmAlignment(self.seqs, gc=None, gf=None,
                                    gs=None, gr=self.GR)
     observed = str(alignment)
     expected = ("# STOCKHOLM 1.0\n"
                 "seq1          ACC-G-GGTA\n"
                 "#=GR seq1 SS  1110101111\n"
                 "seq2          TCC-G-GGCA\n"
                 "#=GR seq2 SS  0110101110\n//")
     self.assertEqual(observed, expected)
Пример #17
0
 def test_str_gr(self):
     """Check that str() prints each #=GR row after its sequence row."""
     gr_only = StockholmAlignment(self.seqs, gc=None, gf=None, gs=None,
                                  gr=self.GR)
     rendered = str(gr_only)
     wanted = ("# STOCKHOLM 1.0\n"
               "seq1          ACC-G-GGTA\n"
               "#=GR seq1 SS  1110101111\n"
               "seq2          TCC-G-GGCA\n"
               "#=GR seq2 SS  0110101110\n//")
     self.assertEqual(rendered, wanted)
Пример #18
0
 def test_str_gc(self):
     """Check that str() prints the #=GC row after the sequence rows."""
     gc_only = StockholmAlignment(self.seqs, gc=self.GC, gf=None, gs=None,
                                  gr=None)
     rendered = str(gc_only)
     wanted = ("# STOCKHOLM 1.0\n"
               "seq1          ACC-G-GGTA\n"
               "seq2          TCC-G-GGCA\n"
               "#=GC SS_cons  (((....)))\n//")
     self.assertEqual(rendered, wanted)
Пример #19
0
    def test_str_trees(self):
        """Check str() of an alignment whose GF metadata holds two trees.

        The GF mapping pairs two NH entries with two TN entries; the
        expected text lists each TN line followed by its NH line.
        """
        # Build from a list of pairs so the key order is explicit and does
        # not depend on the ordering behaviour of a plain dict literal.
        tree_gf = OrderedDict([("NH", ["IMATREE", "IMATREETOO"]),
                               ("TN", ["Tree2", "Tree1"])])
        st = StockholmAlignment(self.seqs, gc=None, gf=tree_gf, gs=None,
                                gr=None)
        obs = str(st)
        exp = ("# STOCKHOLM 1.0\n"
               "#=GF TN Tree2\n"
               "#=GF NH IMATREE\n"
               "#=GF TN Tree1\n"
               "#=GF NH IMATREETOO\n"
               "seq1          ACC-G-GGTA\n"
               "seq2          TCC-G-GGCA\n//")

        self.assertEqual(obs, exp)
Пример #20
0
 def test_str_gs(self):
     """Check str() of an alignment that carries only GS metadata."""
     alignment = StockholmAlignment(self.seqs, gc=None, gf=None,
                                    gs=self.GS, gr=None)
     observed = str(alignment)
     expected = ("# STOCKHOLM 1.0\n"
                 "#=GS seq1 AC 111\n"
                 "#=GS seq2 AC 222\n"
                 "seq1          ACC-G-GGTA\n"
                 "seq2          TCC-G-GGCA\n//")
     self.assertEqual(observed, expected)
Пример #21
0
 def test_from_file_GF(self):
     """Check that #=GF lines in the input end up as GF metadata."""
     # RN is removed from the expected mapping; per the original note,
     # this is to make sure RN gets added automatically.
     del self.GF["RN"]
     sto_text = ("# STOCKHOLM 1.0\n"
                 "#=GF RN [1]\n"
                 "#=GF RM 11469857\n"
                 "#=GF RT TITLE1\n"
                 "#=GF RA Auth1;\n"
                 "#=GF RL J Mol Biol\n"
                 "#=GF RN [2]\n"
                 "#=GF RM 12007400\n"
                 "#=GF RT TITLE2\n"
                 "#=GF RA Auth2;\n"
                 "#=GF RL Cell\n"
                 "#=GF AC RF00360\n"
                 "#=GF BM cmbuild  -F CM SEED\n"
                 "#=GF BM cmsearch  -Z 274931 -E 1000000\n"
                 "#=GF SQ 9\n"
                 "seq1         ACC-G-GGTA\n"
                 "seq2         TCC-G-GGCA\n//")
     parsed = next(StockholmAlignment.from_file(StringIO(sto_text), DNA))
     wanted = StockholmAlignment(self.seqs, self.GF, {}, {}, {})
     self.assertEqual(parsed, wanted)
Пример #22
0
 def setUp(self):
     """Build the sequences and GF/GS/GR/GC metadata shared by the tests.

     Also stores ``self.st``, an alignment built from all of them.
     """
     self.seqs = [DNASequence("ACC-G-GGTA", id="seq1"),
                  DNASequence("TCC-G-GGCA", id="seq2")]
     # Each GF key is listed exactly once. Repeating a key in the pair
     # list would only overwrite the value in its first position, so a
     # trailing duplicate "RN" entry added nothing and obscured the
     # actual key order.
     self.GF = OrderedDict([
         ("AC", "RF00360"),
         ("BM", ["cmbuild  -F CM SEED",
                 "cmsearch  -Z 274931 -E 1000000"]),
         ("SQ", "9"),
         ("RT", ["TITLE1", "TITLE2"]),
         ("RN", ["[1]", "[2]"]),
         ("RA", ["Auth1;", "Auth2;"]),
         ("RL", ["J Mol Biol", "Cell"]),
         ("RM", ["11469857", "12007400"]),
     ])
     self.GS = {"AC": OrderedDict([("seq1", "111"), ("seq2", "222")])}
     self.GR = {"SS": OrderedDict([("seq1", "1110101111"),
                                   ("seq2", "0110101110")])}
     self.GC = {"SS_cons": "(((....)))"}
     self.st = StockholmAlignment(self.seqs, gc=self.GC, gf=self.GF,
                                  gs=self.GS, gr=self.GR)
Пример #23
0
 def test_str_gs(self):
     """Check that str() prints the #=GS rows before the sequence rows."""
     gs_only = StockholmAlignment(self.seqs, gc=None, gf=None, gs=self.GS,
                                  gr=None)
     rendered = str(gs_only)
     wanted = ("# STOCKHOLM 1.0\n"
               "#=GS seq1 AC 111\n"
               "#=GS seq2 AC 222\n"
               "seq1          ACC-G-GGTA\n"
               "seq2          TCC-G-GGCA\n//")
     self.assertEqual(rendered, wanted)
Пример #24
0
 def test_from_file_multi(self):
     """Check that a two-alignment file yields exactly two alignments.

     The first record has #=GS lines, the second #=GR lines. All parsed
     alignments are collected before comparing, so a parser that yields
     too few or too many alignments makes the test fail.
     """
     sto = StringIO("# STOCKHOLM 1.0\n"
                    "#=GS seq2 AC 222\n"
                    "#=GS seq1 AC 111\n"
                    "seq1          ACC-G-GGTA\n"
                    "seq2          TCC-G-GGCA\n//\n"
                    "# STOCKHOLM 1.0\n"
                    "seq1          ACC-G-GGTA\n"
                    "#=GR seq1 SS  1110101111\n"
                    "seq2          TCC-G-GGCA\n"
                    "#=GR seq2 SS  0110101110\n//")
     obs_stos = list(StockholmAlignment.from_file(sto, DNA))
     exp_stos = [StockholmAlignment(self.seqs, {}, self.GS, {}, {}),
                 StockholmAlignment(self.seqs, {}, {}, self.GR, {})]
     # A plain pairwise loop would silently accept a short result, so
     # the number of alignments is asserted explicitly.
     self.assertEqual(len(obs_stos), len(exp_stos),
                      "Expected exactly 2 sto alignments to be parsed!")
     for obs, exp in zip(obs_stos, exp_stos):
         self.assertEqual(obs, exp)
Пример #25
0
 def setUp(self):
     """Setup for stockholm tests.

     Creates the two sequences, the GF/GS/GR/GC metadata mappings and
     ``self.st``, an alignment built from all of them.
     """
     self.seqs = [DNASequence("ACC-G-GGTA", id="seq1"), DNASequence("TCC-G-GGCA", id="seq2")]
     # Every GF key appears once; a repeated "RN" pair would just
     # overwrite the value at the key's first position.
     self.GF = OrderedDict(
         [
             ("AC", "RF00360"),
             ("BM", ["cmbuild  -F CM SEED", "cmsearch  -Z 274931 -E 1000000"]),
             ("SQ", "9"),
             ("RT", ["TITLE1", "TITLE2"]),
             ("RN", ["[1]", "[2]"]),
             ("RA", ["Auth1;", "Auth2;"]),
             ("RL", ["J Mol Biol", "Cell"]),
             ("RM", ["11469857", "12007400"]),
         ]
     )
     self.GS = {"AC": OrderedDict([("seq1", "111"), ("seq2", "222")])}
     self.GR = {"SS": OrderedDict([("seq1", "1110101111"), ("seq2", "0110101110")])}
     self.GC = {"SS_cons": "(((....)))"}
     self.st = StockholmAlignment(self.seqs, gc=self.GC, gf=self.GF, gs=self.GS, gr=self.GR)
Пример #26
0
 def setUp(self):
     """Build the sequences and GF/GS/GR/GC metadata shared by the tests.

     Also stores ``self.st``, an alignment built from all of them.
     """
     self.seqs = [DNA("ACC-G-GGTA", metadata={'id': "seq1"}),
                  DNA("TCC-G-GGCA", metadata={'id': "seq2"})]
     # Each GF key is listed exactly once. A repeated key in the pair
     # list only overwrites the value at its first position, so listing
     # "RN" again at the end would be dead weight.
     self.GF = OrderedDict([
         ("AC", "RF00360"),
         ("BM", ["cmbuild  -F CM SEED",
                 "cmsearch  -Z 274931 -E 1000000"]),
         ("SQ", "9"),
         ("RT", ["TITLE1", "TITLE2"]),
         ("RN", ["[1]", "[2]"]),
         ("RA", ["Auth1;", "Auth2;"]),
         ("RL", ["J Mol Biol", "Cell"]),
         ("RM", ["11469857", "12007400"]),
     ])
     self.GS = {"AC": OrderedDict([("seq1", "111"), ("seq2", "222")])}
     self.GR = {"SS": OrderedDict([("seq1", "1110101111"),
                                   ("seq2", "0110101110")])}
     self.GC = {"SS_cons": "(((....)))"}
     self.st = StockholmAlignment(self.seqs, gc=self.GC, gf=self.GF,
                                  gs=self.GS, gr=self.GR)
Пример #27
0
class StockholmAlignmentTests(TestCase):

    """Tests for the StockholmAlignment object.

    Covers metadata retrieval, parsing with ``from_file``, the private
    ``_parse_*_info`` helpers, and output through ``str()`` and ``to_file``.
    """

    def setUp(self):
        """Setup for stockholm tests.

        Creates the two sequences, the GF/GS/GR/GC metadata mappings and
        ``self.st``, an alignment built from all of them.
        """
        self.seqs = [DNASequence("ACC-G-GGTA", id="seq1"), DNASequence("TCC-G-GGCA", id="seq2")]
        # Every GF key appears once; a repeated "RN" pair would just
        # overwrite the value at the key's first position.
        self.GF = OrderedDict(
            [
                ("AC", "RF00360"),
                ("BM", ["cmbuild  -F CM SEED", "cmsearch  -Z 274931 -E 1000000"]),
                ("SQ", "9"),
                ("RT", ["TITLE1", "TITLE2"]),
                ("RN", ["[1]", "[2]"]),
                ("RA", ["Auth1;", "Auth2;"]),
                ("RL", ["J Mol Biol", "Cell"]),
                ("RM", ["11469857", "12007400"]),
            ]
        )
        self.GS = {"AC": OrderedDict([("seq1", "111"), ("seq2", "222")])}
        self.GR = {"SS": OrderedDict([("seq1", "1110101111"), ("seq2", "0110101110")])}
        self.GC = {"SS_cons": "(((....)))"}
        self.st = StockholmAlignment(self.seqs, gc=self.GC, gf=self.GF, gs=self.GS, gr=self.GR)

    def test_retrieve_metadata(self):
        """Make sure the gc/gf/gs/gr attributes return what was passed in"""
        self.assertEqual(self.st.gc, self.GC)
        self.assertEqual(self.st.gf, self.GF)
        self.assertEqual(self.st.gs, self.GS)
        self.assertEqual(self.st.gr, self.GR)

    def test_from_file_alignment(self):
        """make sure can parse basic sto file with interleaved alignment"""
        sto = StringIO(
            "# STOCKHOLM 1.0\n" "seq1      ACC-G\n" "seq2      TCC-G\n\n" "seq1      -GGTA\n" "seq2      -GGCA\n//"
        )
        obs_sto = next(StockholmAlignment.from_file(sto, DNA))
        exp_sto = StockholmAlignment(self.seqs)
        self.assertEqual(obs_sto, exp_sto)

    def test_from_file_GF(self):
        """Make sure GF lines are parsed correctly"""
        # remove rn line to make sure auto-added
        self.GF.pop("RN")
        sto = StringIO(
            "# STOCKHOLM 1.0\n#=GF RN [1]\n#=GF RM 11469857\n"
            "#=GF RT TITLE1\n#=GF RA Auth1;\n#=GF RL J Mol Biol\n"
            "#=GF RN [2]\n#=GF RM 12007400\n#=GF RT TITLE2\n"
            "#=GF RA Auth2;\n#=GF RL Cell\n#=GF AC RF00360\n"
            "#=GF BM cmbuild  -F CM SEED\n"
            "#=GF BM cmsearch  -Z 274931 -E 1000000\n#=GF SQ 9\n"
            "seq1         ACC-G-GGTA\nseq2         TCC-G-GGCA\n//"
        )
        obs_sto = next(StockholmAlignment.from_file(sto, DNA))
        exp_sto = StockholmAlignment(self.seqs, self.GF, {}, {}, {})
        self.assertEqual(obs_sto, exp_sto)

    def test_from_file_GC(self):
        """Make sure GC lines are parsed correctly"""
        sto = StringIO(
            "# STOCKHOLM 1.0\n" "seq1         ACC-G-GGTA\nseq2         TCC-G-GGCA\n" "#=GC SS_cons (((....)))\n//"
        )
        obs_sto = next(StockholmAlignment.from_file(sto, DNA))
        exp_sto = StockholmAlignment(self.seqs, {}, {}, {}, self.GC)
        self.assertEqual(obs_sto, exp_sto)

    def test_from_file_GS(self):
        """Make sure GS lines are parsed correctly"""
        sto = StringIO(
            "# STOCKHOLM 1.0\n#=GS seq2 AC 222\n#=GS seq1 AC 111\n"
            "seq1          ACC-G-GGTA\n"
            "seq2          TCC-G-GGCA\n//"
        )
        obs_sto = next(StockholmAlignment.from_file(sto, DNA))
        exp_sto = StockholmAlignment(self.seqs, {}, self.GS, {}, {})
        self.assertEqual(obs_sto, exp_sto)

    def test_from_file_GR(self):
        """Make sure GR lines are parsed correctly"""
        sto = StringIO(
            "# STOCKHOLM 1.0\nseq1          ACC-G\n"
            "#=GR seq1 SS  11101\nseq2          TCC-G\n"
            "#=GR seq2 SS  01101\n\nseq1          -GGTA\n"
            "#=GR seq1 SS  01111\nseq2          -GGCA\n"
            "#=GR seq2 SS  01110\n//"
        )
        obs_sto = next(StockholmAlignment.from_file(sto, DNA))
        exp_sto = StockholmAlignment(self.seqs, {}, {}, self.GR, {})
        self.assertEqual(obs_sto, exp_sto)

    def test_from_file_multi(self):
        """Make sure yield works correctly with multi-alignment sto files"""
        sto = StringIO(
            "# STOCKHOLM 1.0\n#=GS seq2 AC 222\n#=GS seq1 AC 111\n"
            "seq1          ACC-G-GGTA\n"
            "seq2          TCC-G-GGCA\n//\n"
            "# STOCKHOLM 1.0\nseq1          ACC-G-GGTA\n"
            "#=GR seq1 SS  1110101111\nseq2          TCC-G-GGCA\n"
            "#=GR seq2 SS  0110101110\n//"
        )
        obs_stos = list(StockholmAlignment.from_file(sto, DNA))
        exp_stos = [
            StockholmAlignment(self.seqs, {}, self.GS, {}, {}),
            StockholmAlignment(self.seqs, {}, {}, self.GR, {}),
        ]
        # Assert the count explicitly: a pairwise loop alone would pass
        # silently if the parser yielded fewer than two alignments.
        self.assertEqual(len(obs_stos), len(exp_stos), "Expected exactly 2 sto alignments to be parsed!")
        for obs, exp in zip(obs_stos, exp_stos):
            self.assertEqual(obs, exp)

    def test_parse_gf_multiline_nh(self):
        """Makes sure a multiline NH code is parsed correctly"""
        sto = ["#=GF TN MULTILINE TREE", "#=GF NH THIS IS FIRST", "#=GF NH THIS IS SECOND", "#=GF AC 1283394"]
        exp = {"TN": "MULTILINE TREE", "NH": "THIS IS FIRST THIS IS SECOND", "AC": "1283394"}
        self.assertEqual(self.st._parse_gf_info(sto), exp)

    def test_parse_gf_multiline_cc(self):
        """Makes sure a multiline CC code is parsed correctly"""
        sto = ["#=GF CC THIS IS FIRST", "#=GF CC THIS IS SECOND"]
        exp = {"CC": "THIS IS FIRST THIS IS SECOND"}
        self.assertEqual(self.st._parse_gf_info(sto), exp)

    def test_parse_gf_info_nongf(self):
        """Makes sure error raised if non-GF line passed"""
        sto = ["#=GF AC BLAAAAAAAHHH", "#=GC HUH THIS SHOULD NOT BE HERE"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gf_info(sto)

    def test_parse_gf_info_malformed(self):
        """Makes sure error raised if too short a line passed"""
        sto = ["#=GF AC", "#=GF"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gf_info(sto)

    def test_parse_gc_info_nongf(self):
        """Makes sure error raised if non-GC line passed"""
        sto = ["#=GC AC BLAAAAAAAHHH", "#=GF HUH THIS SHOULD NOT BE HERE"]
        with self.assertRaises(StockholmParseError):
            # Exercise the GC parser (not the GF one), so that it is the
            # stray "#=GF" line that has to trigger the error.
            self.st._parse_gc_info(sto)

    def test_parse_gc_info_strict_len(self):
        """Make sure error raised if GC lines bad length and strict parsing"""
        sto = ["#=GC SS_cons (((..)))"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gc_info(sto, seqlen=20, strict=True)

    def test_parse_gc_info_strict_duplicate(self):
        """Make sure error raised if GC lines repeated"""
        sto = ["#=GC SS_cons (((..)))", "#=GC SS_cons (((..)))"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gc_info(sto, seqlen=8, strict=True)

    def test_parse_gc_info_malformed(self):
        """Makes sure error raised if too short a line passed"""
        sto = ["#=GC AC BLAAAAAAAHHH", "#=GC"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gc_info(sto)

    def test_parse_gs_gr_info_mixed(self):
        """Makes sure error raised if mixed GS and GR lines passed"""
        sto = ["#=GS seq1 AC BLAAA", "#=GR seq2 HUH THIS SHOULD NOT BE HERE"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gs_gr_info(sto)

    def test_parse_gs_gr_info_malformed(self):
        """Makes sure error raised if too short a line passed"""
        sto = ["#=GS AC BLAAAAAAAHHH", "#=GS"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gs_gr_info(sto)

    def test_parse_gs_gr_info_strict(self):
        """Make sure error raised if GR lines bad length and strict parsing"""
        sto = ["#=GR seq1 SS  10101111", "#=GR seq2 SS  01101"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gs_gr_info(sto, seqlen=20, strict=True)

    def test_str(self):
        """Make sure stockholm with all information contained is formatted
        correctly"""
        st = StockholmAlignment(self.seqs, gc=self.GC, gf=self.GF, gs=self.GS, gr=self.GR)
        obs = str(st)
        exp = (
            "# STOCKHOLM 1.0\n"
            "#=GF AC RF00360\n"
            "#=GF BM cmbuild  -F CM SEED\n"
            "#=GF BM cmsearch  -Z 274931 -E 1000000\n"
            "#=GF SQ 9\n"
            "#=GF RN [1]\n"
            "#=GF RM 11469857\n"
            "#=GF RT TITLE1\n"
            "#=GF RA Auth1;\n"
            "#=GF RL J Mol Biol\n"
            "#=GF RN [2]\n"
            "#=GF RM 12007400\n"
            "#=GF RT TITLE2\n"
            "#=GF RA Auth2;\n"
            "#=GF RL Cell\n"
            "#=GS seq1 AC 111\n"
            "#=GS seq2 AC 222\n"
            "seq1          ACC-G-GGTA\n"
            "#=GR seq1 SS  1110101111\n"
            "seq2          TCC-G-GGCA\n"
            "#=GR seq2 SS  0110101110\n"
            "#=GC SS_cons  (((....)))\n//"
        )
        self.assertEqual(obs, exp)

    def test_to_file(self):
        """Make sure to_file writes the same text that str() is expected to
        give for a fully annotated alignment"""
        st = StockholmAlignment(self.seqs, gc=self.GC, gf=self.GF, gs=self.GS, gr=self.GR)

        with tempfile.NamedTemporaryFile("r+") as temp_file:
            st.to_file(temp_file)
            # Flush and rewind so the written text can be read back
            # through the same handle.
            temp_file.flush()
            temp_file.seek(0)
            obs = temp_file.read()
            exp = (
                "# STOCKHOLM 1.0\n"
                "#=GF AC RF00360\n"
                "#=GF BM cmbuild  -F CM SEED\n"
                "#=GF BM cmsearch  -Z 274931 -E 1000000\n"
                "#=GF SQ 9\n"
                "#=GF RN [1]\n"
                "#=GF RM 11469857\n"
                "#=GF RT TITLE1\n"
                "#=GF RA Auth1;\n"
                "#=GF RL J Mol Biol\n"
                "#=GF RN [2]\n"
                "#=GF RM 12007400\n"
                "#=GF RT TITLE2\n"
                "#=GF RA Auth2;\n"
                "#=GF RL Cell\n"
                "#=GS seq1 AC 111\n"
                "#=GS seq2 AC 222\n"
                "seq1          ACC-G-GGTA\n"
                "#=GR seq1 SS  1110101111\n"
                "seq2          TCC-G-GGCA\n"
                "#=GR seq2 SS  0110101110\n"
                "#=GC SS_cons  (((....)))\n//"
            )
        self.assertEqual(obs, exp)

    def test_str_gc(self):
        """Make sure stockholm with only GC information contained is formatted
        correctly"""
        st = StockholmAlignment(self.seqs, gc=self.GC, gf=None, gs=None, gr=None)
        obs = str(st)
        exp = "# STOCKHOLM 1.0\nseq1          ACC-G-GGTA\n" "seq2          TCC-G-GGCA\n" "#=GC SS_cons  (((....)))\n//"
        self.assertEqual(obs, exp)

    def test_str_gf(self):
        """Make sure stockholm with only GF information contained is formatted
        correctly"""
        st = StockholmAlignment(self.seqs, gc=None, gf=self.GF, gs=None, gr=None)
        obs = str(st)
        exp = (
            "# STOCKHOLM 1.0\n"
            "#=GF AC RF00360\n"
            "#=GF BM cmbuild  -F CM SEED\n"
            "#=GF BM cmsearch  -Z 274931 -E 1000000\n"
            "#=GF SQ 9\n"
            "#=GF RN [1]\n"
            "#=GF RM 11469857\n"
            "#=GF RT TITLE1\n"
            "#=GF RA Auth1;\n"
            "#=GF RL J Mol Biol\n"
            "#=GF RN [2]\n"
            "#=GF RM 12007400\n"
            "#=GF RT TITLE2\n"
            "#=GF RA Auth2;\n"
            "#=GF RL Cell\n"
            "seq1          ACC-G-GGTA\n"
            "seq2          TCC-G-GGCA\n//"
        )
        self.assertEqual(obs, exp)

    def test_str_gs(self):
        """Make sure stockholm with only GS information contained is formatted
        correctly"""
        st = StockholmAlignment(self.seqs, gc=None, gf=None, gs=self.GS, gr=None)
        obs = str(st)
        exp = (
            "# STOCKHOLM 1.0\n"
            "#=GS seq1 AC 111\n"
            "#=GS seq2 AC 222\n"
            "seq1          ACC-G-GGTA\n"
            "seq2          TCC-G-GGCA\n//"
        )
        self.assertEqual(obs, exp)

    def test_str_gr(self):
        """Make sure stockholm with only GR information contained is formatted
        correctly"""
        st = StockholmAlignment(self.seqs, gc=None, gf=None, gs=None, gr=self.GR)
        obs = str(st)
        exp = (
            "# STOCKHOLM 1.0\nseq1          ACC-G-GGTA\n"
            "#=GR seq1 SS  1110101111\nseq2          TCC-G-GGCA\n"
            "#=GR seq2 SS  0110101110\n//"
        )
        self.assertEqual(obs, exp)

    def test_str_trees(self):
        """Make sure stockholm with trees printed correctly"""
        # Built from a list of pairs so the key order is explicit rather
        # than inherited from a plain dict literal.
        GF = OrderedDict([("NH", ["IMATREE", "IMATREETOO"]), ("TN", ["Tree2", "Tree1"])])
        st = StockholmAlignment(self.seqs, gc=None, gf=GF, gs=None, gr=None)
        obs = str(st)
        exp = (
            "# STOCKHOLM 1.0\n#=GF TN Tree2\n#=GF NH IMATREE\n#=GF TN Tree1"
            "\n#=GF NH IMATREETOO\nseq1          ACC-G-GGTA\n"
            "seq2          TCC-G-GGCA\n//"
        )

        self.assertEqual(obs, exp)
Пример #28
0
class StockholmAlignmentTests(TestCase):
    """Tests for parsing and serialising ``StockholmAlignment`` objects."""

    def setUp(self):
        """Build two aligned sequences plus GF/GS/GR/GC metadata fixtures."""
        self.seqs = [DNA("ACC-G-GGTA", id="seq1"),
                     DNA("TCC-G-GGCA", id="seq2")]
        # Every key is listed exactly once: repeating a key in the pair list
        # would only overwrite the earlier value and keep its position.
        self.GF = OrderedDict([
            ("AC", "RF00360"),
            ("BM", ["cmbuild  -F CM SEED",
                    "cmsearch  -Z 274931 -E 1000000"]),
            ("SQ", "9"),
            ("RT", ["TITLE1",  "TITLE2"]),
            ("RN", ["[1]", "[2]"]),
            ("RA", ["Auth1;", "Auth2;"]),
            ("RL", ["J Mol Biol", "Cell"]),
            ("RM", ["11469857", "12007400"]),
        ])
        self.GS = {"AC": OrderedDict([("seq1", "111"), ("seq2", "222")])}
        self.GR = {"SS": OrderedDict([("seq1", "1110101111"),
                                      ("seq2", "0110101110")])}
        self.GC = {"SS_cons": "(((....)))"}
        self.st = StockholmAlignment(self.seqs, gc=self.GC, gf=self.GF,
                                     gs=self.GS, gr=self.GR)

    def test_retrieve_metadata(self):
        """Metadata given to the constructor is readable via gc/gf/gs/gr."""
        self.assertEqual(self.st.gc, self.GC)
        self.assertEqual(self.st.gf, self.GF)
        self.assertEqual(self.st.gs, self.GS)
        self.assertEqual(self.st.gr, self.GR)

    def test_from_file_alignment(self):
        """A basic stockholm file with an interleaved alignment parses."""
        sto = StringIO("# STOCKHOLM 1.0\n"
                       "seq1      ACC-G\n"
                       "seq2      TCC-G\n\n"
                       "seq1      -GGTA\n"
                       "seq2      -GGCA\n//")
        obs_sto = next(StockholmAlignment.from_file(sto, DNA))
        exp_sto = StockholmAlignment(self.seqs)
        self.assertEqual(obs_sto, exp_sto)

    def test_from_file_GF(self):
        """GF lines in a file are parsed into the expected GF metadata."""
        # remove rn line to make sure auto-added
        self.GF.pop("RN")
        sto = StringIO("# STOCKHOLM 1.0\n#=GF RN [1]\n#=GF RM 11469857\n"
                       "#=GF RT TITLE1\n#=GF RA Auth1;\n#=GF RL J Mol Biol\n"
                       "#=GF RN [2]\n#=GF RM 12007400\n#=GF RT TITLE2\n"
                       "#=GF RA Auth2;\n#=GF RL Cell\n#=GF AC RF00360\n"
                       "#=GF BM cmbuild  -F CM SEED\n"
                       "#=GF BM cmsearch  -Z 274931 -E 1000000\n#=GF SQ 9\n"
                       "seq1         ACC-G-GGTA\nseq2         TCC-G-GGCA\n//")
        obs_sto = next(StockholmAlignment.from_file(sto, DNA))
        exp_sto = StockholmAlignment(self.seqs, self.GF, {}, {}, {})
        self.assertEqual(obs_sto, exp_sto)

    def test_from_file_GC(self):
        """GC lines in a file are parsed into the expected GC metadata."""
        sto = StringIO("# STOCKHOLM 1.0\n"
                       "seq1         ACC-G-GGTA\nseq2         TCC-G-GGCA\n"
                       "#=GC SS_cons (((....)))\n//")
        obs_sto = next(StockholmAlignment.from_file(sto, DNA))
        exp_sto = StockholmAlignment(self.seqs, {}, {}, {}, self.GC)
        self.assertEqual(obs_sto, exp_sto)

    def test_from_file_GS(self):
        """GS lines in a file are parsed into the expected GS metadata."""
        sto = StringIO("# STOCKHOLM 1.0\n#=GS seq2 AC 222\n#=GS seq1 AC 111\n"
                       "seq1          ACC-G-GGTA\n"
                       "seq2          TCC-G-GGCA\n//")
        obs_sto = next(StockholmAlignment.from_file(sto, DNA))
        exp_sto = StockholmAlignment(self.seqs, {}, self.GS, {}, {})
        self.assertEqual(obs_sto, exp_sto)

    def test_from_file_GR(self):
        """Interleaved GR lines are parsed into the expected GR metadata."""
        sto = StringIO("# STOCKHOLM 1.0\nseq1          ACC-G\n"
                       "#=GR seq1 SS  11101\nseq2          TCC-G\n"
                       "#=GR seq2 SS  01101\n\nseq1          -GGTA\n"
                       "#=GR seq1 SS  01111\nseq2          -GGCA\n"
                       "#=GR seq2 SS  01110\n//")
        obs_sto = next(StockholmAlignment.from_file(sto, DNA))
        exp_sto = StockholmAlignment(self.seqs, {}, {}, self.GR, {})
        self.assertEqual(obs_sto, exp_sto)

    def test_from_file_multi(self):
        """A file holding two alignments yields exactly those two, in order."""
        sto = StringIO("# STOCKHOLM 1.0\n#=GS seq2 AC 222\n#=GS seq1 AC 111\n"
                       "seq1          ACC-G-GGTA\n"
                       "seq2          TCC-G-GGCA\n//\n"
                       "# STOCKHOLM 1.0\nseq1          ACC-G-GGTA\n"
                       "#=GR seq1 SS  1110101111\nseq2          TCC-G-GGCA\n"
                       "#=GR seq2 SS  0110101110\n//")
        expected = [StockholmAlignment(self.seqs, {}, self.GS, {}, {}),
                    StockholmAlignment(self.seqs, {}, {}, self.GR, {})]
        parsed = StockholmAlignment.from_file(sto, DNA)
        count = 0
        for count, obs in enumerate(parsed, 1):
            if count > len(expected):
                self.fail("More than 2 sto alignments parsed!")
            self.assertEqual(obs, expected[count - 1])
        # Without this check the loop would pass vacuously whenever the
        # parser yielded fewer alignments than the file contains.
        self.assertEqual(count, len(expected))

    def test_parse_gf_multiline_nh(self):
        """Consecutive NH lines are joined into one space-separated value."""
        sto = ["#=GF TN MULTILINE TREE",
               "#=GF NH THIS IS FIRST", "#=GF NH THIS IS SECOND",
               "#=GF AC 1283394"]
        exp = {'TN': 'MULTILINE TREE',
               'NH': 'THIS IS FIRST THIS IS SECOND',
               'AC': '1283394'}
        self.assertEqual(self.st._parse_gf_info(sto), exp)

    def test_parse_gf_multiline_cc(self):
        """Consecutive CC lines are joined into one space-separated value."""
        sto = ["#=GF CC THIS IS FIRST", "#=GF CC THIS IS SECOND"]
        exp = {'CC': 'THIS IS FIRST THIS IS SECOND'}
        self.assertEqual(self.st._parse_gf_info(sto), exp)

    def test_parse_gf_info_nongf(self):
        """The GF parser raises when handed a non-GF line."""
        sto = ["#=GF AC BLAAAAAAAHHH", "#=GC HUH THIS SHOULD NOT BE HERE"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gf_info(sto)

    def test_parse_gf_info_malformed(self):
        """The GF parser raises when a line has too few fields."""
        # too short of a line
        sto = ["#=GF AC", "#=GF"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gf_info(sto)

    def test_parse_gc_info_nongf(self):
        """The GC parser raises when handed a non-GC line."""
        sto = ["#=GC AC BLAAAAAAAHHH", "#=GF HUH THIS SHOULD NOT BE HERE"]
        with self.assertRaises(StockholmParseError):
            # Exercise the GC parser itself; the GF parser has its own
            # equivalent check in test_parse_gf_info_nongf.
            self.st._parse_gc_info(sto)

    def test_parse_gc_info_strict_len(self):
        """Strict GC parsing raises when content length differs from seqlen."""
        sto = ["#=GC SS_cons (((..)))"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gc_info(sto, seqlen=20, strict=True)

    def test_parse_gc_info_strict_duplicate(self):
        """Strict GC parsing raises when the same GC feature is repeated."""
        sto = ["#=GC SS_cons (((..)))", "#=GC SS_cons (((..)))"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gc_info(sto, seqlen=8, strict=True)

    def test_parse_gc_info_malformed(self):
        """The GC parser raises when a line has too few fields."""
        # too short of a line
        sto = ["#=GC AC BLAAAAAAAHHH", "#=GC"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gc_info(sto)

    def test_parse_gs_gr_info_mixed(self):
        """The GS/GR parser raises when GS and GR lines are mixed."""
        sto = ["#=GS seq1 AC BLAAA", "#=GR seq2 HUH THIS SHOULD NOT BE HERE"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gs_gr_info(sto)

    def test_parse_gs_gr_info_malformed(self):
        """The GS/GR parser raises when a line has too few fields."""
        # too short of a line
        sto = ["#=GS AC BLAAAAAAAHHH", "#=GS"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gs_gr_info(sto)

    def test_parse_gs_gr_info_strict(self):
        """Strict GR parsing raises when content length differs from seqlen."""
        sto = ["#=GR seq1 SS  10101111", "#=GR seq2 SS  01101"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gs_gr_info(sto, seqlen=20, strict=True)

    def test_str(self):
        """An alignment with GF, GS, GR and GC data is formatted correctly."""
        st = StockholmAlignment(self.seqs, gc=self.GC, gf=self.GF, gs=self.GS,
                                gr=self.GR)
        obs = str(st)
        exp = ('# STOCKHOLM 1.0\n'
               '#=GF AC RF00360\n'
               '#=GF BM cmbuild  -F CM SEED\n'
               '#=GF BM cmsearch  -Z 274931 -E 1000000\n'
               '#=GF SQ 9\n'
               '#=GF RN [1]\n'
               '#=GF RM 11469857\n'
               '#=GF RT TITLE1\n'
               '#=GF RA Auth1;\n'
               '#=GF RL J Mol Biol\n'
               '#=GF RN [2]\n'
               '#=GF RM 12007400\n'
               '#=GF RT TITLE2\n'
               '#=GF RA Auth2;\n'
               '#=GF RL Cell\n'
               '#=GS seq1 AC 111\n'
               '#=GS seq2 AC 222\n'
               'seq1          ACC-G-GGTA\n'
               '#=GR seq1 SS  1110101111\n'
               'seq2          TCC-G-GGCA\n'
               '#=GR seq2 SS  0110101110\n'
               '#=GC SS_cons  (((....)))\n//')
        self.assertEqual(obs, exp)

    def test_to_file(self):
        """Writing to a file produces the same text that ``str`` is tested for."""
        st = StockholmAlignment(self.seqs, gc=self.GC, gf=self.GF, gs=self.GS,
                                gr=self.GR)

        with tempfile.NamedTemporaryFile('r+') as temp_file:
            st.to_file(temp_file)
            # Flush and rewind so the read below sees everything written.
            temp_file.flush()
            temp_file.seek(0)
            obs = temp_file.read()
            exp = ('# STOCKHOLM 1.0\n'
                   '#=GF AC RF00360\n'
                   '#=GF BM cmbuild  -F CM SEED\n'
                   '#=GF BM cmsearch  -Z 274931 -E 1000000\n'
                   '#=GF SQ 9\n'
                   '#=GF RN [1]\n'
                   '#=GF RM 11469857\n'
                   '#=GF RT TITLE1\n'
                   '#=GF RA Auth1;\n'
                   '#=GF RL J Mol Biol\n'
                   '#=GF RN [2]\n'
                   '#=GF RM 12007400\n'
                   '#=GF RT TITLE2\n'
                   '#=GF RA Auth2;\n'
                   '#=GF RL Cell\n'
                   '#=GS seq1 AC 111\n'
                   '#=GS seq2 AC 222\n'
                   'seq1          ACC-G-GGTA\n'
                   '#=GR seq1 SS  1110101111\n'
                   'seq2          TCC-G-GGCA\n'
                   '#=GR seq2 SS  0110101110\n'
                   '#=GC SS_cons  (((....)))\n//')
        self.assertEqual(obs, exp)

    def test_str_gc(self):
        """An alignment with only GC data is formatted correctly."""
        st = StockholmAlignment(self.seqs, gc=self.GC, gf=None, gs=None,
                                gr=None)
        obs = str(st)
        exp = ("# STOCKHOLM 1.0\nseq1          ACC-G-GGTA\n"
               "seq2          TCC-G-GGCA\n"
               "#=GC SS_cons  (((....)))\n//")
        self.assertEqual(obs, exp)

    def test_str_gf(self):
        """An alignment with only GF data is formatted correctly."""
        st = StockholmAlignment(self.seqs, gc=None, gf=self.GF, gs=None,
                                gr=None)
        obs = str(st)
        exp = ('# STOCKHOLM 1.0\n'
               '#=GF AC RF00360\n'
               '#=GF BM cmbuild  -F CM SEED\n'
               '#=GF BM cmsearch  -Z 274931 -E 1000000\n'
               '#=GF SQ 9\n'
               '#=GF RN [1]\n'
               '#=GF RM 11469857\n'
               '#=GF RT TITLE1\n'
               '#=GF RA Auth1;\n'
               '#=GF RL J Mol Biol\n'
               '#=GF RN [2]\n'
               '#=GF RM 12007400\n'
               '#=GF RT TITLE2\n'
               '#=GF RA Auth2;\n'
               '#=GF RL Cell\n'
               'seq1          ACC-G-GGTA\n'
               'seq2          TCC-G-GGCA\n//')
        self.assertEqual(obs, exp)

    def test_str_gs(self):
        """An alignment with only GS data is formatted correctly."""
        st = StockholmAlignment(self.seqs, gc=None, gf=None, gs=self.GS,
                                gr=None)
        obs = str(st)
        exp = ('# STOCKHOLM 1.0\n'
               '#=GS seq1 AC 111\n'
               '#=GS seq2 AC 222\n'
               'seq1          ACC-G-GGTA\n'
               'seq2          TCC-G-GGCA\n//')
        self.assertEqual(obs, exp)

    def test_str_gr(self):
        """An alignment with only GR data is formatted correctly."""
        st = StockholmAlignment(self.seqs, gc=None, gf=None, gs=None,
                                gr=self.GR)
        obs = str(st)
        exp = ("# STOCKHOLM 1.0\nseq1          ACC-G-GGTA\n"
               "#=GR seq1 SS  1110101111\nseq2          TCC-G-GGCA\n"
               "#=GR seq2 SS  0110101110\n//")
        self.assertEqual(obs, exp)

    def test_str_trees(self):
        """An alignment with tree data is formatted correctly.

        The expected text lists each TN identifier immediately followed by
        the NH entry found at the same list position.
        """
        # Seed the OrderedDict from (key, value) pairs. A plain dict literal
        # has no guaranteed key order before Python 3.7, so wrapping one in
        # OrderedDict would not pin the order either.
        GF = OrderedDict([("NH", ["IMATREE", "IMATREETOO"]),
                          ("TN", ["Tree2", "Tree1"])])
        st = StockholmAlignment(self.seqs, gc=None, gf=GF, gs=None,
                                gr=None)
        obs = str(st)
        exp = ("# STOCKHOLM 1.0\n#=GF TN Tree2\n#=GF NH IMATREE\n#=GF TN Tree1"
               "\n#=GF NH IMATREETOO\nseq1          ACC-G-GGTA\n"
               "seq2          TCC-G-GGCA\n//")

        self.assertEqual(obs, exp)
Пример #29
0
class StockholmAlignmentTests(TestCase):

    """Tests for parsing and serialising StockholmAlignment objects"""

    def setUp(self):
        """Build two aligned sequences plus GF/GS/GR/GC metadata fixtures."""
        # NOTE(review): the fixtures use DNASequence while the from_file
        # tests pass DNA as the sequence constructor -- confirm both names
        # are imported and produce objects that compare equal.
        self.seqs = [DNASequence("ACC-G-GGTA", id="seq1"),
                     DNASequence("TCC-G-GGCA", id="seq2")]
        # Every key is listed exactly once: repeating a key in the pair list
        # would only overwrite the earlier value and keep its position.
        self.GF = OrderedDict([
            ("AC", "RF00360"),
            ("BM", ["cmbuild  -F CM SEED",
                    "cmsearch  -Z 274931 -E 1000000"]),
            ("SQ", "9"),
            ("RT", ["TITLE1",  "TITLE2"]),
            ("RN", ["[1]", "[2]"]),
            ("RA", ["Auth1;", "Auth2;"]),
            ("RL", ["J Mol Biol", "Cell"]),
            ("RM", ["11469857", "12007400"]),
        ])
        self.GS = {"AC": OrderedDict([("seq1", "111"), ("seq2", "222")])}
        self.GR = {"SS": OrderedDict([("seq1", "1110101111"),
                                      ("seq2", "0110101110")])}
        self.GC = {"SS_cons": "(((....)))"}
        self.st = StockholmAlignment(self.seqs, gc=self.GC, gf=self.GF,
                                     gs=self.GS, gr=self.GR)

    def test_retrieve_metadata(self):
        """Metadata given to the constructor is readable via gc/gf/gs/gr."""
        self.assertEqual(self.st.gc, self.GC)
        self.assertEqual(self.st.gf, self.GF)
        self.assertEqual(self.st.gs, self.GS)
        self.assertEqual(self.st.gr, self.GR)

    def test_from_file_alignment(self):
        """make sure can parse basic sto file with interleaved alignment"""
        sto = StringIO("# STOCKHOLM 1.0\n"
                       "seq1      ACC-G\n"
                       "seq2      TCC-G\n\n"
                       "seq1      -GGTA\n"
                       "seq2      -GGCA\n//")
        obs_sto = next(StockholmAlignment.from_file(sto, DNA))
        exp_sto = StockholmAlignment(self.seqs)
        self.assertEqual(obs_sto, exp_sto)

    def test_from_file_GF(self):
        """Make sure GF lines are parsed correctly"""
        # remove rn line to make sure auto-added
        self.GF.pop("RN")
        sto = StringIO("# STOCKHOLM 1.0\n#=GF RN [1]\n#=GF RM 11469857\n"
                       "#=GF RT TITLE1\n#=GF RA Auth1;\n#=GF RL J Mol Biol\n"
                       "#=GF RN [2]\n#=GF RM 12007400\n#=GF RT TITLE2\n"
                       "#=GF RA Auth2;\n#=GF RL Cell\n#=GF AC RF00360\n"
                       "#=GF BM cmbuild  -F CM SEED\n"
                       "#=GF BM cmsearch  -Z 274931 -E 1000000\n#=GF SQ 9\n"
                       "seq1         ACC-G-GGTA\nseq2         TCC-G-GGCA\n//")
        obs_sto = next(StockholmAlignment.from_file(sto, DNA))
        exp_sto = StockholmAlignment(self.seqs, self.GF, {}, {}, {})
        self.assertEqual(obs_sto, exp_sto)

    def test_from_file_GC(self):
        """Make sure GC lines are parsed correctly"""
        sto = StringIO("# STOCKHOLM 1.0\n"
                       "seq1         ACC-G-GGTA\nseq2         TCC-G-GGCA\n"
                       "#=GC SS_cons (((....)))\n//")
        obs_sto = next(StockholmAlignment.from_file(sto, DNA))
        exp_sto = StockholmAlignment(self.seqs, {}, {}, {}, self.GC)
        self.assertEqual(obs_sto, exp_sto)

    def test_from_file_GS(self):
        """Make sure GS lines are parsed correctly"""
        sto = StringIO("# STOCKHOLM 1.0\n#=GS seq2 AC 222\n#=GS seq1 AC 111\n"
                       "seq1          ACC-G-GGTA\n"
                       "seq2          TCC-G-GGCA\n//")
        obs_sto = next(StockholmAlignment.from_file(sto, DNA))
        exp_sto = StockholmAlignment(self.seqs, {}, self.GS, {}, {})
        self.assertEqual(obs_sto, exp_sto)

    def test_from_file_GR(self):
        """Make sure GR lines are parsed correctly"""
        sto = StringIO("# STOCKHOLM 1.0\nseq1          ACC-G\n"
                       "#=GR seq1 SS  11101\nseq2          TCC-G\n"
                       "#=GR seq2 SS  01101\n\nseq1          -GGTA\n"
                       "#=GR seq1 SS  01111\nseq2          -GGCA\n"
                       "#=GR seq2 SS  01110\n//")
        obs_sto = next(StockholmAlignment.from_file(sto, DNA))
        exp_sto = StockholmAlignment(self.seqs, {}, {}, self.GR, {})
        self.assertEqual(obs_sto, exp_sto)

    def test_from_file_multi(self):
        """Make sure a two-alignment sto file yields exactly two alignments"""
        sto = StringIO("# STOCKHOLM 1.0\n#=GS seq2 AC 222\n#=GS seq1 AC 111\n"
                       "seq1          ACC-G-GGTA\n"
                       "seq2          TCC-G-GGCA\n//\n"
                       "# STOCKHOLM 1.0\nseq1          ACC-G-GGTA\n"
                       "#=GR seq1 SS  1110101111\nseq2          TCC-G-GGCA\n"
                       "#=GR seq2 SS  0110101110\n//")
        expected = [StockholmAlignment(self.seqs, {}, self.GS, {}, {}),
                    StockholmAlignment(self.seqs, {}, {}, self.GR, {})]
        parsed = StockholmAlignment.from_file(sto, DNA)
        count = 0
        for count, obs in enumerate(parsed, 1):
            if count > len(expected):
                self.fail("More than 2 sto alignments parsed!")
            self.assertEqual(obs, expected[count - 1])
        # Without this check the loop would pass vacuously whenever the
        # parser yielded fewer alignments than the file contains.
        self.assertEqual(count, len(expected))

    def test_parse_gf_multiline_nh(self):
        """Makes sure a multiline NH code is parsed correctly"""
        sto = ["#=GF TN MULTILINE TREE",
               "#=GF NH THIS IS FIRST", "#=GF NH THIS IS SECOND",
               "#=GF AC 1283394"]
        exp = {'TN': 'MULTILINE TREE',
               'NH': 'THIS IS FIRST THIS IS SECOND',
               'AC': '1283394'}
        self.assertEqual(self.st._parse_gf_info(sto), exp)

    def test_parse_gf_multiline_cc(self):
        """Makes sure a multiline CC code is parsed correctly"""
        sto = ["#=GF CC THIS IS FIRST", "#=GF CC THIS IS SECOND"]
        exp = {'CC': 'THIS IS FIRST THIS IS SECOND'}
        self.assertEqual(self.st._parse_gf_info(sto), exp)

    def test_parse_gf_info_nongf(self):
        """Makes sure error raised if non-GF line passed"""
        sto = ["#=GF AC BLAAAAAAAHHH", "#=GC HUH THIS SHOULD NOT BE HERE"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gf_info(sto)

    def test_parse_gf_info_malformed(self):
        """Makes sure error raised if too short a line passed"""
        sto = ["#=GF AC", "#=GF"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gf_info(sto)

    def test_parse_gc_info_nongf(self):
        """Makes sure error raised if non-GC line passed"""
        sto = ["#=GC AC BLAAAAAAAHHH", "#=GF HUH THIS SHOULD NOT BE HERE"]
        with self.assertRaises(StockholmParseError):
            # Exercise the GC parser itself; the GF parser has its own
            # equivalent check in test_parse_gf_info_nongf.
            self.st._parse_gc_info(sto)

    def test_parse_gc_info_strict_len(self):
        """Make sure error raised if GC lines bad length and strict parsing"""
        sto = ["#=GC SS_cons (((..)))"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gc_info(sto, seqlen=20, strict=True)

    def test_parse_gc_info_strict_duplicate(self):
        """Make sure error raised if GC lines repeated"""
        sto = ["#=GC SS_cons (((..)))", "#=GC SS_cons (((..)))"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gc_info(sto, seqlen=8, strict=True)

    def test_parse_gc_info_malformed(self):
        """Makes sure error raised if too short a line passed"""
        sto = ["#=GC AC BLAAAAAAAHHH", "#=GC"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gc_info(sto)

    def test_parse_gs_gr_info_mixed(self):
        """Makes sure error raised if mixed GS and GR lines passed"""
        sto = ["#=GS seq1 AC BLAAA", "#=GR seq2 HUH THIS SHOULD NOT BE HERE"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gs_gr_info(sto)

    def test_parse_gs_gr_info_malformed(self):
        """Makes sure error raised if too short a line passed"""
        sto = ["#=GS AC BLAAAAAAAHHH", "#=GS"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gs_gr_info(sto)

    def test_parse_gs_gr_info_strict(self):
        """Make sure error raised if GR lines bad length and strict parsing"""
        sto = ["#=GR seq1 SS  10101111", "#=GR seq2 SS  01101"]
        with self.assertRaises(StockholmParseError):
            self.st._parse_gs_gr_info(sto, seqlen=20, strict=True)

    def test_str(self):
        """ Make sure stockholm with all information contained is formatted
        correctly """
        st = StockholmAlignment(self.seqs, gc=self.GC, gf=self.GF, gs=self.GS,
                                gr=self.GR)
        obs = str(st)
        exp = ('# STOCKHOLM 1.0\n'
               '#=GF AC RF00360\n'
               '#=GF BM cmbuild  -F CM SEED\n'
               '#=GF BM cmsearch  -Z 274931 -E 1000000\n'
               '#=GF SQ 9\n'
               '#=GF RN [1]\n'
               '#=GF RM 11469857\n'
               '#=GF RT TITLE1\n'
               '#=GF RA Auth1;\n'
               '#=GF RL J Mol Biol\n'
               '#=GF RN [2]\n'
               '#=GF RM 12007400\n'
               '#=GF RT TITLE2\n'
               '#=GF RA Auth2;\n'
               '#=GF RL Cell\n'
               '#=GS seq1 AC 111\n'
               '#=GS seq2 AC 222\n'
               'seq1          ACC-G-GGTA\n'
               '#=GR seq1 SS  1110101111\n'
               'seq2          TCC-G-GGCA\n'
               '#=GR seq2 SS  0110101110\n'
               '#=GC SS_cons  (((....)))\n//')
        self.assertEqual(obs, exp)

    def test_to_file(self):
        """ Make sure stockholm with all information contained is written to
        a file correctly """
        st = StockholmAlignment(self.seqs, gc=self.GC, gf=self.GF, gs=self.GS,
                                gr=self.GR)

        with tempfile.NamedTemporaryFile('r+') as temp_file:
            st.to_file(temp_file)
            # Flush and rewind so the read below sees everything written.
            temp_file.flush()
            temp_file.seek(0)
            obs = temp_file.read()
            exp = ('# STOCKHOLM 1.0\n'
                   '#=GF AC RF00360\n'
                   '#=GF BM cmbuild  -F CM SEED\n'
                   '#=GF BM cmsearch  -Z 274931 -E 1000000\n'
                   '#=GF SQ 9\n'
                   '#=GF RN [1]\n'
                   '#=GF RM 11469857\n'
                   '#=GF RT TITLE1\n'
                   '#=GF RA Auth1;\n'
                   '#=GF RL J Mol Biol\n'
                   '#=GF RN [2]\n'
                   '#=GF RM 12007400\n'
                   '#=GF RT TITLE2\n'
                   '#=GF RA Auth2;\n'
                   '#=GF RL Cell\n'
                   '#=GS seq1 AC 111\n'
                   '#=GS seq2 AC 222\n'
                   'seq1          ACC-G-GGTA\n'
                   '#=GR seq1 SS  1110101111\n'
                   'seq2          TCC-G-GGCA\n'
                   '#=GR seq2 SS  0110101110\n'
                   '#=GC SS_cons  (((....)))\n//')
        self.assertEqual(obs, exp)

    def test_str_gc(self):
        """ Make sure stockholm with only GC information contained is formatted
        correctly """
        st = StockholmAlignment(self.seqs, gc=self.GC, gf=None, gs=None,
                                gr=None)
        obs = str(st)
        exp = ("# STOCKHOLM 1.0\nseq1          ACC-G-GGTA\n"
               "seq2          TCC-G-GGCA\n"
               "#=GC SS_cons  (((....)))\n//")
        self.assertEqual(obs, exp)

    def test_str_gf(self):
        """ Make sure stockholm with only GF information contained is formatted
        correctly """
        st = StockholmAlignment(self.seqs, gc=None, gf=self.GF, gs=None,
                                gr=None)
        obs = str(st)
        exp = ('# STOCKHOLM 1.0\n'
               '#=GF AC RF00360\n'
               '#=GF BM cmbuild  -F CM SEED\n'
               '#=GF BM cmsearch  -Z 274931 -E 1000000\n'
               '#=GF SQ 9\n'
               '#=GF RN [1]\n'
               '#=GF RM 11469857\n'
               '#=GF RT TITLE1\n'
               '#=GF RA Auth1;\n'
               '#=GF RL J Mol Biol\n'
               '#=GF RN [2]\n'
               '#=GF RM 12007400\n'
               '#=GF RT TITLE2\n'
               '#=GF RA Auth2;\n'
               '#=GF RL Cell\n'
               'seq1          ACC-G-GGTA\n'
               'seq2          TCC-G-GGCA\n//')
        self.assertEqual(obs, exp)

    def test_str_gs(self):
        """ Make sure stockholm with only GS information contained is formatted
        correctly """
        st = StockholmAlignment(self.seqs, gc=None, gf=None, gs=self.GS,
                                gr=None)
        obs = str(st)
        exp = ('# STOCKHOLM 1.0\n'
               '#=GS seq1 AC 111\n'
               '#=GS seq2 AC 222\n'
               'seq1          ACC-G-GGTA\n'
               'seq2          TCC-G-GGCA\n//')
        self.assertEqual(obs, exp)

    def test_str_gr(self):
        """ Make sure stockholm with only GR information contained is formatted
        correctly """
        st = StockholmAlignment(self.seqs, gc=None, gf=None, gs=None,
                                gr=self.GR)
        obs = str(st)
        exp = ("# STOCKHOLM 1.0\nseq1          ACC-G-GGTA\n"
               "#=GR seq1 SS  1110101111\nseq2          TCC-G-GGCA\n"
               "#=GR seq2 SS  0110101110\n//")
        self.assertEqual(obs, exp)

    def test_str_trees(self):
        """ Make sure stockholm with trees printed correctly"""
        # Seed the OrderedDict from (key, value) pairs. A plain dict literal
        # has no guaranteed key order before Python 3.7, so wrapping one in
        # OrderedDict would not pin the order either.
        GF = OrderedDict([("NH", ["IMATREE", "IMATREETOO"]),
                          ("TN", ["Tree2", "Tree1"])])
        st = StockholmAlignment(self.seqs, gc=None, gf=GF, gs=None,
                                gr=None)
        obs = str(st)
        exp = ("# STOCKHOLM 1.0\n#=GF TN Tree2\n#=GF NH IMATREE\n#=GF TN Tree1"
               "\n#=GF NH IMATREETOO\nseq1          ACC-G-GGTA\n"
               "seq2          TCC-G-GGCA\n//")

        self.assertEqual(obs, exp)