Пример #1
0
 def est_logdet_pair_aa(self):
     """Smoke-check that LogDet distances can be computed for amino acids.

     Nothing is asserted about the values; the check is only that no step
     raises.
     """
     # The file is read as DNA and then translated to get the protein
     # alignment that is handed to the calculator.
     dna_aln = LoadSeqs('data/brca1_5.paml', moltype=DNA)
     protein_aln = dna_aln.getTranslation()
     calculator = LogDetPair(moltype=PROTEIN, alignment=protein_aln)
     calculator.run(use_tk_adjustment=True, show_progress=False)
     # Fetching the distances is part of what must succeed; the returned
     # value itself is not inspected.
     calculator.getPairwiseDistances()
Пример #2
0
 def est_logdet_pair_dna(self):
     """LogDet distances for the DNA alignment should match MEGA's values.

     Every pair returned by ``getPairwiseDistances()`` is compared, via
     ``assertFloatEqual``, with the reference value stored for that pair.
     """
     aln = LoadSeqs('data/brca1_5.paml', moltype=DNA)
     logdet_calc = LogDetPair(moltype=DNA, alignment=aln)
     # Pass show_progress=False, as the other LogDet tests do.
     logdet_calc.run(use_tk_adjustment=True, show_progress=False)
     dists = logdet_calc.getPairwiseDistances()
     # Reference values, listed for both orderings of each pair of names.
     all_expected = {
         ('Human', 'NineBande'): 0.075336929999999996,
         ('NineBande', 'Human'): 0.075336929999999996,
         ('NineBande', 'DogFaced'): 0.0898575452,
         ('DogFaced', 'NineBande'): 0.0898575452,
         ('DogFaced', 'Human'): 0.1061747919,
         ('Human', 'DogFaced'): 0.1061747919,
         ('HowlerMon', 'DogFaced'): 0.0934480008,
         ('DogFaced', 'HowlerMon'): 0.0934480008,
         ('Mouse', 'HowlerMon'): 0.26422862920000001,
         ('HowlerMon', 'Mouse'): 0.26422862920000001,
         ('HowlerMon', 'NineBande'): 0.062202897899999998,
         ('NineBande', 'HowlerMon'): 0.062202897899999998,
         ('Mouse', 'Human'): 0.26539976700000001,
         ('Human', 'Mouse'): 0.26539976700000001,
         ('HowlerMon', 'Human'): 0.036571181899999999,
         ('Human', 'HowlerMon'): 0.036571181899999999,
         ('DogFaced', 'Mouse'): 0.2652555144,
         ('Mouse', 'DogFaced'): 0.2652555144,
         ('NineBande', 'Mouse'): 0.22754789210000001,
         ('Mouse', 'NineBande'): 0.22754789210000001,
     }
     for pair in dists:
         self.assertFloatEqual(dists[pair], all_expected[pair])
Пример #3
0
 def test_logdet_pair_aa(self):
     """LogDet must run to completion on a translated (protein) alignment.

     This is a does-not-raise check only: the distances are requested but
     their values are not asserted.
     """
     # Read as DNA, then translate; the calculator receives the translation.
     aa_alignment = LoadSeqs(
         'data/brca1_5.paml', moltype=DNA).getTranslation()
     estimator = LogDetPair(moltype=PROTEIN, alignment=aa_alignment)
     estimator.run(use_tk_adjustment=True, show_progress=False)
     estimator.getPairwiseDistances()
Пример #4
0
 def test_logdet_pair_dna(self):
     """LogDet distances for the DNA alignment should match MEGA's values.

     Each pair returned by ``getPairwiseDistances()`` is looked up in the
     reference table and compared with ``assertFloatEqual``.
     """
     alignment = LoadSeqs('data/brca1_5.paml', moltype=DNA)
     calculator = LogDetPair(moltype=DNA, alignment=alignment)
     calculator.run(use_tk_adjustment=True, show_progress=False)
     observed = calculator.getPairwiseDistances()

     # The reference value is the same for (a, b) and (b, a), so each
     # unordered pair is written once and the mirrored key is added below.
     one_way = {
         ('Human', 'NineBande'): 0.075336929999999996,
         ('NineBande', 'DogFaced'): 0.0898575452,
         ('DogFaced', 'Human'): 0.1061747919,
         ('HowlerMon', 'DogFaced'): 0.0934480008,
         ('Mouse', 'HowlerMon'): 0.26422862920000001,
         ('HowlerMon', 'NineBande'): 0.062202897899999998,
         ('Mouse', 'Human'): 0.26539976700000001,
         ('HowlerMon', 'Human'): 0.036571181899999999,
         ('DogFaced', 'Mouse'): 0.2652555144,
         ('NineBande', 'Mouse'): 0.22754789210000001,
     }
     all_expected = dict(one_way)
     all_expected.update(
         ((second, first), value)
         for (first, second), value in one_way.items())

     for name_pair in observed:
         self.assertFloatEqual(observed[name_pair], all_expected[name_pair])
    def test_paralinear_pair_dna(self):
        """Paralinear results should agree with LogDet for one sequence pair.

        Both calculators are run on the same two-sequence DNA alignment and
        the ``[1, 1]`` entries of ``Dists`` and of ``Variances`` are compared
        with ``eps=1e-3``.
        """
        first = ('seq1',
                 'TAATTCATTGGGACGTCGAATCCGGCAGTCCTGCCGCAAAAGCTTCCGGAATCGAATTTTGGCA')
        second = ('seq2',
                  'AAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCTTTTTTTTTTTTTTTTGGGGGGGGGGGGGGGG')
        alignment = LoadSeqs(data=[first, second], moltype=DNA)

        paralinear = ParalinearPair(moltype=DNA, alignment=alignment)
        paralinear.run(show_progress=False)
        logdet = LogDetPair(moltype=DNA, alignment=alignment)
        logdet.run(show_progress=False)

        entry = (1, 1)
        self.assertFloatEqual(logdet.Dists[entry], paralinear.Dists[entry],
                              eps=1e-3)
        self.assertFloatEqual(paralinear.Variances[entry],
                              logdet.Variances[entry], eps=1e-3)
Пример #6
0
 def test_logdet_tk_adjustment(self):
     """Distances with the Tamura-Kumar adjustment differ from classic ones.

     The same calculator is run twice, first with ``use_tk_adjustment=True``
     and then with ``False``, and the two results must not compare equal.
     """
     alignment = LoadSeqs('data/brca1_5.paml', moltype=DNA)
     calculator = LogDetPair(moltype=DNA, alignment=alignment)

     by_flag = {}
     # Order matters only in that it mirrors the sequence of runs: TK first.
     for use_tk in (True, False):
         calculator.run(use_tk_adjustment=use_tk, show_progress=False)
         by_flag[use_tk] = calculator.getPairwiseDistances()

     self.assertNotEqual(by_flag[True], by_flag[False])
Пример #7
0
    def test_logdet_variance(self):
        """LogDet variance should match a value worked out by hand here.

        With ``use_tk_adjustment=True`` the ``Variances[1, 1]`` entry is
        expected to equal ``None``. With the adjustment off, it must match
        (``eps=1e-3``) the value computed below from the 4x4 table of aligned
        base pairs.
        """
        seq1 = "GGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT"
        seq2 = "TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC"
        data = [('seq1', seq1), ('seq2', seq2)]
        aln = LoadSeqs(data=data, moltype=DNA)
        calc = LogDetPair(moltype=DNA, alignment=aln)
        calc.run(use_tk_adjustment=True, show_progress=False)
        self.assertEqual(calc.Variances[1, 1], None)

        # Tally each aligned column as (base in seq1, base in seq2).
        base_index = dict((base, pos) for pos, base in enumerate('ACGT'))
        table = numpy.zeros((4, 4))
        for upper, lower in zip(seq1, seq2):
            table[base_index[upper], base_index[lower]] += 1
        # A diagonal cell with a zero count is bumped to 0.5.
        for k in range(4):
            if table[k, k] == 0:
                table[k, k] += 0.5
        # Convert counts to proportions before inverting.
        table /= table.sum()
        inverse = numpy.linalg.inv(table)

        expected_var = 0.
        for row in range(4):
            for col in range(4):
                expected_var += inverse[col, row]**2 * table[row, col] - 1
        expected_var /= 16 * len(seq1)

        calc.run(use_tk_adjustment=False, show_progress=False)
        # The distances are requested as before; the result is not used.
        calc.getPairwiseDistances()
        self.assertFloatEqual(calc.Variances[1, 1], expected_var, eps=1e-3)
Пример #8
0
 def est_logdet_tk_adjustment(self):
     """Distances with the Tamura-Kumar adjustment differ from classic ones.

     One calculator is run with ``use_tk_adjustment=True`` and then again
     with ``False``; the two sets of distances must not compare equal.
     """
     alignment = LoadSeqs('data/brca1_5.paml', moltype=DNA)
     calculator = LogDetPair(moltype=DNA, alignment=alignment)

     def distances_with(use_tk):
         # Re-run the shared calculator and return what it reports.
         calculator.run(use_tk_adjustment=use_tk, show_progress=False)
         return calculator.getPairwiseDistances()

     adjusted = distances_with(True)
     classic = distances_with(False)
     self.assertNotEqual(adjusted, classic)
    def test_logdet_variance(self):
        """LogDet variance should agree with the hand calculation below.

        After a run with ``use_tk_adjustment=True`` the ``Variances[1, 1]``
        entry is expected to equal ``None``; after a run with the adjustment
        off it must match the locally computed value to ``eps=1e-3``.
        """
        names = ('seq1', 'seq2')
        seqs = ("GGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT",
                "TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC")
        data = list(zip(names, seqs))
        aln = LoadSeqs(data=data, moltype=DNA)
        calculator = LogDetPair(moltype=DNA, alignment=aln)
        calculator.run(use_tk_adjustment=True, show_progress=False)
        self.assertEqual(calculator.Variances[1, 1], None)

        # 4x4 table of aligned base pairs: rows follow seq1, columns seq2.
        position = dict(zip('ACGT', range(4)))
        pair_table = numpy.zeros((4, 4))
        for column in zip(*seqs):
            pair_table[position[column[0]], position[column[1]]] += 1
        # Zero counts on the diagonal are raised to 0.5.
        for d in range(4):
            if pair_table[d, d] == 0:
                pair_table[d, d] += 0.5
        pair_table /= pair_table.sum()
        inv_table = numpy.linalg.inv(pair_table)

        hand_var = 0.
        # ndindex walks (i, j) with j varying fastest, like a nested loop.
        for i, j in numpy.ndindex(4, 4):
            hand_var += inv_table[j, i]**2 * pair_table[i, j] - 1
        hand_var /= 16 * len(seqs[0])

        calculator.run(use_tk_adjustment=False, show_progress=False)
        # Distances are requested as before; the return value is unused.
        calculator.getPairwiseDistances()
        self.assertFloatEqual(calculator.Variances[1, 1], hand_var, eps=1e-3)
Пример #10
0
    def test_paralinear_pair_dna(self):
        """Paralinear distance and variance should be consistent with LogDet.

        The two calculators share one two-sequence DNA alignment; their
        ``Dists[1, 1]`` and ``Variances[1, 1]`` entries are compared with
        ``eps=1e-3``.
        """
        seq1 = 'TAATTCATTGGGACGTCGAATCCGGCAGTCCTGCCGCAAAAGCTTCCGGAATCGAATTTTGGCA'
        seq2 = 'AAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCTTTTTTTTTTTTTTTTGGGGGGGGGGGGGGGG'
        shared_aln = LoadSeqs(data=[('seq1', seq1), ('seq2', seq2)],
                              moltype=DNA)

        para = ParalinearPair(moltype=DNA, alignment=shared_aln)
        para.run(show_progress=False)
        logdet = LogDetPair(moltype=DNA, alignment=shared_aln)
        logdet.run(show_progress=False)

        logdet_dist = logdet.Dists[1, 1]
        para_dist = para.Dists[1, 1]
        self.assertFloatEqual(logdet_dist, para_dist, eps=1e-3)

        para_var = para.Variances[1, 1]
        logdet_var = logdet.Variances[1, 1]
        self.assertFloatEqual(para_var, logdet_var, eps=1e-3)
Пример #11
0
    def test_logdet_for_determinant_lte_zero(self):
        """returns distance of None if the determinant is <= 0

        The expectation is checked twice on the same calculator: once with
        ``use_tk_adjustment=True`` and once with ``False``.
        """
        data = dict(
            seq1=
            "AGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT",
            seq2=
            "TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC")
        aln = LoadSeqs(data=data, moltype=DNA)

        logdet_calc = LogDetPair(moltype=DNA, alignment=aln)
        for use_tk in (True, False):
            logdet_calc.run(use_tk_adjustment=use_tk, show_progress=False)
            dists = logdet_calc.getPairwiseDistances()
            # values() is wrapped in list() because a Python 3 dict view
            # cannot be indexed; on Python 2 this is just a list copy.
            first_distance = list(dists.values())[0]
            self.assertTrue(first_distance is None)
Пример #12
0
 def est_logdet_for_determinant_lte_zero(self):
     """returns distance of None if the determinant is <= 0

     The expectation is checked twice on the same calculator: once with
     ``use_tk_adjustment=True`` and once with ``False``.
     """
     data = dict(seq1="AGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT",
                 seq2="TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC")
     aln = LoadSeqs(data=data, moltype=DNA)

     logdet_calc = LogDetPair(moltype=DNA, alignment=aln)
     for use_tk in (True, False):
         logdet_calc.run(use_tk_adjustment=use_tk, show_progress=False)
         dists = logdet_calc.getPairwiseDistances()
         # values() is wrapped in list() because a Python 3 dict view
         # cannot be indexed; on Python 2 this is just a list copy.
         first_distance = list(dists.values())[0]
         self.assertTrue(first_distance is None)
Пример #13
0
 def test_logdet_missing_states(self):
     """should calculate logdet measurement with missing states

     A distance other than ``None`` is required both with
     ``use_tk_adjustment=True`` and with ``False``.
     """
     data = [('seq1', "GGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT"),
             ('seq2', "TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC")]
     aln = LoadSeqs(data=data, moltype=DNA)
     logdet_calc = LogDetPair(moltype=DNA, alignment=aln)

     for use_tk in (True, False):
         logdet_calc.run(use_tk_adjustment=use_tk, show_progress=False)
         dists = logdet_calc.getPairwiseDistances()
         # values() is wrapped in list() because a Python 3 dict view
         # cannot be indexed; on Python 2 this is just a list copy.
         first_distance = list(dists.values())[0]
         self.assertTrue(first_distance is not None)
Пример #14
0
 def test_logdet_variance(self):
     """LogDet variance should match the stored hand-calculated values.

     ``Variances[1, 1]`` is compared (``eps=1e-3``) with 0.5267 after a run
     with ``use_tk_adjustment=True`` and with 0.4797 after a run with it off.
     """
     seq1 = "GGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT"
     seq2 = "TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC"
     alignment = LoadSeqs(data=[('seq1', seq1), ('seq2', seq2)], moltype=DNA)
     calculator = LogDetPair(moltype=DNA, alignment=alignment)

     calculator.run(use_tk_adjustment=True, show_progress=False)
     adjusted_variance = calculator.Variances[1, 1]
     self.assertFloatEqual(adjusted_variance, 0.5267, eps=1e-3)

     calculator.run(use_tk_adjustment=False, show_progress=False)
     # Distances are requested here as well; the return value is unused.
     calculator.getPairwiseDistances()
     classic_variance = calculator.Variances[1, 1]
     self.assertFloatEqual(classic_variance, 0.4797, eps=1e-3)
Пример #15
0
    def test_logdet_missing_states(self):
        """should calculate logdet measurement with missing states

        A distance other than ``None`` is required both with
        ``use_tk_adjustment=True`` and with ``False``.
        """
        data = [
            ('seq1',
             "GGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT"
             ),
            ('seq2',
             "TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTNTTTTTTTTTTTTCCCCCCCCCCCCCCCCC"
             )
        ]
        aln = LoadSeqs(data=data, moltype=DNA)
        logdet_calc = LogDetPair(moltype=DNA, alignment=aln)

        for use_tk in (True, False):
            logdet_calc.run(use_tk_adjustment=use_tk, show_progress=False)
            dists = logdet_calc.getPairwiseDistances()
            # values() is wrapped in list() because a Python 3 dict view
            # cannot be indexed; on Python 2 this is just a list copy.
            first_distance = list(dists.values())[0]
            self.assertTrue(first_distance is not None)