def est_logdet_pair_aa(self):
    """logdet distances can be requested for a translated (protein) alignment

    No value is asserted; the check is only that the run and the distance
    lookup complete without raising.
    """
    # NOTE(review): the `est_` prefix presumably keeps unittest discovery
    # from collecting this method -- confirm before renaming.
    protein_aln = LoadSeqs('data/brca1_5.paml', moltype=DNA).getTranslation()
    calculator = LogDetPair(moltype=PROTEIN, alignment=protein_aln)
    calculator.run(use_tk_adjustment=True, show_progress=False)
    calculator.getPairwiseDistances()
def est_logdet_pair_dna(self):
    """logdet should produce distances that match MEGA

    Every pair returned by the calculator is compared against the stored
    reference value held under the same (name, name) key.
    """
    aln = LoadSeqs('data/brca1_5.paml', moltype=DNA)
    logdet_calc = LogDetPair(moltype=DNA, alignment=aln)
    # Pass show_progress=False like the other LogDetPair.run calls in these
    # tests; presumably this suppresses progress output during the run.
    logdet_calc.run(use_tk_adjustment=True, show_progress=False)
    dists = logdet_calc.getPairwiseDistances()
    # Reference distances, stored under both orderings of each pair.
    all_expected = {
        ('Human', 'NineBande'): 0.075336929999999996,
        ('NineBande', 'DogFaced'): 0.0898575452,
        ('DogFaced', 'Human'): 0.1061747919,
        ('HowlerMon', 'DogFaced'): 0.0934480008,
        ('Mouse', 'HowlerMon'): 0.26422862920000001,
        ('NineBande', 'Human'): 0.075336929999999996,
        ('HowlerMon', 'NineBande'): 0.062202897899999998,
        ('DogFaced', 'NineBande'): 0.0898575452,
        ('DogFaced', 'HowlerMon'): 0.0934480008,
        ('Human', 'DogFaced'): 0.1061747919,
        ('Mouse', 'Human'): 0.26539976700000001,
        ('NineBande', 'HowlerMon'): 0.062202897899999998,
        ('HowlerMon', 'Human'): 0.036571181899999999,
        ('DogFaced', 'Mouse'): 0.2652555144,
        ('HowlerMon', 'Mouse'): 0.26422862920000001,
        ('Mouse', 'DogFaced'): 0.2652555144,
        ('NineBande', 'Mouse'): 0.22754789210000001,
        ('Mouse', 'NineBande'): 0.22754789210000001,
        ('Human', 'Mouse'): 0.26539976700000001,
        ('Human', 'HowlerMon'): 0.036571181899999999,
    }
    for pair in dists:
        self.assertFloatEqual(dists[pair], all_expected[pair])
def test_logdet_pair_aa(self):
    """requesting logdet distances for amino-acid sequences must not raise"""
    dna_aln = LoadSeqs('data/brca1_5.paml', moltype=DNA)
    # Translate the nucleotide alignment so the calculator sees protein data.
    aa_aln = dna_aln.getTranslation()
    aa_calc = LogDetPair(moltype=PROTEIN, alignment=aa_aln)
    aa_calc.run(use_tk_adjustment=True, show_progress=False)
    # Nothing is asserted about the values; reaching the end is the test.
    aa_calc.getPairwiseDistances()
def test_logdet_pair_dna(self):
    """logdet DNA distances agree with the stored MEGA reference values"""
    calculator = LogDetPair(
        moltype=DNA,
        alignment=LoadSeqs('data/brca1_5.paml', moltype=DNA),
    )
    calculator.run(use_tk_adjustment=True, show_progress=False)
    dists = calculator.getPairwiseDistances()

    # The reference table is symmetric, so each unordered pair is listed
    # once and mirrored below to cover both key orderings.
    one_way = [
        ('Human', 'NineBande', 0.075336929999999996),
        ('NineBande', 'DogFaced', 0.0898575452),
        ('DogFaced', 'Human', 0.1061747919),
        ('HowlerMon', 'DogFaced', 0.0934480008),
        ('Mouse', 'HowlerMon', 0.26422862920000001),
        ('HowlerMon', 'NineBande', 0.062202897899999998),
        ('Mouse', 'Human', 0.26539976700000001),
        ('HowlerMon', 'Human', 0.036571181899999999),
        ('DogFaced', 'Mouse', 0.2652555144),
        ('NineBande', 'Mouse', 0.22754789210000001),
    ]
    all_expected = {}
    for first, second, distance in one_way:
        all_expected[(first, second)] = distance
        all_expected[(second, first)] = distance

    for pair in dists:
        self.assertFloatEqual(dists[pair], all_expected[pair])
def test_paralinear_pair_dna(self):
    """paralinear distance and variance agree with logdet on the same data

    Both calculators are run on one two-sequence DNA alignment and their
    [1, 1] distance and variance entries are compared with eps=1e-3.
    """
    first = 'TAATTCATTGGGACGTCGAATCCGGCAGTCCTGCCGCAAAAGCTTCCGGAATCGAATTTTGGCA'
    second = 'AAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCTTTTTTTTTTTTTTTTGGGGGGGGGGGGGGGG'
    aln = LoadSeqs(data=[('seq1', first), ('seq2', second)], moltype=DNA)

    paralinear = ParalinearPair(moltype=DNA, alignment=aln)
    paralinear.run(show_progress=False)

    logdet = LogDetPair(moltype=DNA, alignment=aln)
    logdet.run(show_progress=False)

    self.assertFloatEqual(logdet.Dists[1,1], paralinear.Dists[1,1],
                          eps=1e-3)
    self.assertFloatEqual(paralinear.Variances[1,1], logdet.Variances[1,1],
                          eps=1e-3)
def test_logdet_tk_adjustment(self):
    """distances with the tamura kumar adjustment differ from those without"""
    calculator = LogDetPair(
        moltype=DNA,
        alignment=LoadSeqs('data/brca1_5.paml', moltype=DNA),
    )
    # Run the same calculator twice: adjusted first, then unadjusted.
    collected = []
    for adjust in (True, False):
        calculator.run(use_tk_adjustment=adjust, show_progress=False)
        collected.append(calculator.getPairwiseDistances())
    with_tk, without_tk = collected
    self.assertNotEqual(with_tk, without_tk)
def test_logdet_variance(self):
    """logdet variance matches a hand calculation done with numpy

    With the TK adjustment the [1, 1] variance entry is expected to be
    None; without it, the entry is compared (eps=1e-3) to the value
    computed below from the joint base-pair frequencies.
    """
    seq1 = "GGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT"
    seq2 = "TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC"
    data = [('seq1', seq1), ('seq2', seq2)]
    aln = LoadSeqs(data=data, moltype=DNA)
    calculator = LogDetPair(moltype=DNA, alignment=aln)
    calculator.run(use_tk_adjustment=True, show_progress=False)
    self.assertEqual(calculator.Variances[1, 1], None)

    # J holds joint counts of aligned bases: row = seq1 base, column = seq2.
    position = dict((base, k) for k, base in enumerate('ACGT'))
    J = numpy.zeros((4, 4))
    for base1, base2 in zip(seq1, seq2):
        J[position[base1], position[base2]] += 1
    # Diagonal cells with no observations receive a 0.5 pseudocount.
    for k in range(4):
        if J[k, k] == 0:
            J[k, k] += 0.5
    # Convert counts to frequencies before inverting.
    J /= J.sum()
    M = numpy.linalg.inv(J)
    var = sum(M[col, row]**2 * J[row, col] - 1
              for row in range(4) for col in range(4))
    var /= 16 * len(seq1)

    calculator.run(use_tk_adjustment=False, show_progress=False)
    calculator.getPairwiseDistances()
    self.assertFloatEqual(calculator.Variances[1, 1], var, eps=1e-3)
def est_logdet_tk_adjustment(self):
    """tamura kumar adjusted logdet distances are not equal to classic ones"""
    # NOTE(review): the `est_` prefix presumably keeps unittest discovery
    # from collecting this method -- confirm before renaming.
    calculator = LogDetPair(
        moltype=DNA,
        alignment=LoadSeqs('data/brca1_5.paml', moltype=DNA),
    )

    def distances(adjusted):
        # Re-run the shared calculator with the requested setting.
        calculator.run(use_tk_adjustment=adjusted, show_progress=False)
        return calculator.getPairwiseDistances()

    adjusted_dists = distances(True)
    classic_dists = distances(False)
    self.assertNotEqual(adjusted_dists, classic_dists)
def test_logdet_variance(self):
    """check the logdet variance against a value worked out by hand

    The TK-adjusted run is expected to leave the [1,1] variance as None;
    the unadjusted run is compared (eps=1e-3) with the numpy calculation
    below.
    """
    data = [
        ('seq1',
         "GGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT"),
        ('seq2',
         "TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC"),
    ]
    top, bottom = data[0][1], data[1][1]
    logdet = LogDetPair(moltype=DNA,
                        alignment=LoadSeqs(data=data, moltype=DNA))
    logdet.run(use_tk_adjustment=True, show_progress=False)
    self.assertEqual(logdet.Variances[1,1], None)

    # Tally aligned base pairs into a 4x4 table ordered A, C, G, T.
    column_of = dict(zip('ACGT', range(4)))
    J = numpy.zeros((4, 4))
    for upper, lower in zip(top, bottom):
        J[column_of[upper], column_of[lower]] += 1
    # An empty diagonal cell is bumped by 0.5 before normalising.
    for d in range(4):
        if J[d, d] == 0:
            J[d, d] += 0.5
    J /= J.sum()
    M = numpy.linalg.inv(J)
    var = 0.
    for r in range(4):
        for c in range(4):
            term = M[c, r]**2 * J[r, c] - 1
            var += term
    var /= 16 * len(top)

    logdet.run(use_tk_adjustment=False, show_progress=False)
    logdet.getPairwiseDistances()
    self.assertFloatEqual(logdet.Variances[1,1], var, eps=1e-3)
def test_paralinear_pair_dna(self):
    """paralinear results should be consistent with logdet results

    The [1, 1] distance and variance entries of the two calculators are
    compared with eps=1e-3 after running both on the same alignment.
    """
    sequences = [
        ('seq1',
         'TAATTCATTGGGACGTCGAATCCGGCAGTCCTGCCGCAAAAGCTTCCGGAATCGAATTTTGGCA'),
        ('seq2',
         'AAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCTTTTTTTTTTTTTTTTGGGGGGGGGGGGGGGG'),
    ]
    alignment = LoadSeqs(data=sequences, moltype=DNA)

    para = ParalinearPair(moltype=DNA, alignment=alignment)
    para.run(show_progress=False)
    ld = LogDetPair(moltype=DNA, alignment=alignment)
    ld.run(show_progress=False)

    ld_dist = ld.Dists[1, 1]
    para_dist = para.Dists[1, 1]
    self.assertFloatEqual(ld_dist, para_dist, eps=1e-3)

    para_var = para.Variances[1, 1]
    ld_var = ld.Variances[1, 1]
    self.assertFloatEqual(para_var, ld_var, eps=1e-3)
def test_logdet_for_determinant_lte_zero(self):
    """returns distance of None if the determinant is <= 0

    The first reported distance is checked for both the TK-adjusted and
    the unadjusted run of the same calculator.
    """
    data = dict(
        seq1="AGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT",
        seq2="TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC")
    aln = LoadSeqs(data=data, moltype=DNA)
    logdet_calc = LogDetPair(moltype=DNA, alignment=aln)

    logdet_calc.run(use_tk_adjustment=True, show_progress=False)
    dists = logdet_calc.getPairwiseDistances()
    # list(...) makes the values indexable on Python 3 as well, where
    # dict.values() returns a view that does not support subscripting.
    self.assertTrue(list(dists.values())[0] is None)

    logdet_calc.run(use_tk_adjustment=False, show_progress=False)
    dists = logdet_calc.getPairwiseDistances()
    self.assertTrue(list(dists.values())[0] is None)
def est_logdet_for_determinant_lte_zero(self):
    """returns distance of None if the determinant is <= 0

    The first reported distance is checked after a TK-adjusted run and
    again after an unadjusted run.
    """
    # NOTE(review): the `est_` prefix presumably keeps unittest discovery
    # from collecting this method -- confirm before renaming.
    data = dict(
        seq1="AGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT",
        seq2="TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC")
    aln = LoadSeqs(data=data, moltype=DNA)
    logdet_calc = LogDetPair(moltype=DNA, alignment=aln)

    logdet_calc.run(use_tk_adjustment=True, show_progress=False)
    dists = logdet_calc.getPairwiseDistances()
    # Materialise the values before indexing: a Python 3 dict view cannot
    # be subscripted, while list(...) works on Python 2 and 3 alike.
    self.assertTrue(list(dists.values())[0] is None)

    logdet_calc.run(use_tk_adjustment=False, show_progress=False)
    dists = logdet_calc.getPairwiseDistances()
    self.assertTrue(list(dists.values())[0] is None)
def test_logdet_missing_states(self):
    """should calculate logdet measurement with missing states

    The first reported distance must not be None, both with and without
    the TK adjustment.
    """
    data = [('seq1', "GGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT"),
            ('seq2', "TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC")]
    aln = LoadSeqs(data=data, moltype=DNA)
    logdet_calc = LogDetPair(moltype=DNA, alignment=aln)

    logdet_calc.run(use_tk_adjustment=True, show_progress=False)
    dists = logdet_calc.getPairwiseDistances()
    # list(...) keeps the lookup valid on Python 3, where dict.values()
    # returns a non-subscriptable view.
    self.assertTrue(list(dists.values())[0] is not None)

    logdet_calc.run(use_tk_adjustment=False, show_progress=False)
    dists = logdet_calc.getPairwiseDistances()
    self.assertTrue(list(dists.values())[0] is not None)
def test_logdet_variance(self):
    """logdet variance matches stored reference values

    The [1,1] variance entry is compared (eps=1e-3) with 0.5267 after the
    TK-adjusted run and with 0.4797 after the unadjusted run.
    """
    seq1 = "GGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT"
    seq2 = "TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC"
    alignment = LoadSeqs(data=[('seq1', seq1), ('seq2', seq2)], moltype=DNA)
    calculator = LogDetPair(moltype=DNA, alignment=alignment)

    calculator.run(use_tk_adjustment=True, show_progress=False)
    adjusted_variance = calculator.Variances[1,1]
    self.assertFloatEqual(adjusted_variance, 0.5267, eps=1e-3)

    calculator.run(use_tk_adjustment=False, show_progress=False)
    # The distances are requested before the variance is read, as in the
    # original sequence of calls; the returned value itself is not used.
    calculator.getPairwiseDistances()
    classic_variance = calculator.Variances[1,1]
    self.assertFloatEqual(classic_variance, 0.4797, eps=1e-3)
def test_logdet_missing_states(self):
    """should calculate logdet measurement with missing states

    The first reported distance must not be None, both with and without
    the TK adjustment. The second sequence contains one 'N' character.
    """
    data = [
        ('seq1',
         "GGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT"),
        ('seq2',
         "TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTNTTTTTTTTTTTTCCCCCCCCCCCCCCCCC"),
    ]
    aln = LoadSeqs(data=data, moltype=DNA)
    logdet_calc = LogDetPair(moltype=DNA, alignment=aln)

    logdet_calc.run(use_tk_adjustment=True, show_progress=False)
    dists = logdet_calc.getPairwiseDistances()
    # Index a list copy of the values: dict.values() is a view on Python 3
    # and cannot be subscripted directly.
    self.assertTrue(list(dists.values())[0] is not None)

    logdet_calc.run(use_tk_adjustment=False, show_progress=False)
    dists = logdet_calc.getPairwiseDistances()
    self.assertTrue(list(dists.values())[0] is not None)