def test_EstimateDistances_fromUnaligned(self):
    """Exercise distance estimation starting from unaligned sequences.

    JC69 pairwise distances are estimated from ``self.collection`` with
    ``do_pair_align=True``, once with ``rigorous_align=True`` and once
    with ``rigorous_align=False``.  Both runs are compared against the
    same canned distances.
    """
    expected = {
        ('b', 'e'): 0.440840,
        ('c', 'e'): 0.440840,
        ('a', 'c'): 0.088337,
        ('a', 'b'): 0.188486,
        ('a', 'e'): 0.440840,
        ('b', 'c'): 0.0883373,
    }
    # Rigorous alignment first, then the non-rigorous variant.
    for rigorous in (True, False):
        estimator = EstimateDistances(
            self.collection,
            JC69(),
            do_pair_align=True,
            rigorous_align=rigorous,
        )
        estimator.run()
        observed = estimator.getPairwiseDistances()
        self.assertDistsAlmostEqual(expected, observed)
def get_alignment(tree, N_sites):
    """Build a PyCogent alignment object from a tree and a length.

    A JC69 likelihood function is created for ``tree``, its lengths are
    made constant via ``setConstantLengths()``, and an alignment of
    ``N_sites`` positions is simulated from it and returned.
    """
    likelihood_fn = JC69().makeLikelihoodFunction(tree)
    likelihood_fn.setConstantLengths()
    return likelihood_fn.simulateAlignment(sequence_length=N_sites)
def ml4(aln, true_tree):
    """Check whether the ML 4-taxon topology matches the true tree.

    Input a true tree and an alignment.  The log-likelihood of each of
    the three candidate topologies on taxa a, b, c, d is optimised under
    JC69.  Returns True if the best-scoring candidate has the same
    topology as ``true_tree`` and False otherwise.  When several
    candidates tie, the first one listed wins.
    """
    newick_candidates = (
        '((a,b),(c,d))',
        '((a,c),(b,d))',
        '((a,d),(b,c))',
    )
    candidates = [LoadTree(treestring=newick) for newick in newick_candidates]

    model = JC69()

    def optimised_log_likelihood(topology):
        # Fit the alignment on one candidate and report its score.
        fn = model.makeLikelihoodFunction(topology)
        fn.setAlignment(aln)
        fn.optimise(local=True)
        return fn.getLogLikelihood()

    scores = [optimised_log_likelihood(topology) for topology in candidates]

    # Index of the first maximal score picks the ML tree.
    best = max(range(len(scores)), key=scores.__getitem__)
    return candidates[best].sameTopology(true_tree)
def test_EstimateDistances_fromThreeway(self):
    """Exercise EstimateDistances with ``threeway=True``.

    Distances are estimated from ``self.al`` under JC69, summarised with
    the ``"mean"`` summary function, and compared to canned values.
    """
    estimator = EstimateDistances(self.al, JC69(), threeway=True)
    estimator.run()
    expected = {
        ('b', 'e'): 0.495312,
        ('c', 'e'): 0.479380,
        ('a', 'c'): 0.089934,
        ('a', 'b'): 0.190021,
        ('a', 'e'): 0.495305,
        ('b', 'c'): 0.0899339,
    }
    observed = estimator.getPairwiseDistances(summary_function="mean")
    self.assertDistsAlmostEqual(expected, observed)
def evaluate_tree(aln):
    """Report whether A and B are three edges apart in the NJ tree.

    JC69 pairwise distances are estimated from ``aln`` (progress output
    disabled) and fed to neighbour joining.  The return value is True
    when ``getConnectingEdges('A', 'B')`` on the resulting tree has
    exactly three entries.

    When the module-level ``debug`` flag is truthy, the distance
    estimator, the tree drawing, the topology comparison against the
    module-level ``tr`` and the edges connecting A to each of B, C and D
    are printed.
    """
    estimator = distance.EstimateDistances(aln, submodel=JC69())
    estimator.run(show_progress=False)
    nj_tree = nj.nj(estimator.getPairwiseDistances())

    if debug:
        print(estimator)
        print(nj_tree.asciiArt())
        print(nj_tree.sameTopology(tr))
        for other in 'BCD':
            print(nj_tree.getConnectingEdges('A', other))

    edges_a_to_b = nj_tree.getConnectingEdges('A', 'B')
    return len(edges_a_to_b) == 3
def optimization(result, aln, tree1, tree2):
    """Re-fit both trees on the sites currently assigned to them.

    ``result[i]`` is the assignment of alignment column ``i``: 1 sends
    the column to the tree-1 partition, 2 to the tree-2 partition, and 0
    to both.  Each partition is fitted under JC69 on its tree with a
    local optimisation.

    Returns a ``(tree_parameter, likelihood)`` pair.  ``tree_parameter``
    is ``[[p1, q1, r1], [p2, q2, r2]]`` and ``likelihood`` is the sum of
    the two optimised log-likelihoods.
    """
    taxa = ('a', 'c', 'b', 'd')

    def empty_partition():
        # Zero-length alignment that assigned columns get appended to.
        return LoadSeqs(data=[(name, '') for name in taxa], moltype=DNA)

    sites_tree1 = empty_partition()
    sites_tree2 = empty_partition()
    for pos in range(len(aln)):
        label = result[pos]
        if label in (1, 0):
            sites_tree1 = sites_tree1 + aln[pos]
        if label in (2, 0):
            sites_tree2 = sites_tree2 + aln[pos]

    model = JC69()

    def fit(tree, sites):
        # Optimise one tree on its partition; return ([p, q, r], logL).
        fn = model.makeLikelihoodFunction(tree)
        fn.setAlignment(sites)
        fn.optimise(local=True)
        log_like = fn.getLogLikelihood()

        def length(edge):
            return fn.getParamValue('length', edge)

        # p averages the a/c tips, q averages the b/d tips and r adds
        # the two internal edges together.
        p = (length('a') + length('c')) / 2.0
        q = (length('b') + length('d')) / 2.0
        r = length('edge.1') + length('edge.0')
        return [p, q, r], log_like

    params1, log_like1 = fit(tree1, sites_tree1)
    params2, log_like2 = fit(tree2, sites_tree2)

    # Log-likelihoods add up to the total over all sites.
    return [params1, params2], log_like1 + log_like2
def expectation_singlesite(aln, tree1, tree2):
    """Label every alignment column with the tree that fits it better.

    For each column of ``aln`` the JC69 log-likelihood is evaluated on
    ``tree1`` and on ``tree2``.  The returned list holds one entry per
    column: 1 when tree 1 scores higher, 2 when tree 2 scores higher and
    0 when the two scores are equal.
    """
    model = JC69()
    fn_tree1 = model.makeLikelihoodFunction(tree1)
    fn_tree2 = model.makeLikelihoodFunction(tree2)

    labels = []
    for pos in range(len(aln)):
        column = aln[pos]
        fn_tree1.setAlignment(column)
        score1 = fn_tree1.getLogLikelihood()
        fn_tree2.setAlignment(column)
        score2 = fn_tree2.getLogLikelihood()

        if score1 > score2:
            labels.append(1)
        elif score1 < score2:
            labels.append(2)
        elif score1 == score2:
            labels.append(0)
    return labels
def test_EstimateDistances(self):
    """Exercise EstimateDistances on ``self.al`` under JC69.

    The pairwise distances are compared to canned values, then the
    estimator is written to a scratch file which is removed afterwards.
    """
    estimator = EstimateDistances(self.al, JC69())
    estimator.run()
    expected = {
        ('b', 'e'): 0.440840,
        ('c', 'e'): 0.440840,
        ('a', 'c'): 0.088337,
        ('a', 'b'): 0.188486,
        ('a', 'e'): 0.440840,
        ('b', 'c'): 0.0883373,
    }
    observed = estimator.getPairwiseDistances()
    self.assertDistsAlmostEqual(expected, observed)

    # Exercise writing to file; cleanup is best-effort.
    scratch_path = 'junk.txt'
    estimator.writeToFile(scratch_path)
    try:
        os.remove(scratch_path)
    except OSError:
        pass  # probably parallel
def optimization(result, aln, tree1, tree2, tree):
    """Re-estimate branch-length parameters for the two site partitions.

    ``result[i]`` is the assignment of alignment column ``i``: 1 sends
    the column to the first partition, 2 to the second, and 0 to both.
    Each partition is fitted under JC69 with a local optimisation, and
    both fits are built from ``tree``.

    NOTE(review): ``tree1`` and ``tree2`` are accepted but never used
    here -- confirm that fitting both partitions on ``tree`` is intended.

    Returns ``[[p1, q1, r1], [p2, q2, r2]]``.

    The partitions and the resulting parameters are printed.  The
    single-argument ``print(...)`` form is used so the output is the
    same on Python 2 and Python 3.
    """
    taxa = ('a', 'c', 'b', 'd')

    def empty_partition():
        # Zero-length alignment that assigned columns get appended to.
        return LoadSeqs(data=[(name, '') for name in taxa], moltype=DNA)

    aln1 = empty_partition()
    aln2 = empty_partition()
    for i in range(len(aln)):
        label = result[i]
        if label in (1, 0):
            aln1 = aln1 + aln[i]
        if label in (2, 0):
            aln2 = aln2 + aln[i]

    print("aln1:")
    print(aln1)
    print("aln2:")
    print(aln2)

    model = JC69()

    def fit(sites):
        # Optimise ``tree`` on one partition and return [p, q, r].
        lf = model.makeLikelihoodFunction(tree)
        lf.setAlignment(sites)
        lf.optimise(local=True)

        def length(edge):
            return lf.getParamValue('length', edge)

        # p averages the a/c tips, q averages the b/d tips and r adds
        # the two internal edges together.
        p = (length('a') + length('c')) / 2.0
        q = (length('b') + length('d')) / 2.0
        r = length('edge.1') + length('edge.0')
        return [p, q, r]

    tree_parameter = [fit(aln1), fit(aln2)]

    print("tree parameter in this time")
    print(tree_parameter)
    return tree_parameter
def expectation_singlesite(aln, tree1, tree2):
    """Randomly assign every alignment column to tree 1 or tree 2.

    For each column of ``aln`` the JC69 log-likelihood is evaluated on
    ``tree1`` and on ``tree2``.  The column is then assigned to a tree
    with probability proportional to that tree's likelihood for the
    column, so the better-fitting tree is the more likely choice.

    Returns a list with one entry per column: 1 for tree 1, 2 for
    tree 2.  The draw uses ``np.random.random()``, so results depend on
    NumPy's global random state.
    """
    model = JC69()
    lf1 = model.makeLikelihoodFunction(tree1)
    lf2 = model.makeLikelihoodFunction(tree2)

    result = []
    for i in range(len(aln)):
        site = aln[i]
        lf1.setAlignment(site)
        log_like1 = lf1.getLogLikelihood()
        lf2.setAlignment(site)
        log_like2 = lf2.getLogLikelihood()

        # Subtract the larger log-likelihood before exponentiating so
        # that at least one weight is exactly 1 and neither underflows
        # to zero together with the other.
        shift = max(log_like1, log_like2)
        weight1 = np.exp(log_like1 - shift)
        weight2 = np.exp(log_like2 - shift)

        # A uniform draw on [0, weight1 + weight2) falls below weight1
        # with probability weight1 / (weight1 + weight2).  The previous
        # test was inverted and picked tree 1 with tree 2's probability.
        draw = np.random.random() * (weight1 + weight2)
        if draw < weight1:
            result.append(1)
        else:
            result.append(2)
    return result