Exemplo n.º 1
0
 def test_EstimateDistances_fromUnaligned(self):
     """Exercise distance estimation from unaligned sequences.

     The estimator is built twice from ``self.collection`` with
     ``do_pair_align=True`` -- first with ``rigorous_align=True``, then
     with ``rigorous_align=False`` -- and each run is compared against
     the same canned pairwise distances.
     """
     for rigorous in (True, False):
         estimator = EstimateDistances(self.collection, JC69(),
                                       do_pair_align=True,
                                       rigorous_align=rigorous)
         estimator.run()
         # Both alignment modes are expected to give the same distances,
         # so the expectation is rebuilt unchanged for each run.
         expected = {
             ('b', 'e'): 0.440840,
             ('c', 'e'): 0.440840,
             ('a', 'c'): 0.088337,
             ('a', 'b'): 0.188486,
             ('a', 'e'): 0.440840,
             ('b', 'c'): 0.0883373,
         }
         observed = estimator.getPairwiseDistances()
         self.assertDistsAlmostEqual(expected, observed)
Exemplo n.º 2
0
def get_alignment(tree, N_sites):
    """Simulate a PyCogent alignment on ``tree`` under the JC69 model.

    A JC69 likelihood function is made for ``tree``, its
    ``setConstantLengths()`` method is called, and an alignment with
    ``sequence_length=N_sites`` is simulated from it and returned.
    """
    likelihood_fn = JC69().makeLikelihoodFunction(tree)
    likelihood_fn.setConstantLengths()
    return likelihood_fn.simulateAlignment(sequence_length=N_sites)
Exemplo n.º 3
0
def ml4(aln, true_tree):
    """Report whether the maximum-likelihood 4-taxon tree is the true tree.

    Each of the three unrooted topologies on taxa a, b, c, d is fitted to
    ``aln`` under JC69 with a local optimisation, and the topology with the
    highest log-likelihood is compared with ``true_tree``.

    Returns the result of ``sameTopology(true_tree)`` on the winning
    topology. On a tie the earliest topology in the candidate list wins.
    """
    # The three possible unrooted topologies for four taxa.
    newick_strings = ('((a,b),(c,d))', '((a,c),(b,d))', '((a,d),(b,c))')
    candidates = [LoadTree(treestring=newick) for newick in newick_strings]

    model = JC69()

    def optimised_log_likelihood(topology):
        # Fit the model on this topology and return its log-likelihood.
        fitted = model.makeLikelihoodFunction(topology)
        fitted.setAlignment(aln)
        fitted.optimise(local=True)
        return fitted.getLogLikelihood()

    scores = [optimised_log_likelihood(topology) for topology in candidates]

    # index(max(...)) picks the first of any tied scores
    winner = candidates[scores.index(max(scores))]
    return winner.sameTopology(true_tree)
Exemplo n.º 4
0
 def test_EstimateDistances_fromThreeway(self):
     """Exercise EstimateDistances with ``threeway=True``.

     Distances are summarised with ``summary_function="mean"`` and
     compared against canned values.
     """
     estimator = EstimateDistances(self.al, JC69(), threeway=True)
     estimator.run()
     expected = {
         ('b', 'e'): 0.495312,
         ('c', 'e'): 0.479380,
         ('a', 'c'): 0.089934,
         ('a', 'b'): 0.190021,
         ('a', 'e'): 0.495305,
         ('b', 'c'): 0.0899339,
     }
     observed = estimator.getPairwiseDistances(summary_function="mean")
     self.assertDistsAlmostEqual(expected, observed)
Exemplo n.º 5
0
def evaluate_tree(aln):
    """Check whether the NJ tree for ``aln`` joins A and B by three edges.

    JC69 pairwise distances are estimated from ``aln`` (progress display
    off) and passed to ``nj.nj`` to build a tree. Returns ``True`` when
    ``getConnectingEdges('A', 'B')`` on that tree has length 3.

    When the module-level ``debug`` flag is truthy, the estimator, an
    ASCII drawing of the tree, its topology comparison with the
    module-level tree ``tr``, and the edges connecting A to each of
    B, C and D are printed first.
    """
    estimator = distance.EstimateDistances(aln, submodel=JC69())
    estimator.run(show_progress=False)
    pairwise = estimator.getPairwiseDistances()
    njtree = nj.nj(pairwise)

    if debug:
        print(estimator)
        print(njtree.asciiArt())
        print(njtree.sameTopology(tr))
        for other in ('B', 'C', 'D'):
            print(njtree.getConnectingEdges('A', other))

    return len(njtree.getConnectingEdges('A', 'B')) == 3
Exemplo n.º 6
0
def optimization(result, aln, tree1, tree2):
    """Refit both trees to the sites assigned to them.

    ``result[i]`` labels site ``aln[i]``: 1 sends it to tree1, 2 sends it
    to tree2, and 0 sends it to both. Any other label leaves the site out
    of both sub-alignments. Each tree is then fitted to its own
    sub-alignment under JC69 with a local optimisation.

    Returns ``(tree_parameter, likelihood)`` where ``tree_parameter`` is
    ``[[p1, q1, r1], [p2, q2, r2]]`` and ``likelihood`` is the sum of the
    two log-likelihoods. For each tree, ``p`` is the mean length of edges
    'a' and 'c', ``q`` the mean length of edges 'b' and 'd', and ``r`` the
    summed length of 'edge.1' and 'edge.0'.
    """
    taxa = ('a', 'c', 'b', 'd')

    def empty_alignment():
        # A zero-length DNA alignment to which single sites are appended.
        return LoadSeqs(data=[(name, '') for name in taxa], moltype=DNA)

    # Split the sites between the two trees according to the labels.
    sites_tree1 = empty_alignment()
    sites_tree2 = empty_alignment()
    for i in range(len(aln)):
        label = result[i]
        if label == 0:
            sites_tree1 = sites_tree1 + aln[i]
            sites_tree2 = sites_tree2 + aln[i]
        elif label == 1:
            sites_tree1 = sites_tree1 + aln[i]
        elif label == 2:
            sites_tree2 = sites_tree2 + aln[i]

    model = JC69()

    def fit(tree, sites):
        # Optimise the tree on its sites; return ([p, q, r], log-likelihood).
        lf = model.makeLikelihoodFunction(tree)
        lf.setAlignment(sites)
        lf.optimise(local=True)
        log_likelihood = lf.getLogLikelihood()

        def length(edge):
            return lf.getParamValue('length', edge)

        # The code treats the tree as symmetric: paired terminal edges
        # are averaged and the two internal edges are summed.
        p = (length('a') + length('c')) / 2.0
        q = (length('b') + length('d')) / 2.0
        r = length('edge.1') + length('edge.0')
        return [p, q, r], log_likelihood

    params1, log_likelihood1 = fit(tree1, sites_tree1)
    params2, log_likelihood2 = fit(tree2, sites_tree2)

    # Log-likelihoods add, giving the total over all assigned sites.
    return [params1, params2], log_likelihood1 + log_likelihood2
Exemplo n.º 7
0
def expectation_singlesite(aln, tree1, tree2):
    """Label each site of ``aln`` with the tree that explains it better.

    For every site the JC69 log-likelihood is evaluated on ``tree1`` and
    on ``tree2`` (no optimisation is run here). The returned list holds
    1 where tree1 scores higher, 2 where tree2 scores higher, and 0 where
    the two scores are equal.
    """
    model = JC69()
    assignments = []
    lf_tree1 = model.makeLikelihoodFunction(tree1)
    lf_tree2 = model.makeLikelihoodFunction(tree2)

    for i in range(len(aln)):
        lf_tree1.setAlignment(aln[i])
        score1 = lf_tree1.getLogLikelihood()
        lf_tree2.setAlignment(aln[i])
        score2 = lf_tree2.getLogLikelihood()

        if score1 > score2:
            assignments.append(1)
        elif score1 < score2:
            assignments.append(2)
        elif score1 == score2:
            assignments.append(0)

    return assignments
Exemplo n.º 8
0
 def test_EstimateDistances(self):
     """Exercise EstimateDistances on ``self.al`` under JC69.

     Pairwise distances are compared against canned values, then the
     estimator is written to a scratch file which is removed afterwards.
     """
     estimator = EstimateDistances(self.al, JC69())
     estimator.run()
     expected = {
         ('b', 'e'): 0.440840,
         ('c', 'e'): 0.440840,
         ('a', 'c'): 0.088337,
         ('a', 'b'): 0.188486,
         ('a', 'e'): 0.440840,
         ('b', 'c'): 0.0883373,
     }
     self.assertDistsAlmostEqual(expected, estimator.getPairwiseDistances())

     # Exercise writing to file.
     scratch_path = 'junk.txt'
     estimator.writeToFile(scratch_path)
     try:
         os.remove(scratch_path)
     except OSError:
         # Best-effort cleanup: presumably the file can be missing when
         # the tests run in parallel.
         pass
Exemplo n.º 9
0
def optimization(result, aln, tree1, tree2, tree):
    """Refit ``tree`` separately to the two groups of assigned sites.

    ``result[i]`` labels site ``aln[i]``: 1 puts it in the first group,
    2 in the second, and 0 in both. Any other label leaves the site out
    of both groups. ``tree`` is then fitted to each group under JC69 with
    a local optimisation.

    ``tree1`` and ``tree2`` are accepted for interface compatibility but
    are not used; both fits start from ``tree``.

    The two sub-alignments and the resulting parameters are printed.
    Single-argument ``print(...)`` calls are used so the function emits
    the same text on Python 2 and Python 3.

    Returns ``[[p1, q1, r1], [p2, q2, r2]]``. For each fit, ``p`` is the
    mean length of edges 'a' and 'c', ``q`` the mean length of edges 'b'
    and 'd', and ``r`` the summed length of 'edge.1' and 'edge.0'.
    """
    taxa = ('a', 'c', 'b', 'd')

    def empty_alignment():
        # A zero-length DNA alignment to which single sites are appended.
        return LoadSeqs(data=[(name, '') for name in taxa], moltype=DNA)

    aln1 = empty_alignment()
    aln2 = empty_alignment()
    for i in range(len(aln)):
        label = result[i]
        if label == 0:
            aln1 = aln1 + aln[i]
            aln2 = aln2 + aln[i]
        elif label == 1:
            aln1 = aln1 + aln[i]
        elif label == 2:
            aln2 = aln2 + aln[i]

    print("aln1:")
    print(aln1)
    print("aln2:")
    print(aln2)

    model = JC69()

    def fit(sites):
        # Optimise ``tree`` on the given sites and return [p, q, r].
        lf = model.makeLikelihoodFunction(tree)
        lf.setAlignment(sites)
        lf.optimise(local=True)

        def length(edge):
            return lf.getParamValue('length', edge)

        p = (length('a') + length('c')) / 2.0
        q = (length('b') + length('d')) / 2.0
        r = length('edge.1') + length('edge.0')
        return [p, q, r]

    tree_parameter = [fit(aln1), fit(aln2)]

    print("tree parameter in this time")
    print(tree_parameter)
    return tree_parameter
Exemplo n.º 10
0
def expectation_singlesite(aln, tree1, tree2):
    """Stochastically assign each site of ``aln`` to tree1 or tree2.

    For every site the JC69 log-likelihood is evaluated on ``tree1`` and
    on ``tree2`` (no optimisation is run here). The site is labelled 1
    with probability L1 / (L1 + L2) and 2 otherwise, where L1 and L2 are
    the two likelihoods, so the tree with the larger likelihood is the
    more probable label.

    Returns a list with one label (1 or 2) per site. The outcome depends
    on the state of NumPy's global random generator.
    """
    model = JC69()
    lf1 = model.makeLikelihoodFunction(tree1)
    lf2 = model.makeLikelihoodFunction(tree2)

    result = []
    for i in range(len(aln)):
        site = aln[i]
        lf1.setAlignment(site)
        log_lik1 = lf1.getLogLikelihood()
        lf2.setAlignment(site)
        log_lik2 = lf2.getLogLikelihood()

        # Shift both log-likelihoods so the larger becomes 0 before
        # exponentiating. This keeps the ratio and avoids both weights
        # underflowing to zero.
        shift = max(log_lik1, log_lik2)
        weight1 = np.exp(log_lik1 - shift)
        weight2 = np.exp(log_lik2 - shift)

        # A uniform draw on [0, weight1 + weight2) falls below weight1
        # with probability weight1 / (weight1 + weight2). The previous
        # test (weight1 < draw) was inverted and labelled sites 1 in
        # proportion to tree2's likelihood.
        draw = np.random.random() * (weight1 + weight2)
        if draw < weight1:
            result.append(1)
        else:
            result.append(2)

    return result