Пример #1
0
def unifrac_tasks_from_matrix(u, env_names, modes=UNIFRAC_DEFAULT_MODES):
    """Return the UniFrac results requested in ``modes`` for a distance matrix.

    Parameters:
        u: the UniFrac distance matrix; it must support ``copy()``, ``len()``
            and ``[i, i]`` item assignment (presumably a square numpy array
            -- confirm against callers).
        env_names: environment names, in the same order as the rows of ``u``.
        modes: container of the ``UNIFRAC_*`` mode constants to compute.

    Returns:
        A dict keyed by the requested mode constants:
        ``UNIFRAC_DIST_MATRIX`` -> ``(u, env_names)``,
        ``UNIFRAC_PCOA`` -> result of ``output_pca``,
        ``UNIFRAC_CLUST_ENVS`` -> result of ``UPGMA_cluster``,
        ``UNIFRAC_NJ_ENVS`` -> result of ``nj``.
    """
    result = {}

    if UNIFRAC_DIST_MATRIX in modes:
        result[UNIFRAC_DIST_MATRIX] = (u, env_names)

    if UNIFRAC_PCOA in modes:
        point_matrix, eigvals = principal_coordinates_analysis(u)
        result[UNIFRAC_PCOA] = output_pca(point_matrix, eigvals, env_names)

    if UNIFRAC_CLUST_ENVS in modes:
        # Build a real list: on Python 3 ``map`` returns a one-shot iterator,
        # which cannot serve as an indexable node order for clustering.
        nodes = [PhyloNode(name) for name in env_names]
        BIG = 1e305
        # Work on a copy so the caller's matrix keeps its original diagonal.
        U = u.copy()
        # The diagonal is overwritten with the same sentinel that is handed to
        # UPGMA_cluster (presumably so self-distances are never selected).
        for i in range(len(U)):
            U[i, i] = BIG
        result[UNIFRAC_CLUST_ENVS] = UPGMA_cluster(U, nodes, BIG)

    if UNIFRAC_NJ_ENVS in modes:
        result[UNIFRAC_NJ_ENVS] = nj(dists_to_nj(u, env_names))

    return result
Пример #2
0
def unifrac_tasks_from_matrix(u, env_names, modes=UNIFRAC_DEFAULT_MODES):
    """Return the UniFrac results requested in ``modes`` for a distance matrix.

    Parameters:
        u: the UniFrac distance matrix; it must support ``copy()``, ``len()``
            and ``[i, i]`` item assignment (presumably a square numpy array
            -- confirm against callers).
        env_names: environment names, in the same order as the rows of ``u``.
        modes: container of the ``UNIFRAC_*`` mode constants to compute.

    Returns:
        A dict keyed by the requested mode constants:
        ``UNIFRAC_DIST_MATRIX`` -> ``(u, env_names)``,
        ``UNIFRAC_PCOA`` -> result of ``output_pca``,
        ``UNIFRAC_CLUST_ENVS`` -> result of ``UPGMA_cluster``,
        ``UNIFRAC_NJ_ENVS`` -> result of ``nj``.
    """
    result = {}

    if UNIFRAC_DIST_MATRIX in modes:
        result[UNIFRAC_DIST_MATRIX] = (u, env_names)

    if UNIFRAC_PCOA in modes:
        point_matrix, eigvals = principal_coordinates_analysis(u)
        result[UNIFRAC_PCOA] = output_pca(point_matrix, eigvals, env_names)

    if UNIFRAC_CLUST_ENVS in modes:
        # Build a real list: on Python 3 ``map`` returns a one-shot iterator,
        # which cannot serve as an indexable node order for clustering.
        nodes = [PhyloNode(name) for name in env_names]
        BIG = 1e305
        # Work on a copy so the caller's matrix keeps its original diagonal.
        U = u.copy()
        # The diagonal is overwritten with the same sentinel that is handed to
        # UPGMA_cluster (presumably so self-distances are never selected).
        for i in range(len(U)):
            U[i, i] = BIG
        result[UNIFRAC_CLUST_ENVS] = UPGMA_cluster(U, nodes, BIG)

    if UNIFRAC_NJ_ENVS in modes:
        result[UNIFRAC_NJ_ENVS] = nj(dists_to_nj(u, env_names))

    return result
Пример #3
0
def construct_cluster(args, dm):
    """Build a neighbour-joining tree from the pairwise distances ``dm``.

    ``args`` is accepted but not used by this function. ``upgma`` is imported
    as the alternative clustering method but is not applied.
    """
    from cogent.phylo import nj as nj_module
    from cogent.cluster.UPGMA import upgma  # alternative: upgma(dm)

    return nj_module.nj(dm)
def construct_cluster(args, dm):
    """Return the neighbour-joining tree computed from the distances ``dm``.

    ``args`` is part of the signature but is never read here. The UPGMA
    import is kept for the alternative method, which is currently disabled.
    """
    from cogent.phylo import nj as neighbour_joining
    from cogent.cluster.UPGMA import upgma  # alternative: upgma(dm)

    tree = neighbour_joining.nj(dm)
    return tree
Пример #5
0
def evaluate_tree(aln):
    """Check whether tips 'A' and 'B' are joined by exactly three edges.

    Distances are estimated from ``aln`` under the JC69 model and a
    neighbour-joining tree is built from them. When the module-level ``debug``
    flag is true, the distances, the tree, its topology comparison against the
    module-level tree ``tr`` and the edges from 'A' to 'B', 'C' and 'D' are
    printed.

    Returns True if ``getConnectingEdges('A', 'B')`` has length 3.
    """
    estimator = distance.EstimateDistances(aln, submodel=JC69())
    estimator.run(show_progress=False)
    tree = nj.nj(estimator.getPairwiseDistances())

    if debug:
        print(estimator)
        print(tree.asciiArt())
        print(tree.sameTopology(tr))
        for tip in 'BCD':
            print(tree.getConnectingEdges('A', tip))

    edges_a_to_b = tree.getConnectingEdges('A', 'B')
    return len(edges_a_to_b) == 3
Пример #6
0
def distmat_to_tree(distmat):
    """Build a neighbour-joining tree from a ``(headers, matrix)`` pair.

    ``distmat`` unpacks into the row/column labels and the distance matrix.
    Every off-diagonal cell becomes a ``(label_i, label_j) -> distance`` entry
    of the dict handed to ``nj.nj``; diagonal cells are left out.
    """
    labels, rows = distmat

    # Same row-major visiting order as a nested loop, skipping the diagonal.
    pairwise = {
        (labels[i], labels[j]): distance_value
        for i, row in enumerate(rows)
        for j, distance_value in enumerate(row)
        if i != j
    }

    return nj.nj(pairwise)
Пример #7
0
def get_alignment_tree(fname):
    """Build a neighbour-joining tree from a FASTA alignment and print it.

    The alignment in ``fname`` is loaded, pairwise distances are estimated
    under the F81 model, and the balanced neighbour-joining tree is printed as
    ASCII art followed by a blank line.

    Returns None; the tree is only printed.
    """

    from cogent.phylo import distance, nj
    from cogent.evolve.models import HKY85, F81
    # NOTE(review): ``cogent`` itself is not bound by the imports above, so it
    # must already be imported at module level -- confirm.
    al = cogent.LoadSeqs(fname, format='fasta')
    d = distance.EstimateDistances(al, submodel=F81())
    d.run(show_progress=False)
    mytree = nj.nj(d.getPairwiseDistances())
    mytree = mytree.balanced()
    print(mytree.asciiArt())
    # A bare ``print`` is a no-op expression when print is a function;
    # print("") emits the intended blank line on both Python 2 and 3.
    print("")
    # Disabled PDF rendering, kept for reference:
    # from cogent.draw import dendrogram
    # p = dendrogram.SquareDendrogram(mytree)
    # p.drawToPDF('tree-scaled.pdf', 500, 400, stroke_width=2.0,
    #             shade_param = 'r', max_value = 1.0,)
    return
Пример #8
0
def single_file_nj(input_file, output_file):
    """Read a distance matrix file and write its neighbour-joining tree.

    Parameters:
        input_file: path of the distance matrix, parsed with ``parse_distmat``
            into ``(headers, data)``; ``data`` is indexed as ``data[i, j]``.
        output_file: path that receives the tree in Newick format, with
            branch lengths.

    Both files are handled with context managers, so the handles are closed
    even if parsing, tree building or writing raises.
    """
    # NOTE(review): mode 'U' (universal newlines) is kept from the original;
    # that mode was removed in Python 3.11, where 'r' gives the same behaviour.
    with open(input_file, 'U') as infile:
        headers, data = parse_distmat(infile)

    # Both loops span every header, so each ordered pair -- (a, b) as well as
    # (b, a) -- and the diagonal end up in the dict.
    distdict = {}
    for i, row_name in enumerate(headers):
        for j, col_name in enumerate(headers):
            distdict[(row_name, col_name)] = data[i, j]

    tree = nj(distdict)

    # Serialise before opening the output, so that a failure here does not
    # leave an empty file behind.
    newick = tree.getNewick(with_distances=True)
    with open(output_file, 'w') as outfile:
        outfile.write(newick)
def single_file_nj(input_file, output_file):
    """Read a distance matrix file and write its neighbour-joining tree.

    Parameters:
        input_file: path of the distance matrix, parsed with ``parse_distmat``
            into ``(headers, data)``; ``data`` is indexed as ``data[i, j]``.
        output_file: path that receives the tree in Newick format, with
            branch lengths.

    Both files are handled with context managers, so the handles are closed
    even if parsing, tree building or writing raises.
    """
    # NOTE(review): mode 'U' (universal newlines) is kept from the original;
    # that mode was removed in Python 3.11, where 'r' gives the same behaviour.
    with open(input_file, 'U') as infile:
        headers, data = parse_distmat(infile)

    # Both loops span every header, so each ordered pair -- (a, b) as well as
    # (b, a) -- and the diagonal end up in the dict.
    distdict = {}
    for i, row_name in enumerate(headers):
        for j, col_name in enumerate(headers):
            distdict[(row_name, col_name)] = data[i, j]

    tree = nj(distdict)

    # Serialise before opening the output, so that a failure here does not
    # leave an empty file behind.
    newick = tree.getNewick(with_distances=True)
    with open(output_file, 'w') as outfile:
        outfile.write(newick)
Пример #10
0
def plot_tree(sourceArray, args):
    """Cluster a distance array and save the tree as ASCII art.

    Parameters:
        sourceArray: distances indexed as ``sourceArray[i, j]``, with rows and
            columns in the order of the module-level ``idList``.
        args: object providing ``method`` ('upgma', 'nj' or None, where None
            selects UPGMA) and ``destPath`` (output file path).

    An unsupported method prints a message and terminates with exit status 1.
    """
    # Build the input dictionary for the tree functions: one entry per ordered
    # pair of ids, diagonal included.
    distanceDict = {}
    for i, rowId in enumerate(idList):
        for j, colId in enumerate(idList):
            distanceDict[(rowId, colId)] = sourceArray[i, j]

    # Generate the tree using the specified method.
    if args.method == 'upgma' or args.method is None:
        tree = upgma(distanceDict)
    elif args.method == 'nj':
        tree = nj(distanceDict)
    else:
        # Single-argument print works on both Python 2 and 3.
        print("Method '%s' is not supported." % (args.method,))
        # Raised directly instead of calling the site-provided exit() helper,
        # which is not guaranteed to exist (e.g. under ``python -S``).
        raise SystemExit(1)

    # Convert the tree to text and save it; ``with`` closes the file even if
    # the write fails.
    art = tree.asciiArt()
    with open(args.destPath, 'w') as destFile:
        destFile.write(art + '\n')
    return
import sys
from cogent.draw import dendrogram
from cogent.phylo import nj
import utils

# Load a whitespace-separated distance table (one row per OTU) and label the
# rows/columns with the first len(data) entries of utils.letters.
fn = 'data/nj_data.txt'
data = utils.load_data(fn, split_lines=True)
otus = utils.letters[:len(data)]

# Map every (row OTU, column OTU) pair to its distance.
dists = {}
for row_otu, raw_line in zip(otus, data):
    row_values = [float(field) for field in raw_line.split()]
    for col_otu, value in zip(otus, row_values):
        dists[(row_otu, col_otu)] = value

# Build the neighbour-joining tree and show it, followed by a blank line.
tr = nj.nj(dists)
print(tr.asciiArt())
print("")

# For each tip, print the name of its first ancestor and the tip itself.
for tip in tr.iterTips():
    print("%s %s" % (tip.ancestors()[0].Name, tip))

def show_edge_children(node):
    children = node.iterNontips()
    for child in children:
        print node.Name,
        print child.Name, 
        print node.distance(child),

edges = list(tr.iterNontips())
for e in edges:
Пример #12
0
 def test_nj(self):
     """nj should rebuild a tree whose distances match the reference tree"""
     rebuilt_tree = nj(self.dists)
     self.assertTreeDistancesEqual(self.tree, rebuilt_tree)
def phyl_tree():
	"""Print the neighbour-joining tree of the module-level distances ``d``.

	The tree is built from ``d.getPairwiseDistances()`` and printed as ASCII
	art after two blank lines.
	"""
	pairwise = d.getPairwiseDistances()
	nj_tree = nj.nj(pairwise)
	print("\n\n")
	print(nj_tree.asciiArt())

# Each section estimates pairwise distances for one data set, redirects
# sys.stdout to a report file, then prints the distances and the NJ tree.
# sys.stdout is reassigned each time and never restored.

# cytc: protein alignment, JTT92 model.
al = LoadSeqs("cytc.fasta", moltype=PROTEIN, interleaved=False)
d = distance.EstimateDistances(al, submodel=JTT92())
d.run()
sys.stdout = open("cytc distances.txt", "w")
print(d)
phyl_tree()

# mtdna: DNA sequences, JC69 model.
al = LoadSeqs("mtdna.fasta", moltype=DNA, interleaved=True, aligned=False)
d = distance.EstimateDistances(al, submodel=JC69())
d.run()
sys.stdout = open("mtdna distances.txt", "w")
print(d)
phyl_tree()

# cytb: unaligned protein sequences are aligned first, JTT92 model. Here
# ``d`` holds the pairwise distances rather than the estimator, so the tree is
# built inline instead of through phyl_tree().
seqs = LoadSeqs("cytb.fasta", moltype=PROTEIN, aligned=False)
al = align_unaligned_seqs(seqs, PROTEIN)
dcalc = distance.EstimateDistances(al, submodel=JTT92())
dcalc.run(show_progress=True)
d = dcalc.getPairwiseDistances()
tree = nj.nj(d)
sys.stdout = open("cytb distances.txt", "w")
print(dcalc)
print('\n\n')
print(tree.asciiArt())
#phyl_tree()
Пример #14
0
def TreeAlign(model, seqs, tree=None, indel_rate=0.01, indel_length=0.01,
    ui = None, ests_from_pairwise=True, param_vals=None):
    """Returns a multiple alignment and tree.

    Uses the provided substitution model and a tree for determining the
    progressive order. If a tree is not provided a Neighbour Joining tree is
    constructed from pairwise distances estimated from pairwise aligning the
    sequences. With exactly two sequences and no tree, a tree is built
    directly from the two names and no distances are estimated.

    NOTE(review): earlier documentation said that when running in parallel
    only the master CPU returns the alignment and tree; nothing in this
    function implements that -- confirm whether a wrapper does.

    Arguments:
        - model: a substitution model
        - seqs: a sequence collection, or a dict keyed by sequence name
        - tree: optional guide tree; its tip names must match the sequence
          names
        - indel_rate, indel_length: parameters for the progressive pair-HMM
        - ui: optional progress reporter with a ``display(message)`` method;
          when None no progress message is emitted
        - ests_from_pairwise: if no tree provided and True, the median value
          of the substitution model parameters are used
        - param_vals: named key, value pairs for model parameters. These
          override ests_from_pairwise.

    Raises AssertionError if the tree's tip names differ from the sequence
    names.
    """
    _exclude_params = ['mprobs', 'rate', 'bin_switch']
    # Always work on a private dict so the caller's mapping is not mutated.
    param_vals = dict(param_vals) if param_vals else {}

    if isinstance(seqs, dict):
        seq_names = list(seqs.keys())
    else:
        seq_names = seqs.getSeqNames()

    two_seqs = len(seq_names) == 2

    if tree:
        # Compare sorted copies so neither returned list is reordered in place.
        tip_names = sorted(tree.getTipNames())
        sorted_seq_names = sorted(seq_names)
        assert tip_names == sorted_seq_names, \
            "names don't match between seqs and tree: tree=%s; seqs=%s" % \
            (tip_names, sorted_seq_names)
        ests_from_pairwise = False
    elif two_seqs:
        # Use the names gathered above: ``seqs`` may be a plain dict, which
        # has no getSeqNames() method.
        tree = LoadTree(tip_names=seq_names)
        ests_from_pairwise = False
    else:
        if ests_from_pairwise:
            est_params = [param for param in model.getParamList()
                          if param not in _exclude_params]
        else:
            est_params = None

        dcalc = EstimateDistances(seqs, model, do_pair_align=True,
                                    est_params=est_params)
        dcalc.run()
        dists = dcalc.getPairwiseDistances()
        tree = NJ.nj(dists)

    LF = model.makeLikelihoodFunction(tree.bifurcating(name_unnamed=True), aligned=False)
    if ests_from_pairwise and not param_vals:
        # we use the Median to avoid the influence of outlier pairs
        param_vals = {}
        for param in est_params:
            numbers = dcalc.getParamValues(param)
            print("Param Estimate Summary Stats: %s" % param)
            print(numbers.summarize())
            param_vals[param] = numbers.Median

    # ``ui`` defaults to None; only report progress when a reporter was given.
    if ui is not None:
        ui.display("Doing %s alignment" % ["progressive", "pairwise"][two_seqs])
    with LF.updatesPostponed():
        for param, val in list(param_vals.items()):
            LF.setParamRule(param, value=val, is_constant=True)
        LF.setParamRule('indel_rate', value=indel_rate, is_constant=True)
        LF.setParamRule('indel_length', value=indel_length, is_constant=True)
        LF.setSequences(seqs)
    edge = LF.getLogLikelihood().edge
    align = edge.getViterbiPath().getAlignment()
    # Record the parameters used, including the indel settings, on the result.
    info = Info()
    info["AlignParams"] = param_vals
    info["AlignParams"].update(dict(indel_length=indel_length, indel_rate=indel_rate))
    align.Info = info
    return align, tree
def phyl_tree():
	"""Print the neighbour-joining tree of the module-level distances ``d``.

	The tree is built from ``d.getPairwiseDistances()`` and printed as ASCII
	art after two blank lines.
	"""
	pairwise = d.getPairwiseDistances()
	nj_tree = nj.nj(pairwise)
	print("\n\n")
	print(nj_tree.asciiArt())
Пример #16
0
def main():
    """Parse named distance matrices from a file and export UPGMA/NJ trees.

    The input path is taken from ``sys.argv[1]``. The file consists of
    sections, each made of:
      * a line holding a single tab-free token (the matrix name),
      * a header line whose first tab-separated field is empty, followed by
        the column names,
      * one row per line: a row label (which must be one of the column
        names) followed by one value per column,
      * an empty line ending the section.

    Only matrices whose name ends with '_prop' or '_jaccard_' are exported.
    For each of them a UPGMA tree and a neighbour-joining tree are written
    next to the input file (Newick, ASCII art and PDF dendrogram) and their
    weighted least-squares scores are printed.

    Exits with status 1 on a wrong argument count, a missing input file, an
    unknown row label or a row whose length differs from the header.
    """
    if len(sys.argv) != 2:
        print("not enought arguments")
        sys.exit(1)

    inputfile = sys.argv[1]
    if not os.path.exists(inputfile):
        print("input file %s does not exists" % inputfile)
        sys.exit(1)

    trees = {}
    with open(inputfile, 'r') as fhd:
        name = None
        for line in fhd:
            line = line.strip()
            cols = line.split("\t")

            # Only a single tab-free token longer than one character opens a
            # section; everything else between sections is ignored.
            if len(line) <= 1 or len(cols) != 1:
                continue

            name = cols[0]
            print("name '%s'" % name)

            # Consume the section body from the same file iterator.
            for line in fhd:
                line = line.rstrip()

                if len(line) == 0:
                    # An empty line ends the section.
                    name = None
                    break

                if name is None:
                    # Section was rejected; skip its lines until it ends.
                    continue

                if name not in trees:
                    # First line of a new section: the header.
                    headerNames = line.split("\t")

                    if headerNames[0] != "":
                        # Not a matrix header; reject the section.
                        name = None
                        continue

                    trees[name] = {
                        'dimentions': len(headerNames) - 1,
                        'names': headerNames[1:],
                        'matrix': [],
                    }
                    continue

                # Matrix row: label followed by one value per column.
                colss = line.split("\t")
                known_names = trees[name]['names']
                if colss[0] not in known_names:
                    # Report the offending row label and the known names; the
                    # previous code referenced an unbound local here and
                    # crashed instead of printing this message.
                    print("name '%s' :: getting matrix :: vertical name %s not in names %s" % (str(name), str(colss[0]), str(known_names)))
                    sys.exit(1)

                values = []
                for x in colss[1:]:
                    x = float(x)
                    # Exact zeros are replaced by a tiny positive value
                    # (presumably to avoid zero distances downstream).
                    if x == 0:
                        x = 0.00000001
                    values.append(x)

                # Rows are stored as single-precision float arrays.
                values = array.array('f', values)

                # Compare against this section's own header width.
                dimentions = trees[name]['dimentions']
                if len(values) != dimentions:
                    print("name '%s' :: dimentions dont match %d vs %d" % (str(name), len(values), dimentions))
                    sys.exit(1)

                trees[name]['matrix'].append(values)

    for treename in sorted(trees):
        if not treename.endswith(('_prop', '_jaccard_')):
            continue

        print("exporting tree %s" % treename)
        treedata = trees[treename]
        names = treedata['names']
        matrix = treedata['matrix']
        outbase = "%s.%s" % (inputfile, treename)

        # Pairwise distances in both directions for every unordered pair; the
        # diagonal is left out.
        dissi = {}
        for name1pos, name1 in enumerate(names):
            for name2pos in range(name1pos):
                name2 = names[name2pos]
                dissi[(name1, name2)] = matrix[name1pos][name2pos]
                dissi[(name2, name1)] = matrix[name2pos][name1pos]

        # UPGMA tree: Newick file, ASCII art, PDF dendrogram, WLS score.
        mycluster = upgma(dissi).balanced()
        mycluster.writeToFile(outbase + ".upgma", with_distances=True)

        with open(outbase + ".upgma.tree", "w") as upgmat:
            upgmat.write(mycluster.asciiArt())

        myclusterden = dendrogram.ContemporaneousDendrogram(mycluster)
        myclusterden.drawToPDF(outbase + '.upgma.tree.pdf', 1024, 2048)

        myclusterls = least_squares.WLS(dissi).evaluateTree(mycluster)
        print("UPGMA Least Square %s" % (myclusterls,))

        # Neighbour-joining tree: same set of outputs.
        mytree = nj.nj(dissi).balanced()
        mytree.writeToFile(outbase + ".nj", with_distances=True)

        with open(outbase + ".nj.tree", "w") as njt:
            njt.write(mytree.asciiArt())

        mytreeden = dendrogram.ContemporaneousDendrogram(mytree)
        mytreeden.drawToPDF(outbase + '.nj.tree.pdf', 1024, 2048)

        mytreels = least_squares.WLS(dissi).evaluateTree(mytree)
        print("NJ    Least Square %s" % (mytreels,))
Пример #17
0
    
    model = JC()
    #for seq in bububu:
    dists = dict()
    for i in range(len(bububu)-1):
        for j in range(i+1,len(bububu)):
            dists[(bububu[i].name,bububu[j].name)] = model.count(bububu[i],bububu[j])
             
        #model.count()
    #for i in range(len(bububu)):
    #   model.count()
    #from cogent.phylo import nj
    from cogent.phylo import nj
    #dists = {('a', 'b'): 2.7, ('c', 'b'): 2.33, ('c', 'a'): 0.73}
    njtree2 = nj.nj(dists)
    print njtree2.writeToFile('/home/puko/Desktop/science2.newick')
    #njtree2.writeToFile('/home/puko/Desktop/tree.newick')
    #print njtree2.asciiArt()
        
    #bootstrap    
    import random
    

    file = open('/home/puko/Desktop/science_bootstrap2.newick','w')
    trees = str()
    trees_list = []
    
    
    
    for x in range(0,499):
Пример #18
0
def main():
    """Parse named distance matrices from a file and export UPGMA/NJ trees.

    The input path is taken from ``sys.argv[1]``. The file consists of
    sections, each made of:
      * a line holding a single tab-free token (the matrix name),
      * a header line whose first tab-separated field is empty, followed by
        the column names,
      * one row per line: a row label (which must be one of the column
        names) followed by one value per column,
      * an empty line ending the section.

    Only matrices whose name ends with '_prop' or '_jaccard_' are exported.
    For each of them a UPGMA tree and a neighbour-joining tree are written
    next to the input file (Newick, ASCII art and PDF dendrogram) and their
    weighted least-squares scores are printed.

    Exits with status 1 on a wrong argument count, a missing input file, an
    unknown row label or a row whose length differs from the header.
    """
    if len(sys.argv) != 2:
        print("not enought arguments")
        sys.exit(1)

    inputfile = sys.argv[1]
    if not os.path.exists(inputfile):
        print("input file %s does not exists" % inputfile)
        sys.exit(1)

    trees = {}
    with open(inputfile, 'r') as fhd:
        name = None
        for line in fhd:
            line = line.strip()
            cols = line.split("\t")

            # Only a single tab-free token longer than one character opens a
            # section; everything else between sections is ignored.
            if len(line) <= 1 or len(cols) != 1:
                continue

            name = cols[0]
            print("name '%s'" % name)

            # Consume the section body from the same file iterator.
            for line in fhd:
                line = line.rstrip()

                if len(line) == 0:
                    # An empty line ends the section.
                    name = None
                    break

                if name is None:
                    # Section was rejected; skip its lines until it ends.
                    continue

                if name not in trees:
                    # First line of a new section: the header.
                    headerNames = line.split("\t")

                    if headerNames[0] != "":
                        # Not a matrix header; reject the section.
                        name = None
                        continue

                    trees[name] = {
                        'dimentions': len(headerNames) - 1,
                        'names': headerNames[1:],
                        'matrix': [],
                    }
                    continue

                # Matrix row: label followed by one value per column.
                colss = line.split("\t")
                known_names = trees[name]['names']
                if colss[0] not in known_names:
                    # Report the offending row label and the known names; the
                    # previous code referenced an unbound local here and
                    # crashed instead of printing this message.
                    print("name '%s' :: getting matrix :: vertical name %s not in names %s" % (
                        str(name), str(colss[0]), str(known_names)))
                    sys.exit(1)

                values = []
                for x in colss[1:]:
                    x = float(x)
                    # Exact zeros are replaced by a tiny positive value
                    # (presumably to avoid zero distances downstream).
                    if x == 0:
                        x = 0.00000001
                    values.append(x)

                # Rows are stored as single-precision float arrays.
                values = array.array('f', values)

                # Compare against this section's own header width.
                dimentions = trees[name]['dimentions']
                if len(values) != dimentions:
                    print("name '%s' :: dimentions dont match %d vs %d" % (
                        str(name), len(values), dimentions))
                    sys.exit(1)

                trees[name]['matrix'].append(values)

    for treename in sorted(trees):
        if not treename.endswith(('_prop', '_jaccard_')):
            continue

        print("exporting tree %s" % treename)
        treedata = trees[treename]
        names = treedata['names']
        matrix = treedata['matrix']
        outbase = "%s.%s" % (inputfile, treename)

        # Pairwise distances in both directions for every unordered pair; the
        # diagonal is left out.
        dissi = {}
        for name1pos, name1 in enumerate(names):
            for name2pos in range(name1pos):
                name2 = names[name2pos]
                dissi[(name1, name2)] = matrix[name1pos][name2pos]
                dissi[(name2, name1)] = matrix[name2pos][name1pos]

        # UPGMA tree: Newick file, ASCII art, PDF dendrogram, WLS score.
        mycluster = upgma(dissi).balanced()
        mycluster.writeToFile(outbase + ".upgma", with_distances=True)

        with open(outbase + ".upgma.tree", "w") as upgmat:
            upgmat.write(mycluster.asciiArt())

        myclusterden = dendrogram.ContemporaneousDendrogram(mycluster)
        myclusterden.drawToPDF(outbase + '.upgma.tree.pdf', 1024, 2048)

        myclusterls = least_squares.WLS(dissi).evaluateTree(mycluster)
        print("UPGMA Least Square %s" % (myclusterls,))

        # Neighbour-joining tree: same set of outputs.
        mytree = nj.nj(dissi).balanced()
        mytree.writeToFile(outbase + ".nj", with_distances=True)

        with open(outbase + ".nj.tree", "w") as njt:
            njt.write(mytree.asciiArt())

        mytreeden = dendrogram.ContemporaneousDendrogram(mytree)
        mytreeden.drawToPDF(outbase + '.nj.tree.pdf', 1024, 2048)

        mytreels = least_squares.WLS(dissi).evaluateTree(mytree)
        print("NJ    Least Square %s" % (mytreels,))
Пример #19
0
import sys
from cogent.draw import dendrogram
from cogent.phylo import nj
import utils

# Read the distance table: one whitespace-separated row of numbers per OTU.
fn = 'data/nj_data.txt'
data = utils.load_data(fn, split_lines=True)
# One label per data row, taken from the start of utils.letters.
otus = utils.letters[:len(data)]

# Pairwise distances keyed by (row label, column label).
dists = {}
for row_label, text in zip(otus, data):
    numbers = [float(token) for token in text.split()]
    for col_label, number in zip(otus, numbers):
        dists[(row_label, col_label)] = number

# Neighbour-joining tree, shown as ASCII art followed by a blank line.
tr = nj.nj(dists)
print(tr.asciiArt())
print("")

# First ancestor's name next to each tip.
for tip in tr.iterTips():
    print("%s %s" % (tip.ancestors()[0].Name, tip))


def show_edge_children(node):
    children = node.iterNontips()
    for child in children:
        print node.Name,
        print child.Name,
        print node.distance(child),