示例#1
0
def orthoUnicopyFromUnreconciledGT(nfgt, nfgtmt, outortdir, method='unreconciled', colourTree=False, verbose=False, **kw):
	"""Extract single-copy ('unicopy') orthologous groups from an unreconciled gene tree and write them out.

	nfgt       -- path to the gene tree file; the family name is its base name
	              truncated at the first '.', then the first '-', then the first '_'.
	nfgtmt     -- gene tree file format; 'nexus' (case-insensitive) is read with
	              tree2.read_nexus(), any other value with tree2.Node(file=...).
	outortdir  -- root output directory; the output path prefix is
	              <outortdir>/<method>/<fam>_<method>.
	method     -- label used for the output sub-directory and file name prefix.
	colourTree -- forwarded to writeOrthologs().
	verbose    -- when true, print a header before calling getOrthologues().
	**kw       -- extra keyword arguments forwarded to getOrthologues().

	Returns None; all output is produced by writeOrthologs().
	"""
	# 'verbose' is a named parameter, so it can never appear in **kw; the former
	# 'verbose = kw.get("verbose")' therefore always reset the flag to None.
	fam = os.path.basename(nfgt).split('.', 1)[0].split('-', 1)[0].split('_', 1)[0]
	if nfgtmt.lower()=='nexus':
		dgt = tree2.read_nexus(nfgt, treeclass="AnnotatedNode", returnDict=True, translateLabels=True, getTaxLabels=False, allLower=False)
		gt = dgt['tree']['con_all_compat']
	else:
		gt = tree2.Node(file=nfgt)
	# NOTE(review): this is a bare attribute reference, not a call; if
	# reRoot_max_tree_balance is a method, no re-rooting happens here -- confirm intent.
	gt.reRoot_max_tree_balance
	if verbose: print("\n# unicopy_ogs:\n")
	unicopy_ogs, notrelevant, dlabs = getOrthologues(gt, method='unicopy', noNodeAnnot=True, **kw)
	# output
	nfoutrad = os.path.join(outortdir, method, "%s_%s"%(fam, method))
	writeOrthologs(nfoutrad, 'unicopy', unicopy_ogs, dlabs, colourTree, gt, ltreenames=["tree_0"], figtree=True)
示例#2
0
class PartialMoranProcess(BaseMoranProcess, SingleTreeModel):
    """Follownig description of model C in Hey, J. 1992. Using Phylogenetic Trees to Study Speciation and Extinction. Evolution, 46(3), 1992, pp. 627-640
	
	Ideal model is that on the tree, time between two speciation/extinction (i.e. birth/death [B/D]) events is exponentially distibuted length (with rate param B). 
	This can be seens as a branch growing of an extra length (exponentially distibuted with rate param B), at the end of which a Birth event occur; 
	a Death event occurs simultaneously in another lineage. Dificult to implement in a simulation process as, focusing on a branch at each simulation time step, 
	the extinction (Death) event has to happen on a non-yet grown portion of another branch, which could have been speciating in the meantime. Rather do the following:
	
	According to Hey (1992), growth of all branches occur at each simulation step, of a length following a exponential decay process function of the time elapsed 
	(so added length is gradually shorter, accounting for the growing breadth of the tree, if one consider together theextant and extinct lineages). 
	Conjugated birth and death events then occur simultaneously on a randomly selected pair of branches. Assumes a population of size N original species parallely 
	evolving (resuting in several unconected trees, from which only one tree will eventually prevail, or be sampled.
	
	In this class, a single tree can be simulated (assuming it will be the one prevailing), allowing the B/D events to occur in lineages out of the tree, 
	i.e. only a fraction ni/N of events will occur on the tree, with ni the number of extant lineages at ti.
	
	!!! While this gives a tree equivalent to one sampled from a tree population from a Moran process, the simulations will differ in that 
	time slices from a PartialMoranProcess will have various (exponentionally distibuted) lengths, whereas time slices from a (full) MoranProcess will have constant length. 
	When using the simulated tree as a reference for a gene tree simulation, e.g. with BirthDeathDTLModel, this will impact the rate of DTL events per reference tree branches, 
	as events have constant rate per time slice.
	"""
    def __init__(self, **kwargs):
        print 'invoke models.PartialMoranProcess.__init__()'
        print 'kwargs:', kwargs
        super(PartialMoranProcess, self).__init__(**kwargs)
        self.dummynode = tree2.Node()

    def newlen(self, t):
        # growth at ti follow expontial law of parameter b*i*(i+1), with compound parameter b = B
        b = float(self.rate) / (self.popsize - 1)
        l = exponential(1 / (b * t * (t + 1))) * self.tunit
        return l

    # generic place holder nodes for filling up event record dictionaries
    dummynode = tree2.Node()
    dummynode.edit_label('out')
示例#3
0
 def __init__(self, **kwargs):
     """Initialise the instance, forwarding all keyword arguments to the parent class."""
     # debug trace: report the constructor call and the keyword arguments received
     print 'invoke models.PartialMoranProcess.__init__()'
     print 'kwargs:', kwargs
     super(PartialMoranProcess, self).__init__(**kwargs)
     # per-instance placeholder node, built with tree2.Node() default arguments
     self.dummynode = tree2.Node()
示例#4
0
            'restrict-to-clade=', 'help'
        ])
    except getopt.GetoptError as err:
        # print help information and exit:
        print str(err)  # will print something like "option -a not recognized"
        print usage()
        sys.exit(2)

    # index the parsed command-line options by flag name
    dopt = dict(opts)
    if any(helpflag in dopt for helpflag in ('-h', '--help')):
        print(usage())
        sys.exit(0)

    # optional reference tree providing branch lengths
    reftreelen = None
    if '--reftree' in dopt:
        nfreftreelen = dopt['--reftree']
        reftreelen = tree2.Node(fic=nfreftreelen)
        print("Using branch lengths from reference tree '%s'" % nfreftreelen)

    # remaining options, each with its default value
    maxrecgt = int(dopt.get('--maxrecgt', 1))
    sgsep = dopt.get('--species-gene-separator', '_')
    restrictclade = dopt.get('--restrict-to-clade')

    # positional arguments are the reconciliation file paths
    lnfrec = args
    if not lnfrec:
        raise ValueError("need at least one argument (file path[s])")

    for nfrec in lnfrec:
        main(nfrec, reftreelen, maxrecgt, sgsep, restrictclade)
	# output tables, all written under outdir
	# NOTE(review): these handles are not closed within this excerpt -- confirm they are closed further down.
	foutgttable = open("%s/bipart_intrees.tab"%(outdir), 'w')
	foutbipartPP = open("%s/bipart_PostProbs.tab"%(outdir), 'w')
	foutbipartclust = open("%s/bipart_clusters.tab"%(outdir), 'w')
	foutgenelist = open("%s/screened_gene_list.txt"%(outdir), 'w')

	# header line of the bipartition-per-gene-tree table
	foutgttable.write('\t'.join(['bipart', 'gene_label', 'node_label', 'smallerclade_size', 'branch_support', 'branch_length', 'rel_branch_length', 'subtree_length', 'rel_subtree_length'])+'\n')

	excludedgenes = []
	for ngene in lngenes:
		# skip genes already recorded in dbipartintrees
		if ngene in dbipartintrees: continue
		print(ngene)
		lnfgenebayesresults = getFileNameFromPat(lnfbayesresults, ngene)
		# get taxon correspondence of the taxonomic profile and the consensus gene tree
		nfconstree = getFileNameFromPat(lnfgenebayesresults, '.con.tre', nbmatch=1)[0]
		if '.nwk' in nfconstree:
			genetree = tree2.Node(file="%s/%s"%(dirbayesresults, nfconstree), returnDict=True, allLower=False, leafNamesAsNum=True)
			# emulate the dictionary returned by the nexus reader
			if not ltaxall:
				dnexconstree = {'taxlabels':genetree.get_leaf_labels()}
			else:
				dnexconstree = {'taxlabels':ltaxall}
			# correct the branch support scale if it is detected to exceed 1.0 (typically up to 100)
			lbs = [node.bs() for node in genetree if (node.bs() is not None)]
			if lbs and max(lbs)> 1:
				# rescale to [0, 1]; divide by a float so that integer supports
				# are not truncated to 0 by Python 2 integer division
				for node in genetree:
					if node.bs() is not None:
						node.set_bs(node.bs()/100.0)
		else:
			dnexconstree = tree2.read_nexus("%s/%s"%(dirbayesresults, nfconstree), returnDict=True, allLower=False)
			genetree = dnexconstree['tree']['con_50_majrule']
#!/usr/bin/python2.7
# -*- coding: utf-8 -*-

import tree2
import sys

nftreein = sys.argv[1]
nfref = sys.argv[2]
nfout = sys.argv[3]

t = tree2.Node(file=nftreein, unrooted=True)

with open(nfref, 'r') as fref:
    for line in fref:
        lsp = line.rstrip('\n').split('\t')
        ass = lsp[0]
        code = lsp[1]
        ass += '__' + code
        print code, ass
        node = t[code]
        if node:
            t[code].edit_label(ass)
            print ass
        else:
            print "could not find %s in tree" % code

t.write_newick(nfout)
示例#7
0
def _graft_outgroup(reftree, label):
    """Return a new root joining `reftree` to a single new leaf named `label`."""
    # branch lengths of the new leaf and of the link are scaled on the value
    # returned by reftree.max_leaf_distance()
    maxd = reftree.max_leaf_distance()
    outgroup = tree2.AnnotatedNode(lleaves=[label])
    outgroup.get_children()[0].set_lg(maxd * 3)
    outgroup.link_child(reftree, newlen=maxd * 2)
    outgroup.complete_internal_labels(prefix='')
    return outgroup


def main(nfrec,
         nfreftree,
         nfgenetree,
         maxrecgt=1,
         recformat='tera',
         sgsep='_',
         phylofact=1000.0,
         restrictclade=None,
         verbose=False,
         **kw):
    """Write one SVG species tree per reconciliation parsed from `nfrec`.

    nfrec         -- reconciliation file, parsed with parseTERARecFile();
                     also used as prefix of the output SVG file names.
    nfreftree     -- species (reference) tree file, read as tree2.AnnotatedNode.
    nfgenetree    -- gene tree file, read as tree2.Node; on ValueError it is
                     re-read with branch_lengths=False.
    maxrecgt      -- accepted for interface compatibility; not used here.
    recformat     -- 'tera' or 'mowgli'; selects which label the dead/outgroup
                     leaf of the species tree must carry.
    sgsep         -- forwarded to parseTERARecFile().
    phylofact     -- forwarded to writeSvgTree().
    restrictclade -- if set, the drawn tree is reftree.restrictToLeaves(restrictclade).
    verbose       -- forwarded to parseTERARecFile().
    **kw          -- forwarded to parseTERARecFile(); 'noDeadStories' is also read here.

    Returns None; prints the base name of each SVG file written.
    """
    try:
        genetree = tree2.Node(file=nfgenetree, namesAsNum=True)
    except ValueError:
        # second attempt: parse the same file without branch lengths
        genetree = tree2.Node(file=nfgenetree,
                              namesAsNum=True,
                              branch_lengths=False)
    reftree = tree2.AnnotatedNode(file=nfreftree, namesAsNum=True)
    if restrictclade: st = reftree.restrictToLeaves(restrictclade)
    else: st = reftree
    # check presence of outgroup/dead lineage branch if necessary
    # NOTE(review): when an outgroup is grafted below, only 'reftree' is rebound;
    # 'st' (used for drawing) still refers to the tree without it -- confirm intent.
    if recformat == 'tera':
        if not (kw.get('noDeadStories') or
                (deadlabnum in st.get_leaf_labels())):
            if (outtaxlab in st.get_leaf_labels()):
                # must adapt mowgli-compliant species tree
                st[outtaxlab].edit_label(deadlabnum)
            else:
                reftree = _graft_outgroup(reftree, deadlabnum)
    elif recformat == 'mowgli':
        if not (outtaxlab in st.get_leaf_labels()):
            if (deadlabnum in st.get_leaf_labels()):
                # must adapt tera-compliant species tree
                st[deadlabnum].edit_label(outtaxlab)
            else:
                # 'maxd' used to be referenced here without ever being assigned
                # in this branch (NameError); the helper computes it itself
                reftree = _graft_outgroup(reftree, outtaxlab)

    for i, rec in enumerate(
            parseTERARecFile(nfrec,
                             genetree=genetree,
                             recformat=recformat,
                             sgsep=sgsep,
                             verbose=verbose,
                             **kw)):
        dnodefreq, dlevt = rec
        # write SVG species tree
        tag = '_no_dead' if kw.get('noDeadStories') else ''
        nfoutspe = '%s_%d_maprec2spetree%s.svg' % (nfrec, i, tag)
        # frequencies restricted to the leaves of the drawn tree
        lleaffreq = [(lab, f) for lab, f in dnodefreq.items()
                     if st[lab].is_leaf()]
        st.writeSvgTree(nfoutspe, padleaves=True, supports=False, phylofact=phylofact, branchwidths=dnodefreq, textorbit=5, \
         treetype='species', transfers=dlevt['T'], duplications=dlevt['D'], losses=dlevt['L'], counts=lleaffreq, \
         transferwidth='freq', modstyle="stroke-width:1; ", padstyle="stroke:red; stroke-width:0.5; stroke-dasharray:1,1; ")
        print(os.path.basename(nfoutspe))
示例#8
0
# list of gene names: base name of each input tree file, cut at the first '.'
nfoutgenelist = '%s/%s_gene_list' % (dirout, os.path.basename(dirout))
with open(nfoutgenelist, 'w') as foutgenelist:
    genenames = [os.path.basename(pngt).split('.')[0] for pngt in lngt]
    foutgenelist.write('\n'.join(genenames) + '\n')

if verbose:
    print('\nrelvarthresh: %g\tminseqwithin: %d\nfixed number of tree cuts: %d' % (rvt, msw, fnc))

# output directory named after the parameter combination; created on demand
outdir = '%s/list_genotypes_rvt%g_msw%d_fnc%d' % (dirout, rvt, msw, fnc)
if not os.path.exists(outdir):
    os.makedirs(outdir)
for pngt in lngt:
    ngt = os.path.basename(pngt)
    if verbose: print ngt
    ng = ngt.split('.')[0]
    gt = tree2.Node(fic=pngt, leafNamesAsNum=True)
    gt.complete_internal_labels()
    agt = gt.prune_genotypes(relvarthresh=rvt,
                             minseqwithin=msw,
                             minvarwithin=1e-5,
                             minbs=0.8,
                             returnLabels=True,
                             fixnbcut=fnc,
                             silent=(not verbose))
    nfoutlist = '%s/%s.geno_labels' % (outdir, ng)
    with open(nfoutlist, 'w') as foutlist:
        for geno in agt:
            if splitlastunderscore > 0:
                # split the gene name at the n-th rightmost '_' and keep the left part
                genos = [g.rsplit('_', splitlastunderscore)[0] for g in geno]
            else: