def orthoUnicopyFromUnreconciledGT(nfgt, nfgtmt, outortdir, method='unreconciled', colourTree=False, verbose=False, **kw):
    """Infer 'unicopy' orthologous groups from an unreconciled gene tree and write them out.

    Parameters:
      nfgt       -- path to the gene tree file; the family name is the file's
                    basename cut at the first '.', then at the first '-', then
                    at the first '_'.
      nfgtmt     -- gene tree file format; 'nexus' (case-insensitive) makes the
                    file be read with tree2.read_nexus() and the tree stored
                    under 'con_all_compat' be used; any other value makes the
                    file be read directly with tree2.Node(file=...).
      outortdir  -- root output directory; results are written with the path
                    prefix <outortdir>/<method>/<family>_<method>.
      method     -- name used for the output sub-directory and file suffix.
      colourTree -- forwarded to writeOrthologs().
      verbose    -- when true, print a header before the orthologue search.
      **kw       -- forwarded unchanged to getOrthologues().

    Returns None; the results are written through writeOrthologs().
    """
    # NB: 'verbose' is a named parameter, so it can never be found in **kw;
    # it must be read from the parameter itself, not from kw.get('verbose').
    fam = os.path.basename(nfgt).split('.', 1)[0].split('-', 1)[0].split('_', 1)[0]
    if nfgtmt.lower() == 'nexus':
        dgt = tree2.read_nexus(nfgt, treeclass="AnnotatedNode", returnDict=True,
                               translateLabels=True, getTaxLabels=False, allLower=False)
        gt = dgt['tree']['con_all_compat']
    else:
        gt = tree2.Node(file=nfgt)
    # NOTE(review): this is a bare attribute reference, not a call; if
    # reRoot_max_tree_balance is a method, the tree is NOT re-rooted here.
    # Left untouched as calling it would change the results -- confirm intent.
    gt.reRoot_max_tree_balance
    if verbose:
        print("\n# unicopy_ogs:\n")
    unicopy_ogs, notrelevant, dlabs = getOrthologues(gt, method='unicopy', noNodeAnnot=True, **kw)
    # output
    nfoutrad = os.path.join(outortdir, method, "%s_%s" % (fam, method))
    writeOrthologs(nfoutrad, 'unicopy', unicopy_ogs, dlabs, colourTree, gt, ltreenames=["tree_0"], figtree=True)
class PartialMoranProcess(BaseMoranProcess, SingleTreeModel):
    """Single-tree simulation following model C of Hey (1992).

    Reference: Hey, J. 1992. Using Phylogenetic Trees to Study Speciation and
    Extinction. Evolution, 46(3), pp. 627-640.

    Ideal model: on the tree, the time between two speciation/extinction
    (i.e. birth/death [B/D]) events is exponentially distributed (with rate
    parameter B). This can be seen as a branch growing by an extra length
    (exponentially distributed with rate parameter B), at the end of which a
    Birth event occurs; a Death event occurs simultaneously in another
    lineage. This is difficult to implement in a simulation process because,
    when focusing on one branch at each simulation time step, the extinction
    (Death) event has to happen on a not-yet-grown portion of another branch,
    which could have been speciating in the meantime.

    What is done instead: according to Hey (1992), all branches grow at each
    simulation step, by a length following an exponential decay process that
    is a function of the time elapsed (so the added length gets gradually
    shorter, accounting for the growing breadth of the tree when extant and
    extinct lineages are considered together). Conjugated birth and death
    events then occur simultaneously on a randomly selected pair of branches.

    The model assumes a population of N original species evolving in parallel
    (resulting in several unconnected trees, of which only one will
    eventually prevail, or be sampled). In this class, a single tree is
    simulated (assuming it is the one that prevails), allowing the B/D events
    to occur in lineages outside of the tree, i.e. only a fraction ni/N of the
    events occur on the tree, with ni the number of extant lineages at ti.

    !!! While this gives a tree equivalent to one sampled from a tree
    population generated by a Moran process, the simulations differ in that
    time slices from a PartialMoranProcess have varying (exponentially
    distributed) lengths, whereas time slices from a (full) MoranProcess have
    constant length. When using the simulated tree as a reference for a gene
    tree simulation, e.g. with BirthDeathDTLModel, this will impact the rate
    of DTL events per reference tree branch, as events have a constant rate
    per time slice.
    """

    def __init__(self, **kwargs):
        """Trace the call, initialise the parent classes and create a placeholder node.

        All keyword arguments are passed on to the parent initialiser.
        """
        print('invoke models.PartialMoranProcess.__init__()')
        print('%s %s' % ('kwargs:', kwargs))
        super(PartialMoranProcess, self).__init__(**kwargs)
        self.dummynode = tree2.Node()

    def newlen(self, t):
        """Draw the branch length increment for simulation step `t`.

        The increment is a draw from exponential() with argument
        1/(b*t*(t+1)), where b = rate/(popsize - 1), scaled by self.tunit.
        (presumably numpy.random.exponential, whose argument is the scale,
        i.e. the inverse of the rate -- confirm against the file's imports)
        """
        # growth at ti follows an exponential law of parameter b*i*(i+1)
        steprate = float(self.rate) / (self.popsize - 1)
        scale = 1 / (steprate * t * (t + 1))
        return exponential(scale) * self.tunit


# generic placeholder node for filling up event record dictionaries
dummynode = tree2.Node()
dummynode.edit_label('out')
def __init__(self, **kwargs):
    """Trace the call, initialise the parent classes and create a placeholder node.

    All keyword arguments are passed on to the parent initialiser; an empty
    tree2.Node is then stored as self.dummynode.
    """
    print('invoke models.PartialMoranProcess.__init__()')
    print('%s %s' % ('kwargs:', kwargs))
    super(PartialMoranProcess, self).__init__(**kwargs)
    self.dummynode = tree2.Node()
'restrict-to-clade=', 'help' ]) except getopt.GetoptError as err: # print help information and exit: print str(err) # will print something like "option -a not recognized" print usage() sys.exit(2) dopt = dict(opts) if ('-h' in dopt) or ('--help' in dopt): print usage() sys.exit(0) if '--reftree' in dopt: nfreftreelen = dopt['--reftree'] reftreelen = tree2.Node(fic=nfreftreelen) print "Using branch lengths from reference tree '%s'" % (nfreftreelen) else: reftreelen = None if '--maxrecgt' in dopt: maxrecgt = int(dopt['--maxrecgt']) else: maxrecgt = 1 sgsep = dopt.get('--species-gene-separator', '_') restrictclade = dopt.get('--restrict-to-clade') lnfrec = args if len(lnfrec) < 1: raise ValueError, "need at least one argument (file path[s])" for nfrec in lnfrec: main(nfrec, reftreelen, maxrecgt, sgsep, restrictclade)
# Open the four output tables in the output directory.
# NOTE(review): the matching close() calls are not visible in this chunk -- confirm they exist further down.
foutgttable = open("%s/bipart_intrees.tab"%(outdir), 'w')
foutbipartPP = open("%s/bipart_PostProbs.tab"%(outdir), 'w')
foutbipartclust = open("%s/bipart_clusters.tab"%(outdir), 'w')
foutgenelist = open("%s/screened_gene_list.txt"%(outdir), 'w')
# header line of the per-gene-tree bipartition table
foutgttable.write('\t'.join(['bipart', 'gene_label', 'node_label', 'smallerclade_size', 'branch_support', 'branch_length', 'rel_branch_length', 'subtree_length', 'rel_subtree_length'])+'\n')
excludedgenes = []
for ngene in lngenes:
    # skip genes already recorded in dbipartintrees
    if ngene in dbipartintrees: continue
    print ngene
    # restrict the list of result files to those matching this gene
    lnfgenebayesresults = getFileNameFromPat(lnfbayesresults, ngene)
    # get taxon correspondence between the taxonomic profile and the consensus gene tree
    nfconstree = getFileNameFromPat(lnfgenebayesresults, '.con.tre', nbmatch=1)[0]
    if '.nwk' in nfconstree:
        # consensus tree provided as a Newick file: read it directly
        genetree = tree2.Node(file="%s/%s"%(dirbayesresults, nfconstree), returnDict=True, allLower=False, leafNamesAsNum=True)
        # emulate the dictionary returned by the Nexus parser (only the 'taxlabels' entry)
        if not ltaxall: dnexconstree = {'taxlabels':genetree.get_leaf_labels()}
        else: dnexconstree = {'taxlabels':ltaxall}
        # rescale branch supports if any is found to be higher than 1.0 (typically up to 100)
        lbs = [node.bs() for node in genetree if (node.bs() is not None)]
        if lbs and max(lbs)> 1:
            # correct scale
            for node in genetree:
                if node.bs() is not None:
                    # NOTE(review): under Python 2 this is a truncating division if bs() returns an int -- confirm bs() is always a float
                    node.set_bs(node.bs()/100)
    else:
        # consensus tree provided as a Nexus file: use the tree stored under 'con_50_majrule'
        dnexconstree = tree2.read_nexus("%s/%s"%(dirbayesresults, nfconstree), returnDict=True, allLower=False)
        genetree = dnexconstree['tree']['con_50_majrule']
#!/usr/bin/python2.7
# -*- coding: utf-8 -*-
"""Relabel nodes of a tree using a two-column, tab-separated reference table.

Usage: <script> tree_in reference_table tree_out

Each line of the reference table holds a prefix (first field) and a code
(second field); the tree node currently labelled with the code is renamed
to '<prefix>__<code>'. The relabelled tree is written in Newick format.
"""
import tree2
import sys

nftreein = sys.argv[1]
nfref = sys.argv[2]
nfout = sys.argv[3]

t = tree2.Node(file=nftreein, unrooted=True)
with open(nfref, 'r') as fref:
    for line in fref:
        fields = line.rstrip('\n').split('\t')
        code = fields[1]
        ass = '__'.join((fields[0], code))
        print('%s %s' % (code, ass))
        # look the node up by its current label
        node = t[code]
        if not node:
            print("could not find %s in tree" % code)
            continue
        node.edit_label(ass)
        print(ass)

t.write_newick(nfout)
def _graftDeadLineageOutgroup(reftree, leaflabel):
    """Return a new root joining `reftree` to a single new leaf labelled `leaflabel`.

    Branch lengths are scaled on the maximum leaf distance of `reftree`:
    three times that distance for the new leaf, twice for the branch leading
    to `reftree`. Internal labels of the new tree are then completed.
    """
    maxd = reftree.max_leaf_distance()
    outgroup = tree2.AnnotatedNode(lleaves=[leaflabel])
    outgroup.get_children()[0].set_lg(maxd * 3)
    outgroup.link_child(reftree, newlen=maxd * 2)
    outgroup.complete_internal_labels(prefix='')
    return outgroup


def main(nfrec, nfreftree, nfgenetree, maxrecgt=1, recformat='tera', sgsep='_', phylofact=1000.0, restrictclade=None, verbose=False, **kw):
    """Write one SVG species tree per reconciliation record parsed from `nfrec`.

    Parameters:
      nfrec         -- path to the reconciliation file, parsed by
                       parseTERARecFile(); also used as prefix of output files.
      nfreftree     -- path to the reference (species) tree file.
      nfgenetree    -- path to the gene tree file.
      maxrecgt      -- not used in this function body.
      recformat     -- 'tera' or 'mowgli'; decides which label the dead/unsampled
                       lineage leaf must carry (deadlabnum or outtaxlab,
                       respectively) and is forwarded to the parser.
      sgsep         -- forwarded to the parser.
      phylofact     -- forwarded to writeSvgTree().
      restrictclade -- if set, the drawn tree is reftree.restrictToLeaves(restrictclade).
      verbose       -- forwarded to the parser.
      **kw          -- forwarded to the parser; 'noDeadStories' is also read
                       here (skips the dead-lineage check for the 'tera'
                       format, and adds the '_no_dead' tag to output names).

    Output files are named '<nfrec>_<i>_maprec2spetree<tag>.svg'.
    """
    try:
        genetree = tree2.Node(file=nfgenetree, namesAsNum=True)
    except ValueError:
        # retry, this time asking the parser not to expect branch lengths
        genetree = tree2.Node(file=nfgenetree, namesAsNum=True, branch_lengths=False)
    reftree = tree2.AnnotatedNode(file=nfreftree, namesAsNum=True)
    st = reftree.restrictToLeaves(restrictclade) if restrictclade else reftree

    # check presence of outgroup/dead lineage branch if necessary
    if recformat == 'tera':
        wantedlab, otherlab = deadlabnum, outtaxlab
        checkdead = not kw.get('noDeadStories')
    elif recformat == 'mowgli':
        wantedlab, otherlab = outtaxlab, deadlabnum
        checkdead = True
    else:
        checkdead = False
    if checkdead:
        leaflabels = st.get_leaf_labels()
        if wantedlab not in leaflabels:
            if otherlab in leaflabels:
                # the branch exists under the other format's label: just rename it
                st[otherlab].edit_label(wantedlab)
            else:
                # no such branch: graft one above the reference tree.
                # The helper computes the leaf distance itself, so this works
                # for both formats ('maxd' used to be undefined for 'mowgli').
                # NOTE(review): 'st' is not rebound and still points to the
                # tree without the new outgroup, which is the one drawn below
                # -- confirm this is intended.
                reftree = _graftDeadLineageOutgroup(reftree, wantedlab)

    tag = '_no_dead' if kw.get('noDeadStories') else ''
    recs = parseTERARecFile(nfrec, genetree=genetree, recformat=recformat, sgsep=sgsep, verbose=verbose, **kw)
    for i, rec in enumerate(recs):
        dnodefreq, dlevt = rec
        # write SVG species tree
        nfoutspe = '%s_%d_maprec2spetree%s.svg' % (nfrec, i, tag)
        # frequencies of the leaf nodes only, shown as counts
        lleaffreq = [(lab, f) for lab, f in dnodefreq.items() if st[lab].is_leaf()]
        st.writeSvgTree(nfoutspe, padleaves=True, supports=False, phylofact=phylofact,
                        branchwidths=dnodefreq, textorbit=5, treetype='species',
                        transfers=dlevt['T'], duplications=dlevt['D'], losses=dlevt['L'],
                        counts=lleaffreq, transferwidth='freq', modstyle="stroke-width:1; ",
                        padstyle="stroke:red; stroke-width:0.5; stroke-dasharray:1,1; ")
        print(os.path.basename(nfoutspe))
nfoutgenelist = '%s/%s_gene_list' % (dirout, os.path.basename(dirout)) with open(nfoutgenelist, 'w') as foutgenelist: foutgenelist.write( '\n'.join([os.path.basename(pngt).split('.')[0] for pngt in lngt]) + '\n') if verbose: print '\nrelvarthresh: %g\tminseqwithin: %d\nfixed number of tree cuts: %d' % ( rvt, msw, fnc) outdir = '%s/list_genotypes_rvt%g_msw%d_fnc%d' % (dirout, rvt, msw, fnc) if not os.path.exists(outdir): os.makedirs(outdir) for pngt in lngt: ngt = os.path.basename(pngt) if verbose: print ngt ng = ngt.split('.')[0] gt = tree2.Node(fic=pngt, leafNamesAsNum=True) gt.complete_internal_labels() agt = gt.prune_genotypes(relvarthresh=rvt, minseqwithin=msw, minvarwithin=1e-5, minbs=0.8, returnLabels=True, fixnbcut=fnc, silent=(not verbose)) nfoutlist = '%s/%s.geno_labels' % (outdir, ng) with open(nfoutlist, 'w') as foutlist: for geno in agt: if splitlastunderscore > 0: # split the gene name at the n-th rightmost '_' and keep the left part genos = [g.rsplit('_', splitlastunderscore)[0] for g in geno] else: