def _test_ml_speed(self):
    """Time ten calls to spidir.find_ml_branch_lengths_hky on one data set.

    Reads the tree and alignment under test/data/flies.nt/0/, brackets ten
    fitting calls (maxiter=10 each) with util.tic/util.toc, and afterwards
    records the node distances and the last returned value.  Nothing is
    asserted and nothing is returned.

    NOTE(review): the name does not start with "test", so a default
    unittest loader would not collect it -- presumably disabled on purpose.
    """
    # params (passed straight through to spidir; meaning presumably the
    # HKY base frequencies and kappa -- confirm against the spidir API)
    bgfreq = [.258, .267, .266, .209]
    kappa = 1.59

    # data
    tree = treelib.readTree("test/data/flies.nt/0/0.tree")
    align = fasta.readFasta("test/data/flies.nt/0/0.align")

    likes = []
    dists = []

    # fixed node ordering, by the branch length read from the input tree
    nodes = sorted(tree.nodes.values(), key=lambda x: x.dist)

    util.tic("find ML")
    # range() rather than xrange(): ten items cost nothing and the loop
    # then runs unchanged on Python 2 and 3
    for _ in range(10):
        l = spidir.find_ml_branch_lengths_hky(
            tree,
            util.mget(align, tree.leafNames()),
            bgfreq, kappa,
            maxiter=10)
    util.toc()

    # snapshot taken once, after timing has stopped
    dists.append([n.dist for n in nodes])
    likes.append(l)
def test_ml_large(self):
    """Check the HKY likelihood routines on the verts/19520 data set.

    Reads test/data/verts/19520/19520.ensembl.tree and 19520.nt.mfa, then
    asserts that neither spidir.calc_seq_likelihood_hky nor a single
    iteration of spidir.find_ml_branch_lengths_hky returns -util.INF.
    Both values are printed for inspection.
    """
    # params (presumably HKY base frequencies and kappa -- confirm against
    # the spidir API)
    bgfreq = [.258, .267, .266, .209]
    kappa = 1.59

    # data
    tree = treelib.readTree("test/data/verts/19520/19520.ensembl.tree")
    align = fasta.readFasta("test/data/verts/19520/19520.nt.mfa")

    # NOTE(review): this call receives the alignment object itself, while
    # the fit below receives util.mget(align, tree.leafNames()) -- confirm
    # both argument forms are what spidir expects.
    like = spidir.calc_seq_likelihood_hky(tree, align, bgfreq, kappa)
    print(like)
    # assertNotEqual replaces the deprecated assert_ alias and reports both
    # operands when it fails
    self.assertNotEqual(like, -util.INF)

    like = spidir.find_ml_branch_lengths_hky(
        tree,
        util.mget(align, tree.leafNames()),
        bgfreq, kappa,
        parsinit=False,
        maxiter=1)
    print(like)
    self.assertNotEqual(like, -util.INF)
def test_ml_large(self):
    """Smoke-test sequence likelihood and ML fitting on a larger input.

    Loads the tree and nucleotide alignment stored under
    test/data/verts/19520/ and fails if either
    spidir.calc_seq_likelihood_hky or one iteration (maxiter=1,
    parsinit=False) of spidir.find_ml_branch_lengths_hky yields
    -util.INF.  Each value is printed before it is checked.
    """
    # params -- forwarded unchanged to both spidir calls
    bgfreq = [.258, .267, .266, .209]
    kappa = 1.59

    # data
    tree = treelib.readTree("test/data/verts/19520/19520.ensembl.tree")
    align = fasta.readFasta("test/data/verts/19520/19520.nt.mfa")

    # likelihood of the tree as read from disk
    loglike = spidir.calc_seq_likelihood_hky(tree, align, bgfreq, kappa)
    print(loglike)
    # assert_ is a deprecated unittest alias; assertNotEqual performs the
    # same `!=` check and shows the offending value on failure
    self.assertNotEqual(loglike, -util.INF)

    # a single fitting iteration, sequences selected by tree.leafNames()
    leaf_seqs = util.mget(align, tree.leafNames())
    loglike = spidir.find_ml_branch_lengths_hky(
        tree, leaf_seqs, bgfreq, kappa, parsinit=False, maxiter=1)
    print(loglike)
    self.assertNotEqual(loglike, -util.INF)
def _test_ml_speed(self):
    """Benchmark spidir.find_ml_branch_lengths_hky over ten repetitions.

    The tree and alignment come from test/data/flies.nt/0/.  The ten calls
    (maxiter=10) run between util.tic("find ML") and util.toc(); once the
    timer is stopped the current node distances and the last return value
    are appended to local lists.  The method makes no assertions.

    NOTE(review): the leading underscore means the name lacks the "test"
    prefix that unittest discovery matches -- presumably intentional.
    """
    # params -- forwarded unchanged to spidir
    bgfreq = [.258, .267, .266, .209]
    kappa = 1.59

    # data
    tree = treelib.readTree("test/data/flies.nt/0/0.tree")
    align = fasta.readFasta("test/data/flies.nt/0/0.align")

    likes = []
    dists = []
    nodes = sorted(tree.nodes.values(), key=lambda x: x.dist)

    util.tic("find ML")
    # the loop index is never read; range() keeps this consistent with
    # _test_ml and avoids the Python-2-only xrange()
    for _ in range(10):
        l = spidir.find_ml_branch_lengths_hky(
            tree, util.mget(align, tree.leafNames()),
            bgfreq, kappa, maxiter=10)
    util.toc()

    # recorded after timing so the bookkeeping is not measured
    dists.append([n.dist for n in nodes])
    likes.append(l)
def _test_ml(self):
    """Plot how repeated single-iteration ML fits evolve over 40 rounds.

    Calls spidir.find_ml_branch_lengths_hky forty times with maxiter=1 and
    parsinit=False on the test/data/flies.nt/0/ tree and alignment.  After
    every call the node distances and the returned value are recorded; the
    two series are then drawn into test/output/ml/ml_branches.pdf and
    test/output/ml/ml_likelihood.pdf through util.rplot.  Makes no
    assertions.

    NOTE(review): recording n.dist after each call only shows change if
    the fit updates the tree in place -- presumably it does; confirm.
    """
    # params
    base_freqs = [.258, .267, .266, .209]
    kappa_value = 1.59

    # data
    tree = treelib.readTree("test/data/flies.nt/0/0.tree")
    align = fasta.readFasta("test/data/flies.nt/0/0.align")

    # node order is frozen here, by the distances read from the input tree
    ordered_nodes = list(tree.nodes.values())
    ordered_nodes.sort(key=lambda node: node.dist)

    likes = []
    dists = []

    util.tic("find ML")
    for _ in range(40):
        loglike = spidir.find_ml_branch_lengths_hky(
            tree,
            util.mget(align, tree.leafNames()),
            base_freqs, kappa_value,
            parsinit=False,
            maxiter=1)
        dists.append([node.dist for node in ordered_nodes])
        likes.append(loglike)
    util.toc()

    print(likes)

    prep_dir("test/output/ml/")

    # distances plot: the first node's series sets up the axes, then one
    # line is drawn for every node (zip(*dists) turns rounds into series)
    util.rplot_start("test/output/ml/ml_branches.pdf")
    util.rplot("plot", util.cget(dists, 0),
               ylim=[0, max(dists[0])], t="l",
               main="branch length convergence",
               xlab="iterations", ylab="branch lengths (sub/site)")
    for series in zip(*dists):
        util.rplot("lines", series)
    util.rplot_end(True)

    print(util.cget(dists, 4))

    # likelihood plot
    util.rplot_start("test/output/ml/ml_likelihood.pdf")
    util.rplot("plot", likes, t="l",
               xlab="iterations", ylab="log likelihood",
               main="likelihood convergence")
    util.rplot_end(True)
def _test_ml(self):
    """Record and plot 40 one-step ML branch-length fits.

    Each round calls spidir.find_ml_branch_lengths_hky (maxiter=1,
    parsinit=False) on the flies.nt/0 test tree and alignment, then stores
    the per-node distances and the value the call returned.  The collected
    series are printed and written as two PDFs under test/output/ml/ via
    util.rplot.  Nothing is asserted.
    """
    # params
    freqs = [.258, .267, .266, .209]
    kap = 1.59

    # data
    tree = treelib.readTree("test/data/flies.nt/0/0.tree")
    align = fasta.readFasta("test/data/flies.nt/0/0.align")

    likes, dists = [], []

    # same node order for every snapshot, sorted by initial distance
    by_dist = sorted(tree.nodes.values(), key=lambda nd: nd.dist)

    util.tic("find ML")
    rounds = 40
    for _ in range(rounds):
        seqs = util.mget(align, tree.leafNames())
        result = spidir.find_ml_branch_lengths_hky(
            tree, seqs, freqs, kap, parsinit=False, maxiter=1)
        snapshot = [nd.dist for nd in by_dist]
        dists.append(snapshot)
        likes.append(result)
    util.toc()

    print(likes)

    prep_dir("test/output/ml/")

    # distances plot
    util.rplot_start("test/output/ml/ml_branches.pdf")
    first_series = util.cget(dists, 0)
    util.rplot("plot", first_series, ylim=[0, max(dists[0])], t="l",
               main="branch length convergence",
               xlab="iterations",
               ylab="branch lengths (sub/site)")
    # transpose: one tuple per node, holding its distance in every round
    for per_node in zip(*dists):
        util.rplot("lines", per_node)
    util.rplot_end(True)

    print(util.cget(dists, 4))

    # likelihood plot
    util.rplot_start("test/output/ml/ml_likelihood.pdf")
    util.rplot("plot", likes, t="l", xlab="iterations",
               ylab="log likelihood", main="likelihood convergence")
    util.rplot_end(True)
def readOptions(conf):
    """Fill in the species map and species tree entries of *conf* in place.

    conf["gene2species"] is set to readGene2species(*conf["smap"]) when a
    "smap" key is present, otherwise to the module-level gene2species.
    When a "stree" key is present its value is replaced by the result of
    treelib.readTree on that value.  Returns None.
    """
    # species map: built from the configured arguments, or the default
    conf["gene2species"] = (readGene2species(*conf["smap"])
                            if "smap" in conf else gene2species)

    # species tree: leave conf untouched when none was configured
    if "stree" not in conf:
        return
    conf["stree"] = treelib.readTree(conf["stree"])
def gene2speciesArray(tree, stree, gene2species):
    """Build a per-node list of species-tree lookups for a gene tree.

    The result has one entry for each node in the sequence returned by
    Spidir.makePtree(tree), in that order.  A leaf gets
    snodelookup[stree.nodes[gene2species(leaf.name)]], where snodelookup
    comes from Spidir.makePtree(stree); every non-leaf node gets -1.
    """
    _, nodes, _ = Spidir.makePtree(tree)
    _, _, snodelookup = Spidir.makePtree(stree)

    def species_index(node):
        # internal nodes have no species assignment here
        if not node.isLeaf():
            return -1
        return snodelookup[stree.nodes[gene2species(node.name)]]

    return [species_index(node) for node in nodes]


# NOTE: `tree` is assigned three times in a row; only the last Newick
# string is the tree used by the statements below.
tree = treelib.readTree("../data/0.nt.tree")
tree = treelib.parseNewick("((dmoj_sim,(dvir_sim,dgri_sim)),(dwil_sim,(dpse_sim,(dana_sim,(dmel_sim,(dyak_sim,dere_sim))))));")
tree = treelib.parseNewick("((dwil_sim,(dvir_sim,dgri_sim)),(dmoj_sim,(dpse_sim,(dana_sim,(dmel_sim,(dyak_sim,dere_sim))))));")

# species tree, gene-to-species mapping and parameter table
stree = treelib.readTree("../data/flies.stree")
gene2species = genomeutil.readGene2species("../data/flies.smap")
params = Spidir.readParams("../data/flies.nt.param")

ptree, nodes, nodelookup = Spidir.makePtree(tree)
pstree, snodes, snodelookup = Spidir.makePtree(stree)
g2s = gene2speciesArray(tree, stree, gene2species)

# per species node: first two entries of its params row
mu = [float(params[snode.name][0]) for snode in snodes]
sigma = [float(params[snode.name][1]) for snode in snodes]

# first two entries of the 'baserate' row
alpha = float(params['baserate'][0])
beta = float(params['baserate'][1])
def gene2speciesArray(tree, stree, gene2species):
    """Return, for each gene-tree node, its species-tree lookup or -1.

    Nodes are taken in the order given by Spidir.makePtree(tree).  For a
    leaf the entry is snodelookup[stree.nodes[gene2species(node.name)]],
    with snodelookup taken from Spidir.makePtree(stree); for any other
    node the entry is -1.
    """
    gene_parts = Spidir.makePtree(tree)
    species_parts = Spidir.makePtree(stree)
    _, gene_nodes, _ = gene_parts
    _, _, species_lookup = species_parts

    return [
        species_lookup[stree.nodes[gene2species(gnode.name)]]
        if gnode.isLeaf() else -1
        for gnode in gene_nodes
    ]


# NOTE: each of the next three statements rebinds `tree`; the statements
# that follow only ever see the last Newick string.
tree = treelib.readTree("../data/0.nt.tree")
tree = treelib.parseNewick(
    "((dmoj_sim,(dvir_sim,dgri_sim)),(dwil_sim,(dpse_sim,(dana_sim,(dmel_sim,(dyak_sim,dere_sim))))));"
)
tree = treelib.parseNewick(
    "((dwil_sim,(dvir_sim,dgri_sim)),(dmoj_sim,(dpse_sim,(dana_sim,(dmel_sim,(dyak_sim,dere_sim))))));"
)

# inputs read from ../data
stree = treelib.readTree("../data/flies.stree")
gene2species = genomeutil.readGene2species("../data/flies.smap")
params = Spidir.readParams("../data/flies.nt.param")

# array forms of both trees, plus the gene-node lookups
ptree, nodes, nodelookup = Spidir.makePtree(tree)
pstree, snodes, snodelookup = Spidir.makePtree(stree)
g2s = gene2speciesArray(tree, stree, gene2species)

# first params entry of every species node
mu = [float(params[snode.name][0]) for snode in snodes]