def spidir(conf, distmat, labels, stree, gene2species, params):
    """Main function for the SPIDIR algorithm.

    Runs every search strategy listed in conf["search"] in order (each search
    is handed the tree produced by the previous one and the shared `visited`
    table), then scores any user-supplied trees (conf["tree"]) and
    topologies (conf["tops"]), and finally returns the visited tree with the
    largest data["logl"].

    Arguments:
      conf         -- configuration dict.  Keys read here: "debug", "search",
                      "tree", "tops"; optionally "out", "correcttree",
                      "correcthash"; and "depth" when an "exhaustive" search
                      is requested.  When "out" is present, the keys
                      "debugtab_file" (open file) and "debugtab" (table) are
                      added; otherwise "debugfile" is set to None.
      distmat      -- distance matrix handed to the searches, to neighbor
                      joining and to setTreeDistances
      labels       -- labels that accompany distmat
      stree        -- species tree used for rooting/reconciliation
      gene2species -- gene-to-species mapping passed to the phylo routines
      params       -- model parameters passed to the searches and to
                      treeLogLikelihood

    Returns:
      (tree, logl) for the visited tree whose data["logl"] is maximal.

    Raises:
      SindirError -- on an unknown search name, or when neither a search nor
                     a user tree/topology put anything into `visited`.
    """

    setDebug(conf["debug"])

    if isDebug(DEBUG_HIGH) and pyspidir:
        pyspidir.set_log(3, "")

    if "out" in conf:
        # create debug table; the file is left open on purpose because it is
        # stored in conf for later use
        conf["debugtab_file"] = open(conf["out"] + ".debug.tab", "w")

        debugtab = tablelib.Table(
            headers=["correct", "logl", "treelen", "baserate", "error",
                     "errorlogl", "eventlogl", "tree", "topology",
                     "species_hash"],
            types={"correct": bool,
                   "logl": float,
                   "treelen": float,
                   "baserate": float,
                   "error": float,
                   "errorlogl": float,
                   "eventlogl": float,
                   "tree": str,
                   "topology": str,
                   "species_hash": str})
        debugtab.writeHeader(conf["debugtab_file"])
        conf["debugtab"] = debugtab
    else:
        # NOTE(review): the branch above sets "debugtab_file", this one sets
        # "debugfile" -- possibly a key mismatch; confirm against the code
        # that consumes these keys before changing it.
        conf["debugfile"] = None

    tree = None
    visited = {}

    util.tic("SPIDIR")

    # do auto searches
    for search in conf["search"]:
        util.tic("Search by %s" % search)

        if search == "greedy":
            tree, logl = Search.searchGreedy(conf, distmat, labels, stree,
                                             gene2species, params,
                                             visited=visited)

        elif search == "mcmc":
            tree, logl = Search.searchMCMC(conf, distmat, labels, stree,
                                           gene2species, params,
                                           initTree=tree, visited=visited)

        elif search == "regraft":
            tree, logl = Search.searchRegraft(conf, distmat, labels, stree,
                                              gene2species, params,
                                              initTree=tree, visited=visited,
                                              proposeFunc=Search.proposeTree3)

        elif search == "exhaustive":
            if tree is None:
                # no earlier search produced a tree: start from a rooted
                # neighbor-joining tree
                tree = phylo.neighborjoin(distmat, labels)
                tree = phylo.recon_root(tree, stree, gene2species)

            tree, logl = Search.searchExhaustive(conf, distmat, labels, tree,
                                                 stree, gene2species, params,
                                                 depth=conf["depth"],
                                                 visited=visited)

        elif search == "hillclimb":
            tree, logl = Search.searchHillClimb(conf, distmat, labels, stree,
                                                gene2species, params,
                                                initTree=tree,
                                                visited=visited)

        elif search == "none":
            # Close the timer opened at the top of this iteration so the
            # final util.toc() below matches util.tic("SPIDIR").
            # NOTE(review): assumes util.tic/util.toc are nested, paired
            # timers -- confirm.
            util.toc()
            break

        else:
            raise SindirError("unknown search '%s'" % search)

        util.toc()

        Search.printMCMC(conf, "N/A", tree, stree, gene2species, visited)

        printVisitedTrees(visited)

    def evalUserTree(tree):
        """Score a user-given tree and record it in `visited`."""
        setTreeDistances(conf, tree, distmat, labels)
        logl = treeLogLikelihood(conf, tree, stree, gene2species, params)

        # entries of `visited` are [logl, tree, visit count]
        thash = phylo.hash_tree(tree)
        if thash in visited:
            count = visited[thash][2]
        else:
            count = 0
        visited[thash] = [logl, tree.copy(), count + 1]

        if isDebug(DEBUG_LOW):
            debug("\nuser given tree:")
            recon = phylo.reconcile(tree, stree, gene2species)
            events = phylo.label_events(tree, recon)
            drawTreeLogl(tree, events=events)

    # eval the user given trees
    for treefile in conf["tree"]:
        tree = treelib.read_tree(treefile)
        evalUserTree(tree)

    for topfile in conf["tops"]:
        strees = []

        infile = open(topfile)
        try:
            # read_tree is called until it fails, which is how the end of the
            # file is detected.  Only Exception is caught so that
            # KeyboardInterrupt/SystemExit still propagate.
            while True:
                try:
                    strees.append(treelib.read_tree(infile))
                except Exception:
                    break
        finally:
            infile.close()

        # parenthesised single argument: same output under the Python 2
        # print statement and the print function
        print(len(strees))

        for top in strees:
            tree = phylo.stree2gtree(top, labels, gene2species)
            evalUserTree(tree)

    if conf["tops"]:
        printVisitedTrees(visited)

    # eval correcttree for debug only (it is not added to `visited`)
    if "correcttree" in conf:
        tree = conf["correcttree"]
        setTreeDistances(conf, tree, distmat, labels)
        # return value is not needed here
        treeLogLikelihood(conf, tree, stree, gene2species, params)

        if isDebug(DEBUG_LOW):
            debug("\ncorrect tree:")
            recon = phylo.reconcile(tree, stree, gene2species)
            events = phylo.label_events(tree, recon)
            drawTreeLogl(tree, events=events)

    util.toc()

    if not visited:
        raise SindirError("No search or tree topologies given")

    if "correcthash" in conf:
        if conf["correcthash"] in visited:
            debug("SEARCH: visited correct tree")
        else:
            debug("SEARCH: NEVER saw correct tree")

    # return ML tree
    candidates = [entry[1] for entry in visited.values()]
    best = candidates[util.argmax([t.data["logl"] for t in candidates])]
    return best, best.data["logl"]