Exemplo n.º 1
0
def setTreeDistances(conf, tree, distmat, genes):
    """
    Fit branch lengths onto `tree` and store the fit error in `tree.data`.

    The method is selected by keys of `conf`:

    * "parsimony" (only when pyspidir is available): parsimony_C is run on
      conf["aln"]; tree.data["error"] becomes the total branch length.
    * "mlhkydist" (only when pyspidir is available): mlhkydist_C is run;
      its return value is stored in tree.data["distlogl"] and the error is
      set to 0.0.
    * otherwise: phylo.least_square_error is run against `distmat` and
      `genes`; the error is the residual norm divided by the total branch
      length, and setBranchError is called with the fit details.
    """
    def total_length():
        # sum of the branch lengths currently stored on the tree
        return sum(node.dist for node in tree.nodes.values())

    if isDebug(DEBUG_MED):
        util.tic("fit branch lengths")

    if pyspidir and "parsimony" in conf:
        # branch lengths from parsimony
        parsimony_C(conf["aln"], tree)
        tree.data["error"] = total_length()

    elif pyspidir and "mlhkydist" in conf:
        # branch lengths from ML
        tree.data["distlogl"] = mlhkydist_C(
            conf["aln"], tree, conf["bgfreq"], conf["tsvratio"],
            3 * len(tree.nodes))
        tree.data["error"] = 0.0

    else:
        # branch lengths from least squares
        lse = phylo.least_square_error(tree, distmat, genes)

        # an all-zero tree can come out of the greedy search; give every
        # branch a small length so the normalisation below is defined
        if total_length() == 0:
            for node in tree.nodes.values():
                node.dist = .01

        resid_norm = math.sqrt(scipy.dot(lse.resids, lse.resids))
        tree.data["error"] = resid_norm / total_length()

        setBranchError(conf, tree, lse.resids, lse.paths, lse.edges,
                       lse.topmat)

    if isDebug(DEBUG_MED):
        util.toc()
Exemplo n.º 2
0
def sample_thread(arg, seqs, rho=1.5e-8, mu=2.5e-8, popsize=1e4,
                  times=None, ntimes=20, maxtime=200000, verbose=False):
    """
    Thread one more sequence into `arg` and return the resulting ARG.

    `seqs` maps sequence names to sequences.  The first key of `seqs` that
    is not among the names returned by arg2ctrees is taken as the sequence
    to add (IndexError if there is none).  When `times` is None the time
    points come from argweaver.get_time_points(ntimes, maxtime, delta=.01).
    The same `popsize` is used for every time point.
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=maxtime, delta=.01)
    popsizes = [popsize] * len(times)

    if verbose:
        util.tic("sample thread")

    trees, names = arg2ctrees(arg, times)

    # sequences already in the ARG, in the order of `names`
    ordered = [seqs[name] for name in names]

    # the sequence being threaded goes last
    newcomer = [key for key in seqs.keys() if key not in names][0]
    names.append(newcomer)
    ordered.append(seqs[newcomer])

    nseqs = len(ordered)
    seqlen = len(ordered[0])
    cseqs = (C.c_char_p * nseqs)(*ordered)

    trees = argweaver_sample_thread(
        trees, times, len(times),
        popsizes, rho, mu,
        cseqs, nseqs, seqlen, None)
    result = ctrees2arg(trees, names, times, verbose=verbose)

    if verbose:
        util.toc()

    return result
Exemplo n.º 3
0
def dnadist(seqs, output=None, verbose=True, force=False, args=None):
    """
    Run PHYLIP's dnadist on an alignment inside a temporary directory.

    seqs    -- alignment; checked with validate_seqs and written with
               write_phylip_align
    output  -- if given, the program's "outfile" is moved to
               "../<output>" and only the labels are returned
    verbose -- forwarded to exec_phylip
    force   -- accepted but not used by this function
    args    -- text passed to exec_phylip as the program input
               (default "y")

    Returns `labels` when `output` is given, otherwise `(labels, mat)`
    where `mat` is the matrix parsed by read_dist_matrix.
    """
    if args is None:
        args = "y"

    validate_seqs(seqs)
    cwd = create_temp_dir()
    util.tic("dnadist on %d of length %d" % (len(seqs), len(seqs.values()[0])))

    # create input; the handle is closed before phylip runs so the
    # alignment is guaranteed to be flushed to disk
    with open("infile", "w") as infile:
        labels = write_phylip_align(infile, seqs)

    # run phylip
    exec_phylip("dnadist", args, verbose)

    util.toc()

    # hand the raw output file to the caller
    if output is not None:
        os.rename("outfile", "../" + output)
        cleanup_temp_dir(cwd)
        return labels

    # or parse the output
    _, mat = read_dist_matrix("outfile")
    cleanup_temp_dir(cwd)
    return labels, mat
Exemplo n.º 4
0
    def addEvents(self, eventsfile):
        """
        Insert per-species event counts into the Events table.

        The Events table is created first when tableExists() reports it
        missing.  The table read from `eventsfile` is indexed by its
        "partid" column; for every famid selected from Families that is
        present in that index, one row is inserted per node of self.stree
        using the columns "<node>-genes", "<node>-dup", "<node>-loss" and
        "<node>-appear".
        """
        if not tableExists(self.cur, "Events"):
            self.makeEventsTable()

        util.tic("add events")
        by_family = tablelib.read_table(eventsfile).lookup("partid")

        self.cur.execute("SELECT famid FROM Families;")
        stored_famids = [record[0] for record in self.cur]

        # statement template (values are interpolated as text, not bound);
        # the embedded whitespace mirrors the original multi-line literal
        insert_sql = ("INSERT INTO Events VALUES \n" + " " * 32 +
                      '("%s", "%s", %d, %d, %d, %d);')

        for famid in stored_famids:
            if famid in by_family:
                counts = by_family[famid]

                for species in map(str, self.stree.nodes):
                    values = (famid, species,
                              counts[species + "-genes"],
                              counts[species + "-dup"],
                              counts[species + "-loss"],
                              counts[species + "-appear"])
                    self.cur.execute(insert_sql % values)
        util.toc()
Exemplo n.º 5
0
    def addFamilies(self, eventsfile, discard=[]):
        """
        Insert one row per family of `eventsfile` into the Families table.

        The Families table is created first when tableExists() reports it
        missing.  Rows whose "partid" is in `discard` are logged and
        skipped.  For the others the total branch length of the family
        tree and three times the median sequence length of the family
        fasta file are stored together with the "famrate", "dup", "loss"
        and "genes" columns and the two names from makeFamilyGeneNames().
        """
        if not tableExists(self.cur, "Families"):
            self.makeFamiliesTable()

        util.tic("add families")
        events_tab = tablelib.read_table(eventsfile)
        # result is not used below; the call is kept so behaviour is unchanged
        events_lookup = events_tab.lookup("partid")
        gene_names = self.makeFamilyGeneNames()
        skipped = set(discard)

        # statement template (values are interpolated as text, not bound);
        # the embedded whitespace mirrors the original multi-line literal
        insert_sql = ("INSERT INTO Families VALUES \n" + " " * 32 +
                      '("%s", "%s", %f, %f, %f, %d, %d, %d,\n' + " " * 33 +
                      '"%s");')

        for row in events_tab:
            famid = row["partid"]
            if famid in skipped:
                util.logger("discarding '%s'" % famid)
                continue

            tree = treelib.read_tree(self.getTreeFile(famid))
            treelen = sum(node.dist for node in tree)
            seqs = fasta.read_fasta(self.getFastaFile(famid))
            median_len = stats.median(map(len, seqs.values()))

            # pair of names for this family, empty strings when unknown
            names = gene_names.get(famid, ("", ""))

            values = (famid, names[0],
                      row["famrate"], treelen, median_len * 3,
                      row["dup"], row["loss"], row["genes"],
                      names[1])
            self.cur.execute(insert_sql % values)
        util.toc()
    def addEvents(self, eventsfile):
        """
        Fill the Events table with per-species counts for each family.

        Creates the Events table when tableExists() says it is absent,
        indexes the table read from `eventsfile` by "partid", and for each
        famid found in Families that also occurs in that index inserts one
        row per node of self.stree, built from the "<node>-genes",
        "<node>-dup", "<node>-loss" and "<node>-appear" columns.
        """
        if not tableExists(self.cur, "Events"):
            self.makeEventsTable()

        util.tic("add events")
        by_family = tablelib.read_table(eventsfile).lookup("partid")

        self.cur.execute("SELECT famid FROM Families;")
        stored_famids = [record[0] for record in self.cur]

        # statement template (values are interpolated as text, not bound);
        # the embedded whitespace mirrors the original multi-line literal
        insert_sql = ("INSERT INTO Events VALUES\n" + " " * 20 +
                      '("%s", "%s", %d, %d, %d, %d);')

        for famid in stored_famids:
            if famid in by_family:
                counts = by_family[famid]

                for species in map(str, self.stree.nodes):
                    values = (famid, species,
                              counts[species + "-genes"],
                              counts[species + "-dup"],
                              counts[species + "-loss"],
                              counts[species + "-appear"])
                    self.cur.execute(insert_sql % values)
        util.toc()
    def addGoTerms(self, gofile):
        """
        Load GO terms and gene-to-term links from `gofile`.

        Creates the GoTerms table via makeGoTermsTable() when tableExists()
        reports it missing.  One GoTerms row is inserted per distinct
        "goid" (taken from the first record of each group) and one
        GeneGoTerms row per record, keyed by the "orf" column.
        """
        if not tableExists(self.cur, "GoTerms"):
            self.makeGoTermsTable()

        util.tic("add go terms")
        goterms = tablelib.read_table(gofile)
        goterms_lookup = goterms.groupby("orf")
        goterms_bygoid = goterms.groupby("goid")

        term_sql = """INSERT INTO GoTerms VALUES ("%s", "%s")"""
        link_sql = """INSERT INTO GeneGoTerms VALUES ("%s", "%s");"""

        # one row per GO id, described by the first record of its group
        for goid in goterms_bygoid:
            first = goterms_bygoid[goid][0]

            # NOTE(review): a term containing '"' is only printed here; it
            # is still interpolated into the double-quoted statement below
            if '"' in first["term"]:
                print(first)

            self.cur.execute(term_sql % (first["goid"], first["term"]))

        # one row per (gene, GO id) association
        for gene, records in goterms_lookup.iteritems():
            for record in records:
                self.cur.execute(link_sql % (gene, record["goid"]))
        util.toc()
    def addFamilies(self, eventsfile, discard=[]):
        """
        Populate the Families table from the rows of `eventsfile`.

        Creates the Families table when tableExists() says it is absent.
        A row whose "partid" is listed in `discard` is logged and skipped.
        Every other row is stored with the total branch length of its
        tree file, three times the median length of the sequences in its
        fasta file, its "famrate", "dup", "loss" and "genes" columns, and
        the two names supplied by makeFamilyGeneNames().
        """
        if not tableExists(self.cur, "Families"):
            self.makeFamiliesTable()

        util.tic("add families")
        events_tab = tablelib.read_table(eventsfile)
        # result is not used below; the call is kept so behaviour is unchanged
        events_lookup = events_tab.lookup("partid")
        gene_names = self.makeFamilyGeneNames()
        skipped = set(discard)

        # statement template (values are interpolated as text, not bound);
        # the embedded whitespace mirrors the original multi-line literal
        insert_sql = ("INSERT INTO Families VALUES\n" + " " * 16 +
                      '("%s", "%s", %f, %f, %f, %d, %d, %d,\n' + " " * 16 +
                      '"%s");')

        for row in events_tab:
            famid = row["partid"]
            if famid in skipped:
                util.logger("discarding '%s'" % famid)
                continue

            tree = treelib.read_tree(self.getTreeFile(famid))
            treelen = sum(node.dist for node in tree)
            seqs = fasta.read_fasta(self.getFastaFile(famid))
            median_len = stats.median(map(len, seqs.values()))

            # pair of names for this family, empty strings when unknown
            names = gene_names.get(famid, ("", ""))

            values = (famid, names[0],
                      row["famrate"], treelen, median_len * 3,
                      row["dup"], row["loss"], row["genes"],
                      names[1])
            self.cur.execute(insert_sql % values)
        util.toc()
Exemplo n.º 9
0
    def recon_helper(self, nsearch=1000):
        """
        Perform reconciliation.

        Runs `nsearch` rounds: each round scores the current proposal with
        eval_proposal, passes the score to eval_search, and asks the
        proposer for the next proposal.  Returns self.maxrecon, falling
        back to a copy of the initial proposal when it is still unset
        after the search.
        """
        self.maxp = -util.INF
        self.maxrecon = None
        proposal = self.proposer.init_proposal()
        fallback = proposal.copy()

        for step in xrange(nsearch):
            # progress report every tenth round
            if step % 10 == 0:
                print("search %d" % step)

            # score the current proposal
            util.tic("eval")
            score = self.eval_proposal(proposal)
            util.toc()

            # keep or discard it, then move on to the next proposal
            util.tic("prop")
            self.eval_search(score, proposal)
            proposal = self.proposer.next_proposal()
            util.toc()

        # all proposals bad, use initial proposal
        if not self.maxrecon:
            self.maxrecon = fallback

        # rename locus tree nodes (how about coal_tree names?)
        dlcoal.rename_nodes(self.maxrecon.locus_tree, self.name_internal)

        return self.maxrecon
Exemplo n.º 10
0
    def addGoTerms(self, gofile):
        """
        Insert GO terms and gene-to-term associations read from `gofile`.

        The GoTerms table is created with makeGoTermsTable() when
        tableExists() reports it missing.  Each distinct "goid" yields one
        GoTerms row (from the first record of its group); each record
        yields one GeneGoTerms row keyed by its "orf" column.
        """
        if not tableExists(self.cur, "GoTerms"):
            self.makeGoTermsTable()

        util.tic("add go terms")
        goterms = tablelib.read_table(gofile)
        goterms_lookup = goterms.groupby("orf")
        goterms_bygoid = goterms.groupby("goid")

        term_sql = """INSERT INTO GoTerms VALUES ("%s", "%s")"""
        link_sql = """INSERT INTO GeneGoTerms VALUES ("%s", "%s");"""

        # one row per GO id, described by the first record of its group
        for goid in goterms_bygoid:
            first = goterms_bygoid[goid][0]

            # NOTE(review): a term containing '"' is only printed here; it
            # is still interpolated into the double-quoted statement below
            if '"' in first["term"]:
                print(first)

            self.cur.execute(term_sql % (first["goid"], first["term"]))

        # one row per (gene, GO id) association
        for gene, records in goterms_lookup.iteritems():
            for record in records:
                self.cur.execute(link_sql % (gene, record["goid"]))
        util.toc()
Exemplo n.º 11
0
def boot_proml(seqs, iters=100, seed=1, jumble=5, output=None,
               verbose=True, force=False):
    """
    Bootstrap an alignment with PHYLIP's seqboot and build trees with proml.

    seqs    -- alignment; checked with validate_seqs and written with
               write_phylip_align
    iters   -- number passed to proml and number of trees read back
    seed    -- random seed passed to both seqboot and proml
    jumble  -- jumble count passed to proml
    output  -- if given, "outtree" is moved to "../<output>" and only the
               labels are returned
    verbose -- forwarded to exec_phylip
    force   -- accepted but not used by this function

    Returns `labels` when `output` is given, otherwise a list of `iters`
    trees read from "outtree" and renamed with the labels.
    """
    validate_seqs(seqs)
    cwd = create_temp_dir()
    util.tic("bootProml on %d of length %d" % (len(seqs), len(seqs.values()[0])))

    # create input; the handle is closed before seqboot runs and before
    # "outfile" is renamed on top of "infile"
    with open("infile", "w") as alignfile:
        labels = write_phylip_align(alignfile, seqs)

    exec_phylip("seqboot", "y\n%d" % seed, verbose)

    # seqboot's output becomes proml's input
    os.rename("outfile", "infile")
    exec_phylip("proml", "m\nD\n%d\n%d\n%d\ny" % (iters, seed, jumble), verbose)

    util.toc()

    # hand the raw tree file to the caller
    if output is not None:
        os.rename("outtree", "../" + output)
        cleanup_temp_dir(cwd)
        return labels

    # or read the tree samples; the file is closed even if parsing fails
    trees = []
    with open("outtree") as treefile:
        for _ in xrange(iters):
            tree = treelib.Tree()
            tree.read_newick(treefile)
            rename_tree_with_names(tree, labels)
            trees.append(tree)
    cleanup_temp_dir(cwd)
    return trees
Exemplo n.º 12
0
def trainTree(conf, stree, gene2species):
    """
    Learn model parameters from a set of tree files and write them out.

    The tree files are the shell-parsed expansion of every entry of
    conf["REST"].  After reading, the two branches below each root are
    set to half of their combined length.  The model is learned with
    Spidir.learnModel (using conf["trainstats"]) and written to
    conf["param"] with Spidir.writeParams.
    """
    treefiles = []
    for arg in conf["REST"]:
        treefiles.extend(util.shellparser(arg))

    util.tic("reading trees")
    trees = []
    prog = progress.ProgressBar(len(treefiles))
    for treefile in treefiles:
        prog.update()
        tree = treelib.read_tree(treefile)

        # even out top two branches
        left = tree.root.children[0]
        right = tree.root.children[1]
        half = (left.dist + right.dist) / 2.0
        left.dist = half
        right.dist = half

        trees.append(tree)

    util.toc()

    params = Spidir.learnModel(trees, stree, gene2species,
                               conf["trainstats"], filenames=treefiles)

    Spidir.writeParams(conf["param"], params)
Exemplo n.º 13
0
    def _test_ml_speed(self):
        """Time ten runs (maxiter=10 each) of the HKY ML branch-length fit."""

        # model parameters
        bgfreq = [.258, .267, .266, .209]
        kappa = 1.59

        # input tree and alignment
        tree = treelib.readTree("test/data/flies.nt/0/0.tree")
        align = fasta.readFasta("test/data/flies.nt/0/0.align")

        # nodes ordered by their branch length before fitting
        nodes = sorted(tree.nodes.values(), key=lambda node: node.dist)

        util.tic("find ML")
        for _ in xrange(10):
            logl = spidir.find_ml_branch_lengths_hky(
                tree, util.mget(align, tree.leafNames()),
                bgfreq, kappa, maxiter=10)
        util.toc()

        # record the outcome of the last run
        dists = [[node.dist for node in nodes]]
        likes = [logl]
Exemplo n.º 14
0
    def _test_ml_speed(self):
        """Time ten runs (maxiter=10 each) of the HKY ML branch-length fit."""

        # model parameters
        bgfreq = [.258, .267, .266, .209]
        kappa = 1.59

        # input tree and alignment
        tree = treelib.readTree("test/data/flies.nt/0/0.tree")
        align = fasta.readFasta("test/data/flies.nt/0/0.align")

        # nodes ordered by their branch length before fitting
        nodes = sorted(tree.nodes.values(), key=lambda node: node.dist)

        util.tic("find ML")
        for _ in xrange(10):
            logl = spidir.find_ml_branch_lengths_hky(
                tree, util.mget(align, tree.leafNames()),
                bgfreq, kappa, maxiter=10)
        util.toc()

        # record the outcome of the last run
        dists = [[node.dist for node in nodes]]
        likes = [logl]
Exemplo n.º 15
0
def dnadist(seqs, output=None, verbose=True, force=False, args=None):
    """
    Compute a distance matrix with PHYLIP's dnadist in a temporary directory.

    seqs    -- alignment; checked with validate_seqs and written with
               write_phylip_align
    output  -- if given, the program's "outfile" is moved to
               "../<output>" and only the labels are returned
    verbose -- forwarded to exec_phylip
    force   -- accepted but not used by this function
    args    -- text passed to exec_phylip as the program input
               (default "y")

    Returns `labels` when `output` is given, otherwise `(labels, mat)`
    where `mat` is the matrix parsed by read_dist_matrix.
    """
    if args is None:
        args = "y"

    validate_seqs(seqs)
    cwd = create_temp_dir()
    util.tic("dnadist on %d of length %d" % (len(seqs), len(seqs.values()[0])))

    # create input; the handle is closed before phylip runs so the
    # alignment is guaranteed to be flushed to disk
    with open("infile", "w") as infile:
        labels = write_phylip_align(infile, seqs)

    # run phylip
    exec_phylip("dnadist", args, verbose)

    util.toc()

    # hand the raw output file to the caller
    if output is not None:
        os.rename("outfile", "../" + output)
        cleanup_temp_dir(cwd)
        return labels

    # or parse the output
    _, mat = read_dist_matrix("outfile")
    cleanup_temp_dir(cwd)
    return labels, mat
Exemplo n.º 16
0
    def draw_placed(self):
        """
        Build the drawing group for all placed fragments.

        The group contains, in order: one frag_widget per fragment, one
        translated gene_widget per entry of self.region_layout, and one
        draw_matches result per fragment.  The group id is stored in
        self.groupid and returned.
        """
        util.tic("create draw code")

        # fragments
        vis = [self.frag_widget(frag) for frag in self.frags]

        # genes, each moved to its layout position
        for reg, layout in self.region_layout.iteritems():
            vis.append(
                translate(layout.x, layout.y,
                          self.gene_widget(self.db.get_region(reg))))

        # matches; `drawn` is shared across fragments
        drawn = set()
        vis.extend([self.draw_matches(frag.genome, frag.chrom, frag.start,
                                      frag.end, drawn)
                    for frag in self.frags])

        util.toc()

        self.groupid = group(*vis)
        return self.groupid
Exemplo n.º 17
0
def calc_joint_prob(arg, seqs, ntimes=20, mu=2.5e-8, rho=1.5e-8, popsizes=1e4,
                    times=None, verbose=False, delete_arg=True):
    """
    Return the value computed by argweaver_joint_prob for `arg` and `seqs`.

    When `times` is None the time points come from
    argweaver.get_time_points(ntimes, maxtime=80000, delta=.01).  A scalar
    `popsizes` (float or int) is repeated once per time point.  The C
    trees built for the call are freed afterwards unless `delete_arg` is
    False.
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    if verbose:
        util.tic("calc likelihood")

    # convert the inputs to their C representations
    trees, names = arg2ctrees(arg, times)
    cseqs, nseqs, seqlen = seqs2cseqs(seqs, names)

    prob = argweaver_joint_prob(
        trees, times, len(times), popsizes, mu, rho, cseqs, nseqs, seqlen)

    if delete_arg:
        delete_local_trees(trees)

    if verbose:
        util.toc()

    return prob
    def walk(node):
        """
        Merge partitions bottom-up: children first, then this node.

        For an internal node, the parts of its first two children are
        merged with mergeAvg (when conf["merge"] == "avg") or mergeBuh,
        and stored in node.parts.  When conf has an "output" entry and
        the result is non-empty, the parts are also written to
        conf["output"] + str(node.name) + ".part".
        """
        for child in node.children:
            walk(child)

        if node.is_leaf():
            return

        def find_blastfiles(group1, group2):
            # blast files recorded for pairs (leaf1, leaf2) across the groups
            return [blastFileLookup[leaf1][leaf2]
                    for leaf1 in group1
                    for leaf2 in group2
                    if leaf1 in blastFileLookup and
                    leaf2 in blastFileLookup[leaf1]]

        left = node.children[0]
        right = node.children[1]
        leaves1 = left.leaf_names()
        leaves2 = right.leaf_names()

        # sibling blast files
        blastfiles = find_blastfiles(leaves1, leaves2)

        # outgroup blast files: go up one level and pair this node's
        # leaves with every other leaf below the parent
        outblastfiles = []
        if node.parent:
            inleaves = leaves1 + leaves2
            outleaves = set(node.parent.leaf_names()) - set(inleaves)
            outblastfiles = find_blastfiles(inleaves, outleaves)

        util.tic("merging")
        util.logger("leaves1: ", leaves1)
        util.logger("leaves2: ", leaves2)

        use_avg = "merge" in conf and conf["merge"] == "avg"
        if use_avg:
            node.parts = mergeAvg(conf, genes, left.parts, right.parts,
                                  blastfiles, outblastfiles)
        else:
            node.parts = mergeBuh(conf, genes, left.parts, right.parts,
                                  blastfiles)

        nparts = len(node.parts)

        if "output" in conf and nparts > 0:
            partfile = conf["output"] + str(node.name) + ".part"
            util.write_delim(partfile, node.parts)

        util.logger("number of parts: ", nparts)
        if nparts > 0:
            util.logger("largest part:", max(map(len, node.parts)))

        util.toc()
Exemplo n.º 19
0
    def _test_ml(self):
        """
        Test ML code.

        Runs the HKY ML branch-length fit 40 times with maxiter=1 and no
        parsimony initialisation, recording branch lengths and the
        returned likelihood after each run, then plots both series to
        PDF files under test/output/ml/.
        """

        # model parameters
        bgfreq = [.258, .267, .266, .209]
        kappa = 1.59

        # input tree and alignment
        tree = treelib.readTree("test/data/flies.nt/0/0.tree")
        align = fasta.readFasta("test/data/flies.nt/0/0.align")

        # nodes ordered by their branch length before fitting
        nodes = sorted(tree.nodes.values(), key=lambda node: node.dist)

        likes = []
        dists = []

        util.tic("find ML")
        for _ in range(40):
            logl = spidir.find_ml_branch_lengths_hky(
                tree, util.mget(align, tree.leafNames()),
                bgfreq, kappa,
                parsinit=False, maxiter=1)

            dists.append([node.dist for node in nodes])
            likes.append(logl)
        util.toc()

        print(likes)

        prep_dir("test/output/ml/")

        # branch lengths: the first column sets up the axes, then one line
        # per column is drawn on top
        util.rplot_start("test/output/ml/ml_branches.pdf")
        util.rplot("plot", util.cget(dists, 0),
                   ylim=[0, max(dists[0])], t="l",
                   main="branch length convergence",
                   xlab="iterations",
                   ylab="branch lengths (sub/site)")
        for series in zip(*dists):
            util.rplot("lines", series)
        util.rplot_end(True)

        print(util.cget(dists, 4))

        # likelihood per run
        util.rplot_start("test/output/ml/ml_likelihood.pdf")
        util.rplot("plot", likes, t="l",
                   xlab="iterations",
                   ylab="log likelihood",
                   main="likelihood convergence")
        util.rplot_end(True)
Exemplo n.º 20
0
def draw_raxml_tree(tr, adef):
    """
    Draw a RAxML tree on stdout.

    The tree is converted to a string with raxml.tree_to_string, parsed
    as newick, unrooted, and drawn with minlen=5 and maxlen=5.  Both
    stages are timed with util.tic/util.toc.
    """
    util.tic("Tree to string...")
    newick = raxml.tree_to_string(tr, adef)
    util.toc()

    util.tic("Drawing tree...")
    unrooted = treelib.unroot(treelib.parse_newick(newick))
    treelib.draw_tree(unrooted, out=sys.stdout, minlen=5, maxlen=5)
    util.toc()
Exemplo n.º 21
0
def draw_raxml_tree(tr, adef):
    """
    Draw a RAxML tree on stdout.

    The tree is converted to a string with raxml.tree_to_string, parsed
    as newick, unrooted, and drawn with minlen=5 and maxlen=5.  Both
    stages are timed with util.tic/util.toc.
    """
    util.tic("Tree to string...")
    newick = raxml.tree_to_string(tr, adef)
    util.toc()

    util.tic("Drawing tree...")
    unrooted = treelib.unroot(treelib.parse_newick(newick))
    treelib.draw_tree(unrooted, out=sys.stdout, minlen=5, maxlen=5)
    util.toc()
Exemplo n.º 22
0
def pamp(seqs, tree, seqtype="dna", saveOutput="", verbose=False, safe=True):
    """
    Run PAML's pamp on an alignment and tree inside a temporary directory.

    seqs       -- alignment; written as a nexus matrix named "align"
    tree       -- tree; written to a file named "tree"
    seqtype    -- "dna" (written as seqtype 0) or "pep" (seqtype 2)
    saveOutput -- if non-empty, the temporary directory is kept via
                  phylip.save_temp_dir(cwd, saveOutput); otherwise it is
                  removed with phylip.cleanup_temp_dir
    verbose    -- when false, the program's stdout goes to /dev/null
    safe       -- for "dna" input, first map removeStopCodons over the
                  alignment with alignlib.mapalign

    Returns (tree2, aln): the tree from PamlResults.getBranchNames() and
    the alignment from PamlResults.getPampReconstruction().

    Raises Exception for an unknown `seqtype`.
    """
    # reject a bad seqtype before anything is written to disk
    seqtype_codes = {"dna": 0, "pep": 2}
    if seqtype not in seqtype_codes:
        raise Exception("unknown seqtype '%s'" % seqtype)

    if safe and seqtype == "dna":
        seqs = alignlib.mapalign(seqs, valfunc=removeStopCodons)

    phylip.validate_seqs(seqs)
    cwd = phylip.create_temp_dir()

    util.tic("pamp on %d of length %d" % (len(seqs), len(seqs.values()[0])))

    # create input
    nex = nexus.Nexus("align", "w")
    nex.write_matrix(seqs.keys(), seqs.values(), seqtype, seqs.alignlen())
    nex.close()
    with open("tree", "w") as treefile:
        treefile.write("%d 1\n" % len(tree.leaves()))
        tree.write(treefile, writeData=lambda x: "")

    # create control file
    ctlfile = "pamp.ctl"
    with open(ctlfile, "w") as out:
        out.write("seqfile = align\n")
        out.write("treefile = tree\n")
        out.write("outfile = out\n")
        out.write("seqtype = %d\n" % seqtype_codes[seqtype])
        out.write("ncatG = 8\n")
        out.write("nhomo = 0\n")

    # run pamp on the control file written above (the previous command
    # named "paml.ctl", a file this function never creates)
    command = "pamp " + ctlfile
    if not verbose:
        command += " > /dev/null"
    os.system(command)

    res = PamlResults("out")
    aln = res.getPampReconstruction()
    aln.write("recon.mfa")
    tree2 = res.getBranchNames()
    renameTreeAlign(tree2, aln)

    if saveOutput != "":
        phylip.save_temp_dir(cwd, saveOutput)
    else:
        phylip.cleanup_temp_dir(cwd)

    util.toc()

    return tree2, aln
def pamp(seqs, tree, seqtype="dna", saveOutput="", verbose=False, safe=True):
    """
    Run PAML's pamp on an alignment and tree inside a temporary directory.

    seqs       -- alignment; written as a nexus matrix named "align"
    tree       -- tree; written to a file named "tree"
    seqtype    -- "dna" (written as seqtype 0) or "pep" (seqtype 2)
    saveOutput -- if non-empty, the temporary directory is kept via
                  phylip.save_temp_dir(cwd, saveOutput); otherwise it is
                  removed with phylip.cleanup_temp_dir
    verbose    -- when false, the program's stdout goes to /dev/null
    safe       -- for "dna" input, first map removeStopCodons over the
                  alignment with alignlib.mapalign

    Returns (tree2, aln): the tree from PamlResults.getBranchNames() and
    the alignment from PamlResults.getPampReconstruction().

    Raises Exception for an unknown `seqtype`.
    """
    # reject a bad seqtype before anything is written to disk
    seqtype_codes = {"dna": 0, "pep": 2}
    if seqtype not in seqtype_codes:
        raise Exception("unknown seqtype '%s'" % seqtype)

    if safe and seqtype == "dna":
        seqs = alignlib.mapalign(seqs, valfunc=removeStopCodons)

    phylip.validate_seqs(seqs)
    cwd = phylip.create_temp_dir()

    util.tic("pamp on %d of length %d" % (len(seqs), len(seqs.values()[0])))

    # create input
    nex = nexus.Nexus("align", "w")
    nex.write_matrix(seqs.keys(), seqs.values(), seqtype, seqs.alignlen())
    nex.close()
    with open("tree", "w") as treefile:
        treefile.write("%d 1\n" % len(tree.leaves()))
        tree.write(treefile, writeData=lambda x: "")

    # create control file
    ctlfile = "pamp.ctl"
    with open(ctlfile, "w") as out:
        out.write("seqfile = align\n")
        out.write("treefile = tree\n")
        out.write("outfile = out\n")
        out.write("seqtype = %d\n" % seqtype_codes[seqtype])
        out.write("ncatG = 8\n")
        out.write("nhomo = 0\n")

    # run pamp on the control file written above (the previous command
    # named "paml.ctl", a file this function never creates)
    command = "pamp " + ctlfile
    if not verbose:
        command += " > /dev/null"
    os.system(command)

    res = PamlResults("out")
    aln = res.getPampReconstruction()
    aln.write("recon.mfa")
    tree2 = res.getBranchNames()
    renameTreeAlign(tree2, aln)

    if saveOutput != "":
        phylip.save_temp_dir(cwd, saveOutput)
    else:
        phylip.cleanup_temp_dir(cwd)

    util.toc()

    return tree2, aln
Exemplo n.º 24
0
    def _test_ml(self):
        """
        Test ML code.

        Runs the HKY ML branch-length fit 40 times with maxiter=1 and no
        parsimony initialisation, recording branch lengths and the
        returned likelihood after each run, then plots both series to
        PDF files under test/output/ml/.
        """

        # model parameters
        bgfreq = [.258, .267, .266, .209]
        kappa = 1.59

        # input tree and alignment
        tree = treelib.readTree("test/data/flies.nt/0/0.tree")
        align = fasta.readFasta("test/data/flies.nt/0/0.align")

        # nodes ordered by their branch length before fitting
        nodes = sorted(tree.nodes.values(), key=lambda node: node.dist)

        likes = []
        dists = []

        util.tic("find ML")
        for _ in range(40):
            logl = spidir.find_ml_branch_lengths_hky(
                tree, util.mget(align, tree.leafNames()),
                bgfreq, kappa,
                parsinit=False, maxiter=1)

            dists.append([node.dist for node in nodes])
            likes.append(logl)
        util.toc()

        print(likes)

        prep_dir("test/output/ml/")

        # branch lengths: the first column sets up the axes, then one line
        # per column is drawn on top
        util.rplot_start("test/output/ml/ml_branches.pdf")
        util.rplot("plot", util.cget(dists, 0),
                   ylim=[0, max(dists[0])], t="l",
                   main="branch length convergence",
                   xlab="iterations",
                   ylab="branch lengths (sub/site)")
        for series in zip(*dists):
            util.rplot("lines", series)
        util.rplot_end(True)

        print(util.cget(dists, 4))

        # likelihood per run
        util.rplot_start("test/output/ml/ml_likelihood.pdf")
        util.rplot("plot", likes, t="l",
                   xlab="iterations",
                   ylab="log likelihood",
                   main="likelihood convergence")
        util.rplot_end(True)
Exemplo n.º 25
0
def resample_arg_region(arg, seqs, region_start, region_end,
                        ntimes=20, rho=1.5e-8, mu=2.5e-8,
                        popsizes=1e4, times=None, carg=False,
                        refine=1, verbose=False):
    """
    Resample the part of an ARG between `region_start` and `region_end`.

    When `times` is None the time points come from
    argweaver.get_time_points(ntimes, maxtime=80000, delta=.01).  A scalar
    `popsizes` (float or int) is repeated once per time point.  Names in
    `seqs` that are not yet in the ARG are appended to the name list
    before the sequences are converted.

    Returns `(trees, names)` when `carg` is true, otherwise the ARG built
    by ctrees2arg.
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    # outer timer for the whole call, inner timer for the conversion
    if verbose:
        util.tic("resample arg")
        util.tic("convert arg")
    trees, names = arg2ctrees(arg, times)
    if verbose:
        util.toc()

    # keep the ARG's sequence order, then add every sequence not in the
    # ARG yet
    known = set(names)
    names.extend([name for name, _ in seqs.items() if name not in known])
    cseqs, nseqs, _ = seqs2cseqs(seqs, names)

    # the sequence length is taken from the first named sequence
    seqlen = len(seqs[names[0]])

    # resample arg
    trees = argweaver_resample_arg_region(
        trees, times, len(times),
        popsizes, rho, mu, cseqs, nseqs, seqlen,
        region_start, region_end, refine)

    # either hand back the C structures or convert back to python
    if carg:
        result = (trees, names)
    else:
        result = ctrees2arg(trees, names, times, verbose=verbose)

    if verbose:
        util.toc()

    return result
Exemplo n.º 26
0
def sample_all_arg(seqs, ntimes=20, rho=1.5e-8, mu=2.5e-8, popsizes=1e4,
                   refine=1, times=None, verbose=False, carg=False,
                   prob_path_switch=.1):
    """
    Sample an ARG for `seqs`, starting from a trunk ARG.

    The trunk is made with argweaver.make_trunk_arg for the first key of
    `seqs`, spanning the length of the first sequence.  When `times` is
    None the time points come from
    argweaver.get_time_points(ntimes, maxtime=80000, delta=.01).  A scalar
    `popsizes` (float or int) is repeated once per time point.

    Returns `(trees, names)` when `carg` is true, otherwise the ARG built
    by ctrees2arg.
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    # outer timer for the whole call, inner timer for the conversion
    if verbose:
        util.tic("resample arg")
        util.tic("convert arg")

    trunk = argweaver.make_trunk_arg(
        0, len(seqs.values()[0]), name=seqs.keys()[0])
    trees, names = arg2ctrees(trunk, times)
    if verbose:
        util.toc()

    # keep the ARG's sequence order, then add every sequence not in the
    # ARG yet
    ordered = [seqs[name] for name in names]
    known = set(names)
    for name, seq in seqs.items():
        if name in known:
            continue
        names.append(name)
        ordered.append(seq)

    # resample arg
    nseqs = len(ordered)
    seqlen = len(seqs[names[0]])
    cseqs = (C.c_char_p * nseqs)(*ordered)
    trees = argweaver_resample_all_arg(
        trees, times, len(times),
        popsizes, rho, mu,
        cseqs, nseqs,
        seqlen, refine, prob_path_switch)

    # either hand back the C structures or convert back to python
    if carg:
        result = (trees, names)
    else:
        result = ctrees2arg(trees, names, times, verbose=verbose)

    if verbose:
        util.toc()

    return result
Exemplo n.º 27
0
    def walk(node):
        """
        Merge partitions bottom-up: children first, then this node.

        For an internal node, the parts of its first two children are
        merged with mergeAvg (when conf["merge"] == "avg") or mergeBuh,
        and stored in node.parts.  When conf has an "output" entry and
        the result is non-empty, the parts are also written to
        conf["output"] + str(node.name) + ".part".
        """
        for child in node.children:
            walk(child)

        if node.is_leaf():
            return

        def find_blastfiles(group1, group2):
            # blast files recorded for pairs (leaf1, leaf2) across the groups
            return [blastFileLookup[leaf1][leaf2]
                    for leaf1 in group1
                    for leaf2 in group2
                    if leaf1 in blastFileLookup and
                    leaf2 in blastFileLookup[leaf1]]

        left = node.children[0]
        right = node.children[1]
        leaves1 = left.leaf_names()
        leaves2 = right.leaf_names()

        # sibling blast files
        blastfiles = find_blastfiles(leaves1, leaves2)

        # outgroup blast files: go up one level and pair this node's
        # leaves with every other leaf below the parent
        outblastfiles = []
        if node.parent:
            inleaves = leaves1 + leaves2
            outleaves = set(node.parent.leaf_names()) - set(inleaves)
            outblastfiles = find_blastfiles(inleaves, outleaves)

        util.tic("merging")
        util.logger("leaves1: ", leaves1)
        util.logger("leaves2: ", leaves2)

        use_avg = "merge" in conf and conf["merge"] == "avg"
        if use_avg:
            node.parts = mergeAvg(conf, genes, left.parts, right.parts,
                                  blastfiles, outblastfiles)
        else:
            node.parts = mergeBuh(conf, genes, left.parts, right.parts,
                                  blastfiles)

        nparts = len(node.parts)

        if "output" in conf and nparts > 0:
            partfile = conf["output"] + str(node.name) + ".part"
            util.write_delim(partfile, node.parts)

        util.logger("number of parts: ", nparts)
        if nparts > 0:
            util.logger("largest part:", max(map(len, node.parts)))

        util.toc()
Exemplo n.º 28
0
def ctrees2arg(trees, names, times, verbose=False, delete_arg=True):
    """
    Build a python ARG from the C local-trees structure `trees`.

    The per-tree parent arrays, node ages, SPR records and block lengths
    are copied out of `trees`, the block lengths are turned into
    consecutive (start, end) pairs, and everything is handed to
    treeset2arg() together with `names` and `times`.

    When `delete_arg` is true the C structure is released with
    delete_local_trees() before returning.  When `verbose` is true the
    conversion is timed with util.tic/util.toc.
    """
    if verbose:
        util.tic("convert arg")

    # dimensions of the local trees
    nnodes = get_local_trees_nnodes(trees)
    ntrees = get_local_trees_ntrees(trees)

    # zero-filled buffers (one row per local tree) for the C side to fill
    ptrees = [[0] * nnodes for _ in range(ntrees)]
    ages = [[0] * nnodes for _ in range(ntrees)]
    sprs = [[0, 0, 0, 0] for _ in range(ntrees)]
    blocklens = [0] * ntrees
    get_local_trees_ptrees(trees, ptrees, ages, sprs, blocklens)

    # re-slice every row so that only plain python lists are passed on
    ptrees = [row[:nnodes] for row in ptrees]
    ages = [row[:nnodes] for row in ages]
    sprs = [row[:4] for row in sprs]

    # turn block lengths into consecutive (start, end) coordinates
    blocks = []
    pos = 0
    for blocklen in blocklens:
        blocks.append((pos, pos + blocklen))
        pos += blocklen

    assert len(names) == ((nnodes + 1) / 2)

    arg = treeset2arg(ptrees, ages, sprs, blocks, names, times)

    if delete_arg:
        delete_local_trees(trees)

    if verbose:
        util.toc()

    return arg
Exemplo n.º 29
0
    def addGenes(self, species, gff_files, region_filter=lambda x: x):
        """
        Populate the Genes table from a set of GFF files.

        species       -- sequence zipped with `gff_files`; the species value
                         itself is not used inside the loop
        gff_files     -- GFF files read with gff.read_gff()
        region_filter -- passed to gff.read_gff() as `regionFilter`

        One row is inserted per distinct region ID.  A gene's family id is
        stored as "NONE" when the gene is not in self.fams.genelookup or
        when its family has fewer than two genes.
        """

        # create the Genes table if it does not exist yet
        if not tableExists(self.cur, "Genes"):
            self.makeGenesTable()

        # gene IDs that have already been inserted
        dups = set()

        util.tic("add genes")
        for sp, gff_file in zip(species, gff_files):
            for region in gff.read_gff(gff_file, regionFilter=region_filter):
                gene = region.data["ID"]
                #gene = row["name"]

                # family lookup; families with a single gene count as "NONE"
                if gene in self.fams.genelookup:
                    famid = self.fams.getFamid(gene)
                    if len(self.fams.getGenes(famid)) < 2:
                        famid = "NONE"
                else:
                    famid = "NONE"

                # only the first occurrence of a gene ID is inserted
                if gene in dups:
                    continue
                dups.add(gene)

                assert region.start <= region.end

                # common name and description default to empty strings
                if gene in self.gene2name:
                    common = self.gene2name[gene]["name"]
                    desc = self.gene2name[gene]["description"]
                else:
                    common = ""
                    desc = ""

                # NOTE(review): the statement is built by string formatting;
                # only `desc` has its double quotes stripped, so a quote in
                # any other field would break the SQL -- consider a
                # parameterized query once the cursor's paramstyle is known.
                cmd = """INSERT INTO Genes VALUES 
                               ("%s", "%s", "%s", "%s", %d, %d, %d, "%s", "%s");""" % \
                            (gene,
                             common,
                             self.gene2species(gene),
                             region.seqname,
                             region.start,
                             region.end,
                             region.strand,
                             desc.replace('"', ''),
                             famid)

                self.cur.execute(cmd)
        util.toc()
    def addGenes(self, species, gff_files, region_filter=lambda x: x):
        """
        Populate the Genes table from a set of GFF files.

        species       -- sequence zipped with `gff_files`; the species value
                         itself is not used inside the loop
        gff_files     -- GFF files read with gff.read_gff()
        region_filter -- passed to gff.read_gff() as `regionFilter`

        One row is inserted per distinct region ID.  A gene's family id is
        stored as "NONE" when the gene is not in self.fams.genelookup or
        when its family has fewer than two genes.
        """

        # create the Genes table if it does not exist yet
        if not tableExists(self.cur, "Genes"):
            self.makeGenesTable()

        # gene IDs that have already been inserted
        dups = set()

        util.tic("add genes")
        for sp, gff_file in zip(species, gff_files):
            for region in gff.read_gff(gff_file, regionFilter=region_filter):
                gene = region.data["ID"]
                #gene = row["name"]

                # family lookup; families with a single gene count as "NONE"
                if gene in self.fams.genelookup:
                    famid = self.fams.getFamid(gene)
                    if len(self.fams.getGenes(famid)) < 2:
                        famid = "NONE"
                else:
                    famid = "NONE"

                # only the first occurrence of a gene ID is inserted
                if gene in dups:
                    continue
                dups.add(gene)

                assert region.start <= region.end

                # common name and description default to empty strings
                if gene in self.gene2name:
                    common = self.gene2name[gene]["name"]
                    desc = self.gene2name[gene]["description"]
                else:
                    common = ""
                    desc = ""

                # NOTE(review): the statement is built by string formatting;
                # only `desc` has its double quotes stripped, so a quote in
                # any other field would break the SQL -- consider a
                # parameterized query once the cursor's paramstyle is known.
                cmd = ("""INSERT INTO Genes VALUES
                         ("%s", "%s", "%s", "%s", %d, %d, %d, "%s", "%s");""" %
                       (gene,
                        common,
                        self.gene2species(gene),
                        region.seqname,
                        region.start,
                        region.end,
                        region.strand,
                        desc.replace('"', ''),
                        famid))

                self.cur.execute(cmd)
        util.toc()
Exemplo n.º 31
0
def resample_mcmc_arg(arg, seqs, ntimes=20,
                      rho=1.5e-8, mu=2.5e-8, popsizes=1e4,
                      refine=1, times=None, verbose=False, carg=False,
                      window=200000, niters2=5):
    """
    Resample an ARG for `seqs` with the C MCMC sampler.

    `times` defaults to argweaver.get_time_points(ntimes, maxtime=80000,
    delta=.01).  A scalar `popsizes` is expanded to one value per time
    point.  Sequences that are not yet leaves of `arg` are appended to the
    name list before sampling.

    Returns the (trees, names) pair when `carg` is true, otherwise the
    result converted back to python with ctrees2arg().
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    # outer timer for the whole call, inner timer for the conversion
    if verbose:
        util.tic("resample arg")
        util.tic("convert arg")
    trees, names = arg2ctrees(arg, times)
    if verbose:
        util.toc()

    # keep the ARG's leaf order, then add every sequence not in the ARG yet
    known = set(names)
    names = list(names) + [name for name in seqs if name not in known]
    seqs2, nseqs, seqlen = seqs2cseqs(seqs, names)

    # run the sampler
    trees = argweaver_resample_mcmc_arg(
        trees, times, len(times),
        popsizes, rho, mu,
        seqs2, nseqs, seqlen, refine, niters2, window)

    if carg:
        result = (trees, names)
    else:
        # convert back to a python ARG
        result = ctrees2arg(trees, names, times, verbose=verbose)

    if verbose:
        util.toc()

    return result
Exemplo n.º 32
0
 def processFunc():
     """
     Start blastall on the next batch of `split` query sequences.

     Removes the temp file of the previous batch (if any) and logs the
     progress, then writes the next batch to a fresh temp file and opens
     a pipe to blastall.  Returns that pipe, or False when no sequences
     are left.
     """
     previous = closure["oldtmp"]
     if previous is not None:
         # the previous batch is finished: clean up and report progress
         os.remove(previous)
         elapse = util.toc()
         closure["time"] += elapse

         done = closure["index"]
         total = len(seqs.keys())
         percent = 100 * float(done) / total
         spent = closure["time"] / 60.0
         remaining = elapse / split * (total - done) / 60.0
         util.log("blasted %d of %d sequences (%.1f%%), elapse %.0f m, left %.0f m" % (
             done, total, percent, spent, remaining))

     util.tic()

     # names of the next batch of query sequences
     start = closure["index"]
     names = seqs.keys()[start:start + split]
     if not names:
         # nothing left to blast
         return False

     # write the batch and launch blast on it
     tmpfile = util.tempfile(".", "blastp", ".fasta")
     seqs.write(tmpfile, names=names)
     command = "blastall -p %s -d %s -i %s -m 8 -e .1 %s" % \
         (prog, databaseFile, tmpfile, options)
     pipe = os.popen(command)

     # remember what to clean up and where to continue next time
     closure["oldtmp"] = tmpfile
     closure["index"] = start + split

     return pipe
Exemplo n.º 33
0
 def processFunc():
     """
     Start blastall on the next batch of `split` query sequences.

     Removes the temp file of the previous batch (if any) and logs the
     progress, then writes the next batch to a fresh temp file and opens
     a pipe to blastall.  Returns that pipe, or False when no sequences
     are left.
     """
     previous = closure["oldtmp"]
     if previous is not None:
         # the previous batch is finished: clean up and report progress
         os.remove(previous)
         elapse = util.toc()
         closure["time"] += elapse

         done = closure["index"]
         total = len(seqs.keys())
         percent = 100 * float(done) / total
         spent = closure["time"] / 60.0
         remaining = elapse / split * (total - done) / 60.0
         util.log("blasted %d of %d sequences (%.1f%%), elapse %.0f m, left %.0f m" % (
             done, total, percent, spent, remaining))

     util.tic()

     # names of the next batch of query sequences
     start = closure["index"]
     names = seqs.keys()[start:start + split]
     if not names:
         # nothing left to blast
         return False

     # write the batch and launch blast on it
     tmpfile = util.tempfile(".", "blastp", ".fasta")
     seqs.write(tmpfile, names=names)
     command = "blastall -p %s -d %s -i %s -m 8 -e .1 %s" % \
         (prog, databaseFile, tmpfile, options)
     pipe = os.popen(command)

     # remember what to clean up and where to continue next time
     closure["oldtmp"] = tmpfile
     closure["index"] = start + split

     return pipe
Exemplo n.º 34
0
def dndsMatrix(seqs, saveOutput="", verbose=False, safe=True):
    """
    Run the external `yn00` program on `seqs` and return (dnmat, dsmat).

    seqs       -- alignment; when `safe` is true it is first mapped through
                  removeStopCodons
    saveOutput -- when non-empty, the working directory is kept via
                  phylip.save_temp_dir(); otherwise it is cleaned up
    verbose    -- when false, yn00's stdout is sent to /dev/null
    safe       -- when true, a failure to read the result matrices yields
                  all-zero dummy matrices instead of an exception

    Each returned matrix is whatever phylip.read_dist_matrix() returns for
    "2YN.dN" / "2YN.dS", or a (labels, zero matrix) pair in the fallback
    case.  Raises Exception when the matrices cannot be read and `safe` is
    false.
    """
    if safe:
        seqs = alignlib.mapalign(seqs, valfunc=removeStopCodons)

    phylip.validate_seqs(seqs)
    # presumably switches into a scratch directory (all paths below are
    # relative) -- confirm against phylip.create_temp_dir
    cwd = phylip.create_temp_dir()

    util.tic("yn00 on %d of length %d" % (len(seqs), len(seqs.values()[0])))

    # create input; the files are closed explicitly so that everything is
    # flushed to disk before yn00 is started
    with open("seqfile.phylip", "w") as seqfile:
        labels = phylip.write_phylip_align(seqfile, seqs)
    with open("labels", "w") as labelfile:
        util.write_list(labelfile, labels)

    # create control file
    with open("yn00.ctl", "w") as out:
        out.write("seqfile = seqfile.phylip\n")
        out.write("outfile = outfile\n")

    # run yn00
    if verbose:
        os.system("yn00 yn00.ctl")
    else:
        os.system("yn00 yn00.ctl > /dev/null")

    try:
        dnmat = phylip.read_dist_matrix("2YN.dN")
        dsmat = phylip.read_dist_matrix("2YN.dS")
    except Exception:
        # could not make distance matrix
        if safe:
            # make dummy matrices; every row is its own list so that
            # changing one cell does not change the whole column
            size = len(labels)
            dnmat = labels, [[0] * size for _ in range(size)]
            dsmat = labels, [[0] * size for _ in range(size)]
        else:
            raise Exception("could not read dn or ds matrices")

    if saveOutput != "":
        phylip.save_temp_dir(cwd, saveOutput)
    else:
        phylip.cleanup_temp_dir(cwd)

    util.toc()

    return dnmat, dsmat
def dndsMatrix(seqs, saveOutput="", verbose=False, safe=True):
    """
    Run the external `yn00` program on `seqs` and return (dnmat, dsmat).

    seqs       -- alignment; when `safe` is true it is first mapped through
                  removeStopCodons
    saveOutput -- when non-empty, the working directory is kept via
                  phylip.save_temp_dir(); otherwise it is cleaned up
    verbose    -- when false, yn00's stdout is sent to /dev/null
    safe       -- when true, a failure to read the result matrices yields
                  all-zero dummy matrices instead of an exception

    Each returned matrix is whatever phylip.read_dist_matrix() returns for
    "2YN.dN" / "2YN.dS", or a (labels, zero matrix) pair in the fallback
    case.  Raises Exception when the matrices cannot be read and `safe` is
    false.
    """
    if safe:
        seqs = alignlib.mapalign(seqs, valfunc=removeStopCodons)

    phylip.validate_seqs(seqs)
    # presumably switches into a scratch directory (all paths below are
    # relative) -- confirm against phylip.create_temp_dir
    cwd = phylip.create_temp_dir()

    util.tic("yn00 on %d of length %d" % (len(seqs), len(seqs.values()[0])))

    # create input; the files are closed explicitly so that everything is
    # flushed to disk before yn00 is started
    with open("seqfile.phylip", "w") as seqfile:
        labels = phylip.write_phylip_align(seqfile, seqs)
    with open("labels", "w") as labelfile:
        util.write_list(labelfile, labels)

    # create control file
    with open("yn00.ctl", "w") as out:
        out.write("seqfile = seqfile.phylip\n")
        out.write("outfile = outfile\n")

    # run yn00
    if verbose:
        os.system("yn00 yn00.ctl")
    else:
        os.system("yn00 yn00.ctl > /dev/null")

    try:
        dnmat = phylip.read_dist_matrix("2YN.dN")
        dsmat = phylip.read_dist_matrix("2YN.dS")
    except Exception:
        # could not make distance matrix
        if safe:
            # make dummy matrices; every row is its own list so that
            # changing one cell does not change the whole column
            size = len(labels)
            dnmat = labels, [[0] * size for _ in range(size)]
            dsmat = labels, [[0] * size for _ in range(size)]
        else:
            raise Exception("could not read dn or ds matrices")

    if saveOutput != "":
        phylip.save_temp_dir(cwd, saveOutput)
    else:
        phylip.cleanup_temp_dir(cwd)

    util.toc()

    return dnmat, dsmat
Exemplo n.º 36
0
    def addPfamGenes(self, pfamfile):
        """
        Insert the pfam domain hits listed in `pfamfile` into the database.

        `pfamfile` is read with tablelib.read_table(); each row must provide
        "locus", "pfam_acc", "start", "end", "score" and "evalue".  The
        accession is stored with everything from its first "." onwards
        removed.
        """

        # NOTE(review): the existence check names "PfamDomains" while the
        # INSERT below targets "GenePfamDomains" -- confirm which table
        # makePfamTable() creates.
        if not tableExists(self.cur, "PfamDomains"):
            self.makePfamTable()

        util.tic("add pfam domains")

        pfams = tablelib.read_table(pfamfile)

        # raw string: "\." is not a valid escape in a normal string literal;
        # compiled once instead of being re-parsed for every row
        suffix = re.compile(r"\..*$")

        for row in pfams:
            name = suffix.sub("", row["pfam_acc"])

            self.cur.execute("""INSERT INTO GenePfamDomains VALUES
                                ("%s", "%s", %d, %d, %f, %f);""" %
                             (row["locus"], name, row["start"], row["end"],
                              row["score"], row["evalue"]))
        util.toc()
    def addPfamGenes(self, pfamfile):
        """
        Insert the pfam domain hits listed in `pfamfile` into the database.

        `pfamfile` is read with tablelib.read_table(); each row must provide
        "locus", "pfam_acc", "start", "end", "score" and "evalue".  The
        accession is stored with everything from its first "." onwards
        removed.
        """

        # NOTE(review): the existence check names "PfamDomains" while the
        # INSERT below targets "GenePfamDomains" -- confirm which table
        # makePfamTable() creates.
        if not tableExists(self.cur, "PfamDomains"):
            self.makePfamTable()

        util.tic("add pfam domains")

        pfams = tablelib.read_table(pfamfile)

        # raw string: "\." is not a valid escape in a normal string literal;
        # compiled once instead of being re-parsed for every row
        suffix = re.compile(r"\..*$")

        for row in pfams:
            name = suffix.sub("", row["pfam_acc"])

            self.cur.execute("""INSERT INTO GenePfamDomains VALUES
                                ("%s", "%s", %d, %d, %f, %f);""" %
                             (row["locus"], name, row["start"],
                              row["end"], row["score"], row["evalue"]))
        util.toc()
Exemplo n.º 38
0
def boot_neighbor(seqs,
                  iters=100,
                  seed=None,
                  output=None,
                  verbose=True,
                  force=False):
    """
    Build bootstrapped trees by running seqboot, protdist and neighbor.

    seqs    -- alignment to bootstrap
    iters   -- number of bootstrap replicates
    seed    -- seed handed to seqboot and neighbor; when None an odd number
               between 1 and 2001 is drawn
    output  -- when given, the raw "outtree" file is moved to
               "../" + output and the sequence labels are returned
    verbose -- passed through to exec_phylip
    force   -- not used by this function

    Returns the labels when `output` is given, otherwise a list of `iters`
    trees read from "outtree" and renamed with the sequence labels.
    """
    if seed is None:
        # random.randint is the stdlib spelling (randInt does not exist)
        seed = random.randint(0, 1000) * 2 + 1

    validate_seqs(seqs)
    cwd = create_temp_dir()
    util.tic("boot_neighbor on %d of length %d" %
             (len(seqs), len(seqs.values()[0])))

    # create input
    labels = write_phylip_align(file("infile", "w"), seqs)

    exec_phylip("seqboot", "r\n%d\ny\n%d" % (iters, seed), verbose)

    # each program reads "infile", so the previous output is renamed
    os.rename("outfile", "infile")
    exec_phylip("protdist", "m\nd\n%d\ny" % iters, verbose)

    os.rename("outfile", "infile")
    exec_phylip("neighbor", "m\n%d\n%d\ny" % (iters, seed), verbose)

    util.toc()

    # hand over the raw tree file when an output name was given
    if output is not None:
        os.rename("outtree", "../" + output)
        cleanup_temp_dir(cwd)
        return labels

    # otherwise read the tree samples
    trees = []
    infile = file("outtree")
    for _ in xrange(iters):
        tree = treelib.Tree()
        tree.read_newick(infile)
        # same helper name as boot_proml uses; the previous spelling
        # "rename_tree_with_name" appears nowhere else
        rename_tree_with_names(tree, labels)
        trees.append(tree)
    infile.close()
    cleanup_temp_dir(cwd)
    return trees
Exemplo n.º 39
0
def argweaver_forward_algorithm(arg, seqs, rho=1.5e-8,
                                mu=2.5e-8, popsizes=1e4, times=None,
                                ntimes=20, maxtime=180000,
                                verbose=False,
                                prior=None, internal=False, slow=False):
    """
    Run the C forward algorithm and return its probability table.

    arg      -- python ARG or an already converted (trees, names) pair
    seqs     -- mapping from sequence name to sequence; the sequences named
                in the ARG come first, all remaining ones are appended
    times    -- time points; defaults to argweaver.get_time_points(ntimes,
                maxtime, delta=.01)
    popsizes -- scalar or one value per time point
    prior    -- optional prior passed to the C routine (None or an empty
                list means "no prior")
    internal, slow -- passed through to the C routine

    Returns a list with one row per alignment column, each truncated to the
    number of states reported by argweaver_get_nstates().
    """
    # avoid a shared mutable default; an empty list means "no prior"
    if prior is None:
        prior = []
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=maxtime, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    if verbose:
        util.tic("forward")

    # only trees converted here are owned (and later freed) by this call
    owns_trees = not is_carg(arg)
    if owns_trees:
        trees, names = arg2ctrees(arg, times)
    else:
        trees, names = arg

    seqs2 = [seqs[node] for node in names]
    for name in seqs.keys():
        if name not in names:
            seqs2.append(seqs[name])
    seqlen = len(seqs2[0])

    fw = argweaver_forward_alg(trees, times, len(times),
                               popsizes, rho, mu,
                               (C.c_char_p * len(seqs2))(*seqs2), len(seqs2),
                               seqlen, len(prior) > 0, prior, internal,
                               slow)

    nstates = [0] * seqlen
    argweaver_get_nstates(trees, len(times), internal, nstates)

    # copy the used part of every row before the C matrix is released
    probs = [row[:n] for row, n in zip(fw, nstates)]

    delete_forward_matrix(fw, seqlen)
    if owns_trees:
        # release the temporary C trees, as est_popsizes_trees does
        delete_local_trees(trees)

    if verbose:
        util.toc()

    return probs
Exemplo n.º 40
0
def est_popsizes_trees(arg, times, step, verbose=False):
    """
    Estimate population sizes from the local trees of `arg`.

    Converts `arg` with arg2ctrees(), lets the C routine fill one value
    per interval between consecutive time points (len(times) - 1 values)
    and returns that list.  The converted trees are released afterwards
    unless `arg` is a carg.
    """
    # NOTE(review): arg2ctrees() is called even when `arg` is already a
    # carg -- confirm that it accepts one.
    if verbose:
        util.tic("convert arg")
    trees, _names = arg2ctrees(arg, times)

    if verbose:
        util.toc()
        util.tic("estimate popsizes")

    # output buffer filled in place by the C routine
    nintervals = len(times) - 1
    popsizes = [0.0] * nintervals
    argweaver_est_popsizes_trees(trees, times, len(times), step, popsizes)

    if verbose:
        util.toc()

    if not is_carg(arg):
        delete_local_trees(trees)

    return popsizes
Exemplo n.º 41
0
def boot_proml(seqs,
               iters=100,
               seed=1,
               jumble=5,
               output=None,
               verbose=True,
               force=False):
    """
    Build bootstrapped trees by running seqboot followed by proml.

    When `output` is given, the raw "outtree" file is moved to
    "../" + output and the sequence labels are returned.  Otherwise
    `iters` trees are read from "outtree", renamed with the sequence
    labels and returned as a list.  `force` is not used.
    """
    validate_seqs(seqs)
    cwd = create_temp_dir()
    util.tic("bootProml on %d of length %d" %
             (len(seqs), len(seqs.values()[0])))

    # write the alignment where seqboot expects it
    labels = write_phylip_align(file("infile", "w"), seqs)

    exec_phylip("seqboot", "y\n%d" % seed, verbose)

    # seqboot's output is proml's input
    os.rename("outfile", "infile")
    proml_args = "m\nD\n%d\n%d\n%d\ny" % (iters, seed, jumble)
    exec_phylip("proml", proml_args, verbose)

    util.toc()

    # hand over the raw tree file when an output name was given
    if output is not None:
        os.rename("outtree", "../" + output)
        cleanup_temp_dir(cwd)
        return labels

    # otherwise read the tree samples
    trees = []
    infile = file("outtree")
    for _ in xrange(iters):
        sample = treelib.Tree()
        sample.read_newick(infile)
        rename_tree_with_names(sample, labels)
        trees.append(sample)
    infile.close()
    cleanup_temp_dir(cwd)
    return trees
Exemplo n.º 42
0
def boot_neighbor(seqs, iters=100, seed=None, output=None, 
                 verbose=True, force=False):
    """
    Build bootstrapped trees by running seqboot, protdist and neighbor.

    seqs    -- alignment to bootstrap
    iters   -- number of bootstrap replicates
    seed    -- seed handed to seqboot and neighbor; when None an odd number
               between 1 and 2001 is drawn
    output  -- when given, the raw "outtree" file is moved to
               "../" + output and the sequence labels are returned
    verbose -- passed through to exec_phylip
    force   -- not used by this function

    Returns the labels when `output` is given, otherwise a list of `iters`
    trees read from "outtree" and renamed with the sequence labels.
    """
    if seed is None:
        # random.randint is the stdlib spelling (randInt does not exist)
        seed = random.randint(0, 1000) * 2 + 1

    validate_seqs(seqs)
    cwd = create_temp_dir()
    util.tic("boot_neighbor on %d of length %d" % (len(seqs), len(seqs.values()[0])))

    # create input
    labels = write_phylip_align(file("infile", "w"), seqs)

    exec_phylip("seqboot", "r\n%d\ny\n%d" % (iters, seed), verbose)

    # each program reads "infile", so the previous output is renamed
    os.rename("outfile", "infile")
    exec_phylip("protdist", "m\nd\n%d\ny" % iters, verbose)

    os.rename("outfile", "infile")
    exec_phylip("neighbor", "m\n%d\n%d\ny" % (iters, seed), verbose)

    util.toc()

    # hand over the raw tree file when an output name was given
    if output is not None:
        os.rename("outtree", "../" + output)
        cleanup_temp_dir(cwd)
        return labels

    # otherwise read the tree samples
    trees = []
    infile = file("outtree")
    for _ in xrange(iters):
        tree = treelib.Tree()
        tree.read_newick(infile)
        # same helper name as boot_proml uses; the previous spelling
        # "rename_tree_with_name" appears nowhere else
        rename_tree_with_names(tree, labels)
        trees.append(tree)
    infile.close()
    cleanup_temp_dir(cwd)
    return trees
Exemplo n.º 43
0
def test_forward():

    k = 4
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(100e3 / 20)
    times = argweaver.get_time_points(ntimes=100)

    arg = arglib.sample_arg_smc(k, 2 * n, rho, start=0, end=length)
    muts = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, muts)

    print "muts", len(muts)
    print "recomb", len(arglib.get_recomb_pos(arg))

    argweaver.discretize_arg(arg, times)

    # remove chrom
    new_name = "n%d" % (k - 1)
    arg = argweaver.remove_arg_thread(arg, new_name)

    carg = argweaverc.arg2ctrees(arg, times)

    util.tic("C fast")
    probs1 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times)
    util.toc()

    util.tic("C slow")
    probs2 = argweaverc.argweaver_forward_algorithm(carg,
                                                    seqs,
                                                    times=times,
                                                    slow=True)
    util.toc()

    for i, (col1, col2) in enumerate(izip(probs1, probs2)):
        for a, b in izip(col1, col2):
            fequal(a, b, rel=.0001)
Exemplo n.º 44
0
def sample_arg(seqs, ntimes=20, rho=1.5e-8, mu=2.5e-8, popsizes=1e4,
               refine=0, nremove=1, times=None, verbose=False,
               carg=False):
    """
    Sample an ARG for the sequences in `seqs` with the C sampler.

    `times` defaults to argweaver.get_time_points(ntimes, maxtime=80000,
    delta=.01).  A scalar `popsizes` is expanded to one value per time
    point.

    Returns the (trees, names) pair when `carg` is true, otherwise the
    result converted to python with ctrees2arg().
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    if verbose:
        util.tic("sample arg")

    # split the mapping into parallel name / sequence lists
    pairs = seqs.items()
    names = [name for name, _ in pairs]
    seqs2 = [seq for _, seq in pairs]

    # run the sampler
    cseqs = (C.c_char_p * len(seqs))(*seqs2)
    trees = argweaver_sample_arg_refine(
        times, len(times),
        popsizes, rho, mu,
        cseqs, len(seqs), len(seqs2[0]), refine,
        nremove)

    if carg:
        result = (trees, names)
    else:
        # convert to a python ARG
        result = ctrees2arg(trees, names, times, verbose=verbose)

    if verbose:
        util.toc()

    return result
Exemplo n.º 45
0
    def recon(self, nsearch=1000):
        """Perform reconciliation"""
        
        self.init_search()
        proposal = self.proposer.init_proposal()
        self.maxrecon = proposal.copy()
        for i in xrange(nsearch):
            if i % 10 == 0:
                print "search", i

            util.tic("eval")
            p = self.eval_proposal(proposal)
            util.toc()

            util.tic("prop")
            self.eval_search(p, proposal)
            proposal = self.proposer.next_proposal()
            util.toc()
        
        # rename locus tree nodes
        dlcoal.rename_nodes(self.maxrecon.locus_tree, self.name_internal)
        
        return self.maxrecon
Exemplo n.º 46
0
def proml_treelk(aln, tree, verbose=True, force=False, args="u\ny"):
    """
    Run proml on alignment `aln` with `tree` supplied as "intree".

    Returns (logl, tree): the value read from "outfile" by read_logl()
    and the tree read from "outtree".  `force` is not used.
    """
    validate_seqs(aln)
    cwd = create_temp_dir()

    nseqs = len(aln)
    ncols = len(aln.values()[0])
    util.tic("proml on %d of length %d" % (nseqs, ncols))

    # write alignment and input tree
    labels = write_phylip_align(file("infile", "w"), aln)
    write_in_tree("intree", tree, labels)

    exec_phylip("proml", args, verbose)

    # the results must be read before the working directory is cleaned up
    result = (read_logl("outfile"), read_out_tree("outtree", labels))

    cleanup_temp_dir(cwd)
    util.toc()

    return result
Exemplo n.º 47
0
def proml_treelk(aln, tree, verbose=True, force = False, args="u\ny"):
    """
    Run proml on alignment `aln` with `tree` supplied as "intree".

    Returns (logl, tree): the value read from "outfile" by read_logl()
    and the tree read from "outtree".  `force` is not used.
    """
    validate_seqs(aln)
    cwd = create_temp_dir()

    nseqs = len(aln)
    ncols = len(aln.values()[0])
    util.tic("proml on %d of length %d" % (nseqs, ncols))

    # write alignment and input tree
    labels = write_phylip_align(file("infile", "w"), aln)
    write_in_tree("intree", tree, labels)

    exec_phylip("proml", args, verbose)

    # the results must be read before the working directory is cleaned up
    result = (read_logl("outfile"), read_out_tree("outtree", labels))

    cleanup_temp_dir(cwd)
    util.toc()

    return result
Exemplo n.º 48
0
def resample_arg_regions(arg, seqs, niters, width=1000,
                         ntimes=20, rho=1.5e-8, mu=2.5e-8,
                         popsize=1e4, times=None, carg=False,
                         verbose=False):
    """
    Repeatedly resample the window of `arg` with the most recombinations.

    arg     -- python ARG or (trees, names) carg
    seqs    -- mapping from name to sequence; the length of the first
               sequence bounds the region
    niters  -- maximum number of resampling rounds
    width   -- window width handed to stats.iter_window_index()
    rho, mu, times, carg -- passed to argweaver.resample_arg_region()
    verbose -- time and log every round, and pass the flag on
    ntimes, popsize -- not used by this function

    In every round the window containing the most recombination positions
    is padded by 10 sites on both sides (clamped to [10, seqlen - 10]) and
    resampled.  The loop stops early when no window is found.  Returns the
    resampled ARG.
    """
    seqlen = len(seqs.values()[0])

    if is_carg(arg):
        trees, names = arg
        arg2 = ctrees2arg(trees, names, times, verbose=verbose,
                          delete_arg=False)
        recomb_pos = [x.pos for x in arg2 if x.event == "recomb"]
    else:
        recomb_pos = [x.pos for x in arg if x.event == "recomb"]

    for it in range(niters):
        # find the window holding the most recombination positions
        region = None
        maxr = 0
        for i, j, a, b in stats.iter_window_index(recomb_pos, width):
            r = j - i + 1
            if r > maxr:
                maxr = r
                region = [max(recomb_pos[i]-10, 10),
                          min(recomb_pos[j]+10, seqlen - 10)]

        if region is None:
            # no window found (e.g. no recombinations): nothing to
            # resample, and `region` must not be used unbound or stale
            break

        if verbose:
            util.tic("sample ARG region %s" % region)
        arg = argweaver.resample_arg_region(arg, seqs, region[0], region[1],
                                            rho=rho, mu=mu, times=times,
                                            carg=carg, verbose=verbose)
        # recombination positions are only refreshed for python ARGs
        if not carg:
            recomb_pos = [x.pos for x in arg if x.event == "recomb"]
            if verbose:
                util.logger("%d: # recombs %d" % (it, len(recomb_pos)))
        if verbose:
            util.toc()

    return arg
    def draw_placed(self):
        """
        Build the drawing group for all placed fragments.

        The group contains, in this order, one widget per fragment, one
        translated gene widget per entry of self.region_layout, and the
        matches drawn for every fragment.  The group is stored in
        self.groupid and returned.
        """
        util.tic("create draw code")

        # fragment widgets
        vis = [self.frag_widget(frag) for frag in self.frags]

        # gene widgets, moved to their layout position
        for reg, layout in self.region_layout.iteritems():
            region = self.db.get_region(reg)
            vis.append(translate(layout.x, layout.y,
                                 self.gene_widget(region)))

        # matches; `drawn` is shared between all draw_matches() calls
        drawn = set()
        vis.extend(self.draw_matches(frag.genome, frag.chrom,
                                     frag.start, frag.end, drawn)
                   for frag in self.frags)

        util.toc()

        self.groupid = group(*vis)
        return self.groupid
Exemplo n.º 50
0
def align2tree(prog,
               seqs,
               verbose=True,
               force=False,
               args=None,
               usertree=None,
               saveOutput="",
               bootiter=1,
               seed=1,
               jumble=1):
    """
    Run the PHYLIP program `prog` on alignment `seqs` and return its tree.

    prog       -- name of the program handed to exec_phylip
    seqs       -- alignment
    verbose    -- passed through to exec_phylip
    force      -- not used by this function
    args       -- menu input for the program; defaults to "y"
    usertree   -- optional tree written to "intree"; adds the "u" option
    saveOutput -- when non-empty, the working directory is kept via
                  save_temp_dir(); otherwise it is cleaned up
    bootiter   -- when > 1 the alignment is bootstrapped with seqboot first
    seed, jumble -- used for the bootstrap options

    Returns a single tree when bootiter == 1, otherwise whatever
    read_out_tree() returns for `bootiter` trees.  When the program gives
    up, a star tree over the sequence names is returned instead (wrapped
    in a list when bootiter != 1).
    """
    validate_seqs(seqs)
    cwd = create_temp_dir()

    util.tic("%s on %d of length %d" %
             (prog, len(seqs), len(seqs.values()[0])))

    # create input
    labels = write_phylip_align(file("infile", "w"), seqs)
    util.write_list(file("labels", "w"), labels)

    # initialize default arguments
    if args is None:
        args = "y"

    # create user tree if given
    if usertree is not None:
        write_in_tree("intree", usertree, labels)
        args = "u\n" + args  # add user tree option

    # bootstrap alignment if needed
    if bootiter > 1:
        exec_phylip("seqboot", "r\n%d\ny\n%d" % (bootiter, seed), verbose)
        os.rename("outfile", "infile")

        # add bootstrap arguments
        args = "m\nD\n%d\n%d\n%d\n%s" % (bootiter, seed, jumble, args)

    # run phylip
    exec_phylip(prog, args, verbose)

    if is_phylip_give_up("outfile"):
        # PHYLIP gave up: fall back to a star tree
        star = treelib.Tree()
        star.make_root()
        for key in seqs:
            star.add_child(star.root, treelib.TreeNode(key))

        # Previously this path returned an undefined variable when
        # bootiter != 1.  NOTE(review): assumes bootstrap callers expect a
        # list of trees -- confirm against read_out_tree().
        result = star if bootiter == 1 else [star]

    elif bootiter == 1:
        # parse the single tree
        result = read_out_tree("outtree", labels, bootiter)

        # parse likelihood
        if prog in ["dnaml", "proml"]:
            result.data["logl"] = read_logl("outfile")

    else:
        # parse the bootstrap trees
        result = read_out_tree("outtree", labels, bootiter)

    if saveOutput != "":
        save_temp_dir(cwd, saveOutput)
    else:
        cleanup_temp_dir(cwd)

    util.toc()

    return result
Exemplo n.º 51
0
 def wrapper(*args, **kwargs):
     """Call `func` with the given arguments, timed under its own name."""
     util.tic(func.__name__)
     try:
         return func(*args, **kwargs)
     finally:
         # close the timer even when func raises, so that tic/toc calls
         # stay paired
         util.toc()
Exemplo n.º 52
0
def mergeBuh(conf, genes, parts1, parts2, blastfiles):
    """Merge by Best Unidirectional Hits

    conf       -- settings; reads "bitspersite", "coverage", "signif" and
                  "relcutoff"
    genes      -- mapping gene -> record with a "length" entry
    parts1, parts2 -- the two partitions (lists of gene lists) to merge;
                  genes seen in a hit but missing from a partition are
                  appended to it as singleton parts
    blastfiles -- iterable of (blastfile, order) pairs; `order` tells
                  whether the query belongs to side 1 (true) or side 2

    The function is currently disabled: the unconditional assert below
    fails before any work is done.
    """

    # don't use this code without double checking it
    # (everything below is unreachable unless asserts are stripped)
    assert False

    lookup1 = item2part(parts1)
    lookup2 = item2part(parts2)

    # best[(side, part index)] = (best score, partner part key)
    best = util.Dict(dim=1, default=(0, None))

    # pass 1: find the best-scoring hit for every part
    util.tic("read hits")
    for blastfile, order in blastfiles:
        util.tic("determine best hits '%s'" % os.path.basename(blastfile))
        for hit in blast.BlastReader(blastfile):
            # orient the hit so that gene1 is on side 1 and gene2 on side 2
            if order:
                gene1 = blast.query(hit)
                gene2 = blast.subject(hit)
                alnlen1 = blast.queryLength(hit)
                alnlen2 = blast.subjectLength(hit)
            else:
                gene2 = blast.query(hit)
                gene1 = blast.subject(hit)
                alnlen2 = blast.queryLength(hit)
                alnlen1 = blast.subjectLength(hit)
            score = blast.bitscore(hit)

            # coverage is the larger of the two aligned fractions
            len1 = genes[gene1]["length"]
            len2 = genes[gene2]["length"]
            coverage = max(alnlen1 / float(len1), alnlen2 / float(len2))

            # discard a hit that does not pass basic cutoffs
            if blast.bitscore(hit) / float(blast.alignLength(hit)) < \
                   conf["bitspersite"] or \
               coverage < conf["coverage"] or \
               blast.evalue(hit) > conf["signif"]:
                continue

            #if blast.evalue(hit) > conf["signif"]:
            #    continue

            # key of the part holding gene1; unknown genes get a new
            # singleton part
            if gene1 in lookup1:
                part1 = (0, lookup1[gene1])
            else:
                parts1.append([gene1])
                lookup1[gene1] = len(parts1) - 1
                part1 = (0, len(parts1) - 1)

            # same for gene2 on the other side
            if gene2 in lookup2:
                part2 = (1, lookup2[gene2])
            else:
                parts2.append([gene2])
                lookup2[gene2] = len(parts2) - 1
                part2 = (1, len(parts2) - 1)

            # remember the best hit in both directions
            if score > best[part1][0]:
                best[part1] = (score, part2)
            if score > best[part2][0]:
                best[part2] = (score, part1)
        util.toc()

    util.toc()

    # pass 2: join parts whose hits score close enough to their best hit
    util.tic("determine clusters")
    # NOTE(review): `sets` is rebound to a plain dict here, so
    # `sets.UnionFind` below would raise AttributeError if this code were
    # ever reached -- presumably a module named `sets` was intended.
    sets = {}
    for gene in best:
        sets[gene] = sets.UnionFind([gene])

    for blastfile, order in blastfiles:
        util.tic("read hits '%s'" % os.path.basename(blastfile))
        for hit in blast.BlastReader(blastfile):
            # same orientation and filtering as in pass 1
            if order:
                gene1 = blast.query(hit)
                gene2 = blast.subject(hit)
                alnlen1 = blast.queryLength(hit)
                alnlen2 = blast.subjectLength(hit)
            else:
                gene2 = blast.query(hit)
                gene1 = blast.subject(hit)
                alnlen2 = blast.queryLength(hit)
                alnlen1 = blast.subjectLength(hit)
            score = blast.bitscore(hit)

            len1 = genes[gene1]["length"]
            len2 = genes[gene2]["length"]
            coverage = max(alnlen1 / float(len1), alnlen2 / float(len2))

            # discard a hit that does not pass basic cutoffs
            if blast.bitscore(hit) / float(blast.alignLength(hit)) < \
                   conf["bitspersite"] or \
               coverage < conf["coverage"] or \
               blast.evalue(hit) > conf["signif"]:
                continue

            #if blast.evalue(hit) > conf["signif"]:
            #    continue

            # every gene passing the cutoffs was registered in pass 1
            part1 = (0, lookup1[gene1])
            part2 = (1, lookup2[gene2])

            # union when the score reaches relcutoff times the best score
            if score >= best[part1][0] * conf["relcutoff"]:
                sets[part1].union(sets[part2])
            if score >= best[part2][0] * conf["relcutoff"]:
                sets[part2].union(sets[part1])
        util.toc()

    # one representative per cluster
    sets = util.unique([x.root() for x in sets.values()])

    # collect the genes of every cluster from both partitions
    parts = []
    joining = (parts1, parts2)
    # NOTE(review): the loop variable shadows the builtin `set`
    for set in sets:
        parts.append([])
        for i, row in set.members():
            parts[-1].extend(joining[i][row])
    util.toc()

    return parts
Exemplo n.º 53
0
def mergeAvg(conf, genes, parts1, parts2, blastfiles, outblastfiles):
    """Merge two gene partitionings using average BLAST bitscores.

    Every hit in `blastfiles` that passes the cutoffs in `conf` adds its
    bitscore to a running [sum, count] kept for the pair of partitions that
    its two genes belong to.  Each partition is then unioned with the partner
    whose average score is highest, provided that average also exceeds the
    average outgroup score (taken from `outblastfiles`) of both partitions.

    conf          -- mapping read for "bitspersite", "coveragesmall",
                     "coveragebig", "signif" and, optionally, "accept"
                     (a container of genes; hits touching any other gene
                     are ignored)
    genes         -- mapping gene -> record with a "length" entry
    parts1/parts2 -- lists of gene lists; genes that are not yet in any
                     partition are appended IN PLACE as singleton partitions
    blastfiles    -- iterable of (filename, order); when `order` is true the
                     query is looked up in `parts1` and the subject in
                     `parts2`, otherwise the roles are swapped
    outblastfiles -- iterable of (filename, order) with outgroup hits; when
                     `order` is true the query is the ingroup gene

    Returns a list of merged partitions (each a list of genes).
    """
    lookup1 = item2part(parts1)
    lookup2 = item2part(parts2)

    # value is [sum, total]
    hits = util.Dict(dim=2, default=[0, 0])

    accept = conf["accept"] if "accept" in conf else False

    util.tic("read hits")
    for blastfile, order in blastfiles:
        util.tic("determine best hits '%s'" % os.path.basename(blastfile))
        for hit in blast.BlastReader(blastfile):
            if order:
                gene1 = blast.query(hit)
                gene2 = blast.subject(hit)
                alnlen1 = blast.queryLength(hit)
                alnlen2 = blast.subjectLength(hit)
            else:
                gene2 = blast.query(hit)
                gene1 = blast.subject(hit)
                alnlen2 = blast.queryLength(hit)
                alnlen1 = blast.subjectLength(hit)
            score = blast.bitscore(hit)

            # fraction of each gene that is covered by the alignment
            cov1 = alnlen1 / float(genes[gene1]["length"])
            cov2 = alnlen2 / float(genes[gene2]["length"])
            coveragesmall = min(cov1, cov2)
            coveragebig = max(cov1, cov2)

            # discard a hit that does not pass basic cutoffs
            if score / float(blast.alignLength(hit)) < conf["bitspersite"] or \
               coveragesmall < conf["coveragesmall"] or \
               coveragebig < conf["coveragebig"] or \
               blast.evalue(hit) > conf["signif"]:
                continue

            if accept and (gene1 not in accept or gene2 not in accept):
                continue

            # create a key for a partition: (side, index)
            # genes not seen before start a new singleton partition
            if gene1 in lookup1:
                part1 = (0, lookup1[gene1])
            else:
                parts1.append([gene1])
                lookup1[gene1] = len(parts1) - 1
                part1 = (0, len(parts1) - 1)

            if gene2 in lookup2:
                part2 = (1, lookup2[gene2])
            else:
                parts2.append([gene2])
                lookup2[gene2] = len(parts2) - 1
                part2 = (1, len(parts2) - 1)

            # both directions share the same [sum, total] list object
            val = hits[part1][part2]
            val[0] += score
            val[1] += 1
            hits[part2][part1] = val

        util.toc()
    util.toc()

    util.tic("read outgroup hits")
    outbest = util.Dict(default=[0, 0])
    for blastfile, order in outblastfiles:
        util.tic("determine best hits '%s'" % os.path.basename(blastfile))
        for hit in blast.BlastReader(blastfile):
            if order:
                genein = blast.query(hit)
            else:
                genein = blast.subject(hit)
            score = blast.bitscore(hit)

            # create a key for a partition: (side, index)
            if genein in lookup1:
                partin = (0, lookup1[genein])
            elif genein in lookup2:
                # FIX: the original tested the stale `gene1` left over from
                # the previous loop instead of `genein`
                partin = (1, lookup2[genein])
            else:
                continue

            val = outbest[partin]
            val[0] += score
            val[1] += 1

        util.toc()
    util.toc()

    assert len(parts1) == len(unionPart(parts1))
    assert len(parts2) == len(unionPart(parts2))

    util.tic("determine clusters")
    # FIX: the original bound this dict to the name `sets`, which made
    # `sets.UnionFind` resolve to the dict itself (AttributeError).
    # NOTE(review): assumes a module-level `sets` providing UnionFind is in
    # scope, as the original code implies -- confirm against the imports.
    clusters = {}
    for i in xrange(len(parts1)):
        clusters[(0, i)] = sets.UnionFind([(0, i)])
    for i in xrange(len(parts2)):
        clusters[(1, i)] = sets.UnionFind([(1, i)])

    # merge top avg hits
    for part1 in hits:
        o1 = outbest[part1]
        outavg1 = float(o1[0]) / max(o1[1], 1)

        top = 0
        toppart = None

        for part2, (tot, num) in hits[part1].iteritems():
            avg = float(tot) / num
            o2 = outbest[part2]
            outavg2 = float(o2[0]) / max(o2[1], 1)

            # partner must beat both outgroup averages and the current best
            if avg > outavg1 and avg > outavg2 and avg > top:
                top = avg
                toppart = part2

        if toppart is not None:
            clusters[part1].union(clusters[toppart])

    roots = util.unique([x.root() for x in clusters.values()])

    # create partition of genes
    parts = []
    joining = (parts1, parts2)
    for root in roots:
        merged = []
        for side, row in root:
            merged.extend(joining[side][row])
        parts.append(merged)
    util.toc()

    assert len(parts) == len(unionPart(parts))

    return parts
Exemplo n.º 54
0
def phyml(seqs,
          verbose=True,
          args=None,
          usertree=None,
          seqtype="pep",
          saveOutput="",
          bootiter=0,
          opttree=True,
          optbranches=True,
          nrates=4):
    """Run the external `phyml` program on an alignment and return its tree.

    seqs        -- alignment mapping; validated with phylip.validate_seqs
    verbose     -- forwarded to phylip.exec_phylip
    args        -- complete phyml argument string; when None it is built from
                   `seqtype`, `bootiter`, `nrates`, the start tree and the
                   two optimisation flags
    usertree    -- optional start tree; it is unrooted and written to
                   "intree".  Without it the literal "BIONJ" is passed.
    seqtype     -- "dna" (HKY) or "pep" (JTT); only consulted when `args`
                   is None
    saveOutput  -- if non-empty, handed to phylip.save_temp_dir together with
                   the temp dir; otherwise phylip.cleanup_temp_dir is called
    bootiter    -- number of bootstrap iterations; a value of 1 is treated
                   as 0
    opttree     -- emit "y"/"n" for the first optimisation flag
    optbranches -- emit "y"/"n" for the second optimisation flag
    nrates      -- value substituted into the argument string after the
                   model settings

    Returns the tree read from "infile_phyml_tree.txt", with
    tree.data["logl"] set from "infile_phyml_lk.txt".

    Raises AssertionError for an unknown `seqtype` when `args` is None.
    """
    phylip.validate_seqs(seqs)
    cwd = phylip.create_temp_dir()

    util.tic("phyml on %d of length %d" % (len(seqs), len(seqs.values()[0])))

    # create input; `with` guarantees the handles are flushed and closed
    # before phyml is launched
    with open("infile", "w") as out:
        labels = phylip.write_phylip_align(out, seqs)
    with open("labels", "w") as out:
        util.write_list(out, labels)

    options = "y"

    # only bootstrap when iterations are above 1
    if bootiter == 1:
        bootiter = 0

    if usertree is not None:
        usertree = treelib.unroot(usertree)
        phylip.write_in_tree("intree", usertree, labels)
        treefile = "intree"
    else:
        treefile = "BIONJ"

    optimize = ("y " if opttree else "n ") + ("y " if optbranches else "n ")

    if args is None:
        if seqtype == "dna":
            args = "infile 0 s 1 %d HKY e e %d e %s %s" % \
                (bootiter, nrates, treefile, optimize)
        elif seqtype == "pep":
            args = "infile 1 s 1 %d JTT e %d e %s %s" % \
                (bootiter, nrates, treefile, optimize)
        else:
            # raised explicitly (instead of `assert False`) so the check
            # survives `python -O`; exception type and message are unchanged
            raise AssertionError("unknown sequence type '%s'" % seqtype)

    phylip.exec_phylip("phyml %s" % args, options, verbose)

    # parse tree
    tree = phylip.read_out_tree("infile_phyml_tree.txt", labels)

    # parse likelihood
    with open("infile_phyml_lk.txt") as infile:
        tree.data["logl"] = float(infile.read())

    if saveOutput != "":
        phylip.save_temp_dir(cwd, saveOutput)
    else:
        phylip.cleanup_temp_dir(cwd)
    util.toc()

    return tree
Exemplo n.º 55
0
    # Disabled (`if 0`) timing check for readTable on a wide, all-integer
    # table.
    if 0:
        from rasmus import util

        # "##types:" directive declaring 100 int columns, followed by a
        # header row whose column names are "0" .. "99"
        text = [
            "##types:" + "int\t" * 99 + "int", "\t".join(map(str, range(100)))
        ]

        # 10000 data rows, each holding 100 tab-separated "1" values
        for i in range(10000):
            text.append("1\t" * 99 + "1")
        text = "\n".join(text)

        # feed the text through an in-memory stream instead of a file
        stream = StringIO.StringIO(text)

        # time the parse; `tab` is not inspected afterwards
        util.tic("read table")
        tab = readTable(stream)
        util.toc()

    #################################################
    # specialized types
    if 1:
        text = """\
##types:str	int	strand_type
name	num	strand
matt	123	+
alex	456	-
mike	789	+
john	0	+
"""

        class strand_type:
            def __init__(self, text=None):
Exemplo n.º 56
0
    def recon(self, niter=1000, nsearch_locus=1000, nsearch_coal=1000):
        """Alternate locus-tree and coalescent-tree searches for `niter` rounds.

        Each round first keeps the coalescent tree fixed and searches over
        locus trees (`nsearch_locus` is handed to `recon_helper`), then keeps
        the resulting locus tree fixed and searches over coalescent trees
        (`nsearch_coal` is handed to `recon_helper`).

        Returns `self.maxrecon`.
        """
        util.tic("reconciling...")

        # both working trees start out as copies of the initial coal tree
        coal_tree = self.init_coal_tree.copy()
        locus_tree = self.init_coal_tree.copy()

        for i in xrange(niter):
            # ----------------------------------------
            # stage 1: coal_tree fixed, search locus_tree
            util.tic("iter %d: estimating locus_tree" % i)
            self.stage = "locus_tree"

            def locus_search(ltree):
                # factory handed to the proposer; attributes are read from
                # `self` at call time
                return DLCLocusTreeSearch(
                    ltree, self.stree, self.gene2species,
                    self.duprate, self.lossrate,
                    nprescreen=self.nprescreen_locus)

            self.proposer = DLC_ReconProposer_LocusTree(
                coal_tree, self.stree, self.gene2species,
                search=locus_search)
            self.proposer.set_locus_tree(locus_tree.copy())
            best = self.recon_helper(nsearch_locus)

            util.toc()

            # carry the best reconciliation into the next stage; daughters
            # are re-resolved by name against the copied locus tree
            coal_tree = best.coal_tree.copy()
            locus_tree = best.locus_tree.copy()
            daughters = set(locus_tree.nodes[d.name] for d in best.daughters)

            # ----------------------------------------
            # stage 2: locus_tree fixed, search coal_tree
            util.tic("iter %d: estimating coal_tree" % i)
            self.stage = "coal_tree"

            def coal_search(ctree):
                # `locus_tree` and `daughters` are looked up when called
                return DLCCoalTreeSearch(
                    ctree, locus_tree, daughters, self.popsizes,
                    nprescreen=self.nprescreen_coal)

            self.proposer = DLC_ReconProposer_CoalTree(
                self.stree, locus_tree, daughters, self.gene2species,
                search=coal_search)
            self.proposer.set_coal_tree(coal_tree.copy())
            best = self.recon_helper(nsearch_coal)

            # carry the best reconciliation into the next round
            coal_tree = best.coal_tree.copy()
            locus_tree = best.locus_tree.copy()
            daughters = set(locus_tree.nodes[d.name] for d in best.daughters)

            util.toc()

        util.toc()

        # return the best reconciliation
        return self.maxrecon
Exemplo n.º 57
0
def prob_gene_species_alignment_recon(alnfile,
                                      partfile,
                                      stree,
                                      popsizes,
                                      duprate,
                                      lossrate,
                                      subrate,
                                      beta,
                                      pretime,
                                      premean,
                                      coal_tree,
                                      coal_recon,
                                      nsamples_coal,
                                      locus_tree,
                                      locus_recon,
                                      nsamples_locus,
                                      daughters,
                                      rates,
                                      freqs,
                                      alphas,
                                      threads=1,
                                      seed=ALIGNMENT_SEED,
                                      eps=0.1,
                                      info=None):
    """
    Evaluate terms that depend on T^G and R^G.

    That is, fix T^L, R^L, and daughters and evaluate the double integral:
    int int P(t^L | T^L, R^L, S, theta) * P(T^G, R^G, t^G | t^L, T^L, daughters, R^L, theta) * P(A | T^G, t^G) dt^L dt^G

    This is the probability used in the searching process.  It is estimated
    by Monte Carlo: `nsamples_locus` samples of t^L, and for each of them
    `nsamples_coal` samples of t^G.

    alnfile           -- alignment file
    partfile          -- partition file
    stree             -- species tree
    popsizes          -- population sizes in species tree
    duprate           -- duplication rate
    lossrate          -- loss rate
    subrate           -- substitution rate
    beta              -- regularization parameter (multiplies the alignment
                         log probability)
    pretime           -- starting time before species tree
    premean           -- mean starting time before species tree

    coal_tree         -- coalescent tree
    coal_recon        -- reconciliation of coalescent tree to locus tree
    nsamples_coal     -- number of times to sample coal times t^G
    locus_tree        -- locus tree (has dup-loss)
    locus_recon       -- reconciliation of locus tree to species tree
    nsamples_locus    -- number of times to sample the locus tree times t^L
    daughters         -- daughter nodes

    rates, freqs, alphas  -- optimization parameters, passed to
                             prob_alignment as given (not re-optimized here)
    threads, seed, eps    -- forwarded unchanged to prob_alignment
    info              -- optional dict; when given, "topology_prob",
                         "alignment_prob" and "coal_prob" are written to it
                         after every t^L sample, so it ends up holding the
                         values of the last sample

    Returns the log of the Monte Carlo estimate, or 0.0 when
    `nsamples_locus` is 0.

    Side effects: branch lengths of `locus_tree` are reset from each sampled
    set of times, and every `node.dist` of `coal_tree` is rescaled in place
    by `subrate` after each successful t^G sample.
    """

    locus_events = phylo.label_events(locus_tree, locus_recon)

    # double integral
    double_integral_list = []
    double_integral = 0.0
    util.tic("recon prob")
    for _ in xrange(nsamples_locus):

        # sample t^L, the unit should be in myr
        locus_times = duploss.sample_dup_times(locus_tree,
                                               stree,
                                               locus_recon,
                                               duprate,
                                               lossrate,
                                               pretime,
                                               premean,
                                               events=locus_events)
        treelib.set_dists_from_timestamps(locus_tree, locus_times)

        # calculate P(T^G, R^G | T^L, t^L, daughters, theta)
        topology_prob = prob_locus_coal_recon_topology(coal_tree, coal_recon,
                                                       locus_tree, popsizes,
                                                       daughters)

        # value reported through `info` when the lineage check below fails
        alignment_prob_MonteCarlo = 0.0

        # check probability of lineage counts for this locus tree; the
        # counts do not depend on the node being visited, so they are
        # computed once instead of once per node
        lineages = coal.count_lineages_per_branch(coal_tree, coal_recon,
                                                  locus_tree)
        zero_lineage_prob = False
        for lnode in locus_tree:
            bottom_num, top_num = lineages[lnode]
            # the root branch has no parent and is treated as infinitely long
            T = lnode.dist if lnode.parent else util.INF

            # a single branch with zero probability rules out this sample
            if prob_coal_counts(bottom_num, top_num, T, popsizes) == 0.0:
                zero_lineage_prob = True

        if zero_lineage_prob:
            # if lineage_prob is zero, coal_prob is zero (log scale: -inf)
            coal_prob = -float("inf")
        else:
            # for this fixed t^L, sample t^G and average the probability of
            # observing the alignment (Monte Carlo integration)
            alignment_prob_list = []
            for _ in xrange(nsamples_coal):

                # sample coal times and set the coal_tree accordingly
                # locus tree branch lengths are in myr
                # make sure the input popsizes are scaled to fit the time
                # unit (typically myr)
                try:
                    sample_coal_times_topology(coal_tree, coal_recon,
                                               locus_tree, popsizes)
                except (ZeroDivisionError, ValueError):
                    # bad sample: contributes nothing to the sum but is
                    # still counted in the nsamples_coal denominator below
                    util.log("bad sample")
                    continue

                # convert branch lengths from myr to sub/site
                for node in coal_tree:
                    node.dist *= subrate

                # (log) probability of observing the alignment, scaled by
                # the regularization parameter beta
                alignment_prob_list.append(
                    beta * prob_alignment(alnfile,
                                          partfile,
                                          coal_tree,
                                          rates,
                                          freqs,
                                          alphas,
                                          threads=threads,
                                          seed=seed,
                                          eps=eps))

            # log_sum_exp exponentiates the log probabilities, adds them up,
            # and takes the log again
            if not alignment_prob_list:
                # all bad samples
                alignment_prob_MonteCarlo = -util.INF
            else:
                alignment_prob_MonteCarlo = log_sum_exp(
                    alignment_prob_list) - log(nsamples_coal)

            # P(T^G, R^G | T^L, t^L, daughters, theta) *
            #   int P(t^G | ~) * P(A | T^G, t^G) dt^G   (as a log probability)
            coal_prob = topology_prob + alignment_prob_MonteCarlo

        # add coal probability to a list for further processing
        double_integral_list.append(coal_prob)

        # running estimate over the t^L samples drawn so far
        double_integral = log_sum_exp(double_integral_list) - log(
            nsamples_locus)

        # logging info
        if info is not None:
            info["topology_prob"] = topology_prob  # one sample of t^L
            # one sample of t^L, averaged over t^G
            info["alignment_prob"] = alignment_prob_MonteCarlo
            info["coal_prob"] = double_integral
    util.toc()
    return double_integral
Exemplo n.º 58
0
def mrbayes(aln,
            nexfilename="",
            seqtype="pep",
            options=None,
            usertree=None,
            bootiter=0,
            verbose=True,
            saveOutput=""):
    """Run the external MrBayes program (`mb`) on an alignment.

    aln         -- alignment mapping of sequence name -> sequence
    nexfilename -- NEXUS file to write and execute.  When empty, a temp dir
                   is created through phylip.create_temp_dir and
                   "infile.nex" is used.
    seqtype     -- forwarded to writeNexus and writeMrbayesOptions
    options     -- optional dict of MrBayes options.  It is copied before
                   defaults are filled in, so the caller's dict is left
                   untouched.  "burninfrac" and "relburnin" are always
                   overridden and "sumt contype=allcompat;" is appended to
                   "extra".
    usertree    -- accepted but not used by this function
    bootiter    -- accepted but not used by this function
    verbose     -- when false, the output of `mb` is sent to /dev/null
    saveOutput  -- only consulted when a temp dir was created: if non-empty
                   the dir is handed to phylip.save_temp_dir, otherwise to
                   phylip.cleanup_temp_dir

    Returns the tree read from "<nexfilename>.con", with any names that had
    "+" replaced by "_" for MrBayes renamed back to the original.
    """
    util.tic("mrbayes on %d of length %d" % (len(aln), len(aln.values()[0])))

    # without an explicit NEXUS filename, work inside a fresh temp dir
    if nexfilename == "":
        cwd = phylip.create_temp_dir()
        nexfilename = "infile.nex"
    else:
        cwd = None

    # setup options on a private copy: the original mutated the caller's
    # dict, so reusing one dict across calls kept growing options["extra"]
    options = dict(options) if options else {}
    setDefaultOptions(options)

    options["burninfrac"] = .25
    options["relburnin"] = "yes"

    # force best binary tree (if possible)
    options["extra"] += "sumt contype=allcompat;"

    # get gene names; "+" is replaced by "_" in the written file and the
    # mapping back to the original name is remembered
    names = []
    namemap = {}
    for key in aln.keys():
        safe = key.replace("+", "_")
        names.append(safe)
        if safe != key:
            namemap[safe] = key

    # write input file followed by the options; `with` closes the file even
    # if one of the writers raises
    with open(nexfilename, "w") as out:
        writeNexus(out, names, aln.values(), seqtype, options)
        writeMrbayesOptions(out, options, seqtype=seqtype)

    # exec mrbayes
    # NOTE(review): the filename is interpolated into a shell command line
    # unquoted; names containing spaces or shell metacharacters will break
    # (or alter) the command.  Left as is to keep the invocation unchanged.
    if verbose:
        os.system("echo exe %s | mb" % nexfilename)
    else:
        os.system("echo exe %s | mb >/dev/null 2>&1" % nexfilename)

    # read tree (must happen before the temp dir is cleaned up)
    with open(nexfilename + ".con") as infile:
        tree = readNexusConTree(infile)

    # clean up
    if cwd is not None:
        if saveOutput != "":
            phylip.save_temp_dir(cwd, saveOutput)
        else:
            phylip.cleanup_temp_dir(cwd)

    util.toc()

    # restore the original names that contained "+"
    for tmpname, origname in namemap.items():
        tree.rename(tmpname, origname)

    return tree