示例#1
0
    def _test_branch_prior_predup(self):
        """Sample the branch prior on the "predup" fly tree and report stats.

        ``spidir.branch_prior`` is evaluated 30 times with its last
        positional argument set to True and 30 more times with it set to
        False.  One tab-separated line (tree id, then ``mean`` and ``sdev``
        of each batch) is written to stderr.
        """
        prep_dir("test/output/branch_prior_predup")
        report = sys.stderr
        label = "predup"

        gene_tree = read_tree("test/data/flies.predup.tree")
        drawTree(gene_tree)

        species_tree = read_tree("test/data/flies.stree")
        smap = phylo.read_gene2species("test/data/flies.smap")
        model = spidir.read_params("test/data/flies.param")
        birth_rate, death_rate = .4, .39
        pretime = 1.0
        nsamples = 100
        repeats = 30

        recon = phylo.reconcile(gene_tree, species_tree, smap)
        events = phylo.label_events(gene_tree, recon)

        def draw_batch(flag):
            # one batch of repeated evaluations that differ only in `flag`
            return [
                spidir.branch_prior(gene_tree, species_tree, recon, events,
                                    model, birth_rate, death_rate, pretime,
                                    nsamples, flag)
                for _ in xrange(repeats)
            ]

        flag_on = draw_batch(True)
        flag_off = draw_batch(False)

        summary = (label,
                   mean(flag_on), sdev(flag_on),
                   mean(flag_off), sdev(flag_off))
        print >> report, "\t".join([str(value) for value in summary])
示例#2
0
    def _test_branch_prior_predup(self):
        """Report branch-prior statistics for the "predup" fly tree.

        Two batches of 30 ``spidir.branch_prior`` evaluations are collected,
        the first with the trailing positional flag True and the second
        with it False.  The tree id followed by ``mean`` and ``sdev`` of
        each batch is printed to stderr as a single tab-separated line.
        """
        prep_dir("test/output/branch_prior_predup")
        stream = sys.stderr
        name = "predup"

        gtree = read_tree("test/data/flies.predup.tree")
        drawTree(gtree)

        sptree = read_tree("test/data/flies.stree")
        g2s = phylo.read_gene2species("test/data/flies.smap")
        prior_params = spidir.read_params("test/data/flies.param")
        birth = 0.4
        death = 0.39
        pretime = 1.0
        nsamples = 100

        recon = phylo.reconcile(gtree, sptree, g2s)
        events = phylo.label_events(gtree, recon)

        # batches[0] uses flag True, batches[1] uses flag False
        batches = []
        for flag in (True, False):
            values = []
            for _ in xrange(30):
                values.append(
                    spidir.branch_prior(gtree, sptree, recon, events,
                                        prior_params, birth, death, pretime,
                                        nsamples, flag))
            batches.append(values)
        first, second = batches

        columns = [name, mean(first), sdev(first), mean(second), sdev(second)]
        line = "\t".join(str(column) for column in columns)
        print >> stream, line
示例#3
0
    def _test_branch_prior_samples(self):
        """Test branch prior"""

        prep_dir("test/output/branch_prior")

        treeids = os.listdir("test/data/flies")
        treeids = ["3"]

        for treeid in treeids:

            tree = read_tree("test/data/flies-duploss/%s/%s.tree" % (treeid, treeid))

            print treeid
            draw_tree(tree)

            stree = read_tree("test/data/flies.stree")
            gene2species = phylo.read_gene2species("test/data/flies.smap")
            params = spidir.read_params("test/data/flies.param")
            birth = 0.0012
            death = 0.0013
            pretime = 1.0
            nsamples = 100

            recon = phylo.reconcile(tree, stree, gene2species)
            events = phylo.label_events(tree, recon)

            p = [
                spidir.branch_prior(tree, stree, recon, events, params, birth, death, nsamples=nsamples, approx=True)
                for i in xrange(30)
            ]

            # row = [treeid,
            #       mean(p), exc_default(lambda: sdev(p), INF)]
            print treeid, p
示例#4
0
    def test_search(self):
        """Run ``spidir.search_climb`` on every fly family and log the result.

        For each entry of ``test/data/flies`` the reference tree and the
        alignment are read from ``test/data/flies.nt/<id>/``, a tree is
        built with ``spidir.search_climb``, and both trees are drawn into
        ``test/output/all_terms_search/flies.txt`` together with an
        ``is_correct`` flag comparing their ``phylo.hash_tree`` values.

        The flag is only logged; nothing is asserted on it.
        """
        # NOTE(review): ids are listed from test/data/flies but the files are
        # read from test/data/flies.nt -- confirm both hold the same entries.
        prep_dir("test/output/all_terms_search")
        out = open("test/output/all_terms_search/flies.txt", "w")

        # try/finally so the log file is closed even when a family fails
        try:
            for treeid in os.listdir("test/data/flies"):
                tree_correct = read_tree("test/data/flies.nt/%s/%s.tree" %
                                         (treeid, treeid))
                align = read_fasta("test/data/flies.nt/%s/%s.align" %
                                   (treeid, treeid))

                # same call is applied to the constructed tree below, before
                # the two trees are drawn and their hashes compared
                phylo.hash_order_tree(tree_correct)

                print >>out, treeid
                print >>out, "correct"
                drawTree(tree_correct, out=out)

                stree = read_tree("test/data/flies.norm.stree")
                gene2species = phylo.read_gene2species("test/data/flies.smap")
                params = spidir.read_params("test/data/flies.nt.param")
                birth = .4
                death = .39
                pretime = 1.0
                maxdoom = 20
                bgfreq = [.258, .267, .266, .209]
                kappa = 1.59

                genes = align.keys()
                seqs = align.values()

                tree = spidir.search_climb(genes, seqs,
                                           stree, gene2species,
                                           params, birth, death, pretime,
                                           bgfreq, kappa,
                                           maxdoom=maxdoom,
                                           niter=50, quickiter=100,
                                           nsamples=100, branch_approx=True)

                phylo.hash_order_tree(tree)

                print >>out, "constructed"
                drawTree(tree, out=out)

                print >>out, "is_correct:", (phylo.hash_tree(tree) ==
                                             phylo.hash_tree(tree_correct))
        finally:
            out.close()
示例#5
0
    def test_all_terms(self):
        """Log the terms of ``spidir.calc_joint_prob`` for fly families.

        The first 100 entries returned by ``os.listdir("test/data/flies")``
        are processed.  For each one the tree and alignment are read, the
        tree is reconciled against ``test/data/flies.norm.stree``, and the
        three values returned by ``spidir.calc_joint_prob(..., terms=True)``
        plus their sum are written to ``test/output/all_terms/flies.txt``.
        Nothing is asserted on the values.
        """
        prep_dir("test/output/all_terms")
        out = open("test/output/all_terms/flies.txt", "w")

        # try/finally so the log file is closed even if a family raises
        try:
            treeids = os.listdir("test/data/flies")[:100]

            for treeid in treeids:

                tree = read_tree("test/data/flies/%s/%s.nt.tree" %
                                 (treeid, treeid))
                align = read_fasta("test/data/flies/%s/%s.nt.align" %
                                   (treeid, treeid))

                print >> out, treeid
                draw_tree(tree, out=out)

                stree = read_tree("test/data/flies.norm.stree")
                gene2species = phylo.read_gene2species("test/data/flies.smap")
                params = spidir.read_params("test/data/flies.nt.param")
                birth = .4
                death = .39
                pretime = 1.0
                maxdoom = 20
                bgfreq = [.258, .267, .266, .209]
                kappa = 1.59

                recon = phylo.reconcile(tree, stree, gene2species)
                events = phylo.label_events(tree, recon)

                branchp, topp, seqlk = spidir.calc_joint_prob(
                    align, tree, stree, recon, events, params,
                    birth, death, pretime, bgfreq, kappa,
                    maxdoom=maxdoom, terms=True)
                # the joint value is the plain sum of the three terms
                joint = topp + branchp + seqlk

                print >> out, "topp   ", topp
                print >> out, "branchp", branchp
                print >> out, "seqlk  ", seqlk
                print >> out, "joint  ", joint
        finally:
            out.close()
示例#6
0
    def optimize_model(self, gtree, stree, gene2species):
        """Load the inputs and settle the base frequencies and kappa.

        After delegating to ``CostModel.optimize_model`` this method:

        * replaces ``self.align`` with ``fasta.read_fasta(self.align)``;
        * replaces ``self.params`` with ``spidir.read_params(self.params)``;
        * sets ``self.bgfreq`` to the four floats parsed from the supplied
          comma-separated string, or to ``alignlib.compute_bgfreq`` of the
          alignment when none was supplied;
        * keeps ``self.kappa`` when it is ``>= 0``, otherwise picks the grid
          value whose ``spidir.find_ml_branch_lengths_hky`` score is largest.

        A missing ``--align`` / ``--param`` or a malformed ``--bgfreq`` is
        reported through ``self.parser.error``.

        Args:
            gtree: gene tree, passed to the base class and to the kappa scan.
            stree: species tree, passed to the base class.
            gene2species: gene-to-species mapping, passed to the base class.
        """
        CostModel.optimize_model(self, gtree, stree, gene2species)

        #=============================
        # read sequences
        if not self.align:
            self.parser.error("--align must be specified")
        self.align = fasta.read_fasta(self.align)

        #=============================
        # read SPIDIR parameters
        if not self.params:
            self.parser.error("--param must be specified")
        self.params = spidir.read_params(self.params)

        #=============================
        # determine background base frequency
        if self.bgfreq:
            # use supplied frequency; a non-numeric field is routed to the
            # same parser error as a wrong field count instead of escaping
            # as a bare ValueError
            try:
                vals = [float(field) for field in self.bgfreq.split(",")]
            except ValueError:
                vals = []
            if len(vals) != 4:
                self.parser.error("invalid --bgfreq: %s" % self.bgfreq)
            self.bgfreq = vals
        else:
            # compute frequency from alignment
            self.bgfreq = alignlib.compute_bgfreq(self.align)

        #=============================
        # determine kappa
        # A supplied kappa (>= 0) is kept untouched; any value failing that
        # test triggers the scan below.
        if not (self.kappa >= 0):
            # compute kappa from alignment
            # from spidir.find_ml_kappa_hky
            minkappa = 0.4
            maxkappa = 5.0
            stepkappa = 0.1
            maxlk = -util.INF
            maxk = minkappa

            for k in util.frange(minkappa, maxkappa, stepkappa):
                # parsinit is True only for the first grid value
                # NOTE(review): presumably requests a parsimony-based initial
                # tree on the first pass -- confirm against spidir.
                lk = spidir.find_ml_branch_lengths_hky(gtree, self.align, self.bgfreq, k, maxiter=1,
                                                       parsinit=(k == minkappa))
                if lk > maxlk:
                    maxlk = lk
                    maxk = k

            self.kappa = maxk
示例#7
0
    def test_all_terms(self):
        """Write the joint-probability terms of up to 100 fly families.

        Each of the first 100 names listed in ``test/data/flies`` is read
        (``<id>.nt.tree`` and ``<id>.nt.align``), reconciled against
        ``test/data/flies.norm.stree`` and scored with
        ``spidir.calc_joint_prob(..., terms=True)``.  The three returned
        terms and their sum go to ``test/output/all_terms/flies.txt``.
        The method makes no assertions.
        """
        prep_dir("test/output/all_terms")
        out = open("test/output/all_terms/flies.txt", "w")

        # guarantee the output file is closed when any step below raises
        try:
            for treeid in os.listdir("test/data/flies")[:100]:
                tree = read_tree("test/data/flies/%s/%s.nt.tree" % (treeid, treeid))
                align = read_fasta("test/data/flies/%s/%s.nt.align" % (treeid, treeid))

                print >>out, treeid
                draw_tree(tree, out=out)

                stree = read_tree("test/data/flies.norm.stree")
                gene2species = phylo.read_gene2species("test/data/flies.smap")
                params = spidir.read_params("test/data/flies.nt.param")
                birth = .4
                death = .39
                pretime = 1.0
                maxdoom = 20
                bgfreq = [.258, .267, .266, .209]
                kappa = 1.59

                recon = phylo.reconcile(tree, stree, gene2species)
                events = phylo.label_events(tree, recon)

                branchp, topp, seqlk = spidir.calc_joint_prob(
                    align, tree, stree, recon, events, params,
                    birth, death, pretime,
                    bgfreq, kappa, maxdoom=maxdoom, terms=True)
                # joint is simply the three terms added together
                joint = topp + branchp + seqlk

                print >>out, "topp   ", topp
                print >>out, "branchp", branchp
                print >>out, "seqlk  ", seqlk
                print >>out, "joint  ", joint
        finally:
            out.close()
示例#8
0
    def _test_branch_prior_samples(self):
        """Test branch prior"""

        prep_dir("test/output/branch_prior")

        treeids = os.listdir("test/data/flies")
        treeids = ["3"]

        for treeid in treeids:

            tree = read_tree("test/data/flies-duploss/%s/%s.tree" %
                             (treeid, treeid))

            print treeid
            draw_tree(tree)

            stree = read_tree("test/data/flies.stree")
            gene2species = phylo.read_gene2species("test/data/flies.smap")
            params = spidir.read_params("test/data/flies.param")
            birth = .0012
            death = .0013
            pretime = 1.0
            nsamples = 100

            recon = phylo.reconcile(tree, stree, gene2species)
            events = phylo.label_events(tree, recon)

            p = [
                spidir.branch_prior(tree,
                                    stree,
                                    recon,
                                    events,
                                    params,
                                    birth,
                                    death,
                                    nsamples=nsamples,
                                    approx=True) for i in xrange(30)
            ]

            #row = [treeid,
            #       mean(p), exc_default(lambda: sdev(p), INF)]
            print treeid, p
示例#9
0
    def _test_branch_prior_approx(self):
        """Test branch prior"""

        prep_dir("test/output/branch_prior")
        out = open("test/output/branch_prior/flies.approx.txt", "w")
        out = sys.stderr

        treeids = os.listdir("test/data/flies")

        for treeid in treeids:

            tree = read_tree("test/data/flies-duploss/%s/%s.nt.tree" % (treeid, treeid))

            print treeid
            draw_tree(tree)

            stree = read_tree("test/data/flies.stree")
            gene2species = phylo.read_gene2species("test/data/flies.smap")
            params = spidir.read_params("test/data/flies.param")
            birth = 0.0012
            death = 0.0013
            pretime = 1.0
            nsamples = 100

            recon = phylo.reconcile(tree, stree, gene2species)
            events = phylo.label_events(tree, recon)
            p = [
                spidir.branch_prior(tree, stree, recon, events, params, birth, death, nsamples=nsamples, approx=False)
                for i in xrange(30)
            ]
            p2 = [
                spidir.branch_prior(tree, stree, recon, events, params, birth, death, nsamples=nsamples, approx=True)
                for i in xrange(30)
            ]

            row = [treeid, mean(p), exc_default(lambda: sdev(p), INF), mean(p2), exc_default(lambda: sdev(p2), INF)]

            print >> out, "\t".join(map(str, row))
            self.assert_(INF not in row and -INF not in row)

        out.close()
示例#10
0
    def test_search(self):
        """Build a tree per fly family with ``spidir.search_climb`` and log it.

        Every name listed in ``test/data/flies`` is looked up under
        ``test/data/flies.nt/<id>/`` for its reference tree and alignment.
        The reference tree and the tree returned by ``spidir.search_climb``
        are drawn into ``test/output/all_terms_search/flies.txt``, followed
        by an ``is_correct`` line that compares their ``phylo.hash_tree``
        values.  That comparison is logged only, never asserted.
        """
        # NOTE(review): the id list comes from test/data/flies while the data
        # is read from test/data/flies.nt -- confirm the two stay in sync.
        prep_dir("test/output/all_terms_search")
        out = open("test/output/all_terms_search/flies.txt", "w")

        # close the log even if reading or searching a family raises
        try:
            treeids = os.listdir("test/data/flies")

            for treeid in treeids:

                tree_correct = read_tree("test/data/flies.nt/%s/%s.tree" %
                                         (treeid, treeid))
                align = read_fasta("test/data/flies.nt/%s/%s.align" %
                                   (treeid, treeid))

                # applied to both trees before they are drawn and hashed
                phylo.hash_order_tree(tree_correct)

                print >> out, treeid
                print >> out, "correct"
                drawTree(tree_correct, out=out)

                stree = read_tree("test/data/flies.norm.stree")
                gene2species = phylo.read_gene2species("test/data/flies.smap")
                params = spidir.read_params("test/data/flies.nt.param")
                birth = .4
                death = .39
                pretime = 1.0
                maxdoom = 20
                bgfreq = [.258, .267, .266, .209]
                kappa = 1.59

                genes = align.keys()
                seqs = align.values()

                tree = spidir.search_climb(genes,
                                           seqs,
                                           stree,
                                           gene2species,
                                           params,
                                           birth,
                                           death,
                                           pretime,
                                           bgfreq,
                                           kappa,
                                           maxdoom=maxdoom,
                                           niter=50,
                                           quickiter=100,
                                           nsamples=100,
                                           branch_approx=True)

                phylo.hash_order_tree(tree)

                print >> out, "constructed"
                drawTree(tree, out=out)

                print >> out, "is_correct:", (
                    phylo.hash_tree(tree) == phylo.hash_tree(tree_correct))
        finally:
            out.close()
示例#11
0
    def _test_branch_prior_approx(self):
        """Test branch prior"""

        prep_dir("test/output/branch_prior")
        out = open("test/output/branch_prior/flies.approx.txt", "w")
        out = sys.stderr

        treeids = os.listdir("test/data/flies")

        for treeid in treeids:

            tree = read_tree("test/data/flies-duploss/%s/%s.nt.tree" %
                             (treeid, treeid))

            print treeid
            draw_tree(tree)

            stree = read_tree("test/data/flies.stree")
            gene2species = phylo.read_gene2species("test/data/flies.smap")
            params = spidir.read_params("test/data/flies.param")
            birth = .0012
            death = .0013
            pretime = 1.0
            nsamples = 100

            recon = phylo.reconcile(tree, stree, gene2species)
            events = phylo.label_events(tree, recon)
            p = [
                spidir.branch_prior(tree,
                                    stree,
                                    recon,
                                    events,
                                    params,
                                    birth,
                                    death,
                                    nsamples=nsamples,
                                    approx=False) for i in xrange(30)
            ]
            p2 = [
                spidir.branch_prior(tree,
                                    stree,
                                    recon,
                                    events,
                                    params,
                                    birth,
                                    death,
                                    nsamples=nsamples,
                                    approx=True) for i in xrange(30)
            ]

            row = [
                treeid,
                mean(p),
                exc_default(lambda: sdev(p), INF),
                mean(p2),
                exc_default(lambda: sdev(p2), INF)
            ]

            print >> out, "\t".join(map(str, row))
            self.assert_(INF not in row and -INF not in row)

        out.close()