def getSiteLikes(self):
     """Compute per-site likelihoods and store them in self.siteLikes.

     The values are likelihoods, not log likelihoods.  The tree
     likelihood is calculated first (which also sets self.logLike), and
     then the site likelihoods of each data part are gathered, in part
     order, into one flat list.  Nothing is returned.
     """
     self._commonCStuff()
     # The second arg to p4_treeLogLike is the getSiteLikes flag; on here.
     self.logLike = pf.p4_treeLogLike(self.cTree, 1)
     self.siteLikes = []
     for part in self.data.parts:
         self.siteLikes.extend(pf.getSiteLikes(part.cPart))
    def calcLogLike(self, verbose=1, resetEmpiricalComps=True):
        """Calculate the likelihood of the tree, without optimization.

        The result is stored in self.logLike; nothing is returned.

        Args:
            verbose: if true, print the log likelihood after calculating it.
            resetEmpiricalComps: passed straight through to
                self._commonCStuff().
        """
        self._commonCStuff(resetEmpiricalComps=resetEmpiricalComps)

        # The second arg to p4_treeLogLike is the getSiteLikes flag; off here.
        treeLogLike = pf.p4_treeLogLike(self.cTree, 0)
        self.logLike = treeLogLike

        if not verbose:
            return
        print("Tree.calcLogLike(). %f" % self.logLike)
    def optLogLike(self, verbose=1, newtAndBrentPowell=1, allBrentPowell=0):
        """Calculate the likelihood of the tree, with optimization.

        There are two optimization methods-- choose one.  I've made
        'newtAndBrentPowell' the default, as it is fast and seems to be
        working.  The 'allBrentPowell' optimizer used to be the default,
        as it seems to be the most robust, although it is slow.  It would
        be good for checking important calculations.

        After optimizing, the log likelihood is placed in self.logLike,
        the optimized branch lengths are copied back onto the nodes, and
        the other free parameters are restored into self.model.  Nothing
        is returned.

        Args:
            verbose: if true, print the log likelihood and the cpu time used.
            newtAndBrentPowell: true to use the Newton + Brent-Powell optimizer.
            allBrentPowell: true to use the all-Brent-Powell optimizer.

        Raises:
            P4Error: unless exactly one of the two method flags is true.
        """

        if verbose:
            # time.clock() was removed in Python 3.8.  process_time() is
            # the documented replacement for measuring cpu time.
            theStartTime = time.process_time()
        self._commonCStuff()

        # We want only one opt method.  Comparing truth values means that
        # any truthy/falsy flag works, not just ints.
        if bool(newtAndBrentPowell) == bool(allBrentPowell):
            gm = ['Tree.optLogLike()']
            gm.append("Choose 1 opt method.")
            raise P4Error(gm)

        # Do the opt.
        if allBrentPowell:
            pf.p4_allBrentPowellOptimize(self.cTree)
        else:
            pf.p4_newtSetup(self.cTree)
            pf.p4_newtAndBrentPowellOpt(self.cTree)

        # second arg is getSiteLikes
        self.logLike = pf.p4_treeLogLike(self.cTree, 0)

        # get the brLens
        brLens = pf.p4_getBrLens(self.cTree)
        for n in self.iterNodesNoRoot():
            n.br.len = brLens[n.nodeNum]

        # get the other free prams
        prams = pf.p4_getFreePrams(self.cTree)
        self.model.restoreFreePrams(prams)

        if verbose:
            print("optLogLike = %f" % self.logLike)
            theEndTime = time.process_time()
            print("cpu time %s seconds." % (theEndTime - theStartTime))
    def ancestralStateDraw(self):
        """Make a draw from the inferred root character state distribution

        This method works on a tree with an attached model and data.

        Conditional on the tree, branch lengths, model, and data, the
        ancestral character states of the root node are inferred.  That
        inference is a probability distribution, and this method takes a
        single draw from it.

        Returns:
            A string with one symbol per site, with the data parts
            concatenated in order.

        Raises:
            P4Error: if a draw is neither a variable-site draw nor an
                invariant-site draw.
        """

        gm = ['Tree.ancestralStateDraw().']
        self._commonCStuff()
        self.logLike = pf.p4_treeLogLike(self.cTree, 0)

        # Four-slot buffer handed to p4_drawAncState() for every site.  The
        # slots are read below as
        #   [0] char num, [1] gamma cat (-1 unless a variable site),
        #   [2] isInvar flag (zero if not), [3] invar char num.
        draw = numpy.empty(4, dtype=numpy.int32)

        partStrings = []
        for partNum in range(self.data.nParts):
            part = self.data.parts[partNum]
            symbolsDrawn = []
            for sitePos in range(part.nChar):
                pf.p4_drawAncState(self.cTree, partNum, sitePos, draw)
                charNum, gammaCat, isInvar, invarCharNum = draw

                if gammaCat >= 0:
                    # A variable site.
                    assert isInvar == 0
                    assert charNum >= 0
                    symbolsDrawn.append(part.symbols[charNum])
                elif isInvar:
                    # An invariant site.
                    assert charNum == -1
                    assert gammaCat == -1
                    assert invarCharNum >= 0
                    symbolsDrawn.append(part.symbols[invarCharNum])
                else:
                    gm.append("Problem with returned draw.  Got %s" % draw)
                    raise P4Error(gm)

            assert len(symbolsDrawn) == part.nChar
            partStrings.append(''.join(symbolsDrawn))

        return ''.join(partStrings)
    def optTest(self):
        """Time a parameter-setting plus likelihood calculation.

        Sets the parameters in the C tree, calculates the log likelihood
        (stored in self.logLike), and prints the cpu time taken.

        The local switch doXfer is hard-wired off.  When it is turned on,
        the model and tree are also pushed to the C side before the
        calculation, and the branch lengths and free parameters are pulled
        back afterwards.
        """
        self._commonCStuff()
        # time.clock() was removed in Python 3.8.  process_time() is the
        # documented replacement for measuring cpu time.
        theStartTime = time.process_time()
        doXfer = 0
        for _ in range(1):
            if doXfer:
                self.model.setCStuff()
                self.setCStuff()
            pf.p4_setPrams(self.cTree, -1)
            self.logLike = pf.p4_treeLogLike(self.cTree, 0)

            if doXfer:
                # get the brLens
                brLens = pf.p4_getBrLens(self.cTree)
                # A separate index name, so that the outer loop variable is
                # not clobbered.
                for nodeIndex, n in enumerate(self.nodes):
                    if n != self.root:
                        n.br.len = brLens[nodeIndex]

                # get the other free prams
                prams = pf.p4_getFreePrams(self.cTree)
                self.model.restoreFreePrams(prams)

        print("time %s seconds." % (time.process_time() - theStartTime))
    def getSiteRates(self):
        """Get posterior mean site rate, and gamma category.

        This says two things --
        1. The posterior mean site rate, calculated like PAML
        2. Which GDASRV category contributes most to the likelihood.

        The posterior mean site rate calculation requires that there be
        only one gdasrv over the tree, which will usually be the case.

        For placement in categories, if its a tie score, then it is placed
        in the first one.

        The list of site rates, and the list of categories, both with one
        value for each site, are put into separate numpy arrays, returned
        as a list, ie [siteRatesArray, categoriesArray]

        There is one of these lists for each data partition, and the results as a
        whole are returned as a list.  So if you only have one data
        partition, then you get a 1-item list, and that single item is a list with 2
        numpy arrays.  Ie [[siteRatesArray, categoriesArray]]

        If nGammaCat for a partition is 1, it will give that partition an
        array of ones for the site rates and zeros for the categories.

        As a side effect, self.logLike is recalculated.

        Raises:
            P4Error: if any part has more than one gdasrv, or if nGammaCat
                for a part is neither 1 nor greater than 1.
        """

        self._commonCStuff()
        # second arg is getSiteLikes
        self.logLike = pf.p4_treeLogLike(self.cTree, 0)

        # Check all the parts before doing any per-part work.
        for partNum in range(len(self.data.parts)):
            nGdasrvs = len(self.model.parts[partNum].gdasrvs)
            if nGdasrvs > 1:
                gm = ['Tree.getSiteRates()']
                gm.append("Part %i has %i gdasrvs.  Maximum 1 allowed." % (
                    partNum, nGdasrvs))
                raise P4Error(gm)

        results = []
        for partNum, p in enumerate(self.data.parts):
            nGammaCat = self.model.parts[partNum].nGammaCat
            # numpy.float64 is used explicitly; the old numpy.float alias
            # (which meant the same dtype) was removed in NumPy 1.24.
            if nGammaCat == 1:
                siteRates = numpy.ones(p.nChar, numpy.float64)
                gammaCats = numpy.zeros(p.nChar, numpy.int32)
            elif nGammaCat > 1:
                siteRates = numpy.zeros(p.nChar, numpy.float64)
                # Categories start at -1 before the C call.
                gammaCats = numpy.full(p.nChar, -1, numpy.int32)
                work = numpy.zeros(nGammaCat, numpy.float64)
                # Presumably fills siteRates and gammaCats in place, using
                # work as scratch space -- confirm against the C source.
                pf.getSiteRates(
                    self.cTree, p.cPart, partNum, siteRates, gammaCats, work)
            else:
                raise P4Error("This should not happen.")
            results.append([siteRates, gammaCats])
        return results
# Example #7
# 0
    def optLogLike(self, verbose=1, method="BOBYQA", optBrLens=True):
        """Calculate the likelihood of the tree, with optimization.

        Several optimizers are available; pick one with the 'method' arg.

        'BOBYQA' -- the default.  It is very fast and seems to be
        working.  It is from the nlopt library.

        'newtAndBrentPowell' -- fairly fast, and works well.  It was the
        default.  Perhaps use this in combination with BOBYQA, eg

        t.optLogLike(method="BOBYQA")
        t.optLogLike(method="newtAndBrentPowell")

        'allBrentPowell' -- the default several years ago, as it seems to
        be the most robust, although it is slow.  It might be good for
        checking important calculations.

        'newtAndBOBYQA' -- fast and seems to work well.

        As suggested above, for difficult optimizations it may help to
        repeat the call to optLogLike(), perhaps with a different
        method.

        Arg optBrLens (default True), can be turned off.  This week,
        this only works with method="BOBYQA".

        When the optimization is done, the log likelihood is placed in
        self.logLike, the branch lengths are copied back onto the nodes,
        and the other free parameters are restored into self.model.
        Nothing is returned.

        Raises:
            P4Error: if optBrLens is turned off with a method other than
                "BOBYQA", or if method is not one of the four names above.
        """

        gm = ["Tree.optLogLike()"]
        theStartTime = time.time() if verbose else None

        # Branch length sanity check; switched off.
        if 0:
            for n in self.iterNodesNoRoot():
                if n.br.len < var.BRLEN_MIN:
                    gm.extend([
                        "All branch lengths should be greater than or equal to var.BRLEN_MIN,",
                        f"    which at the moment is {var.BRLEN_MIN}",
                        f"Got a branch length of {n.br.len:.8f} {n.br.len:g}",
                        "Either make the branch length bigger, or lower var.BRLEN_MIN.",
                        "You could, for example, t.stripBrLens() which makes all br lens default 0.1",
                    ])
                    raise P4Error(gm)

        if not optBrLens and method != "BOBYQA":
            gm.append("Turning arg optBrLens off only works with BOBYQA")
            raise P4Error(gm)

        self._commonCStuff()

        # Hand off to the chosen optimizer.
        if method == "BOBYQA":
            # The second arg says whether to optimize the branch lengths.
            pf.p4_allBOBYQAOptimize(self.cTree, 1 if optBrLens else 0)
        elif method == "newtAndBOBYQA":
            pf.p4_newtSetup(self.cTree)
            pf.p4_newtAndBOBYQAOpt(self.cTree)
        elif method == "newtAndBrentPowell":
            pf.p4_newtSetup(self.cTree)
            pf.p4_newtAndBrentPowellOpt(self.cTree)
        elif method == "allBrentPowell":
            pf.p4_allBrentPowellOptimize(self.cTree)
        else:
            gm.append(
                'method should be one of "newtAndBrentPowell", "allBrentPowell", "newtAndBOBYQA", or "BOBYQA"'
            )
            raise P4Error(gm)

        # One last likelihood calculation.  (second arg is getSiteLikes)
        self.logLike = pf.p4_treeLogLike(self.cTree, 0)

        # Copy the optimized branch lengths back onto the nodes.
        optimizedBrLens = pf.p4_getBrLens(self.cTree)
        for node in self.iterNodesNoRoot():
            node.br.len = optimizedBrLens[node.nodeNum]

        # And the rest of the free parameters back into the model.
        self.model.restoreFreePrams(pf.p4_getFreePrams(self.cTree))

        if verbose:
            print("optLogLike = %f" % self.logLike)
            elapsed = time.time() - theStartTime
            print("cpu time %s seconds." % elapsed)