def getSiteLikes(self):
    """Collect per-site likelihoods into self.siteLikes.

    The values are likelihoods, not log likelihoods.  The tree log
    likelihood is recalculated (and stored in self.logLike) with the
    getSiteLikes flag switched on, and then the site likelihoods of
    every data partition are gathered, in partition order, into the
    single flat list self.siteLikes.  Nothing is returned.
    """
    self._commonCStuff()

    # The second arg of p4_treeLogLike is the getSiteLikes flag.
    self.logLike = pf.p4_treeLogLike(self.cTree, 1)

    self.siteLikes = []
    for part in self.data.parts:
        self.siteLikes.extend(pf.getSiteLikes(part.cPart))
def calcLogLike(self, verbose=1, resetEmpiricalComps=True):
    """Calculate the log likelihood of the tree, with no optimization.

    The result is stored in self.logLike; nothing is returned.

    Args:
        verbose: if true, print the resulting log likelihood.
        resetEmpiricalComps: passed straight through to
            self._commonCStuff().
    """
    self._commonCStuff(resetEmpiricalComps=resetEmpiricalComps)

    # The second arg of p4_treeLogLike is the getSiteLikes flag (off here).
    self.logLike = pf.p4_treeLogLike(self.cTree, 0)

    if not verbose:
        return
    print("Tree.calcLogLike(). %f" % self.logLike)
def optLogLike(self, verbose=1, newtAndBrentPowell=1, allBrentPowell=0):
    """Calculate the likelihood of the tree, with optimization.

    There are two optimization methods-- choose one.  I've made
    'newtAndBrentPowell' the default, as it is fast and seems to be
    working.  The 'allBrentPowell' optimizer used to be the default,
    as it seems to be the most robust, although it is slow.  It would
    be good for checking important calculations.

    After optimization the log likelihood is stored in self.logLike,
    the optimized branch lengths are copied back onto the nodes, and
    the other free parameters are restored into self.model.

    Args:
        verbose: if true, print the log likelihood and the cpu time used.
        newtAndBrentPowell: truthy to use the Newton + Brent-Powell
            optimizer.
        allBrentPowell: truthy to use the all-Brent-Powell optimizer.

    Raises:
        P4Error: unless exactly one of the two optimizer flags is set.
    """
    # We want only one opt method.  Check that before doing any C-side
    # setup.  bool() lets any truthy/falsy value (True, None, ...) serve
    # as a flag.
    if (bool(newtAndBrentPowell) + bool(allBrentPowell)) != 1:
        gm = ['Tree.optLogLike()']
        gm.append("Choose 1 opt method.")
        raise P4Error(gm)

    if verbose:
        # time.clock() was removed in Python 3.8.  process_time() gives
        # the cpu time that the message below reports.
        theStartTime = time.process_time()
    self._commonCStuff()

    # Do the opt.
    if allBrentPowell:
        pf.p4_allBrentPowellOptimize(self.cTree)
    else:
        pf.p4_newtSetup(self.cTree)
        pf.p4_newtAndBrentPowellOpt(self.cTree)

    # second arg is getSiteLikes
    self.logLike = pf.p4_treeLogLike(self.cTree, 0)

    # get the brLens
    brLens = pf.p4_getBrLens(self.cTree)
    for n in self.iterNodesNoRoot():
        n.br.len = brLens[n.nodeNum]

    # get the other free prams
    prams = pf.p4_getFreePrams(self.cTree)
    self.model.restoreFreePrams(prams)

    if verbose:
        print("optLogLike = %f" % self.logLike)
        theEndTime = time.process_time()
        print("cpu time %s seconds." % (theEndTime - theStartTime))
def ancestralStateDraw(self):
    """Make a single draw from the inferred root character states.

    This method works on a tree with an attached model and data.
    Conditional on the tree, branch lengths, model, and data, the
    ancestral character states of the root node are inferred.  That
    inference is a probability distribution, and this method takes
    one draw from it for every site.

    Returns:
        A string, the drawn symbols for all sites of all data
        partitions, concatenated in partition order.

    Raises:
        P4Error: if a draw is flagged neither as a variable site nor
            as an invariant site.
    """
    gm = ['Tree.ancestralStateDraw().']
    self._commonCStuff()
    self.logLike = pf.p4_treeLogLike(self.cTree, 0)

    # Buffer filled in by p4_drawAncState() for each site.  Slots are
    #   0: char num                 (-1 for an invar draw)
    #   1: gamma cat                (-1 unless it is a variable site)
    #   2: isInvar                  (zero if not)
    #   3: invar char num
    draw = numpy.empty(4, dtype=numpy.int32)

    partStrings = []
    for pNum in range(self.data.nParts):
        dp = self.data.parts[pNum]
        siteSymbols = []
        for seqPos in range(dp.nChar):
            pf.p4_drawAncState(self.cTree, pNum, seqPos, draw)
            charNum, gammaCat, isInvar, invarCharNum = draw
            if gammaCat >= 0:
                # A variable site.
                assert isInvar == 0
                assert charNum >= 0
                symbolNum = charNum
            elif isInvar:
                # An invariant site.
                assert charNum == -1
                assert gammaCat == -1
                assert invarCharNum >= 0
                symbolNum = invarCharNum
            else:
                gm.append("Problem with returned draw. Got %s" % draw)
                raise P4Error(gm)
            siteSymbols.append(dp.symbols[symbolNum])
        assert len(siteSymbols) == dp.nChar
        partStrings.append(''.join(siteSymbols))
    return ''.join(partStrings)
def optTest(self):
    """Time one pass of pf.p4_setPrams() plus a tree log like calculation.

    The log likelihood is stored in self.logLike, and the elapsed time
    is printed.  Nothing is returned.

    The local switch doXfer is off.  When turned on, the model and
    tree C-stuff are set before the calculation, and afterwards the
    branch lengths and the other free parameters are transferred back
    from the C tree.
    """
    self._commonCStuff()
    # time.clock() was removed in Python 3.8; process_time() measures
    # the cpu time of this process.
    theStartTime = time.process_time()
    doXfer = 0
    for _ in range(1):
        if doXfer:
            self.model.setCStuff()
            self.setCStuff()
        pf.p4_setPrams(self.cTree, -1)
        self.logLike = pf.p4_treeLogLike(self.cTree, 0)

        if doXfer:
            # get the brLens (indexed by position in self.nodes)
            brLens = pf.p4_getBrLens(self.cTree)
            for nodeIndex, n in enumerate(self.nodes):
                if n != self.root:
                    n.br.len = brLens[nodeIndex]

            # get the other free prams
            prams = pf.p4_getFreePrams(self.cTree)
            self.model.restoreFreePrams(prams)

    print("time %s seconds." % (time.process_time() - theStartTime))
def getSiteRates(self):
    """Get posterior mean site rate, and gamma category.

    This says two things --

    1. The posterior mean site rate, calculated like PAML
    2. Which GDASRV category contributes most to the likelihood.

    The posterior mean site rate calculation requires that there be
    only one gdasrv over the tree, which will usually be the case.

    For placement in categories, if it's a tie score, then it is
    placed in the first one.

    The list of site rates, and the list of categories, both with one
    value for each site, are put into separate numpy arrays, returned
    as a list, ie [siteRatesArray, categoriesArray]

    There is one of these lists for each data partition, and the
    results as a whole are returned as a list.  So if you only have
    one data partition, then you get a 1-item list, and that single
    item is a list with 2 numpy arrays.  Ie [[siteRatesArray,
    categoriesArray]]

    If nGammaCat for a partition is 1, it will give that partition an
    array of ones for the site rates and zeros for the categories.

    Returns:
        A list with one [siteRates, gammaCats] pair per data
        partition; siteRates is a float64 array and gammaCats is an
        int32 array, each of length nChar for that partition.

    Raises:
        P4Error: if any model partition has more than one gdasrv, or
            if a partition has nGammaCat less than 1.
    """
    self._commonCStuff()

    # second arg is getSiteLikes
    self.logLike = pf.p4_treeLogLike(self.cTree, 0)

    # Check every partition before computing anything.
    for partNum in range(len(self.data.parts)):
        nGdasrvs = len(self.model.parts[partNum].gdasrvs)
        if nGdasrvs > 1:
            gm = ['Tree.getSiteRates()']
            gm.append("Part %i has %i gdasrvs. Maximum 1 allowed." % (
                partNum, nGdasrvs))
            raise P4Error(gm)

    results = []
    for partNum, p in enumerate(self.data.parts):
        nGammaCat = self.model.parts[partNum].nGammaCat
        # numpy.float was removed in NumPy 1.24.  It was an alias for the
        # builtin float, so float64 gives the same array dtype.
        if nGammaCat == 1:
            siteRates = numpy.ones(p.nChar, numpy.float64)
            gammaCats = numpy.zeros(p.nChar, numpy.int32)
        elif nGammaCat > 1:
            siteRates = numpy.zeros(p.nChar, numpy.float64)
            # Categories start at -1 before being handed to pf.getSiteRates().
            gammaCats = numpy.full(p.nChar, -1, dtype=numpy.int32)
            # Scratch array with one slot per gamma category.
            work = numpy.zeros(nGammaCat, numpy.float64)
            pf.getSiteRates(
                self.cTree, p.cPart, partNum, siteRates, gammaCats, work)
        else:
            raise P4Error("This should not happen.")
        results.append([siteRates, gammaCats])
    return results
def optLogLike(self, verbose=1, method="BOBYQA", optBrLens=True):
    """Calculate the likelihood of the tree, with optimization.

    There are different optimization methods-- choose one.  I've made
    'BOBYQA' the default, as it is very fast and seems to be working.
    It is from the nlopt library.

    Other opt methods include ---

    newtAndBrentPowell -- fairly fast, and works well.  It was the
    default.  Perhaps use this in combination with BOBYQA, eg

        t.optLogLike(method="BOBYQA")
        t.optLogLike(method="newtAndBrentPowell")

    The 'allBrentPowell' optimizer was the default several years ago,
    as it seems to be the most robust, although it is slow.  It might
    be good for checking important calculations.

    'newtAndBOBYQA' --- fast and seems to work well.

    As suggested above, for difficult optimizations it may help to
    repeat the call to optLogLike(), perhaps with a different method.

    Arg optBrLens (default True), can be turned off.  This week, this
    only works with method="BOBYQA".

    After optimization the log likelihood is stored in self.logLike,
    the branch lengths are copied back onto the nodes, and the other
    free parameters are restored into self.model.

    Args:
        verbose: if true, print the log likelihood and the cpu time used.
        method: one of "newtAndBrentPowell", "allBrentPowell",
            "newtAndBOBYQA", or "BOBYQA".
        optBrLens: whether to optimize branch lengths; turning it off
            requires method="BOBYQA".

    Raises:
        P4Error: if optBrLens is off with a method other than
            "BOBYQA", or if method is not one of the known names.
    """
    gm = ["Tree.optLogLike()"]

    # Validate the arguments up front, so that a bad call fails before
    # any of the C-side setup in _commonCStuff() is done.
    if not optBrLens:
        if method != "BOBYQA":
            gm.append("Turning arg optBrLens off only works with BOBYQA")
            raise P4Error(gm)
    if method not in ("newtAndBrentPowell", "allBrentPowell",
                      "newtAndBOBYQA", "BOBYQA"):
        gm.append(
            'method should be one of "newtAndBrentPowell", "allBrentPowell", "newtAndBOBYQA", or "BOBYQA"'
        )
        raise P4Error(gm)

    if verbose:
        # process_time() measures cpu time, which is what the message
        # printed at the end reports.
        theStartTime = time.process_time()

    # Check of branch lengths against var.BRLEN_MIN; switched off.
    if 0:
        for n in self.iterNodesNoRoot():
            if n.br.len < var.BRLEN_MIN:
                gm.append(
                    "All branch lengths should be greater than or equal to var.BRLEN_MIN,"
                )
                gm.append(f" which at the moment is {var.BRLEN_MIN}")
                gm.append(
                    f"Got a branch length of {n.br.len:.8f} {n.br.len:g}")
                gm.append(
                    "Either make the branch length bigger, or lower var.BRLEN_MIN."
                )
                gm.append(
                    "You could, for example, t.stripBrLens() which makes all br lens default 0.1"
                )
                raise P4Error(gm)

    self._commonCStuff()

    if method == "newtAndBrentPowell":
        pf.p4_newtSetup(self.cTree)
        pf.p4_newtAndBrentPowellOpt(self.cTree)
    elif method == "allBrentPowell":
        pf.p4_allBrentPowellOptimize(self.cTree)
    elif method == "newtAndBOBYQA":
        pf.p4_newtSetup(self.cTree)
        pf.p4_newtAndBOBYQAOpt(self.cTree)
    else:
        # method == "BOBYQA"; the second arg switches br len optimization.
        pf.p4_allBOBYQAOptimize(self.cTree, 1 if optBrLens else 0)

    # Do a final like calc.  (second arg is getSiteLikes)
    self.logLike = pf.p4_treeLogLike(self.cTree, 0)

    # get the brLens
    brLens = pf.p4_getBrLens(self.cTree)
    for n in self.iterNodesNoRoot():
        n.br.len = brLens[n.nodeNum]

    # get the other free prams
    prams = pf.p4_getFreePrams(self.cTree)
    self.model.restoreFreePrams(prams)

    if verbose:
        print("optLogLike = %f" % self.logLike)
        theEndTime = time.process_time()
        print("cpu time %s seconds." % (theEndTime - theStartTime))