def calc_avg_rcmks(parser):
    """Compute the probability-weighted average ``mk`` over a metallicity grid.

    The per-metallicity grids ``logmkp`` and ``logp`` (shape
    ``(len(zs), njks, nmks)``) are either computed in parallel with
    ``indiv_calc`` and cached with ``save_pickles``, or re-loaded from
    ``options.outfilename`` when that file already exists.

    @param parser: option parser; ``parse_args()`` must yield options with
        ``basti``, ``parsec`` and ``outfilename`` attributes
    @return: minus the weighted average (the value that is also printed as
        "Average mk")
    """
    options, args = parser.parse_args()
    njks = 101
    nmks = 101
    jks = numpy.linspace(0.5, 0.8, njks)
    mks = numpy.linspace(-0.5, -3., nmks)
    # The metallicity grid depends on the selected isochrone set.
    if options.basti:
        zs = numpy.array([0.004, 0.008, 0.01, 0.0198, 0.03, 0.04])
        zsolar = 0.019
    elif options.parsec:
        zs = numpy.arange(0.0005, 0.06005, 0.0005)
        zsolar = 0.019
    else:
        zs = numpy.arange(0.0005, 0.03005, 0.0005)
        zsolar = 0.019
    if not os.path.exists(options.outfilename):
        logpz = localzdist(zs, zsolar=zsolar)
        logmkp = numpy.zeros((len(zs), njks, nmks))
        logp = numpy.zeros((len(zs), njks, nmks))
        funcargs = (zs, options, njks, jks, nmks, mks, logpz)
        multOut = multi.parallel_map(
            (lambda x: indiv_calc(x, *funcargs)),
            range(len(zs)),
            numcores=numpy.amin([64, len(zs),
                                 multiprocessing.cpu_count()]))
        for ii in range(len(zs)):
            logmkp[ii, :, :] = multOut[ii][0, :, :]
            logp[ii, :, :] = multOut[ii][1, :, :]
        save_pickles(options.outfilename, logmkp, logp)
    else:
        # Context manager guarantees the cache file is closed even when
        # unpickling fails.
        with open(options.outfilename, 'rb') as savefile:
            logmkp = pickle.load(savefile)
            logp = pickle.load(savefile)
    # NaN entries are replaced by the most negative float so that they carry
    # no weight in the log-sum-exp reductions below.
    indx = numpy.isnan(logp)
    lowest = -numpy.finfo(numpy.dtype(numpy.float64)).max
    logp[indx] = lowest
    logmkp[indx] = lowest
    # Average the peak, so calculate the peak: every (Z, J-Ks) row is
    # replaced by its peak value, weighted by the row's total probability.
    for ii in range(len(zs)):
        for jj in range(njks):
            maxmkindx = numpy.argmax(logp[ii, jj, :])
            totlogp = maxentropy.logsumexp(logp[ii, jj, :])
            logmkp[ii, jj, :] = (logmkp[ii, jj, maxmkindx]
                                 - logp[ii, jj, maxmkindx] + totlogp)
            logp[ii, jj, :] = totlogp
    avgmk = numpy.exp(maxentropy.logsumexp(logmkp.flatten())
                      - maxentropy.logsumexp(logp.flatten()))
    # Same average restricted to the grid point closest to Z = 0.017.
    solindx = numpy.argmin(numpy.fabs(zs - 0.017))
    avgmksolar = numpy.exp(
        maxentropy.logsumexp(logmkp[solindx, :, :].flatten())
        - maxentropy.logsumexp(logp[solindx, :, :].flatten()))
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("Average mk: %f" % (-avgmk))
    print("Average mk if solar: %f" % (-avgmksolar))
    return -avgmk
def calc_model(params, options, data, logpiso, logpisodwarf, df, nlocs,
               locations, iso):
    """Return the model mean line-of-sight velocity for every location.

    For each of the ``nlocs`` entries of ``locations`` the log-probability
    from ``pvlosplate`` is evaluated on a grid of ``options.nvlos``
    velocities between -200 and 200, normalized, and used to compute the
    mean velocity.

    @param params: model parameters forwarded to ``pvlosplate``
    @param options: options object (reads ``dwarf``, ``nvlos``, ``multi``)
    @param data: record array with a ``'LOCATION'`` field
    @param logpiso: per-star array, sliced by location (first axis)
    @param logpisodwarf: same for dwarfs; only used if ``options.dwarf``
    @param df: forwarded to ``pvlosplate``
    @param nlocs: number of locations to evaluate
    @param locations: location identifiers compared against ``'LOCATION'``
    @param iso: forwarded to ``pvlosplate``
    @return: numpy array of length ``nlocs`` with the mean velocities
    """
    avg_plate_model = numpy.zeros(nlocs)
    for ii in range(nlocs):
        # Calculate vlos | los
        indx = (data['LOCATION'] == locations[ii])
        thesedata = data[indx]
        thislogpiso = logpiso[indx, :]
        if options.dwarf:
            thislogpisodwarf = logpisodwarf[indx, :]
        else:
            thislogpisodwarf = None
        vlos = numpy.linspace(-200., 200., options.nvlos)
        pvlos = numpy.zeros(options.nvlos)
        if options.multi is not None:
            # parallel_map returns a sequence; convert so the in-place
            # normalization below operates on an array.
            pvlos = numpy.array(multi.parallel_map(
                (lambda x: pvlosplate(params, vlos[x], thesedata, df,
                                      options, thislogpiso,
                                      thislogpisodwarf, iso)),
                range(options.nvlos),
                numcores=numpy.amin([len(vlos),
                                     multiprocessing.cpu_count(),
                                     options.multi])))
        else:
            for jj in range(options.nvlos):
                print(jj)  # progress indicator
                # `iso` was missing from this call, which made the serial
                # path fail with a TypeError (pvlosplate takes 8 arguments).
                pvlos[jj] = pvlosplate(params, vlos[jj], thesedata, df,
                                       options, thislogpiso,
                                       thislogpisodwarf, iso)
        # Normalize in log space, then convert to probabilities.
        pvlos -= logsumexp(pvlos)
        pvlos = numpy.exp(pvlos)
        # Calculate mean velocity
        avg_plate_model[ii] = numpy.sum(vlos * pvlos)
    return avg_plate_model
def call_polymorphism(self, obs, post):
    """Return the polymorphism probability for one position.

    This is the posterior probability that the strain is homozygous for the
    non-reference base with the highest count at this position.
    @param obs: one ref base count and three non-ref base counts
    @param post: the posterior hidden state probabilities (four values)
    @return: the polymorphism probability
    """
    # Only the first two state probabilities enter the computation; the
    # four-way unpacking still enforces the expected length of `post`.
    weight_recent, weight_ancient, _unused_a, _unused_b = post
    # Entry 2 of each state's distribution is the term used for the
    # polymorphism probability conditional on that state.
    aa_recent = self.states[0].get_posterior_distribution(obs)[2]
    aa_ancient = self.states[1].get_posterior_distribution(obs)[2]
    posterior_polymorphism = sum(
        (weight_recent * aa_recent, weight_ancient * aa_ancient))
    # Given that a polymorphism occurred, score each of the three
    # non-reference nucleotides as the true base in turn.
    err = self.seqerr
    log_miss = math.log(err / 4.0)
    log_hit = math.log(1 - 3 * err / 4.0)
    non_ref = (1, 2, 3)
    logs = []
    for true_base in non_ref:
        total = 0
        for base in non_ref:
            total += obs[base] * (log_hit if base == true_base else log_miss)
        logs.append(total)
    # Normalized weight of the best-supported nucleotide.
    best_share = math.exp(max(logs) - logsumexp(logs))
    return posterior_polymorphism * best_share
def word_bound(self, Elogtheta, Elogbeta, doc_ids=None):
    """
    Compute the expectation of the log conditional likelihood of the data,
    E_q[log p(w_d | theta, beta, A_d)].

    Note that this is not strictly speaking a likelihood.

    Documents are taken from `self.corpus` (all of them, or only those
    listed in `doc_ids`). Row `d` of `Elogtheta` is paired with the d-th
    selected document, i.e. rows are addressed by position in the
    selection, not by the corpus index.
    """
    if doc_ids is None:
        selected = self.corpus
    else:
        selected = [self.corpus[doc_id] for doc_id in doc_ids]

    total = 0.0
    for position, document in enumerate(selected):
        word_ids = numpy.array([word_id for word_id, _ in document])
        word_counts = numpy.array([count for _, count in document])
        theta_row = Elogtheta[position, :]
        doc_total = 0.0
        # Count-weighted log-sum-exp over topics for every word in the doc.
        for word_id, count in zip(word_ids, word_counts):
            doc_total += count * logsumexp(theta_row + Elogbeta[:, word_id])
        total += doc_total
    return total
def logsumexp(a, axis=None):
    r"""
    Evaluates :math:`\log(\sum_i \exp(a_i) )` in a bit smarter manner.

    If axis is None (default) then tries to use scipy's logsumexp; if no
    scipy version is available the sum is computed manually along the first
    axis. If axis is given, the sum is always computed manually along it.

    Slices whose maximum is not finite are handled explicitly: a slice made
    entirely of ``-inf`` yields ``-inf`` and a slice containing ``+inf``
    yields ``+inf`` (previously both produced NaN).

    :param a: positions where logsumexp will be evaluated
    :type a: numpy.array
    :param axis: along which axis logsumexp shall be computed, default=None
    :type axis: int
    :returns: function values
    :rtype: numpy.array
    """
    import numpy as np  # local import: only asarray/log/exp are assumed in scope

    if axis is None:
        # Use the scipy.maxentropy version.
        if hasattr(misc, 'logsumexp'):
            return misc.logsumexp(a)
        elif hasattr(maxentropy, 'logsumexp'):
            return maxentropy.logsumexp(a)
        else:
            axis = 0
    a = asarray(a)
    shp = list(a.shape)
    shp[axis] = 1
    a_max = a.max(axis=axis)
    # Subtracting an infinite maximum would give inf - inf = NaN; shift by
    # zero for those slices instead (same convention as scipy).
    a_max = np.where(np.isfinite(a_max), a_max, 0.0)
    # log(0) = -inf is the correct result for all -inf slices; silence the
    # divide-by-zero warning it would otherwise emit.
    with np.errstate(divide='ignore'):
        s = log(exp(a - a_max.reshape(shp)).sum(axis=axis))
    lse = a_max + s
    return lse
def run(*args):
    """Score all hypotheses on freshly generated data and write statistics.

    `args[0]` is the amount of data to generate. The posterior of every
    hypothesis in the global `hypotheses` is recomputed, the 25 best are
    written to the per-rank "-hypotheses" file, and per-word probabilities
    of having the right representation / response (plain, presupposition
    and literal variants) are written to the per-rank "-stats" file.
    Always returns 0.
    """
    dprintn(8, "# Generating data")
    global hypotheses

    RANK = str(MPI.COMM_WORLD.Get_rank())
    data_size = args[0]

    # How often do you get the right representation?
    p_representation = defaultdict(int)
    p_representation_presup = defaultdict(int)
    p_representation_literal = defaultdict(int)
    # How often do you get the right response?
    p_response = defaultdict(int)
    p_response_presup = defaultdict(int)
    p_response_literal = defaultdict(int)

    dprintn(8, "# Generating data")
    data = generate_data(data_size)

    # Recompute the posteriors on the new data.
    dprintn(8, "# Computing posterior")
    for hyp in hypotheses:
        hyp.compute_posterior(data)

    # Normalizer of the posterior over all hypotheses.
    dprintn(8, "# Computing normalizer")
    Z = logsumexp([hyp.posterior_score for hyp in hypotheses])

    # Output the top hypotheses.
    best = FiniteBestSet(max=True, N=25)
    for hyp in hypotheses:
        best.push(hyp, hyp.posterior_score)
    for position, hyp in enumerate(best.get_sorted()):
        for word in hyp.all_words():
            fprintn(8, data_size, position, word, hyp.posterior_score,
                    q(hyp.lex[word]),
                    f=options.OUT_PATH+"-hypotheses."+RANK+".txt")

    # Accumulate the probability of being correct. Each triple pairs the
    # two accumulators of one variant with its agreement table.
    dprintn(8, "# Computing correct probability")
    variants = (
        (p_representation, p_response, agree_pct),
        (p_representation_presup, p_response_presup, agree_pct_presup),
        (p_representation_literal, p_response_literal, agree_pct_literal),
    )
    for hyp in hypotheses:
        hyp_name = str(hyp)
        for word in words:
            weight = exp(hyp.posterior_score - Z)
            key = word + ":" + hyp_name
            for representation, response, agreement in variants:
                # If we always agree with the target, we count as the
                # right representation.
                representation[word] += weight * (agreement[key] == 1.)
                # And just how often does the hypothesis agree?
                response[word] += weight * agreement[key]

    dprintn(8, "# Outputting")
    for word in words:
        fprintn(10, rank, q(word), data_size,
                p_representation[word], p_representation_presup[word],
                p_representation_literal[word],
                p_response[word], p_response_presup[word],
                p_response_literal[word],
                f=options.OUT_PATH+"-stats."+RANK+".txt")

    return 0
def testErrs(options, args):
    """Evaluate and plot ``errsLogLike`` on a (dfeh, dafe) grid.

    The log-likelihood is computed on a 201 x 201 grid and checkpointed to
    ``args[0]`` after every completed row, so an interrupted run resumes
    from the saved ``(ii, jj)`` position. The normalized result is plotted
    to ``options.plotfile``; if ``options.prior`` is set, a Gaussian prior
    is added first.
    """
    ndfehs, ndafes = 201, 201
    dfehs = numpy.linspace(0.01, 0.4, ndfehs)
    dafes = numpy.linspace(0.01, 0.3, ndafes)
    if os.path.exists(args[0]):
        # Resume from checkpoint; the context manager closes the file even
        # if unpickling fails.
        with open(args[0], 'rb') as savefile:
            loglike = pickle.load(savefile)
            ii = pickle.load(savefile)
            jj = pickle.load(savefile)
    else:
        loglike = numpy.zeros((ndfehs, ndafes))
        ii, jj = 0, 0
    while ii < ndfehs:
        while jj < ndafes:
            sys.stdout.write('\r'+"Working on %i / %i" % (ii*ndafes+jj+1,
                                                          ndafes*ndfehs))
            sys.stdout.flush()
            loglike[ii, jj] = errsLogLike(dfehs[ii], dafes[jj], options)
            jj += 1
        ii += 1
        jj = 0
        # Checkpoint after each completed row.
        save_pickles(args[0], loglike, ii, jj)
    save_pickles(args[0], loglike, ii, jj)
    sys.stdout.write('\r'+_ERASESTR+'\r')
    sys.stdout.flush()
    if options.prior:
        prior = numpy.zeros((ndfehs, ndafes))
        for ii in range(ndfehs):
            prior[ii, :] = (-0.5*(dafes-0.1)**2./0.1**2.
                            - 0.5*(dfehs[ii]-0.2)**2./0.1**2.)
        loglike += prior
    # Normalize so that the density integrates to one over the grid.
    loglike -= maxentropy.logsumexp(loglike)
    loglike = numpy.exp(loglike)
    loglike /= numpy.sum(loglike)*(dfehs[1]-dfehs[0])*(dafes[1]-dafes[0])
    # Plot
    bovy_plot.bovy_print()
    bovy_plot.bovy_dens2d(loglike.T, origin='lower',
                          cmap='gist_yarg',
                          xlabel=r'\delta_{[\mathrm{Fe/H}]}',
                          ylabel=r'\delta_{[\alpha/\mathrm{Fe}]}',
                          xrange=[dfehs[0], dfehs[-1]],
                          yrange=[dafes[0], dafes[-1]],
                          contours=True,
                          cntrmass=True,
                          onedhists=True,
                          levels=special.erf(0.5*numpy.arange(1, 4)))
    if options.prior:
        bovy_plot.bovy_text(r'$\mathrm{with\ Gaussian\ prior:}$'+
                            '\n'+r'$\delta_{[\mathrm{Fe/H}]}= 0.2 \pm 0.1$'
                            +'\n'+r'$\delta_{[\alpha/\mathrm{Fe}]}= 0.1 \pm 0.1$',
                            top_right=True)
    bovy_plot.bovy_end_print(options.plotfile)
def bound(self, corpus, gamma=None, subsample_ratio=1.0):
    """
    Estimate the variational bound of documents from `corpus`:
    E_q[log p(corpus)] - E_q[log q(corpus)]

    `gamma` are the variational parameters on topic weights for each `corpus`
    document (=2d matrix=what comes out of `inference()`).
    If not supplied, will be inferred from the model.

    `subsample_ratio` rescales the document-level part of the score for the
    case where `corpus` is only a sample of the whole corpus.

    Returns the bound as a single score.
    """
    score = 0.0
    _lambda = self.state.get_lambda()
    Elogbeta = dirichlet_expectation(_lambda)

    for d, doc in enumerate(corpus):  # stream the input doc-by-doc, in case it's too large to fit in RAM
        if d % self.chunksize == 0:
            logger.debug("bound: at document #%i", d)
        if gamma is None:
            gammad, _ = self.inference([doc])
        else:
            gammad = gamma[d]
        Elogthetad = dirichlet_expectation(gammad)

        # E[log p(doc | theta, beta)]
        # Builtin sum: passing a generator to np.sum is deprecated (it only
        # worked by falling back to the builtin anyway).
        score += sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(word_id)])
                     for word_id, cnt in doc)

        # E[log p(theta | alpha) - log q(theta | gamma)]; assumes alpha is a vector
        score += np.sum((self.alpha - gammad) * Elogthetad)
        score += np.sum(gammaln(gammad) - gammaln(self.alpha))
        score += gammaln(np.sum(self.alpha)) - gammaln(np.sum(gammad))

    # Compensate likelihood for when `corpus` above is only a sample of the whole corpus. This ensures
    # that the likelihood is always roughly on the same scale.
    score *= subsample_ratio

    # E[log p(beta | eta) - log q (beta | lambda)]; eta may be a scalar or a vector
    score += np.sum((self.eta - _lambda) * Elogbeta)
    score += np.sum(gammaln(_lambda) - gammaln(self.eta))

    if np.ndim(self.eta) == 0:
        sum_eta = self.eta * self.num_terms
    else:
        sum_eta = np.sum(self.eta)

    score += np.sum(gammaln(sum_eta) - gammaln(np.sum(_lambda, 1)))

    return score
def neg_log_likelihood(theta_sparse, hb = None):
    """Return the L1-penalized negative log-likelihood for `theta_sparse`.

    `hb` may carry a precomputed `(h, b)` pair as returned by `dp`; when it
    is omitted the pair is computed here. The value is the log normalizer
    `logsumexp(h[0] + b[1])` minus the leading entry of every `h[k]`, plus
    `params['lambda']` times the absolute value of every parameter.
    """
    if hb is None:
        h, b = dp(theta_sparse)
    else:
        h, b = hb

    # Log normalizer minus the score terms h[0][0], h[1][0,0], ...
    total = logsumexp(h[0] + b[1])
    total -= h[0][0]
    order = params['M']
    for level in range(1, order):
        total -= h[level][0, 0]

    # L1 penalty on the sparse parameters.
    for key in theta_sparse:
        total += params['lambda'] * np.abs(theta_sparse[key])

    return total
def pvlosplate(params,vhelio,data,df,options,logpiso,logpisodwarf,iso):
    """
    NAME:
       pvlosplate
    PURPOSE:
       calculate the vlos probability for a given location
    INPUT:
       params - parameters of the model
       vhelio - heliocentric los velocity to evaluate
       data - data array for this location
       df - df object(s) (?)
       options - options (NOTE: options.multi is overwritten with 1)
       logpiso, logpisodwarf - precalculated isochrones
       iso - forwarded to mloglike
    OUTPUT:
       log of the probability
    HISTORY:
       2012-02-20 - Written - Bovy (IAS)
    """
    #Output is sum over data l,b,jk,h
    glon= data['GLON']*_DEGTORAD
    glat= data['GLAT']*_DEGTORAD
    sin_glon, cos_glon= numpy.sin(glon), numpy.cos(glon)
    sin_glat, cos_glat= numpy.sin(glat), numpy.cos(glat)
    color= data['J0MAG']-data['K0MAG']
    try:
        #Floor the color at 0.5 (known hack in the original)
        color[(color < 0.5)]= 0.5 #BOVY: FIX THIS HACK BY EMAILING GAIL
    except TypeError:
        pass #HACK
    hmag= data['H0MAG']
    options.multi= 1 #To avoid conflict
    #Same velocity for every data point
    velocities= numpy.zeros(len(data))+vhelio
    neg_loglike= mloglike(params,velocities,
                          glon,glat,color,hmag,
                          df,options,
                          sin_glon,cos_glon,
                          cos_glat,sin_glat,
                          logpiso,logpisodwarf,
                          True,None,iso,data['FEH']) #None iso for now
    return logsumexp(-neg_loglike)
def bound(self, corpus, gamma=None, subsample_ratio=1.0):
    """
    Estimate the variational bound of documents from `corpus`:
    E_q[log p(corpus)] - E_q[log q(corpus)]

    `gamma` are the variational parameters on topic weights for each `corpus`
    document (=2d matrix=what comes out of `inference()`).
    If not supplied, will be inferred from the model.

    `subsample_ratio` rescales the document-level part of the score for the
    case where `corpus` is only a sample of the whole corpus.

    Returns the bound as a single score.
    """
    score = 0.0
    _lambda = self.state.get_lambda()
    Elogbeta = dirichlet_expectation(_lambda)

    for d, doc in enumerate(corpus):  # stream the input doc-by-doc, in case it's too large to fit in RAM
        if d % self.chunksize == 0:
            logger.debug("bound: at document #%i", d)
        if gamma is None:
            gammad, _ = self.inference([doc])
        else:
            gammad = gamma[d]
        Elogthetad = dirichlet_expectation(gammad)

        # E[log p(doc | theta, beta)]
        # Use the builtin sum here: handing a generator to np.sum is
        # deprecated and merely delegated to the builtin.
        doc_terms = (cnt * logsumexp(Elogthetad + Elogbeta[:, int(word_id)])
                     for word_id, cnt in doc)
        score += sum(doc_terms)

        # E[log p(theta | alpha) - log q(theta | gamma)]; assumes alpha is a vector
        score += np.sum((self.alpha - gammad) * Elogthetad)
        score += np.sum(gammaln(gammad) - gammaln(self.alpha))
        score += gammaln(np.sum(self.alpha)) - gammaln(np.sum(gammad))

    # Compensate likelihood for when `corpus` above is only a sample of the whole corpus. This ensures
    # that the likelihood is always roughly on the same scale.
    score *= subsample_ratio

    # E[log p(beta | eta) - log q (beta | lambda)]; eta may be a scalar or a vector
    score += np.sum((self.eta - _lambda) * Elogbeta)
    score += np.sum(gammaln(_lambda) - gammaln(self.eta))

    if np.ndim(self.eta) == 0:
        sum_eta = self.eta * self.num_terms
    else:
        sum_eta = np.sum(self.eta)

    score += np.sum(gammaln(sum_eta) - gammaln(np.sum(_lambda, 1)))

    return score
def inference(self, doc):
    """
    Perform inference on a single document.

    Return 3-tuple of (likelihood of this document, word-topic distribution
    phi, expected word counts gamma (~topic distribution)).

    A document is simply a bag-of-words collection which supports len() and
    iteration over (wordIndex, wordCount) 2-tuples.

    The model itself is not affected in any way (this function is read-only aka const).
    """
    # init help structures
    totalWords = sum(wordCount for _, wordCount in doc)
    gamma = numpy.zeros(self.numTopics) + self.alpha + 1.0 * totalWords / self.numTopics
    phi = numpy.zeros(shape=(len(doc), self.numTopics)) + 1.0 / self.numTopics
    # numpy.nan instead of the numpy.NAN alias, which NumPy 2.0 removed.
    likelihood = likelihoodOld = converged = numpy.nan

    # variational estimate
    for i in range(self.VAR_MAX_ITER):
        # `converged` is NaN until two likelihoods exist, so the check can
        # only succeed from the third iteration onwards.
        if numpy.isfinite(converged) and converged <= self.VAR_CONVERGED:
            logging.debug("document converged in %i iterations" % i)
            break

        for n, (wordIndex, wordCount) in enumerate(doc):
            # compute phi vars, in log space, to prevent numerical nastiness
            tmp = digamma(gamma) + self.logProbW[:, wordIndex]  # vector operation

            # convert phi and update gamma
            newPhi = numpy.exp(tmp - logsumexp(tmp))
            gamma += wordCount * (newPhi - phi[n])
            phi[n] = newPhi

        likelihood = self.computeLikelihood(doc, phi, gamma)
        assert numpy.isfinite(likelihood)
        # Relative change of the likelihood between iterations.
        converged = numpy.divide(likelihoodOld - likelihood, likelihoodOld)
        likelihoodOld = likelihood
    return likelihood, phi, gamma
def _parse_hz_dict_indiv(self,hz): htype= hz.get('type','exp') if htype == 'exp': zd= hz.get('h',0.0375) th= lambda z, tzd=zd: 1./2./tzd*numpy.exp(-numpy.fabs(z)/tzd) tH= lambda z, tzd= zd: (numpy.exp(-numpy.fabs(z)/tzd)-1. +numpy.fabs(z)/tzd)*tzd/2. tdH= lambda z, tzd= zd: 0.5*numpy.sign(z)\ *(1.-numpy.exp(-numpy.fabs(z)/tzd)) elif htype == 'sech2': zd= hz.get('h',0.0375) th= lambda z, tzd=zd: 1./numpy.cosh(z/2./tzd)**2./4./tzd # Avoid overflow in cosh tH= lambda z, tzd= zd: \ tzd*(logsumexp(numpy.array([z/2./tzd,-z/2./tzd]),axis=0)\ -numpy.log(2.)) tdH= lambda z, tzd= zd: numpy.tanh(z/2./tzd)/2. return (th,tH,tdH)
def bound(self, corpus, gamma=None, subsample_ratio=1.0):
    """
    Estimate the variational bound of documents from `corpus`.

    `gamma` holds the variational parameters on topic weights, one entry per
    `corpus` document; if it is None they are inferred with
    `self.inference()`. `subsample_ratio` rescales the document-level part
    of the score for when `corpus` is only a sample of the whole corpus.

    Returns the bound as a single score.
    """
    score = 0.0
    _lambda = self.state.get_lambda()
    Elogbeta = dirichlet_expectation(_lambda)

    for d, doc in enumerate(corpus):
        if gamma is None:
            gammad, _ = self.inference([doc])
        else:
            gammad = gamma[d]
        Elogthetad = dirichlet_expectation(gammad)

        # E[log p(doc | theta, beta)]
        # Builtin sum: numpy.sum over a generator is deprecated and only
        # worked by delegating to the builtin.
        score += sum(cnt * logsumexp(Elogthetad + Elogbeta[:, word_id])
                     for word_id, cnt in doc)

        # E[log p(theta | alpha) - log q(theta | gamma)]
        score += numpy.sum((self.alpha - gammad) * Elogthetad)
        score += numpy.sum(gammaln(gammad) - gammaln(self.alpha))
        score += gammaln(numpy.sum(self.alpha)) - gammaln(numpy.sum(gammad))

    # Compensate for `corpus` being only a sample of the whole corpus.
    score *= subsample_ratio

    # E[log p(beta | eta) - log q(beta | lambda)]; the last term multiplies
    # eta by num_terms, i.e. it treats eta as a scalar.
    score += numpy.sum((self.eta - _lambda) * Elogbeta)
    score += numpy.sum(gammaln(_lambda) - gammaln(self.eta))
    score += numpy.sum(gammaln(self.eta * self.num_terms) - gammaln(numpy.sum(_lambda, 1)))
    return score
def _eval_sumgaussians(x,xamp,xmean,xcovar):
    """x array [ndata,ndim], return log

    Evaluate the log of a sum of Gaussians at every row of x.

    xamp - amplitudes [ngauss]; components with amplitude 0 are skipped
    xmean - means [ngauss,ndim]
    xcovar - covariances [ngauss,ndim,ndim]

    Returns an array [ndata] of log values.
    """
    ndata= x.shape[0]
    da= x.shape[1]
    out= numpy.zeros(ndata)
    if ndata == 0:
        #Nothing to evaluate; also avoids inverting covariances needlessly
        return out
    ngauss= len(xamp)
    #The inverse covariance and the normalization do not depend on the data
    #point, so compute them once per component instead of once per
    #(point, component) pair
    tinvs= [None for kk in range(ngauss)]
    lognorms= numpy.zeros(ngauss)
    for kk in range(ngauss):
        if xamp[kk] == 0.:
            continue
        tinvs[kk]= linalg.inv(xcovar[kk,:,:])
        lognorms[kk]= numpy.log(xamp[kk])\
            +0.5*numpy.log(linalg.det(tinvs[kk]))
    #Stand-in for log(0) used for zero-amplitude components
    logzero= numpy.finfo(numpy.dtype(numpy.float64)).min
    loglike= numpy.zeros(ngauss)
    for ii in range(ndata):
        for kk in range(ngauss):
            if tinvs[kk] is None:
                loglike[kk]= logzero
                continue
            delta= x[ii,:]-xmean[kk,:]
            loglike[kk]= lognorms[kk]\
                -0.5*numpy.dot(delta,numpy.dot(tinvs[kk],delta))+\
                da*_SQRTTWOPI
        out[ii]= maxentropy.logsumexp(loglike)
    return out
def bound(self, corpus, gamma=None):
    """
    Estimate the variational bound of documents from `corpus`.

    `gamma` are the variational parameters on topic weights (one for each
    document in `corpus`). If not supplied, will be automatically inferred
    from the model.

    Returns the bound as a single score.
    """
    score = 0.0
    Elogbeta = numpy.log(self.expElogbeta)

    for d, doc in enumerate(corpus):
        if d % self.chunks == 0:
            logger.info("PROGRESS: at document #%i", d)
        if gamma is None:
            gammad, _ = self.inference([doc])
        else:
            gammad = gamma[d, :]
        Elogthetad = dirichlet_expectation(gammad)
        word_ids = [word_id for word_id, _ in doc]
        cts = numpy.array([cnt for _, cnt in doc])
        # Per-word log normalizer over topics.
        phinorm = numpy.zeros(len(word_ids))
        for i, word_id in enumerate(word_ids):
            phinorm[i] = logsumexp(Elogthetad + Elogbeta[:, word_id])

        # E[log p(docs | theta, beta)]
        score += numpy.sum(cts * phinorm)

        # E[log p(theta | alpha) - log q(theta | gamma)]
        score += numpy.sum((self.alpha - gammad) * Elogthetad)
        score += numpy.sum(gammaln(gammad) - gammaln(self.alpha))
        score += gammaln(self.alpha * self.numTopics) - gammaln(
            numpy.sum(gammad))

    # E[log p(beta | eta) - log q (beta | lambda)]
    score += numpy.sum((self.eta - self._lambda) * Elogbeta)
    score += numpy.sum(gammaln(self._lambda) - gammaln(self.eta))
    score += numpy.sum(
        gammaln(self.eta * self.numTerms) -
        gammaln(numpy.sum(self._lambda, 1)))

    return score
def inference(self, doc):
    """
    Perform inference on a single document.

    Return 3-tuple of `(likelihood of this document, word-topic distribution
    phi, expected word counts gamma (~topic distribution))`.

    A document is simply a bag-of-words collection which supports len() and
    iteration over (wordIndex, wordCount) 2-tuples.

    The model itself is not affected in any way (this function is read-only
    aka const).
    """
    # init help structures
    totalWords = sum(wordCount for _, wordCount in doc)
    gamma = numpy.zeros(self.numTopics) + self.alpha + 1.0 * totalWords / self.numTopics
    phi = numpy.zeros(shape = (len(doc), self.numTopics)) + 1.0 / self.numTopics
    # The numpy.NAN alias was removed in NumPy 2.0; numpy.nan works on
    # every version.
    likelihood = likelihoodOld = converged = numpy.nan

    # variational estimate
    for i in range(self.VAR_MAX_ITER):
        # The relative change is NaN until two likelihood values exist, so
        # this check cannot trigger before the third pass.
        if numpy.isfinite(converged) and converged <= self.VAR_CONVERGED:
            logger.debug("document converged in %i iterations" % i)
            break

        for n, (wordIndex, wordCount) in enumerate(doc):
            # compute phi vars, in log space, to prevent numerical nastiness
            tmp = digamma(gamma) + self.logProbW[:, wordIndex] # vector operation

            # convert phi and update gamma
            newPhi = numpy.exp(tmp - logsumexp(tmp))
            gamma += wordCount * (newPhi - phi[n])
            phi[n] = newPhi

        likelihood = self.computeLikelihood(doc, phi, gamma)
        assert numpy.isfinite(likelihood)
        # Relative change of the likelihood since the previous pass.
        converged = numpy.divide(likelihoodOld - likelihood, likelihoodOld)
        likelihoodOld = likelihood
    return likelihood, phi, gamma
def bound(self, corpus, gamma=None):
    """
    Estimate the variational bound of documents from `corpus`.

    `gamma` are the variational parameters on topic weights (one for each
    document in `corpus`). If not supplied, will be automatically inferred
    from the model.

    Returns the bound as a single score.
    """
    score = 0.0
    Elogbeta = numpy.log(self.expElogbeta)

    for d, doc in enumerate(corpus):
        if d % self.chunks == 0:
            logger.info("PROGRESS: at document #%i", d)
        if gamma is None:
            gammad, _ = self.inference([doc])
        else:
            gammad = gamma[d, :]
        Elogthetad = dirichlet_expectation(gammad)
        term_ids = [term_id for term_id, _ in doc]
        cts = numpy.array([cnt for _, cnt in doc])
        # Log normalizer over topics, one entry per word of the document.
        phinorm = numpy.zeros(len(term_ids))
        for i, term_id in enumerate(term_ids):
            phinorm[i] = logsumexp(Elogthetad + Elogbeta[:, term_id])

        # E[log p(docs | theta, beta)]
        score += numpy.sum(cts * phinorm)

        # E[log p(theta | alpha) - log q(theta | gamma)]
        score += numpy.sum((self.alpha - gammad) * Elogthetad)
        score += numpy.sum(gammaln(gammad) - gammaln(self.alpha))
        score += gammaln(self.alpha * self.numTopics) - gammaln(numpy.sum(gammad))

    # E[log p(beta | eta) - log q (beta | lambda)]
    score += numpy.sum((self.eta - self._lambda) * Elogbeta)
    score += numpy.sum(gammaln(self._lambda) - gammaln(self.eta))
    score += numpy.sum(gammaln(self.eta * self.numTerms) -
                       gammaln(numpy.sum(self._lambda, 1)))

    return score
def dp(theta_sparse):
    """Run the dynamic program for the parameters `theta_sparse`.

    Returns `(h, b)`:
      * `h[0][w]` and `h[k][w_prev, w]` are the sums of the dense parameter
        vector multiplied by the matching `hits_pre` entries;
      * `b[k][w_prev]` is filled backwards from `b[M] = 0` as
        `logsumexp(h[k][w_prev] + b[k+1])`; `b[0]` is left as None.
    """
    order = params['M']
    theta = theta_dense(theta_sparse)

    # Scores: a vector for the first level, a matrix for each later level.
    h = [None] * order
    h[0] = np.empty(n_w[0])
    for cur in range(n_w[0]):
        h[0][cur] = np.sum(theta * hits_pre[0][cur])
    for level in range(1, order):
        table = np.empty((n_w[level-1], n_w[level]))
        for prev, cur in np.ndindex(n_w[level-1], n_w[level]):
            table[prev, cur] = np.sum(theta * hits_pre[level][prev, cur])
        h[level] = table

    # Backward recursion, starting from zeros at the last level.
    b = [None] * (order + 1)
    b[order] = np.zeros(n_w[order-1])
    for level in reversed(range(1, order)):
        column = np.empty(n_w[level-1])
        for prev in range(n_w[level-1]):
            column[prev] = logsumexp(h[level][prev] + b[level+1])
        b[level] = column

    return h, b
def _eval_gauss_grid(x,y,xamp,xmean,xcovar):
    """Evaluate the log of a sum of 2D Gaussians on the grid x X y

    x, y - grid coordinates along the two axes
    xamp - amplitudes [ngauss]; components with amplitude 0 are skipped
    xmean - means [ngauss,dim]
    xcovar - covariances [ngauss,dim,dim]

    Returns an array [len(x),len(y)] of log values.
    """
    nx= len(x)
    ny= len(y)
    out= numpy.zeros((nx,ny))
    if nx == 0 or ny == 0:
        #Empty grid; also avoids inverting covariances needlessly
        return out
    ngauss= len(xamp)
    dim= xmean.shape[1]
    #Inverse covariances and normalizations are the same for every grid
    #point: compute them once per component rather than nx*ny times
    tinvs= [None for kk in range(ngauss)]
    lognorms= numpy.zeros(ngauss)
    for kk in range(ngauss):
        if xamp[kk] == 0.:
            continue
        tinvs[kk]= numpy.linalg.inv(xcovar[kk,:,:])
        lognorms[kk]= numpy.log(xamp[kk])\
            +0.5*numpy.log(numpy.linalg.det(tinvs[kk]))
    #Stand-in for log(0) used for zero-amplitude components
    logzero= numpy.finfo(numpy.dtype(numpy.float64)).min
    loglike= numpy.zeros(ngauss)
    for ii in range(nx):
        for jj in range(ny):
            a= numpy.array([x[ii],y[jj]])
            for kk in range(ngauss):
                if tinvs[kk] is None:
                    loglike[kk]= logzero
                    continue
                delta= a-xmean[kk,:]
                loglike[kk]= lognorms[kk]\
                    -0.5*numpy.dot(delta,numpy.dot(tinvs[kk],delta))+\
                    dim*_SQRTTWOPI
            out[ii,jj]= logsumexp(loglike)
    return out
def pvlosplate(params, vhelio, data, df, options, logpiso, logpisodwarf, iso):
    """
    NAME:
       pvlosplate
    PURPOSE:
       calculate the vlos probability for a given location
    INPUT:
       params - parameters of the model
       vhelio - heliocentric los velocity to evaluate
       data - data array for this location
       df - df object(s) (?)
       options - options (NOTE: options.multi is overwritten with 1)
       logpiso, logpisodwarf - precalculated isochrones
       iso - forwarded to mloglike
    OUTPUT:
       log of the probability
    HISTORY:
       2012-02-20 - Written - Bovy (IAS)
    """
    #Output is sum over data l,b,jk,h
    lon_rad = data['GLON'] * _DEGTORAD
    lat_rad = data['GLAT'] * _DEGTORAD
    trig = {
        'sinl': numpy.sin(lon_rad),
        'cosl': numpy.cos(lon_rad),
        'sinb': numpy.sin(lat_rad),
        'cosb': numpy.cos(lat_rad),
    }
    jk_color = data['J0MAG'] - data['K0MAG']
    try:
        # Colors below 0.5 are clipped to 0.5 (acknowledged hack)
        jk_color[(jk_color < 0.5)] = 0.5  #BOVY: FIX THIS HACK BY EMAILING GAIL
    except TypeError:
        pass  #HACK
    h_mag = data['H0MAG']
    options.multi = 1  #To avoid conflict
    # Every star is evaluated at the same trial velocity
    trial_vlos = numpy.zeros(len(data)) + vhelio
    log_terms = -mloglike(params, trial_vlos, lon_rad, lat_rad, jk_color,
                          h_mag, df, options,
                          trig['sinl'], trig['cosl'],
                          trig['cosb'], trig['sinb'],
                          logpiso, logpisodwarf,
                          True, None, iso, data['FEH'])  #None iso for now
    return logsumexp(log_terms)
def bound(self, corpus, gamma=None, subsample_ratio=1.0):
    """
    Estimate the variational bound of documents from `corpus`.

    `gamma` holds the variational parameters on topic weights, one entry per
    `corpus` document; if it is None they are inferred with
    `self.inference()`. `subsample_ratio` rescales the document-level part
    of the score for when `corpus` is only a sample of the whole corpus.

    Returns the bound as a single score.
    """
    score = 0.0
    _lambda = self.state.get_lambda()
    Elogbeta = dirichlet_expectation(_lambda)

    for d, doc in enumerate(corpus):
        if gamma is None:
            gammad, _ = self.inference([doc])
        else:
            gammad = gamma[d]
        Elogthetad = dirichlet_expectation(gammad)

        # E[log p(doc | theta, beta)]
        # numpy.sum over a generator is deprecated (it just delegated to
        # the builtin), so call the builtin sum directly.
        word_terms = (cnt * logsumexp(Elogthetad + Elogbeta[:, term_id])
                      for term_id, cnt in doc)
        score += sum(word_terms)

        # E[log p(theta | alpha) - log q(theta | gamma)]
        score += numpy.sum((self.alpha - gammad) * Elogthetad)
        score += numpy.sum(gammaln(gammad) - gammaln(self.alpha))
        score += gammaln(numpy.sum(self.alpha)) - gammaln(
            numpy.sum(gammad))

    # Compensate for `corpus` being only a sample of the whole corpus.
    score *= subsample_ratio

    # E[log p(beta | eta) - log q(beta | lambda)]; the last term multiplies
    # eta by num_terms, i.e. it treats eta as a scalar.
    score += numpy.sum((self.eta - _lambda) * Elogbeta)
    score += numpy.sum(gammaln(_lambda) - gammaln(self.eta))
    score += numpy.sum(
        gammaln(self.eta * self.num_terms) -
        gammaln(numpy.sum(_lambda, 1)))
    return score
def run(*args):
    """Estimate, per word, the posterior probability of being correct.

    `args[0]` is the amount of data to generate. The posterior of every
    hypothesis in the global `hypotheses` is recomputed on that data, and
    for each word the posterior-weighted `correct` score is appended as one
    line "rank data_size probability word" to the file `OUT_PATH + rank`.
    Always returns 0.
    """
    dprintn(8, "# Generating data")
    global hypotheses

    data_size = args[0]

    # how often is each word right?
    here_correct = dict()
    for w in words:
        here_correct[w] = 0.0

    dprintn(8, "# Generating data")
    data = generate_data(data_size)

    # recompute these (plain loop: the calls are made for their side effect)
    dprintn(8, "# Computing posterior")
    for x in hypotheses:
        x.compute_posterior(data)

    # normalize the posterior in fs
    dprintn(8, "# Computing normalizer")
    Z = logsumexp([x.lp for x in hypotheses])

    # and compute the probability of being correct
    dprintn(8, "# Computing correct probability")
    for h in hypotheses:
        for w in words:
            # the posterior times the prob of agreement with the right one,
            # weighted by number of iterations
            here_correct[w] += exp(h.lp-Z) * correct[str(h)+":"+w]

    dprintn(8, "# Outputting")
    # The context manager closes the file even if formatting a row fails;
    # write() replaces the Python-2-only `print >>o` statement and produces
    # the same space-separated line.
    with open(OUT_PATH+str(rank), 'a') as o:
        for w in words:
            o.write("%s %s %s %s\n" % (rank, data_size, here_correct[w], q(w)))

    return 0
def func_bg(index):
    """
    Author: Marusa Zerjal, 2019 - 07 - 18

    Return the background log overlap of the star at position `index`.

    This is a module-level function so that it can be pickled for
    multiprocessing; it reads the module-level `star_means`, `star_covs`,
    `background_covs`, `background_means` and `nstars`.

    :param index: position of the star in `star_means` / `star_covs`
    :return: log of the summed overlaps with the background, or -inf if
        the computation failed
    """
    star_mean = star_means[index]
    star_cov = star_covs[index]
    try:
        bg_lnol = get_lnoverlaps(star_cov, star_mean, background_covs,
                                 background_means, nstars)
        bg_lnol = logsumexp(bg_lnol)  # sum in linear space

    # Do we really want to make exceptions here? If the sum fails then
    # there's something wrong with the data.
    # `except Exception` keeps the best-effort fallback but no longer
    # swallows KeyboardInterrupt / SystemExit like a bare `except:` did.
    except Exception:
        # TC: Changed sign to negative (surely if it fails, we want it to
        # have a negligible background overlap?)
        print('bg ln overlap failed, setting it to -inf')
        bg_lnol = -np.inf

    return bg_lnol
#Calculate vlos | los vlos= numpy.linspace(-200.,200.,options.nvlos) pvlos= numpy.zeros(options.nvlos) if not options.multi is None: pvlos= multi.parallel_map((lambda x: pvlosplate(params,vlos[x], thesedata,df,options, thislogpiso, thislogpisodwarf,iso)), range(options.nvlos), numcores=numpy.amin([len(vlos),multiprocessing.cpu_count(),options.multi])) else: for ii in range(options.nvlos): print ii pvlos[ii]= pvlosplate(params,vlos[ii],thesedata,df,options, thislogpiso,thislogpisodwarf,iso) pvlos-= logsumexp(pvlos) pvlos= numpy.exp(pvlos) if _PLOTZERO: pvloszero= numpy.zeros(options.nvlos) params[2]= -3.8 if not options.multi is None: pvloszero= multi.parallel_map((lambda x: pvlosplate(params,vlos[x], thesedata,df,options, thislogpiso, thislogpisodwarf,iso)), range(options.nvlos), numcores=numpy.amin([len(vlos),multiprocessing.cpu_count(),options.multi])) else: for ii in range(options.nvlos): print ii pvloszero[ii]= pvlosplate(params,vlos[ii],thesedata,df,options,
def plot_rovo(filename, plotfilename):
    """Plot the joint distribution of Omega_0 and R_0.

    `filename` is a pickle holding `params`. With `_ANALYTIC` set, the
    log-likelihood is evaluated on a 15 x 15 grid with all other parameters
    held fixed; otherwise `params` is treated as a sequence of samples and
    shown as a scatter/density plot. The figure is written to
    `plotfilename`.

    Raises IOError if `filename` does not exist.
    """
    if not os.path.exists(filename):
        raise IOError("given filename does not exist")
    # Context manager closes the file even when unpickling fails.
    with open(filename, 'rb') as savefile:
        params = pickle.load(savefile)
    if _ANALYTIC:
        #Calculate by fixing everything except for Ro and vo
        options = plot_pdfs.set_options(None)
        nros = 15
        noos = 15
        ros = numpy.linspace(7., 13., nros)
        oos = numpy.linspace(20., 30., noos)
        ll = numpy.zeros((noos, nros))
        for ii in range(noos):
            if _MULTI is not None:
                theseparamss = []
                for jj in range(nros):
                    theseparams = copy.copy(params)
                    theseparams[0] = oos[ii] * ros[jj] / _REFV0
                    theseparams[1] = ros[jj] / _REFR0
                    theseparamss.append(theseparams)
                thisll = multi.parallel_map(
                    (lambda x: numpy.sum(
                        logl.logl(init=theseparamss[x], options=options))),
                    range(nros),
                    numcores=numpy.amin(
                        [nros, _MULTI,
                         multiprocessing.cpu_count()]))
                ll[ii, :] = thisll
            else:
                for jj in range(nros):
                    theseparams = copy.copy(params)
                    theseparams[0] = oos[ii] * ros[jj] / _REFV0
                    theseparams[1] = ros[jj] / _REFR0
                    ll[ii, jj] = numpy.sum(
                        logl.logl(init=theseparams, options=options))
        #Normalize
        ll -= logsumexp(ll)
        ll = numpy.exp(ll)
        levels = list(special.erf(0.5 * numpy.arange(1, 4)))
        bovy_plot.bovy_dens2d(
            ll.T,
            origin='lower',
            levels=levels,
            xlabel=r'$\Omega_0\ [\mathrm{km\ s}^{-1}\ \mathrm{kpc}^{-1}]$',
            ylabel=r'$R_0\ [\mathrm{kpc}]$',
            xrange=[20., 35.],
            yrange=[7., 13.],
            contours=True,
            cntrcolors='k',
            onedhists=True,
            cmap='gist_yarg')
    else:
        vos = numpy.array([s[0] for s in params]) * _REFV0
        ros = numpy.array([s[1] for s in params]) * _REFR0
        bovy_plot.bovy_print()
        levels = list(special.erf(0.5 * numpy.arange(1, 4)))
        levels.append(1.01)  #HACK to not plot outliers
        bovy_plot.scatterplot(
            vos / ros,
            ros,
            'k,',
            levels=levels,
            xlabel=r'$\Omega_0\ [\mathrm{km\ s}^{-1}\ \mathrm{kpc}^{-1}]$',
            ylabel=r'$R_0\ [\mathrm{kpc}]$',
            bins=31,
            xrange=[200. / 8., 250. / 8.],
            yrange=[7., 9.],
            contours=True,
            cntrcolors='k',
            onedhists=True,
            cmap='gist_yarg')
    bovy_plot.bovy_end_print(plotfilename)
def plot_rovo(filename,plotfilename):
    """Plot the joint distribution of Omega_0 and R_0.

    filename - pickle holding params
    plotfilename - name of the file the figure is written to

    With _ANALYTIC set, the log-likelihood is evaluated on a 15 x 15 grid
    with all other parameters held fixed; otherwise params is treated as a
    sequence of samples and shown as a scatter/density plot.

    Raises IOError if filename does not exist.
    """
    if not os.path.exists(filename):
        raise IOError("given filename does not exist")
    #Context manager closes the file even when unpickling fails
    with open(filename,'rb') as savefile:
        params= pickle.load(savefile)
    if _ANALYTIC: #Calculate by fixing everything except for Ro and vo
        options= plot_pdfs.set_options(None)
        nros= 15
        noos= 15
        ros= numpy.linspace(7.,13.,nros)
        oos= numpy.linspace(20.,30.,noos)
        ll= numpy.zeros((noos,nros))
        for ii in range(noos):
            if _MULTI is not None:
                theseparamss= []
                for jj in range(nros):
                    theseparams= copy.copy(params)
                    theseparams[0]= oos[ii]*ros[jj]/_REFV0
                    theseparams[1]= ros[jj]/_REFR0
                    theseparamss.append(theseparams)
                thisll= multi.parallel_map((lambda x: numpy.sum(logl.logl(init=theseparamss[x],options=options))),
                                           range(nros),
                                           numcores=numpy.amin([nros,_MULTI,multiprocessing.cpu_count()]))
                ll[ii,:]= thisll
            else:
                for jj in range(nros):
                    theseparams= copy.copy(params)
                    theseparams[0]= oos[ii]*ros[jj]/_REFV0
                    theseparams[1]= ros[jj]/_REFR0
                    ll[ii,jj]= numpy.sum(logl.logl(init=theseparams,
                                                   options=options))
        #Normalize
        ll-= logsumexp(ll)
        ll= numpy.exp(ll)
        levels= list(special.erf(0.5*numpy.arange(1,4)))
        bovy_plot.bovy_dens2d(ll.T,origin='lower',levels=levels,
                              xlabel=r'$\Omega_0\ [\mathrm{km\ s}^{-1}\ \mathrm{kpc}^{-1}]$',
                              ylabel=r'$R_0\ [\mathrm{kpc}]$',
                              xrange=[20.,35.],
                              yrange=[7.,13.],
                              contours=True,
                              cntrcolors='k',
                              onedhists=True,
                              cmap='gist_yarg')
    else:
        vos= numpy.array([s[0] for s in params])*_REFV0
        ros= numpy.array([s[1] for s in params])*_REFR0
        bovy_plot.bovy_print()
        levels= list(special.erf(0.5*numpy.arange(1,4)))
        levels.append(1.01) #HACK to not plot outliers
        bovy_plot.scatterplot(vos/ros,ros,'k,',levels=levels,
                              xlabel=r'$\Omega_0\ [\mathrm{km\ s}^{-1}\ \mathrm{kpc}^{-1}]$',
                              ylabel=r'$R_0\ [\mathrm{kpc}]$',
                              bins=31,
                              xrange=[200./8.,250./8.],
                              yrange=[7.,9.],
                              contours=True,
                              cntrcolors='k',
                              onedhists=True,
                              cmap='gist_yarg')
    bovy_plot.bovy_end_print(plotfilename)
def _fit_orbit_mlogl(new_vxvv,vxvv,vxvv_err,pot,radec,lb,tmockAA,
                     ro,vo,obs):
    """The minus log likelihood for fitting an orbit

    The orbit defined by the six entries of new_vxvv is expanded into a set
    of phase-space points by tmockAA._parse_args; every row of vxvv is then
    compared with all of these points and the log-sum-exp of the Gaussian
    terms is accumulated. If radec or lb is set, the points are first
    converted to the matching observed coordinates. Errors default to 0.01
    per dimension when vxvv_err is None. pot is not used here.
    """
    #Use this _parse_args routine, which does forward and backward integration
    iR,ivR,ivT,iz,ivz,iphi= tmockAA._parse_args(True,False,
                                                new_vxvv[0],
                                                new_vxvv[1],
                                                new_vxvv[2],
                                                new_vxvv[3],
                                                new_vxvv[4],
                                                new_vxvv[5])
    if not (radec or lb):
        #Compare directly in cylindrical coordinates, shape=(2tintJ-1,6)
        orb_vxvv= nu.array([iR.flatten(),ivR.flatten(),ivT.flatten(),
                            iz.flatten(),ivz.flatten(),iphi.flatten()]).T
    else:
        #First transform to X,Y,Z,vX,vY,vZ (Galactic)
        X,Y,Z = coords.galcencyl_to_XYZ(iR.flatten(),iphi.flatten(),
                                        iz.flatten(),
                                        Xsun=obs[0]/ro,
                                        Ysun=obs[1]/ro,
                                        Zsun=obs[2]/ro)
        vX,vY,vZ = coords.galcencyl_to_vxvyvz(ivR.flatten(),ivT.flatten(),
                                              ivz.flatten(),iphi.flatten(),
                                              vsun=nu.array(\
                obs[3:6])/vo)
        #Nudge points that sit exactly at the origin
        bad_indx= (X == 0.)*(Y == 0.)*(Z == 0.)
        if True in bad_indx: X[bad_indx]+= ro/10000.
        sky= coords.rectgal_to_sphergal(X*ro,Y*ro,Z*ro,
                                        vX*vo,vY*vo,vZ*vo,
                                        degree=True)
        glon, glat, dist= sky[:,0], sky[:,1], sky[:,2]
        vlos, pmll, pmbb= sky[:,3], sky[:,4], sky[:,5]
        if lb:
            orb_vxvv= nu.array([glon,glat,dist,pmll,pmbb,vlos]).T
        else:
            #Further transform to ra,dec,pmra,pmdec
            equatorial= coords.lb_to_radec(glon,glat,degree=True)
            pmrapmdec= coords.pmllpmbb_to_pmrapmdec(pmll,pmbb,
                                                    glon,glat,
                                                    degree=True)
            orb_vxvv= nu.array([equatorial[:,0],equatorial[:,1],
                                dist,
                                pmrapmdec[:,0],pmrapmdec[:,1],
                                vlos]).T
    out= 0.
    for ii in range(vxvv.shape[0]):
        #Squared, error-scaled offsets of this data point to every orbit point
        sub_vxvv= (orb_vxvv-vxvv[ii,:].flatten())**2.
        if vxvv_err is None:
            sub_vxvv/= 0.01**2.
        else:
            sub_vxvv/= vxvv_err[ii,:]**2.
        out+= logsumexp(-0.5*nu.sum(sub_vxvv,axis=1))
    return -out
def localzdist(z,zsolar=0.019):
    """
    NAME:
       localzdist
    PURPOSE:
       log of the local metallicity distribution, expressed in Z
    INPUT:
       z - metallicity Z (scalar or array)
       zsolar= solar metallicity handed to isodist.Z2FEH
    OUTPUT:
       log of the two-Gaussian [Fe/H] mixture evaluated at each z, minus
       log(z); same shape as z
    """
    #From 2 Gaussian XD fit to Casagrande et al. (2011)
    feh= isodist.Z2FEH(z,zsolar=zsolar)
    #logaddexp combines the two components element by element; a logsumexp
    #over the stacked pair reduces over *all* entries and so collapsed an
    #array of z into a single number
    logfehdist= numpy.logaddexp(
        numpy.log(0.8)-numpy.log(0.15)-0.5*(feh-0.016)**2./0.15**2.,
        numpy.log(0.2)-numpy.log(0.22)-0.5*(feh+0.15)**2./0.22**2.)
    #presumably the Jacobian of the Z -> [Fe/H] change of variables
    return logfehdist-numpy.log(z)
def get_background_overlaps_with_covariances(background_means, star_means,
                                             star_covs):
    """
    author: Marusa Zerjal 2019 - 05 - 25

    Determine background overlaps using means and covariances for both
    background and stars.
    Every background point gets the same covariance matrix: the sample
    covariance of the background set scaled by the squared Scott's-rule
    bandwidth.

    Parameters
    ----------
    background_means: [nstars,6] float array_like
        Phase-space positions of some star set that greatly envelops points
        in question. Typically contents of gaia_xyzuvw.npy, or the output of
        >> tabletool.build_data_dict_from_table(
                   '../data/gaia_cartesian_full_6d_table.fits',
                    historical=True)['means']
    star_means: [npoints,6] float array_like
        Phase-space positions of stellar data that we are fitting components
        to. Not modified; a copy is flipped internally.
    star_covs: [npoints,6,6] float array_like
        Phase-space covariances of stellar data that we are fitting
        components to

    Returns
    -------
    bg_lnols: list of npoints floats
        Background log overlaps of stars with background probability density
        function. An entry is -inf when the overlap computation failed for
        that star.

    Notes
    -----
    We invert columns 2 and 5 (Z and W for an XYZUVW layout) because the
    typical background density should be symmetric along the vertical axis,
    and this distances stars from their siblings. I.e. association stars
    aren't assigned higher background overlaps by virtue of being an
    association star.

    Edits
    -----
    TC 2019-05-28: changed signature such that it follows similar usage as
                   get_kernel_densitites
    """
    # Inverting the vertical values
    star_means = np.copy(star_means)
    star_means[:, 2] *= -1
    star_means[:, 5] *= -1

    # Background covs with bandwidth using Scott's rule
    d = 6.0  # number of dimensions
    nstars = background_means.shape[0]
    bandwidth = nstars**(-1.0 / (d + 4.0))
    background_cov = np.cov(background_means.T) * bandwidth**2
    background_covs = np.array(nstars * [background_cov])  # same cov for every star

    # shapes of the c_get_lnoverlaps input must be: (6, 6), (6,), (120, 6, 6), (120, 6)
    # So I do it in a loop for every star
    bg_lnols = []
    for i, (star_mean, star_cov) in enumerate(zip(star_means, star_covs)):
        print('bgols', i)
        try:
            bg_lnol = get_lnoverlaps(star_cov, star_mean, background_covs,
                                     background_means, nstars)
            bg_lnol = logsumexp(bg_lnol)  # sum in linear space
        # Do we really want to make exceptions here? If the sum fails then
        # there's something wrong with the data.
        # `Exception` rather than a bare except, so Ctrl-C and SystemExit
        # still propagate.
        except Exception:
            # TC: Changed sign to negative (surely if it fails, we want it to
            # have a negligible background overlap?)
            print('bg ln overlap failed, setting it to -inf')
            bg_lnol = -np.inf
        bg_lnols.append(bg_lnol)
    # This should be parallelized
    return bg_lnols
def createFakeData(parser):
    """
    NAME:
       createFakeData
    PURPOSE:
       replace the observed line-of-sight velocities by velocities drawn
       from the model and write the resulting table to options.plotfile
    INPUT:
       parser - option parser; args[0] is the pickle file with the model
                parameters
    OUTPUT:
       None (table written with fitsio.write); nothing is done when no
       argument is given or when options.plotfile already exists
    """
    options, args = parser.parse_args()
    if len(args) == 0:
        parser.print_help()
        return
    if os.path.exists(options.plotfile):
        print("Outfile " + options.plotfile + " exists ...")
        print("Returning ...")
        return None
    #Read the data
    numpy.random.seed(options.seed)
    print("Reading the data ...")
    data = readVclosData(
        postshutdown=options.postshutdown,
        fehcut=options.fehcut,
        cohort=options.cohort,
        lmin=options.lmin,
        bmax=options.bmax,
        validfeh=options.indivfeh,  #if indivfeh, we need validfeh
        ak=True,
        cutmultiples=options.cutmultiples,
        jkmax=options.jkmax)
    #HACK
    indx = (data['J0MAG'] - data['K0MAG'] < 0.5)
    data['J0MAG'][indx] = 0.5 + data['K0MAG'][indx]
    #Set up the isochrone
    if not options.isofile is None and os.path.exists(options.isofile):
        print("Loading the isochrone model ...")
        with open(options.isofile, 'rb') as isofile:
            iso = pickle.load(isofile)
            if options.indivfeh:
                zs = pickle.load(isofile)
            elif options.varfeh:
                locl = pickle.load(isofile)
    else:
        print("Setting up the isochrone model ...")
        if options.indivfeh:
            #Load all isochrones
            iso = []
            zs = numpy.arange(0.0005, 0.03005, 0.0005)
            for ii in range(len(zs)):
                iso.append(
                    isomodel.isomodel(imfmodel=options.imfmodel,
                                      expsfh=options.expsfh,
                                      Z=zs[ii]))
        elif options.varfeh:
            locs = list(set(data['LOCATION']))
            iso = []
            for ii in range(len(locs)):
                indx = (data['LOCATION'] == locs[ii])
                locl = numpy.mean(data['GLON'][indx] * _DEGTORAD)
                iso.append(
                    isomodel.isomodel(imfmodel=options.imfmodel,
                                      expsfh=options.expsfh,
                                      marginalizefeh=True,
                                      glon=locl))
        else:
            iso = isomodel.isomodel(imfmodel=options.imfmodel,
                                    Z=options.Z,
                                    expsfh=options.expsfh)
        if options.dwarf:
            iso = [
                iso,
                isomodel.isomodel(imfmodel=options.imfmodel,
                                  Z=options.Z,
                                  dwarf=True,
                                  expsfh=options.expsfh)
            ]
        else:
            iso = [iso]
        if not options.isofile is None:
            with open(options.isofile, 'wb') as isofile:
                pickle.dump(iso, isofile)
                if options.indivfeh:
                    pickle.dump(zs, isofile)
                elif options.varfeh:
                    pickle.dump(locl, isofile)
    df = None
    print("Pre-calculating isochrone distance prior ...")
    logpiso = numpy.zeros((len(data), _BINTEGRATENBINS))
    ds = numpy.linspace(_BINTEGRATEDMIN, _BINTEGRATEDMAX, _BINTEGRATENBINS)
    dm = _dm(ds)
    #Colour of every star, computed once rather than inside the loops
    jk0 = data['J0MAG'] - data['K0MAG']
    for ii in range(len(data)):
        mh = data['H0MAG'][ii] - dm
        thisjk = numpy.zeros(_BINTEGRATENBINS) + jk0[ii]
        if options.indivfeh:
            #Find closest Z; the absolute value is required, the signed
            #difference always selected the largest grid metallicity
            thisZ = isodist.FEH2Z(data[ii]['FEH'])
            indx = numpy.argmin(numpy.fabs(thisZ - zs))
            logpiso[ii, :] = iso[0][indx](thisjk, mh)
        elif options.varfeh:
            #Find correct iso
            #NOTE(review): when built above, locl is the mean longitude of
            #the last location only, so this comparison yields a boolean
            #index -- confirm intent
            indx = (locl == data[ii]['LOCATION'])
            logpiso[ii, :] = iso[0][indx](thisjk, mh)
        else:
            logpiso[ii, :] = iso[0](thisjk, mh)
    if options.dwarf:
        logpisodwarf = numpy.zeros((len(data), _BINTEGRATENBINS))
        dwarfds = numpy.linspace(_BINTEGRATEDMIN_DWARF,
                                 _BINTEGRATEDMAX_DWARF, _BINTEGRATENBINS)
        dm = _dm(dwarfds)
        for ii in range(len(data)):
            mh = data['H0MAG'][ii] - dm
            logpisodwarf[ii, :] = iso[1](
                numpy.zeros(_BINTEGRATENBINS) + jk0[ii], mh)
    else:
        logpisodwarf = None
    #Load initial parameters from file
    with open(args[0], 'rb') as savefile:
        params = pickle.load(savefile)
    #Prep data
    l = data['GLON'] * _DEGTORAD
    b = data['GLAT'] * _DEGTORAD
    sinl = numpy.sin(l)
    cosl = numpy.cos(l)
    sinb = numpy.sin(b)
    cosb = numpy.cos(b)
    jk = data['J0MAG'] - data['K0MAG']
    jk[(jk < 0.5)] = 0.5  #BOVY: FIX THIS HACK BY EMAILING GAIL
    h = data['H0MAG']
    #Re-sample
    vlos = numpy.linspace(-200., 200., options.nvlos)
    pvlos = numpy.zeros((len(data), options.nvlos))
    if options.dwarf:
        thislogpisodwarf = logpisodwarf
    else:
        thislogpisodwarf = None

    def _loglike_at(vv):
        #Log likelihood of every star when all are placed at velocity vv
        return -mloglike(params,
                         numpy.zeros(len(data)) + vv, l, b, jk, h, df,
                         options, sinl, cosl, cosb, sinb, logpiso,
                         thislogpisodwarf, True, None, None, None)

    if not options.multi is None and options.multi > 1:
        thismulti = options.multi
        options.multi = 1  #To avoid conflict
        thispvlos = multi.parallel_map(
            (lambda x: _loglike_at(vlos[x])),
            range(options.nvlos),
            numcores=numpy.amin(
                [len(vlos), multiprocessing.cpu_count(), thismulti]))
        for jj in range(options.nvlos):
            pvlos[:, jj] = thispvlos[jj]
    else:
        for jj in range(options.nvlos):
            pvlos[:, jj] = _loglike_at(vlos[jj])
    for ii in range(len(data)):
        #Turn the log likelihoods into a normalized cumulative distribution
        pvlos[ii, :] -= logsumexp(pvlos[ii, :])
        pvlos[ii, :] = numpy.exp(pvlos[ii, :])
        pvlos[ii, :] = numpy.cumsum(pvlos[ii, :])
        pvlos[ii, :] /= pvlos[ii, -1]
        #Draw: first grid velocity whose cumulative probability reaches
        #the uniform deviate
        randindx = numpy.random.uniform()
        kk = 0
        while pvlos[ii, kk] < randindx:
            kk += 1
        data['VHELIO'][ii] = vlos[kk]
    #Dump raw
    fitsio.write(options.plotfile, data, clobber=True)
def eval_distpdf(ds,mdict=None,mivardict=None,logg=None,logg_ivar=None,
                 teff=None,teff_ivar=None,logage=None,logage_ivar=None,
                 Z=None,Z_ivar=None,feh=None,feh_ivar=None,
                 afe=None,afe_ivar=None,
                 padova=None,padova_type=None,
                 normalize=False,
                 ageprior=None):
    """
    NAME:
       eval_distpdf
    PURPOSE:
       evaluate the distance PDF for an object
    INPUT:
       ds- list or ndarray of distance (or a single distance), in kpc
       mdict= dictionary of apparent magnitudes (e.g., {'J':12.,'Ks':13.})
       mivardict= dictionary of magnitude inverse variances (matched to mdict)
       logg= observed logg
       logg_ivar= inverse variance of logg measurement
       teff= observed T_eff [K]
       teff_ivar= inverse variance of T_eff measurement
       logage= observed log_10 age [Gyr]
       logage_ivar= inverse variance of log_10 age measurement
       Z= observed metallicity
       Z_ivar= inverse variance of Z measurement
       feh= observed metallicity (alternative to Z; not implemented)
       feh_ivar= inverse variance of FeH measurement
       afe= observed [alpha/Fe]
       afe_ivar= [alpha/Fe] inverse variance
       padova= if True, use Padova isochrones, if set to a PadovaIsochrone
               objects, use this
       padova_type= type of PadovaIsochrone to use (e.g., 2mass-spitzer-wise)
       normalize= if True, normalize output PDF (default: False)
       ageprior= - None: flat in log age
                 - flat: flat in age
    OUTPUT:
       log of probability (array for list/ndarray input, single value
       otherwise)
    RAISES:
       NotImplementedError - if feh is given
       ValueError - if no isochrone set is specified through padova
    HISTORY:
       2011-04-28 - Written - Bovy (NYU)
    """
    #load isochrones
    iso= None
    if not padova is None and isinstance(padova,PadovaIsochrone):
        iso= padova
    elif not padova is None and isinstance(padova,bool) and padova:
        iso= PadovaIsochrone(type=padova_type)
    #Parse metallicity info
    if not feh is None:
        raise NotImplementedError("'feh' not yet implemented")
    if iso is None:
        #Fail with a clear message instead of an unbound-name error later on
        raise ValueError("no isochrones specified: set 'padova' to True or to a PadovaIsochrone instance")
    if mdict is None:
        #No photometry given: nothing to loop over below
        mdict= {}
    #set up output
    if isinstance(ds,(list,nu.ndarray)):
        scalarOut= False
        if isinstance(ds,list):
            _ds= nu.array(ds)
        else:
            _ds= ds
    else:
        #Anything else (float, int, numpy scalar) is a single distance
        scalarOut= True
        _ds= [ds]
    #Pre-calculate all absolute magnitudes
    absmagdict= {}
    for key in mdict.keys():
        absmagdict[key]= -_distmodulus(_ds)+mdict[key]
    #loop through isochrones
    ZS= iso.Zs()
    logages= iso.logages()
    allout= nu.zeros((len(_ds),len(ZS),len(logages)))
    for zz in range(len(ZS)):
        for aa in range(len(logages)):
            thisiso= iso(logages[aa],Z=ZS[zz])
            #Mass step between consecutive isochrone points
            dmpm= nu.roll(thisiso['M_ini'],-1)-thisiso['M_ini']
            loglike= nu.zeros((len(_ds),len(thisiso['M_ini'])-1))
            loglike-= nu.log(thisiso['M_ini'][-1])
            #NOTE(review): the loop starts at 1, so column 0 keeps only the
            #-log(M_ini[-1]) term and no data terms -- confirm intent
            for ii in range(1,len(thisiso['M_ini'])-1):
                if dmpm[ii] > 0.:
                    loglike[:,ii]+= nu.log(dmpm[ii])
                else:
                    loglike[:,ii]= nu.finfo(nu.dtype(nu.float64)).min
                    continue #no use in continuing here
                if not teff is None:
                    loglike[:,ii]-= (teff-10**thisiso['logTe'][ii])**2.*teff_ivar
                if not logg is None:
                    loglike[:,ii]-= (logg-thisiso['logg'][ii])**2.*logg_ivar
                for key in mdict.keys():
                    loglike[:,ii]-= (absmagdict[key]-thisiso[key][ii])**2.\
                        *mivardict[key]
            #marginalize over mass
            for jj in range(len(_ds)):
                allout[jj,zz,aa]= logsumexp(loglike[jj,:])
            #add age constraint and prior
            if not logage is None:
                allout[:,zz,aa]+= -(logage-logages[aa])**2.*logage_ivar
            if not ageprior is None:
                if isinstance(ageprior,str) and ageprior.lower() == 'flat':
                    allout[:,zz,aa]+= logages[aa]*_LOGTOLN
        #add Z constraint and prior
        if not Z is None:
            allout[:,zz,:]+= -(Z-ZS[zz])**2.*Z_ivar
    #prepare final output
    out= nu.zeros(len(_ds))
    for jj in range(len(_ds)):
        out[jj]= logsumexp(allout[jj,:,:])
    if normalize and not scalarOut:
        out-= logsumexp(out)+nu.log(ds[1]-ds[0])
    #return
    if scalarOut:
        return out[0]
    else:
        return out
pvlos = multi.parallel_map((lambda x: pvlosplate( params, vlos[x], thesedata, df, options, thislogpiso, thislogpisodwarf, iso)), range(options.nvlos), numcores=numpy.amin([ len(vlos), multiprocessing.cpu_count(), options.multi ])) else: for ii in range(options.nvlos): print ii pvlos[ii] = pvlosplate(params, vlos[ii], thesedata, df, options, thislogpiso, thislogpisodwarf, iso) pvlos -= logsumexp(pvlos) pvlos = numpy.exp(pvlos) if _PLOTZERO: pvloszero = numpy.zeros(options.nvlos) params[2] = -3.8 if not options.multi is None: pvloszero = multi.parallel_map( (lambda x: pvlosplate(params, vlos[ x], thesedata, df, options, thislogpiso, thislogpisodwarf, iso)), range(options.nvlos), numcores=numpy.amin([ len(vlos), multiprocessing.cpu_count(), options.multi ])) else:
def map_vc_like_simple(parser):
    """
    NAME:
       map_vc_like_simple
    PURPOSE:
       map the vc likelihood assuming knowledge of the DF
    INPUT:
       parser - from optparse
    OUTPUT:
       stuff as specified by the options
    HISTORY:
       2011-04-20 - Written - Bovy (NYU)
    """
    (options, args) = parser.parse_args()
    if not args:
        parser.print_help()
        sys.exit(-1)
    #Set up DF
    dfc = dehnendf(beta=0.,
                   profileParams=(options.rd, options.rs, options.so),
                   correct=True,
                   niter=20)
    #Load data
    with open(args[0], 'rb') as picklefile:
        out = pickle.load(picklefile)
    ndata = len(out)
    los = options.los * _DEGTORAD
    if options.linearfit:
        plot_linear(out, los, options, dfc)
        return None
    #Map likelihood
    vcirc = nu.linspace(options.vmin, options.vmax, options.nvcirc)
    #Annotation lines shared by both figures
    so_text = r'$\sigma_R(R_0) = %4.2f \ v_0$' % options.so
    los_text = r'$l = %i^\circ$' % round(options.los)
    if options.nbeta is None:
        #One-dimensional map in vc only
        like = nu.zeros(options.nvcirc)
        for ii in range(options.nvcirc):
            like[ii] = sum((single_vlos_loglike(vcirc[ii], out[jj], dfc,
                                                options, los)
                            for jj in range(ndata)), 0.)
        like -= logsumexp(like) + m.log(vcirc[1] - vcirc[0])
        #Calculate mean and sigma
        dvc = vcirc[1] - vcirc[0]
        vcmean = nu.sum(vcirc * nu.exp(like) * dvc)
        vc2mean = nu.sum(vcirc**2. * nu.exp(like) * dvc)
        #Plot
        bovy_plot.bovy_print()
        bovy_plot.bovy_plot(vcirc,
                            nu.exp(like),
                            'k-',
                            xlabel=r'$v_c / v_0$',
                            ylabel=r'$p(\mathrm{data} | v_c)$')
        bovy_plot.bovy_text('\n'.join([
            r'$\langle v_c \rangle = %4.2f \ v_0$' % vcmean,
            r'$\sqrt{\langle v_c^2 \rangle - \langle v_c \rangle^2} = %4.2f \ v_0$'
            % (m.sqrt(vc2mean - vcmean**2.)),
            so_text,
            los_text]),
                            top_left=True)
        bovy_plot.bovy_end_print(options.plotfilename)
    else:
        #Two-dimensional map in (vc,beta)
        betas = nu.linspace(options.betamin, options.betamax, options.nbeta)
        like = nu.zeros((options.nvcirc, options.nbeta))
        for ii in range(options.nvcirc):
            for kk in range(options.nbeta):
                like[ii, kk] = sum((single_vlos_loglike(vcirc[ii], out[jj],
                                                        dfc, options, los,
                                                        beta=betas[kk])
                                    for jj in range(ndata)), 0.)
        like -= logsumexp(like.flatten()) + m.log(vcirc[1] - vcirc[0])
        bovy_plot.bovy_print()
        bovy_plot.bovy_dens2d(nu.exp(like).T,
                              origin='lower',
                              xrange=[options.vmin, options.vmax],
                              yrange=[options.betamin, options.betamax],
                              aspect=(options.vmax - options.vmin) /
                              (options.betamax - options.betamin),
                              cmap='gist_yarg',
                              xlabel=r'$v_c / v_0$',
                              ylabel=r'$\beta$',
                              contours=True, cntrmass=True,
                              levels=[0.682, 0.954, 0.997])
        bovy_plot.bovy_text('\n'.join([so_text, los_text]),
                            top_left=True)
        bovy_plot.bovy_end_print(options.plotfilename)
def plot_bestfit(parser):
    """
    NAME:
       plot_bestfit
    PURPOSE:
       compare the mean line-of-sight velocity of each location in the data
       with the mean predicted by the model, as a function of Galactic
       longitude
    INPUT:
       parser - option parser; args[0] is the pickle file with the model
                parameters (options.index selects one entry when set)
    OUTPUT:
       None (figure saved to options.plotfilename); nothing is done when no
       argument or no plotfilename is given
    """
    (options, args) = parser.parse_args()
    if len(args) == 0 or options.plotfilename is None:
        parser.print_help()
        return
    # Read the data
    print("Reading the data ...")
    data = readVclosData(
        postshutdown=options.postshutdown,
        fehcut=options.fehcut,
        cohort=options.cohort,
        lmin=options.lmin,
        bmax=options.bmax,
        ak=True,
        cutmultiples=options.cutmultiples,
        validfeh=options.indivfeh,  # if indivfeh, we need validfeh
        jkmax=options.jkmax,
        datafilename=options.fakedata,
    )
    # HACK
    indx = data["J0MAG"] - data["K0MAG"] < 0.5
    data["J0MAG"][indx] = 0.5 + data["K0MAG"][indx]
    print("Using %i data points ..." % len(data))
    # Set up the isochrone
    if not options.isofile is None and os.path.exists(options.isofile):
        print("Loading the isochrone model ...")
        with open(options.isofile, "rb") as isofile:
            iso = pickle.load(isofile)
            # elif mirrors the dump below: only one extra object is stored
            if options.indivfeh:
                zs = pickle.load(isofile)
            elif options.varfeh:
                locl = pickle.load(isofile)
    else:
        print("Setting up the isochrone model ...")
        if options.indivfeh:
            # Load all isochrones
            iso = []
            zs = numpy.arange(0.0005, 0.03005, 0.0005)
            for ii in range(len(zs)):
                iso.append(isomodel.isomodel(imfmodel=options.imfmodel,
                                             expsfh=options.expsfh,
                                             Z=zs[ii]))
        elif options.varfeh:
            locs = list(set(data["LOCATION"]))
            iso = []
            for ii in range(len(locs)):
                indx = data["LOCATION"] == locs[ii]
                locl = numpy.mean(data["GLON"][indx] * _DEGTORAD)
                iso.append(
                    isomodel.isomodel(imfmodel=options.imfmodel,
                                      expsfh=options.expsfh,
                                      marginalizefeh=True, glon=locl)
                )
        else:
            iso = isomodel.isomodel(imfmodel=options.imfmodel, Z=options.Z,
                                    expsfh=options.expsfh)
        if options.dwarf:
            iso = [iso,
                   isomodel.isomodel(imfmodel=options.imfmodel, Z=options.Z,
                                     dwarf=True, expsfh=options.expsfh)]
        else:
            iso = [iso]
        if not options.isofile is None:
            with open(options.isofile, "wb") as isofile:
                pickle.dump(iso, isofile)
                if options.indivfeh:
                    pickle.dump(zs, isofile)
                elif options.varfeh:
                    pickle.dump(locl, isofile)
    df = None
    print("Pre-calculating isochrone distance prior ...")
    logpiso = numpy.zeros((len(data), _BINTEGRATENBINS))
    ds = numpy.linspace(_BINTEGRATEDMIN, _BINTEGRATEDMAX, _BINTEGRATENBINS)
    dm = _dm(ds)
    # Colour of every star, computed once rather than inside the loops
    jk0 = data["J0MAG"] - data["K0MAG"]
    for ii in range(len(data)):
        mh = data["H0MAG"][ii] - dm
        thisjk = numpy.zeros(_BINTEGRATENBINS) + jk0[ii]
        if options.indivfeh:
            # Find closest Z
            thisZ = isodist.FEH2Z(data[ii]["FEH"])
            indx = numpy.argmin(numpy.fabs(thisZ - zs))
            logpiso[ii, :] = iso[0][indx](thisjk, mh)
        elif options.varfeh:
            # Find correct iso
            indx = locl == data[ii]["LOCATION"]
            logpiso[ii, :] = iso[0][indx](thisjk, mh)
        else:
            logpiso[ii, :] = iso[0](thisjk, mh)
    if options.dwarf:
        logpisodwarf = numpy.zeros((len(data), _BINTEGRATENBINS))
        dwarfds = numpy.linspace(_BINTEGRATEDMIN_DWARF,
                                 _BINTEGRATEDMAX_DWARF, _BINTEGRATENBINS)
        dm = _dm(dwarfds)
        for ii in range(len(data)):
            mh = data["H0MAG"][ii] - dm
            logpisodwarf[ii, :] = iso[1](
                numpy.zeros(_BINTEGRATENBINS) + jk0[ii], mh)
    else:
        logpisodwarf = None
    # Calculate data means etc.
    locations = list(set(data["LOCATION"]))
    nlocs = len(locations)
    l_plate = numpy.zeros(nlocs)
    avg_plate = numpy.zeros(nlocs)
    # The dispersion statistics are only needed by a dispersion panel that
    # is not drawn at the moment; they are still computed as before
    sig_plate = numpy.zeros(nlocs)
    siga_plate = numpy.zeros(nlocs)
    sigerr_plate = numpy.zeros(nlocs)
    for ii in range(nlocs):
        indx = data["LOCATION"] == locations[ii]
        l_plate[ii] = numpy.mean(data["GLON"][indx])
        avg_plate[ii] = numpy.mean(data["VHELIO"][indx])
        sig_plate[ii] = numpy.std(data["VHELIO"][indx])
        # Standard error of the mean
        siga_plate[ii] = numpy.std(data["VHELIO"][indx]) / numpy.sqrt(numpy.sum(indx))
        sigerr_plate[ii] = bootstrap_sigerr(data["VHELIO"][indx])
    # Calculate plate means and variances from the model
    # Load initial parameters from file
    with open(args[0], "rb") as savefile:
        params = pickle.load(savefile)
    if not options.index is None:
        params = params[options.index]
    avg_plate_model = numpy.zeros(nlocs)
    sig_plate_model = numpy.zeros(nlocs)
    for ii in range(nlocs):
        # Calculate vlos | los
        indx = data["LOCATION"] == locations[ii]
        thesedata = data[indx]
        thislogpiso = logpiso[indx, :]
        if options.dwarf:
            thislogpisodwarf = logpisodwarf[indx, :]
        else:
            thislogpisodwarf = None
        vlos = numpy.linspace(-200.0, 200.0, options.nvlos)
        pvlos = numpy.zeros(options.nvlos)
        if not options.multi is None:
            pvlos = multi.parallel_map(
                (lambda x: pvlosplate(params, vlos[x], thesedata, df,
                                      options, thislogpiso,
                                      thislogpisodwarf, iso)),
                range(options.nvlos),
                numcores=numpy.amin([len(vlos), multiprocessing.cpu_count(),
                                     options.multi]),
            )
        else:
            for jj in range(options.nvlos):
                print(jj)
                pvlos[jj] = pvlosplate(params, vlos[jj], thesedata, df,
                                       options, thislogpiso,
                                       thislogpisodwarf, iso)
        pvlos -= logsumexp(pvlos)
        pvlos = numpy.exp(pvlos)
        # Calculate mean and velocity dispersion
        avg_plate_model[ii] = numpy.sum(vlos * pvlos)
        sig_plate_model[ii] = numpy.sqrt(numpy.sum(vlos ** 2.0 * pvlos)
                                         - avg_plate_model[ii] ** 2.0)
    # Plot everything
    left, bottom, width, height = 0.1, 0.4, 0.8, 0.5
    axTop = pyplot.axes([left, bottom, width, height])
    left, bottom, width, height = 0.1, 0.1, 0.8, 0.3
    axMean = pyplot.axes([left, bottom, width, height])
    fig = pyplot.gcf()
    fig.sca(axTop)
    pyplot.ylabel(r"$\mathrm{Heliocentric\ velocity}\ [\mathrm{km\ s}^{-1}]$")
    pyplot.xlabel(r"$\mathrm{Galactic\ longitude}\ [\mathrm{deg}]$")
    pyplot.xlim(0.0, 360.0)
    pyplot.ylim(-200.0, 200.0)
    nullfmt = NullFormatter()  # no labels
    axTop.xaxis.set_major_formatter(nullfmt)
    bovy_plot.bovy_plot(data["GLON"], data["VHELIO"], "k,",
                        yrange=[-200.0, 200.0], xrange=[0.0, 360.0],
                        overplot=True)
    bovy_plot.bovy_plot(l_plate, avg_plate, "o", overplot=True, mfc="0.5",
                        mec="none")
    bovy_plot.bovy_plot(l_plate, avg_plate_model, "x", overplot=True,
                        ms=10.0, mew=1.5, color="0.7")
    # Legend
    bovy_plot.bovy_plot([260.0], [150.0], "k,", overplot=True)
    bovy_plot.bovy_plot([260.0], [120.0], "o", mfc="0.5", mec="none",
                        overplot=True)
    bovy_plot.bovy_plot([260.0], [90.0], "x", ms=10.0, mew=1.5, color="0.7",
                        overplot=True)
    bovy_plot.bovy_text(270.0, 145.0, r"$\mathrm{data}$")
    bovy_plot.bovy_text(270.0, 115.0, r"$\mathrm{data\ mean}$")
    bovy_plot.bovy_text(270.0, 85.0, r"$\mathrm{model\ mean}$")
    bovy_plot._add_ticks()
    # Now plot the difference
    fig.sca(axMean)
    bovy_plot.bovy_plot([0.0, 360.0], [0.0, 0.0], "-", color="0.5",
                        overplot=True)
    bovy_plot.bovy_plot(l_plate, avg_plate - avg_plate_model, "ko",
                        overplot=True)
    pyplot.errorbar(
        l_plate, avg_plate - avg_plate_model, yerr=siga_plate, marker="o",
        color="k", linestyle="none", elinestyle="-"
    )
    pyplot.ylabel(r"$\bar{V}_{\mathrm{data}}-\bar{V}_{\mathrm{model}}$")
    pyplot.ylim(-14.5, 14.5)
    pyplot.xlim(0.0, 360.0)
    bovy_plot._add_ticks()
    pyplot.xlabel(r"$\mathrm{Galactic\ longitude}\ [\mathrm{deg}]$")
    pyplot.xlim(0.0, 360.0)
    bovy_plot._add_ticks()
    # Save
    bovy_plot.bovy_end_print(options.plotfilename)
    # The statements that used to follow this return (a dispersion-ratio
    # panel on an axis that is never created) could not run and were dropped
    return None
def classQSO(parser):
    """
    NAME:
       classQSO
    PURPOSE:
       evaluate, for every object, the log probability of its sampled
       (log gamma, log A) values under the quasar, star and RR Lyrae
       mixture models, and save the result together with RA and Dec
    INPUT:
       parser - option parser; args[0] is the pickle file with the samples,
                args[1] the pickle file with the best fits
    OUTPUT:
       None (results are handed to saveClass with options.outfile); returns
       early, after printing a message, when an input is missing or the
       output file already exists
    """
    (options,args)= parser.parse_args()
    #Both the samples file (args[0]) and the best-fit file (args[1]) are used
    if len(args) < 2:
        parser.print_help()
        return
    if os.path.exists(options.outfile):
        print(options.outfile+" exists")
        print("Remove this file before running ...")
        print("Returning ...")
        return None
    #Load fit params: quasars, stars, RR Lyrae; each file holds
    #amplitudes, means and covariances, in that order
    models= {}
    for label, modelname in (('qsomodel',options.qsomodel),
                             ('starmodel',options.starmodel),
                             ('rrlyraemodel',options.rrlyraemodel)):
        if not os.path.exists(modelname):
            print("Input to '%s' not recognized ..." % label)
            print("Returning ...")
            return
        with open(modelname,'rb') as modelfile:
            xamp= pickle.load(modelfile)
            xmean= pickle.load(modelfile)
            xcovar= pickle.load(modelfile)
        models[label]= (xamp,xmean,xcovar)
    #Restore samples
    savefilename= args[0]
    print("Reading data ...")
    if os.path.exists(savefilename):
        with open(savefilename,'rb') as savefile:
            samples= pickle.load(savefile)
    else:
        print("Input file does not exist ...")
        print("Returning ...")
        return
    #Restore best fits; the fit type is the one stored with the best fits
    savefilename= args[1]
    print("Reading best fits ...")
    if os.path.exists(savefilename):
        with open(savefilename,'rb') as savefile:
            params= pickle.load(savefile)
            fittype= pickle.load(savefile)
    else:
        print("Input file does not exist ...")
        print("Returning ...")
        return
    #Load the overall data, to later match back to ra and dec
    #('nuvx' must be tested first: it contains 'uvx')
    if 'nuvx' in args[0].lower():
        sources= fitsio.read('../data/nUVX_woname.fit')
    elif 'uvx' in args[0].lower():
        sources= fitsio.read('../data/uvx_woname.fit')
    else:
        #Without a catalog nothing below can work; stop with a message
        #rather than failing on an undefined name
        print("Input file name matches neither 'nuvx' nor 'uvx' ...")
        print("Returning ...")
        return
    sourcesDict= {}
    for ii in range(len(sources)):
        sourcesDict[sources[ii]['ONAME'].strip().replace(' ', '')+'.fit']= ii
    #Classify each source
    ndata= len(samples)
    print(ndata)
    logpxagamma_qso= numpy.zeros(ndata)
    logpxagamma_star= numpy.zeros(ndata)
    logpxagamma_rrlyrae= numpy.zeros(ndata)
    ras= numpy.zeros(ndata)
    decs= numpy.zeros(ndata)
    outgammas= numpy.zeros(ndata)
    outlogAs= numpy.zeros(ndata)
    for ii, key in enumerate(samples.keys()):
        sys.stdout.write('\r'+_ERASESTR+'\r')
        sys.stdout.flush()
        sys.stdout.write('\rWorking on %i / %i\r' % (ii+1,ndata))
        sys.stdout.flush()
        outgammas[ii]= params[key]['gamma'][0]
        outlogAs[ii]= params[key]['logA'][0]/2.
        if fittype == 'powerlawSF':
            #Stack as A,g,Ac,gc
            loggammas= []
            logAs= []
            try:
                for sample in samples[key]:
                    loggammas.append(numpy.log(sample['gamma'][0]))
                    logAs.append(sample['logA'][0])
            #RITABAN
            except TypeError:
                loggammas.append(numpy.log(samples[key]['gamma'][0]))
                logAs.append(samples[key]['logA'][0])
            loggammas= numpy.array(loggammas)
            logAs= numpy.array(logAs)
            weights= -loggammas #the 1/gamma to get a flat prior in log gamma, but expressed as log(1/gamma)
            weights-= maxentropy.logsumexp(weights) #sum weights = 1
            #Stack the data
            thisydata= numpy.column_stack(
                [numpy.reshape(loggammas,(len(loggammas),1)),
                 numpy.reshape(logAs,(len(logAs),1))])
        #NOTE(review): only 'powerlawSF' fills weights/thisydata -- confirm
        #no other fit type is expected here
        #Evaluate quasar/star/RR lyrae distributions
        logpxagamma_qso[ii]= maxentropy.logsumexp(
            weights+_eval_sumgaussians(thisydata,*models['qsomodel']))
        logpxagamma_star[ii]= maxentropy.logsumexp(
            weights+_eval_sumgaussians(thisydata,*models['starmodel']))
        logpxagamma_rrlyrae[ii]= maxentropy.logsumexp(
            weights+_eval_sumgaussians(thisydata,*models['rrlyraemodel']))
        #Find RA and Dec
        try:
            ratmp= sources[sourcesDict[key]]['RA']
            dectmp= sources[sourcesDict[key]]['DEC']
        except KeyError:
            print("Failed to match for RA and Dec ...")
            continue
        else:
            ras[ii]= ratmp
            decs[ii]= dectmp
    sys.stdout.write('\r'+_ERASESTR+'\r')
    sys.stdout.flush()
    #Save
    saveClass(logpxagamma_qso,
              logpxagamma_star,
              logpxagamma_rrlyrae,
              ras,decs,
              outgammas,outlogAs,
              options.outfile)
    return None
def plot_distanceprior(parser):
    """
    NAME:
       plot_distanceprior
    PURPOSE:
       plot the distance prior of the sample, binned in Galactocentric
       polar coordinates, with the lines of sight overlaid
    INPUT:
       parser - option parser (options only; no positional arguments are
                read)
    OUTPUT:
       None (figure saved to options.plotfile)
    """
    (options,args)= parser.parse_args()
    #Read the data
    print("Reading the data ...")
    data= readVclosData(postshutdown=options.postshutdown,
                        fehcut=options.fehcut,
                        cohort=options.cohort,
                        lmin=options.lmin,
                        bmax=options.bmax,
                        ak=True,
                        cutmultiples=options.cutmultiples,
                        validfeh=options.indivfeh, #if indivfeh, we need validfeh
                        jkmax=options.jkmax,
                        datafilename=options.fakedata)
    l= data['GLON']*_DEGTORAD
    b= data['GLAT']*_DEGTORAD
    sinl= numpy.sin(l)
    cosl= numpy.cos(l)
    sinb= numpy.sin(b)
    cosb= numpy.cos(b)
    jk= data['J0MAG']-data['K0MAG']
    jk[(jk < 0.5)]= 0.5 #BOVY: FIX THIS HACK BY EMAILING GAIL
    h= data['H0MAG']
    #Set up the isochrone
    if not options.isofile is None and os.path.exists(options.isofile):
        print("Loading the isochrone model ...")
        with open(options.isofile,'rb') as isofile:
            iso= pickle.load(isofile)
            if options.indivfeh:
                zs= pickle.load(isofile)
            elif options.varfeh:
                locl= pickle.load(isofile)
    else:
        print("Setting up the isochrone model ...")
        if options.indivfeh:
            #Load all isochrones
            iso= []
            zs= numpy.arange(0.0005,0.03005,0.0005)
            for ii in range(len(zs)):
                iso.append(isomodel.isomodel(imfmodel=options.imfmodel,
                                             expsfh=options.expsfh,
                                             Z=zs[ii]))
        elif options.varfeh:
            locs= list(set(data['LOCATION']))
            iso= []
            for ii in range(len(locs)):
                indx= (data['LOCATION'] == locs[ii])
                locl= numpy.mean(data['GLON'][indx]*_DEGTORAD)
                iso.append(isomodel.isomodel(imfmodel=options.imfmodel,
                                             expsfh=options.expsfh,
                                             marginalizefeh=True,
                                             glon=locl))
        else:
            iso= isomodel.isomodel(imfmodel=options.imfmodel,Z=options.Z,
                                   expsfh=options.expsfh)
    #The lookups below use iso[0], i.e. the model wrapped in a list the way
    #the other routines in this file pickle it. A freshly built model is
    #not wrapped, so bring both cases to the same layout.
    if options.indivfeh or options.varfeh:
        if not isinstance(iso[0],list):
            iso= [iso]
    elif not isinstance(iso,list):
        iso= [iso]
    #Set up polar grid
    res= 51
    xgrid= numpy.linspace(0.,2.*math.pi*(1.-1./res/2.),
                          2*res)
    ygrid= numpy.linspace(0.5,2.8,res)
    plotxgrid= numpy.linspace(xgrid[0]-(xgrid[1]-xgrid[0])/2.,
                              xgrid[-1]+(xgrid[1]-xgrid[0])/2.,
                              len(xgrid)+1)
    plotygrid= numpy.linspace(ygrid[0]-(ygrid[1]-ygrid[0])/2.,
                              ygrid[-1]+(ygrid[1]-ygrid[0])/2.,
                              len(ygrid)+1)
    #Most negative float, standing in for log(0)
    logzero= -numpy.finfo(numpy.dtype(numpy.float64)).max
    plotthis= numpy.zeros((2*res,res,len(data)))+logzero
    ds= numpy.linspace(_BINTEGRATEDMIN,_BINTEGRATEDMAX,_BINTEGRATENBINS)
    logpiso= numpy.zeros((len(data),_BINTEGRATENBINS))
    dm= _dm(ds)
    for ii in range(len(data)):
        mh= h[ii]-dm
        thisjk= numpy.zeros(_BINTEGRATENBINS)+jk[ii]
        if options.indivfeh:
            #Find closest Z
            thisZ= isodist.FEH2Z(data[ii]['FEH'])
            indx= numpy.argmin(numpy.fabs(thisZ-zs))
            logpiso[ii,:]= iso[0][indx](thisjk,mh)
        elif options.varfeh:
            #Find correct iso
            indx= (locl == data[ii]['LOCATION'])
            logpiso[ii,:]= iso[0][indx](thisjk,mh)
        else:
            logpiso[ii,:]= iso[0](thisjk,mh)
    staridx= numpy.arange(len(data))
    for jj in range(_BINTEGRATENBINS):
        d= ds[jj]/_REFR0
        R= numpy.sqrt(1.+d**2.-2.*d*cosl)
        indx= (R == 0.)
        R[indx]+= 0.0001
        theta= numpy.arcsin(d/R*sinl)
        indx= (1./cosl < d)*(cosl > 0.)
        theta[indx]= numpy.pi-theta[indx]
        indx= (theta < 0.)
        theta[indx]+= 2.*math.pi
        thisout= _logpd([0.,1.],d,None,None,
                        None,None,None,
                        options,R,theta,
                        1.,0.,logpiso[:,jj])
        #Find bin to which these contribute
        thetabin= numpy.floor((theta-xgrid[0])/(xgrid[1]-xgrid[0])+0.5)
        Rbin= numpy.floor((R-plotygrid[0])/(plotygrid[1]-plotygrid[0]))
        indx= (thetabin < 0)
        thetabin[indx]= 0
        Rbin[indx]= 0
        thisout[indx]= logzero
        #Wrap around: the bin past the last one is the first one again
        indx= (thetabin >= 2*res)
        thetabin[indx]= 0. #Has to be
        indx= (Rbin < 0)
        thetabin[indx]= 0
        Rbin[indx]= 0
        thisout[indx]= logzero
        indx= (Rbin >= res)
        thetabin[indx]= 0
        Rbin[indx]= 0
        thisout[indx]= logzero
        thetabin= thetabin.astype('int')
        Rbin= Rbin.astype('int')
        #Each star contributes to its own bin only; indexing with the full
        #thetabin/Rbin arrays for a single star wrote that star's value
        #into the bins of all stars
        #NOTE(review): a later distance falling in the same bin replaces
        #the earlier value instead of adding to it -- confirm intent
        plotthis[thetabin,Rbin,staridx]= thisout
    #Normalize
    for ii in range(2*res):
        for jj in range(res):
            plotthis[ii,jj,0]= logsumexp(plotthis[ii,jj,:])
    plotthis= plotthis[:,:,0]
    plotthis-= numpy.amax(plotthis)
    plotthis= numpy.exp(plotthis)
    plotthis[(plotthis == 0.)]= numpy.nan
    #Get los
    locations= list(set(data['LOCATION']))
    nlocs= len(locations)
    l_plate= numpy.zeros(nlocs)
    for ii in range(nlocs):
        indx= (data['LOCATION'] == locations[ii])
        l_plate[ii]= numpy.mean(data['GLON'][indx])
    bovy_plot.bovy_print()
    ax= pyplot.subplot(111,projection='galpolar')#galpolar is in bovy_plot
    vmin, vmax= 0., 1.
    out= ax.pcolor(plotxgrid,plotygrid,plotthis.T,cmap='gist_yarg',
                   vmin=vmin,vmax=vmax,zorder=2)
    #Overlay los
    for ii in range(nlocs):
        lds= numpy.linspace(0.,2.95,501)
        lr= numpy.sqrt(1.+lds**2.-2.*lds*numpy.cos(l_plate[ii]*_DEGTORAD))
        lt= numpy.arcsin(lds/lr*numpy.sin(l_plate[ii]*_DEGTORAD))
        indx= (1./numpy.cos(l_plate[ii]*_DEGTORAD) < lds)*(numpy.cos(l_plate[ii]*_DEGTORAD) > 0.)
        lt[indx]= numpy.pi-lt[indx]
        ax.plot(lt,lr,
                ls='--',color='w',zorder=3)
    from matplotlib.patches import FancyArrowPatch
    arr= FancyArrowPatch(posA=(-math.pi/2.,1.8),
                         posB=(-math.pi/4.,1.8),
                         arrowstyle='->',
                         connectionstyle='arc3,rad=%4.2f' % (-math.pi/16.),
                         shrinkA=2.0, shrinkB=2.0, mutation_scale=20.0,
                         mutation_aspect=None,fc='k')
    ax.add_patch(arr)
    bovy_plot.bovy_text(-math.pi/2.,1.97,r'$\mathrm{Galactic\ rotation}$',
                        rotation=-22.5)
    radii= numpy.array([0.5,1.,1.5,2.,2.5])
    labels= []
    for r in radii:
        ax.plot(numpy.linspace(0.,2.*math.pi,501,),
                numpy.zeros(501)+r,ls='-',color='0.65',zorder=1,lw=0.5)
        labels.append(r'$%i$' % int(r*8.))
    pyplot.rgrids(radii,labels=labels,angle=-32.5)
    bovy_plot.bovy_text(5.785,2.82,r'$\mathrm{kpc}$')
    azs= numpy.array([0.,45.,90.,135.,180.,225.,270.,315.])*_DEGTORAD
    for az in azs:
        ax.plot(numpy.zeros(501)+az,
                numpy.linspace(0.,2.8,501),'-',color='0.6',lw=0.5,zorder=1)
    #Sun
    bovy_plot.bovy_text(0.065,.9075,r'$\odot$')
    pyplot.ylim(0.,2.8)
    bovy_plot.bovy_end_print(options.plotfile)
def _fit_orbit_mlogl(new_vxvv, vxvv, vxvv_err, pot, radec, lb, customsky,
                     lb_to_customsky, pmllpmbb_to_customsky, tmockAA, ro, vo,
                     obs):
    """Minus the log likelihood of the data for a trial orbit.

    new_vxvv - trial phase-space point; its first six entries are handed to
       tmockAA._parse_args
    vxvv - data, one row per measurement, in the same six coordinates as the
       model track that is built here
    vxvv_err - uncertainties with the layout of vxvv, or None to adopt 0.01
       in every coordinate
    pot - not used in the body of this function
    radec, lb, customsky - when any of these is truthy the track is converted
       to sky coordinates before the comparison; lb is tested first, then
       radec, then customsky
    lb_to_customsky, pmllpmbb_to_customsky - position and proper-motion
       transformations, only called in the customsky case
    tmockAA - object whose _parse_args method returns the orbit track
    ro, vo - factors that multiply positions and velocities before the
       conversion to sky coordinates
    obs - observer phase-space point: obs[0]/ro and obs[2]/ro are passed as
       Xsun and Zsun, obs[3:6]/vo as vsun

    Returns minus the sum over measurements of the logsumexp, taken over the
    points of the track, of -0.5*chi^2.
    """
    # _parse_args does the forward and backward integration and returns the
    # cylindrical phase-space track
    iR, ivR, ivT, iz, ivz, iphi = tmockAA._parse_args(
        True, False, *[new_vxvv[kk] for kk in range(6)])
    if not (radec or lb or customsky):
        # No sky coordinates requested: compare directly in
        # (R,vR,vT,z,vz,phi); shape=(2tintJ-1,6)
        columns = [iR.flatten(), ivR.flatten(), ivT.flatten(),
                   iz.flatten(), ivz.flatten(), iphi.flatten()]
    else:
        # First go to Galactic rectangular X,Y,Z,vX,vY,vZ
        xsun = obs[0] / ro
        zsun = obs[2] / ro
        X, Y, Z = coords.galcencyl_to_XYZ(iR.flatten(),
                                          iphi.flatten(),
                                          iz.flatten(),
                                          Xsun=xsun,
                                          Zsun=zsun).T
        vX, vY, vZ = coords.galcencyl_to_vxvyvz(ivR.flatten(),
                                                ivT.flatten(),
                                                ivz.flatten(),
                                                iphi.flatten(),
                                                vsun=nu.array(obs[3:6]) / vo,
                                                Xsun=xsun,
                                                Zsun=zsun).T
        # Nudge points that sit exactly at the origin before the spherical
        # transformation
        at_origin = (X == 0.) * (Y == 0.) * (Z == 0.)
        if nu.any(at_origin):
            X[at_origin] += ro / 10000.
        sky = coords.rectgal_to_sphergal(X * ro, Y * ro, Z * ro,
                                         vX * vo, vY * vo, vZ * vo,
                                         degree=True)
        glon, glat, dist = sky[:, 0], sky[:, 1], sky[:, 2]
        vlos, pmll, pmbb = sky[:, 3], sky[:, 4], sky[:, 5]
        if lb:
            columns = [glon, glat, dist, pmll, pmbb, vlos]
        elif radec:
            # Further transform to ra,dec,pmra,pmdec
            equatorial = coords.lb_to_radec(glon, glat,
                                            degree=True, epoch=None)
            pm_equatorial = coords.pmllpmbb_to_pmrapmdec(pmll, pmbb,
                                                         glon, glat,
                                                         degree=True,
                                                         epoch=None)
            columns = [equatorial[:, 0], equatorial[:, 1], dist,
                       pm_equatorial[:, 0], pm_equatorial[:, 1], vlos]
        elif customsky:
            # Further transform to the user-supplied sky frame
            custom_pos = lb_to_customsky(glon, glat, degree=True)
            custom_pm = pmllpmbb_to_customsky(pmll, pmbb, glon, glat,
                                              degree=True)
            columns = [custom_pos[:, 0], custom_pos[:, 1], dist,
                       custom_pm[:, 0], custom_pm[:, 1], vlos]
    orb_vxvv = nu.array(columns).T
    # Each measurement is marginalized over the points along the track
    total = 0.
    for ii, datum in enumerate(vxvv):
        chi2_terms = (orb_vxvv - datum.flatten())**2.
        if vxvv_err is None:
            chi2_terms /= 0.01**2.
        else:
            chi2_terms /= vxvv_err[ii, :]**2.
        total += logsumexp(-0.5 * nu.sum(chi2_terms, axis=1))
    return -total
def logdotexp_vec_mat(loga, logM):
    """Return log(exp(loga) . exp(logM)) without leaving log space.

    loga - 1-D array-like of length n with the log of the vector
    logM - 2-D array-like of shape (n, m) with the log of the matrix

    Returns a 1-D numpy array of length m whose j-th entry is
    log(sum_i exp(loga[i] + logM[i, j])).
    """
    # numpy.logaddexp.reduce is a numerically stable log-sum-exp that needs
    # only numpy: scipy.maxentropy is no longer shipped with SciPy, and
    # numpy.array(list, copy=False) raises under NumPy 2 because building an
    # array from a list always copies.
    # Broadcasting loga against the transposed matrix reproduces the
    # column-by-column sum of the original loop.
    return numpy.logaddexp.reduce(numpy.asarray(loga) + numpy.asarray(logM).T,
                                  axis=-1)
def logdotexp_mat_vec(logM, logb):
    """Return log(exp(logM) . exp(logb)) without leaving log space.

    logM - 2-D array-like of shape (n, m) with the log of the matrix
    logb - 1-D array-like of length m with the log of the vector

    Returns a 1-D numpy array of length n whose i-th entry is
    log(sum_j exp(logM[i, j] + logb[j])).
    """
    # numpy.logaddexp.reduce is a numerically stable log-sum-exp that needs
    # only numpy: scipy.maxentropy is no longer shipped with SciPy, and
    # numpy.array(list, copy=False) raises under NumPy 2 because building an
    # array from a list always copies.
    # Broadcasting logb along the rows reproduces the row-by-row sum of the
    # original loop.
    return numpy.logaddexp.reduce(numpy.asarray(logM) + numpy.asarray(logb),
                                  axis=-1)
def plot_chi2(parser):
    """Plot mean-velocity residuals and Delta chi^2 per field versus longitude.

    The data are read with readVclosData, the isochrone distance prior is
    pre-computed for every star on the distance grid, and for every LOCATION
    the model line-of-sight velocity distribution is evaluated with
    pvlosplate for the parameters pickled in args[0]. The top panel shows the
    data-minus-model mean velocity of each field, the bottom panel the
    per-field -2 x (summed log likelihood minus the sample-average
    expectation).

    INPUT:
       parser - option parser whose parse_args() returns (options, args);
                args[0] is the pickle with the parameters and
                options.plotfilename must be set, otherwise the help is
                printed and nothing is plotted
    OUTPUT:
       None; the figure is written through bovy_plot.bovy_end_print
    """
    (options, args) = parser.parse_args()
    if len(args) == 0 or options.plotfilename is None:
        parser.print_help()
        return
    # Read the data
    print("Reading the data ...")
    data = readVclosData(
        postshutdown=options.postshutdown,
        fehcut=options.fehcut,
        cohort=options.cohort,
        lmin=options.lmin,
        bmax=options.bmax,
        ak=True,
        cutmultiples=options.cutmultiples,
        validfeh=options.indivfeh,  # if indivfeh, we need validfeh
        jkmax=options.jkmax,
        datafilename=options.fakedata)
    # HACK: stars bluer than J-Ks = 0.5 are moved to J-Ks = 0.5
    indx = (data['J0MAG'] - data['K0MAG'] < 0.5)
    data['J0MAG'][indx] = 0.5 + data['K0MAG'][indx]
    print("Using %i data points ..." % len(data))
    # Set up the isochrone
    if options.isofile is not None and os.path.exists(options.isofile):
        print("Loading the isochrone model ...")
        with open(options.isofile, 'rb') as isofile:
            iso = pickle.load(isofile)
            # Mirror the if/elif used when the file is written below, so a
            # second object is only read when one was actually dumped
            if options.indivfeh:
                zs = pickle.load(isofile)
            elif options.varfeh:
                locl = pickle.load(isofile)
    else:
        print("Setting up the isochrone model ...")
        if options.indivfeh:
            # Load all isochrones
            zs = numpy.arange(0.0005, 0.03005, 0.0005)
            iso = [isomodel.isomodel(imfmodel=options.imfmodel,
                                     expsfh=options.expsfh,
                                     Z=thisz) for thisz in zs]
        elif options.varfeh:
            locs = list(set(data['LOCATION']))
            iso = []
            for loc in locs:
                indx = (data['LOCATION'] == loc)
                locl = numpy.mean(data['GLON'][indx] * _DEGTORAD)
                iso.append(isomodel.isomodel(imfmodel=options.imfmodel,
                                             expsfh=options.expsfh,
                                             marginalizefeh=True,
                                             glon=locl))
        else:
            iso = isomodel.isomodel(imfmodel=options.imfmodel,
                                    Z=options.Z,
                                    expsfh=options.expsfh)
        if options.dwarf:
            iso = [iso,
                   isomodel.isomodel(imfmodel=options.imfmodel,
                                     Z=options.Z,
                                     dwarf=True,
                                     expsfh=options.expsfh)]
        else:
            iso = [iso]
        if options.isofile is not None:
            with open(options.isofile, 'wb') as isofile:
                pickle.dump(iso, isofile)
                if options.indivfeh:
                    pickle.dump(zs, isofile)
                elif options.varfeh:
                    pickle.dump(locl, isofile)
    df = None
    print("Pre-calculating isochrone distance prior ...")
    logpiso = numpy.zeros((len(data), _BINTEGRATENBINS))
    ds = numpy.linspace(_BINTEGRATEDMIN, _BINTEGRATEDMAX, _BINTEGRATENBINS)
    dm = _dm(ds)
    # Colour of every star, computed once instead of once per star
    jk = data['J0MAG'] - data['K0MAG']
    for ii in range(len(data)):
        mh = data['H0MAG'][ii] - dm
        thisjk = numpy.zeros(_BINTEGRATENBINS) + jk[ii]
        if options.indivfeh:
            # Find closest Z: the absolute difference is needed, the signed
            # difference is always smallest at the largest grid metallicity
            thisZ = isodist.FEH2Z(data[ii]['FEH'])
            indx = numpy.argmin(numpy.fabs(thisZ - zs))
            logpiso[ii, :] = iso[0][indx](thisjk, mh)
        elif options.varfeh:
            # Find correct iso
            # NOTE(review): locl is the mean longitude of the last field
            # processed above, yet it is compared with a LOCATION id here;
            # this branch looks unfinished -- confirm before relying on it
            indx = (locl == data[ii]['LOCATION'])
            logpiso[ii, :] = iso[0][indx](thisjk, mh)
        else:
            logpiso[ii, :] = iso[0](thisjk, mh)
    if options.dwarf:
        logpisodwarf = numpy.zeros((len(data), _BINTEGRATENBINS))
        dwarfds = numpy.linspace(_BINTEGRATEDMIN_DWARF,
                                 _BINTEGRATEDMAX_DWARF,
                                 _BINTEGRATENBINS)
        dm = _dm(dwarfds)
        for ii in range(len(data)):
            mh = data['H0MAG'][ii] - dm
            logpisodwarf[ii, :] = iso[1](
                numpy.zeros(_BINTEGRATENBINS) + jk[ii], mh)
    else:
        logpisodwarf = None
    # Load initial parameters from file
    with open(args[0], 'rb') as savefile:
        params = pickle.load(savefile)
    if options.index is not None:
        params = params[options.index]
    # Calculate data means etc.
    locations = list(set(data['LOCATION']))
    nlocs = len(locations)
    l_plate = numpy.zeros(nlocs)
    avg_plate = numpy.zeros(nlocs)
    sig_plate = numpy.zeros(nlocs)
    siga_plate = numpy.zeros(nlocs)
    sigerr_plate = numpy.zeros(nlocs)
    fidlogl = logl.logl(init=params, data=data, options=options)
    # Average log likelihood per star over the whole sample
    mean_logl = numpy.sum(fidlogl) / len(data)
    logl_plate = numpy.zeros(nlocs)
    for ii, loc in enumerate(locations):
        indx = (data['LOCATION'] == loc)
        nstars = numpy.sum(indx)
        thisvhelio = data['VHELIO'][indx]
        l_plate[ii] = numpy.mean(data['GLON'][indx])
        avg_plate[ii] = numpy.mean(thisvhelio)
        sig_plate[ii] = numpy.std(thisvhelio)
        siga_plate[ii] = numpy.std(thisvhelio) / numpy.sqrt(nstars)
        sigerr_plate[ii] = bootstrap_sigerr(thisvhelio)
        # Field log likelihood relative to what the sample average predicts
        # for this many stars
        logl_plate[ii] = -2. * (numpy.sum(fidlogl[indx]) - mean_logl * nstars)
    # Calculate plate means and variances from the model
    avg_plate_model = numpy.zeros(nlocs)
    sig_plate_model = numpy.zeros(nlocs)
    vlos = numpy.linspace(-200., 200., options.nvlos)
    for ii, loc in enumerate(locations):
        # Calculate vlos | los
        indx = (data['LOCATION'] == loc)
        thesedata = data[indx]
        thislogpiso = logpiso[indx, :]
        if options.dwarf:
            thislogpisodwarf = logpisodwarf[indx, :]
        else:
            thislogpisodwarf = None
        pvlos = numpy.zeros(options.nvlos)
        if options.multi is not None:
            pvlos = multi.parallel_map(
                (lambda x: pvlosplate(params, vlos[x], thesedata, df,
                                      options, thislogpiso,
                                      thislogpisodwarf, iso)),
                range(options.nvlos),
                numcores=numpy.amin([len(vlos),
                                     multiprocessing.cpu_count(),
                                     options.multi]))
        else:
            for jj in range(options.nvlos):
                print(jj)
                pvlos[jj] = pvlosplate(params, vlos[jj], thesedata, df,
                                       options, thislogpiso,
                                       thislogpisodwarf, iso)
        # Normalize in log space before exponentiating
        pvlos -= logsumexp(pvlos)
        pvlos = numpy.exp(pvlos)
        # Calculate mean and velocity dispersion
        avg_plate_model[ii] = numpy.sum(vlos * pvlos)
        sig_plate_model[ii] = numpy.sqrt(numpy.sum(vlos**2. * pvlos)
                                         - avg_plate_model[ii]**2.)
    # Plot everything
    left, bottom, width, height = 0.1, 0.4, 0.8, 0.3
    axTop = pyplot.axes([left, bottom, width, height])
    left, bottom, width, height = 0.1, 0.1, 0.8, 0.3
    axChi2 = pyplot.axes([left, bottom, width, height])
    fig = pyplot.gcf()
    # Plot the difference
    fig.sca(axTop)
    bovy_plot.bovy_plot([0., 360.], [0., 0.], '-', color='0.5', overplot=True)
    bovy_plot.bovy_plot(l_plate, avg_plate - avg_plate_model, 'ko',
                        overplot=True)
    pyplot.errorbar(l_plate, avg_plate - avg_plate_model,
                    yerr=siga_plate, marker='o', color='k',
                    linestyle='none', elinestyle='-')
    pyplot.ylabel(r'$\langle v_{\mathrm{los}}\rangle_{\mathrm{data}}-\langle v_{\mathrm{los}}\rangle_{\mathrm{model}}$')
    pyplot.ylim(-14.5, 14.5)
    pyplot.xlim(0., 360.)
    bovy_plot._add_ticks()
    nullfmt = NullFormatter()  # no labels
    axTop.xaxis.set_major_formatter(nullfmt)
    pyplot.xlim(0., 360.)
    bovy_plot._add_ticks()
    # Plot the chi2
    fig.sca(axChi2)
    bovy_plot.bovy_plot([0., 360.], [0., 0.], '-', color='0.5', overplot=True)
    bovy_plot.bovy_plot(l_plate, logl_plate, 'ko', overplot=True)
    pyplot.ylabel(r'$\Delta \chi^2$')
    pyplot.ylim(-150., 150.)
    pyplot.xlim(0., 360.)
    bovy_plot._add_ticks()
    pyplot.xlabel(r'$\mathrm{Galactic\ longitude}\ [\mathrm{deg}]$')
    pyplot.xlim(0., 360.)
    bovy_plot._add_ticks()
    # Save
    bovy_plot.bovy_end_print(options.plotfilename)
    return None
# Every rank receives the full set of background components from rank 0
background_means = comm.bcast(background_means, root=0)
background_covs = comm.bcast(background_covs, root=0)

# SCATTER DATA
# Each rank receives its own share of the stars from rank 0
star_means = comm.scatter(star_means, root=0)
star_covs = comm.scatter(star_covs, root=0)
#print(rank, len(star_means))

# EVERY PROCESS DOES THIS FOR ITS DATA
bg_ln_ols = []
for star_cov, star_mean in zip(star_covs, star_means):
    try:
        bg_lnol = get_lnoverlaps(star_cov, star_mean, background_covs,
                                 background_means, nstars)
        bg_lnol = logsumexp(bg_lnol)  # sum in linear space
    except Exception:
        # Only ordinary errors are absorbed here: a bare except would also
        # swallow KeyboardInterrupt and SystemExit, so a run could not be
        # interrupted while inside this loop.
        # TC: Changed sign to negative (surely if it fails, we want it to
        # have a negligible background overlap?)
        print('bg ln overlap failed, setting it to -inf')
        bg_lnol = -np.inf
    bg_ln_ols.append(bg_lnol)
#print(rank, bg_ln_ols)

# GATHER DATA
# Rank 0 gets one list per rank; the other ranks get None from gather
bg_ln_ols_result = comm.gather(bg_ln_ols, root=0)
if rank == 0:
    # Flatten the per-rank lists into a single list before saving
    bg_ln_ols_result = list(itertools.chain.from_iterable(bg_ln_ols_result))
    np.savetxt('bgols_multiprocessing_%d.dat' % NI, bg_ln_ols_result)
def map_vc_like_simple(parser):
    """
    NAME:
       map_vc_like_simple
    PURPOSE:
       map the vc likelihood assuming knowledge of the DF
    INPUT:
       parser - from optparse; args[0] is the pickle with the data, the
                options read here are rd, rs, so, los, linearfit, vmin,
                vmax, nvcirc, nbeta, betamin, betamax and plotfilename
    OUTPUT:
       stuff as specified by the options: a 2D (vc,beta) density when nbeta
       is set, otherwise the 1D likelihood of vc annotated with its mean
       and dispersion
    HISTORY:
       2011-04-20 - Written - Bovy (NYU)
    """
    (options, args) = parser.parse_args()
    if len(args) == 0:
        parser.print_help()
        sys.exit(-1)
    #Set up DF
    dfc = dehnendf(beta=0.,
                   profileParams=(options.rd, options.rs, options.so),
                   correct=True, niter=20)
    #Load data; the context manager closes the file even if unpickling fails
    with open(args[0], 'rb') as picklefile:
        out = pickle.load(picklefile)
    ndata = len(out)
    los = options.los * _DEGTORAD
    if options.linearfit:
        plot_linear(out, los, options, dfc)
        return None
    #Map likelihood
    vcirc = nu.linspace(options.vmin, options.vmax, options.nvcirc)
    dvc = vcirc[1] - vcirc[0]
    if options.nbeta is not None:
        betas = nu.linspace(options.betamin, options.betamax, options.nbeta)
        like = nu.zeros((options.nvcirc, options.nbeta))
        for ii in range(options.nvcirc):
            for kk in range(options.nbeta):
                #Sum the log likelihood of all data points at this (vc,beta)
                thislike = 0.
                for jj in range(ndata):
                    thislike += single_vlos_loglike(vcirc[ii], out[jj], dfc,
                                                    options, los,
                                                    beta=betas[kk])
                like[ii, kk] = thislike
        like -= logsumexp(like.flatten()) + m.log(dvc)
        bovy_plot.bovy_print()
        bovy_plot.bovy_dens2d(nu.exp(like).T,
                              origin='lower',
                              xrange=[options.vmin, options.vmax],
                              yrange=[options.betamin, options.betamax],
                              aspect=(options.vmax - options.vmin)
                              / (options.betamax - options.betamin),
                              cmap='gist_yarg',
                              xlabel=r'$v_c / v_0$',
                              ylabel=r'$\beta$',
                              contours=True, cntrmass=True,
                              levels=[0.682, 0.954, 0.997])
        bovy_plot.bovy_text('\n'.join([
            r'$\sigma_R(R_0) = %4.2f \ v_0$' % options.so,
            r'$l = %i^\circ$' % round(options.los)]),
                            top_left=True)
        bovy_plot.bovy_end_print(options.plotfilename)
    else:
        like = nu.zeros(options.nvcirc)
        for ii in range(options.nvcirc):
            thislike = 0.
            for jj in range(ndata):
                thislike += single_vlos_loglike(vcirc[ii], out[jj], dfc,
                                                options, los)
            like[ii] = thislike
        #Normalize such that exp(like) integrates to one over the vc grid
        like -= logsumexp(like) + m.log(dvc)
        #Calculate mean and sigma
        plike = nu.exp(like)
        vcmean = nu.sum(vcirc * plike * dvc)
        vc2mean = nu.sum(vcirc**2. * plike * dvc)
        #For a likelihood concentrated in a single grid point the variance
        #can round to a tiny negative number, which math.sqrt rejects
        vcsigma = m.sqrt(max(vc2mean - vcmean**2., 0.))
        #Plot
        bovy_plot.bovy_print()
        bovy_plot.bovy_plot(vcirc, plike, 'k-',
                            xlabel=r'$v_c / v_0$',
                            ylabel=r'$p(\mathrm{data} | v_c)$')
        bovy_plot.bovy_text('\n'.join([
            r'$\langle v_c \rangle = %4.2f \ v_0$' % vcmean,
            r'$\sqrt{\langle v_c^2 \rangle - \langle v_c \rangle^2} = %4.2f \ v_0$' % vcsigma,
            r'$\sigma_R(R_0) = %4.2f \ v_0$' % options.so,
            r'$l = %i^\circ$' % round(options.los)]),
                            top_left=True)
        bovy_plot.bovy_end_print(options.plotfilename)
def createFakeData(parser):
    """Replace the observed velocities by draws from the model and save them.

    The data are read with readVclosData, the isochrone distance prior is
    pre-computed for every star, and -mloglike is evaluated on a grid of
    options.nvlos line-of-sight velocities between -200 and 200 for the
    parameters pickled in args[0]. For every star one velocity is drawn from
    the resulting distribution and stored in data['VHELIO']; the modified
    catalog is written to options.plotfile with fitsio.

    INPUT:
       parser - option parser whose parse_args() returns (options, args);
                args[0] is the pickle with the model parameters
    OUTPUT:
       None; nothing is done when args is empty or when options.plotfile
       already exists
    """
    options, args = parser.parse_args()
    if len(args) == 0:
        parser.print_help()
        return
    if os.path.exists(options.plotfile):
        print("Outfile " + options.plotfile + " exists ...")
        print("Returning ...")
        return None
    # Read the data
    numpy.random.seed(options.seed)
    print("Reading the data ...")
    data = readVclosData(
        postshutdown=options.postshutdown,
        fehcut=options.fehcut,
        cohort=options.cohort,
        lmin=options.lmin,
        bmax=options.bmax,
        validfeh=options.indivfeh,  # if indivfeh, we need validfeh
        ak=True,
        cutmultiples=options.cutmultiples,
        jkmax=options.jkmax)
    # HACK: stars bluer than J-Ks = 0.5 are moved to J-Ks = 0.5
    indx = (data['J0MAG'] - data['K0MAG'] < 0.5)
    data['J0MAG'][indx] = 0.5 + data['K0MAG'][indx]
    # Set up the isochrone
    if options.isofile is not None and os.path.exists(options.isofile):
        print("Loading the isochrone model ...")
        with open(options.isofile, 'rb') as isofile:
            iso = pickle.load(isofile)
            if options.indivfeh:
                zs = pickle.load(isofile)
            elif options.varfeh:
                locl = pickle.load(isofile)
    else:
        print("Setting up the isochrone model ...")
        if options.indivfeh:
            # Load all isochrones
            zs = numpy.arange(0.0005, 0.03005, 0.0005)
            iso = [isomodel.isomodel(imfmodel=options.imfmodel,
                                     expsfh=options.expsfh,
                                     Z=thisz) for thisz in zs]
        elif options.varfeh:
            locs = list(set(data['LOCATION']))
            iso = []
            for loc in locs:
                indx = (data['LOCATION'] == loc)
                locl = numpy.mean(data['GLON'][indx] * _DEGTORAD)
                iso.append(isomodel.isomodel(imfmodel=options.imfmodel,
                                             expsfh=options.expsfh,
                                             marginalizefeh=True,
                                             glon=locl))
        else:
            iso = isomodel.isomodel(imfmodel=options.imfmodel,
                                    Z=options.Z,
                                    expsfh=options.expsfh)
        if options.dwarf:
            iso = [iso,
                   isomodel.isomodel(imfmodel=options.imfmodel,
                                     Z=options.Z,
                                     dwarf=True,
                                     expsfh=options.expsfh)]
        else:
            iso = [iso]
        if options.isofile is not None:
            with open(options.isofile, 'wb') as isofile:
                pickle.dump(iso, isofile)
                if options.indivfeh:
                    pickle.dump(zs, isofile)
                elif options.varfeh:
                    pickle.dump(locl, isofile)
    df = None
    print("Pre-calculating isochrone distance prior ...")
    logpiso = numpy.zeros((len(data), _BINTEGRATENBINS))
    ds = numpy.linspace(_BINTEGRATEDMIN, _BINTEGRATEDMAX, _BINTEGRATENBINS)
    dm = _dm(ds)
    # Colour of every star, computed once instead of once per star
    color = data['J0MAG'] - data['K0MAG']
    for ii in range(len(data)):
        mh = data['H0MAG'][ii] - dm
        thiscolor = numpy.zeros(_BINTEGRATENBINS) + color[ii]
        if options.indivfeh:
            # Find closest Z: the absolute difference is needed, the signed
            # difference is always smallest at the largest grid metallicity
            thisZ = isodist.FEH2Z(data[ii]['FEH'])
            indx = numpy.argmin(numpy.fabs(thisZ - zs))
            logpiso[ii, :] = iso[0][indx](thiscolor, mh)
        elif options.varfeh:
            # Find correct iso
            # NOTE(review): locl is the mean longitude of the last field
            # processed above, yet it is compared with a LOCATION id here;
            # this branch looks unfinished -- confirm before relying on it
            indx = (locl == data[ii]['LOCATION'])
            logpiso[ii, :] = iso[0][indx](thiscolor, mh)
        else:
            logpiso[ii, :] = iso[0](thiscolor, mh)
    if options.dwarf:
        logpisodwarf = numpy.zeros((len(data), _BINTEGRATENBINS))
        dwarfds = numpy.linspace(_BINTEGRATEDMIN_DWARF,
                                 _BINTEGRATEDMAX_DWARF,
                                 _BINTEGRATENBINS)
        dm = _dm(dwarfds)
        for ii in range(len(data)):
            mh = data['H0MAG'][ii] - dm
            logpisodwarf[ii, :] = iso[1](
                numpy.zeros(_BINTEGRATENBINS) + color[ii], mh)
    else:
        logpisodwarf = None
    # Load initial parameters from file
    with open(args[0], 'rb') as savefile:
        params = pickle.load(savefile)
    # Prep data
    l = data['GLON'] * _DEGTORAD
    b = data['GLAT'] * _DEGTORAD
    sinl = numpy.sin(l)
    cosl = numpy.cos(l)
    sinb = numpy.sin(b)
    cosb = numpy.cos(b)
    jk = data['J0MAG'] - data['K0MAG']
    jk[(jk < 0.5)] = 0.5  # BOVY: FIX THIS HACK BY EMAILING GAIL
    h = data['H0MAG']
    # Re-sample
    vlos = numpy.linspace(-200., 200., options.nvlos)
    pvlos = numpy.zeros((len(data), options.nvlos))
    if options.dwarf:
        thislogpisodwarf = logpisodwarf
    else:
        thislogpisodwarf = None
    if options.multi is not None and options.multi > 1:
        thismulti = options.multi
        # The options object is handed to mloglike; multi is set to 1 on it
        # to avoid conflict with the parallel map over velocities used here
        options.multi = 1
        thispvlos = multi.parallel_map(
            (lambda x: -mloglike(params,
                                 numpy.zeros(len(data)) + vlos[x],
                                 l, b, jk, h,
                                 df, options,
                                 sinl, cosl, cosb, sinb,
                                 logpiso, thislogpisodwarf,
                                 True, None, None, None)),
            range(options.nvlos),
            numcores=numpy.amin([len(vlos),
                                 multiprocessing.cpu_count(),
                                 thismulti]))
        for jj in range(options.nvlos):
            pvlos[:, jj] = thispvlos[jj]
    else:
        for jj in range(options.nvlos):
            pvlos[:, jj] = -mloglike(params,
                                     numpy.zeros(len(data)) + vlos[jj],
                                     l, b, jk, h,
                                     df, options,
                                     sinl, cosl, cosb, sinb,
                                     logpiso, thislogpisodwarf,
                                     True, None, None, None)
    for ii in range(len(data)):
        # Turn the log probabilities of this star into a normalized
        # cumulative distribution over the velocity grid
        pvlos[ii, :] -= logsumexp(pvlos[ii, :])
        pvlos[ii, :] = numpy.exp(pvlos[ii, :])
        pvlos[ii, :] = numpy.cumsum(pvlos[ii, :])
        pvlos[ii, :] /= pvlos[ii, -1]
        # Draw: first grid velocity at which the CDF reaches the deviate
        randindx = numpy.random.uniform()
        kk = 0
        while pvlos[ii, kk] < randindx:
            kk += 1
        data['VHELIO'][ii] = vlos[kk]
    # Dump raw
    fitsio.write(options.plotfile, data, clobber=True)
def plot_distanceprior(parser):
    """Plot the distance prior of the sample in the Galactic plane.

    The data are read with readVclosData and the isochrone prior is
    evaluated for every star on the distance grid. At every distance each
    star is assigned to a cell of a polar (azimuth, radius) grid through its
    Galactic longitude, the log of the distance prior returned by _logpd is
    stored in that cell, and the cells are combined over stars with
    logsumexp. The map is drawn on a 'galpolar' projection together with the
    lines of sight of all fields and written to options.plotfile.

    INPUT:
       parser - option parser whose parse_args() returns (options, args)
    OUTPUT:
       None; the figure is written through bovy_plot.bovy_end_print
    """
    (options, args) = parser.parse_args()
    # Read the data
    print("Reading the data ...")
    data = readVclosData(
        postshutdown=options.postshutdown,
        fehcut=options.fehcut,
        cohort=options.cohort,
        lmin=options.lmin,
        bmax=options.bmax,
        ak=True,
        cutmultiples=options.cutmultiples,
        validfeh=options.indivfeh,  # if indivfeh, we need validfeh
        jkmax=options.jkmax,
        datafilename=options.fakedata)
    glon = data['GLON'] * _DEGTORAD
    sinl = numpy.sin(glon)
    cosl = numpy.cos(glon)
    jk = data['J0MAG'] - data['K0MAG']
    jk[(jk < 0.5)] = 0.5  # BOVY: FIX THIS HACK BY EMAILING GAIL
    h = data['H0MAG']
    # Set up the isochrone
    # NOTE(review): unlike the sibling routines, iso is not wrapped in a list
    # when it is built here, while the indivfeh/varfeh branches below index
    # iso[0] and the default branch calls iso directly; a cached isofile
    # written by the siblings holds a list -- confirm which layout is meant
    if options.isofile is not None and os.path.exists(options.isofile):
        print("Loading the isochrone model ...")
        with open(options.isofile, 'rb') as isofile:
            iso = pickle.load(isofile)
            if options.indivfeh:
                zs = pickle.load(isofile)
            elif options.varfeh:
                locl = pickle.load(isofile)
    else:
        print("Setting up the isochrone model ...")
        if options.indivfeh:
            # Load all isochrones
            zs = numpy.arange(0.0005, 0.03005, 0.0005)
            iso = [isomodel.isomodel(imfmodel=options.imfmodel,
                                     expsfh=options.expsfh,
                                     Z=thisz) for thisz in zs]
        elif options.varfeh:
            locs = list(set(data['LOCATION']))
            iso = []
            for loc in locs:
                indx = (data['LOCATION'] == loc)
                locl = numpy.mean(data['GLON'][indx] * _DEGTORAD)
                iso.append(isomodel.isomodel(imfmodel=options.imfmodel,
                                             expsfh=options.expsfh,
                                             marginalizefeh=True,
                                             glon=locl))
        else:
            iso = isomodel.isomodel(imfmodel=options.imfmodel,
                                    Z=options.Z,
                                    expsfh=options.expsfh)
    # Set up polar grid: cell centers (xgrid, ygrid) and cell edges
    # (plotxgrid, plotygrid)
    res = 51
    xgrid = numpy.linspace(0., 2. * math.pi * (1. - 1. / res / 2.), 2 * res)
    ygrid = numpy.linspace(0.5, 2.8, res)
    dtheta = xgrid[1] - xgrid[0]
    dradius = ygrid[1] - ygrid[0]
    plotxgrid = numpy.linspace(xgrid[0] - dtheta / 2.,
                               xgrid[-1] + dtheta / 2.,
                               len(xgrid) + 1)
    plotygrid = numpy.linspace(ygrid[0] - dradius / 2.,
                               ygrid[-1] + dradius / 2.,
                               len(ygrid) + 1)
    # Most negative float stands in for log(0)
    logzero = -numpy.finfo(numpy.dtype(numpy.float64)).max
    plotthis = numpy.zeros((2 * res, res, len(data))) + logzero
    ds = numpy.linspace(_BINTEGRATEDMIN, _BINTEGRATEDMAX, _BINTEGRATENBINS)
    logpiso = numpy.zeros((len(data), _BINTEGRATENBINS))
    dm = _dm(ds)
    for ii in range(len(data)):
        mh = h[ii] - dm
        thisjk = numpy.zeros(_BINTEGRATENBINS) + jk[ii]
        if options.indivfeh:
            # Find closest Z
            thisZ = isodist.FEH2Z(data[ii]['FEH'])
            indx = numpy.argmin(numpy.fabs(thisZ - zs))
            logpiso[ii, :] = iso[0][indx](thisjk, mh)
        elif options.varfeh:
            # Find correct iso
            indx = (locl == data[ii]['LOCATION'])
            logpiso[ii, :] = iso[0][indx](thisjk, mh)
        else:
            logpiso[ii, :] = iso(thisjk, mh)
    star = numpy.arange(len(data))
    for jj in range(_BINTEGRATENBINS):
        d = ds[jj] / _REFR0
        # Galactocentric radius and azimuth of every star at this distance
        R = numpy.sqrt(1. + d**2. - 2. * d * cosl)
        indx = (R == 0.)
        R[indx] += 0.0001
        theta = numpy.arcsin(d / R * sinl)
        indx = (1. / cosl < d) * (cosl > 0.)
        theta[indx] = numpy.pi - theta[indx]
        indx = (theta < 0.)
        theta[indx] += 2. * math.pi
        thisout = _logpd([0., 1.], d, None, None, None, None, None,
                         options, R, theta, 1., 0., logpiso[:, jj])
        # Find bin to which these contribute
        thetabin = numpy.floor((theta - xgrid[0]) / dtheta + 0.5)
        Rbin = numpy.floor((R - plotygrid[0])
                           / (plotygrid[1] - plotygrid[0]))
        # Stars that fall off the grid are parked in cell (0,0) with zero
        # probability
        indx = (thetabin < 0)
        thetabin[indx] = 0
        Rbin[indx] = 0
        thisout[indx] = logzero
        # Azimuths past the last cell wrap around to the first one and keep
        # their radius and probability (has to be)
        indx = (thetabin >= 2 * res)
        thetabin[indx] = 0.
        indx = (Rbin < 0)
        thetabin[indx] = 0
        Rbin[indx] = 0
        thisout[indx] = logzero
        indx = (Rbin >= res)
        thetabin[indx] = 0
        Rbin[indx] = 0
        thisout[indx] = logzero
        thetabin = thetabin.astype('int')
        Rbin = Rbin.astype('int')
        # Every star contributes only to the cell that its own line of sight
        # crosses at this distance; indexing with the full bin arrays for
        # each star would write its value into the cells of all other stars
        plotthis[thetabin, Rbin, star] = thisout
    # Normalize: combine the stars in every cell
    for ii in range(2 * res):
        for jj in range(res):
            plotthis[ii, jj, 0] = logsumexp(plotthis[ii, jj, :])
    plotthis = plotthis[:, :, 0]
    plotthis -= numpy.amax(plotthis)
    plotthis = numpy.exp(plotthis)
    plotthis[(plotthis == 0.)] = numpy.nan
    # Get los
    locations = list(set(data['LOCATION']))
    nlocs = len(locations)
    l_plate = numpy.zeros(nlocs)
    for ii, loc in enumerate(locations):
        indx = (data['LOCATION'] == loc)
        l_plate[ii] = numpy.mean(data['GLON'][indx])
    bovy_plot.bovy_print()
    ax = pyplot.subplot(111, projection='galpolar')  # galpolar is in bovy_plot
    vmin, vmax = 0., 1.
    ax.pcolor(plotxgrid, plotygrid, plotthis.T,
              cmap='gist_yarg', vmin=vmin, vmax=vmax, zorder=2)
    # Overlay los
    lds = numpy.linspace(0., 2.95, 501)
    for ii in range(nlocs):
        thiscos = numpy.cos(l_plate[ii] * _DEGTORAD)
        thissin = numpy.sin(l_plate[ii] * _DEGTORAD)
        lr = numpy.sqrt(1. + lds**2. - 2. * lds * thiscos)
        lt = numpy.arcsin(lds / lr * thissin)
        indx = (1. / thiscos < lds) * (thiscos > 0.)
        lt[indx] = numpy.pi - lt[indx]
        ax.plot(lt, lr, ls='--', color='w', zorder=3)
    from matplotlib.patches import FancyArrowPatch
    arr = FancyArrowPatch(posA=(-math.pi / 2., 1.8),
                          posB=(-math.pi / 4., 1.8),
                          arrowstyle='->',
                          connectionstyle='arc3,rad=%4.2f' % (-math.pi / 16.),
                          shrinkA=2.0, shrinkB=2.0,
                          mutation_scale=20.0,
                          mutation_aspect=None, fc='k')
    ax.add_patch(arr)
    bovy_plot.bovy_text(-math.pi / 2., 1.97,
                        r'$\mathrm{Galactic\ rotation}$',
                        rotation=-22.5)
    # Radial grid circles; the tick labels are the radius times 8
    radii = numpy.array([0.5, 1., 1.5, 2., 2.5])
    labels = []
    for r in radii:
        ax.plot(numpy.linspace(0., 2. * math.pi, 501),
                numpy.zeros(501) + r,
                ls='-', color='0.65', zorder=1, lw=0.5)
        labels.append(r'$%i$' % int(r * 8.))
    pyplot.rgrids(radii, labels=labels, angle=-32.5)
    bovy_plot.bovy_text(5.785, 2.82, r'$\mathrm{kpc}$')
    azs = numpy.array([0., 45., 90., 135., 180., 225., 270., 315.]) * _DEGTORAD
    for az in azs:
        ax.plot(numpy.zeros(501) + az,
                numpy.linspace(0., 2.8, 501),
                '-', color='0.6', lw=0.5, zorder=1)
    # Sun
    bovy_plot.bovy_text(0.065, .9075, r'$\odot$')
    pyplot.ylim(0., 2.8)
    bovy_plot.bovy_end_print(options.plotfile)
def plot_bestfit(parser): (options, args) = parser.parse_args() if len(args) == 0 or options.plotfilename is None: parser.print_help() return #Read the data print "Reading the data ..." data = readVclosData( postshutdown=options.postshutdown, fehcut=options.fehcut, cohort=options.cohort, lmin=options.lmin, bmax=options.bmax, ak=True, cutmultiples=options.cutmultiples, validfeh=options.indivfeh, #if indivfeh, we need validfeh jkmax=options.jkmax, datafilename=options.fakedata) #HACK indx = (data['J0MAG'] - data['K0MAG'] < 0.5) data['J0MAG'][indx] = 0.5 + data['K0MAG'][indx] #Cut inner disk locations #data= data[(data['GLON'] > 75.)] #Cut outliers #data= data[(data['VHELIO'] < 200.)*(data['VHELIO'] > -200.)] print "Using %i data points ..." % len(data) #Set up the isochrone if not options.isofile is None and os.path.exists(options.isofile): print "Loading the isochrone model ..." isofile = open(options.isofile, 'rb') iso = pickle.load(isofile) if options.indivfeh: zs = pickle.load(isofile) if options.varfeh: locl = pickle.load(isofile) isofile.close() else: print "Setting up the isochrone model ..." 
if options.indivfeh: #Load all isochrones iso = [] zs = numpy.arange(0.0005, 0.03005, 0.0005) for ii in range(len(zs)): iso.append( isomodel.isomodel(imfmodel=options.imfmodel, expsfh=options.expsfh, Z=zs[ii])) elif options.varfeh: locs = list(set(data['LOCATION'])) iso = [] for ii in range(len(locs)): indx = (data['LOCATION'] == locs[ii]) locl = numpy.mean(data['GLON'][indx] * _DEGTORAD) iso.append( isomodel.isomodel(imfmodel=options.imfmodel, expsfh=options.expsfh, marginalizefeh=True, glon=locl)) else: iso = isomodel.isomodel(imfmodel=options.imfmodel, Z=options.Z, expsfh=options.expsfh) if options.dwarf: iso = [ iso, isomodel.isomodel(imfmodel=options.imfmodel, Z=options.Z, dwarf=True, expsfh=options.expsfh) ] else: iso = [iso] if not options.isofile is None: isofile = open(options.isofile, 'wb') pickle.dump(iso, isofile) if options.indivfeh: pickle.dump(zs, isofile) elif options.varfeh: pickle.dump(locl, isofile) isofile.close() df = None print "Pre-calculating isochrone distance prior ..." 
logpiso = numpy.zeros((len(data), _BINTEGRATENBINS)) ds = numpy.linspace(_BINTEGRATEDMIN, _BINTEGRATEDMAX, _BINTEGRATENBINS) dm = _dm(ds) for ii in range(len(data)): mh = data['H0MAG'][ii] - dm if options.indivfeh: #Find closest Z thisZ = isodist.FEH2Z(data[ii]['FEH']) indx = numpy.argmin(numpy.fabs(thisZ - zs)) logpiso[ii, :] = iso[0][indx](numpy.zeros(_BINTEGRATENBINS) + (data['J0MAG'] - data['K0MAG'])[ii], mh) elif options.varfeh: #Find correct iso indx = (locl == data[ii]['LOCATION']) logpiso[ii, :] = iso[0][indx](numpy.zeros(_BINTEGRATENBINS) + (data['J0MAG'] - data['K0MAG'])[ii], mh) else: logpiso[ii, :] = iso[0](numpy.zeros(_BINTEGRATENBINS) + (data['J0MAG'] - data['K0MAG'])[ii], mh) if options.dwarf: logpisodwarf = numpy.zeros((len(data), _BINTEGRATENBINS)) dwarfds = numpy.linspace(_BINTEGRATEDMIN_DWARF, _BINTEGRATEDMAX_DWARF, _BINTEGRATENBINS) dm = _dm(dwarfds) for ii in range(len(data)): mh = data['H0MAG'][ii] - dm logpisodwarf[ii, :] = iso[1](numpy.zeros(_BINTEGRATENBINS) + (data['J0MAG'] - data['K0MAG'])[ii], mh) else: logpisodwarf = None #Calculate data means etc. #Calculate means locations = list(set(data['LOCATION'])) nlocs = len(locations) l_plate = numpy.zeros(nlocs) avg_plate = numpy.zeros(nlocs) sig_plate = numpy.zeros(nlocs) siga_plate = numpy.zeros(nlocs) sigerr_plate = numpy.zeros(nlocs) for ii in range(nlocs): indx = (data['LOCATION'] == locations[ii]) l_plate[ii] = numpy.mean(data['GLON'][indx]) avg_plate[ii] = numpy.mean(data['VHELIO'][indx]) sig_plate[ii] = numpy.std(data['VHELIO'][indx]) siga_plate[ii] = numpy.std(data['VHELIO'][indx]) / numpy.sqrt( numpy.sum(indx)) sigerr_plate[ii] = bootstrap_sigerr(data['VHELIO'][indx]) #Calculate plate means and variances from the model #Load initial parameters from file savefile = open(args[0], 'rb') params = pickle.load(savefile) if not options.index is None: params = params[options.index] savefile.close() #params[0]= 245./235. #params[1]= 8.5/8. 
avg_plate_model = numpy.zeros(nlocs) sig_plate_model = numpy.zeros(nlocs) for ii in range(nlocs): #Calculate vlos | los indx = (data['LOCATION'] == locations[ii]) thesedata = data[indx] thislogpiso = logpiso[indx, :] if options.dwarf: thislogpisodwarf = logpisodwarf[indx, :] else: thislogpisodwarf = None vlos = numpy.linspace(-200., 200., options.nvlos) pvlos = numpy.zeros(options.nvlos) if not options.multi is None: pvlos = multi.parallel_map( (lambda x: pvlosplate(params, vlos[x], thesedata, df, options, thislogpiso, thislogpisodwarf, iso)), range(options.nvlos), numcores=numpy.amin( [len(vlos), multiprocessing.cpu_count(), options.multi])) else: for jj in range(options.nvlos): print jj pvlos[jj] = pvlosplate(params, vlos[jj], thesedata, df, options, thislogpiso, thislogpisodwarf, iso) pvlos -= logsumexp(pvlos) pvlos = numpy.exp(pvlos) #Calculate mean and velocity dispersion avg_plate_model[ii] = numpy.sum(vlos * pvlos) sig_plate_model[ii]= numpy.sqrt(numpy.sum(vlos**2.*pvlos)\ -avg_plate_model[ii]**2.) #Plot everything left, bottom, width, height = 0.1, 0.4, 0.8, 0.5 axTop = pyplot.axes([left, bottom, width, height]) left, bottom, width, height = 0.1, 0.1, 0.8, 0.3 axMean = pyplot.axes([left, bottom, width, height]) #left, bottom, width, height= 0.1, 0.1, 0.8, 0.2 #axSig= pyplot.axes([left,bottom,width,height]) fig = pyplot.gcf() fig.sca(axTop) pyplot.ylabel(r'$\mathrm{Heliocentric\ velocity}\ [\mathrm{km\ s}^{-1}]$') pyplot.xlabel(r'$\mathrm{Galactic\ longitude}\ [\mathrm{deg}]$') pyplot.xlim(0., 360.) pyplot.ylim(-200., 200.) 
nullfmt = NullFormatter() # no labels axTop.xaxis.set_major_formatter(nullfmt) bovy_plot.bovy_plot(data['GLON'], data['VHELIO'], 'k,', yrange=[-200., 200.], xrange=[0., 360.], overplot=True) ndata_t = int(math.floor(len(data) / 1000.)) ndata_h = len(data) - ndata_t * 1000 bovy_plot.bovy_plot(l_plate, avg_plate, 'o', overplot=True, mfc='0.5', mec='none') bovy_plot.bovy_plot(l_plate, avg_plate_model, 'x', overplot=True, ms=10., mew=1.5, color='0.7') #Legend bovy_plot.bovy_plot([260.], [150.], 'k,', overplot=True) bovy_plot.bovy_plot([260.], [120.], 'o', mfc='0.5', mec='none', overplot=True) bovy_plot.bovy_plot([260.], [90.], 'x', ms=10., mew=1.5, color='0.7', overplot=True) bovy_plot.bovy_text(270., 145., r'$\mathrm{data}$') bovy_plot.bovy_text(270., 115., r'$\mathrm{data\ mean}$') bovy_plot.bovy_text(270., 85., r'$\mathrm{model\ mean}$') bovy_plot._add_ticks() #Now plot the difference fig.sca(axMean) bovy_plot.bovy_plot([0., 360.], [0., 0.], '-', color='0.5', overplot=True) bovy_plot.bovy_plot(l_plate, avg_plate - avg_plate_model, 'ko', overplot=True) pyplot.errorbar(l_plate, avg_plate - avg_plate_model, yerr=siga_plate, marker='o', color='k', linestyle='none', elinestyle='-') pyplot.ylabel(r'$\bar{V}_{\mathrm{data}}-\bar{V}_{\mathrm{model}}$') pyplot.ylim(-14.5, 14.5) pyplot.xlim(0., 360.) bovy_plot._add_ticks() #axMean.xaxis.set_major_formatter(nullfmt) pyplot.xlabel(r'$\mathrm{Galactic\ longitude}\ [\mathrm{deg}]$') pyplot.xlim(0., 360.) bovy_plot._add_ticks() #Save bovy_plot.bovy_end_print(options.plotfilename) return None #Sigma fig.sca(axSig) pyplot.plot([0., 360.], [1., 1.], '-', color='0.5') bovy_plot.bovy_plot(l_plate, sig_plate / sig_plate_model, 'ko', overplot=True) pyplot.errorbar(l_plate, sig_plate / sig_plate_model, yerr=sigerr_plate / sig_plate_model, marker='o', color='k', linestyle='none', elinestyle='-') pyplot.ylabel( r'$\sigma_{\mathrm{los}}^{\mathrm{data}}/ \sigma_{\mathrm{los}}^{\mathrm{model}}$' ) pyplot.ylim(0.5, 1.5)