def contour_transform(x0, x1, joint_pdf):
    """
    Map a 2D likelihood surface from the modified to the original space.

    The log-probability surface is spline-interpolated in the modified
    space, evaluated on a regular grid in the original space, and only
    then converted with ``cts``. Interpolating the ``cts`` output directly
    (as an earlier version did) is unreliable, so the spline is always
    built over the log-probability itself.

    Parameters:
    -----------
        x0 : 2D grid of the first modified-space coordinate; ``x0[:, 0]``
             is used as its axis vector
        x1 : 2D grid of the second modified-space coordinate; ``x1[0]``
             is used as its axis vector
        joint_pdf : log-probability on the grid (it is exponentiated by
                    ``marg_estimates``)

    Returns:
    --------
        tau0, gamma : 250 x 250 ``np.mgrid`` coordinate arrays in the
                      original space
        values_orig : ``cts`` of the interpolated log-probability, same
                      shape as ``tau0`` (presumably enclosed-probability
                      levels -- confirm against ``cts``)
    """
    n_sigma = 5

    # Axis vectors of the modified-space grid
    x0_line, x1_line = x0[:, 0], x1[0]

    # marg_estimates works on 1D axis vectors. Only the first four results
    # are needed, which also tolerates a variant that returns a fifth value.
    mu_x0, sig_x0, mu_x1, sig_x1 = marg_estimates(
        x0_line, x1_line, joint_pdf)[:4]

    lo_x0, hi_x0 = mu_x0 - n_sigma * sig_x0, mu_x0 + n_sigma * sig_x0
    lo_x1, hi_x1 = mu_x1 - n_sigma * sig_x1, mu_x1 + n_sigma * sig_x1

    # Convert the corners of the +/- 5 sigma box to the original space
    corners = np.array([[lo_x0, lo_x1],
                        [lo_x0, hi_x1],
                        [hi_x0, lo_x1],
                        [hi_x0, hi_x1]])
    extents = xfm(corners, shift, tilt, direction='down')
    extent_t0 = [extents[:, 0].min(), extents[:, 0].max()]
    extent_gamma = [extents[:, 1].min(), extents[:, 1].max()]

    # Restrict the spline to grid nodes strictly inside the box
    mask_x0 = np.where((x0_line > lo_x0) & (x0_line < hi_x0))[0]
    mask_x1 = np.where((x1_line > lo_x1) & (x1_line < hi_x1))[0]

    # Spline over the log-probability, not over cts(...)
    _b = RectBivariateSpline(x0_line[mask_x0], x1_line[mask_x1],
                             joint_pdf[mask_x0[:, None], mask_x1])

    # Rectangular grid in original space
    tau0, gamma = np.mgrid[extent_t0[0]:extent_t0[1]:250j,
                           extent_gamma[0]:extent_gamma[1]:250j]

    _point_orig = np.vstack([tau0.ravel(), gamma.ravel()]).T
    _grid_in_mod = xfm(_point_orig, shift, tilt, direction='up')

    log_like_orig = _b.ev(_grid_in_mod[:, 0], _grid_in_mod[:, 1])
    log_like_orig = log_like_orig.reshape(tau0.shape)

    # NOTE(review): assumes cts only needs a uniformly spaced grid, i.e.
    # that xfm is an affine map with constant Jacobian -- confirm.
    values_orig = cts(log_like_orig)

    return tau0, gamma, values_orig
def addLogs(fname, npix=200, sfx_lst=None, mod_ax=None, get_est=False,
            basis='orig', orig_ax=None, orig_space=True, mycolor='k',
            plotit=False, mylabel='temp', ls='solid', individual=False,
            save=True, model=False, force=False, plot_marg=True, **kwargs):
    """
    Combine the per-skewer results stored in a folder.

    With ``get_est`` the 1D estimates written per skewer are collected.
    Otherwise the per-skewer log-likelihood grids are summed into a joint
    surface, which is summarised in the modified space and then
    interpolated onto a regular grid in the original space.

    The working directory is changed to ``fname`` while the function runs
    and is always restored before returning or raising.

    Parameters:
    -----------
        fname : the path to the folder containing the files
        npix : # of grid points per dimension in modified space; every
               ``gridlnlike_*`` file must hold an npix x npix array
        sfx_lst : indices of the skewers to use, None for all. Ignored
                  for the joint surface when a cached ``joint_pdf.dat``
                  is read.
        mod_ax : axes over which to draw the contours in modified space
        get_est : collect the per-skewer estimates instead of building
                  the joint surface
        basis : "orig" reads ``tg_est*`` files (3 parameters), anything
                else reads ``xx_est*`` files (2 parameters)
        orig_ax : axes over which to draw the contours in original space
        orig_space : accepted for backward compatibility; currently unused
        mycolor, mylabel, ls : accepted for backward compatibility;
                               currently unused
        plotit : with ``get_est``, plot the estimates against skewer index
        individual : whether to draw contours for individual skewers
        save : write the freshly computed joint surface to
               ``joint_pdf.dat``
        model : with ``get_est``, also fit the correlation model through
                ``utils.get_corrfunc``
        force : recompute the joint surface even if ``joint_pdf.dat``
                exists
        plot_marg : forwarded to ``utils.marg_estimates``
        **kwargs : forwarded to ``utils.marg_estimates``

    Returns:
    --------
        None if ``fname`` does not exist;
        ``(sfx, e_cube, results)`` if ``get_est`` (``results`` is None
        unless ``model``);
        otherwise the modified-space result of ``utils.marg_estimates``.

    Raises:
    -------
        ValueError if the joint surface has to be built and no
        ``gridlnlike_*`` file is present.
    """
    import glob
    import os
    from scipy.interpolate import RectBivariateSpline

    if not os.path.exists(fname):
        print('Oops! There is no such folder')
        return None

    currdir = os.getcwd()
    os.chdir(fname)

    try:
        if get_est:
            if basis == "orig":
                n_dim = 3
                e_lst = glob.glob('tg_est*')
                labels = [r"$f_0$", r"$\ln \tau_0$", r"$\gamma$"]
            else:
                n_dim = 2
                e_lst = glob.glob('xx_est*')
                labels = [r"$x_0$", r"$x_1$"]

            # Skewer index is the third "_"-separated token minus ".dat"
            sfx = np.array([int(ele.split('_')[2][:-4]) for ele in e_lst],
                           dtype=int)
            e_cube = np.empty((len(e_lst), n_dim, 3))
            for ct, ele in enumerate(e_lst):
                e_cube[ct] = np.loadtxt(ele)

            # Sort by skewer index. argsort never compares the arrays
            # themselves, so repeated indices cannot raise.
            order = np.argsort(sfx, kind='stable')
            sfx, e_cube = sfx[order], e_cube[order]

            if plotit:
                _, axs = plt.subplots(nrows=n_dim, sharex=True,
                                      figsize=(9, 5))
                for i in range(n_dim):
                    axs[i].errorbar(sfx, e_cube[:, i, 0],
                                    yerr=[e_cube[:, i, 2], e_cube[:, i, 1]],
                                    fmt='.-', color='k', lw=0.6)
                    axs[i].set_ylabel(labels[i])
                plt.tight_layout()
                plt.show()

            # Best-fit after modeling correlation matrix
            results = None
            if model:
                # Cannot handle asymmetric errorbars - so take the average
                err0 = (e_cube[:, -2, 1] + e_cube[:, -2, 2]) / 2.
                res0 = utils.get_corrfunc(e_cube[:, -2, 0], err0, model=True,
                                          est=True, sfx="x0_corr",
                                          scale_factor=2.24, viz=False)
                print("W/o correlations: %.5f pm %.5f" % (res0[0], res0[1]))
                print("With correlations: %.5f pm %.5f" % (res0[2], res0[3]))

                err1 = (e_cube[:, -1, 1] + e_cube[:, -1, 2]) / 2.
                res1 = utils.get_corrfunc(e_cube[:, -1, 0], err1, model=True,
                                          est=True, sfx="x1_corr",
                                          scale_factor=3.4, viz=False)
                print("W/o correlations: %.5f pm %.5f" % (res1[0], res1[1]))
                print("With correlations: %.5f pm %.5f" % (res1[2], res1[3]))
                results = [res0, res1]

            return sfx, e_cube, results

        # Joint PDF from the combined likelihoods
        use_cache = os.path.isfile('joint_pdf.dat') and not force

        # The individual grids are needed to build the joint surface and
        # to draw per-skewer contours, even when the joint surface itself
        # comes from the cache.
        sfx, d_cube = None, None
        if individual or not use_cache:
            f_lst = glob.glob('gridlnlike_*')
            if not f_lst:
                raise ValueError("No 'gridlnlike_*' files found in %s"
                                 % fname)

            # Read the skewer number from file itself for now
            sfx = np.array([int(ele.split('_')[1][:-4]) for ele in f_lst],
                           dtype=int)
            d_cube = np.empty((len(f_lst), npix, npix))
            for ct, ele in enumerate(f_lst):
                d_cube[ct] = np.loadtxt(ele)

            # sort the data for visualization
            order = np.argsort(sfx, kind='stable')
            sfx, d_cube = sfx[order], d_cube[order]

            # choose a specific subset of the skewers
            if sfx_lst is not None:
                keep = np.array([ele in sfx_lst for ele in sfx], dtype=bool)
                sfx, d_cube = sfx[keep], d_cube[keep]

        if use_cache:
            print("****** File already exists. Reading from it *******")
            joint_pdf = np.loadtxt('joint_pdf.dat')
        else:
            joint_pdf = d_cube.sum(0)
            joint_pdf -= joint_pdf.max()
            if save:
                np.savetxt('joint_pdf.dat', joint_pdf)

        # simple point statistics in modified space
        if mod_ax is None:
            _, mod_ax = plt.subplots(1)
        print("Modified space estimates:")
        res = utils.marg_estimates(x0_line, x1_line, joint_pdf, mod_ax,
                                   plot_marg, labels=["x_0", "x_1"],
                                   **kwargs)
        mu_x0, sig_x0, mu_x1, sig_x1, _ = res

        # Plotting individual + joint contour in likelihood space
        if individual:
            colormap = plt.cm.rainbow
            colors = [colormap(i) for i in np.linspace(0, 1, len(sfx))]
            for i in range(len(sfx)):
                CS = mod_ax.contour(x0, x1, cts(d_cube[i]),
                                    levels=[0.68, ], colors=(colors[i], ))
                CS.collections[0].set_label(sfx[i])
            mod_ax.legend(loc='upper center', ncol=6)

        mod_ax.set_xlabel('$x_0$')
        mod_ax.set_ylabel('$x_1$')

        # 1. Find the appropriate ranges in tau0-gamma space
        lo_x0, hi_x0 = mu_x0 - 5 * sig_x0, mu_x0 + 5 * sig_x0
        lo_x1, hi_x1 = mu_x1 - 5 * sig_x1, mu_x1 + 5 * sig_x1
        corners = np.array([[lo_x0, lo_x1],
                            [lo_x0, hi_x1],
                            [hi_x0, lo_x1],
                            [hi_x0, hi_x1]])
        extents = utils.xfm(corners, shift, tilt, direction='down')

        extent_t0 = [extents[:, 0].min(), extents[:, 0].max()]
        extent_gamma = [extents[:, 1].min(), extents[:, 1].max()]

        # suitable ranges for spline interpolation in modified space
        mask_x0 = np.where((x0_line > lo_x0) & (x0_line < hi_x0))[0]
        mask_x1 = np.where((x1_line > lo_x1) & (x1_line < hi_x1))[0]

        # create a rectbivariate spline in the modified space logP
        _b = RectBivariateSpline(x0_line[mask_x0], x1_line[mask_x1],
                                 joint_pdf[mask_x0[:, None], mask_x1])

        # Rectangular grid in original space
        _tau0, _gamma = np.mgrid[extent_t0[0]:extent_t0[1]:500j,
                                 extent_gamma[0]:extent_gamma[1]:501j]

        _point_orig = np.vstack([_tau0.ravel(), _gamma.ravel()]).T
        _grid_in_mod = utils.xfm(_point_orig, shift, tilt, direction='up')

        values_orig = _b.ev(_grid_in_mod[:, 0], _grid_in_mod[:, 1])
        values_orig = values_orig.reshape(_tau0.shape)

        # Best fit + statistical errors
        print("Original space estimates:")
        if orig_ax is None:
            _, orig_ax = plt.subplots(1)
        utils.marg_estimates(_tau0[:, 0], _gamma[0], values_orig, orig_ax,
                             plot_marg, labels=[r"\ln \tau_0", r"\gamma"],
                             **kwargs)
        plt.show()

        return res
    finally:
        # Restore the caller's working directory on every exit path
        os.chdir(currdir)
def mcmcSkewer(bundleObj, logdef=3, binned=False, niter=2500, do_mcmc=True,
               return_sampler=False, evalgrid=True, in_axes=None, viz=False,
               VERBOSITY=False, seed=None, truths=None):
    """
    Script to fit simple flux model on each restframe wavelength skewer

    Parameters:
    -----------
        bundleObj : A list of [z, f, ivar] with the skewer_index, i.e.
                    ``bundleObj[0]`` has columns z, f, ivar and
                    ``bundleObj[1]`` is the index used in output file names
        logdef : Which model to use (1, 2 or 3); 4 only returns the
                 continuum estimate for the optical depth parameters in
                 ``truths``
        binned : Replace the data by binned statistics before sampling
        niter : The number of iterations to run the mcmc (40% for burn-in)
        do_mcmc : Flag whether to perform mcmc
        return_sampler : Whether to return the raw sampler chain without
                         flattening
        evalgrid : Whether to compute loglikelihood on a specified grid
        in_axes : axes over which to draw the plots
        viz : Plot the data, the fits and the per-skewer contours
        VERBOSITY : print extra information
        seed : passed to ``np.random.seed`` before the initial walker
               positions are drawn; None re-seeds from the OS
        truths : used with logdef=4, best-fit values of tau0 and gamma;
                 defaults to [0.002, 3.8]

    Returns:
    --------
        the optimizer solution if logdef == 4;
        the raw chain if return_sampler;
        None otherwise (results are written to ``tg_est_<index>.dat``,
        ``xx_est_<index>.dat`` and, with evalgrid,
        ``gridlnlike_<index>.dat`` in the working directory)

    Raises:
    -------
        ValueError if ``logdef`` is not one of 1, 2, 3, 4
    """
    if truths is None:
        truths = [0.002, 3.8]

    z, f, ivar = bundleObj[0].T

    # Keep only pixels with positive inverse variance and finite flux
    ind = (ivar > 0) & (np.isfinite(f))
    z, f, sigma = z[ind], f[ind], 1.0 / np.sqrt(ivar[ind])

    # -------------------------------------------------------------------------
    # continuum flux estimate given a value of (tau0, gamma)
    if logdef == 4:
        if VERBOSITY:
            print('Continuum estimates using optical depth parameters:',
                  truths)

        chisq4 = lambda *args: -outer(*truths)(*args)

        opt_res = minimize(chisq4, 1.5, args=(z, f, sigma),
                           method='Nelder-Mead')
        return opt_res['x']

    if VERBOSITY:
        print('Carrying analysis for skewer', bundleObj[1])

    if logdef == 1:
        nll, names, labels, guess = chisq1, names1, labels1, guess1
        ndim, kranges, lnlike = 4, kranges1, lnlike1
    elif logdef == 2:
        nll, names, labels, guess = chisq2, names2, labels2, guess2
        ndim, kranges, lnlike = 5, kranges2, lnlike2
    elif logdef == 3:
        nll, names, labels, guess = chisq3, names3, labels3, guess3
        ndim, kranges, lnlike = 3, kranges3, lnlike3
    else:
        raise ValueError("logdef must be one of 1, 2, 3, 4; got %r"
                         % (logdef,))

    # Try to fit with scipy optimize routine
    opt_res = minimize(nll, guess, args=(z, f, sigma), method='Nelder-Mead')
    print('Scipy optimize results:')
    print('Success =', opt_res['success'], 'params =', opt_res['x'], '\n')

    if viz:
        if in_axes is None:
            _, in_axes = plt.subplots(1)
        in_axes.errorbar(z, f, sigma, fmt='o', color='gray', alpha=0.2)
        in_axes.plot(
            zline, opt_res['x'][0] *
            np.exp(-np.exp(opt_res['x'][1]) * (1 + zline)**opt_res['x'][2]))

    if binned:
        mu = binned_statistic(z, f, bins=binx).statistic
        sig = binned_statistic(z, f, bins=binx, statistic=sig_func).statistic

        # Drop bins without a usable scatter estimate
        ixs = sig > 0
        z, f, sigma = centers[ixs], mu[ixs], sig[ixs]

        if viz:
            in_axes.errorbar(z, f, sigma, fmt='o', color='r')

        nll, names, labels, guess = lsq, names3, labels3, guess3
        ndim, kranges, lnlike = 3, kranges3, simpleln

    # --------------------------------------------------------------------------
    if not do_mcmc:
        return None

    # Honour the caller's seed; seed=None behaves like a bare seed() call
    np.random.seed(seed)

    nwalkers = 100
    p0 = [guess + 1e-4 * np.random.randn(ndim) for _ in range(nwalkers)]

    # configure the sampler
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnlike,
                                    args=(z, f, sigma))

    # burn-in time - Is this enough?
    p0, __, __ = sampler.run_mcmc(p0, 500)
    sampler.reset()

    # Production step
    sampler.run_mcmc(p0, niter)
    print("Burn-in and production completed \n")

    if return_sampler:
        return sampler.chain

    # pruning 40 percent of the samples as extra burn-in
    lInd = int(niter * 0.4)
    samps = sampler.chain[:, lInd:, :].reshape((-1, ndim))

    # using percentiles as confidence intervals
    CenVal = np.median(samps, axis=0)

    # Reduced chi-square uses the number of fitted parameters of the
    # selected model as the lost degrees of freedom.
    # BIC = - 2 * ln(L_0) + k ln(n)
    print('CHISQ_R', -2 * lnlike(CenVal, z, f, sigma) / (len(z) - ndim))
    print('BIC:', -2 * lnlike(CenVal, z, f, sigma) + ndim * np.log(len(z)))

    def _summary(samples):
        # One row per parameter: center, top error, bottom error
        p16, p50, p84 = np.percentile(samples, [16, 50, 84], axis=0)
        return np.column_stack([p50, p84 - p50, p50 - p16])

    # Rotate the points to the other basis and 1D estimates
    # and write them to the file
    tg_est = _summary(samps)
    xx = xfm(samps[:, 1:], shift, tilt, direction='up')
    xx_est = _summary(xx)

    np.savetxt('tg_est_' + str(bundleObj[1]) + '.dat', tg_est)
    np.savetxt('xx_est_' + str(bundleObj[1]) + '.dat', xx_est)

    if viz:
        in_axes.plot(
            zline, CenVal[0] *
            np.exp(-np.exp(CenVal[1]) * (1 + zline)**CenVal[2]), '-g')

    # instantiate a getdist object
    MC = MCSamples(samples=samps, names=names, labels=labels,
                   ranges=kranges)

    # MODIFY THIS TO BE PRETTIER
    if viz:
        g = plots.getSubplotPlotter()
        g.triangle_plot(MC)

    # Evaluate the pdf on a rotated grid for better estimation
    if evalgrid:
        print('Evaluating on the grid specified \n')
        pdist = MC.get2DDensity('t0', 'gamma')

        # Evaluate density on a grid
        pgrid = np.array([pdist.Prob(*ele) for ele in modPos])

        # Clamp every non-positive density, including exact zeros, so the
        # logarithm below stays finite
        pgrid[pgrid <= 0] = 1e-50

        # Convert to logLikelihood
        logP = np.log(pgrid)
        logP -= logP.max()
        logP = logP.reshape(x0.shape)

        # Visualize the contour in modified space per skewer
        if viz:
            _, ax2 = plt.subplots(1)
            ax2.contour(x0, x1, cts(logP), levels=[0.683, 0.955, ],
                        colors='k')
            ax2.axvline(xx_est[0][0] + xx_est[0][1])
            ax2.axvline(xx_est[0][0] - xx_est[0][2])
            ax2.axhline(xx_est[1][0] + xx_est[1][1])
            ax2.axhline(xx_est[1][0] - xx_est[1][2])
            ax2.set_xlabel(r'$x_0$')
            ax2.set_ylabel(r'$x_1$')
            plt.show()

        # fileName1: the log-probability evaluated in the tilted grid
        f_name1 = 'gridlnlike_' + str(bundleObj[1]) + '.dat'
        np.savetxt(f_name1, logP)

    return None
def marg_estimates(xx, yy, logL, levels=None, par_labels=None, ax=None,
                   plot_marg=True, label='temp', **kwargs):
    """
    Marginalized statistics that follow from a joint likelihood.
    Simple mean and standard deviation estimates, plus a contour plot.

    Parameters:
    -----------
        xx : vector in x-direction of the grid (uniform spacing assumed;
             only ``xx[1] - xx[0]`` is used as the step)
        yy : vector in y-direction of the grid (same assumption)
        logL : log probability on the 2D grid, indexed ``[xx, yy]``
        levels : contour levels passed to ``ax.contour``; defaults to
                 [0.683, 0.955]
        par_labels : two parameter names used in the title; defaults to
                     ["x_0", "x_1"]
        ax : axes to draw on; ``plt.axes()`` is used when None
        plot_marg : overlay the scaled 1D marginals and the one sigma
                    lines
        label : legend label attached to the first contour
        **kwargs : forwarded to ``ax.contour``

    Returns:
    --------
        [loc_x0, sig_x0, loc_x1, sig_x1, sig_x0_x1] where the last entry
        is the covariance between the two parameters
    """
    if levels is None:
        levels = [0.683, 0.955]
    if par_labels is None:
        par_labels = ["x_0", "x_1"]

    # Subtracting the maximum guards exp() against overflow/underflow and
    # cancels out of every normalised quantity below.
    pdf = np.exp(logL - np.max(logL))

    # normalize the pdf too --> though not necessary for
    # getting mean and the standard deviation
    x0_pdf = np.sum(pdf, axis=1)
    x0_pdf /= x0_pdf.sum() * (xx[1] - xx[0])

    x1_pdf = np.sum(pdf, axis=0)
    x1_pdf /= x1_pdf.sum() * (yy[1] - yy[0])

    mu_x0 = (xx * x0_pdf).sum() / x0_pdf.sum()
    mu_x1 = (yy * x1_pdf).sum() / x1_pdf.sum()

    sig_x0 = np.sqrt((xx ** 2 * x0_pdf).sum() / x0_pdf.sum() - mu_x0 ** 2)
    sig_x1 = np.sqrt((yy ** 2 * x1_pdf).sum() / x1_pdf.sum() - mu_x1 ** 2)

    # Broadcasts to (len(yy), len(xx)), matching pdf.T
    sig_x0_x1 = ((xx - mu_x0) * (yy[:, None] - mu_x1) * pdf.T).sum() \
        / pdf.sum()

    print("param1 = %.4f pm %.4f" % (mu_x0, sig_x0))
    print("param2 = %.4f pm %.4f\n" % (mu_x1, sig_x1))

    if ax is None:
        ax = plt.axes()

    # The legend label is attached to the first contour below; contour()
    # itself does not consume a "label" keyword.
    CS = ax.contour(xx, yy, cts(logL.T), levels=levels, **kwargs)
    CS.collections[0].set_label(label)

    ax.set_xlim(mu_x0 - 4 * sig_x0, mu_x0 + 4 * sig_x0)
    ax.set_ylim(mu_x1 - 4 * sig_x1, mu_x1 + 4 * sig_x1)

    if plot_marg:
        xx_extent = 8 * sig_x0
        yy_extent = 8 * sig_x1

        pdf_xx_ext = x0_pdf.max() - x0_pdf.min()
        pdf_yy_ext = x1_pdf.max() - x1_pdf.min()

        # Marginals are rescaled to 20% of the visible window and drawn
        # from the lower / left edge of the axes
        ax.plot(xx, 0.2 * (x0_pdf - x0_pdf.min()) * yy_extent / pdf_xx_ext
                + ax.get_ylim()[0])
        ax.axvline(mu_x0 - sig_x0)
        ax.axvline(mu_x0 + sig_x0)
        ax.plot(0.2 * (x1_pdf - x1_pdf.min()) * xx_extent / pdf_yy_ext
                + ax.get_xlim()[0], yy)
        ax.axhline(mu_x1 - sig_x1)
        ax.axhline(mu_x1 + sig_x1)

    plt.title(r"$%s = %.3f \pm %.3f, %s = %.3f \pm %.3f$" %
              (par_labels[0], mu_x0, sig_x0, par_labels[1], mu_x1, sig_x1))
    plt.legend()
    plt.tight_layout()
    plt.show()

    return mu_x0, sig_x0, mu_x1, sig_x1, sig_x0_x1
def marg_estimates(x0_line, x1_line, joint_pdf, ax=None, plot_marg=True,
                   labels=None):
    """
    Marginalized statistics that follow from a joint likelihood.
    Simple mean and standard deviation estimates, plus a contour plot.

    Parameters:
    -----------
        x0_line : vector in x-direction of the grid (uniform spacing
                  assumed; only the first step is used)
        x1_line : vector in y-direction of the grid (same assumption)
        joint_pdf : posterior log probability on the 2D grid, indexed
                    ``[x0, x1]``
        ax : axes to draw on; a new figure is created when None
        plot_marg : overlay the scaled 1D marginals and the one sigma
                    lines
        labels : two axis / title labels; defaults to ["p0", "p1"]

    Returns:
    --------
        [loc_x0, sig_x0, loc_x1, sig_x1]
    """
    pdf = np.exp(joint_pdf)

    x0_pdf = np.sum(pdf, axis=1)
    x0_pdf /= x0_pdf.sum() * (x0_line[1] - x0_line[0])

    # Each marginal is normalised with its own grid spacing
    x1_pdf = np.sum(pdf, axis=0)
    x1_pdf /= x1_pdf.sum() * (x1_line[1] - x1_line[0])

    mu_x0 = (x0_line * x0_pdf).sum() / x0_pdf.sum()
    mu_x1 = (x1_line * x1_pdf).sum() / x1_pdf.sum()
    sig_x0 = np.sqrt((x0_line**2 * x0_pdf).sum() / x0_pdf.sum() - mu_x0**2)
    sig_x1 = np.sqrt((x1_line**2 * x1_pdf).sum() / x1_pdf.sum() - mu_x1**2)

    print("param1 = %.4f pm %.4f" % (mu_x0, sig_x0))
    print("param2 = %.4f pm %.4f\n" % (mu_x1, sig_x1))

    if labels is None:
        labels = ["p0", "p1"]

    if ax is None:
        _, ax = plt.subplots(1)

    # 1 and 2 sigma enclosed-probability levels
    ax.contour(x0_line, x1_line, cts(joint_pdf.T), colors=('k', ),
               levels=[0.683, 0.955])
    ax.set_xlabel(labels[0])
    ax.set_ylabel(labels[1])
    ax.set_xlim(mu_x0 - 4 * sig_x0, mu_x0 + 4 * sig_x0)
    ax.set_ylim(mu_x1 - 4 * sig_x1, mu_x1 + 4 * sig_x1)

    if plot_marg:
        xx_extent = 8 * sig_x0
        yy_extent = 8 * sig_x1

        pdf_xx_ext = x0_pdf.max() - x0_pdf.min()
        pdf_yy_ext = x1_pdf.max() - x1_pdf.min()

        # Marginals are rescaled to 20% of the visible window and drawn
        # from the lower / left edge of the axes
        ax.plot(
            x0_line,
            0.2 * (x0_pdf - x0_pdf.min()) * yy_extent / pdf_xx_ext +
            ax.get_ylim()[0])
        ax.axvline(mu_x0 - sig_x0)
        ax.axvline(mu_x0 + sig_x0)
        ax.plot(
            0.2 * (x1_pdf - x1_pdf.min()) * xx_extent / pdf_yy_ext +
            ax.get_xlim()[0], x1_line)
        ax.axhline(mu_x1 - sig_x1)
        ax.axhline(mu_x1 + sig_x1)

    plt.title(r"$%s = %.3f \pm %.3f, %s = %.3f \pm %.3f$" %
              (labels[0], mu_x0, sig_x0, labels[1], mu_x1, sig_x1))
    plt.tight_layout()
    plt.show()

    return mu_x0, sig_x0, mu_x1, sig_x1