def test_megabeast_input():
    """
    Check that the example megabeast input file is read into a dictionary
    that holds every required parameter.
    """
    # locate the example settings file shipped inside the package
    example_dir = pkg_resources.resource_filename("megabeast", "examples/")
    settings = read_input(f"{example_dir}/megabeast_input.txt")

    assert isinstance(settings, dict), "result should be a dictionary"

    # each required parameter has to appear as a key of the parsed settings
    for required in required_params:
        assert required in settings, "required parameter not present in file"
def megabeast_image(megabeast_input_file, verbose=True):
    """
    Run the MegaBEAST on an image of BEAST results.  The BEAST results
    are given as spatially-reordered BEAST outputs with a file of lnp results
    for each pixel in the image.

    For every pixel whose star count is at least the ``min_for_fit`` setting,
    the ensemble model is fit and the best-fit parameters are stored.  One
    FITS image per fitted parameter is then written to
    ``<projectname>_megabeast/<projectname>_<param>_bestfit.fits`` (existing
    files are overwritten).  Pixels that were not fit are left as NaN.

    Parameters
    ----------
    megabeast_input_file : string
        Name of the file that contains settings, filenames, etc

    verbose : boolean (default=True)
        print extra info (currently not used inside this function)
    """
    # read in the settings from the file
    params = read_input(megabeast_input_file)
    projectname = params["projectname"]
    fit_param_names = params["fit_param_names"]
    min_for_fit = params["min_for_fit"]
    lnp_prefix = params["lnp_file_prefix"]

    # use nstars image to setup for each pixel
    nstars_image, nstars_header = fits.getdata(params["nstars_filename"], header=True)
    n_x, n_y = nstars_image.shape

    # read in the beast data that is needed by all the pixels
    beast_data = {}
    # - SED data
    beast_data.update(read_sed_data(params["beast_seds_filename"], param_list=["Av"]))
    # - max completeness
    beast_data.update(
        read_noise_data(params["beast_noise_filename"], param_list=["completeness"])
    )
    # completeness from toothpick model so n band completeness values
    # require only 1 completeness value for each model
    # max picked (may not be correct)
    beast_data["completeness"] = np.max(beast_data["completeness"], axis=1)

    # BEAST prior model
    beast_pmodel = {
        "AV": params["av_prior_model"],
        "RV": params["rv_prior_model"],
        "fA": params["fA_prior_model"],
    }

    # setup for output: NaN marks pixels that were never fit
    best_fit_images = np.full((n_x, n_y, len(fit_param_names)), np.nan, dtype=float)

    # loop over the pixels that have enough stars to be fit
    for i in trange(n_x, desc="x pixels"):
        for j in trange(n_y, desc="y pixels", leave=False):
            if nstars_image[i, j] < min_for_fit:
                continue

            # filename with saved BEAST posteriors
            # (the file name carries the second image index first)
            lnp_filename = f"{lnp_prefix}_{j}_{i}_lnp.hd5"

            best_fit_images[i, j, :] = fit_ensemble(
                beast_data,
                lnp_filename,
                beast_pmodel,
                nstars_expected=nstars_image[i, j],
            )

    # output results (* = future)
    # - best fit
    # - *megabeast parameter 1D pPDFs
    # - *MCMC chain

    # Write the maps to disk
    master_header = nstars_header

    # make sure the output directory exists; exist_ok avoids the race between
    # a separate existence check and the directory creation
    os.makedirs("./%s_megabeast/" % (projectname), exist_ok=True)

    for k, cname in enumerate(fit_param_names):
        hdu = fits.PrimaryHDU(best_fit_images[:, :, k], header=master_header)
        hdu.writeto(
            "%s_megabeast/%s_%s_bestfit.fits" % (projectname, projectname, cname),
            overwrite=True,
        )
def plot_bestfit_parameter_maps(megabeast_input_file, n_col=2):
    """
    Draw the MegaBEAST best-fit parameter maps as a grid of panels and save
    them to ``<projectname>_megabeast/<projectname>_bestfit_maps.pdf``.

    Parameters
    ----------
    megabeast_input_file : string
        Name of the file that contains settings, filenames, etc

    n_col : int (default=2)
        number of columns of plots in the output file
    """
    # settings, project name and the parameters that get a panel each
    settings = read_input(megabeast_input_file)
    projectname = settings["projectname"]
    param_names = settings["fit_param_names"]

    # every file read or written here shares this path prefix
    path_prefix = "./" + projectname + "_megabeast/" + projectname + "_"

    # the first map provides the aspect ratio of the field
    with fits.open(path_prefix + param_names[0] + "_bestfit.fits") as hdulist:
        map_shape = hdulist[0].data.shape

    # enough rows to hold all panels: one extra row if the last one is partial
    panel_size = 4
    full_rows, leftover = divmod(len(param_names), n_col)
    n_row = full_rows + (1 if leftover else 0)

    fig_width = panel_size * n_col
    fig_height = panel_size * map_shape[0] / map_shape[1] * n_row
    fig = plt.figure(figsize=(fig_width, fig_height))

    for panel_number, param in enumerate(param_names, start=1):
        print(param)

        # image file name
        map_file = path_prefix + param + "_bestfit.fits"

        panel = aplpy.FITSFigure(
            map_file, figure=fig, subplot=(n_row, n_col, panel_number)
        )
        panel.show_colorscale(cmap="magma")

        panel.add_colorbar()
        panel.colorbar.set_font(size=10)

        # the parameter name is shown as the x label; everything else is hidden
        panel.axis_labels.set_xtext(param)
        panel.axis_labels.set_font(size=15)
        panel.axis_labels.hide_y()
        panel.tick_labels.hide_x()
        panel.tick_labels.hide_y()
        panel.frame.set_linewidth(0)

    plt.tight_layout()
    plt.savefig(path_prefix + "bestfit_maps.pdf")
    plt.close()
def plot_input_data(megabeast_input_file, chi2_plot=None, log_scale=False):
    """
    Make a multi-page PDF of diagnostic plots of the data going into the
    MegaBEAST.

    The PDF is written to ``<projectname>_megabeast/plot_input_data.pdf``
    (``plot_input_data_log.pdf`` when `log_scale` is set) and contains:

    1. completeness versus A_V: a random subset of grid points for each star
       (only for pixels whose two indices are both multiples of 3), with the
       maximum-lnP grid point of each star overplotted
    2. one histogram per pixel of the maximum-lnP A_V of each star
    3. one page like (2) for each value in `chi2_plot`, restricted to stars
       with ``-2 * max(lnP)`` below that value

    Only pixels with more than 20 stars in the nstars image are used.

    Parameters
    ----------
    megabeast_input_file : string
        Name of the file that contains settings, filenames, etc

    chi2_plot : list of floats (default=None)
        Make A_V histogram(s) with chi2 less than each of the values in this
        list.  None or an empty list skips these pages.

    log_scale : boolean (default=False)
        If True, make the histogram x-axis a log scale (to visualize log-normal
        A_V distribution)
    """
    # None stands in for "no cuts" so that no mutable object is shared as a
    # default between calls
    if chi2_plot is None:
        chi2_plot = []

    # a pixel needs more than this many stars to be included
    min_nstars = 20
    # grid points drawn per star for the completeness scatter plot
    n_sample = 20

    # read in the settings from the file
    mb_settings = read_input(megabeast_input_file)
    # get the project name
    projectname = mb_settings["projectname"]

    # read in the beast data that is needed by all the pixels
    beast_data = {}
    # - SED data
    beast_data.update(
        read_beast_data.read_sed_data(
            mb_settings["beast_seds_filename"], param_list=["Av"]
        )
    )
    # - max completeness
    beast_data.update(
        read_beast_data.read_noise_data(
            mb_settings["beast_noise_filename"], param_list=["completeness"]
        )
    )
    beast_data["completeness"] = np.max(beast_data["completeness"], axis=1)

    # read in the nstars image (the header is not needed here)
    nstars_image, _ = fits.getdata(mb_settings["nstars_filename"], header=True)
    # dimensions of images/plotting
    y_dimen = nstars_image.shape[0]
    x_dimen = nstars_image.shape[1]

    # name of the multi-page figure
    if log_scale:
        pdf_name = "{0}_megabeast/plot_input_data_log.pdf".format(projectname)
    else:
        pdf_name = "{0}_megabeast/plot_input_data.pdf".format(projectname)

    # per-pixel lists of the best-fit A_V of each star and the matching chi2
    best_av = [[[] for _ in range(x_dimen)] for _ in range(y_dimen)]
    best_av_chi2 = [[[] for _ in range(x_dimen)] for _ in range(y_dimen)]

    # the context manager closes the PDF even if a plotting step fails
    with PdfPages(pdf_name) as pp:

        # -----------------
        # Completeness vs A_V
        # -----------------

        print("")
        print("Making completeness/Av plot")
        print("")

        # set up figure
        plt.figure(figsize=(6, 6))
        plt.subplot(1, 1, 1)

        for i in tqdm(range(y_dimen), desc="y pixels"):
            for j in tqdm(range(x_dimen), desc="x pixels"):
                if not nstars_image[i, j] > min_nstars:
                    continue

                # get info about the fits
                lnp_filename = mb_settings[
                    "lnp_file_prefix"
                ] + "_{0}_{1}_lnp.hd5".format(j, i)
                lnp_data = read_beast_data.read_lnp_data(
                    lnp_filename,
                    nstars=nstars_image[i, j],
                    shift_lnp=True,
                )

                # get the completeness and BEAST model parameters for the
                # same grid points as the sparse likelihoods
                lnp_grid_vals = read_beast_data.get_lnp_grid_vals(
                    beast_data, lnp_data
                )

                # grab the things we want to plot
                plot_av = lnp_grid_vals["Av"]
                plot_comp = lnp_grid_vals["completeness"]

                # only every third pixel in each direction is drawn
                draw_pixel = (i % 3 == 0) and (j % 3 == 0)

                # int() so that range() also accepts a non-integer image dtype
                for n in range(int(nstars_image[i, j])):

                    # plot a random subset of the AVs and completenesses
                    if draw_pixel:
                        n_grid = plot_av[:, n].size
                        # cap the sample size: drawing without replacement
                        # fails when more points are requested than exist
                        plot_these = np.random.choice(
                            n_grid, size=min(n_sample, n_grid), replace=False
                        )
                        plt.plot(
                            plot_av[plot_these, n]
                            + np.random.normal(scale=0.02, size=plot_these.size),
                            plot_comp[plot_these, n],
                            marker=".",
                            c="black",
                            ms=3,
                            mew=0,
                            linestyle="None",
                            alpha=0.05,
                        )

                    # grid point with the highest lnP for this star
                    star_lnp = lnp_data["vals"][:, n]
                    max_lnp = np.max(star_lnp)
                    max_ind = np.where(star_lnp == max_lnp)[0][0]
                    best_av[i][j].append(plot_av[max_ind, n])
                    best_av_chi2[i][j].append(-2 * max_lnp)

                    # also overplot the values for the best fit
                    if draw_pixel:
                        plt.plot(
                            plot_av[max_ind, n] + np.random.normal(scale=0.01),
                            plot_comp[max_ind, n],
                            marker=".",
                            c="magenta",
                            ms=2,
                            mew=0,
                            linestyle="None",
                            alpha=0.3,
                            zorder=9999,
                        )

        ax = plt.gca()
        ax.set_xlabel(r"$A_V$")
        ax.set_ylabel("Completeness")

        pp.savefig()
        # release the figure once it is stored in the PDF
        plt.close()

        # -----------------
        # histograms of AVs
        # -----------------

        print("")
        print("Making Av Histograms")
        print("")

        # set up figure
        plt.figure(figsize=(x_dimen * 2, y_dimen * 2))

        # flat list of A_V over all pixels
        flat_av = [av for row in best_av for pixel in row for av in pixel]

        # define bins from the unique (log) A_V values
        if log_scale:
            uniq_av = np.unique(np.log10(flat_av))
            gap = (uniq_av[-1] - uniq_av[0]) / len(uniq_av)
        else:
            uniq_av = np.unique(flat_av)
            gap = np.min(np.diff(uniq_av))
        bins = np.arange(uniq_av[0], uniq_av[-1], gap)

        # binning and styling shared by every histogram below
        hist_kwargs = dict(
            bins=bins.size,
            range=(uniq_av[0] - gap / 2, uniq_av[-1] + gap / 2),
            facecolor="xkcd:azure",
            linewidth=0.25,
            edgecolor="xkcd:azure",
        )

        for i in tqdm(range(y_dimen), desc="y pixels"):
            for j in tqdm(range(x_dimen), desc="x pixels"):
                if not nstars_image[i, j] > min_nstars:
                    continue

                # set up the subplot (rows are flipped so that i=0 is at the
                # bottom of the page)
                plt.subplot(y_dimen, x_dimen, (y_dimen - i - 1) * (x_dimen) + j + 1)

                # make a histogram
                if best_av[i][j] != []:
                    if log_scale:
                        plt.hist(np.log10(best_av[i][j]), **hist_kwargs)
                    else:
                        plt.hist(best_av[i][j], **hist_kwargs)

        plt.suptitle(r"Best-fit $A_V$ for each pixel", fontsize=40)

        pp.savefig()
        plt.close()

        # -----------------
        # histograms of AVs with a chi2 cut
        # -----------------

        if len(chi2_plot) > 0:
            print("")
            print("Making Av Histograms with chi^2 cut")
            print("")

        for chi2_cut in chi2_plot:

            # set up figure
            plt.figure(figsize=(x_dimen * 2, y_dimen * 2))

            for i in tqdm(range(y_dimen), desc="y pixels"):
                for j in tqdm(range(x_dimen), desc="x pixels"):
                    if not nstars_image[i, j] > min_nstars:
                        continue

                    # set up the subplot
                    plt.subplot(
                        y_dimen, x_dimen, (y_dimen - i - 1) * (x_dimen) + j + 1
                    )

                    # make a histogram of the stars that pass the cut
                    if best_av[i][j] != []:
                        passes_cut = np.array(best_av_chi2[i][j]) < chi2_cut
                        plot_av = np.array(best_av[i][j])[passes_cut]
                        if log_scale:
                            plot_av = np.log10(plot_av)
                        if len(plot_av) != 0:
                            plt.hist(plot_av, **hist_kwargs)

            plt.suptitle(
                r"Best-fit $A_V$ for each pixel, but only using sources with $\chi^2 < $"
                + str(chi2_cut),
                fontsize=40,
            )

            pp.savefig()
            plt.close()
def simulate_av_plots(
    megabeast_input_file, log_scale=False, input_lognormal=None, input_lognormal2=None
):
    """
    Plot distributions of simulated AVs, and overplot the best fit lognormals

    One panel is drawn for each pixel with more than 20 stars in the nstars
    image.  Each panel shows the completeness-weighted stack of the per-star
    probabilities, the best-fit two-lognormal model read from the
    ``*_bestfit.fits`` maps and, if given, the input lognormal(s).  All curves
    are normalized with a trapezoidal integral.  The figure is saved to
    ``<projectname>_megabeast/<projectname>_bestfit_plot.pdf``.

    Parameters
    ----------
    megabeast_input_file : string
        Name of the file that contains settings, filenames, etc

    log_scale : boolean (default=False)
        If True, make the histogram x-axis a log scale (to visualize log-normal
        A_V distribution)

    input_lognormal, input_lognormal2 : dict (default=None)
        Set these to the original values used to create the fake data, and they
        will also be plotted.  The keys read are "max_pos", "sigma" and "N".
        `input_lognormal2` is only used when `input_lognormal` is given.
    """
    # a pixel needs more than this many stars to get a panel
    min_nstars = 20

    # read in the settings from the file
    mb_settings = read_input(megabeast_input_file)
    # get the project name
    projectname = mb_settings["projectname"]

    # read in the beast data that is needed by all the pixels
    # *** this likely needs updating - probably will fail - see megabeast.py
    beast_data = read_sed_data(
        mb_settings["beast_seds_filename"],
        mb_settings["beast_noise_filename"],
        beast_params=["completeness", "Av"],
    )  # ,'Rv','f_A'])
    av_grid = np.unique(beast_data["Av"])

    # also make a more finely sampled A_V grid, and pick the x values that
    # are actually drawn (log10 of the grids when log_scale is set)
    if log_scale:
        av_grid_big = np.geomspace(np.min(av_grid), np.max(av_grid), 500)
        x_grid = np.log10(av_grid)
        x_grid_big = np.log10(av_grid_big)
    else:
        av_grid_big = np.linspace(np.min(av_grid), np.max(av_grid), 500)
        x_grid = av_grid
        x_grid_big = av_grid_big

    # read in the nstars image (the header is not needed here)
    nstars_image, _ = fits.getdata(mb_settings["nstars_filename"], header=True)
    # dimensions of images/plotting
    y_dimen = nstars_image.shape[0]
    x_dimen = nstars_image.shape[1]

    # read in the best fits
    best_fits = {}
    for label in mb_settings["fit_param_names"]:
        with fits.open(
            "./" + projectname + "_megabeast/" + projectname + "_" + label + "_bestfit.fits"
        ) as hdu:
            best_fits[label] = hdu[0].data

    # set colors for plots
    # (pyplot.get_cmap is used because matplotlib.cm.get_cmap no longer
    # exists in recent matplotlib releases)
    cmap = plt.get_cmap("inferno")
    color_data = cmap(0.0)
    color_fit = cmap(0.5)
    color_input = cmap(0.85)

    # -------- input lognormal(s)
    # these do not depend on the pixel, so build the curve once
    input_pdf = None
    if input_lognormal is not None:
        # create lognormal
        input_pdf = _lognorm(
            av_grid_big,
            input_lognormal["max_pos"],
            input_lognormal["sigma"],
            input_lognormal["N"],
        )
        # if there's a second lognormal
        if input_lognormal2 is not None:
            input_pdf += _lognorm(
                av_grid_big,
                input_lognormal2["max_pos"],
                input_lognormal2["sigma"],
                input_lognormal2["N"],
            )
        # normalize it
        input_pdf = input_pdf / np.trapz(input_pdf, av_grid_big)

    # -----------------
    # plotting
    # -----------------

    # set up figure
    fig = plt.figure(figsize=(x_dimen * 2, y_dimen * 2))

    for i in tqdm(range(y_dimen), desc="y pixels"):
        for j in tqdm(range(x_dimen), desc="x pixels"):
            if not nstars_image[i, j] > min_nstars:
                continue

            # -------- data

            # read in the original lnp data
            lnp_filename = mb_settings["lnp_file_prefix"] + "_%i_%i_lnp.hd5" % (j, i)
            lnp_data = read_lnp_data(lnp_filename, nstars_image[i, j])
            lnp_vals = np.array(lnp_data["vals"])

            # completeness for each of the values
            lnp_comp = beast_data["completeness"][lnp_data["indxs"]]

            # normalize the probabilities of each star (one column per star).
            # The column maximum is subtracted first so that exp() cannot
            # underflow to zero for every grid point, which would give NaN.
            shifted = lnp_vals - np.max(lnp_vals, axis=0)
            lnp_vals = shifted - np.log(np.sum(np.exp(shifted), axis=0))

            # stack up some representation of what's being maximized in ensemble_model.py
            prob_stack = np.sum(lnp_comp * np.exp(lnp_vals), axis=1)

            # normalize it (since it's not clear what the numbers mean anyway)
            prob_stack = prob_stack / np.trapz(prob_stack, av_grid)

            # set up the subplot (rows are flipped so that i=0 is at the
            # bottom of the page)
            plt.subplot(y_dimen, x_dimen, (y_dimen - i - 1) * (x_dimen) + j + 1)

            # draw the stacked data
            plt.plot(
                x_grid,
                prob_stack,
                marker=".",
                ms=0,
                mew=0,
                linestyle="-",
                color=color_data,
                linewidth=4,
            )

            ax = plt.gca()

            # -------- input lognormal(s)
            if input_pdf is not None:
                plt.plot(
                    x_grid_big,
                    input_pdf,
                    marker=".",
                    ms=0,
                    mew=0,
                    linestyle="-",
                    color=color_input,
                    linewidth=2,
                    alpha=0.85,
                )

            # -------- best fit

            # generate best fit; the star count is split between the two
            # components according to the fitted N12_ratio
            ratio_plus_one = best_fits["N12_ratio"][i, j] + 1
            lognorm = _two_lognorm(
                av_grid_big,
                best_fits["Av1"][i, j],
                best_fits["Av2"][i, j],
                sigma1=best_fits["sigma1"][i, j],
                sigma2=best_fits["sigma2"][i, j],
                N1=nstars_image[i, j] * (1 - 1 / ratio_plus_one),
                N2=nstars_image[i, j] / ratio_plus_one,
            )
            # normalize it
            lognorm = lognorm / np.trapz(lognorm, av_grid_big)

            # plot it, then restore the y range so that the fit cannot
            # rescale the panel
            yrange_before = ax.get_ylim()
            plt.plot(
                x_grid_big,
                lognorm,
                marker=".",
                ms=0,
                mew=0,
                dashes=[3, 1.5],
                color=color_fit,
                linewidth=2,
            )
            ax.set_ylim(yrange_before)

    # invisible axes spanning the whole figure carry the shared axis labels
    fig.add_subplot(111, frameon=False)
    # booleans are required here: the string "off" is truthy and would leave
    # the ticks switched on
    plt.tick_params(labelcolor="none", top=False, bottom=False, left=False, right=False)
    plt.grid(False)
    if not log_scale:
        plt.xlabel(r"$A_V$", size=15)
    else:
        plt.xlabel(r"Log $A_V$", size=15)
    plt.ylabel("PDF", size=15)

    plt.tight_layout()

    # save figure
    plt.savefig("./" + projectname + "_megabeast/" + projectname + "_bestfit_plot.pdf")
    plt.close()