import numpy as np
import scipy.stats
import matplotlib
import matplotlib.pyplot as plt
from astropy.io import fits
from tqdm import tqdm

# NOTE: the module paths below are assumptions for the helper functions used
# in this file; adjust them to match your beast/megabeast installation.
from beast.tools.read_beast_data import read_beast_data, read_lnp_data
from megabeast.read_megabeast_input import read_megabeast_input
from megabeast.ensemble_model import _lognorm, _two_lognorm
from megabeast.simulate_obs import save_lnp


def simulate_av_plots(
    megabeast_input_file, log_scale=False, input_lognormal=None, input_lognormal2=None
):
    """
    Plot the distributions of simulated A_V values, and overplot the best-fit
    lognormals

    Parameters
    ----------
    megabeast_input_file : string
        Name of the file that contains settings, filenames, etc.

    log_scale : boolean (default=False)
        If True, make the x-axis a log scale (to better visualize the
        log-normal A_V distribution)

    input_lognormal, input_lognormal2 : dict (default=None)
        Set these to the original values used to create the fake data, and
        they will also be plotted
    """

    # read in the settings from the file
    mb_settings = read_megabeast_input(megabeast_input_file)

    # get the project name
    projectname = mb_settings["projectname"]

    # read in the beast data that is needed by all the pixels
    beast_data = read_beast_data(
        mb_settings["beast_seds_filename"],
        mb_settings["beast_noise_filename"],
        beast_params=["completeness", "Av"],
    )  # ,'Rv','f_A'])
    av_grid = np.unique(beast_data["Av"])

    # also make a more finely sampled A_V grid
    if not log_scale:
        av_grid_big = np.linspace(np.min(av_grid), np.max(av_grid), 500)
    else:
        av_grid_big = np.geomspace(np.min(av_grid), np.max(av_grid), 500)

    # read in the nstars image
    nstars_image, nstars_header = fits.getdata(
        mb_settings["nstars_filename"], header=True
    )
    # dimensions of images/plotting
    y_dimen = nstars_image.shape[0]
    x_dimen = nstars_image.shape[1]

    # read in the best fits
    label_list = mb_settings["fit_param_names"]
    best_fits = {}
    for label in label_list:
        with fits.open(
            "./"
            + projectname
            + "_megabeast/"
            + projectname
            + "_"
            + label
            + "_bestfit.fits"
        ) as hdu:
            best_fits[label] = hdu[0].data

    # set colors for plots
    cmap = matplotlib.cm.get_cmap("inferno")
    color_data = cmap(0.0)
    color_fit = cmap(0.5)
    if input_lognormal is not None:
        color_input = cmap(0.85)

    # -----------------
    # plotting
    # -----------------

    # set up figure
    fig = plt.figure(figsize=(x_dimen * 2, y_dimen * 2))

    for i in tqdm(range(y_dimen), desc="y pixels"):
        for j in tqdm(range(x_dimen), desc="x pixels"):
            # for i in [0]:
            #    for j in [12]:

            if nstars_image[i, j] > 20:

                # -------- data

                # read in the original lnp data
                lnp_filename = mb_settings["lnp_file_prefix"] + "_%i_%i_lnp.hd5" % (
                    j,
                    i,
                )
                lnp_data = read_lnp_data(lnp_filename, nstars_image[i, j])
                lnp_vals = np.array(lnp_data["vals"])

                # completeness for each of the values
                lnp_comp = beast_data["completeness"][lnp_data["indxs"]]

                # best A_V for each star
                best_av = []
                for k in range(lnp_vals.shape[1]):
                    vals = lnp_vals[:, k]
                    # renormalize the log probabilities for this star
                    lnp_vals[:, k] = np.log(np.exp(vals) / np.sum(np.exp(vals)))
                    inds = lnp_data["indxs"][:, k]
                    best_val_ind = np.where(vals == np.max(vals))[0][0]
                    best_av.append(beast_data["Av"][inds[best_val_ind]])
                best_av = np.array(best_av)

                # stack up some representation of what's being maximized
                # in ensemble_model.py
                prob_stack = np.sum(lnp_comp * np.exp(lnp_vals), axis=1)

                # normalize it (since it's not clear what the numbers mean anyway)
                # prob_stack = prob_stack / np.sum(prob_stack)
                prob_stack = prob_stack / np.trapz(prob_stack, av_grid)

                # stack up the probabilities at each A_V
                # prob_stack = np.sum(np.exp(lnp_vals), axis=1)

                # set up the subplot
                plt.subplot(y_dimen, x_dimen, (y_dimen - i - 1) * x_dimen + j + 1)

                # plot the stacked data
                if not log_scale:
                    plt.plot(
                        av_grid,
                        prob_stack,
                        marker=".",
                        ms=0,
                        mew=0,
                        linestyle="-",
                        color=color_data,
                        linewidth=4,
                    )
                else:
                    plt.plot(
                        np.log10(av_grid),
                        prob_stack,
                        marker=".",
                        ms=0,
                        mew=0,
                        linestyle="-",
                        color=color_data,
                        linewidth=4,
                    )

                ax = plt.gca()

                # -------- input lognormal(s)

                if input_lognormal is not None:

                    # create lognormal
                    lognorm = _lognorm(
                        av_grid_big,
                        input_lognormal["max_pos"],
                        input_lognormal["sigma"],
                        input_lognormal["N"],
                    )

                    # if there's a second lognormal
                    if input_lognormal2 is not None:
                        lognorm += _lognorm(
                            av_grid_big,
                            input_lognormal2["max_pos"],
                            input_lognormal2["sigma"],
                            input_lognormal2["N"],
                        )

                    # normalize it
                    # lognorm = lognorm / np.sum(lognorm)
                    lognorm = lognorm / np.trapz(lognorm, av_grid_big)

                    # plot it
                    # yrange_before = ax.get_ylim()
                    if not log_scale:
                        plt.plot(
                            av_grid_big,
                            lognorm,
                            marker=".",
                            ms=0,
                            mew=0,
                            linestyle="-",
                            color=color_input,
                            linewidth=2,
                            alpha=0.85,
                        )
                    else:
                        plt.plot(
                            np.log10(av_grid_big),
                            lognorm,
                            marker=".",
                            ms=0,
                            mew=0,
                            linestyle="-",
                            color=color_input,
                            linewidth=2,
                            alpha=0.85,
                        )
                    # ax.set_ylim(yrange_before)

                # -------- best fit

                # generate best fit
                lognorm = _two_lognorm(
                    av_grid_big,
                    best_fits["Av1"][i, j],
                    best_fits["Av2"][i, j],
                    sigma1=best_fits["sigma1"][i, j],
                    sigma2=best_fits["sigma2"][i, j],
                    N1=nstars_image[i, j]
                    * (1 - 1 / (best_fits["N12_ratio"][i, j] + 1)),
                    N2=nstars_image[i, j] / (best_fits["N12_ratio"][i, j] + 1),
                )

                # normalize it
                # lognorm = lognorm / nstars_image[i, j]
                # lognorm = lognorm / np.sum(lognorm)
                lognorm = lognorm / np.trapz(lognorm, av_grid_big)

                # plot it
                yrange_before = ax.get_ylim()
                if not log_scale:
                    plt.plot(
                        av_grid_big,
                        lognorm,
                        marker=".",
                        ms=0,
                        mew=0,
                        dashes=[3, 1.5],
                        color=color_fit,
                        linewidth=2,
                    )
                else:
                    plt.plot(
                        np.log10(av_grid_big),
                        lognorm,
                        marker=".",
                        ms=0,
                        mew=0,
                        dashes=[3, 1.5],
                        color=color_fit,
                        linewidth=2,
                    )
                ax.set_ylim(yrange_before)

    # one frameless axis on top of the grid to hold the shared labels
    fig.add_subplot(111, frameon=False)
    # booleans, not the old "off" strings (which current matplotlib rejects)
    plt.tick_params(
        labelcolor="none", top=False, bottom=False, left=False, right=False
    )
    plt.grid(False)
    if not log_scale:
        plt.xlabel(r"$A_V$", size=15)
    else:
        plt.xlabel(r"Log $A_V$", size=15)
    plt.ylabel("PDF", size=15)
    plt.tight_layout()

    # save figure
    plt.savefig("./" + projectname + "_megabeast/" + projectname + "_bestfit_plot.pdf")
    plt.close()
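
# A hypothetical invocation of simulate_av_plots (sketch only: the settings
# file name and the lognormal parameter values are made up for illustration;
# the dict keys are the ones the function reads):
#
#     input_ln = {"max_pos": 0.2, "sigma": 0.5, "N": 500}
#     input_ln2 = {"max_pos": 2.0, "sigma": 0.5, "N": 500}
#     simulate_av_plots(
#         "megabeast_input.txt",
#         log_scale=True,
#         input_lognormal=input_ln,
#         input_lognormal2=input_ln2,
#     )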


def setup_lnp_files(
    nstars_hdu, av_lognorm, av_gridpoints, av_ind, output_label, av_lognorm2=None
):
    """
    Create sparsely sampled lnp files for each pixel

    Parameters
    ----------
    nstars_hdu : hdu object
        hdu with the nstars image

    av_lognorm : dict
        Dictionary with parameters for the lognormal distribution
        (keys = 'max_pos', 'sigma', 'N')

    av_gridpoints : array
        array of all the unique A_V values in the BEAST grid

    av_ind : list of ints
        indices in the BEAST grid for each A_V in av_gridpoints

    output_label : string
        label to use for folders and file names

    av_lognorm2 : dict (default=None)
        if set (identical formatting to av_lognorm), makes A_V a double
        lognormal distribution
    """

    # dimensions of image
    nx, ny = nstars_hdu.data.shape

    # create a lognormal distribution
    av_dist = np.linspace(av_gridpoints[0], av_gridpoints[-1], 500)
    temp = _lognorm(
        av_dist, av_lognorm["max_pos"], sigma=av_lognorm["sigma"], N=av_lognorm["N"]
    )
    if av_lognorm2 is not None:
        temp2 = _lognorm(
            av_dist,
            av_lognorm2["max_pos"],
            sigma=av_lognorm2["sigma"],
            N=av_lognorm2["N"],
        )
        temp += temp2
    lognorm_dist = temp / np.sum(temp)
    lognorm_cdf = np.cumsum(lognorm_dist)

    # iterate through pixels
    for i in range(nx):
        for j in range(ny):

            # number of stars in this pixel
            nstar = nstars_hdu.data[i, j]

            # initialize a list to save info for the lnp file
            lnp_save_list = []

            # iterate through the stars in this pixel
            for n in range(nstar):

                # draw an A_V from the lognormal distribution
                av = np.interp(np.random.random(1), lognorm_cdf, av_dist)

                # calculate probabilities
                # include errors -> sample from a gaussian at each BEAST A_V grid point
                prob_list = scipy.stats.norm(av, 0.2).pdf(av_gridpoints)
                prob_list = prob_list / np.sum(prob_list)

                # make a list of the required quantities in the lnp file
                star_num = n
                idx = av_ind
                lnp = np.log(prob_list)
                chi2 = lnp / (-0.5)
                sed = [99]
                lnp_save_list.append([star_num, idx, lnp, chi2, sed])

            # save the lnp data
            # print('i=' + str(i) + ' j=' + str(j) + ' nstar=' + str(nstar)
            #       + ' len(lnp_list)=' + str(len(lnp_save_list)))
            save_lnp(
                output_label + "/" + output_label + "_%i_%i_lnp.hd5" % (j, i),
                lnp_save_list,
                False,
            )
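
# The A_V draw above is inverse-transform sampling: normalize the tabulated
# distribution, build its CDF with np.cumsum, then map uniform deviates
# through np.interp. A minimal self-contained sketch of the same technique
# (toy numbers, not megabeast data):
#
#     import numpy as np
#
#     x = np.linspace(0.0, 10.0, 500)
#     pdf = np.exp(-0.5 * ((x - 3.0) / 0.7) ** 2)  # any tabulated shape works
#     cdf = np.cumsum(pdf / np.sum(pdf))
#     samples = np.interp(np.random.random(1000), cdf, x)  # 1000 draws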