def test_trim_grid(self):
    """
    Trim the SED grid and noise model using cached versions of both and
    compare the results to cached versions of the trimmed files.
    """
    import os

    # read in the observed data
    obsdata = Observations(
        self.obs_fname_cache, self.settings.filters, self.settings.obs_colnames
    )

    # get the modelsedgrid
    modelsedgrid = SEDGrid(self.seds_fname_cache)

    # read in the noise model just created
    noisemodel_vals = noisemodel.get_noisemodelcat(self.noise_fname_cache)

    # Write the trimmed grids into a scratch directory that is removed when
    # the test ends, instead of borrowing the name of an already-discarded
    # NamedTemporaryFile (which leaves the output files behind).
    with tempfile.TemporaryDirectory() as tmpdir:
        seds_trim_fname = os.path.join(tmpdir, "seds_trim.hd5")
        noise_trim_fname = os.path.join(tmpdir, "noise_trim.hd5")

        # trim the model sedgrid
        trim_models(
            modelsedgrid,
            noisemodel_vals,
            obsdata,
            seds_trim_fname,
            noise_trim_fname,
            sigma_fac=3.0,
        )

        # compare the new to the cached version
        compare_hdf5(self.seds_trim_fname_cache, seds_trim_fname, ctype="seds")
        compare_hdf5(self.noise_trim_fname_cache, noise_trim_fname, ctype="noise")
def test_simobs(self):
    """
    Generate simulated observations from the cached SED grid and noise
    model, then check them against the cached simulated-observation table.
    """
    # fetch the reference table that only this test needs
    simobs_fname_cache = download_rename("beast_example_phat_simobs.fits")

    # physics model grid (includes priors) and noise model
    # (bias, uncertainty, and completeness)
    sedgrid = SEDGrid(self.seds_fname_cache)
    noise_vals = noisemodel.get_noisemodelcat(self.noise_fname_cache)

    sim_kwargs = {"nsim": 100, "compl_filter": "max", "ranseed": 1234}
    table_new = gen_SimObs_from_sedgrid(sedgrid, noise_vals, **sim_kwargs)

    # reference table to compare against
    table_cache = Table.read(simobs_fname_cache)

    # column names may differ only in case, so upper-case all of them in
    # both tables before comparing
    for tab in (table_new, table_cache):
        for colname in tab.colnames:
            tab[colname].name = colname.upper()

    compare_tables(table_cache, table_new)
def fit_submodel(modelsedgridfile):
    """
    Fit the observations with the trimmed version of one subgrid.

    All file names are derived from `modelsedgridfile` by replacing the
    'seds' substring.  Uses `obsdata`, `args`, and `grid_info_dict` from the
    enclosing scope.
    """

    def _retag(tag):
        # swap the 'seds' part of the subgrid file name for another tag
        return modelsedgridfile.replace('seds', tag)

    # inputs: trimmed SED grid and the matching trimmed noise model
    trimmed_modelsedgridfile = _retag('seds_trim')
    trimmed_noisemodelfile = trimmed_modelsedgridfile.replace(
        'seds', 'noisemodel')

    # outputs: lnp (hd5), stats (fits), and 1D pdfs (fits)
    stats_out = _retag('stats').replace('.hd5', '.fits')
    output_names = {
        'stats_outname': stats_out,
        'pdf1d_outname': stats_out.replace('stats', 'pdf1d'),
        'lnp_outname': _retag('lnp'),
    }

    # load the subgrid seds and subgrid noisemodel
    subgrid = FileSEDGrid(trimmed_modelsedgridfile)
    subgrid_noise = noisemodel.get_noisemodelcat(trimmed_noisemodelfile)

    fit.summary_table_memory(
        obsdata,
        subgrid_noise,
        subgrid,
        resume=args.resume,
        threshold=-10.,
        save_every_npts=100,
        lnp_npts=60,
        grid_info_dict=grid_info_dict,
        do_not_normalize=True,
        **output_names
    )
    print('Done fitting on grid ' + trimmed_modelsedgridfile)
def trim_submodel(modelsedgridfile):
    """
    Trim one subgrid and its noise model against the observations.

    File names are derived from `modelsedgridfile` by replacing the "seds"
    substring.  Uses `args`, `bin_subfolder`, and `obsdata` from the
    enclosing scope.
    """
    subgrid = FileSEDGrid(modelsedgridfile)

    # derived names: [noise model, trimmed SED grid, trimmed noise model]
    trimmed_seds = modelsedgridfile.replace("seds", "seds_trim")
    fnames = [
        modelsedgridfile.replace("seds", "noisemodel"),
        trimmed_seds,
        trimmed_seds.replace("seds", "noisemodel"),
    ]

    # When working with density bins, we need to work in a subfolder
    if args.dens_bin is not None:
        fnames = [os.path.join(bin_subfolder, fname) for fname in fnames]
    noisefile, sed_trimname, noisemodel_trimname = fnames

    # read in the noise model just created and trim both grids
    trim_grid.trim_models(
        subgrid,
        noisemodel.get_noisemodelcat(noisefile),
        obsdata,
        sed_trimname,
        noisemodel_trimname,
        sigma_fac=3.0,
    )
def test_get_noisemodelcat():
    """
    Read the example noise model and check that it exposes the error, bias,
    and completeness entries.
    """
    ntable = get_noisemodelcat(
        download_rename("beast_example_phat_noisemodel.grid.hd5")
    )

    # the 3 basic elements that must always be present
    for cexp in ("error", "bias", "completeness"):
        assert cexp in ntable.keys(), f"{cexp} values not found in noisemodel"
def test_fit_grid():
    """
    Fit the example observations with the cached trimmed SED grid and noise
    model, and compare the stats and pdf1d outputs to cached versions.
    """
    import os
    import tempfile

    # download the needed files
    vega_fname = download_rename('vega.hd5')
    obs_fname = download_rename('b15_4band_det_27_A.fits')
    noise_trim_fname = download_rename(
        'beast_example_phat_noisemodel_trim.grid.hd5')
    seds_trim_fname = download_rename('beast_example_phat_seds_trim.grid.hd5')

    # download cached version of fitting results
    stats_fname_cache = download_rename('beast_example_phat_stats.fits')
    pdf1d_fname_cache = download_rename('beast_example_phat_pdf1d.fits')

    ################

    # read in the the AST noise model
    noisemodel_vals = noisemodel.get_noisemodelcat(noise_trim_fname)

    # read in the observed data
    filters = [
        'HST_WFC3_F275W', 'HST_WFC3_F336W', 'HST_ACS_WFC_F475W',
        'HST_ACS_WFC_F814W', 'HST_WFC3_F110W', 'HST_WFC3_F160W'
    ]
    basefilters = ['F275W', 'F336W', 'F475W', 'F814W', 'F110W', 'F160W']
    obs_colnames = [f.lower() + '_rate' for f in basefilters]

    obsdata = get_obscat(obs_fname,
                         filters,
                         obs_colnames,
                         vega_fname=vega_fname)

    # Write the outputs into a scratch directory rather than hard-coded
    # /tmp paths: portable, no collisions between concurrent runs, and the
    # files are removed when the test ends.
    with tempfile.TemporaryDirectory() as tmpdir:
        stats_fname = os.path.join(tmpdir, 'beast_example_phat_stats.fits')
        pdf1d_fname = os.path.join(tmpdir, 'beast_example_phat_pdf1d.fits')
        lnp_fname = os.path.join(tmpdir, 'beast_example_phat_lnp.hd5')

        fit.summary_table_memory(obsdata,
                                 noisemodel_vals,
                                 seds_trim_fname,
                                 threshold=-10.,
                                 save_every_npts=100,
                                 lnp_npts=60,
                                 stats_outname=stats_fname,
                                 pdf1d_outname=pdf1d_fname,
                                 lnp_outname=lnp_fname)

        # check that the stats files are exactly the same
        table_cache = Table.read(stats_fname_cache)
        table_new = Table.read(stats_fname)

        compare_tables(table_cache, table_new)

        # lnp files not checked as they are randomly sparsely sampled
        #   hence will be different every time the fitting is run

        # check that the pdf1d files are exactly the same
        compare_fits(pdf1d_fname_cache, pdf1d_fname)
def test_trim_grid():
    """
    Trim the example SED grid and noise model against the example
    observations and compare the results to cached trimmed versions.
    """
    import os
    import tempfile

    # download the needed files
    vega_fname = download_rename("vega.hd5")
    seds_fname = download_rename("beast_example_phat_seds.grid.hd5")
    noise_fname = download_rename("beast_example_phat_noisemodel.grid.hd5")
    obs_fname = download_rename("b15_4band_det_27_A.fits")

    # download cached version of noisemodel on the sed grid
    noise_trim_fname_cache = download_rename(
        "beast_example_phat_noisemodel_trim.grid.hd5")
    seds_trim_fname_cache = download_rename(
        "beast_example_phat_seds_trim.grid.hd5")

    ################

    # read in the observed data
    filters = [
        "HST_WFC3_F275W",
        "HST_WFC3_F336W",
        "HST_ACS_WFC_F475W",
        "HST_ACS_WFC_F814W",
        "HST_WFC3_F110W",
        "HST_WFC3_F160W",
    ]
    basefilters = ["F275W", "F336W", "F475W", "F814W", "F110W", "F160W"]
    obs_colnames = [f.lower() + "_rate" for f in basefilters]

    obsdata = Observations(obs_fname, filters, obs_colnames,
                           vega_fname=vega_fname)

    # get the modelsedgrid
    modelsedgrid = SEDGrid(seds_fname)

    # read in the noise model just created
    noisemodel_vals = noisemodel.get_noisemodelcat(noise_fname)

    # Derive the output base names first, then place them in a scratch
    # directory so the test neither litters the working directory nor
    # leaves files behind.
    seds_trim_basename = "beast_example_phat_seds_trim.grid.hd5"
    noise_trim_basename = seds_trim_basename.replace("_seds", "_noisemodel")

    with tempfile.TemporaryDirectory() as tmpdir:
        seds_trim_fname = os.path.join(tmpdir, seds_trim_basename)
        noise_trim_fname = os.path.join(tmpdir, noise_trim_basename)

        # trim the model sedgrid
        trim_models(
            modelsedgrid,
            noisemodel_vals,
            obsdata,
            seds_trim_fname,
            noise_trim_fname,
            sigma_fac=3.0,
        )

        # compare the new to the cached version
        compare_hdf5(seds_trim_fname_cache, seds_trim_fname, ctype="seds")
        compare_hdf5(noise_trim_fname_cache, noise_trim_fname, ctype="noise")
def fit_submodel(modelsedgridfile):
    """
    Fit the observations with the trimmed version of one subgrid.

    All file names are derived from `modelsedgridfile` by replacing the
    "seds" substring; when `args.dens_bin` is set they are placed inside
    `bin_subfolder`.  Uses `args`, `bin_subfolder`, `obsdata`, and
    `grid_info_dict` from the enclosing scope.

    If the fit raises and `args.ignore_missing_subresults` is set, the
    failure is reported and the subgrid is skipped; otherwise the exception
    propagates.
    """
    # input files
    trimmed_modelsedgridfile = modelsedgridfile.replace("seds", "seds_trim")
    trimmed_noisemodelfile = trimmed_modelsedgridfile.replace(
        "seds", "noisemodel")

    # output files
    lnpfile = modelsedgridfile.replace("seds", "lnp")
    statsfile = modelsedgridfile.replace("seds", "stats")
    statsfile = statsfile.replace(".hd5", ".fits")
    pdf1dfile = statsfile.replace("stats", "pdf1d")

    if args.dens_bin is not None:
        # Put everything in the right subfolder
        (
            trimmed_modelsedgridfile,
            trimmed_noisemodelfile,
            lnpfile,
            statsfile,
            pdf1dfile,
        ) = [
            os.path.join(bin_subfolder, f)
            for f in [
                trimmed_modelsedgridfile,
                trimmed_noisemodelfile,
                lnpfile,
                statsfile,
                pdf1dfile,
            ]
        ]

    # load the subgrid seds and subgrid noisemodel
    modelsedgrid = FileSEDGrid(trimmed_modelsedgridfile)
    noisemodel_vals = noisemodel.get_noisemodelcat(trimmed_noisemodelfile)

    try:
        fit.summary_table_memory(
            obsdata,
            noisemodel_vals,
            modelsedgrid,
            resume=args.resume,
            threshold=-10.0,
            save_every_npts=100,
            lnp_npts=60,
            stats_outname=statsfile,
            pdf1d_outname=pdf1dfile,
            grid_info_dict=grid_info_dict,
            lnp_outname=lnpfile,
            do_not_normalize=True,
        )
        print("Done fitting on grid " + trimmed_modelsedgridfile)
    except Exception as e:
        if not args.ignore_missing_subresults:
            # bare raise keeps the original traceback intact
            raise
        # best-effort mode: keep going, but say which grid was skipped and
        # why instead of swallowing the failure silently
        print(
            "Skipping grid {} because fitting failed: {!r}".format(
                trimmed_modelsedgridfile, e
            )
        )
def trim_submodel(modelsedgridfile):
    """
    Trim one subgrid and its noise model against the observations.

    File names are derived from `modelsedgridfile` by replacing the 'seds'
    substring.  Uses `obsdata` from the enclosing scope.
    """
    subgrid = FileSEDGrid(modelsedgridfile)

    # noise model that belongs to this subgrid
    subgrid_noise = noisemodel.get_noisemodelcat(
        modelsedgridfile.replace('seds', 'noisemodel'))

    # names of the trimmed outputs
    trimmed_seds = modelsedgridfile.replace('seds', 'seds_trim')
    trimmed_noise = trimmed_seds.replace('seds', 'noisemodel')

    trim_grid.trim_models(
        subgrid,
        subgrid_noise,
        obsdata,
        trimmed_seds,
        trimmed_noise,
        sigma_fac=3.,
    )
def remove_filters_from_files(catfile, physgrid, obsgrid, outbase, rm_filters):
    """
    Remove filters from a catalog, a physics model grid, and a noise model.

    Three files are written: "<outbase>_cat.fits", "<outbase>_sed.grid.hd5",
    and "<outbase>_noisemodel.grid.hd5".

    Parameters
    ----------
    catfile : string
        file name of the photometry catalog
    physgrid : string
        file name of the physics model (SED) grid
    obsgrid : string
        file name of the noise model
    outbase : string
        prefix for the names of the output files
    rm_filters : list of strings
        filters to remove; compared against "<filter>_rate" catalog columns
        and against the lower-cased last "_"-separated part of each grid
        filter name
    """
    # remove the requested filters from the catalog file
    cat = Table.read(catfile)
    for cfilter in rm_filters:
        colname = "{}_rate".format(cfilter)
        if colname in cat.colnames:
            cat.remove_column(colname)
        else:
            print("{} not in catalog file".format(colname))
    cat.write("{}_cat.fits".format(outbase), overwrite=True)

    # get the sed grid and process
    g0 = FileSEDGrid(physgrid, backend="cache")
    filters = g0.header["filters"].split(" ")
    shortfilters = [(cfilter.split("_"))[-1].lower() for cfilter in filters]
    nlamb = []
    nfilters = []
    rindxs = []
    # Use the running position for the removal index.  Looking it up with
    # shortfilters.index() returns the first occurrence only, which removes
    # the wrong SED column when two instruments share a short filter name.
    for findx, (csfilter, clamb, cfilter) in enumerate(
        zip(shortfilters, g0.lamb, filters)
    ):
        if csfilter not in rm_filters:
            nlamb.append(clamb)
            nfilters.append(cfilter)
        else:
            rindxs.append(findx)
    nseds = np.delete(g0.seds, rindxs, 1)

    print("orig filters: {}".format(" ".join(filters)))
    print(" new filters: {}".format(" ".join(nfilters)))

    g = SpectralGrid(np.array(nlamb), seds=nseds, grid=g0.grid, backend="memory")
    g.grid.header["filters"] = " ".join(nfilters)
    g.writeHDF("{}_sed.grid.hd5".format(outbase))

    # get and process the observation model
    obsgrid = noisemodel.get_noisemodelcat(obsgrid)
    with tables.open_file("{}_noisemodel.grid.hd5".format(outbase), "w") as outfile:
        outfile.create_array(outfile.root, "bias", np.delete(obsgrid.root.bias, rindxs, 1))
        outfile.create_array(outfile.root, "error", np.delete(obsgrid.root.error, rindxs, 1))
        outfile.create_array(
            outfile.root,
            "completeness",
            np.delete(obsgrid.root.completeness, rindxs, 1),
        )
def remove_filters_from_files(catfile, physgrid, obsgrid, outbase, rm_filters):
    """
    Remove filters from a catalog, a physics model grid, and a noise model.

    Three files are written: '<outbase>_cat.fits', '<outbase>_sed.grid.hd5',
    and '<outbase>_noisemodel.grid.hd5'.

    Parameters
    ----------
    catfile : string
        file name of the photometry catalog
    physgrid : string
        file name of the physics model (SED) grid
    obsgrid : string
        file name of the noise model
    outbase : string
        prefix for the names of the output files
    rm_filters : list of strings
        filters to remove; compared against '<filter>_rate' catalog columns
        and against the lower-cased last '_'-separated part of each grid
        filter name
    """
    # remove the requested filters from the catalog file
    cat = Table.read(catfile)
    for cfilter in rm_filters:
        colname = '{}_rate'.format(cfilter)
        if colname in cat.colnames:
            cat.remove_column(colname)
        else:
            print('{} not in catalog file'.format(colname))
    cat.write('{}_cat.fits'.format(outbase), overwrite=True)

    # get the sed grid and process
    g0 = FileSEDGrid(physgrid, backend='cache')
    filters = g0.header['filters'].split(' ')
    shortfilters = [(cfilter.split('_'))[-1].lower() for cfilter in filters]
    nlamb = []
    nfilters = []
    rindxs = []
    # Use the running position for the removal index.  Looking it up with
    # shortfilters.index() returns the first occurrence only, which removes
    # the wrong SED column when two instruments share a short filter name.
    for findx, (csfilter, clamb, cfilter) in enumerate(
            zip(shortfilters, g0.lamb, filters)):
        if csfilter not in rm_filters:
            nlamb.append(clamb)
            nfilters.append(cfilter)
        else:
            rindxs.append(findx)
    nseds = np.delete(g0.seds, rindxs, 1)

    print('orig filters: {}'.format(' '.join(filters)))
    print(' new filters: {}'.format(' '.join(nfilters)))

    g = SpectralGrid(np.array(nlamb), seds=nseds,
                     grid=g0.grid, backend='memory')
    g.grid.header['filters'] = ' '.join(nfilters)
    g.writeHDF('{}_sed.grid.hd5'.format(outbase))

    # get and process the observation model
    obsgrid = noisemodel.get_noisemodelcat(obsgrid)
    with tables.open_file('{}_noisemodel.grid.hd5'.format(outbase), 'w') \
            as outfile:
        outfile.create_array(outfile.root, 'bias',
                             np.delete(obsgrid.root.bias, rindxs, 1))
        outfile.create_array(outfile.root, 'error',
                             np.delete(obsgrid.root.error, rindxs, 1))
        outfile.create_array(outfile.root, 'completeness',
                             np.delete(obsgrid.root.completeness, rindxs, 1))
def test_fit_grid(self):
    """
    Fit a cached version of the observations with cached versions of the
    trimmed sed grid and noisemodel and compare the result to cached
    versions of the stats, pdf1d, and pdf2d files.
    """
    import os

    # read in the the AST noise model
    noisemodel_vals = noisemodel.get_noisemodelcat(self.noise_trim_fname_cache)

    # read in the observed data
    obsdata = Observations(
        self.obs_fname_cache, self.settings.filters, self.settings.obs_colnames
    )

    # Write the outputs into a scratch directory that is removed when the
    # test ends, instead of borrowing the names of already-discarded
    # NamedTemporaryFile objects (which leaves the output files behind).
    with tempfile.TemporaryDirectory() as tmpdir:
        stats_fname = os.path.join(tmpdir, "stats.fits")
        pdf1d_fname = os.path.join(tmpdir, "pdf1d.fits")
        pdf2d_fname = os.path.join(tmpdir, "pdf2d.fits")
        lnp_fname = os.path.join(tmpdir, "lnp.hd5")

        fit.summary_table_memory(
            obsdata,
            noisemodel_vals,
            self.seds_trim_fname_cache,
            threshold=-10.0,
            save_every_npts=100,
            lnp_npts=500,
            max_nbins=200,
            stats_outname=stats_fname,
            pdf1d_outname=pdf1d_fname,
            pdf2d_outname=pdf2d_fname,
            pdf2d_param_list=["Av", "M_ini", "logT"],
            lnp_outname=lnp_fname,
            surveyname=self.settings.surveyname,
        )

        # check that the stats files are exactly the same
        table_cache = Table.read(self.stats_fname_cache)
        table_new = Table.read(stats_fname)

        compare_tables(table_cache, table_new)

        # lnp files not checked as they are randomly sparsely sampled
        #   hence will be different every time the fitting is run

        # check that the pdf1d/pdf2d files are exactly the same
        compare_fits(self.pdf1d_fname_cache, pdf1d_fname)
        compare_fits(self.pdf2d_fname_cache, pdf2d_fname)
def test_simobs():
    """
    Generate simulated observations from the example SED grid and noise
    model, then check them against the cached simulated-observation table.
    """
    # fetch the inputs, then the cached reference table
    vega_fname = download_rename("vega.hd5")
    seds_fname = download_rename("beast_example_phat_seds.grid.hd5")
    noise_fname = download_rename("beast_example_phat_noisemodel.grid.hd5")
    simobs_fname_cache = download_rename("beast_example_phat_simobs.fits")

    ################

    # physics model grid (includes priors) and noise model
    # (bias, uncertainty, and completeness)
    sedgrid = SEDGrid(seds_fname)
    noise_vals = noisemodel.get_noisemodelcat(noise_fname)

    sim_kwargs = {
        "nsim": 100,
        "compl_filter": "f475w",
        "ranseed": 1234,
        "vega_fname": vega_fname,
    }
    table_new = gen_SimObs_from_sedgrid(sedgrid, noise_vals, **sim_kwargs)

    # reference table to compare against
    table_cache = Table.read(simobs_fname_cache)

    # column names may differ only in case, so upper-case all of them in
    # both tables before comparing
    for tab in (table_new, table_cache):
        for colname in tab.colnames:
            tab[colname].name = colname.upper()

    compare_tables(table_cache, table_new)
def plot_noisemodel(
    sed_file,
    noise_file_list,
    plot_file,
    samp=100,
    cmap_name='viridis',
):
    """
    Make a plot of the noise model: for each of the bands, make plots of
    bias, uncertainty, and completeness as a function of flux.

    If there are multiple files in noise_file_list, each of them will be
    overplotted in each panel.  The files are plotted in order of the
    integer formed by the digits in their names, and each is labelled with
    the first "bin<number>" found in its name.

    Parameters
    ----------
    sed_file : string
        path+name of the SED grid file

    noise_file_list : string or list of strings
        path+name of the noise model file(s); the input is not modified

    plot_file : string
        name of the file to save the plot

    samp : int (default=100)
        plotting all of the SED points takes a long time for a viewer to load,
        so set this to plot every Nth point

    cmap_name : string (default='viridis')
        name of a color map to use
    """

    # read in the SED grid
    print("* reading SED grid file")
    sed_object = SEDGrid(sed_file)
    if hasattr(sed_object.seds, "read"):
        sed_grid = sed_object.seds.read()
    else:
        sed_grid = sed_object.seds
    filter_list = sed_object.filters
    n_filter = len(filter_list)

    # figure; squeeze=False keeps `ax` two-dimensional even for one filter
    fig, ax = plt.subplots(
        nrows=3, ncols=n_filter, figsize=(25, 15), squeeze=False
    )

    # setup the plots
    fontsize = 12
    font = {"size": fontsize}
    plt.rc("font", **font)
    plt.rc("lines", linewidth=2)
    plt.rc("axes", linewidth=2)
    plt.rc("xtick.major", width=2)
    plt.rc("ytick.major", width=2)

    plt.set_cmap(cmap_name)

    # go through noise files after sorting them according to
    # their SD bin number; work on a sorted copy so the caller's list is
    # left untouched and a single file name (string) is accepted too
    noise_files = sorted(
        (str(nf) for nf in np.atleast_1d(noise_file_list)),
        key=lambda f: int(''.join(filter(str.isdigit, f))),
    )
    bin_label = [re.findall(r"bin\d+", x)[0] for x in noise_files]

    for n, nfile in enumerate(noise_files):

        print("* reading " + nfile)

        # read in the values
        noisemodel_vals = noisemodel.get_noisemodelcat(nfile)

        # extract error and bias
        noise_err = noisemodel_vals["error"]
        noise_bias = noisemodel_vals["bias"]
        noise_compl = noisemodel_vals["completeness"]

        # plot things
        for f, filt in enumerate(filter_list):

            # error is negative where it's been extrapolated -> trim those
            good_err = np.where(noise_err[:, f] > 0)[0]
            plot_sed = sed_grid[good_err, f][::samp]
            plot_err = noise_err[good_err, f][::samp]
            plot_bias = noise_bias[good_err, f][::samp]
            plot_compl = noise_compl[good_err, f][::samp]

            # bias
            bax = ax[0, f]
            bax.plot(
                np.log10(plot_sed),
                plot_bias / plot_sed,
                marker="o",
                linestyle="none",
                mew=0,
                ms=2,
                alpha=0.1,
                label='SD %s' % (bin_label[n]),
            )

            bax.tick_params(axis="both", which="major")
            bax.set_xlabel("log " + filt)
            bax.set_ylabel(r"Bias ($\mu$/F)")
            leg = bax.legend(loc='lower right', markerscale=3)
            # make the legend markers opaque; tolerate both the older
            # (legendHandles / _legmarker) and newer (legend_handles)
            # matplotlib legend APIs
            handles = getattr(leg, "legend_handles", None)
            if handles is None:
                handles = leg.legendHandles
            for lh in handles:
                getattr(lh, "_legmarker", lh).set_alpha(1)

            # error
            eax = ax[1, f]
            eax.plot(
                np.log10(plot_sed),
                plot_err / plot_sed,
                marker="o",
                linestyle="none",
                mew=0,
                ms=2,
                alpha=0.1,
            )

            eax.tick_params(axis="both", which="major")
            eax.set_xlabel("log " + filt)
            eax.set_ylabel(r"Error ($\sigma$/F)")

            # completeness
            cax = ax[2, f]
            cax.plot(
                np.log10(plot_sed),
                plot_compl,
                marker="o",
                linestyle="none",
                mew=0,
                ms=2,
                alpha=0.1,
            )

            cax.tick_params(axis="both", which="major")
            cax.set_xlabel("log " + filt)
            cax.set_ylabel(r"Completeness")

    plt.tight_layout()

    fig.savefig(plot_file, dpi=300)
    plt.close(fig)
def plot_noisemodel(
    sed_file,
    noise_file_list,
    plot_file,
    samp=100,
    color=("black", "red", "gold", "lime", "xkcd:azure"),
    label=None,
):
    """
    Make a plot of the noise model: for each of the bands, make plots of
    bias, uncertainty, and completeness as a function of flux.

    If there are multiple files in noise_file_list, each of them will be
    overplotted in each panel.

    Parameters
    ----------
    sed_file : string
        path+name of the SED grid file

    noise_file_list : string or list of strings
        path+name of the noise model file(s)

    plot_file : string
        name of the file to save the plot

    samp : int (default=100)
        plotting all of the SED points takes a long time for a viewer to load,
        so set this to plot every Nth point

    color : sequence of strings (default=('black','red','gold','lime','xkcd:azure'))
        colors to cycle through when making plots

    label : list of strings (default=None)
        if set, use these labels in a legend for each item in noise_file_list
    """

    # read in the SED grid
    print("* reading SED grid file")
    sed_object = SEDGrid(sed_file)
    if hasattr(sed_object.seds, "read"):
        sed_grid = sed_object.seds.read()
    else:
        sed_grid = sed_object.seds
    filter_list = sed_object.filters
    n_filter = len(filter_list)

    # figure; squeeze=False keeps `ax` two-dimensional even for one filter
    fig, ax = plt.subplots(
        nrows=3, ncols=n_filter, figsize=(25, 15), squeeze=False
    )

    # setup the plots
    fontsize = 12
    font = {"size": fontsize}
    plt.rc("font", **font)
    plt.rc("lines", linewidth=2)
    plt.rc("axes", linewidth=2)
    plt.rc("xtick.major", width=2)
    plt.rc("ytick.major", width=2)

    # normalize once so a single file name (string) is counted as one file
    noise_files = np.atleast_1d(noise_file_list)
    n_files = len(noise_files)

    # go through noise files
    for n, nfile in enumerate(noise_files):

        print("* reading " + nfile)

        # read in the values
        noisemodel_vals = noisemodel.get_noisemodelcat(nfile)

        # extract error and bias
        noise_err = noisemodel_vals["error"]
        noise_bias = noisemodel_vals["bias"]
        noise_compl = noisemodel_vals["completeness"]

        # The label has to be attached to the plotted line (not to the
        # Axes) for it to show up in the legend.
        label_kwargs = {} if label is None else {"label": label[n]}

        # plot things
        for f, filt in enumerate(filter_list):

            # error is negative where it's been extrapolated -> trim those
            good_err = np.where(noise_err[:, f] > 0)[0]
            plot_sed = sed_grid[good_err, f][::samp]
            plot_err = noise_err[good_err, f][::samp]
            plot_bias = noise_bias[good_err, f][::samp]
            plot_compl = noise_compl[good_err, f][::samp]

            # (row, y values, y label) for the bias, error, and
            # completeness panels of this filter
            panels = (
                (0, plot_bias / plot_sed, r"Bias ($\mu$/F)"),
                (1, plot_err / plot_sed, r"Error ($\sigma$/F)"),
                (2, plot_compl, r"Completeness"),
            )
            for row, yvals, ylabel in panels:
                pax = ax[row, f]
                pax.plot(
                    np.log10(plot_sed),
                    yvals,
                    marker="o",
                    linestyle="none",
                    mew=0,
                    ms=2,
                    color=color[n % len(color)],
                    alpha=0.1,
                    **label_kwargs
                )
                pax.tick_params(axis="both", which="major")
                pax.set_xlabel("log " + filt)
                pax.set_ylabel(ylabel)

            # do a legend if this is
            # (a) the leftmost panel
            # (b) the last line to be added
            # (c) there are labels set
            if (f == 0) and (n == n_files - 1) and (label is not None):
                # legends go on the bias and error panels
                for row in (0, 1):
                    leg = ax[row, f].legend(fontsize=12)
                    # make the legend markers opaque; tolerate both the
                    # older (legendHandles / _legmarker) and newer
                    # (legend_handles) matplotlib legend APIs
                    handles = getattr(leg, "legend_handles", None)
                    if handles is None:
                        handles = leg.legendHandles
                    for lh in handles:
                        getattr(lh, "_legmarker", lh).set_alpha(1)

    plt.tight_layout()

    fig.savefig(plot_file)
    plt.close(fig)
def calc_depth(
    physgrid_list,
    noise_model_list,
    completeness_value=0.5,
    vega_mag=True,
    vega_fname=None,
):
    """
    Calculate the observation depth of a field using the completeness.  Some
    fields have low completeness at both faint and bright fluxes; this finds
    the faintest flux at which the completeness exceeds the given value(s).

    Parameters
    ----------
    physgrid_list : string or list of strings
        Name of the physics model file.  If there are multiple physics model
        grids (i.e., if there are subgrids), list them all here.

    noise_model_list : string or list of strings
        Name of the noise model file.  If there are multiple files for
        physgrid_list (because of subgrids), list the noise model file
        associated with each physics model file.

    completeness_value : float or list of floats
        The completeness(es) at which to evaluate the depth. Completeness is
        defined in the range 0.0 to 1.0.

    vega_mag : boolean (default=True)
        If True, return results in Vega mags. Otherwise returns flux in
        erg/s/cm^2/A.

    vega_fname : string
        filename for the vega info (useful for testing)

    Returns
    -------
    depth_dict : dictionary
        keys are the filters present in the grid, each value is the flux or Vega
        mag for each of the requested completeness values.  An entry is NaN
        when the requested completeness is below the minimum modelled
        completeness or is never exceeded by the model.
    """

    # ------ Reading in data

    # If there are subgrids, we can't read them all into memory.  Therefore,
    # we'll go through each one and just grab the relevant parts.
    compl_table_list = []

    # make a table for each physics model + noise model
    for physgrid, noise_model in zip(
        np.atleast_1d(physgrid_list), np.atleast_1d(noise_model_list)
    ):

        # get the physics model grid - includes priors
        modelsedgrid = SEDGrid(str(physgrid))
        if hasattr(modelsedgrid.seds, "read"):
            sed_grid = modelsedgrid.seds.read()
        else:
            sed_grid = modelsedgrid.seds
        # get list of filters
        filter_list = modelsedgrid.filters

        # read in the noise model
        noisegrid = noisemodel.get_noisemodelcat(str(noise_model))
        # get the completeness
        model_compl = noisegrid["completeness"]

        # put it all into a table
        table_dict = {filt: sed_grid[:, f] for f, filt in enumerate(filter_list)}
        table_dict.update(
            {filt + "_compl": model_compl[:, f] for f, filt in enumerate(filter_list)}
        )

        # append to the list
        compl_table_list.append(Table(table_dict))

    # stack all the tables into one
    compl_table = vstack(compl_table_list)

    # if chosen, get the vega fluxes for the filters
    if vega_mag:
        _, vega_flux, _ = Vega(source=vega_fname).getFlux(filter_list)

    # ------ Calculate depth

    # initialize dictionary to hold results
    depth_dict = {filt: [] for filt in filter_list}

    # grab numbers for each filter
    for f, filt in enumerate(filter_list):

        use_sed = compl_table[filt]
        use_comp = compl_table[filt + "_compl"]

        # get sorted versions of data
        sort_ind = np.argsort(use_sed)
        sort_sed = use_sed[sort_ind]
        sort_comp = use_comp[sort_ind]

        # grab depths
        for compl in np.atleast_1d(completeness_value):

            # indices (in order of increasing flux) where completeness > N
            above = np.where(sort_comp > compl)[0]

            # first check whether the noise model even covers this completeness
            # (in case there weren't sufficient ASTs).  Testing for "no
            # model exceeds it" also covers a request exactly equal to the
            # maximum completeness, which would otherwise index an empty
            # array.
            if (compl < np.min(sort_comp)) or (above.size == 0):
                depth_dict[filt].append(np.nan)
                continue

            # flux at the first instance of completeness > N
            comp_flux = sort_sed[above[0]]
            # save it
            if vega_mag:
                depth_dict[filt].append(-2.5 * np.log10(comp_flux / vega_flux[f]))
            else:
                depth_dict[filt].append(comp_flux)

    # return the results
    return depth_dict
def remove_filters_from_files(
    catfile,
    physgrid=None,
    obsgrid=None,
    outbase=None,
    physgrid_outfile=None,
    rm_filters=None,
    beast_filt=None,
):
    """
    Remove filters from catalog, physics grid, and/or obsmodel grid.  This has
    two primary use cases:

    1. When making simulated observations, you want to test how your fit quality
       changes with different combinations of filters.  In that case, put in
       files for both `physgrid` and `obsgrid`.  Set `rm_filters` to the
       filter(s) you wish to remove, and they will be removed both from those
       and from the catalog file.  The three new files will be output with the
       name prefix set in `outbase`.

    2. When running the BEAST, you have a master physics model grid with all
       filters present in the survey, but some fields don't have observations in
       all of those filters.  In that case, put the master grid in `physgrid`
       and set `rm_filters` to None.  The catalog will be used to determine the
       filters to remove (if any).  `obsgrid` should be left as None, because in
       this use case, the obsmodel grid has not yet been generated.  The output
       physics model grid will be named using the filename in `physgrid_outfile`
       (if given) or with the prefix in `outbase`.

    Parameters
    ----------
    catfile : string
        file name of photometry catalog

    physgrid : string (default=None)
        If set, remove filters from this physics model grid

    obsgrid : string (default=None)
        If set, remove filters from this obsmodel grid.  Requires `physgrid`,
        because the columns to remove are determined from the filter list of
        the physics model grid.

    outbase : string (default=None)
        Path+file to prepend to all output file names.  Useful for case 1 above.

    physgrid_outfile : string (default=None)
        Path+name of the output physics model grid.  Useful for case 2 above.

    rm_filters : string or list of strings (default=None)
        If set, these are the filters to remove from all of the files.  The
        names are matched case-insensitively.  If not set, only the filters
        present in catfile will be retained in physgrid and/or obsgrid.

    beast_filt : list of strings
        Sometimes there is ambiguity in the filter name (e.g., the grid has both
        HST_ACS_WFC_F475W and HST_WFC3_F475W, and the filter name is F475W).
        Set this to the BEAST filter name to resolve any ambiguities.  For
        example, ['HST_WFC3_F475W', 'HST_WFC3_F814W'] ensures that these are the
        names used for F475W and F814W.

    Raises
    ------
    ValueError
        If `obsgrid` is set without `physgrid`, or if a physics model grid is
        processed and neither `outbase` nor `physgrid_outfile` is set.
    """

    # the obsmodel columns to delete come from the physics model grid, so
    # fail early with a clear message instead of a NameError further down
    if obsgrid is not None and physgrid is None:
        raise ValueError("Need to set physgrid in order to process obsgrid")

    # read in the photometry catalog
    cat = Table.read(catfile)

    if rm_filters is not None:
        # remove the requested filters from the catalog
        for cfilter in np.atleast_1d(rm_filters):
            colname = "{}_rate".format(cfilter)
            if colname.upper() in cat.colnames:
                cat.remove_column(colname.upper())
            elif colname.lower() in cat.colnames:
                cat.remove_column(colname.lower())
            else:
                print("{} not in catalog file".format(colname))
        cat.write("{}_cat.fits".format(outbase), overwrite=True)

        # The grid's short filter names are upper-cased below, so upper-case
        # the requested names too; otherwise a lower-case request would be
        # removed from the catalog but silently kept in the grid.
        rm_filters_upper = {str(f).upper() for f in np.atleast_1d(rm_filters)}
    else:
        # extract the filter names that are present in the catalog
        cat_filters = [
            f[:-5].upper() for f in cat.colnames if f[-4:].lower() == "rate"
        ]

    # if beast_filt is set, make a list of the short versions
    if beast_filt is not None:
        beast_filt_short = [(f.split("_"))[-1].upper() for f in beast_filt]

    # if physgrid set, process the SED grid
    if physgrid is not None:

        # read in the sed grid
        g0 = FileSEDGrid(physgrid, backend="cache")

        # extract info
        filters = g0.header["filters"].split(" ")
        shortfilters = [(cfilter.split("_"))[-1].upper() for cfilter in filters]
        rindxs = []
        rgridcols = []

        # loop through filters and determine what needs deleting
        for findx, (csfilter, cfilter) in enumerate(zip(shortfilters, filters)):

            # Does the BEAST reference list name this exact instrument+filter?
            #   None  -> no reference given for this short filter name
            #   True  -> the reference is this grid filter
            #   False -> the reference is the same filter on another instrument
            if beast_filt is not None and csfilter in beast_filt_short:
                same_instrument = bool(
                    beast_filt[beast_filt_short.index(csfilter)] == cfilter
                )
            else:
                same_instrument = None

            if rm_filters is not None:
                # the user chose the filters to remove: delete a requested
                # filter unless the reference points at another instrument
                remove = (csfilter in rm_filters_upper) and (
                    same_instrument is not False
                )
            else:
                # filters are determined from the catalog: delete those
                # absent from the catalog, and those present but assigned
                # to another instrument by the reference list
                remove = (csfilter not in cat_filters) or (
                    same_instrument is False
                )

            if remove:
                rindxs.append(findx)
                for grid_col in g0.grid.colnames:
                    if cfilter in grid_col and grid_col not in rgridcols:
                        rgridcols.append(grid_col)

        # delete column(s)
        nseds = np.delete(g0.seds, rindxs, 1)
        nlamb = np.delete(g0.lamb, rindxs, 0)
        nfilters = np.delete(filters, rindxs, 0)
        for rcol in rgridcols:
            g0.grid.delCol(rcol)

        print("orig filters: {}".format(" ".join(filters)))
        print(" new filters: {}".format(" ".join(nfilters)))

        # save the modified grid
        g = SpectralGrid(np.array(nlamb), seds=nseds, grid=g0.grid, backend="memory")
        g.grid.header["filters"] = " ".join(nfilters)
        if physgrid_outfile is not None:
            g.writeHDF(physgrid_outfile)
        elif outbase is not None:
            g.writeHDF("{}_seds.grid.hd5".format(outbase))
        else:
            raise ValueError("Need to set either outbase or physgrid_outfile")

    # if obsgrid set, process the observation model
    if obsgrid is not None:
        obsgrid = noisemodel.get_noisemodelcat(obsgrid)
        with tables.open_file("{}_noisemodel.grid.hd5".format(outbase), "w") as outfile:
            outfile.create_array(
                outfile.root, "bias", np.delete(obsgrid["bias"], rindxs, 1)
            )
            outfile.create_array(
                outfile.root, "error", np.delete(obsgrid["error"], rindxs, 1)
            )
            outfile.create_array(
                outfile.root,
                "completeness",
                np.delete(obsgrid["completeness"], rindxs, 1),
            )
def subgrid_info(grid_fname, noise_fname=None):
    """
    Generates a list of mins and maxes of all the quantities in the given grid

    Parameters
    ----------
    grid_fname: string
        path to a beast grid file (hd5 format)

    noise_fname: string
        Path to the noise model file for the given grid (hd5 format)
        (optional). If this is given, the mins/maxes for the full model
        fluxes are added too, under the name 'symlog'+filter+'_wd_bias'
        (needs to conform to the name used in fit.py).

    Returns
    -------
    info_dict: dictionary
        {name of quantity [string]: {'min': min, 'max': max, 'unique': unique values}}
    """
    # Use the HDFStore (pytables) backend
    sedgrid = grid.FileSEDGrid(grid_fname, backend="hdf")
    seds = sedgrid.seds

    info_dict = {}

    # min / max / unique values of every quantity stored in the grid
    for q in sedgrid.keys():
        qvals = sedgrid[q]
        info_dict[q] = {
            "min": np.amin(qvals),
            "max": np.amax(qvals),
            "unique": np.unique(qvals),
        }

    if noise_fname is not None:
        noise_vals = get_noisemodelcat(noise_fname)

        # The following is also in fit.py, so we're kind of doing double
        # work here, but it's necessary if we want to know the proper
        # ranges for these values.
        full_model_flux = seds[:] + noise_vals["bias"]
        logtempseds = np.array(full_model_flux)
        # symmetric log: sign(x) * log10(1 + |x| ln(10))
        full_model_flux = (
            np.sign(logtempseds)
            * np.log1p(np.abs(logtempseds * math.log(10)))
            / math.log(10)
        )

        for i, f in enumerate(sedgrid.filters):
            f_fluxes = full_model_flux[:, i]
            info_dict["symlog" + f + "_wd_bias"] = {
                # Be sure to cut out the -100's in the calculation of the minimum
                "min": np.amin(f_fluxes[f_fluxes > -99.99]),
                "max": np.amax(f_fluxes),
                # unique values of this filter's fluxes (previously this
                # reused the values of the last grid quantity by mistake)
                "unique": np.unique(f_fluxes),
            }

    print("Gathered grid info for {}".format(grid_fname))
    return info_dict
# modelsedgrid, # absflux_a_matrix=settings.absflux_a_matrix) if args.trim: print("Trimming the model and noise grids") # read in the observed data obsdata = Observations(settings.obsfile, settings.filters, settings.obs_colnames) # get the modesedgrid on which to generate the noisemodel modelsedgridfile = settings.project + "/" + settings.project + "_seds.grid.hd5" modelsedgrid = SEDGrid(modelsedgridfile) # read in the noise model just created noisemodel_vals = noisemodel.get_noisemodelcat(settings.noisefile) # trim the model sedgrid sed_trimname = "{0}/{0}_seds_trim.grid.hd5".format(settings.project) noisemodel_trimname = "{0}/{0}_noisemodel_trim.grid.hd5".format( settings.project) trim_grid.trim_models( modelsedgrid, noisemodel_vals, obsdata, sed_trimname, noisemodel_trimname, sigma_fac=3.0, )
def plot_completeness(
    physgrid_list,
    noise_model_list,
    output_plot_filename,
    param_list=["Av", "Rv", "logA", "f_A", "M_ini", "Z", "distance"],
    compl_filter="F475W",
):
    """
    Make visualization of the completeness

    The figure is a triangle of panels: each off-diagonal panel shows the
    mean completeness binned over a pair of parameters, and each diagonal
    panel shows the mean completeness binned over a single parameter.

    Parameters
    ----------
    physgrid_list : string or list of strings
        Name of the physics model file.  If there are multiple physics model
        grids (i.e., if there are subgrids), list them all here.

    noise_model_list : string or list of strings
        Name of the noise model file.  If there are multiple files for
        physgrid_list (because of subgrids), list the noise model file
        associated with each physics model file.

    output_plot_filename : string
        name of the file in which to save the output plot

    param_list : list of strings
        names of the parameters to plot

    compl_filter : str
        filter to use for completeness (required for toothpick model)

    Raises
    ------
    ValueError
        if compl_filter is not one of the filters of a physics model grid
    """
    n_params = len(param_list)

    # If there are subgrids, we can't read them all into memory.  Therefore,
    # we'll go through each one and just grab the relevant parts.
    compl_table_list = []

    # make a table for each physics model + noise model
    for physgrid, noise_model in zip(
        np.atleast_1d(physgrid_list), np.atleast_1d(noise_model_list)
    ):
        # get the physics model grid - includes priors
        modelsedgrid = SEDGrid(str(physgrid))

        # get list of filters (last "_"-separated piece, upper-cased)
        short_filters = [
            filt.split(sep="_")[-1].upper() for filt in modelsedgrid.filters
        ]
        if compl_filter.upper() not in short_filters:
            raise ValueError("requested completeness filter not present")

        filter_k = short_filters.index(compl_filter.upper())
        print("Completeness from {0}".format(modelsedgrid.filters[filter_k]))

        # read in the noise model
        noisegrid = noisemodel.get_noisemodelcat(str(noise_model))
        # grab the completeness of the chosen filter *before* closing, so
        # nothing has to be read from an already-closed file
        filter_compl = noisegrid["completeness"][:, filter_k]
        # close the file to save memory
        noisegrid.close()

        # put it all into a table
        table_dict = {x: modelsedgrid[x] for x in param_list}
        table_dict["compl"] = filter_compl

        # append to the list
        compl_table_list.append(Table(table_dict))

    # stack all the tables into one
    compl_table = vstack(compl_table_list)

    # figure
    fig = plt.figure(figsize=(4 * n_params, 4 * n_params))

    # label font sizes
    label_font = 25
    tick_font = 22

    # load in color map (matplotlib.cm.get_cmap is gone from recent
    # matplotlib releases; pyplot.get_cmap is available in old and new ones)
    cmap = plt.get_cmap("magma")

    # last image drawn; stays None when there are no off-diagonal panels
    im = None

    # iterate through the panels
    for i, pi in enumerate(param_list):
        for j, pj in enumerate(param_list[i:], i):
            print("plotting {0} and {1}".format(pi, pj))

            # set up subplot: column i, row j of the n_params x n_params layout
            plt.subplot(n_params, n_params, i + j * (n_params) + 1)
            ax = plt.gca()
            x_col, x_bins, x_label = setup_axis(compl_table, pi)

            if i != j:
                # not along diagonal: 2D map of the mean completeness
                y_col, y_bins, y_label = setup_axis(compl_table, pj)
                compl_image, _, _, _ = binned_statistic_2d(
                    x_col,
                    y_col,
                    compl_table["compl"],
                    statistic="mean",
                    bins=(x_bins, y_bins),
                )

                # plot points
                im = plt.imshow(
                    compl_image.T,
                    extent=(
                        np.min(x_bins),
                        np.max(x_bins),
                        np.min(y_bins),
                        np.max(y_bins),
                    ),
                    cmap="magma",
                    vmin=0,
                    vmax=1,
                    aspect="auto",
                    origin="lower",
                )

                ax.tick_params(
                    axis="both",
                    which="both",
                    direction="in",
                    labelsize=tick_font,
                    bottom=True,
                    top=True,
                    left=True,
                    right=True,
                )

                # axis labels and ticks: only the left column and the
                # bottom row keep their labels
                if i == 0:
                    ax.set_ylabel(y_label, fontsize=label_font)
                else:
                    ax.set_yticklabels([])
                if j == n_params - 1:
                    ax.set_xlabel(x_label, fontsize=label_font)
                    plt.xticks(rotation=-45)
                else:
                    ax.set_xticklabels([])

            else:
                # along diagonal: 1D histogram of the mean completeness
                compl_hist, _, _ = binned_statistic(
                    x_col,
                    compl_table["compl"],
                    statistic="mean",
                    bins=x_bins,
                )
                # make histogram
                _, _, patches = plt.hist(x_bins[:-1], x_bins, weights=compl_hist)
                # color each bar by its completeness
                for c, comp in enumerate(compl_hist):
                    patches[c].set_color(cmap(comp))
                    # call the setter (assigning to it would silently
                    # replace the method and leave the width untouched)
                    patches[c].set_linewidth(0.1)
                # make a black outline so it stands out as a histogram
                plt.hist(
                    x_bins[:-1],
                    x_bins,
                    weights=compl_hist,
                    histtype="step",
                    color="k",
                )

                # axis ranges
                plt.xlim(np.min(x_bins), np.max(x_bins))
                plt.ylim(0, 1.05)

                ax.tick_params(axis="y", which="both", length=0, labelsize=tick_font)
                ax.tick_params(
                    axis="x", which="both", direction="in", labelsize=tick_font
                )

                # axis labels and ticks
                ax.set_yticklabels([])
                if i < n_params - 1:
                    ax.set_xticklabels([])
                if i == n_params - 1:
                    ax.set_xlabel(x_label, fontsize=label_font)
                    plt.xticks(rotation=-45)

    plt.tight_layout()

    # add a colorbar (needs at least one 2D panel to take its colors from)
    gs = GridSpec(nrows=20, ncols=n_params)
    if im is not None:
        # start at the third column when it exists, otherwise at the last one
        first_col = min(2, n_params - 1)
        cax = fig.add_subplot(gs[0, first_col:])
        cbar = plt.colorbar(im, cax=cax, orientation="horizontal")
        cbar.set_label("Completeness", fontsize=label_font)
        cbar.ax.tick_params(labelsize=tick_font)
    gs.tight_layout(fig)

    fig.savefig(output_plot_filename)
    plt.close(fig)
def generate_files_for_tests(run_beast=True, run_tools=True):
    """
    Produce the complete set of BEAST regression-test files from the
    metal_small example.

    Parameters
    ----------
    run_beast : boolean (default=True)
        if True, run the BEAST (physics model, ASTs, obs model, trimming,
        fitting, merging) for the plain settings and for a 2-subgrid copy

    run_tools : boolean (default=True)
        if True, run the tools (compare_spec_type, star_type_probability)
        and save their inputs and outputs as asdf files
    """

    # baseline settings, plus a deep copy set up for two subgrids
    settings_orig = beast_settings.beast_settings("beast_settings.txt")
    settings_subgrids = copy.deepcopy(settings_orig)
    settings_subgrids.n_subgrid = 2
    settings_subgrids.project = f"{settings_orig.project}_subgrids"

    # ==========================================
    # run the beast for each set of settings
    # ==========================================

    if run_beast:

        for run_settings in (settings_orig, settings_subgrids):

            # --- physics model ---
            create_physicsmodel.create_physicsmodel(
                run_settings,
                nsubs=run_settings.n_subgrid,
                nprocs=1,
            )

            # --- ASTs (currently only works for no subgrids) ---
            if run_settings.n_subgrid == 1:
                make_ast_inputs.make_ast_inputs(
                    run_settings, pick_method="flux_bin_method"
                )

            # --- obs model ---
            create_obsmodel.create_obsmodel(
                run_settings,
                use_sd=False,
                nsubs=run_settings.n_subgrid,
                nprocs=1,
                use_rate=True,
            )

            # --- trimming ---
            # file names for every (sub)grid
            fnames = create_filenames.create_filenames(
                run_settings, use_sd=False, nsubs=run_settings.n_subgrid
            )

            # observed data
            observations = Observations(
                run_settings.obsfile,
                run_settings.filters,
                run_settings.obs_colnames,
            )

            for sub_idx in range(run_settings.n_subgrid):
                # SED grid and matching noise model of this (sub)grid
                full_sedgrid = SEDGrid(fnames["modelsedgrid_files"][sub_idx])
                full_noise = noisemodel.get_noisemodelcat(
                    fnames["noise_files"][sub_idx]
                )

                # trim both of them
                trim_grid.trim_models(
                    full_sedgrid,
                    full_noise,
                    observations,
                    fnames["modelsedgrid_trim_files"][sub_idx],
                    fnames["noise_trim_files"][sub_idx],
                    sigma_fac=3.0,
                )

            # --- fitting ---
            run_fitting.run_fitting(
                run_settings,
                use_sd=False,
                nsubs=run_settings.n_subgrid,
                nprocs=1,
                pdf2d_param_list=["Av", "M_ini", "logT"],
                pdf_max_nbins=200,
            )

            # --- merging (skipped automatically when there are no subgrids) ---
            merge_files.merge_files(
                run_settings, use_sd=False, nsubs=run_settings.n_subgrid
            )

            print("\n\n")

    # ==========================================
    # reference files for assorted tools
    # ==========================================

    if run_tools:

        # --- compare_spec_type ---
        spec_type_args = {
            "spec_ra": [72.67213351],
            "spec_dec": [-67.71720515],
            "spec_type": ["A"],
            "spec_subtype": [0],
            "lumin_class": ["IV"],
            "match_radius": 0.2,
        }
        spec_type_result = compare_spec_type.compare_spec_type(
            settings_orig.obsfile,
            "{0}/{0}_stats.fits".format(settings_orig.project),
            **spec_type_args,
        )
        # keep both the inputs and the outputs
        spec_type_asdf = asdf.AsdfFile(
            {"input": spec_type_args, "output": spec_type_result}
        )
        spec_type_asdf.write_to(
            "{0}/{0}_compare_spec_type.asdf".format(settings_orig.project)
        )

        # --- star_type_probability ---
        star_prob_args = {
            "output_filebase": None,
            "ext_O_star_params": {"min_M_ini": 10, "min_Av": 0.5, "max_Av": 5},
        }
        star_prob_result = star_type_probability.star_type_probability(
            "{0}/{0}_pdf1d.fits".format(settings_orig.project),
            "{0}/{0}_pdf2d.fits".format(settings_orig.project),
            **star_prob_args,
        )
        # keep both the inputs and the outputs
        star_prob_asdf = asdf.AsdfFile(
            {"input": star_prob_args, "output": star_prob_result}
        )
        star_prob_asdf.write_to(
            "{0}/{0}_star_type_probability.asdf".format(settings_orig.project)
        )

    # ==========================================
    # asdf file permissions
    # ==========================================

    # asdf currently writes files as -rw------- (reason unknown).  Switch
    # them to -rw-r--r--, like the rest of the BEAST files, so they can be
    # copied over to the cached file website easily.
    world_readable = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
    for asdf_path in glob.glob("*/*.asdf"):
        os.chmod(asdf_path, world_readable)
def plot_noisemodel(
    sed_file,
    noise_file_list,
    plot_file,
    samp=100,
    color=["black", "red", "gold", "lime", "xkcd:azure"],
    label=None,
):
    """
    Make a plot of the noise model: for each of the bands, make plots of bias
    and uncertainty as a function of flux

    If there are multiple files in noise_file_list, each of them will be
    overplotted in each panel.

    Parameters
    ----------
    sed_file : string
        path+name of the SED grid file

    noise_file_list : list of strings
        path+name of the noise model file(s)

    plot_file : string
        name of the file to save the plot

    samp : int (default=100)
        plotting all of the SED points takes a long time for a viewer to load,
        so set this to plot every Nth point

    color : list of strings (default=['black','red','gold','lime','xkcd:azure'])
        colors to cycle through when making plots

    label : list of strings (default=None)
        if set, use these labels in a legend for each item in noise_file_list
    """

    # read in the SED grid
    print("* reading SED grid file")
    sed_object = FileSEDGrid(sed_file)
    # some backends expose the SEDs as a readable node rather than an array
    if hasattr(sed_object.seds, "read"):
        sed_grid = sed_object.seds.read()
    else:
        sed_grid = sed_object.seds
    filter_list = sed_object.filters
    n_filter = len(filter_list)

    # figure
    fig = plt.figure(figsize=(4 * n_filter, 10))

    # go through noise files
    for n, nfile in enumerate(noise_file_list):

        print("* reading " + nfile)

        # read in the values, and release the file as soon as the arrays
        # are in memory so that handles do not pile up over the list
        noisemodel_vals = noisemodel.get_noisemodelcat(nfile)
        try:
            # extract error and bias
            noise_err = noisemodel_vals.root.error[:]
            noise_bias = noisemodel_vals.root.bias[:]
        finally:
            noisemodel_vals.close()

        line_color = color[n % len(color)]

        # plot things
        for f, filt in enumerate(filter_list):

            # error is negative where it's been extrapolated -> trim those
            good_err = np.where(noise_err[:, f] > 0)[0]
            plot_sed = sed_grid[good_err, f][::samp]
            plot_err = noise_err[good_err, f][::samp]
            plot_bias = noise_bias[good_err, f][::samp]

            # subplot region: bias (top row)
            ax1 = plt.subplot(2, n_filter, f + 1)
            (plot1,) = ax1.plot(
                np.log10(plot_sed),
                plot_bias / plot_sed,
                marker="o",
                linestyle="none",
                mew=0,
                ms=2,
                color=line_color,
                alpha=0.1,
            )
            if label is not None:
                plot1.set_label(label[n])

            ax1.tick_params(axis="both", which="major", labelsize=13)
            plt.xlabel("log " + filt, fontsize=12)
            plt.ylabel(r"Bias ($\mu$/F)", fontsize=12)

            # subplot region: error (bottom row)
            ax2 = plt.subplot(2, n_filter, n_filter + f + 1)
            (plot2,) = ax2.plot(
                np.log10(plot_sed),
                plot_err / plot_sed,
                marker="o",
                linestyle="none",
                mew=0,
                ms=2,
                color=line_color,
                alpha=0.1,
            )
            if label is not None:
                plot2.set_label(label[n])

            ax2.tick_params(axis="both", which="major", labelsize=13)
            plt.xlabel("log " + filt, fontsize=12)
            plt.ylabel(r"Error ($\sigma$/F)", fontsize=12)

            # do a legend if this is
            # (a) the leftmost panel
            # (b) the last line to be added
            # (c) there are labels set
            if (f == 0) and (n == len(noise_file_list) - 1) and (label is not None):
                for ax in (ax1, ax2):
                    leg = ax.legend(fontsize=12)
                    # the handle list was renamed from legendHandles to
                    # legend_handles in newer matplotlib; support both
                    handles = getattr(leg, "legend_handles", None)
                    if handles is None:
                        handles = leg.legendHandles
                    for lh in handles:
                        # make the legend markers opaque; older matplotlib
                        # keeps the marker in a separate private artist
                        getattr(lh, "_legmarker", lh).set_alpha(1)

    plt.tight_layout()

    fig.savefig(plot_file)
    plt.close(fig)
def test_merge_pdf1d_stats(self):
    """
    Using cached versions of the observations, sed grid, and noise model,
    split the grids and do the fitting on the subgrids and original
    grid.  Merge the results from the subgrids and compare to the results
    from fitting the full grid.
    """
    ######################################
    # STEP 1: GET SOME DATA TO WORK WITH #
    ######################################

    # read in the observed data
    obsdata = Observations(self.obs_fname_cache, self.settings.filters,
                           self.settings.obs_colnames)

    #########################################################################################
    # STEP 2: SPLIT THE GRIDS AND GENERATE THE GRID INFO DICT AS IN THE SUBGRIDDING EXAMPLE #
    #########################################################################################
    num_subgrids = 3

    # Split SED grid
    sub_seds_trim_fnames = subgridding_tools.split_grid(
        self.seds_trim_fname_cache, num_subgrids, overwrite=True)

    # Split noise grid (a standardized function does not exist)
    sub_noise_trim_fnames = []

    noisemodel_vals = noisemodel.get_noisemodelcat(
        self.noise_trim_fname_cache)
    slices = subgridding_tools.uniform_slices(len(noisemodel_vals["bias"]),
                                              num_subgrids)
    for i, slc in enumerate(slices):
        outname = self.noise_trim_fname_cache.replace(
            ".hd5", "sub{}.hd5".format(i))
        with tables.open_file(outname, "w") as outfile:
            outfile.create_array(outfile.root, "bias",
                                 noisemodel_vals["bias"][slc])
            outfile.create_array(outfile.root, "error",
                                 noisemodel_vals["error"][slc])
            outfile.create_array(outfile.root, "completeness",
                                 noisemodel_vals["completeness"][slc])
        sub_noise_trim_fnames.append(outname)

    # Collect information about the parameter ranges, to make the pdf1d bins
    # consistent between subgrids
    grid_info_dict = subgridding_tools.reduce_grid_info(
        sub_seds_trim_fnames,
        sub_noise_trim_fnames,
        nprocs=1,
        cap_unique=100)

    ##################################################
    # STEP 3: GENERATE FILENAMES AND RUN THE FITTING #
    ##################################################
    def make_gridsub_fnames(base_fname, num_subgrids, extension=".fits"):
        # insert "gridsub<i>" in front of the extension, once per subgrid
        return [
            base_fname.replace(extension,
                               "gridsub{}{}".format(i, extension))
            for i in range(num_subgrids)
        ]

    # only the (unique) names are needed; the files are written by the fit
    stats_fname = tempfile.NamedTemporaryFile(suffix=".fits").name
    pdf1d_fname = tempfile.NamedTemporaryFile(suffix=".fits").name
    lnp_fname = tempfile.NamedTemporaryFile(suffix=".hd5").name

    subgrid_pdf1d_fnames = make_gridsub_fnames(pdf1d_fname, num_subgrids)
    subgrid_stats_fnames = make_gridsub_fnames(stats_fname, num_subgrids)
    subgrid_lnp_fnames = make_gridsub_fnames(lnp_fname,
                                             num_subgrids,
                                             extension=".hd5")

    for i in range(num_subgrids):
        sub_noisemodel_vals = noisemodel.get_noisemodelcat(
            sub_noise_trim_fnames[i])
        fit.summary_table_memory(
            obsdata,
            sub_noisemodel_vals,
            sub_seds_trim_fnames[i],
            threshold=-40.0,
            save_every_npts=100,
            lnp_npts=500,
            stats_outname=subgrid_stats_fnames[i],
            pdf1d_outname=subgrid_pdf1d_fnames[i],
            lnp_outname=subgrid_lnp_fnames[i],
            grid_info_dict=grid_info_dict,
            do_not_normalize=True,
        )
        # The do_not_normalize option is absolutely crucial!

    # Now merge the results
    merged_pdf1d_fname, merged_stats_fname = subgridding_tools.merge_pdf1d_stats(
        subgrid_pdf1d_fnames, subgrid_stats_fnames)

    # Do a full fit also
    normal_stats = tempfile.NamedTemporaryFile(suffix=".fits").name
    normal_pdf1d = tempfile.NamedTemporaryFile(suffix=".fits").name
    normal_lnp = tempfile.NamedTemporaryFile(suffix=".hd5").name
    fit.summary_table_memory(
        obsdata,
        noisemodel_vals,
        self.seds_trim_fname_cache,
        threshold=-40.0,
        save_every_npts=100,
        lnp_npts=500,
        stats_outname=normal_stats,
        pdf1d_outname=normal_pdf1d,
        lnp_outname=normal_lnp,
        do_not_normalize=True,
    )
    # Here, we also need to use do_not_normalize, otherwise Pmax will be
    # different by a factor

    # CHECKS
    tolerance = 1e-6

    # open both pdf1d files in a context manager so the handles are
    # released even when one of the comparisons below fails
    with fits.open(normal_pdf1d) as fits_normal, fits.open(
            merged_pdf1d_fname) as fits_new:

        if not len(fits_new) == len(fits_normal):
            raise AssertionError()

        # A similar problem to the above will also occur here
        # (extension 0 is skipped; the others are matched by EXTNAME)
        for k in range(1, len(fits_new)):
            qname = fits_new[k].header["EXTNAME"]
            np.testing.assert_allclose(
                fits_new[k].data,
                fits_normal[qname].data,
                rtol=tolerance,
                atol=tolerance,
            )

    table_normal = Table.read(normal_stats)
    table_new = Table.read(merged_stats_fname)

    if not len(table_normal) == len(table_new):
        raise AssertionError()

    # These will normally fail, as the merging process can not be made
    # bit-correct due to floating point math (exacerbated by exponentials)
    for c in table_new.colnames:
        if c == "Name" or c == "RA" or c == "DEC":
            # identifiers and coordinates must match exactly
            np.testing.assert_equal(
                table_normal[c],
                table_new[c],
                err_msg="column {} is not equal".format(c),
            )
        else:
            np.testing.assert_allclose(
                table_normal[c],
                table_new[c],
                rtol=tolerance,
                equal_nan=True,
                err_msg="column {} is not close enough".format(c),
            )
def fit_submodel(
    photometry_file,
    modelsedgrid_file,
    noise_file,
    stats_file,
    pdf_file,
    pdf2d_file,
    pdf2d_param_list,
    lnp_file,
    grid_info_file=None,
    resume=False,
):
    """
    Code to run the SED fitting

    A run is treated as a subgrid run when "gridsub" appears in
    modelsedgrid_file; in that case the grid info dictionary is loaded from
    grid_info_file and the fit is done without normalization.

    Parameters
    ----------
    photometry_file : string
        path+name of the photometry file

    modelsedgrid_file : string
        path+name of the physics model grid file

    noise_file : string
        path+name of the noise model file

    stats_file : string
        path+name of the file to contain stats output

    pdf_file : string
        path+name of the file to contain 1D PDF output

    pdf2d_file : string
        path+name of the file to contain 2D PDF output

    pdf2d_param_list: list of strings or None
        parameters for which to make 2D PDFs (or None)

    lnp_file : string
        path+name of the file to contain log likelihood output

    grid_info_file : string (default=None)
        path+name for pickle file that contains dictionary with subgrid
        min/max/n_unique (required for a run with subgrids)

    resume : boolean (default=False)
        choose whether to resume existing run or start over

    Returns
    -------
    None
        all results are written to the given output files

    Raises
    ------
    ValueError
        if this is a subgrid run and grid_info_file is not given
    """

    # check if it's a subgrid run by looking in the file name
    subgrid_run = "gridsub" in modelsedgrid_file

    # fail early, with a clear message, instead of on a str + None error
    if subgrid_run and grid_info_file is None:
        raise ValueError(
            "grid_info_file is required when fitting a subgrid ('gridsub' in "
            "modelsedgrid_file)"
        )

    # read in the photometry catalog
    obsdata = datamodel.get_obscat(photometry_file, datamodel.filters)

    # keyword arguments shared by subgrid and non-subgrid fits
    fit_kwargs = dict(
        resume=resume,
        threshold=-10.0,
        save_every_npts=100,
        lnp_npts=500,
        stats_outname=stats_file,
        pdf1d_outname=pdf_file,
        pdf2d_outname=pdf2d_file,
        pdf2d_param_list=pdf2d_param_list,
        lnp_outname=lnp_file,
        surveyname=datamodel.surveyname,
    )

    if subgrid_run:
        print("loading grid_info_dict from " + grid_info_file)
        # NOTE(review): unpickling executes arbitrary code; grid_info_file
        # must only ever be a file produced by a trusted BEAST run
        with open(grid_info_file, "rb") as p:
            grid_info_dict = pickle.load(p)
        # subgrid fits share the bin information and must not be normalized
        fit_kwargs["grid_info_dict"] = grid_info_dict
        fit_kwargs["do_not_normalize"] = True

    # load the SED grid and noise model
    modelsedgrid = FileSEDGrid(modelsedgrid_file)
    noisemodel_vals = noisemodel.get_noisemodelcat(noise_file)

    fit.summary_table_memory(obsdata, noisemodel_vals, modelsedgrid, **fit_kwargs)

    print("Done fitting on grid " + modelsedgrid_file)
def run_beast_production(basename,
                         physicsmodel=False, ast=False, observationmodel=False,
                         trim=False, fitting=False, resume=False,
                         source_density='', sub_source_density=''):
    """
    Turns the original command-line version of run_beast_production.py into
    something callable from within a function

    Parameters
    ----------
    basename : string
        name of the gst file (assuming it's located in ./data/)

    physicsmodel : boolean (default=False)
        if True, build the isochrones, the spectral grid, the stellar priors
        and the extinguished SED grid

    ast : boolean (default=False)
        if True, pick the models (and optionally positions) for the AST
        input list

    observationmodel : boolean (default=False)
        if True, generate the toothpick noise model from the ASTs

    trim : boolean (default=False)
        if True, trim the SED grid and the noise model

    fitting : boolean (default=False)
        if True, fit the observations with the trimmed grids

    resume : boolean (default=False)
        passed on to the fitting (resume an existing run)

    source_density : string (default='')
        source density bin label, used to build the file names

    sub_source_density : string (default='')
        sub-file label within the source density bin, used to build the
        file names

    For the info related to the other inputs, see the argparse info at the bottom
    """

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)

    # check input parameters, print what is the problem, stop run_beast
    verify_params.verify_input_format(datamodel)

    # update the filenames as needed for production
    # - photometry sub-file
    datamodel.obsfile = basename.replace(
        '.fits',
        '_with_sourceden' + '_SD_' + source_density.replace('_', '-')
        + '_sub' + sub_source_density + '.fits')
    # - stats files
    stats_filebase = "%s/%s"%(datamodel.project,datamodel.project) \
                     + '_sd' + source_density.replace('_','-') \
                     + '_sub' + sub_source_density
    # - trimmed SED grid
    sed_trimname = stats_filebase + '_sed_trim.grid.hd5'
    # - trimmed noise model
    noisemodel_trimname = stats_filebase + '_noisemodel_trim.hd5'
    # - SED grid (hard-coded here instead of the per-project name)
    #modelsedgrid_filename = "%s/%s_seds.grid.hd5"%(datamodel.project,
    #                                               datamodel.project)
    modelsedgrid_filename = "METAL_seds.grid.hd5"

    print("***run information***")
    print("  project = " + datamodel.project)
    print("  obsfile = " + datamodel.obsfile)
    print("  astfile = " + datamodel.astfile)
    print("         noisefile = " + datamodel.noisefile)
    print("   trimmed sedfile = " + sed_trimname)
    print("trimmed noisefiles = " + noisemodel_trimname)
    print("    stats filebase = " + stats_filebase)

    # make sure the project directory exists
    create_project_dir(datamodel.project)

    if physicsmodel:

        # download and load the isochrones
        (iso_fname, oiso) = make_iso_table(datamodel.project,
                                           oiso=datamodel.oiso,
                                           logtmin=datamodel.logt[0],
                                           logtmax=datamodel.logt[1],
                                           dlogt=datamodel.logt[2],
                                           z=datamodel.z)

        # optional settings: fall back to defaults when datamodel lacks them
        if hasattr(datamodel, 'add_spectral_properties_kwargs'):
            extra_kwargs = datamodel.add_spectral_properties_kwargs
        else:
            extra_kwargs = None

        if hasattr(datamodel, 'velocity'):
            redshift = (datamodel.velocity / const.c).decompose().value
        else:
            redshift = 0

        # generate the spectral library (no dust extinction)
        (spec_fname, g_spec) = make_spectral_grid(
            datamodel.project,
            oiso,
            osl=datamodel.osl,
            redshift=redshift,
            distance=datamodel.distances,
            distance_unit=datamodel.distance_unit,
            add_spectral_properties_kwargs=extra_kwargs)

        # add the stellar priors as weights
        #   also computes the grid weights for the stellar part
        (pspec_fname, g_pspec) = add_stellar_priors(datamodel.project, g_spec)

        # generate the SED grid by integrating the filter response functions
        #   effect of dust extinction applied before filter integration
        #   also computes the dust priors as weights
        (seds_fname, g_seds) = make_extinguished_sed_grid(
            datamodel.project,
            g_pspec,
            datamodel.filters,
            extLaw=datamodel.extLaw,
            av=datamodel.avs,
            rv=datamodel.rvs,
            fA=datamodel.fAs,
            rv_prior_model=datamodel.rv_prior_model,
            av_prior_model=datamodel.av_prior_model,
            fA_prior_model=datamodel.fA_prior_model,
            spec_fname=modelsedgrid_filename,
            add_spectral_properties_kwargs=extra_kwargs)

    if ast:

        N_models = datamodel.ast_models_selected_per_age
        Nfilters = datamodel.ast_bands_above_maglimit
        Nrealize = datamodel.ast_realization_per_model
        mag_cuts = datamodel.ast_maglimit

        obsdata = datamodel.get_obscat(basename, datamodel.filters)

        # a single magnitude limit is taken as an offset from the 90th
        # percentile magnitude of each filter in the observed catalog
        if len(mag_cuts) == 1:
            tmp_cuts = mag_cuts
            min_mags = np.zeros(len(datamodel.filters))
            for k, filtername in enumerate(obsdata.filters):
                sfiltername = obsdata.data.resolve_alias(filtername)
                sfiltername = sfiltername.replace('rate', 'vega')
                sfiltername = sfiltername.replace('RATE', 'VEGA')
                # drop entries with magnitudes of 99 or more
                keep, = np.where(obsdata[sfiltername] < 99.)
                min_mags[k] = np.percentile(obsdata[keep][sfiltername], 90.)

            # max. mags from the gst observation cat.
            mag_cuts = min_mags + tmp_cuts

        outfile = './' + datamodel.project + '/' + datamodel.project + '_inputAST.txt'
        outfile_params = './' + datamodel.project + '/' + datamodel.project + '_ASTparams.fits'
        chosen_seds = pick_models(modelsedgrid_filename,
                                  datamodel.filters,
                                  mag_cuts,
                                  Nfilter=Nfilters,
                                  N_stars=N_models,
                                  Nrealize=Nrealize,
                                  outfile=outfile,
                                  outfile_params=outfile_params)

        if datamodel.ast_with_positions == True:
            separation = datamodel.ast_pixel_distribution
            filename = datamodel.project + '/' + datamodel.project + '_inputAST.txt'

            if datamodel.ast_reference_image is not None:
                # With reference image, use the background or source density map if available
                if datamodel.ast_density_table is not None:
                    pick_positions_from_map(
                        obsdata,
                        chosen_seds,
                        datamodel.ast_density_table,
                        datamodel.ast_N_bins,
                        datamodel.ast_realization_per_model,
                        outfile=filename,
                        refimage=datamodel.ast_reference_image,
                        refimage_hdu=0,
                        Nrealize=1,
                        set_coord_boundary=datamodel.ast_coord_boundary)
                else:
                    pick_positions(obsdata,
                                   filename,
                                   separation,
                                   refimage=datamodel.ast_reference_image)

            else:
                # Without reference image, we can only use this function
                if datamodel.ast_density_table is None:
                    pick_positions(obsdata, filename, separation)
                else:
                    print(
                        "To use ast_density_table, ast_reference_image must be specified."
                    )

    if observationmodel:
        print('Generating noise model from ASTs and absflux A matrix')

        # get the model SED grid on which to generate the noisemodel
        modelsedgrid = FileSEDGrid(modelsedgrid_filename)

        # generate the AST noise model
        noisemodel.make_toothpick_noise_model(
            datamodel.noisefile,
            datamodel.astfile,
            modelsedgrid,
            use_rate=True,
            absflux_a_matrix=datamodel.absflux_a_matrix)

    if trim:
        print('Trimming the model and noise grids')

        # read in the observed data
        obsdata = datamodel.get_obscat(basename, datamodel.filters)

        # get the full model SED grid that is going to be trimmed
        modelsedgrid = FileSEDGrid(modelsedgrid_filename)

        # read in the noise model just created
        noisemodel_vals = noisemodel.get_noisemodelcat(datamodel.noisefile)

        # trim the model sedgrid
        trim_grid.trim_models(modelsedgrid,
                              noisemodel_vals,
                              obsdata,
                              sed_trimname,
                              noisemodel_trimname,
                              sigma_fac=3.)

    if fitting:
        # time.clock() no longer exists (removed in Python 3.8);
        # perf_counter() is the replacement for timing an interval
        start_time = time.perf_counter()

        # read in the trimmed AST noise model
        noisemodel_vals = noisemodel.get_noisemodelcat(noisemodel_trimname)

        # read in the observed data
        obsdata = datamodel.get_obscat(datamodel.obsfile, datamodel.filters)

        # output files
        statsfile = stats_filebase + '_stats.fits'
        pdf1dfile = statsfile.replace('stats.fits', 'pdf1d.fits')
        lnpfile = statsfile.replace('stats.fits', 'lnp.hd5')

        fit.summary_table_memory(obsdata,
                                 noisemodel_vals,
                                 sed_trimname,
                                 resume=resume,
                                 threshold=-10.,
                                 save_every_npts=100,
                                 lnp_npts=500,
                                 stats_outname=statsfile,
                                 pdf1d_outname=pdf1dfile,
                                 lnp_outname=lnpfile,
                                 surveyname=datamodel.surveyname)

        new_time = time.perf_counter()
        print('time to fit: ', (new_time - start_time) / 60., ' min')
def test_merge_pdf1d_stats():
    """
    Check that merging per-subgrid fit results reproduces a fit on the
    full grid.

    The trimmed SED grid and trimmed noise model are split into subgrids,
    each subgrid is fit separately, and the resulting pdf1d and stats files
    are merged.  The merged files are then compared with the output of a
    single fit on the unsplit grid.
    """
    ######################################
    # STEP 1: GET SOME DATA TO WORK WITH #
    ######################################
    vega_fname = download_rename("vega.hd5")
    obs_fname = download_rename("b15_4band_det_27_A.fits")
    noise_trim_fname = download_rename("beast_example_phat_noisemodel_trim.grid.hd5")
    seds_trim_fname = download_rename("beast_example_phat_seds_trim.grid.hd5")

    # read in the observed data
    filters = [
        "HST_WFC3_F275W",
        "HST_WFC3_F336W",
        "HST_ACS_WFC_F475W",
        "HST_ACS_WFC_F814W",
        "HST_WFC3_F110W",
        "HST_WFC3_F160W",
    ]
    basefilters = ["F275W", "F336W", "F475W", "F814W", "F110W", "F160W"]
    obs_colnames = [f.lower() + "_rate" for f in basefilters]

    obsdata = Observations(obs_fname, filters, obs_colnames, vega_fname=vega_fname)

    #########################################################################################
    # STEP 2: SPLIT THE GRIDS AND GENERATE THE GRID INFO DICT AS IN THE SUBGRIDDING EXAMPLE #
    #########################################################################################
    num_subgrids = 3

    # Split SED grid
    sub_seds_trim_fnames = subgridding_tools.split_grid(
        seds_trim_fname, num_subgrids, overwrite=True
    )

    # Split noise grid (a standardized function does not exist)
    sub_noise_trim_fnames = []

    noisemodel_vals = get_noisemodelcat(noise_trim_fname)
    slices = subgridding_tools.uniform_slices(
        len(noisemodel_vals["bias"]), num_subgrids
    )
    for i, slc in enumerate(slices):
        outname = noise_trim_fname.replace(".hd5", "sub{}.hd5".format(i))
        with tables.open_file(outname, "w") as outfile:
            outfile.create_array(outfile.root, "bias", noisemodel_vals["bias"][slc])
            outfile.create_array(outfile.root, "error", noisemodel_vals["error"][slc])
            outfile.create_array(
                outfile.root, "completeness", noisemodel_vals["completeness"][slc]
            )
        sub_noise_trim_fnames.append(outname)

    # Collect information about the parameter ranges, to make the pdf1d bins
    # consistent between subgrids
    grid_info_dict = subgridding_tools.reduce_grid_info(
        sub_seds_trim_fnames, sub_noise_trim_fnames, nprocs=1, cap_unique=100
    )

    ##################################################
    # STEP 3: GENERATE FILENAMES AND RUN THE FITTING #
    ##################################################
    def make_gridsub_fnames(base_fname, num_subgrids, extension=".fits"):
        # insert "gridsub<i>" in front of the extension, one name per subgrid
        return [
            base_fname.replace(extension, "gridsub{}{}".format(i, extension))
            for i in range(num_subgrids)
        ]

    stats_fname = "/tmp/beast_example_phat_stats.fits"
    pdf1d_fname = "/tmp/beast_example_phat_pdf1d.fits"
    lnp_fname = "/tmp/beast_example_phat_lnp.hd5"

    subgrid_pdf1d_fnames = make_gridsub_fnames(pdf1d_fname, num_subgrids)
    subgrid_stats_fnames = make_gridsub_fnames(stats_fname, num_subgrids)
    subgrid_lnp_fnames = make_gridsub_fnames(
        lnp_fname, num_subgrids, extension=".hd5"
    )

    for i in range(num_subgrids):
        sub_noisemodel_vals = get_noisemodelcat(sub_noise_trim_fnames[i])
        fit.summary_table_memory(
            obsdata,
            sub_noisemodel_vals,
            sub_seds_trim_fnames[i],
            threshold=-40.0,
            save_every_npts=100,
            lnp_npts=60,
            stats_outname=subgrid_stats_fnames[i],
            pdf1d_outname=subgrid_pdf1d_fnames[i],
            lnp_outname=subgrid_lnp_fnames[i],
            grid_info_dict=grid_info_dict,
            do_not_normalize=True,
        )
        # The do_not_normalize option is absolutely crucial!

    # Now merge the results
    merged_pdf1d_fname, merged_stats_fname = subgridding_tools.merge_pdf1d_stats(
        subgrid_pdf1d_fnames, subgrid_stats_fnames
    )

    # Do a full fit also
    normal_stats = "normal_stats.fits"
    normal_pdf1d = "normal_pdf1d.fits"
    normal_lnp = "normal_lnp.hd5"
    fit.summary_table_memory(
        obsdata,
        noisemodel_vals,
        seds_trim_fname,
        threshold=-40.0,
        save_every_npts=100,
        lnp_npts=60,
        stats_outname=normal_stats,
        pdf1d_outname=normal_pdf1d,
        lnp_outname=normal_lnp,
        do_not_normalize=True,
    )
    # Here, we also need to use do_not_normalize, otherwise Pmax will be
    # different by a factor

    # CHECKS
    tolerance = 1e-6

    print("comparing pdf1d")
    # open both files in a context manager so the handles are closed even
    # when one of the comparisons below fails
    with fits.open(normal_pdf1d) as fits_normal, fits.open(
        merged_pdf1d_fname
    ) as fits_new:
        if len(fits_new) != len(fits_normal):
            raise AssertionError(
                "merged and full-grid pdf1d files have a different number of HDUs"
            )

        # extension 0 is skipped; the remaining extensions are matched by
        # their EXTNAME rather than by position
        for k in range(1, len(fits_new)):
            qname = fits_new[k].header["EXTNAME"]
            print(qname)
            np.testing.assert_allclose(
                fits_new[k].data,
                fits_normal[qname].data,
                rtol=tolerance,
                atol=tolerance,
            )

    print("comparing stats")
    table_normal = Table.read(normal_stats)
    table_new = Table.read(merged_stats_fname)

    if len(table_normal) != len(table_new):
        raise AssertionError(
            "merged and full-grid stats tables have a different number of rows"
        )

    # The merging process can not be made bit-correct due to floating point
    # math (exacerbated by exponentials), so only the identifying columns are
    # required to match exactly; everything else is compared within tolerance.
    for c in table_new.colnames:
        print(c)
        if c in ("Name", "RA", "DEC"):
            np.testing.assert_equal(
                table_normal[c],
                table_new[c],
                err_msg="column {} is not equal".format(c),
            )
        else:
            np.testing.assert_allclose(
                table_normal[c],
                table_new[c],
                rtol=tolerance,
                equal_nan=True,
                err_msg="column {} is not close enough".format(c),
            )
modelsedgrid, absflux_a_matrix=datamodel.absflux_a_matrix) if args.trim: print('Trimming the model and noise grids') # read in the observed data obsdata = datamodel.get_obscat(datamodel.obsfile, datamodel.filters) # get the modesedgrid on which to generate the noisemodel modelsedgridfile = datamodel.project + '/' + datamodel.project + \ '_seds.grid.hd5' modelsedgrid = FileSEDGrid(modelsedgridfile) # read in the noise model just created noisemodel_vals = noisemodel.get_noisemodelcat(datamodel.noisefile) # trim the model sedgrid sed_trimname = modelsedgridfile.replace('_seds', '_seds_trim') noisemodel_trimname = sed_trimname.replace('_seds', '_noisemodel') trim_grid.trim_models(modelsedgrid, noisemodel_vals, obsdata, sed_trimname, noisemodel_trimname, sigma_fac=3.) if args.fit: start_time = time.clock()
# remaining command-line arguments for the simulated-observations script
parser.add_argument("obsgrid", help="filename of observation/noise grid")
parser.add_argument("outfile", help="filename for simulated observations")
parser.add_argument(
    "--nsim", default=100, type=int, help="number of simulated objects"
)
parser.add_argument(
    "--compl_filter", default="F475W", help="filter name to use for completeness"
)
parser.add_argument(
    "--ranseed", default=None, type=int, help="seed for random number generator"
)
args = parser.parse_args()

# get the physics model grid - includes priors
modelsedgrid = FileSEDGrid(args.physgrid)

# read in the noise model - includes bias, unc, and completeness
noisegrid = noisemodel.get_noisemodelcat(args.obsgrid)

# draw the simulated observations and write them to the requested file,
# replacing any existing file of the same name
simtable = gen_SimObs_from_sedgrid(
    modelsedgrid,
    noisegrid,
    nsim=args.nsim,
    compl_filter=args.compl_filter,
    ranseed=args.ranseed,
)
simtable.write(args.outfile, overwrite=True)
def simulate_obs(
    physgrid_list,
    noise_model_list,
    output_catalog,
    nsim=100,
    compl_filter="F475W",
    weight_to_use='weight',
    ranseed=None,
):
    """
    Wrapper for creating a simulated photometry.

    Parameters
    ----------
    physgrid_list : string or list of strings
        Name of the physics model file.  If there are multiple physics model
        grids (i.e., if there are subgrids), list them all here, and they will
        each be sampled nsim/len(physgrid_list) times.

    noise_model_list : string or list of strings
        Name of the noise model file.  If there are multiple files for
        physgrid_list (because of subgrids), list the noise model file
        associated with each physics model file.

    output_catalog : string
        Name of the output simulated photometry catalog

    nsim : int (default=100)
        Number of simulated objects to create.  If nsim/len(physgrid_list)
        isn't an integer, this will be increased so that each grid has the
        same number of samples.

    compl_filter : string (default='F475W')
        filter name to use for completeness

    weight_to_use : string (default='weight')
        Set to either 'weight' (prior+grid), 'prior_weight', or 'grid_weight' to
        choose the weighting for SED selection.

    ranseed : int
        seed for random number generator

    Raises
    ------
    ValueError
        If no physics model grid is given, or if the number of noise model
        files differs from the number of physics model files.
    """

    # Normalize the inputs BEFORE counting them: a bare filename string must
    # count as one grid, not as len(string) grids.
    physgrids = np.atleast_1d(physgrid_list)
    noise_models = np.atleast_1d(noise_model_list)

    n_phys = len(physgrids)
    if n_phys == 0:
        raise ValueError("physgrid_list must contain at least one file name")
    if len(noise_models) != n_phys:
        # zip() would silently drop the unmatched grids and produce fewer
        # simulated objects than requested
        raise ValueError(
            "physgrid_list and noise_model_list must have the same length "
            "(got {0} and {1})".format(n_phys, len(noise_models))
        )

    # numbers of samples to do
    # (ensure there are enough for even sampling of multiple model grids)
    samples_per_grid = int(np.ceil(nsim / n_phys))

    # list to hold all simulation tables
    simtable_list = []

    # make a table for each physics model + noise model
    for physgrid, noise_model in zip(physgrids, noise_models):

        # get the physics model grid - includes priors
        modelsedgrid = SEDGrid(str(physgrid))

        # read in the noise model - includes bias, unc, and completeness
        noisegrid = noisemodel.get_noisemodelcat(str(noise_model))

        # generate the table
        # NOTE(review): the same ranseed is passed for every grid - confirm
        # that this is the intended behavior when several subgrids are used
        simtable = gen_SimObs_from_sedgrid(
            modelsedgrid,
            noisegrid,
            nsim=samples_per_grid,
            compl_filter=compl_filter,
            weight_to_use=weight_to_use,
            ranseed=ranseed,
        )

        # append to the list
        simtable_list.append(simtable)

    # stack all the tables into one and write it out
    vstack(simtable_list).write(output_catalog, overwrite=True)
warnings.warn( "trimming already complete for {0}, skipping".format(sed_trimname) ) continue print("working on " + sed_trimname) start_time = time.clock() if noisefile == old_noisefile: print("not reading noisefile - same as last") # print(noisefile) else: print("reading noisefile") # read in the noise model noisemodel_vals = noisemodel.get_noisemodelcat(noisefile) old_noisefile = noisefile # read in the observed data print("getting the observed data") obsdata = Observations( obsfile, modelsedgrid.filters, obs_colnames=datamodel.obs_colnames ) # trim the model sedgrid # set n_detected = 0 to disable the trimming of models based on # the ASTs (e.g. extrapolations are ok) # this is needed as the ASTs in the NIR bands do not go faint enough trim_grid.trim_models( modelsedgrid, noisemodel_vals, obsdata,