def test_reduce_grid_info(self):
    """
    Split a cached version of a SED grid into subgrids and check that
    reducing the grid info over the subgrid files reproduces the info
    computed from the full grid: identical "min" and "max" per quantity,
    and a "num_unique" count that matches the full grid (or is capped
    at ``cap_unique`` when the full grid has more unique values).
    """
    # split the cached trimmed SED grid into 3 subgrid files
    sub_fnames = subgridding_tools.split_grid(self.seds_trim_fname_cache, 3)

    # reference result: grid info computed directly from the full grid
    complete_g_info = subgridding_tools.subgrid_info(self.seds_trim_fname_cache)

    cap_unique = 50
    # same info obtained by reducing over the subgrid files (3 processes)
    sub_g_info = subgridding_tools.reduce_grid_info(
        sub_fnames, nprocs=3, cap_unique=cap_unique)

    for q in complete_g_info:
        # every quantity of the full grid must survive the reduction
        if q not in sub_g_info:
            raise AssertionError()
        # min/max must match the full-grid values exactly
        if not complete_g_info[q]["min"] == sub_g_info[q]["min"]:
            raise AssertionError()
        if not complete_g_info[q]["max"] == sub_g_info[q]["max"]:
            raise AssertionError()
        num_unique = len(complete_g_info[q]["unique"])
        if num_unique > cap_unique:
            # Can still be larger if one of the sub results during the
            # reduction is larger. This is as intended.
            if not sub_g_info[q]["num_unique"] >= cap_unique:
                raise AssertionError()
        else:
            if not sub_g_info[q]["num_unique"] == num_unique:
                raise AssertionError()
def test_reduce_grid_info():
    """
    Split a downloaded SED grid into subgrids and verify that the reduced
    grid info (min, max, capped unique-value counts per quantity) agrees
    with the info computed from the full grid.
    """
    seds_trim_fname = download_rename("beast_example_phat_seds_trim.grid.hd5")

    # three subgrid files plus the reference info from the whole grid
    subgrid_files = subgridding_tools.split_grid(seds_trim_fname, 3)
    full_info = subgridding_tools.subgrid_info(seds_trim_fname)

    cap_unique = 50
    reduced_info = subgridding_tools.reduce_grid_info(
        subgrid_files, nprocs=3, cap_unique=cap_unique)

    for quantity, full_q in full_info.items():
        # the quantity must be present after the reduction
        if quantity not in reduced_info:
            raise AssertionError()
        reduced_q = reduced_info[quantity]
        # extrema must be reproduced exactly
        if not full_q["min"] == reduced_q["min"]:
            raise AssertionError()
        if not full_q["max"] == reduced_q["max"]:
            raise AssertionError()
        full_num_unique = len(full_q["unique"])
        if full_num_unique > cap_unique:
            # The reduced count can still exceed the cap when one of the
            # sub results produced during the reduction is larger; this
            # is intended behavior.
            if not reduced_q["num_unique"] >= cap_unique:
                raise AssertionError()
        else:
            if not reduced_q["num_unique"] == full_num_unique:
                raise AssertionError()
def test_reduce_grid_info():
    """
    Check that reduce_grid_info over split subgrid files matches the grid
    info (min, max, unique-value counts) computed from the full SED grid.
    """
    trimmed_grid = download_rename('beast_example_phat_seds_trim.grid.hd5')

    parts = subgridding_tools.split_grid(trimmed_grid, 3)
    whole_info = subgridding_tools.subgrid_info(trimmed_grid)

    cap_unique = 50
    merged_info = subgridding_tools.reduce_grid_info(parts,
                                                     nprocs=3,
                                                     cap_unique=cap_unique)

    for name in whole_info:
        assert name in merged_info
        # extrema survive the reduction unchanged
        assert whole_info[name]['min'] == merged_info[name]['min']
        assert whole_info[name]['max'] == merged_info[name]['max']

        n_unique = len(whole_info[name]['unique'])
        if n_unique <= cap_unique:
            assert merged_info[name]['num_unique'] == n_unique
        else:
            # Can still be larger if one of the sub results during the
            # reduction is larger. This is as intended.
            assert merged_info[name]['num_unique'] >= cap_unique
s.replace("seds", "noisemodel") for s in trimmed_modelsedgridfiles ] # File where the ranges and number of unique values for the grid # will be stored (this can take a while to calculate) grid_info_pkl = "grid_info_dict.pkl" if args.dens_bin is not None: # Use the right subfolder trimmed_modelsedgridfiles, trimmed_noisemodelfiles = [[ os.path.join(bin_subfolder, f) for f in l ] for l in [trimmed_modelsedgridfiles, trimmed_noisemodelfiles]] grid_info_pkl = os.path.join(bin_subfolder, grid_info_pkl) if not os.path.isfile(grid_info_pkl): grid_info_dict = subgridding_tools.reduce_grid_info( trimmed_modelsedgridfiles, trimmed_noisemodelfiles, nprocs=4) with open(grid_info_pkl, "wb") as p: pickle.dump(grid_info_dict, p) print("wrote grid_info_dict to " + grid_info_pkl) else: print("loading grid_info_dict from " + grid_info_pkl) with open(grid_info_pkl, "rb") as p: grid_info_dict = pickle.loads(p.read()) # perform fits for the subgrids individually def fit_submodel(modelsedgridfile): # input files trimmed_modelsedgridfile = modelsedgridfile.replace( "seds", "seds_trim") trimmed_noisemodelfile = trimmed_modelsedgridfile.replace(
def test_merge_pdf1d_stats(self):
    """
    Using cached versions of the observations, sed grid, and noise model,
    split the grids and do the fitting on the subgrids and original grid.
    Merge the results from the subgrids and compare to the results from
    fitting the full grid.
    """

    ######################################
    # STEP 1: GET SOME DATA TO WORK WITH #
    ######################################

    # read in the observed data
    obsdata = Observations(self.obs_fname_cache, self.settings.filters,
                           self.settings.obs_colnames)

    #########################################################################################
    # STEP 2: SPLIT THE GRIDS AND GENERATE THE GRID INFO DICT AS IN THE SUBGRIDDING EXAMPLE #
    #########################################################################################
    num_subgrids = 3

    # Split SED grid
    sub_seds_trim_fnames = subgridding_tools.split_grid(
        self.seds_trim_fname_cache, num_subgrids, overwrite=True)

    # Split noise grid (a standardized function does not exist)
    sub_noise_trim_fnames = []

    noisemodel_vals = noisemodel.get_noisemodelcat(self.noise_trim_fname_cache)
    # slice the noise model arrays into num_subgrids equal-sized pieces so
    # they line up with the SED subgrids
    slices = subgridding_tools.uniform_slices(len(noisemodel_vals["bias"]),
                                              num_subgrids)
    for i, slc in enumerate(slices):
        outname = self.noise_trim_fname_cache.replace(".hd5",
                                                      "sub{}.hd5".format(i))
        with tables.open_file(outname, "w") as outfile:
            outfile.create_array(outfile.root, "bias",
                                 noisemodel_vals["bias"][slc])
            outfile.create_array(outfile.root, "error",
                                 noisemodel_vals["error"][slc])
            outfile.create_array(outfile.root, "completeness",
                                 noisemodel_vals["completeness"][slc])
        sub_noise_trim_fnames.append(outname)

    # Collect information about the parameter ranges, to make the pdf1d bins
    # consistent between subgrids
    grid_info_dict = subgridding_tools.reduce_grid_info(
        sub_seds_trim_fnames, sub_noise_trim_fnames, nprocs=1, cap_unique=100)

    ##################################################
    # STEP 3: GENERATE FILENAMES AND RUN THE FITTING #
    ##################################################

    def make_gridsub_fnames(base_fname, num_subgrids, extension=".fits"):
        # derive one output file name per subgrid from a base name
        return [
            base_fname.replace(extension, "gridsub{}{}".format(i, extension))
            for i in range(num_subgrids)
        ]

    stats_fname = tempfile.NamedTemporaryFile(suffix=".fits").name
    pdf1d_fname = tempfile.NamedTemporaryFile(suffix=".fits").name
    lnp_fname = tempfile.NamedTemporaryFile(suffix=".hd5").name

    subgrid_pdf1d_fnames = make_gridsub_fnames(pdf1d_fname, num_subgrids)
    subgrid_stats_fnames = make_gridsub_fnames(stats_fname, num_subgrids)
    subgrid_lnp_fnames = make_gridsub_fnames(lnp_fname, num_subgrids,
                                             extension=".hd5")

    # fit each subgrid separately, sharing grid_info_dict so the pdf1d bins
    # are consistent across subgrids
    for i in range(num_subgrids):
        sub_noisemodel_vals = noisemodel.get_noisemodelcat(
            sub_noise_trim_fnames[i])
        fit.summary_table_memory(
            obsdata,
            sub_noisemodel_vals,
            sub_seds_trim_fnames[i],
            threshold=-40.0,
            save_every_npts=100,
            lnp_npts=500,
            stats_outname=subgrid_stats_fnames[i],
            pdf1d_outname=subgrid_pdf1d_fnames[i],
            lnp_outname=subgrid_lnp_fnames[i],
            grid_info_dict=grid_info_dict,
            do_not_normalize=True,
        )
        # The do_not_normalize option is absolutely crucial!

    # Now merge the results
    merged_pdf1d_fname, merged_stats_fname = subgridding_tools.merge_pdf1d_stats(
        subgrid_pdf1d_fnames, subgrid_stats_fnames)

    # Do a full fit also
    normal_stats = tempfile.NamedTemporaryFile(suffix=".fits").name
    normal_pdf1d = tempfile.NamedTemporaryFile(suffix=".fits").name
    normal_lnp = tempfile.NamedTemporaryFile(suffix=".hd5").name
    fit.summary_table_memory(
        obsdata,
        noisemodel_vals,
        self.seds_trim_fname_cache,
        threshold=-40.0,
        save_every_npts=100,
        lnp_npts=500,
        stats_outname=normal_stats,
        pdf1d_outname=normal_pdf1d,
        lnp_outname=normal_lnp,
        do_not_normalize=True,
    )
    # Here, we also need to use do_not_normalize, otherwise Pmax will be
    # different by a factor

    # CHECKS
    tolerance = 1e-6
    fits_normal = fits.open(normal_pdf1d)
    fits_new = fits.open(merged_pdf1d_fname)

    # same number of HDUs in the merged and full-fit pdf1d files
    if not len(fits_new) == len(fits_normal):
        raise AssertionError()

    # A similar problem to the above will also occur here
    for k in range(1, len(fits_new)):
        qname = fits_new[k].header["EXTNAME"]
        np.testing.assert_allclose(
            fits_new[k].data,
            fits_normal[qname].data,
            rtol=tolerance,
            atol=tolerance,
        )

    table_normal = Table.read(normal_stats)
    table_new = Table.read(merged_stats_fname)

    if not len(table_normal) == len(table_new):
        raise AssertionError()

    # These will normally fail, as the merging process can not be made
    # bit-correct due to floating point math (exacerbated by exponentials)
    for c in table_new.colnames:
        if c == "Name" or c == "RA" or c == "DEC":
            # identity columns must match exactly
            np.testing.assert_equal(
                table_normal[c],
                table_new[c],
                err_msg="column {} is not equal".format(c),
            )
        else:
            # numeric columns: allow small floating-point differences
            np.testing.assert_allclose(
                table_normal[c],
                table_new[c],
                rtol=tolerance,
                equal_nan=True,
                err_msg="column {} is not close enough".format(c),
            )
def test_merge_pdf1d_stats():
    """
    Download the PHAT example data, fit the trimmed SED grid both as a
    whole and split into subgrids, then check that merging the subgrid
    pdf1d/stats results reproduces the full-grid results (exactly for the
    identity columns, to within a small tolerance for numeric data).
    """
    # local import: only needed for generating unique output file names
    import tempfile

    ######################################
    # STEP 1: GET SOME DATA TO WORK WITH #
    ######################################
    vega_fname = download_rename("vega.hd5")
    obs_fname = download_rename("b15_4band_det_27_A.fits")
    noise_trim_fname = download_rename(
        "beast_example_phat_noisemodel_trim.grid.hd5")
    seds_trim_fname = download_rename("beast_example_phat_seds_trim.grid.hd5")

    # download cached version of fitting results
    # stats_fname_cache = download_rename('beast_example_phat_stats.fits')
    # pdf1d_fname_cache = download_rename('beast_example_phat_pdf1d.fits')

    # read in the observed data
    filters = [
        "HST_WFC3_F275W",
        "HST_WFC3_F336W",
        "HST_ACS_WFC_F475W",
        "HST_ACS_WFC_F814W",
        "HST_WFC3_F110W",
        "HST_WFC3_F160W",
    ]
    basefilters = ["F275W", "F336W", "F475W", "F814W", "F110W", "F160W"]
    obs_colnames = [f.lower() + "_rate" for f in basefilters]
    obsdata = Observations(obs_fname, filters, obs_colnames,
                           vega_fname=vega_fname)

    #########################################################################################
    # STEP 2: SPLIT THE GRIDS AND GENERATE THE GRID INFO DICT AS IN THE SUBGRIDDING EXAMPLE #
    #########################################################################################
    num_subgrids = 3

    # Split SED grid
    sub_seds_trim_fnames = subgridding_tools.split_grid(seds_trim_fname,
                                                        num_subgrids,
                                                        overwrite=True)

    # Split noise grid (a standardized function does not exist)
    sub_noise_trim_fnames = []

    noisemodel_vals = get_noisemodelcat(noise_trim_fname)
    # slice the noise arrays into num_subgrids pieces matching the SED split
    slices = subgridding_tools.uniform_slices(len(noisemodel_vals["bias"]),
                                              num_subgrids)
    for i, slc in enumerate(slices):
        outname = noise_trim_fname.replace(".hd5", "sub{}.hd5".format(i))
        with tables.open_file(outname, "w") as outfile:
            outfile.create_array(outfile.root, "bias",
                                 noisemodel_vals["bias"][slc])
            outfile.create_array(outfile.root, "error",
                                 noisemodel_vals["error"][slc])
            outfile.create_array(outfile.root, "completeness",
                                 noisemodel_vals["completeness"][slc])
        sub_noise_trim_fnames.append(outname)

    # Collect information about the parameter ranges, to make the pdf1d bins
    # consistent between subgrids
    grid_info_dict = subgridding_tools.reduce_grid_info(sub_seds_trim_fnames,
                                                        sub_noise_trim_fnames,
                                                        nprocs=1,
                                                        cap_unique=100)

    ##################################################
    # STEP 3: GENERATE FILENAMES AND RUN THE FITTING #
    ##################################################
    def make_gridsub_fnames(base_fname, num_subgrids, extension=".fits"):
        # derive one output file name per subgrid from a base name
        return [
            base_fname.replace(extension, "gridsub{}{}".format(i, extension))
            for i in range(num_subgrids)
        ]

    # Use unique temporary file names instead of hard-coded /tmp paths so
    # the test is portable and cannot collide with concurrent runs.
    stats_fname = tempfile.NamedTemporaryFile(suffix=".fits").name
    pdf1d_fname = tempfile.NamedTemporaryFile(suffix=".fits").name
    lnp_fname = tempfile.NamedTemporaryFile(suffix=".hd5").name

    subgrid_pdf1d_fnames = make_gridsub_fnames(pdf1d_fname, num_subgrids)
    subgrid_stats_fnames = make_gridsub_fnames(stats_fname, num_subgrids)
    subgrid_lnp_fnames = make_gridsub_fnames(lnp_fname, num_subgrids,
                                             extension=".hd5")

    for i in range(num_subgrids):
        sub_noisemodel_vals = get_noisemodelcat(sub_noise_trim_fnames[i])
        fit.summary_table_memory(
            obsdata,
            sub_noisemodel_vals,
            sub_seds_trim_fnames[i],
            threshold=-40.0,
            save_every_npts=100,
            lnp_npts=60,
            stats_outname=subgrid_stats_fnames[i],
            pdf1d_outname=subgrid_pdf1d_fnames[i],
            lnp_outname=subgrid_lnp_fnames[i],
            grid_info_dict=grid_info_dict,
            do_not_normalize=True,
        )
        # The do_not_normalize option is absolutely crucial!

    # Now merge the results
    merged_pdf1d_fname, merged_stats_fname = subgridding_tools.merge_pdf1d_stats(
        subgrid_pdf1d_fnames, subgrid_stats_fnames)

    # Do a full fit also
    normal_stats = tempfile.NamedTemporaryFile(suffix=".fits").name
    normal_pdf1d = tempfile.NamedTemporaryFile(suffix=".fits").name
    normal_lnp = tempfile.NamedTemporaryFile(suffix=".hd5").name
    fit.summary_table_memory(
        obsdata,
        noisemodel_vals,
        seds_trim_fname,
        threshold=-40.0,
        save_every_npts=100,
        lnp_npts=60,
        stats_outname=normal_stats,
        pdf1d_outname=normal_pdf1d,
        lnp_outname=normal_lnp,
        do_not_normalize=True,
    )
    # Here, we also need to use do_not_normalize, otherwise Pmax will be
    # different by a factor

    # CHECKS
    tolerance = 1e-6
    print("comparing pdf1d")
    # fits_cache = fits.open(pdf1d_fname_cache)
    fits_normal = fits.open(normal_pdf1d)
    fits_new = fits.open(merged_pdf1d_fname)

    # same number of HDUs in the merged and full-fit pdf1d files
    if not len(fits_new) == len(fits_normal):
        raise AssertionError()

    # A similar problem to the above will also occur here
    for k in range(1, len(fits_new)):
        qname = fits_new[k].header["EXTNAME"]
        print(qname)
        np.testing.assert_allclose(fits_new[k].data,
                                   fits_normal[qname].data,
                                   rtol=tolerance,
                                   atol=tolerance)

    print("comparing stats")
    # table_cache = Table.read(stats_fname_cache)
    table_normal = Table.read(normal_stats)
    table_new = Table.read(merged_stats_fname)

    if not len(table_normal) == len(table_new):
        raise AssertionError()

    # These will normally fail, as the merging process can not be made
    # bit-correct due to floating point math (exacerbated by exponentials)
    for c in table_new.colnames:
        print(c)
        if c == "Name" or c == "RA" or c == "DEC":
            # identity columns must match exactly
            np.testing.assert_equal(
                table_normal[c],
                table_new[c],
                err_msg="column {} is not equal".format(c),
            )
        else:
            # numeric columns: allow small floating-point differences
            np.testing.assert_allclose(
                table_normal[c],
                table_new[c],
                rtol=tolerance,
                equal_nan=True,
                err_msg="column {} is not close enough".format(c),
            )
def run_fitting(
    use_sd=True,
    nsubs=1,
    nprocs=1,
    choose_sd_sub=None,
    choose_subgrid=None,
    pdf2d_param_list=['Av', 'Rv', 'f_A', 'M_ini', 'logA', 'Z', 'distance'],
    resume=False,
):
    """
    Run the fitting. If nsubs > 1, this will find existing subgrids.
    If use_sd is True, will also incorporate source density info.

    The additional choose_* options are to make queue scripts usable,
    by specifying a given SD+sub and/or subgrid for the fitting run.

    Parameters
    ----------
    use_sd : boolean (default=True)
        If True, create source density dependent noise models (determined by
        finding matches to datamodel.astfile with SD info)

    nsubs : int (default=1)
        number of subgrids used for the physics model

    nprocs : int (default=1)
        Number of parallel processes to use
        (currently only implemented for subgrids)

    choose_sd_sub : list of two strings (default=None)
        If this is set, the fitting will just be for this combo of SD+sub,
        rather than all of them.  Overrides use_sd.
        format of the list: ['#','#']

    choose_subgrid : int (default=None)
        If this is set, the fitting with just be for this subgrid index.
        If nsubs=1, this is ignored.

    pdf2d_param_list : list of strings or None
        If set, do 2D PDFs of these parameters.  If None, don't make 2D PDFs.
        NOTE: the default list is never mutated here; it is only read and
        passed through to the fitting inputs.

    resume : boolean (default=False)
        choose whether to resume existing run or start over
    """

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)
    # check input parameters
    verify_params.verify_input_format(datamodel)

    # keep track of time
    # NOTE: time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended monotonic wall-clock timer for measuring elapsed time
    start_time = time.perf_counter()

    # --------------------
    # make lists of file names
    # --------------------

    file_dict = create_filenames.create_filenames(
        use_sd=use_sd,
        nsubs=nsubs,
        choose_sd_sub=choose_sd_sub,
        choose_subgrid=choose_subgrid,
    )

    # input files
    photometry_files = file_dict["photometry_files"]
    # modelsedgrid_files = file_dict["modelsedgrid_files"]
    modelsedgrid_trim_files = file_dict["modelsedgrid_trim_files"]
    # noise_files = file_dict["noise_files"]
    noise_trim_files = file_dict["noise_trim_files"]

    # output files
    stats_files = file_dict["stats_files"]
    pdf_files = file_dict["pdf_files"]
    pdf2d_files = file_dict["pdf2d_files"]
    if pdf2d_param_list is None:
        # disable 2D PDF output for every file
        pdf2d_files = [None] * len(pdf2d_files)
    lnp_files = file_dict["lnp_files"]

    # total number of files
    n_files = len(photometry_files)

    # other potentially useful info
    sd_sub_info = file_dict["sd_sub_info"]
    # gridsub_info = file_dict['gridsub_info']

    # if using subgrids, make the grid dictionary file:
    # File where the ranges and number of unique values for the grid
    # will be stored (this can take a while to calculate)

    if nsubs > 1:

        gridpickle_files = file_dict["gridpickle_files"]

        for i in range(len(gridpickle_files)):
            if not os.path.isfile(gridpickle_files[i]):

                # list of corresponding SED grids and noise models

                # - with SD+sub: get file list for ALL subgrids at current
                #   SD+sub value
                if use_sd or (choose_sd_sub is not None):
                    temp = create_filenames.create_filenames(
                        nsubs=nsubs, choose_sd_sub=sd_sub_info[i], choose_subgrid=None
                    )
                    modelsedgrid_trim_list = temp["modelsedgrid_trim_files"]
                    noise_trim_list = temp["noise_trim_files"]

                # - no SD info: get file list for ALL subgrids
                else:
                    temp = create_filenames.create_filenames(
                        use_sd=False, nsubs=nsubs, choose_subgrid=None
                    )
                    modelsedgrid_trim_list = temp["modelsedgrid_trim_files"]
                    noise_trim_list = temp["noise_trim_files"]

                # create the grid info dictionary
                print("creating grid_info_dict for " + gridpickle_files[i])
                grid_info_dict = subgridding_tools.reduce_grid_info(
                    modelsedgrid_trim_list, noise_trim_list, nprocs=nprocs
                )
                # save it
                with open(gridpickle_files[i], "wb") as p:
                    pickle.dump(grid_info_dict, p)
                print("wrote grid_info_dict to " + gridpickle_files[i])

    # --------------------
    # do the fitting!
    # --------------------

    # set up function inputs

    if nsubs == 1:

        input_list = [
            (
                photometry_files[i],
                modelsedgrid_trim_files[i],
                noise_trim_files[i],
                stats_files[i],
                pdf_files[i],
                pdf2d_files[i],
                pdf2d_param_list,
                lnp_files[i],
                None,
                resume,
            )
            for i in range(n_files)
        ]

    if nsubs > 1:

        input_list = [
            (
                photometry_files[i],
                modelsedgrid_trim_files[i],
                noise_trim_files[i],
                stats_files[i],
                pdf_files[i],
                pdf2d_files[i],
                pdf2d_param_list,
                lnp_files[i],
                gridpickle_files[i],
                resume,
            )
            for i in range(n_files)
        ]

    # run the fitting (via parallel wrapper)
    parallel_wrapper(fit_submodel, input_list, nprocs=nprocs)

    # see how long it took!
    new_time = time.perf_counter()
    print("time to fit: ", (new_time - start_time) / 60.0, " min")