Example #1
def split_main(
    beast_settings_info,
    catfile,
    astfile,
    mapfile,
    n_per_file=6250,
    min_n_subfile=None,
    sort_col="F475W_RATE",
):
    """
    Making the physics model grid takes a while for production runs.  This
    creates scripts to run each subgrid as a separate job.

    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    catfile : string
        name of the photometry catalog file

    astfile : string
        name of the ast catalog file

    mapfile : string
        background or source density map file

    n_per_file : int or None (default=6250)
        If set, divide the split catalog into sub-catalogs with length
        n_per_file.  Good for photometry, not useful for ASTs.

    min_n_subfile : int or None (default=None)
        If set, each bin in the photometry catalog will be split into at least
        this many subfiles. Useful if a bin has fewer than n_per_file stars but
        you still want flux-sorted subfiles (which means more trimming and
        faster fitting).

    sort_col : string (default="F475W_RATE")
        If n_per_file or min_n_subfile is set, the catalog will be sorted by this
        column before splitting into sub-catalogs.


    """

    # process beast settings info
    if isinstance(beast_settings_info, str):
        settings = beast_settings.beast_settings(beast_settings_info)
    elif isinstance(beast_settings_info, beast_settings.beast_settings):
        settings = beast_settings_info
    else:
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # Create a binned density map, so both the observed and the ast
    # catalog can be split using a consistent grouping (= binning) of
    # the tiles
    if not settings.sd_Nbins and not settings.sd_binwidth and not settings.sd_custom:
        raise RuntimeError(
            "You need to specify the source density binning parameters in beast_settings_info"
        )

    bdm = BinnedDensityMap.create(
        mapfile,
        bin_mode=settings.sd_binmode,
        N_bins=settings.sd_Nbins,
        bin_width=settings.sd_binwidth,
        custom_bins=settings.sd_custom,
    )

    print("Splitting catalog")
    split_catalog_using_map(
        catfile,
        bdm,
        n_per_file=n_per_file,
        min_n_subfile=min_n_subfile,
        sort_col=sort_col,
    )
    print("")
    print("Splitting ASTs")
    split_catalog_using_map(
        astfile, bdm, ra_colname="RA_J2000", dec_colname="DEC_J2000", n_per_file=None
    )
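
A minimal usage sketch (assuming this function lives in beast.tools.split_catalog_using_map, as the wrapper code further below suggests; the catalog and map file names are hypothetical placeholders):

from beast.tools import split_catalog_using_map

# split the photometry and AST catalogs consistently by source density bin;
# the binning parameters (sd_binmode, sd_Nbins, sd_binwidth, sd_custom) are
# taken from the settings
split_catalog_using_map.split_main(
    "beast_settings.txt",                  # or a beast_settings instance
    "field.vgst_with_sourceden_cut.fits",  # hypothetical photometry catalog
    "field.gst.fake_cut.fits",             # hypothetical AST catalog
    "field_sourceden_map.hd5",             # hypothetical density map
    n_per_file=6250,
    sort_col="F475W_RATE",
)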
Example #2
def make_ast_inputs(beast_settings_info, pick_method="flux_bin_method"):
    """
    Make the list of artificial stars to be run through the photometry pipeline

    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    pick_method : string (default="flux_bin_method")
        By default, use the flux bin method to select SEDs.
        If set to "random_seds", randomly select SEDs from the model grid.
        If set to "suppl_seds", supplement the existing input ASTs by randomly
        selecting additional SEDs from the list of non-selected models.

    """

    # process beast settings info
    if isinstance(beast_settings_info, str):
        settings = beast_settings.beast_settings(beast_settings_info)
    elif isinstance(beast_settings_info, beast_settings.beast_settings):
        settings = beast_settings_info
    else:
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # read in the photometry catalog
    obsdata = Observations(settings.obsfile,
                           settings.filters,
                           obs_colnames=settings.obs_colnames)

    # --------------------
    # select SEDs
    # --------------------

    modelsedgrid_filename = "./{0}/{0}_seds.grid.hd5".format(settings.project)
    Nrealize = settings.ast_realization_per_model

    # file names for stars and corresponding SED parameters
    if pick_method == "suppl_seds":
        outfile_seds = "./{0}/{0}_inputAST_seds_suppl.txt".format(
            settings.project)
        outfile_params = "./{0}/{0}_ASTparams_suppl.fits".format(
            settings.project)
    else:
        outfile_seds = "./{0}/{0}_inputAST_seds.txt".format(settings.project)
        outfile_params = "./{0}/{0}_ASTparams.fits".format(settings.project)

    # if the SED file doesn't exist, create SEDs
    if not os.path.isfile(outfile_seds):

        print("Selecting SEDs for ASTs")

        if pick_method == "flux_bin_method":

            N_fluxes = settings.ast_n_flux_bins
            min_N_per_flux = settings.ast_n_per_flux_bin
            bins_outfile = "./{0}/{0}_ASTfluxbins.txt".format(settings.project)

            chosen_seds = pick_models_toothpick_style(
                modelsedgrid_filename,
                settings.filters,
                N_fluxes,
                min_N_per_flux,
                outfile=outfile_seds,
                outfile_params=outfile_params,
                bins_outfile=bins_outfile,
            )

        if pick_method == "random_pick":

            # construct magnitude cuts
            mag_cuts = settings.ast_maglimit
            Nfilters = settings.ast_bands_above_maglimit

            if len(mag_cuts) == 1:
                tmp_cuts = mag_cuts
                min_mags = np.zeros(len(settings.filters))
                for k, filtername in enumerate(obsdata.filters):
                    sfiltername = obsdata.filter_aliases[filtername]
                    sfiltername = sfiltername.replace("rate", "vega")
                    sfiltername = sfiltername.replace("RATE", "VEGA")
                    (keep, ) = np.where(obsdata[sfiltername] < 99.0)
                    min_mags[k] = np.percentile(obsdata[keep][sfiltername],
                                                90.0)

                # max. mags from the gst observation cat.
                mag_cuts = min_mags + tmp_cuts

            N_models = settings.ast_models_selected_per_age

            chosen_seds = pick_models(
                modelsedgrid_filename,
                settings.filters,
                mag_cuts,
                Nfilter=Nfilters,
                N_stars=N_models,
                Nrealize=Nrealize,
                outfile=outfile_seds,
                outfile_params=outfile_params,
            )

        if pick_method == "suppl_seds":

            print("Supplementing ASTs")

            nAST = settings.ast_N_supplement
            existingASTfile = settings.ast_existing_file
            mag_cuts = settings.ast_suppl_maglimit
            color_cuts = settings.ast_suppl_colorlimit

            chosen_seds = supplement_ast(
                modelsedgrid_filename,
                settings.filters,
                nAST=nAST,
                existingASTfile=existingASTfile,
                outASTfile=outfile_seds,
                outASTfile_params=outfile_params,
                mag_cuts=mag_cuts,
                color_cuts=color_cuts,
            )

        # guard against an unrecognized pick_method leaving chosen_seds unset
        else:
            raise ValueError("unknown pick_method: " + pick_method)

    # if the SED file does exist, read them in
    else:
        print("Reading existing AST SEDs")
        chosen_seds = Table.read(outfile_seds, format="ascii")

    # --------------------
    # assign positions
    # --------------------

    # if we want ASTs with positions included (rather than just the fluxes from
    # the section above)
    if settings.ast_with_positions:

        print("Assigning positions to artifical stars")

        outfile = "./{0}/{0}_inputAST.txt".format(settings.project)
        if pick_method == "suppl_seds":
            outfile = "./{0}/{0}_inputAST_suppl.txt".format(settings.project)

        # if we're replicating SEDs across source density or background bins
        if settings.ast_density_table is not None:
            if hasattr(settings, "ast_reference_image_hdu_extension"):
                hdu_ext = settings.ast_reference_image_hdu_extension
            else:
                hdu_ext = 1

            make_ast_xy_list.pick_positions_from_map(
                obsdata,
                chosen_seds,
                settings.ast_density_table,
                settings.sd_binmode,
                settings.sd_Nbins,
                settings.sd_binwidth,
                settings.sd_custom,
                settings.ast_realization_per_model,
                outfile=outfile,
                refimage=settings.ast_reference_image,
                refimage_hdu=hdu_ext,
                wcs_origin=1,
                Nrealize=1,
                set_coord_boundary=settings.ast_coord_boundary,
                region_from_filters="all",
                erode_boundary=settings.ast_erode_selection_region,
            )
        # if we're not using SD/background maps, SEDs will be distributed
        # based on catalog sources
        else:
            make_ast_xy_list.pick_positions(
                obsdata,
                outfile_seds,
                outfile,
                settings.ast_pixel_distribution,
                refimage=settings.ast_reference_image,
            )
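
A minimal call sketch (assuming the usual module path beast.tools.run.make_ast_inputs; the settings file name is a placeholder):

from beast.tools.run import make_ast_inputs

# flux-bin SED selection (the default); a beast_settings instance also works
make_ast_inputs.make_ast_inputs("beast_settings.txt", pick_method="flux_bin_method")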
Example #3
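This example opens mid-way through an argparse setup. A plausible preamble, reconstructed from the args.* attributes and module calls used below (the import paths and exact flag strings are assumptions, not part of the original source):

import argparse

from beast.tools import beast_settings
from beast.tools.run import create_physicsmodel, make_ast_inputs

if __name__ == "__main__":

    # command-line flags, mirroring the args.* attributes consumed below
    parser = argparse.ArgumentParser()
    parser.add_argument("-p",
                        "--physicsmodel",
                        help="Create the physics model grid",
                        action="store_true")
    parser.add_argument("-a",
                        "--ast",
                        help="Make the input list of artificial stars",
                        action="store_true")
    parser.add_argument("-o",
                        "--observationmodel",
                        help="Create the observation (noise) model",
                        action="store_true")
    parser.add_argument(
        "-t",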
        "--trim",
        help="Trim the physics and observation model grids",
        action="store_true",
    )
    parser.add_argument("-f",
                        "--fit",
                        help="Fit the observed data",
                        action="store_true")
    parser.add_argument("-r",
                        "--resume",
                        help="Resume a fitting run",
                        action="store_true")
    args = parser.parse_args()

    # read in BEAST settings
    settings = beast_settings.beast_settings("beast_settings.txt")

    if args.physicsmodel:

        create_physicsmodel.create_physicsmodel(
            settings,
            nsubs=settings.n_subgrid,
            nprocs=1,
        )

    if args.ast:

        make_ast_inputs.make_ast_inputs(settings, pick_method="random_seds")

    if args.observationmodel:
        print("Generating noise model from ASTs and absflux A matrix")
Example #4
def beast_ast_inputs(field_name=None,
                     ref_image=None,
                     filter_ids=None,
                     galaxy=None,
                     supp=0):
    """
    This does all of the steps for generating AST inputs and can be used as
    a wrapper to automatically do most steps for multiple fields.
    * make field's beast_settings file
    * make source density map
    * make background density map
    * split catalog by source density
    * make physics model (SED grid)
    * make input list for ASTs
    * prune input ASTs

    ----
    Inputs:

    field_name (str): name of field
    ref_image (str): path to reference image
    filter_ids (list): list of indices corresponding to the filters in the
                        observation, referenced to the master list below.
    galaxy (str): name of target galaxy (e.g., 'SMC', 'LMC')
    supp (int): if nonzero, also generate supplemental AST inputs (default: 0)
    ----

    Places for user to manually do things:
    * editing code before use
        - here: list the catalog filter names with the corresponding BEAST names
        - here: choose settings (pixel size, filter, mag range) for the source density map
        - here: choose settings (pixel size, reference image) for the background map

    """

    # the list of fields
    field_names = [field_name]

    # the path+file for a reference image
    im_path = [ref_image]
    ref_filter = ["F475W"]

    # choose a filter to use for removing artifacts
    # (remove catalog sources with filter_FLAG > 99)
    flag_filter = ["F475W"]

    # number of fields
    n_field = len(field_names)

    # Need to know what the correspondence is between filter names in the
    # catalog and the BEAST filter names.
    #
    # These will be used to automatically determine the filters present in
    # each GST file and fill in the beast settings file.  The order doesn't
    # matter, as long as the order in one list matches the order in the other
    # list.
    #
    gst_filter_names = [
        "F225W",
        "F275W",
        "F336W",
        "F475W",
        "F814W",
        "F110W",
        "F160W",
        "F657N",
    ]
    beast_filter_names = [
        "HST_WFC3_F225W",
        "HST_WFC3_F275W",
        "HST_WFC3_F336W",
        "HST_WFC3_F475W",
        "HST_WFC3_F814W",
        "HST_WFC3_F110W",
        "HST_WFC3_F160W",
        "HST_WFC3_F657N",
    ]

    for b in range(n_field):

        print("********")
        print("field " + field_names[b])
        print("********")

        # -----------------
        # data file names
        # -----------------

        # paths for the data/AST files
        gst_file = "./data/{0}/{0}.vgst.fits".format(field_names[b])
        ast_input_file = "./{0}/{0}_inputAST.txt".format(field_names[b])

        # if no galaxy is manually indicated, try to fetch from gst_file name
        if galaxy is None:
            print("no galaxy specified")
            print("fetching galaxy from field name")
            galaxy_attempt = field_names[b].split("_")[1].split("-")[0]
            print("is this the correct galaxy? : %s" % galaxy_attempt)

            # input() returns the empty string for "enter"
            yes = {'yes', 'y', 'ye', ''}
            no = {'no', 'n'}

            response = 0

            while response == 0:
                choice = input().lower()
                if choice in yes:
                    galaxy = galaxy_attempt
                    response = 1
                elif choice in no:
                    print("please rerun with --galaxy specified")
                    return
                else:
                    sys.stdout.write("Please respond with 'yes' or 'no'")

        # path for the reference image (if using for the background map)
        im_file = im_path[b]

        # fetch filter ids
        gst_data = Table.read(gst_file)
        filter_cols = [c for c in gst_data.colnames if "VEGA" in c]

        # extract every filter mentioned in the table
        filters = [f.split("_")[0] for f in filter_cols]

        # match with the gst filter list
        filter_ids = [gst_filter_names.index(i) for i in filters]
        filter_ids.sort()

        # note: this overwrites the master filter lists, which is fine here
        # because each call processes a single field
        gst_filter_names = [gst_filter_names[i] for i in filter_ids]
        beast_filter_names = [beast_filter_names[i] for i in filter_ids]

        print(beast_filter_names)

        # region file with catalog stars
        # make_region_file(gst_file, ref_filter[b])

        # -----------------
        # 0. make beast settings file
        # -----------------

        print("")
        print("creating beast settings file")
        print("")

        beast_settings_filename = create_beast_settings(gst_file,
                                                        ast_input_file,
                                                        gst_filter_names,
                                                        beast_filter_names,
                                                        galaxy,
                                                        ref_image=im_file,
                                                        supp=supp)

        # load in beast settings to get number of subgrids
        settings = beast_settings.beast_settings(
            beast_settings_filename
            #"beast_settings_" + galaxy + "_asts_" + field_names[b] + ".txt"
        )

        # -----------------
        # 1a. make magnitude histograms
        # -----------------

        print("")
        print("making magnitude histograms")
        print("")

        # if not os.path.isfile('./data/'+field_names[b]+'.gst_maghist.pdf'):
        peak_mags = plot_mag_hist.plot_mag_hist(gst_file,
                                                stars_per_bin=70,
                                                max_bins=75)

        # -----------------
        # 1b. make a source density map
        # -----------------

        print("")
        print("making source density map")
        print("")

        # not currently doing background density bins
        # use_bg_info = True
        use_bg_info = False
        if use_bg_info:
            background_args = types.SimpleNamespace(
                subcommand="background",
                catfile=gst_file,
                erode_boundary=settings.ast_erode_selection_region,
                pixsize=5,
                npix=None,
                reference=im_file,
                mask_radius=10,
                ann_width=20,
                cat_filter=[ref_filter, "90"],
            )

            create_background_density_map.main_make_map(background_args)

        # but we are doing source density bins!
        if not os.path.isfile(
                gst_file.replace(".fits", "_source_den_image.fits")):
            print("No sd image file found")
            # - pixel size of 5 arcsec
            # - use ref_filter[b] between vega mags of 17 and peak_mags[ref_filter[b]]-0.5
            sourceden_args = types.SimpleNamespace(
                subcommand="sourceden",
                catfile=gst_file,
                erode_boundary=settings.ast_erode_selection_region,
                pixsize=5,
                npix=None,
                mag_name=ref_filter[b] + "_VEGA",
                mag_cut=[17, peak_mags[ref_filter[b]] - 0.5],
                flag_name=flag_filter[b] + "_FLAG",
            )
            create_background_density_map.main_make_map(sourceden_args)

        # new file name with the source density column
        gst_file_sd = gst_file.replace(".fits", "_with_sourceden.fits")

        # -----------------
        # 2. make physics model
        # -----------------

        print("")
        print("making physics model")
        print("")

        # see which subgrid files already exist
        gs_str = ""
        if settings.n_subgrid > 1:
            gs_str = "sub*"

        # try to fetch the list of SED files (physics models)
        model_grid_files = sorted(
            glob.glob("./{0}/{0}_seds.grid*.hd5".format(field_names[b])))

        # only make the physics model grids if they don't already exist
        if len(model_grid_files) < settings.n_subgrid:
            # directly create physics model grids
            create_physicsmodel.create_physicsmodel(settings,
                                                    nprocs=1,
                                                    nsubs=settings.n_subgrid)

        # fetch the list of SED files again (physics models)
        model_grid_files = sorted(
            glob.glob("./{0}/{0}_seds.grid*.hd5".format(field_names[b])))

        # -------------------
        # 3. make AST inputs
        # -------------------

        print("")
        print("making AST inputs")
        print("")

        # only create an AST input list if the ASTs don't already exist
        if not os.path.isfile(ast_input_file):
            make_ast_inputs.make_ast_inputs(settings,
                                            pick_method="flux_bin_method")

        # compare magnitude histograms of ASTs with catalog
        plot_ast_histogram.plot_ast_histogram(
            ast_file=ast_input_file, sed_grid_file=model_grid_files[0])

        if supp != 0:

            print("")
            print("making supplemental AST inputs")
            print("")

            ast_input_supp_file = "./{0}/{0}_inputAST_suppl.txt".format(
                field_names[b])

            if not os.path.isfile(ast_input_supp_file):
                make_ast_inputs.make_ast_inputs(settings,
                                                pick_method="suppl_seds")

        print("now go check the diagnostic plots!")
def beast_production_wrapper():
    """
    This does all of the steps for a full production run, and can be used as
    a wrapper to automatically do most steps for multiple fields.
    * make field's beast_settings file
    * make source density map
    * make background density map
    * split catalog by source density
    * make physics model (SED grid)
    * make input list for ASTs
    * make noise model
    * generate batch script to trim models
    * generate batch script to fit models
    * merge stats files back together
    * spatially reorder the results

    Places for user to manually do things:
    * editing code before use
        - beast_settings_template.py: setting up the file with desired parameters
        - here: list the catalog filter names with the corresponding BEAST names
        - here: choose settings (pixel size, filter, mag range) for the source density map
        - here: choose settings (pixel size, reference image) for the background map
        - here: choose settings (filter, number per file) for dividing catalog by source density
        - here: choose settings (# files, nice level) for the trimming/fitting batch scripts
    * process the ASTs, as described in BEAST documentation
    * run the trimming scripts
    * run the fitting scripts

    """

    # the list of fields
    field_names = ["15275_IC1613"]

    # distance moduli and velocities
    # http://adsabs.harvard.edu/abs/2013AJ....146...86T
    dist_mod = [24.36]
    velocity = [-236]

    # the path+file for a reference image
    im_path = ["../beast_dwarfs/images/15275_IC1613_F555W_drz.fits.gz"]
    ref_filter = ["F555W"]

    # choose a filter to use for removing artifacts
    # (remove catalog sources with filter_FLAG > 99)
    flag_filter = ["F555W"]

    # number of fields
    n_field = len(field_names)

    # Need to know what the correspondence is between filter names in the
    # catalog and the BEAST filter names.
    #
    # These will be used to automatically determine the filters present in
    # each GST file and fill in the beast settings file.  The order doesn't
    # matter, as long as the order in one list matches the order in the other
    # list.
    #
    gst_filter_names = ["F275W", "F336W", "F390M", "F555W", "F814W", "F110W", "F160W"]
    beast_filter_names = [
        "HST_WFC3_F275W",
        "HST_WFC3_F336W",
        "HST_WFC3_F390M",
        "HST_WFC3_F555W",
        "HST_WFC3_F814W",
        "HST_WFC3_F110W",
        "HST_WFC3_F160W",
    ]

    for b in range(n_field):
        # for b in [0]:

        print("********")
        print("field " + field_names[b])
        print("********")

        # -----------------
        # data file names
        # -----------------

        # paths for the data/AST files
        gst_file = "./data/" + field_names[b] + ".gst.fits"
        ast_file = "./data/" + field_names[b] + ".gst.fake.fits"
        # path for the reference image (if using for the background map)
        im_file = im_path[b]

        # region file with catalog stars
        # make_region_file(gst_file, ref_filter[b])

        # -----------------
        # 0. make beast settings file
        # -----------------

        print("")
        print("creating beast settings file")
        print("")

        create_beast_settings(
            gst_file,
            ast_file,
            gst_filter_names,
            beast_filter_names,
            dist_mod[b],
            velocity[b],
            ref_image=im_file,
        )

        # load in beast settings to get number of subgrids
        settings = beast_settings.beast_settings(
            "beast_settings_" + field_names[i] + ".txt"
        )

        # -----------------
        # 1a. make magnitude histograms
        # -----------------

        print("")
        print("making magnitude histograms")
        print("")

        # if not os.path.isfile('./data/'+field_names[b]+'.gst_maghist.pdf'):
        peak_mags = plot_mag_hist.plot_mag_hist(gst_file, stars_per_bin=70, max_bins=75)
        # test = plot_mag_hist.plot_mag_hist(ast_file, stars_per_bin=200, max_bins=30)

        # -----------------
        # 1b. make a source density map
        # -----------------

        print("")
        print("making source density map")
        print("")

        # not currently doing background density bins
        # use_bg_info = True
        use_bg_info = False
        if use_bg_info:
            background_args = types.SimpleNamespace(
                subcommand="background",
                catfile=gst_file,
                pixsize=5,
                npix=None,
                reference=im_file,
                mask_radius=10,
                ann_width=20,
                cat_filter=[ref_filter, "90"],
            )
            create_background_density_map.main_make_map(background_args)

        # but we are doing source density bins!
        if not os.path.isfile(gst_file.replace(".fits", "_source_den_image.fits")):
            # - pixel size of 5 arcsec
            # - use ref_filter[b] between vega mags of 17 and peak_mags[ref_filter[b]]-0.5
            sourceden_args = types.SimpleNamespace(
                subcommand="sourceden",
                catfile=gst_file,
                pixsize=5,
                npix=None,
                mag_name=ref_filter[b] + "_VEGA",
                mag_cut=[17, peak_mags[ref_filter[b]] - 0.5],
                flag_name=flag_filter[b] + "_FLAG",
            )
            create_background_density_map.main_make_map(sourceden_args)

        # new file name with the source density column
        gst_file_sd = gst_file.replace(".fits", "_with_sourceden.fits")

        # -----------------
        # 2. make physics model
        # -----------------

        print("")
        print("making physics model")
        print("")

        # see which subgrid files already exist
        gs_str = ""
        if settings.n_subgrid > 1:
            gs_str = "sub*"

        sed_files = glob.glob(
            "./{0}_beast/{0}_beast_seds.grid{1}.hd5".format(field_names[b], gs_str)
        )

        # only make the physics model grids if they don't already exist
        if len(sed_files) < settings.n_subgrid:
            # directly create physics model grids
            #create_physicsmodel.create_physicsmodel(nprocs=1, nsubs=settings.n_subgrid)
            # create grids with script
            create_physicsmodel.split_create_physicsmodel(
                settings, nprocs=1, nsubs=settings.n_subgrid
            )
            print('\n**** go run physics model code for '+field_names[b]+'! ****')
            continue


        # -----------------
        # 3. make ASTs
        # -----------------

        # only create an AST input list if the ASTs don't already exist
        ast_input_file = (
            "./" + field_names[b] + "_beast/" + field_names[b] + "_beast_inputAST.txt"
        )

        if not os.path.isfile(ast_file):
            if not os.path.isfile(ast_input_file):
                print("")
                print("creating artificial stars")
                print("")
                make_ast_inputs.make_ast_inputs(settings, pick_method="flux_bin_method")

            split_ast_input_file.split_asts(
                field_names[b] + "_beast", ast_input_file, 2000
            )

            print("\n**** go run ASTs for " + field_names[b] + "! ****\n")
            continue

        # -----------------
        # 4/5. edit photometry/AST catalogs
        # -----------------

        # remove sources that are
        # - in regions without full imaging coverage,
        # - flagged in flag_filter

        print("")
        print("editing photometry/AST catalogs")
        print("")

        # - photometry
        gst_file_cut = gst_file.replace(".fits", "_with_sourceden_cut.fits")
        ast_file_cut = ast_file.replace(".fits", "_cut.fits")
        cut_catalogs.cut_catalogs(
            gst_file_sd,
            gst_file_cut,
            ast_file,
            ast_file_cut,
            partial_overlap=True,
            flagged=True,
            flag_filter=flag_filter[b],
            region_file=True,
        )

        # edit the beast settings file to have the correct photometry file name
        # (AST file name is already automatically the cut version)
        create_beast_settings(
            gst_file_cut,
            ast_file_cut,
            gst_filter_names,
            beast_filter_names,
            dist_mod[b],
            velocity[b],
            ref_image=im_file,
        )

        # load in the new settings
        settings = beast_settings.beast_settings(
            "beast_settings_" + field_names[i] + ".txt"
        )

        # -----------------
        # 6. split observations by source density
        # -----------------

        print("")
        print("splitting observations by source density")
        print("")

        # - photometry

        if len(glob.glob(gst_file_cut.replace(".fits", "*sub*fits"))) == 0:

            # a smaller value for n_per_file will mean more individual files/runs,
            # but each run will take a shorter amount of time

            # split_main takes the settings first and reads the source density
            # binning parameters from them (see its signature in Example #1)
            split_catalog_using_map.split_main(
                settings,
                gst_file_cut,
                ast_file_cut,
                gst_file.replace(".fits", "_sourceden_map.hd5"),
                n_per_file=6250,
                sort_col=ref_filter[b] + "_RATE",
            )

        # -- at this point, we can run the code to create lists of filenames
        file_dict = create_filenames.create_filenames(
            settings, use_sd=True, nsubs=settings.n_subgrid
        )

        # figure out how many files there are
        sd_sub_info = file_dict["sd_sub_info"]
        # - number of SD bins
        temp = set([i[0] for i in sd_sub_info])
        print("** total SD bins: " + str(len(temp)))
        # - the unique sets of SD+sub
        unique_sd_sub = [
            x for i, x in enumerate(sd_sub_info) if i == sd_sub_info.index(x)
        ]
        print("** total SD subfiles: " + str(len(unique_sd_sub)))

        # -----------------
        # 7. make noise models
        # -----------------

        print("")
        print("making noise models")
        print("")

        # create the noise model (this code will check if it exists)
        create_obsmodel.create_obsmodel(
            settings, use_sd=True, nsubs=settings.n_subgrid, nprocs=1
        )

        # -----------------
        # 8. make script to trim models
        # -----------------

        print("")
        print("setting up script to trim models")
        print("")

        job_file_list = make_trim_scripts.make_trim_scripts(
            settings, num_subtrim=1, prefix='source activate b13'
        )

        if len(job_file_list) > 0:
            print('\n**** go run trimming code for '+field_names[b]+'! ****')
            print('Here are the command(s) to run:')
            for job in job_file_list:
                print('at -f '+job+' now')
            return
        else:
            print("all files are trimmed for " + field_names[b])

        # -----------------
        # 9. make script to fit models
        # -----------------

        print("")
        print("setting up script to fit models")
        print("")

        fit_run_info = setup_batch_beast_fit.setup_batch_beast_fit(
            settings,
            num_percore=1,
            nice=19,
            overwrite_logfile=False,
            prefix="source activate b13",
            use_sd=True,
            nsubs=settings.n_subgrid,
            nprocs=1,
        )

        # check if the fits exist before moving on
        tot_remaining = len(fit_run_info["done"]) - np.sum(fit_run_info["done"])
        if tot_remaining > 0:
            print("\n**** go run fitting code for " + field_names[b] + "! ****")
            print(
                "Here are the "
                + str(len(fit_run_info["files_to_run"]))
                + " commands to run:"
            )
            for job_file in fit_run_info["files_to_run"]:
                print("at -f ./" + job_file + " now")
            continue
        else:
            print("all fits are complete for " + field_names[b])

        # -----------------
        # 10. merge stats files from each fit
        # -----------------

        print("")
        print("merging stats files")
        print("")

        merge_files.merge_files(settings, use_sd=True, nsubs=settings.n_subgrid)
Example #6
def plot_toothpick_details(asts_filename, settings_filename, savefig=False):
    """
    Plot the details of the toothpick noisemodel creation for each filter.
    These plots show the individual AST results as points as
    (flux_in - flux_out)/flux_in.  In addition, the binned values of these
    points are plotted giving the bias term in the observation model.
    Error bars around the binned bias values give the binned sigma term of
    the observation model.  Finally, as a separate column of plots the
    binned completeness in each filter is plotted.

    Parameters
    ----------
    asts_filename : str
        filename with the AST results

    settings_filename : str
        filename with the BEAST settings (used just for the filter information)

    savefig : str or False (default=False)
        to save the figure, set this to the file extension (e.g., 'png', 'pdf')
    """
    settings = beast_settings.beast_settings(settings_filename)

    # read in AST results
    model = toothpick.MultiFilterASTs(asts_filename, settings.filters)

    # set the column mappings as the external file is BAND_VEGA or BAND_IN
    model.set_data_mappings(upcase=True,
                            in_pair=("in", "in"),
                            out_pair=("out", "rate"))

    # compute binned biases, uncertainties, and completeness as a function of band flux
    ast_nonrecovered_ratio = 2.0
    model.fit_bins(
        nbins=50,
        ast_nonrecovered_ratio=ast_nonrecovered_ratio,
    )

    nfilters = len(settings.filters)
    figsize_y = nfilters * 3
    fig, ax = plt.subplots(nrows=nfilters,
                           ncols=2,
                           figsize=(14, figsize_y),
                           sharex=True)
    set_params()

    for i, cfilter in enumerate(settings.filters):
        mag_in = model.data[model.filter_aliases[cfilter + "_in"]]
        flux_out = model.data[model.filter_aliases[cfilter + "_out"]]

        flux_in = (10**(-0.4 * mag_in)) * model.vega_flux[i]
        flux_out *= model.vega_flux[i]

        gvals = flux_out != 0.0

        ax[i, 0].plot(
            flux_in[gvals],
            flux_out[gvals] / flux_in[gvals],
            "ko",
            alpha=0.1,
            markersize=2,
        )

        # not all bins are filled with good data
        ngbins = model._nasts[i]

        ax[i, 0].plot(
            model._fluxes[0:ngbins, i],
            1. + model._biases[0:ngbins, i] / model._fluxes[0:ngbins, i],
            "b-",
        )

        ax[i, 0].errorbar(
            model._fluxes[0:ngbins, i],
            1. + model._biases[0:ngbins, i] / model._fluxes[0:ngbins, i],
            yerr=model._sigmas[0:ngbins, i] / model._fluxes[0:ngbins, i],
            fmt="bo",
            markersize=2,
            alpha=0.5,
        )

        if ast_nonrecovered_ratio is not None:
            ax[i, 0].axhline(ast_nonrecovered_ratio,
                             linestyle="--",
                             alpha=0.25,
                             color="k")

        ax[i, 0].set_ylim(-10, 2.5)
        ax[i, 0].set_ylabel(r"$F_o/F_i$")

        ax[i, 1].plot(
            model._fluxes[0:ngbins, i],
            model._compls[0:ngbins, i],
            "b-",
        )

        ax[i, 1].yaxis.tick_right()
        ax[i, 1].yaxis.set_label_position("right")
        ax[i, 1].set_ylim(0, 1)
        ax[i, 1].set_xscale("log")
        sfilt = cfilter.split("_")[-1]
        ax[i, 1].set_ylabel(f"C({sfilt})")

    ax[nfilters - 1, 0].set_xlabel(r"$F_i$")
    ax[nfilters - 1, 1].set_xlabel(r"$F_i$")

    # add in the no-bias line (flux ratio of 1)
    # do after all the data has been plotted to get the full x range
    pxrange = ax[0, 0].get_xlim()
    for i, cfilter in enumerate(settings.filters):
        ax[i, 0].plot(pxrange, [1.0, 1.0], "k--", alpha=0.5)

    # figname
    basename = asts_filename.replace(".fits", "_plot")

    fig.tight_layout()

    # save or show fig
    if savefig:
        fig.savefig("{}.{}".format(basename, savefig))
    else:
        plt.show()
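
A minimal call sketch (the AST results file and settings file names are hypothetical placeholders):

# write the per-filter bias/sigma/completeness diagnostics to a PNG
plot_toothpick_details("field.gst.fake.fits", "beast_settings.txt", savefig="png")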
Example #7
def split_create_physicsmodel(beast_settings_info, nsubs=1, nprocs=1):
    """
    Making the physics model grid takes a while for production runs.  This
    creates scripts to run each subgrid as a separate job.

    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    nsubs : int (default=1)
        number of subgrids to split the physics model into

    nprocs : int (default=1)
        Number of parallel processes to use
        (currently only implemented for subgrids)

    """

    # process beast settings info
    if isinstance(beast_settings_info, str):
        settings = beast_settings.beast_settings(beast_settings_info)
    elif isinstance(beast_settings_info, beast_settings.beast_settings):
        settings = beast_settings_info
    else:
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # make sure the project directory exists
    create_project_dir(settings.project)

    # directory for scripts
    job_path = "./{0}/model_batch_jobs/".format(settings.project)
    if not os.path.isdir(job_path):
        os.mkdir(job_path)

    log_path = job_path + "logs/"
    if not os.path.isdir(log_path):
        os.mkdir(log_path)

    for i in range(nsubs):

        joblist_file = job_path + "create_physicsmodel_" + str(i) + ".job"
        with open(joblist_file, "w") as jf:

            jf.write(
                "python -m beast.tools.run.create_physicsmodel "
                + " {0} ".format(settings.settings_file)
                + " --nsubs "
                + str(nsubs)
                + " --nprocs "
                + str(nprocs)
                + " --subset "
                + str(i)
                + " "
                + str(i + 1)
                + " >> "
                + log_path
                + "create_physicsmodel_"
                + str(i)
                + ".log\n"
            )

        # slurm needs it to be executable
        os.chmod(joblist_file, stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH)
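
A short usage sketch (the settings file name is a placeholder). This only writes the job scripts; each one still has to be run or submitted, e.g. with the same "at -f ... now" pattern used by the wrappers above:

split_create_physicsmodel("beast_settings.txt", nsubs=4, nprocs=1)
# then, for each generated script (directory name depends on settings.project):
#   at -f ./<project>/model_batch_jobs/create_physicsmodel_0.job now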
Example #8
def merge_files(beast_settings_info, use_sd=True, nsubs=1, partial=False):
    """
    Merge all of the results from the assorted fitting sub-files (divided by
    source density, subgrids, or both).

    If fitting is in progress but you want to check results of completed stars,
    set partial=True.  This is only relevant when using subgrids.

    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    use_sd : boolean (default=True)
        set to True if the fitting used source density bins

    nsubs : int (default=1)
        number of subgrids used for the physics model

    partial : boolean (default=False)
        If True, the output merged files will only have stars that have been
        run across all subgrids.  If stars have only been fit in some subgrids
        and not others, they will be discarded in the "partial" output files.
        Currently only implemented for 1D PDFs and stats (not lnP) files.

    """

    # if there's no SD and no subgridding, running this is unnecessary
    if (not use_sd) and (nsubs == 1):
        print("No merging necessary")
        return

    # process beast settings info
    if isinstance(beast_settings_info, str):
        settings = beast_settings.beast_settings(beast_settings_info)
    elif isinstance(beast_settings_info, beast_settings.beast_settings):
        settings = beast_settings_info
    else:
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # get file name lists (to check if they exist and/or need to be resumed)
    file_dict = create_filenames.create_filenames(settings,
                                                  use_sd=use_sd,
                                                  nsubs=nsubs)

    # - input files
    # photometry_files = file_dict['photometry_files']
    # modelsedgrid_files = file_dict['modelsedgrid_files']
    # noise_files = file_dict['noise_files']

    # - output files
    stats_files = file_dict["stats_files"]
    pdf_files = file_dict["pdf_files"]
    lnp_files = file_dict["lnp_files"]

    # - other useful info
    sd_sub_info = file_dict["sd_sub_info"]
    # gridsub_info = file_dict['gridsub_info']
    # the unique sets of SD+sub
    unique_sd_sub = [
        x for i, x in enumerate(sd_sub_info) if i == sd_sub_info.index(x)
    ]

    # --------------------
    # no subgrids
    # --------------------

    if nsubs == 1:

        out_filebase = "{0}/{0}".format(settings.project)
        reorder_tags = [
            "bin{0}_sub{1}".format(x[0], x[1]) for x in unique_sd_sub
        ]
        merge_beast_stats.merge_stats_files(stats_files,
                                            out_filebase,
                                            reorder_tag_list=reorder_tags)

    # --------------------
    # use subgrids
    # --------------------

    if nsubs > 1:

        # runs were split by source density
        if use_sd:

            # lists to save the merged file names
            merged_pdf_files = []
            merged_stats_files = []
            merged_lnp_files = []

            for sd_sub in unique_sd_sub:

                # indices with the current sd_sub
                ind = [j for j, x in enumerate(sd_sub_info) if x == sd_sub]

                # merge the subgrid files for that SD+sub
                out_filebase = "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}".format(
                    settings.project, sd_sub[0], sd_sub[1])
                if partial:
                    out_filebase += "_partial"

                # - 1D PDFs and stats
                (
                    merged_pdf1d_fname,
                    merged_stats_fname,
                ) = subgridding_tools.merge_pdf1d_stats(
                    [pdf_files[j] for j in ind],
                    [stats_files[j] for j in ind],
                    re_run=False,
                    output_fname_base=out_filebase,
                    partial=partial,
                )

                merged_pdf_files.append(merged_pdf1d_fname)
                merged_stats_files.append(merged_stats_fname)

                # - lnP files
                if not partial:
                    merged_lnp_fname = subgridding_tools.merge_lnp(
                        [lnp_files[j] for j in ind],
                        re_run=False,
                        output_fname_base=out_filebase,
                        threshold=-10,
                    )
                    merged_lnp_files.append(merged_lnp_fname)

            # merge the merged stats files
            out_filebase = "{0}/{0}".format(settings.project)
            reorder_tags = [
                "bin{0}_sub{1}".format(x[0], x[1]) for x in unique_sd_sub
            ]
            merge_beast_stats.merge_stats_files(merged_stats_files,
                                                out_filebase,
                                                reorder_tag_list=reorder_tags)

        # runs weren't split by source density
        else:

            out_filebase = "{0}/{0}".format(settings.project)

            # - 1D PDFs and stats
            subgridding_tools.merge_pdf1d_stats(
                pdf_files,
                stats_files,
                output_fname_base=out_filebase,
                partial=partial,
            )

            # - lnP files
            if not partial:
                subgridding_tools.merge_lnp(
                    lnp_files,
                    re_run=False,
                    output_fname_base=out_filebase,
                    threshold=-10,
                )
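
A minimal call sketch (the settings file name is a placeholder):

# merge the per-bin / per-subgrid outputs back together; set partial=True
# to peek at completed stars while subgrid fitting is still in progress
merge_files("beast_settings.txt", use_sd=True, nsubs=4)
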
def generate_files_for_tests(run_beast=True, run_tools=True):
    """
    Use the metal_small example to generate a full set of files for the BEAST
    regression tests.

    Parameters
    ----------
    run_beast : boolean (default=True)
        if True, run the BEAST

    run_tools : boolean (default=True)
        if True, run the code to generate things for tools
    """

    # read in BEAST settings
    settings_orig = beast_settings.beast_settings("beast_settings.txt")
    # also make a version with subgrids
    settings_subgrids = copy.deepcopy(settings_orig)
    settings_subgrids.n_subgrid = 2
    settings_subgrids.project = f"{settings_orig.project}_subgrids"

    # ==========================================
    # run the beast for each set of settings
    # ==========================================

    if run_beast:

        for settings in [settings_orig, settings_subgrids]:

            # -----------------
            # physics model
            # -----------------
            create_physicsmodel.create_physicsmodel(
                settings,
                nsubs=settings.n_subgrid,
                nprocs=1,
            )

            # -----------------
            # ASTs
            # -----------------

            # currently only works for no subgrids
            if settings.n_subgrid == 1:
                make_ast_inputs.make_ast_inputs(settings,
                                                pick_method="flux_bin_method")

            # -----------------
            # obs model
            # -----------------
            create_obsmodel.create_obsmodel(
                settings,
                use_sd=False,
                nsubs=settings.n_subgrid,
                nprocs=1,
                use_rate=True,
            )

            # -----------------
            # trimming
            # -----------------

            # make file names
            file_dict = create_filenames.create_filenames(
                settings, use_sd=False, nsubs=settings.n_subgrid)

            # read in the observed data
            obsdata = Observations(settings.obsfile, settings.filters,
                                   settings.obs_colnames)

            for i in range(settings.n_subgrid):

                # get the modesedgrid on which to generate the noisemodel
                modelsedgridfile = file_dict["modelsedgrid_files"][i]
                modelsedgrid = SEDGrid(modelsedgridfile)

                # read in the noise model just created
                noisemodel_vals = noisemodel.get_noisemodelcat(
                    file_dict["noise_files"][i])

                # trim the model sedgrid
                sed_trimname = file_dict["modelsedgrid_trim_files"][i]
                noisemodel_trimname = file_dict["noise_trim_files"][i]

                trim_grid.trim_models(
                    modelsedgrid,
                    noisemodel_vals,
                    obsdata,
                    sed_trimname,
                    noisemodel_trimname,
                    sigma_fac=3.0,
                )

            # -----------------
            # fitting
            # -----------------

            run_fitting.run_fitting(
                settings,
                use_sd=False,
                nsubs=settings.n_subgrid,
                nprocs=1,
                pdf2d_param_list=["Av", "M_ini", "logT"],
                pdf_max_nbins=200,
            )

            # -----------------
            # merging
            # -----------------

            # it'll automatically skip for no subgrids
            merge_files.merge_files(settings,
                                    use_sd=False,
                                    nsubs=settings.n_subgrid)

            print("\n\n")

    # ==========================================
    # reference files for assorted tools
    # ==========================================

    if run_tools:

        # -----------------
        # compare_spec_type
        # -----------------

        # the input settings
        input = {
            "spec_ra": [72.67213351],
            "spec_dec": [-67.71720515],
            "spec_type": ["A"],
            "spec_subtype": [0],
            "lumin_class": ["IV"],
            "match_radius": 0.2,
        }

        # run it
        output = compare_spec_type.compare_spec_type(
            settings_orig.obsfile,
            "{0}/{0}_stats.fits".format(settings_orig.project),
            **input,
        )

        # save the inputs and outputs
        asdf.AsdfFile({
            "input": input,
            "output": output
        }).write_to("{0}/{0}_compare_spec_type.asdf".format(
            settings_orig.project))

        # -----------------
        # star_type_probability
        # -----------------

        # input settings
        input = {
            "output_filebase": None,
            "ext_O_star_params": {
                "min_M_ini": 10,
                "min_Av": 0.5,
                "max_Av": 5
            },
        }

        # run it
        output = star_type_probability.star_type_probability(
            "{0}/{0}_pdf1d.fits".format(settings_orig.project),
            "{0}/{0}_pdf2d.fits".format(settings_orig.project),
            **input,
        )

        # save the inputs and outputs
        asdf.AsdfFile({
            "input": input,
            "output": output
        }).write_to("{0}/{0}_star_type_probability.asdf".format(
            settings_orig.project))

    # ==========================================
    # asdf file permissions
    # ==========================================

    # for unknown reasons, asdf currently writes files with permissions set
    # to -rw-------.  This changes it to -rw-r--r-- (like the rest of the
    # BEAST files) so Karl can easily copy them over to the cached file
    # website.

    # list of asdf files
    asdf_files = glob.glob("*/*.asdf")
    # go through each one to change permissions
    for fname in asdf_files:
        os.chmod(fname,
                 stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)
Example #10
def create_filenames(
    beast_settings_info,
    use_sd=True,
    nsubs=1,
    choose_sd_sub=None,
    choose_subgrid=None,
):
    """
    Helper function to make all of the filenames.  SED grid and noise model
    are trimmed versions.

    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    use_sd : boolean (default=True)
        If True, create source density dependent noise models (determined by
        finding matches to settings.astfile with SD info)

    nsubs : int (default=1)
        number of subgrids used for the physics model

    choose_sd_sub : list of two strings (default=None)
        If this is set, the fitting will just be for this combo of SD+sub,
        rather than all of them.  Overrides use_sd.
        format of the list: ['#','#']

    choose_subgrid : int (default=None)
        If this is set, the fitting will just be for this subgrid index.
        If nsubs=1, this is ignored.

    Returns
    -------
    dictionary with the lists of filenames, plus the corresponding SD+sub and
    gridsub values for easy referencing

    """

    # process beast settings info
    if isinstance(beast_settings_info, str):
        settings = beast_settings.beast_settings(beast_settings_info)
    elif isinstance(beast_settings_info, beast_settings.beast_settings):
        settings = beast_settings_info
    else:
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # input files
    photometry_files = []
    modelsedgrid_files = []
    modelsedgrid_trim_files = []
    noise_files = []
    noise_trim_files = []

    # output files
    stats_files = []
    pdf_files = []
    pdf2d_files = []
    lnp_files = []

    # other potentially useful things
    sd_sub_info = []
    gridsub_info = []

    # ** no subgrids **

    if nsubs == 1:

        # -- SD+sub specified
        if choose_sd_sub is not None:

            photometry_files.append(
                settings.obsfile.replace(
                    ".fits",
                    "_bin{0}_sub{1}.fits".format(choose_sd_sub[0],
                                                 choose_sd_sub[1]),
                ))
            modelsedgrid_files.append("{0}/{0}_seds.grid.hd5".format(
                settings.project))
            modelsedgrid_trim_files.append(
                "{0}/{0}_bin{1}_sub{2}_seds_trim.grid.hd5".format(
                    settings.project, choose_sd_sub[0], choose_sd_sub[1]))
            noise_files.append("{0}/{0}_noisemodel_bin{1}.grid.hd5".format(
                settings.project, choose_sd_sub[0]))
            noise_trim_files.append(
                "{0}/{0}_bin{1}_sub{2}_noisemodel_trim.grid.hd5".format(
                    settings.project, choose_sd_sub[0], choose_sd_sub[1]))

            stats_files.append("{0}/{0}_bin{1}_sub{2}_stats.fits".format(
                settings.project, choose_sd_sub[0], choose_sd_sub[1]))
            pdf_files.append("{0}/{0}_bin{1}_sub{2}_pdf1d.fits".format(
                settings.project, choose_sd_sub[0], choose_sd_sub[1]))
            pdf2d_files.append("{0}/{0}_bin{1}_sub{2}_pdf2d.fits".format(
                settings.project, choose_sd_sub[0], choose_sd_sub[1]))
            lnp_files.append("{0}/{0}_bin{1}_sub{2}_lnp.hd5".format(
                settings.project, choose_sd_sub[0], choose_sd_sub[1]))

            sd_sub_info.append([choose_sd_sub[0], choose_sd_sub[1]])

        # -- using source density info
        elif use_sd is True:

            photometry_files = sorted(
                glob.glob(settings.obsfile.replace(".fits",
                                                   "_bin*_sub*.fits")))

            for phot_file in photometry_files:
                # get the sd/sub number
                dpos = phot_file.rfind("_bin")
                spos = phot_file.rfind("sub")
                ppos = phot_file.rfind(".")
                curr_sd = phot_file[dpos + 4:spos - 1]
                curr_sub = phot_file[spos + 3:ppos]

                # construct other file names
                modelsedgrid_files.append("{0}/{0}_seds.grid.hd5".format(
                    settings.project))
                modelsedgrid_trim_files.append(
                    "{0}/{0}_bin{1}_sub{2}_seds_trim.grid.hd5".format(
                        settings.project, curr_sd, curr_sub))
                noise_files.append("{0}/{0}_noisemodel_bin{1}.grid.hd5".format(
                    settings.project, curr_sd))
                noise_trim_files.append(
                    "{0}/{0}_bin{1}_sub{2}_noisemodel_trim.grid.hd5".format(
                        settings.project, curr_sd, curr_sub))

                stats_files.append("{0}/{0}_bin{1}_sub{2}_stats.fits".format(
                    settings.project, curr_sd, curr_sub))
                pdf_files.append("{0}/{0}_bin{1}_sub{2}_pdf1d.fits".format(
                    settings.project, curr_sd, curr_sub))
                pdf2d_files.append("{0}/{0}_bin{1}_sub{2}_pdf2d.fits".format(
                    settings.project, curr_sd, curr_sub))
                lnp_files.append("{0}/{0}_bin{1}_sub{2}_lnp.hd5".format(
                    settings.project, curr_sd, curr_sub))

                sd_sub_info.append([curr_sd, curr_sub])

        # -- no source density splitting
        else:

            photometry_files.append(settings.obsfile)
            modelsedgrid_files.append("{0}/{0}_seds.grid.hd5".format(
                settings.project))
            modelsedgrid_trim_files.append("{0}/{0}_seds_trim.grid.hd5".format(
                settings.project))
            noise_files.append("{0}/{0}_noisemodel.grid.hd5".format(
                settings.project))
            noise_trim_files.append("{0}/{0}_noisemodel_trim.grid.hd5".format(
                settings.project))

            stats_files.append("{0}/{0}_stats.fits".format(settings.project))
            pdf_files.append("{0}/{0}_pdf1d.fits".format(settings.project))
            pdf2d_files.append("{0}/{0}_pdf2d.fits".format(settings.project))
            lnp_files.append("{0}/{0}_lnp.hd5".format(settings.project))

    # ** with subgrids **

    # subgrids require a pickle file with grid info
    gridpickle_files = []

    if nsubs > 1:

        # start with getting the model grid files (note these aren't trimmed ones)
        outdir = os.path.join(".", settings.project)
        subgrid_names_file = os.path.join(outdir, "subgrid_fnames.txt")
        temp = get_modelsubgridfiles(subgrid_names_file)
        # use that to get the number of subgrids and make a list of them
        gridsub_list = np.arange(len(temp))
        # or a subset if set
        if choose_subgrid is not None:
            gridsub_list = [choose_subgrid]

        # -- SD+sub specified
        if choose_sd_sub is not None:

            for gridsub in gridsub_list:

                photometry_files.append(
                    settings.obsfile.replace(
                        ".fits",
                        "_bin{0}_sub{1}.fits".format(choose_sd_sub[0],
                                                     choose_sd_sub[1]),
                    ))

                modelsedgrid_files.append("{0}/{0}_seds.gridsub{1}.hd5".format(
                    settings.project, gridsub))
                modelsedgrid_trim_files.append(
                    "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_seds_trim.grid.hd5"
                    .format(settings.project, choose_sd_sub[0],
                            choose_sd_sub[1], gridsub))
                noise_files.append(
                    "{0}/{0}_noisemodel_bin{1}.gridsub{2}.hd5".format(
                        settings.project, choose_sd_sub[0], gridsub))
                noise_trim_files.append(
                    "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_noisemodel_trim.grid.hd5"
                    .format(settings.project, choose_sd_sub[0],
                            choose_sd_sub[1], gridsub))

                stats_files.append(
                    "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_stats.fits"
                    .format(settings.project, choose_sd_sub[0],
                            choose_sd_sub[1], gridsub))
                pdf_files.append(
                    "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_pdf1d.fits"
                    .format(settings.project, choose_sd_sub[0],
                            choose_sd_sub[1], gridsub))
                pdf2d_files.append(
                    "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_pdf2d.fits"
                    .format(settings.project, choose_sd_sub[0],
                            choose_sd_sub[1], gridsub))
                lnp_files.append(
                    "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_lnp.hd5".
                    format(settings.project, choose_sd_sub[0],
                           choose_sd_sub[1], gridsub))

                gridpickle_files.append(
                    "{0}/bin{1}_sub{2}/grid_info_dict.pkl".format(
                        settings.project, choose_sd_sub[0], choose_sd_sub[1]))

                sd_sub_info.append([choose_sd_sub[0], choose_sd_sub[1]])
                gridsub_info.append(gridsub)

        # -- using source density info
        elif use_sd is True:

            phot_file_list = sorted(
                glob.glob(settings.obsfile.replace(".fits",
                                                   "_bin*_sub*.fits")))

            for phot_file in phot_file_list:
                # get the sd/sub number
                dpos = phot_file.rfind("_bin")
                spos = phot_file.rfind("sub")
                ppos = phot_file.rfind(".")
                curr_sd = phot_file[dpos + 4:spos - 1]
                curr_sub = phot_file[spos + 3:ppos]

                # construct other file names
                for gridsub in gridsub_list:
                    photometry_files.append(phot_file)
                    modelsedgrid_files.append(
                        "{0}/{0}_seds.gridsub{1}.hd5".format(
                            settings.project, gridsub))
                    modelsedgrid_trim_files.append(
                        "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_seds_trim.grid.hd5"
                        .format(settings.project, curr_sd, curr_sub, gridsub))
                    noise_files.append(
                        "{0}/{0}_noisemodel_bin{1}.gridsub{2}.hd5".format(
                            settings.project, curr_sd, gridsub))
                    noise_trim_files.append(
                        "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_noisemodel_trim.grid.hd5"
                        .format(settings.project, curr_sd, curr_sub, gridsub))

                    stats_files.append(
                        "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_stats.fits"
                        .format(settings.project, curr_sd, curr_sub, gridsub))
                    pdf_files.append(
                        "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_pdf1d.fits"
                        .format(settings.project, curr_sd, curr_sub, gridsub))
                    pdf2d_files.append(
                        "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_pdf2d.fits"
                        .format(settings.project, curr_sd, curr_sub, gridsub))
                    lnp_files.append(
                        "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_lnp.hd5"
                        .format(settings.project, curr_sd, curr_sub, gridsub))

                    gridpickle_files.append(
                        "{0}/bin{1}_sub{2}/grid_info_dict.pkl".format(
                            settings.project, curr_sd, curr_sub))

                    sd_sub_info.append([curr_sd, curr_sub])
                    gridsub_info.append(gridsub)

        # -- no source density splitting
        else:

            for gridsub in gridsub_list:
                photometry_files.append(settings.obsfile)
                modelsedgrid_files.append("{0}/{0}_seds.gridsub{1}.hd5".format(
                    settings.project, gridsub))
                modelsedgrid_trim_files.append(
                    "{0}/{0}_gridsub{1}_seds_trim.grid.hd5".format(
                        settings.project, gridsub))
                noise_files.append("{0}/{0}_noisemodel.gridsub{1}.hd5".format(
                    settings.project, gridsub))
                noise_trim_files.append(
                    "{0}/{0}_gridsub{1}_noisemodel_trim.grid.hd5".format(
                        settings.project, gridsub))

                stats_files.append("{0}/{0}_gridsub{1}_stats.fits".format(
                    settings.project, gridsub))
                pdf_files.append("{0}/{0}_gridsub{1}_pdf1d.fits".format(
                    settings.project, gridsub))
                pdf2d_files.append("{0}/{0}_gridsub{1}_pdf2d.fits".format(
                    settings.project, gridsub))
                lnp_files.append("{0}/{0}_gridsub{1}_lnp.hd5".format(
                    settings.project, gridsub))

                gridpickle_files.append("{0}/grid_info_dict.pkl".format(
                    settings.project))

                gridsub_info.append(gridsub)

    # double check that all file lists are the same length
    n_file_list = [
        len(x) for x in [
            photometry_files,
            modelsedgrid_files,
            modelsedgrid_trim_files,
            noise_files,
            noise_trim_files,
            stats_files,
            pdf_files,
            pdf2d_files,
            lnp_files,
        ]
    ]
    if len(np.unique(n_file_list)) > 1:
        print("file list lengths don't match!")
        return None

    return {
        "photometry_files": photometry_files,
        "modelsedgrid_files": modelsedgrid_files,
        "modelsedgrid_trim_files": modelsedgrid_trim_files,
        "noise_files": noise_files,
        "noise_trim_files": noise_trim_files,
        "stats_files": stats_files,
        "pdf_files": pdf_files,
        "pdf2d_files": pdf2d_files,
        "lnp_files": lnp_files,
        "gridpickle_files": gridpickle_files,
        "sd_sub_info": sd_sub_info,
        "gridsub_info": gridsub_info,
    }
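
A minimal usage sketch for the file-name builder above (called as
create_filenames elsewhere in this document); the settings file name and the
bin/sub labels are assumptions, not from the original:

from beast.tools import beast_settings

settings = beast_settings.beast_settings("beast_settings.txt")  # assumed name
# build every input/output file name for one SD bin + sub-catalog
file_dict = create_filenames(settings, use_sd=True, nsubs=1,
                             choose_sd_sub=["1", "0"])
if file_dict is not None:
    print(file_dict["stats_files"])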
Exemplo n.º 11
0
def run_fitting(
    beast_settings_info,
    use_sd=True,
    nsubs=1,
    nprocs=1,
    choose_sd_sub=None,
    choose_subgrid=None,
    pdf2d_param_list=['Av', 'Rv', 'f_A', 'M_ini', 'logA', 'Z', 'distance'],
    pdf_max_nbins=200,
    resume=False,
):
    """
    Run the fitting.  If nsubs > 1, this will find existing subgrids.
    If use_sd is True, will also incorporate source density info.

    The additional choose_* options are to make queue scripts usable,
    by specifying a given SD+sub and/or subgrid for the fitting run.


    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    use_sd : boolean (default=True)
        If True, create source density dependent noise models (determined by
        finding matches to settings.astfile with SD info)

    nsubs : int (default=1)
        number of subgrids used for the physics model

    nprocs : int (default=1)
        Number of parallel processes to use
        (currently only implemented for subgrids)

    choose_sd_sub : list of two strings (default=None)
        If this is set, the fitting will just be for this combo of SD+sub,
        rather than all of them.  Overrides use_sd.
        format of the list: ['#','#']

    choose_subgrid : int (default=None)
        If this is set, the fitting will just be for this subgrid index.
        If nsubs=1, this is ignored.

    pdf2d_param_list : list of strings or None
        If set, do 2D PDFs of these parameters.  If None, don't make 2D PDFs.

    pdf_max_nbins : int (default=200)
        Maximum number of bins to use for the 1D and 2D PDFs

    resume : boolean (default=False)
        choose whether to resume existing run or start over

    """

    # process beast settings info
    if isinstance(beast_settings_info, str):
        settings = beast_settings.beast_settings(beast_settings_info)
    elif isinstance(beast_settings_info, beast_settings.beast_settings):
        settings = beast_settings_info
    else:
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # keep track of time
    start_time = time.perf_counter()

    # --------------------
    # make lists of file names
    # --------------------

    file_dict = create_filenames.create_filenames(
        settings,
        use_sd=use_sd,
        nsubs=nsubs,
        choose_sd_sub=choose_sd_sub,
        choose_subgrid=choose_subgrid,
    )

    # input files
    photometry_files = file_dict["photometry_files"]
    # modelsedgrid_files = file_dict["modelsedgrid_files"]
    modelsedgrid_trim_files = file_dict["modelsedgrid_trim_files"]
    # noise_files = file_dict["noise_files"]
    noise_trim_files = file_dict["noise_trim_files"]

    # output files
    stats_files = file_dict["stats_files"]
    pdf_files = file_dict["pdf_files"]
    pdf2d_files = file_dict["pdf2d_files"]
    if pdf2d_param_list is None:
        pdf2d_files = [None for i in range(len(pdf2d_files))]
    lnp_files = file_dict["lnp_files"]

    # total number of files
    n_files = len(photometry_files)

    # other potentially useful info
    sd_sub_info = file_dict["sd_sub_info"]
    # gridsub_info = file_dict['gridsub_info']

    # if using subgrids, make the grid dictionary file:
    # File where the ranges and number of unique values for the grid
    # will be stored (this can take a while to calculate)

    if nsubs > 1:

        gridpickle_files = file_dict["gridpickle_files"]

        for i in range(len(gridpickle_files)):
            if not os.path.isfile(gridpickle_files[i]):

                # list of corresponding SED grids and noise models

                # - with SD+sub: get file list for ALL subgrids at current SD+sub
                if use_sd or (choose_sd_sub is not None):
                    temp = create_filenames.create_filenames(
                        settings,
                        nsubs=nsubs,
                        choose_sd_sub=sd_sub_info[i],
                        choose_subgrid=None)
                    modelsedgrid_trim_list = temp["modelsedgrid_trim_files"]
                    noise_trim_list = temp["noise_trim_files"]

                # - no SD info: get file list for ALL subgrids
                else:
                    temp = create_filenames.create_filenames(
                        settings,
                        use_sd=False,
                        nsubs=nsubs,
                        choose_subgrid=None)
                    modelsedgrid_trim_list = temp["modelsedgrid_trim_files"]
                    noise_trim_list = temp["noise_trim_files"]

                # create the grid info dictionary
                print("creating grid_info_dict for " + gridpickle_files[i])
                grid_info_dict = subgridding_tools.reduce_grid_info(
                    modelsedgrid_trim_list, noise_trim_list, nprocs=nprocs)
                # save it
                with open(gridpickle_files[i], "wb") as p:
                    pickle.dump(grid_info_dict, p)
                print("wrote grid_info_dict to " + gridpickle_files[i])

    # --------------------
    # do the fitting!
    # --------------------

    # set up function inputs

    if nsubs == 1:

        input_list = [(
            settings,
            photometry_files[i],
            modelsedgrid_trim_files[i],
            noise_trim_files[i],
            pdf_max_nbins,
            stats_files[i],
            pdf_files[i],
            pdf2d_files[i],
            pdf2d_param_list,
            lnp_files[i],
            None,
            resume,
        ) for i in range(n_files)]

    if nsubs > 1:

        input_list = [(
            settings,
            photometry_files[i],
            modelsedgrid_trim_files[i],
            noise_trim_files[i],
            pdf_max_nbins,
            stats_files[i],
            pdf_files[i],
            pdf2d_files[i],
            pdf2d_param_list,
            lnp_files[i],
            gridpickle_files[i],
            resume,
        ) for i in range(n_files)]

    # run the fitting (via parallel wrapper)

    parallel_wrapper(fit_submodel, input_list, nprocs=nprocs)

    # see how long it took!
    new_time = time.perf_counter()
    print("time to fit: ", (new_time - start_time) / 60.0, " min")
Exemplo n.º 12
0
def create_obsmodel(
    beast_settings_info,
    use_sd=True,
    nsubs=1,
    nprocs=1,
    subset=[None, None],
):
    """
    Create the observation models.  If nsubs > 1, this will find existing
    subgrids.  If use_sd is True, will also incorporate source density
    info.


    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    use_sd : boolean (default=True)
        If True, create source density dependent noise models (determined by
        finding matches to settings.astfile with SD info)

    nsubs : int (default=1)
        number of subgrids used for the physics model

    nprocs : int (default=1)
        Number of parallel processes to use
        (currently only implemented for subgrids)

    subset : list of two ints (default=[None,None])
        Only process subgrids in the range [start,stop].
        (only relevant if nsubs > 1)

    """

    # process beast settings info
    if isinstance(beast_settings_info, str):
        settings = beast_settings.beast_settings(beast_settings_info)
    elif isinstance(beast_settings_info, beast_settings.beast_settings):
        settings = beast_settings_info
    else:
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # --------------------
    # figure out if there are source density bins
    # --------------------

    ast_file_list = sorted(
        glob.glob(settings.astfile.replace(".fits", "*_bin*")))

    if use_sd and (len(ast_file_list) > 0):

        sd_list = []
        for ast_file in ast_file_list:
            dpos = ast_file.rfind("_bin")
            ppos = ast_file.rfind(".")
            sd_list.append(ast_file[dpos + 4:ppos])
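            # e.g. an ast_file named "myasts_bin3.fits" (hypothetical)
            # yields the bin label "3"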
        print("sd list: ", sd_list)

    else:
        # if there are no ASTs with source densities, the flag should be "false"
        use_sd = False

    # --------------------
    # no subgrids
    # --------------------

    if nsubs == 1:

        modelsedgridfile = "{0}/{0}_seds.grid.hd5".format(settings.project)

        # if we're splitting by source density
        if use_sd:

            input_list = [(settings, modelsedgridfile, curr_sd)
                          for curr_sd in sd_list]

            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)

        # if we're not splitting by source density
        else:

            input_list = [(settings, modelsedgridfile, None)]

            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)

    # --------------------
    # use subgrids
    # --------------------

    if nsubs > 1:

        # get the list of physics model files
        outdir = os.path.join(".", settings.project)
        subgrid_names_file = os.path.join(outdir, "subgrid_fnames.txt")
        modelsedgridfiles = get_modelsubgridfiles(subgrid_names_file)[slice(
            subset[0], subset[1])]

        # if we're splitting by source density
        if use_sd:

            input_list = [(settings, sedfile, curr_sd)
                          for sedfile in modelsedgridfiles
                          for curr_sd in sd_list]

            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)

        # if we're not splitting by source density
        else:

            input_list = [(settings, sedfile, None)
                          for sedfile in modelsedgridfiles]

            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)
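
A minimal sketch of calling create_obsmodel; the settings file name is an
assumption, and the SED grid(s) referenced by the settings must already exist:

create_obsmodel("beast_settings.txt", use_sd=True, nsubs=1, nprocs=1)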
Exemplo n.º 13
0
def plot_source_density_map(sd_image_file, beast_settings_file):
    """
    Make a plot of the source density map.

    The source density map uses 5 arcsec^2 spatial bins, and the color scale
    is set by the source density bins defined in the specified beast settings
    file (which must use custom binning).

    The output plot is saved with the same location/name as the image file,
    but with a .png extension instead of .fits.

    Parameters
    ----------
    sd_image_file : string
        name of SD image FITS file
 
    beast_settings_file : string
        name of beast settings .txt file


    """

    image_file = fits.open(sd_image_file)
    image_file.info()

    # assuming the image data is first
    image_data = image_file[0].data

    image_file.close()

    # read in beast settings file
    settings = beast_settings.beast_settings(beast_settings_file)

    if settings.sd_binmode == "custom":
        sd_bins = settings.sd_custom

    # throw error if binning isn't custom
    else:
        raise Exception(
            'Expected custom binning. Please ensure the right beast settings file is specified.'
        )

    # define colormap
    cmap = plt.cm.viridis

    # extract all colors from the viridis map
    cmaplist = [cmap(i) for i in range(cmap.N)]

    # create the new map
    cmap = mpl.colors.LinearSegmentedColormap.from_list(
        'Custom cmap', cmaplist, cmap.N)

    # define the bins and normalize
    norm = mpl.colors.BoundaryNorm(sd_bins, cmap.N)

    fig = plt.figure(0, [10, 10])
    im = plt.imshow(image_data, origin="lower", cmap=cmap, norm=norm)

    plt.colorbar(im, fraction=0.046, pad=0.04, ticks=sd_bins)

    plt.xlabel("Pixel (originally RA)")
    plt.ylabel("Pixel (originally DEC)")
    plt.title(r"Density of Sources per 5 arcsec$^2$")

    plt.tight_layout()

    fig.savefig(sd_image_file.replace("_image.fits", "_map_plot.png"))
    plt.close(fig)
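
A minimal sketch of the plotting helper above; both file names are
assumptions, and the settings file must define custom source density bins:

plot_source_density_map(
    "data/mycat.st_source_den_image.fits",
    "beast_settings.txt",
)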
Exemplo n.º 14
0
def setup_batch_beast_fit(
    beast_settings_info,
    num_percore=5,
    nice=None,
    overwrite_logfile=True,
    prefix=None,
    use_sd=True,
    pdf2d_param_list=['Av', 'Rv', 'f_A', 'M_ini', 'logA', 'Z', 'distance'],
    nsubs=1,
    nprocs=1,
):
    """
    Sets up batch files for submission to the 'at' queue on
    linux (or similar) systems

    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    num_percore : int (default = 5)
        number of fitting runs per core

    nice : int (default = None)
        set this to an integer (-20 to 20) to prepend a "nice" level
        to the fitting command

    overwrite_logfile : boolean (default = True)
        if True, will overwrite the log file; if False, will append to
        existing log file

    prefix : string (default=None)
        Set this to a string (such as 'source activate astroconda') to prepend
        to each batch file (use '\n's to make multiple lines)

    use_sd : boolean (default=True)
        If True, split runs based on source density (determined by finding
        matches to settings.astfile with SD info)

    pdf2d_param_list : list of strings or None
        If set, do 2D PDFs of these parameters.  If None, don't make 2D PDFs.

    nsubs : int (default=1)
        number of subgrids used for the physics model

    nprocs : int (default=1)
        Number of parallel processes to use when doing the fitting
        (currently only implemented for subgrids)


    Returns
    -------
    run_info_dict : dict
        Dictionary indicating which catalog files have complete modeling, and
        which job files need to be run

    """

    # process beast settings info
    if isinstance(beast_settings_info, str):
        settings = beast_settings.beast_settings(beast_settings_info)
    elif isinstance(beast_settings_info, beast_settings.beast_settings):
        settings = beast_settings_info
    else:
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # setup the subdirectory for the batch and log files
    job_path = settings.project + "/fit_batch_jobs/"
    if not os.path.isdir(job_path):
        os.mkdir(job_path)

    log_path = job_path + "logs/"
    if not os.path.isdir(log_path):
        os.mkdir(log_path)

    # get file name lists (to check if they exist and/or need to be resumed)
    file_dict = create_filenames.create_filenames(settings,
                                                  use_sd=use_sd,
                                                  nsubs=nsubs)

    # - input files
    photometry_files = file_dict["photometry_files"]
    # modelsedgrid_files = file_dict['modelsedgrid_files']
    # noise_files = file_dict['noise_files']

    # - output files
    stats_files = file_dict["stats_files"]
    pdf_files = file_dict["pdf_files"]
    lnp_files = file_dict["lnp_files"]

    # - total number of files
    n_files = len(photometry_files)

    # - other useful info
    sd_sub_info = file_dict["sd_sub_info"]
    gridsub_info = file_dict["gridsub_info"]

    # names of output log files
    log_files = []

    # initialize the job-file handle here so it is defined before the loop
    # (otherwise it could be referenced before assignment below)
    pf = None

    for i in range(n_files):

        sd_piece = ""
        if use_sd is True:
            sd_piece = "_bin" + sd_sub_info[i][0] + "_sub" + sd_sub_info[i][1]

        gridsub_piece = ""
        if nsubs > 1:
            gridsub_piece = "_gridsub" + str(gridsub_info[i])

        log_files.append("beast_fit" + sd_piece + gridsub_piece + ".log")

    # start making the job files!

    pf_open = False
    cur_f = 0
    cur_total_size = 0.0
    j = -1

    # keep track of which files are done running
    run_info_dict = {
        "phot_file": photometry_files,
        "done": np.full(n_files, False),
        "files_to_run": [],
    }

    for i, phot_file in enumerate(photometry_files):

        print("")

        # check if this is a full run
        reg_run = False
        run_done = False
        if not os.path.isfile(stats_files[i]):
            reg_run = True
            print("no stats file")
        if not os.path.isfile(pdf_files[i]):
            reg_run = True
            print("no pdf1d file")
        if not os.path.isfile(lnp_files[i]):
            reg_run = True
            print("no lnp file")

        # first check if the pdf1d mass spacing is correct
        if not reg_run:
            hdulist = fits.open(pdf_files[i])
            delta1 = hdulist["M_ini"].data[-1, 1] - hdulist["M_ini"].data[-1,
                                                                          0]
            if delta1 > 1.0:  # old linear spacing
                print("pdf1d lin mass spacing - full refitting needed")
                old_mass_spacing = True
            else:
                old_mass_spacing = False
                print("pdf1d log mass spacing - ok")

            if old_mass_spacing:
                run_done = False
                reg_run = True

        # now check if the number of results is the same as
        #    the number of observations
        if not reg_run:
            # get the observed catalog
            obs = Table.read(phot_file)

            # get the fit results catalog
            t = Table.read(stats_files[i], hdu=1)
            # get the number of stars that have been fit
            (indxs, ) = np.where(t["Pmax"] != 0.0)

            # get the number of entries in the lnp file
            f = tables.open_file(lnp_files[i], "r")
            nlnp = f.root._v_nchildren - 2
            f.close()

            print("# obs, stats, lnp = ", len(obs), len(indxs), nlnp)
            if (len(indxs) == len(obs)) & (nlnp == len(obs)):

                # final check, is the pdf1d file correctly populated
                tot_prob = np.sum(hdulist["M_ini"].data, axis=1)
                (tindxs, ) = np.where(tot_prob > 0.0)
                print("# good pdf1d = ", len(tindxs) - 1)
                if len(tindxs) == (len(obs) + 1):
                    run_done = True

        if run_done:
            print(stats_files[i] + " done")
            run_info_dict["done"][i] = True
        else:
            j += 1
            if j % num_percore == 0:
                cur_f += 1

                # close previous files
                if j != 0:
                    pf.close()
                    # slurm needs the job file to be executable
                    #   flake8/codestyle error ignored as this if statement only executed
                    #   for j > 0 and appropriate joblist_file defined in j - 1
                    os.chmod(joblist_file, stat.S_IRWXU | stat.S_IRGRP
                             | stat.S_IROTH)  # noqa: F821

                    print(
                        "total sed_trim size [GiB] = ",
                        cur_total_size / (1024.0 * 1024.0 * 1024.0),
                    )
                    cur_total_size = 0.0

                # open the slurm and param files
                pf_open = True
                joblist_file = job_path + "beast_batch_fit_" + str(
                    cur_f) + ".joblist"
                pf = open(joblist_file, "w")
                run_info_dict["files_to_run"].append(joblist_file)

                # write out anything at the beginning of the file
                if prefix is not None:
                    pf.write(prefix + "\n")

            # flag for resuming
            resume_str = ""
            if reg_run:
                print(stats_files[i] + " does not exist " +
                      "- adding job as a regular fit job (not resume job)")
            else:
                print(stats_files[i] +
                      " not done - adding to continue fitting list (" +
                      str(len(indxs)) + "/" + str(len(t["Pmax"])) + ")")
                resume_str = "-r"

            # prepend a `nice` value
            nice_str = ""
            if nice is not None:
                nice_str = "nice -n" + str(int(nice)) + " "

            # choose whether to append or overwrite log file
            pipe_str = " > "
            if not overwrite_logfile:
                pipe_str = " >> "

            # set SD+sub option
            sd_str = ""
            if use_sd is True:
                sd_str = ' --choose_sd_sub "{0}" "{1}" '.format(
                    sd_sub_info[i][0], sd_sub_info[i][1])

            # set gridsub option
            gs_str = ""
            if nsubs > 1:
                gs_str = " --choose_subgrid {0} ".format(gridsub_info[i])

            # set 2D PDF option
            if pdf2d_param_list is None:
                pdf2d_str = "None"
            else:
                pdf2d_str = " " + " ".join(pdf2d_param_list) + " "

            job_command = (nice_str +
                           "python -m beast.tools.run.run_fitting " +
                           " {0} ".format(settings.settings_file) +
                           resume_str + sd_str + gs_str + " --nsubs " +
                           str(nsubs) + " --nprocs " + str(nprocs) +
                           " --pdf2d_param_list " + pdf2d_str + pipe_str +
                           log_path + log_files[i])

            pf.write(job_command + "\n")

    if pf_open:
        pf.close()

        # slurm needs the job file to be executable
        os.chmod(joblist_file, stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH)

    # return the info about completed modeling
    return run_info_dict
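
A minimal sketch of building the batch fitting jobs (the settings file name is
an assumption); the returned dictionary flags which catalogs are already done:

run_info = setup_batch_beast_fit(
    "beast_settings.txt",
    num_percore=5,
    nice=10,
    use_sd=True,
    nsubs=1,
)
print(run_info["files_to_run"])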
Exemplo n.º 15
0
def create_physicsmodel(beast_settings_info, nsubs=1, nprocs=1, subset=[None, None]):
    """
    Create the physics model grid.  If nsubs > 1, this will make sub-grids.


    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    nsubs : int (default=1)
        number of subgrids to split the physics model into

    nprocs : int (default=1)
        Number of parallel processes to use
        (currently only implemented for subgrids)

    subset : list of two ints (default=[None,None])
        Only process subgrids in the range [start,stop].
        (only relevant if nsubs > 1)

    """

    # process beast settings info
    if isinstance(beast_settings_info, str):
        settings = beast_settings.beast_settings(beast_settings_info)
    elif isinstance(beast_settings_info, beast_settings.beast_settings):
        settings = beast_settings_info
    else:
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # filename for the SED grid
    modelsedgrid_filename = "%s/%s_seds.grid.hd5" % (
        settings.project,
        settings.project,
    )

    # grab the current subgrid slice
    subset_slice = slice(subset[0], subset[1])

    # make sure the project directory exists
    create_project_dir(settings.project)

    # download and load the isochrones
    (iso_fname, oiso) = make_iso_table(
        settings.project,
        oiso=settings.oiso,
        logtmin=settings.logt[0],
        logtmax=settings.logt[1],
        dlogt=settings.logt[2],
        z=settings.z,
    )

    if hasattr(settings, "add_spectral_properties_kwargs"):
        extra_kwargs = settings.add_spectral_properties_kwargs
    else:
        extra_kwargs = None

    if hasattr(settings, "velocity"):
        redshift = (settings.velocity / const.c).decompose().value
    else:
        redshift = 0

    # generate the spectral library (no dust extinction)
    (spec_fname, g_spec) = make_spectral_grid(
        settings.project,
        oiso,
        osl=settings.osl,
        redshift=redshift,
        distance=settings.distances,
        distance_unit=settings.distance_unit,
        extLaw=settings.extLaw,
        add_spectral_properties_kwargs=extra_kwargs,
    )

    # add the stellar priors as weights
    #   also computes the grid weights for the stellar part
    (pspec_fname, g_pspec) = add_stellar_priors(
        settings.project,
        g_spec,
        age_prior_model=settings.age_prior_model,
        mass_prior_model=settings.mass_prior_model,
        met_prior_model=settings.met_prior_model,
        distance_prior_model=settings.distance_prior_model,
    )

    # --------------------
    # no subgrids
    # --------------------

    if nsubs == 1:
        # generate the SED grid by integrating the filter response functions
        #   effect of dust extinction applied before filter integration
        #   also computes the dust priors as weights
        make_extinguished_sed_grid(
            settings.project,
            g_pspec,
            settings.filters,
            extLaw=settings.extLaw,
            av=settings.avs,
            rv=settings.rvs,
            fA=settings.fAs,
            rv_prior_model=settings.rv_prior_model,
            av_prior_model=settings.av_prior_model,
            fA_prior_model=settings.fA_prior_model,
            spec_fname=modelsedgrid_filename,
            add_spectral_properties_kwargs=extra_kwargs,
        )

    # --------------------
    # use subgrids
    # --------------------

    if nsubs > 1:
        # Work with the whole grid up to there (otherwise, priors need a
        # rework - they don't like having only a subset of the parameter
        # space, especially when there's only one age for example)

        # Make subgrids, by splitting the spectral grid into equal sized pieces
        custom_sub_pspec = subgridding_tools.split_grid(pspec_fname, nsubs)

        file_prefix = "{0}/{0}_".format(settings.project)

        # function to process the subgrids individually
        def gen_subgrid(i, sub_name):
            sub_g_pspec = SpectralGrid(sub_name)
            sub_seds_fname = "{}seds.gridsub{}.hd5".format(file_prefix, i)

            # generate the SED grid by integrating the filter response functions
            #   effect of dust extinction applied before filter integration
            #   also computes the dust priors as weights
            (sub_seds_fname, sub_g_seds) = make_extinguished_sed_grid(
                settings.project,
                sub_g_pspec,
                settings.filters,
                extLaw=settings.extLaw,
                av=settings.avs,
                rv=settings.rvs,
                fA=settings.fAs,
                rv_prior_model=settings.rv_prior_model,
                av_prior_model=settings.av_prior_model,
                fA_prior_model=settings.fA_prior_model,
                add_spectral_properties_kwargs=extra_kwargs,
                seds_fname=sub_seds_fname,
            )

            return sub_seds_fname

        # run the above function
        par_tuples = [(i, sub_name) for i, sub_name in enumerate(custom_sub_pspec)][
            subset_slice
        ]

        parallel_wrapper(gen_subgrid, par_tuples, nprocs=nprocs)

        # Save a list of subgrid names that we expect to see
        required_names = [
            "{}seds.gridsub{}.hd5".format(file_prefix, i) for i in range(nsubs)
        ]

        outdir = os.path.join(".", settings.project)
        subgrid_names_file = os.path.join(outdir, "subgrid_fnames.txt")

        with open(subgrid_names_file, "w") as fname_file:
            for fname in required_names:
                fname_file.write(fname + "\n")
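
A minimal sketch of generating the physics model in two subgrids (the settings
file name is an assumption); subset restricts the run to a slice of the
subgrids, here the first of the two:

create_physicsmodel("beast_settings.txt", nsubs=2, nprocs=1, subset=[0, 1])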
Exemplo n.º 16
0
    def setUpClass(cls):

        # download the BEAST library files
        get_libfiles.get_libfiles()

        cls.dset = "metal"
        if cls.dset == "metal":
            cls.basesubdir = "metal_small_15Mar22/"
            cls.basename = f"{cls.basesubdir}beast_metal_small"
            cls.obsname = f"{cls.basesubdir}14675_LMC-13361nw-11112.gst_samp.fits"
            cls.astname = f"{cls.basesubdir}14675_LMC-13361nw-11112.gst.fake.fits"

        # download the cached version for use and comparison
        # - photometry and ASTs
        cls.obs_fname_cache = download_rename(cls.obsname)
        cls.asts_fname_cache = download_rename(cls.astname)
        # - isochrones
        cls.iso_fname_cache = download_rename(f"{cls.basename}_iso.csv")
        # - spectra
        cls.spec_fname_cache = download_rename(f"{cls.basename}_spec_grid.hd5")
        # - spectra with priors
        cls.priors_fname_cache = download_rename(
            f"{cls.basename}_spec_w_priors.grid.hd5")
        cls.priors_sub0_fname_cache = download_rename(
            f"{cls.basename}_subgrids_spec_w_priors.gridsub0.hd5")
        cls.priors_sub1_fname_cache = download_rename(
            f"{cls.basename}_subgrids_spec_w_priors.gridsub1.hd5")
        # - SED grids
        cls.seds_fname_cache = download_rename(f"{cls.basename}_seds.grid.hd5")
        cls.seds_sub0_fname_cache = download_rename(
            f"{cls.basename}_subgrids_seds.gridsub0.hd5")
        cls.seds_sub1_fname_cache = download_rename(
            f"{cls.basename}_subgrids_seds.gridsub1.hd5")
        # - noise model
        cls.noise_fname_cache = download_rename(
            f"{cls.basename}_noisemodel.grid.hd5")
        cls.noise_sub0_fname_cache = download_rename(
            f"{cls.basename}_subgrids_noisemodel.gridsub0.hd5")
        cls.noise_sub1_fname_cache = download_rename(
            f"{cls.basename}_subgrids_noisemodel.gridsub1.hd5")
        # - trimmed files
        cls.noise_trim_fname_cache = download_rename(
            f"{cls.basename}_noisemodel_trim.grid.hd5")
        cls.seds_trim_fname_cache = download_rename(
            f"{cls.basename}_seds_trim.grid.hd5")
        # - output files
        cls.stats_fname_cache = download_rename(f"{cls.basename}_stats.fits")
        cls.lnp_fname_cache = download_rename(f"{cls.basename}_lnp.hd5")
        cls.pdf1d_fname_cache = download_rename(f"{cls.basename}_pdf1d.fits")
        cls.pdf2d_fname_cache = download_rename(f"{cls.basename}_pdf2d.fits")

        # create the beast_settings object
        # (copied over from the metal_small example in beast-examples)
        cls.settings_fname_cache = download_rename(
            f"{cls.basesubdir}beast_settings.txt")
        cls.settings = beast_settings.beast_settings(cls.settings_fname_cache)
        # update names of photometry and AST files
        cls.settings.obsfile = cls.obs_fname_cache
        cls.settings.astfile = cls.asts_fname_cache
        # also make a version with 2 subgrids
        cls.settings_sg = copy.deepcopy(cls.settings)
        cls.settings_sg.n_subgrid = 2
        cls.settings_sg.project = f"{cls.settings.project}_subgrids"
Exemplo n.º 17
0
def make_trim_scripts(
    beast_settings_info,
    num_subtrim=1,
    nice=None,
    prefix=None,
):
    """
    `setup_batch_beast_trim.py` uses file names to create batch trim files.  This
    generates all of the file names for that function.

    NOTE: This assumes you're using source density or background dependent noise
    models.

    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    num_subtrim : int (default = 1)
        number of trim batch jobs

    nice : int (default = None)
        set this to an integer (-20 to 20) to prepend a "nice" level
        to the trimming command

    prefix : string (default=None)
        Set this to a string (such as 'source activate astroconda') to prepend
        to each batch file (use '\n's to make multiple lines)

    Returns
    -------
    job_files : list of strings
        Names of the newly created job files
    """

    # process beast settings info
    if isinstance(beast_settings_info, str):
        settings = beast_settings.beast_settings(beast_settings_info)
    elif isinstance(beast_settings_info, beast_settings.beast_settings):
        settings = beast_settings_info
    else:
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # make lists of file names
    file_dict = create_filenames.create_filenames(
        settings,
        use_sd=True,
        nsubs=settings.n_subgrid,
    )
    # extract some useful ones
    photometry_files = file_dict["photometry_files"]
    modelsedgrid_files = file_dict["modelsedgrid_files"]
    noise_files = file_dict["noise_files"]
    modelsedgrid_trim_files = file_dict["modelsedgrid_trim_files"]
    noise_trim_files = file_dict["noise_trim_files"]
    # the unique sets of things
    unique_sedgrid = [
        x for i, x in enumerate(modelsedgrid_files)
        if i == modelsedgrid_files.index(x)
    ]

    # save the list of job files
    job_file_list = []

    # iterate through each model grid
    for i in range(settings.n_subgrid):

        # indices for this model grid
        grid_ind = [
            ind for ind, mod in enumerate(modelsedgrid_files)
            if mod == unique_sedgrid[i]
        ]

        # create corresponding files for each of those
        input_noise = [noise_files[ind] for ind in grid_ind]
        input_phot = [photometry_files[ind] for ind in grid_ind]
        # to get the trim prefix, find the common string between trimmed noise
        # files and trimmed SED files
        input_trim_prefix = []
        for ind in grid_ind:
            str1 = modelsedgrid_trim_files[ind]
            str2 = noise_trim_files[ind]
            # find longest match
            match = SequenceMatcher(None, str1, str2).find_longest_match(
                0, len(str1), 0, len(str2))
            # grab that substring (and remove trailing "_")
            input_trim_prefix.append(str1[match.a:match.a + match.size][:-1])
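            # e.g. (hypothetical names) "p/p_bin2_sub0_gridsub1_seds_trim.grid.hd5"
            # and "p/p_bin2_sub0_gridsub1_noisemodel_trim.grid.hd5" share
            # "p/p_bin2_sub0_gridsub1_", giving the prefix "p/p_bin2_sub0_gridsub1"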

        # check if the trimmed grids exist before moving on
        check_trim = [
            os.path.isfile(noise_trim_files[ind]) for ind in grid_ind
        ]

        # if any aren't trimmed for this model grid, set up trimming
        if np.sum(check_trim) < len(input_noise):

            job_path = "./{0}/trim_batch_jobs/".format(settings.project)
            if settings.n_subgrid > 1:
                file_prefix = "BEAST_gridsub" + str(i)
            if settings.n_subgrid == 1:
                file_prefix = "BEAST"

            # generate trimming at-queue commands
            setup_batch_beast_trim.generic_batch_trim(
                unique_sedgrid[i],
                input_noise,
                input_phot,
                input_trim_prefix,
                settings.obs_colnames,
                job_path=job_path,
                file_prefix=file_prefix,
                num_subtrim=num_subtrim,
                nice=nice,
                prefix=prefix,
            )

            job_file_list.append(job_path + file_prefix +
                                 "_batch_trim.joblist")

    return job_file_list
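
A minimal sketch of generating the trim job files (the settings file name is
an assumption); the returned paths can then be submitted to the 'at' queue:

job_files = make_trim_scripts("beast_settings.txt", num_subtrim=2, nice=10)
for jf in job_files:
    print("to run:", jf)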
Exemplo n.º 18
0
import argparse

# NOTE: the opening of this import was truncated in the original listing;
# the module path below is an assumption about where these helpers live
from beast.physicsmodel.model_grid import (
    make_spectral_grid,
    add_stellar_priors,
    make_extinguished_sed_grid,
)

from beast.observationmodel.observations import Observations
import beast.observationmodel.noisemodel.generic_noisemodel as noisemodel
from beast.observationmodel.ast import make_ast_input_list, make_ast_xy_list
from beast.fitting import fit, trim_grid
from beast.physicsmodel.grid import FileSEDGrid
from beast.tools import beast_settings, subgridding_tools

import pickle
import os

settings = beast_settings.beast_settings('beast_settings.txt')
outdir = os.path.join(".", settings.project)
subgrid_names_file = os.path.join(outdir, "subgrid_fnames.txt")

if __name__ == "__main__":
    # commandline parser
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-p",
        "--physicsmodel",
        help="Generate the physics model grid",
        action="store_true",
    )
    parser.add_argument("-a",
                        "--ast",
                        help="Generate an input AST file",
Exemplo n.º 19
0
def beast_ast_inputs(field_name=None, ref_image=None, filter_ids=None, galaxy=None):
    """
    This does all of the steps for generating AST inputs and can be used as
    a wrapper to automatically do most steps for multiple fields.
    * make field's beast_settings file
    * make source density map
    * make background density map
    * split catalog by source density
    * make physics model (SED grid)
    * make input list for ASTs
    * prune input ASTs

    ----
    Inputs:

    field_name (str): name of field
    ref_image (str): path to reference image
    filter_ids (list): list of indexes corresponding to the filters in the
                        observation, referenced to the master list below.
    galaxy (str): name of target galaxy (e.g., 'SMC', 'LMC')
    ----

    Places for user to manually do things:
    * editing code before use
        - here: list the catalog filter names with the corresponding BEAST names
        - here: choose settings (pixel size, filter, mag range) for the source density map
        - here: choose settings (pixel size, reference image) for the background map

    """

    # the list of fields
    field_names = [field_name]

    # the path+file for a reference image
    im_path = [ref_image]
    ref_filter = ["F475W"]

    # choose a filter to use for removing artifacts
    # (remove catalog sources with filter_FLAG > 99)
    flag_filter = ["F475W"]

    # number of fields
    n_field = len(field_names)

    # Need to know what the correspondence is between filter names in the
    # catalog and the BEAST filter names.
    #
    # These will be used to automatically determine the filters present in
    # each GST file and fill in the beast settings file.  The order doesn't
    # matter, as long as the order in one list matches the order in the other
    # list.
    #
    gst_filter_names = [
        "F225W",
        "F275W",
        "F336W",
        "F475W",
        "F814W",
        "F110W",
        "F160W",
        "F657N",
    ]
    beast_filter_names = [
        "HST_WFC3_F225W",
        "HST_WFC3_F275W",
        "HST_WFC3_F336W",
        "HST_WFC3_F475W",
        "HST_WFC3_F814W",
        "HST_WFC3_F110W",
        "HST_WFC3_F160W",
        "HST_WFC3_F657N",
    ]

    filter_ids = [int(i) for i in filter_ids]

    gst_filter_names = [gst_filter_names[i] for i in filter_ids]
    beast_filter_names = [beast_filter_names[i] for i in filter_ids]

    for b in range(n_field):

        print("********")
        print("field " + field_names[b])
        print("********")

        # -----------------
        # data file names
        # -----------------

        # paths for the data/AST files
        gst_file = "./data/" + field_names[b] + ".st.fits"
        ast_file = "./data/" + field_names[b] + ".st.fake.fits"
        # path for the reference image (if using for the background map)
        im_file = im_path[b]

        # region file with catalog stars
        # make_region_file(gst_file, ref_filter[b])

        # -----------------
        # 0. make beast settings file
        # -----------------

        print("")
        print("creating beast settings file")
        print("")

        create_beast_settings(
            gst_file,
            ast_file,
            gst_filter_names,
            beast_filter_names,
            galaxy,
            ref_image=im_file,
        )

        # load in beast settings to get number of subgrids
        settings = beast_settings.beast_settings(
            "beast_settings_" + galaxy + "_asts_" + field_names[b] + ".txt"
        )

        # -----------------
        # 1a. make magnitude histograms
        # -----------------

        print("")
        print("making magnitude histograms")
        print("")

        # if not os.path.isfile('./data/'+field_names[b]+'.gst_maghist.pdf'):
        peak_mags = plot_mag_hist.plot_mag_hist(gst_file, stars_per_bin=70, max_bins=75)

        # -----------------
        # 1b. make a source density map
        # -----------------

        print("")
        print("making source density map")
        print("")

        # not currently doing background density bins
        # use_bg_info = True
        use_bg_info = False
        if use_bg_info:
            background_args = types.SimpleNamespace(
                subcommand="background",
                catfile=gst_file,
                pixsize=5,
                npix=None,
                reference=im_file,
                mask_radius=10,
                ann_width=20,
                cat_filter=[ref_filter, "90"],
            )
            create_background_density_map.main_make_map(background_args)

        # but we are doing source density bins!
        if not os.path.isfile(gst_file.replace(".fits", "_source_den_image.fits")):
            # - pixel size of 5 arcsec
            # - use ref_filter[0] between vega mags of 17 and peak_mags[ref_filter[0]]-0.5
            sourceden_args = types.SimpleNamespace(
                subcommand="sourceden",
                catfile=gst_file,
                pixsize=5,
                npix=None,
                mag_name=ref_filter[0] + "_VEGA",
                mag_cut=[17, peak_mags[ref_filter[0]] - 0.5],
                flag_name=flag_filter[0] + "_FLAG",
            )
            create_background_density_map.main_make_map(sourceden_args)

        # new file name with the source density column
        gst_file_sd = gst_file.replace(".fits", "_with_sourceden.fits")

        # -----------------
        # 2. make physics model
        # -----------------

        print("")
        print("making physics model")
        print("")

        # see which subgrid files already exist
        gs_str = ""
        if settings.n_subgrid > 1:
            gs_str = "sub*"

        sed_files = glob.glob(
            "./{0}_beast/{0}_beast_seds.grid{1}.hd5".format(field_names[b], gs_str)
        )

        # only make the physics model if the files don't already exist
        if len(sed_files) < settings.n_subgrid:
            # directly create physics model grids
            create_physicsmodel.create_physicsmodel(
                settings, nprocs=1, nsubs=settings.n_subgrid
            )

        # -------------------
        # 3. make AST inputs
        # -------------------

        # only create an AST input list if the ASTs don't already exist
        ast_input_file = "./" + field_names[b] + "/" + field_names[b] + "_inputAST.txt"

        if not os.path.isfile(ast_input_file):
            make_ast_inputs.make_ast_inputs(settings, pick_method="flux_bin_method")

        # list of SED files (physics models)
        model_grid_files = sorted(
            glob.glob(
                "./{0}/{0}_seds.grid*.hd5".format(
                    field_names[b],
                )
            )
        )

        # --------------------
        # 3.1 "prune" AST inputs
        # --------------------

        # prune input AST by flux (empirically determined)
        ast_input_tab = Table.read(ast_input_file, format="ascii")
        ast_input_tab_pruned = ast_input_tab.copy()

        if "F336W" in gst_filter_names:
            prune_spots = (
                (ast_input_tab_pruned["HST_WFC3_F336W"] > 30.5)
                & (ast_input_tab_pruned["HST_WFC3_F475W"] > 32.5)
                & (ast_input_tab_pruned["HST_WFC3_F814W"] > 29.0)
            )
        else:
            prune_spots = (ast_input_tab_pruned["HST_WFC3_F475W"] > 32.5) & (
                ast_input_tab_pruned["HST_WFC3_F814W"] > 29.0
            )

        ast_input_tab_pruned = ast_input_tab_pruned[~prune_spots]

        # write pruned ast input table to a txt file
        ast_input_file_pruned = (
            "./" + field_names[b] + "/" + field_names[b] + "_inputAST_pruned.txt"
        )
        ast_input_tab_pruned.write(
            ast_input_file_pruned, format="ascii", overwrite=True
        )

        # print out number of pruned ASTs per source density bin as a sanity check
        print("pruned input AST statistics per bin")
        input_ast_bin_stats(settings, ast_input_file_pruned, field_names[b])

        # compare magnitude histograms of pruned ASTs with catalog
        plot_ast_histogram.plot_ast_histogram(
            ast_file=ast_input_file_pruned, sed_grid_file=model_grid_files[0]
        )

        print("now go check the diagnostic plots!")