Example #1
def merge_files(use_sd=True, nsubs=1):
    """
    Merge all of the results from the assorted fitting sub-files (divided by
    source density, subgrids, or both).


    Parameters
    ----------
    use_sd : boolean (default=True)
        set to True if the fitting used source density bins (file names are
        determined by finding matches to datamodel.astfile with SD info)

    nsubs : int (default=1)
        number of subgrids used for the physics model

    """

    # if there's no SD and no subgridding, running this is unnecessary
    if (not use_sd) and (nsubs == 1):
        print("No merging necessary")
        return

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)
    # check input parameters
    verify_params.verify_input_format(datamodel)

    # get file name lists (to check if they exist and/or need to be resumed)
    file_dict = create_filenames.create_filenames(use_sd=use_sd, nsubs=nsubs)

    # - input files
    # photometry_files = file_dict['photometry_files']
    # modelsedgrid_files = file_dict['modelsedgrid_files']
    # noise_files = file_dict['noise_files']

    # - output files
    stats_files = file_dict["stats_files"]
    pdf_files = file_dict["pdf_files"]
    # lnp_files = file_dict['lnp_files']

    # - other useful info
    sd_sub_info = file_dict["sd_sub_info"]
    # gridsub_info = file_dict['gridsub_info']
    # the unique sets of SD+sub
    unique_sd_sub = [
        x for i, x in enumerate(sd_sub_info) if i == sd_sub_info.index(x)
    ]

    # --------------------
    # no subgrids
    # --------------------

    if nsubs == 1:

        out_filebase = "{0}/{0}".format(datamodel.project)
        reorder_tags = [
            "sd{0}_sub{1}".format(x[0], x[1]) for x in unique_sd_sub
        ]
        merge_beast_stats.merge_stats_files(stats_files,
                                            out_filebase,
                                            reorder_tag_list=reorder_tags)

    # --------------------
    # use subgrids
    # --------------------

    if nsubs > 1:

        # runs were split by source density
        if use_sd:

            # lists to save the merged file names
            merged_pdf_files = []
            merged_stats_files = []

            for i, sd_sub in enumerate(unique_sd_sub):

                # indices with the current sd_sub
                ind = [j for j, x in enumerate(sd_sub_info) if x == sd_sub]

                # merge the subgrid files for that SD+sub
                out_filebase = "{0}/SD{1}_sub{2}/{0}_SD{1}_sub{2}".format(
                    datamodel.project, sd_sub[0], sd_sub[1])

                merged_pdf1d_fname, merged_stats_fname = subgridding_tools.merge_pdf1d_stats(
                    [pdf_files[j] for j in ind],
                    [stats_files[j] for j in ind],
                    re_run=False,
                    output_fname_base=out_filebase,
                )

                merged_pdf_files.append(merged_pdf1d_fname)
                merged_stats_files.append(merged_stats_fname)

            # merge the merged stats files
            out_filebase = "{0}/{0}".format(datamodel.project)
            reorder_tags = [
                "sd{0}_sub{1}".format(x[0], x[1]) for x in unique_sd_sub
            ]
            merge_beast_stats.merge_stats_files(merged_stats_files,
                                                out_filebase,
                                                reorder_tag_list=reorder_tags)

        # runs weren't split by source density
        else:

            out_filebase = "{0}/{0}".format(datamodel.project)

            subgridding_tools.merge_pdf1d_stats(pdf_files,
                                                stats_files,
                                                output_fname_base=out_filebase)
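A minimal usage sketch for this datamodel-based variant, assuming the working directory holds the datamodel.py generated for the current field (the bin/subgrid counts are illustrative):

# merge results from a run that was split by source density and
# fit on 4 physics-model subgrids; all paths come from datamodel.py
merge_files(use_sd=True, nsubs=4)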
Example #2
def merge_files(beast_settings_info, use_sd=True, nsubs=1, partial=False):
    """
    Merge all of the results from the assorted fitting sub-files (divided by
    source density, subgrids, or both).

    If fitting is in progress but you want to check results of completed stars,
    set partial=True.  This is only relevant when using subgrids.

    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    use_sd : boolean (default=True)
        set to True if the fitting used source density bins

    nsubs : int (default=1)
        number of subgrids used for the physics model

    partial : boolean (default=False)
        If True, the output merged files will only have stars that have been
        run across all subgrids.  If stars have only been fit in some subgrids
        and not others, they will be discarded in the "partial" output files.
        Currently only implemented for 1D PDFs and stats (not lnP) files.

    """

    # if there's no SD and no subgridding, running this is unnecessary
    if (not use_sd) and (nsubs == 1):
        print("No merging necessary")
        return

    # process beast settings info
    if isinstance(beast_settings_info, str):
        settings = beast_settings.beast_settings(beast_settings_info)
    elif isinstance(beast_settings_info, beast_settings.beast_settings):
        settings = beast_settings_info
    else:
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # get file name lists (to check if they exist and/or need to be resumed)
    file_dict = create_filenames.create_filenames(settings,
                                                  use_sd=use_sd,
                                                  nsubs=nsubs)

    # - input files
    # photometry_files = file_dict['photometry_files']
    # modelsedgrid_files = file_dict['modelsedgrid_files']
    # noise_files = file_dict['noise_files']

    # - output files
    stats_files = file_dict["stats_files"]
    pdf_files = file_dict["pdf_files"]
    lnp_files = file_dict["lnp_files"]

    # - other useful info
    sd_sub_info = file_dict["sd_sub_info"]
    # gridsub_info = file_dict['gridsub_info']
    # the unique sets of SD+sub
    unique_sd_sub = [
        x for i, x in enumerate(sd_sub_info) if i == sd_sub_info.index(x)
    ]

    # --------------------
    # no subgrids
    # --------------------

    if nsubs == 1:

        out_filebase = "{0}/{0}".format(settings.project)
        reorder_tags = [
            "bin{0}_sub{1}".format(x[0], x[1]) for x in unique_sd_sub
        ]
        merge_beast_stats.merge_stats_files(stats_files,
                                            out_filebase,
                                            reorder_tag_list=reorder_tags)

    # --------------------
    # use subgrids
    # --------------------

    if nsubs > 1:

        # runs were split by source density
        if use_sd:

            # lists to save the merged file names
            merged_pdf_files = []
            merged_stats_files = []
            merged_lnp_files = []

            for sd_sub in unique_sd_sub:

                # indices with the current sd_sub
                ind = [j for j, x in enumerate(sd_sub_info) if x == sd_sub]

                # merge the subgrid files for that SD+sub
                out_filebase = "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}".format(
                    settings.project, sd_sub[0], sd_sub[1])
                if partial:
                    out_filebase += "_partial"

                # - 1D PDFs and stats
                (
                    merged_pdf1d_fname,
                    merged_stats_fname,
                ) = subgridding_tools.merge_pdf1d_stats(
                    [pdf_files[j] for j in ind],
                    [stats_files[j] for j in ind],
                    re_run=False,
                    output_fname_base=out_filebase,
                    partial=partial,
                )

                merged_pdf_files.append(merged_pdf1d_fname)
                merged_stats_files.append(merged_stats_fname)

                # - lnP files
                if not partial:
                    merged_lnp_fname = subgridding_tools.merge_lnp(
                        [lnp_files[j] for j in ind],
                        re_run=False,
                        output_fname_base=out_filebase,
                        threshold=-10,
                    )
                    merged_lnp_files.append(merged_lnp_fname)

            # merge the merged stats files
            out_filebase = "{0}/{0}".format(settings.project)
            reorder_tags = [
                "bin{0}_sub{1}".format(x[0], x[1]) for x in unique_sd_sub
            ]
            merge_beast_stats.merge_stats_files(merged_stats_files,
                                                out_filebase,
                                                reorder_tag_list=reorder_tags)

        # runs weren't split by source density
        else:

            out_filebase = "{0}/{0}".format(settings.project)

            # - 1D PDFs and stats
            subgridding_tools.merge_pdf1d_stats(
                pdf_files,
                stats_files,
                output_fname_base=out_filebase,
                partial=partial,
            )

            # - lnP files
            if not partial:
                subgridding_tools.merge_lnp(
                    lnp_files,
                    re_run=False,
                    output_fname_base=out_filebase,
                    threshold=-10,
                )
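A minimal usage sketch for this beast_settings-based variant; the settings file name is illustrative:

# full merge once all fitting runs are complete
merge_files("beast_settings.txt", use_sd=True, nsubs=4)

# mid-run check: merge only the stars already fit in every subgrid
merge_files("beast_settings.txt", use_sd=True, nsubs=4, partial=True)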
Example #3
def beast_production_wrapper():
    """
    This does all of the steps for a full production run, and can be used as
    a wrapper to automatically do most steps for multiple fields.
    * make datamodel.py file
    * make source density map
    * make background density map
    * split catalog by source density
    * make physics model (SED grid)
    * make input list for ASTs
    * make noise model
    * generate batch script to trim models
    * generate batch script to fit models
    * merge stats files back together
    * spatially reorder the results

    Places for user to manually do things:
    * editing code before use
        - datamodel_template.py: setting up the file with desired parameters
        - here: list the catalog filter names with the corresponding BEAST names
        - here: choose settings (pixel size, filter, mag range) for the source density map
        - here: choose settings (pixel size, reference image) for the background map
        - here: choose settings (filter, number per file) for dividing catalog by source density
        - here: choose settings (# files, nice level) for the trimming/fitting batch scripts
    * process the ASTs, as described in BEAST documentation
    * run the trimming scripts
    * run the fitting scripts

    BEWARE: When running the trimming/fitting scripts, ensure that the correct
    datamodel.py file is in use.  Since it gets updated every time this code is
    run, you may unexpectedly be using one from another field.
    """

    # the list of fields
    field_names = ["15275_IC1613"]

    # distance moduli and velocities
    # http://adsabs.harvard.edu/abs/2013AJ....146...86T
    dist_mod = [24.36]
    velocity = [-236]

    # the path+file for a reference image
    im_path = ["../beast_dwarfs/images/15275_IC1613_F555W_drz.fits.gz"]
    ref_filter = ["F555W"]

    # choose a filter to use for removing artifacts
    # (remove catalog sources with filter_FLAG > 99)
    flag_filter = ["F555W"]

    # number of fields
    n_field = len(field_names)

    # Need to know what the correspondence is between filter names in the
    # catalog and the BEAST filter names.
    #
    # These will be used to automatically determine the filters present in
    # each GST file and fill in the datamodel.py file.  The order doesn't
    # matter, as long as the order in one list matches the order in the other
    # list.
    #
    gst_filter_names = [
        "F275W", "F336W", "F390M", "F555W", "F814W", "F110W", "F160W"
    ]
    beast_filter_names = [
        "HST_WFC3_F275W",
        "HST_WFC3_F336W",
        "HST_WFC3_F390M",
        "HST_WFC3_F555W",
        "HST_WFC3_F814W",
        "HST_WFC3_F110W",
        "HST_WFC3_F160W",
    ]

    for b in range(n_field):
        # for b in [0]:

        print("********")
        print("field " + field_names[b])
        print("********")

        # -----------------
        # data file names
        # -----------------

        # paths for the data/AST files
        gst_file = "./data/" + field_names[b] + ".gst.fits"
        ast_file = "./data/" + field_names[b] + ".gst.fake.fits"
        # path for the reference image (if using for the background map)
        im_file = im_path[b]

        # region file with catalog stars
        # make_region_file(gst_file, ref_filter[b])

        # -----------------
        # 0. make datamodel file
        # -----------------

        # need to do this first, because otherwise any old version that exists
        # will be imported, and changes made here won't get imported again

        print("")
        print("creating datamodel file")
        print("")

        create_datamodel(
            gst_file,
            ast_file,
            gst_filter_names,
            beast_filter_names,
            dist_mod[b],
            velocity[b],
            ref_image=im_file,
        )

        # load in datamodel to get number of subgrids
        import datamodel

        importlib.reload(datamodel)

        # -----------------
        # 1a. make magnitude histograms
        # -----------------

        print("")
        print("making magnitude histograms")
        print("")

        # if not os.path.isfile('./data/'+field_names[b]+'.gst_maghist.pdf'):
        peak_mags = plot_mag_hist.plot_mag_hist(gst_file,
                                                stars_per_bin=70,
                                                max_bins=75)
        # test = plot_mag_hist.plot_mag_hist(ast_file, stars_per_bin=200, max_bins=30)

        # -----------------
        # 1b. make a source density map
        # -----------------

        print("")
        print("making source density map")
        print("")

        # not currently doing background density bins
        # use_bg_info = True
        use_bg_info = False
        if use_bg_info:
            background_args = types.SimpleNamespace(
                subcommand="background",
                catfile=gst_file,
                pixsize=5,
                npix=None,
                reference=im_file,
                mask_radius=10,
                ann_width=20,
                cat_filter=[ref_filter[b], "90"],
            )
            create_background_density_map.main_make_map(background_args)

        # but we are doing source density bins!
        if not os.path.isfile(
                gst_file.replace(".fits", "_source_den_image.fits")):
            # - pixel size of 5 arcsec
            # - use ref_filter[b] between vega mags of 15 and peak_mags[ref_filter[b]]-0.5
            sourceden_args = types.SimpleNamespace(
                subcommand="sourceden",
                catfile=gst_file,
                pixsize=5,
                npix=None,
                mag_name=ref_filter[b] + "_VEGA",
                mag_cut=[15, peak_mags[ref_filter[b]] - 0.5],
                flag_name=flag_filter[b] + '_FLAG',
            )
            create_background_density_map.main_make_map(sourceden_args)

        # new file name with the source density column
        gst_file_sd = gst_file.replace(".fits", "_with_sourceden.fits")

        # -----------------
        # 2. make physics model
        # -----------------

        print("")
        print("making physics model")
        print("")

        # see which subgrid files already exist
        gs_str = ""
        if datamodel.n_subgrid > 1:
            gs_str = "sub*"

        sed_files = glob.glob("./{0}_beast/{0}_beast_seds.grid{1}.hd5".format(
            field_names[b], gs_str))

        # only make the physics model if it doesn't already exist
        if len(sed_files) < datamodel.n_subgrid:
            # directly create physics model grids
            #create_physicsmodel.create_physicsmodel(nprocs=1, nsubs=datamodel.n_subgrid)
            # create grids with script
            create_physicsmodel.split_create_physicsmodel(
                nprocs=1, nsubs=datamodel.n_subgrid)
            print('\n**** go run physics model code for ' + field_names[b] +
                  '! ****')
            continue

        # list of SED files
        model_grid_files = sorted(
            glob.glob("./{0}_beast/{0}_beast_seds.grid{1}.hd5".format(
                field_names[b], gs_str)))

        # -----------------
        # 3. make ASTs
        # -----------------

        # only create an AST input list if the ASTs don't already exist
        ast_input_file = ("./" + field_names[b] + "_beast/" + field_names[b] +
                          "_beast_inputAST.txt")

        if not os.path.isfile(ast_file):
            if not os.path.isfile(ast_input_file):
                print("")
                print("creating artificial stars")
                print("")
                make_ast_inputs.make_ast_inputs(flux_bin_method=True)

            split_ast_input_file.split_asts(field_names[b] + "_beast",
                                            ast_input_file, 2000)

            print("\n**** go run ASTs for " + field_names[b] + "! ****\n")
            continue

        # -----------------
        # 4/5. edit photometry/AST catalogs
        # -----------------

        # remove sources that are
        # - in regions without full imaging coverage,
        # - flagged in flag_filter

        print("")
        print("editing photometry/AST catalogs")
        print("")

        # - photometry
        gst_file_cut = gst_file.replace(".fits", "_with_sourceden_cut.fits")
        cut_catalogs.cut_catalogs(
            gst_file_sd,
            gst_file_cut,
            partial_overlap=True,
            flagged=True,
            flag_filter=flag_filter[b],
            region_file=True,
        )

        # - ASTs
        ast_file_cut = ast_file.replace(".fits", "_cut.fits")
        cut_catalogs.cut_catalogs(
            ast_file,
            ast_file_cut,
            partial_overlap=True,
            flagged=True,
            flag_filter=flag_filter[b],
            region_file=True,
        )
        # test = plot_mag_hist.plot_mag_hist(ast_file_cut, stars_per_bin=200, max_bins=30)

        # edit the datamodel.py file to have the correct photometry file name
        # (AST file name is already automatically the cut version)
        create_datamodel(
            gst_file_cut,
            ast_file_cut,
            gst_filter_names,
            beast_filter_names,
            dist_mod[b],
            velocity[b],
            ref_image=im_file,
        )

        # -----------------
        # 6. split observations by source density
        # -----------------

        print("")
        print("splitting observations by source density")
        print("")

        # - photometry

        if len(glob.glob(gst_file_cut.replace('.fits', '*sub*fits'))) == 0:

            # a smaller value for n_per_file will mean more individual files/runs,
            # but each run will take a shorter amount of time

            split_catalog_using_map.split_main(
                gst_file_cut,
                ast_file_cut,
                gst_file.replace('.fits', '_sourceden_map.hd5'),
                bin_width=1,
                n_per_file=6250,
            )

        # -- at this point, we can run the code to create lists of filenames
        file_dict = create_filenames.create_filenames(
            use_sd=True, nsubs=datamodel.n_subgrid)

        # figure out how many files there are
        sd_sub_info = file_dict["sd_sub_info"]
        # - number of SD bins
        temp = set([i[0] for i in sd_sub_info])
        print("** total SD bins: " + str(len(temp)))
        # - the unique sets of SD+sub
        unique_sd_sub = [
            x for i, x in enumerate(sd_sub_info) if i == sd_sub_info.index(x)
        ]
        print("** total SD subfiles: " + str(len(unique_sd_sub)))

        # -----------------
        # 7. make noise models
        # -----------------

        print("")
        print("making noise models")
        print("")

        # create the noise model (this code will check if it exists)
        create_obsmodel.create_obsmodel(use_sd=True,
                                        nsubs=datamodel.n_subgrid,
                                        nprocs=1)

        # -----------------
        # 8. make script to trim models
        # -----------------

        print("")
        print("setting up script to trim models")
        print("")

        # save any at-queue commands
        at_list = []

        # iterate through each model grid
        for i in range(datamodel.n_subgrid):

            # gst list
            temp = file_dict["photometry_files"]
            gst_input_list = [
                x for k, x in enumerate(temp) if k == temp.index(x)
            ]

            # create corresponding files for each of those
            ast_input_list = []
            noise_files = []
            trim_prefix = []

            for j in range(len(gst_input_list)):
                # get the sd/sub number
                curr_sd = unique_sd_sub[j][0]
                curr_sub = unique_sd_sub[j][1]
                subfolder = "bin{0}_sub{1}".format(curr_sd, curr_sub)

                # create file names
                ast_input_list.append(
                    ast_file_cut.replace(".fits", "_bin" + curr_sd + ".fits"))
                if datamodel.n_subgrid > 1:
                    noise_files.append(
                        "./{0}_beast/{0}_beast_noisemodel_bin{1}.gridsub{2}.hd5"
                        .format(field_names[b], curr_sd, i))

                    trim_prefix.append(
                        "./{0}_beast/{1}/{0}_beast_{1}_gridsub{2}".format(
                            field_names[b], subfolder, i))

                if datamodel.n_subgrid == 1:
                    noise_files.append(file_dict['noise_files'][j])
                    trim_prefix.append("./{0}_beast/{0}_beast_{1}".format(
                        field_names[b], subfolder))

            # check if the trimmed grids exist before moving on
            if datamodel.n_subgrid > 1:
                trim_files = sorted(
                    glob.glob(
                        "./{0}_beast/bin*_sub*/{0}_beast_*_gridsub{1}_sed_trim.grid.hd5"
                        .format(field_names[b], i)))
            if datamodel.n_subgrid == 1:
                trim_files = sorted(
                    glob.glob("./{0}_beast/{0}_beast_*_sub*_sed_trim.grid.hd5".
                              format(field_names[b])))

            if len(trim_files) < len(gst_input_list):

                job_path = "./" + field_names[b] + "_beast/trim_batch_jobs/"
                if datamodel.n_subgrid > 1:
                    file_prefix = "BEAST_gridsub" + str(i)
                if datamodel.n_subgrid == 1:
                    file_prefix = "BEAST"

                # generate trimming at-queue commands
                setup_batch_beast_trim.generic_batch_trim(
                    model_grid_files[i],
                    noise_files,
                    gst_input_list,
                    ast_input_list,
                    trim_prefix,
                    job_path=job_path,
                    file_prefix=file_prefix,
                    num_subtrim=1,
                    nice=19,
                    prefix="source activate b13",
                )

                at_list.append("at -f " + job_path + file_prefix +
                               "_batch_trim.joblist now")

        if len(at_list) > 0:
            print("\n**** go run trimming code for " + field_names[b] +
                  "! ****")
            print("Here are the command(s) to run:")
            for cmd in at_list:
                print(cmd)
            return
        else:
            print("all files are trimmed for " + field_names[b])

        # -----------------
        # 9. make script to fit models
        # -----------------

        print("")
        print("setting up script to fit models")
        print("")

        fit_run_info = setup_batch_beast_fit.setup_batch_beast_fit(
            num_percore=1,
            nice=19,
            overwrite_logfile=False,
            prefix="source activate b13",
            use_sd=True,
            nsubs=datamodel.n_subgrid,
            nprocs=1,
        )

        # check if the fits exist before moving on
        tot_remaining = len(fit_run_info["done"]) - np.sum(
            fit_run_info["done"])
        if tot_remaining > 0:
            print("\n**** go run fitting code for " + field_names[b] +
                  "! ****")
            print("Here are the " + str(len(fit_run_info["files_to_run"])) +
                  " commands to run:")
            for job_file in fit_run_info["files_to_run"]:
                print("at -f ./" + job_file + " now")
            continue
        else:
            print("all fits are complete for " + field_names[b])

        # -----------------
        # 10. merge stats files from each fit
        # -----------------

        print("")
        print("merging stats files")
        print("")

        merge_files.merge_files(use_sd=True, nsubs=datamodel.n_subgrid)
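A sketch of how this wrapper is meant to be driven; each continue/return above hands work off to external queue jobs, so the function is re-run after every manual stage (ASTs, trimming, fitting) finishes:

# completed stages are detected from the files on disk and skipped,
# so repeated calls resume where the previous run stopped
beast_production_wrapper()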
Example #4
def setup_batch_beast_fit(
    beast_settings_info,
    num_percore=5,
    nice=None,
    overwrite_logfile=True,
    prefix=None,
    use_sd=True,
    pdf2d_param_list=['Av', 'Rv', 'f_A', 'M_ini', 'logA', 'Z', 'distance'],
    nsubs=1,
    nprocs=1,
):
    """
    Sets up batch files for submission to the 'at' queue on
    linux (or similar) systems

    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    num_percore : int (default = 5)
        number of fitting runs per core

    nice : int (default = None)
        set this to an integer (-20 to 20) to prepend a "nice" level
        to the fitting command

    overwrite_logfile : boolean (default = True)
        if True, will overwrite the log file; if False, will append to
        existing log file

    prefix : string (default=None)
        Set this to a string (such as 'source activate astroconda') to prepend
        to each batch file (use '\n's to make multiple lines)

    use_sd : boolean (default=True)
        If True, split runs based on source density (determined by finding
        matches to settings.astfile with SD info)

    pdf2d_param_list : list of strings or None
        If set, do 2D PDFs of these parameters.  If None, don't make 2D PDFs.

    nsubs : int (default=1)
        number of subgrids used for the physics model

    nprocs : int (default=1)
        Number of parallel processes to use when doing the fitting
        (currently only implemented for subgrids)


    Returns
    -------
    run_info_dict : dict
        Dictionary indicating which catalog files have complete modeling, and
        which job files need to be run

    """

    # process beast settings info
    if isinstance(beast_settings_info, str):
        settings = beast_settings.beast_settings(beast_settings_info)
    elif isinstance(beast_settings_info, beast_settings.beast_settings):
        settings = beast_settings_info
    else:
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # setup the subdirectory for the batch and log files
    job_path = settings.project + "/fit_batch_jobs/"
    if not os.path.isdir(job_path):
        os.mkdir(job_path)

    log_path = job_path + "logs/"
    if not os.path.isdir(log_path):
        os.mkdir(log_path)

    # get file name lists (to check if they exist and/or need to be resumed)
    file_dict = create_filenames.create_filenames(settings,
                                                  use_sd=use_sd,
                                                  nsubs=nsubs)

    # - input files
    photometry_files = file_dict["photometry_files"]
    # modelsedgrid_files = file_dict['modelsedgrid_files']
    # noise_files = file_dict['noise_files']

    # - output files
    stats_files = file_dict["stats_files"]
    pdf_files = file_dict["pdf_files"]
    lnp_files = file_dict["lnp_files"]

    # - total number of files
    n_files = len(photometry_files)

    # - other useful info
    sd_sub_info = file_dict["sd_sub_info"]
    gridsub_info = file_dict["gridsub_info"]

    # names of output log files
    log_files = []

    # initialize the job file variable up front (otherwise it got auto-added
    # in the wrong place and broke the code)
    pf = None

    for i in range(n_files):

        sd_piece = ""
        if use_sd is True:
            sd_piece = "_bin" + sd_sub_info[i][0] + "_sub" + sd_sub_info[i][1]

        gridsub_piece = ""
        if nsubs > 1:
            gridsub_piece = "_gridsub" + str(gridsub_info[i])

        log_files.append("beast_fit" + sd_piece + gridsub_piece + ".log")

    # start making the job files!

    pf_open = False
    cur_f = 0
    cur_total_size = 0.0
    j = -1

    # keep track of which files are done running
    run_info_dict = {
        "phot_file": photometry_files,
        "done": np.full(n_files, False),
        "files_to_run": [],
    }

    for i, phot_file in enumerate(photometry_files):

        print("")

        # check if this is a full run
        reg_run = False
        run_done = False
        if not os.path.isfile(stats_files[i]):
            reg_run = True
            print("no stats file")
        if not os.path.isfile(pdf_files[i]):
            reg_run = True
            print("no pdf1d file")
        if not os.path.isfile(lnp_files[i]):
            reg_run = True
            print("no lnp file")

        # first check if the pdf1d mass spacing is correct
        if not reg_run:
            hdulist = fits.open(pdf_files[i])
            delta1 = hdulist["M_ini"].data[-1, 1] - hdulist["M_ini"].data[-1,
                                                                          0]
            if delta1 > 1.0:  # old linear spacing
                print("pdf1d lin mass spacing - full refitting needed")
                old_mass_spacing = True
            else:
                old_mass_spacing = False
                print("pdf1d log mass spacing - ok")

            if old_mass_spacing:
                run_done = False
                reg_run = True

        # now check if the number of results is the same as
        #    the number of observations
        if not reg_run:
            # get the observed catalog
            obs = Table.read(phot_file)

            # get the fit results catalog
            t = Table.read(stats_files[i], hdu=1)
            # get the number of stars that have been fit
            (indxs, ) = np.where(t["Pmax"] != 0.0)

            # get the number of entries in the lnp file
            f = tables.open_file(lnp_files[i], "r")
            nlnp = f.root._v_nchildren - 2
            f.close()

            print("# obs, stats, lnp = ", len(obs), len(indxs), nlnp)
            if (len(indxs) == len(obs)) & (nlnp == len(obs)):

                # final check, is the pdf1d file correctly populated
                tot_prob = np.sum(hdulist["M_ini"].data, axis=1)
                (tindxs, ) = np.where(tot_prob > 0.0)
                print("# good pdf1d = ", len(tindxs) - 1)
                if len(tindxs) == (len(obs) + 1):
                    run_done = True

        if run_done:
            print(stats_files[i] + " done")
            run_info_dict["done"][i] = True
        else:
            j += 1
            if j % num_percore == 0:
                cur_f += 1

                # close previous files
                if j != 0:
                    pf.close()
                    # slurm needs the job file to be executable
                    #   flake8/codestyle error ignored as this if statement only executed
                    #   for j > 0 and appropriate joblist_file defined in j - 1
                    os.chmod(joblist_file, stat.S_IRWXU | stat.S_IRGRP
                             | stat.S_IROTH)  # noqa: F821

                    print(
                        "total sed_trim size [Gb] = ",
                        cur_total_size / (1024.0 * 1024.0 * 1024.0),
                    )
                    cur_total_size = 0.0

                # open the slurm and param files
                pf_open = True
                joblist_file = job_path + "beast_batch_fit_" + str(
                    cur_f) + ".joblist"
                pf = open(joblist_file, "w")
                run_info_dict["files_to_run"].append(joblist_file)

                # write out anything at the beginning of the file
                if prefix is not None:
                    pf.write(prefix + "\n")

            # flag for resuming
            resume_str = ""
            if reg_run:
                print(stats_files[i] + " does not exist " +
                      "- adding job as a regular fit job (not resume job)")
            else:
                print(stats_files[i] +
                      " not done - adding to continue fitting list (" +
                      str(len(indxs)) + "/" + str(len(t["Pmax"])) + ")")
                resume_str = "-r"

            # prepend a `nice` value
            nice_str = ""
            if nice is not None:
                nice_str = "nice -n" + str(int(nice)) + " "

            # choose whether to append or overwrite log file
            pipe_str = " > "
            if not overwrite_logfile:
                pipe_str = " >> "

            # set SD+sub option
            sd_str = ""
            if use_sd is True:
                sd_str = ' --choose_sd_sub "{0}" "{1}" '.format(
                    sd_sub_info[i][0], sd_sub_info[i][1])

            # set gridsub option
            gs_str = ""
            if nsubs > 1:
                gs_str = " --choose_subgrid {0} ".format(gridsub_info[i])

            # set 2D PDF option
            if pdf2d_param_list is None:
                pdf2d_str = "None"
            else:
                pdf2d_str = " " + " ".join(pdf2d_param_list) + " "

            job_command = (nice_str +
                           "python -m beast.tools.run.run_fitting " +
                           " {0} ".format(settings.settings_file) +
                           resume_str + sd_str + gs_str + " --nsubs " +
                           str(nsubs) + " --nprocs " + str(nprocs) +
                           " --pdf2d_param_list " + pdf2d_str + pipe_str +
                           log_path + log_files[i])

            pf.write(job_command + "\n")

    if pf_open:
        pf.close()

        # slurm needs the job file to be executable
        os.chmod(joblist_file, stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH)

    # return the info about completed modeling
    return run_info_dict
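A minimal usage sketch, assuming the illustrative settings file name and that the generated job files are submitted to the at-queue as the batch scripts intend:

import os

# build the job files, then submit whichever fits are still incomplete
run_info = setup_batch_beast_fit("beast_settings.txt", num_percore=5, nice=19)
for job_file in run_info["files_to_run"]:
    os.system("at -f ./" + job_file + " now")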
Example #5
def beast_verification_wrapper():
    """
    This wrapper does the processing for BEAST verification

    Parameter recovery
    * create simulated data for a given model grid + noise model
    * generate batch script to trim models
    * generate batch script to fit models
    * merge stats files back together

    Places for user to manually do things:
    * editing code before use
        - beast_settings_template.py: setting up the file with desired parameters
        - here: list the catalog filter names with the corresponding BEAST names
        - here: number of simulated stars to generate
        - here: choose settings (# files, nice level) for the trimming/fitting batch scripts
    * process the ASTs, as described in BEAST documentation
    * run the trimming scripts
    * run the fitting scripts

    """

    # the list of fields
    field_names = ["15275_IC1613"]

    # distance moduli and velocities
    # http://adsabs.harvard.edu/abs/2013AJ....146...86T
    dist_mod = [24.36]
    velocity = [-236]

    # the path+file for a reference image
    im_path = ["../beast_dwarfs/images/15275_IC1613_F555W_drz.fits.gz"]
    ref_filter = ["F555W"]

    # number of fields
    n_field = len(field_names)

    # Need to know what the correspondence is between filter names in the
    # catalog and the BEAST filter names.
    #
    # These will be used to automatically determine the filters present in
    # each GST file and fill in the beast settings file.  The order doesn't
    # matter, as long as the order in one list matches the order in the other
    # list.
    #
    gst_filter_names = [
        "F275W", "F336W", "F390M", "F555W", "F814W", "F110W", "F160W"
    ]
    beast_filter_names = [
        "HST_WFC3_F275W",
        "HST_WFC3_F336W",
        "HST_WFC3_F390M",
        "HST_WFC3_F555W",
        "HST_WFC3_F814W",
        "HST_WFC3_F110W",
        "HST_WFC3_F160W",
    ]

    for b in range(n_field):
        # for b in [0]:

        print("********")
        print("field " + field_names[b])
        print("********")

        # -----------------
        # 0. get original file names
        # -----------------

        print('')
        print('retrieving original file names')
        print('')

        # paths for the data/AST files
        gst_file_orig = ('./data/' + field_names[b] +
                         '.gst_with_sourceden_cut.fits')
        ast_file_orig = './data/' + field_names[b] + '.gst.fake_cut.fits'
        # path for the reference image (if using for the background map)
        im_file = im_path[b]

        create_beast_settings(
            gst_file_orig,
            ast_file_orig,
            gst_filter_names,
            beast_filter_names,
            dist_mod[b],
            velocity[b],
            ref_image=im_file,
            proj_type='beast',
        )

        # load in beast settings to get number of subgrids
        settings = beast_settings.beast_settings("beast_settings_" +
                                                 field_names[i] + "_beast.txt")

        # grab relevant file names
        file_dict = create_filenames.create_filenames(
            settings,
            use_sd=True,
            nsubs=settings.n_subgrid,
        )
        modelsedgrid_files = file_dict['modelsedgrid_files']
        noise_files = file_dict['noise_files']
        sd_sub_info = file_dict["sd_sub_info"]
        tot_files = len(sd_sub_info)

        # -----------------
        # 1. create simulated data
        # -----------------

        print('')
        print('simulating data')
        print('')

        gst_file = gst_file_orig.replace('.gst', '.sim.gst')
        gst_subfile_form = gst_file.replace('.fits', '_bin*_sub0.fits')

        # loop through all files
        # only grab files and simulate data when we're at sd_sub = [N,0]
        for i in range(tot_files):

            # find matches to sd_sub = [i,0]
            inds_to_use = [
                ind for ind in range(tot_files)
                if sd_sub_info[ind] == [str(i), '0']
            ]

            # if there are matches, use those corresponding files
            if len(inds_to_use) > 0:

                output_catalog = gst_subfile_form.replace(
                    '_bin*', '_bin' + str(i))

                if not os.path.isfile(output_catalog):

                    print('generating simulated observations for bin=' +
                          str(i))

                    grid_sublist = [modelsedgrid_files[x] for x in inds_to_use]
                    noise_sublist = [noise_files[x] for x in inds_to_use]

                    simulate_obs.simulate_obs(
                        grid_sublist,
                        noise_sublist,
                        output_catalog,
                        nsim=5000,
                        compl_filter=ref_filter[b],
                    )

                else:
                    print('simulated observations already exist for bin=' +
                          str(i))

        # combine them all into one catalog
        if not os.path.isfile(gst_file):
            table_list = []
            for cat_file in glob.glob(gst_subfile_form):
                table_list.append(Table.read(cat_file))
            vstack(table_list).write(gst_file, overwrite=True)

        # -----------------
        # 2. make new settings file
        # -----------------

        print('')
        print('creating beast settings file')
        print('')

        create_beast_settings(
            gst_file,
            ast_file_orig,
            gst_filter_names,
            beast_filter_names,
            dist_mod[b],
            velocity[b],
            ref_image=im_file,
            proj_type='sim',
        )

        # load in beast settings again
        settings = beast_settings.beast_settings("beast_settings_" +
                                                 field_names[i] + "_sim.txt")

        # -----------------
        # 3. make symbolic links to model grids and noise models
        # -----------------

        # make new directory
        if not os.path.isdir('./' + settings.project):
            os.mkdir('./' + settings.project)

        # symlink the physics/noise models
        orig_phys = list(set(modelsedgrid_files))
        for grid in orig_phys:
            source = os.path.abspath(grid)
            dest = os.path.abspath(grid.replace('_beast', '_sim'))
            if not os.path.islink(dest):
                os.symlink(source, dest)
        orig_noise = list(set(noise_files))
        for grid in orig_noise:
            source = os.path.abspath(grid)
            dest = os.path.abspath(grid.replace('_beast', '_sim'))
            if not os.path.islink(dest):
                os.symlink(source, dest)

        # -----------------
        # 4. make script to trim models
        # -----------------

        print("")
        print("setting up script to trim models")
        print("")

        job_file_list = make_trim_scripts.make_trim_scripts(
            settings, num_subtrim=1, prefix='source activate b13')

        if len(job_file_list) > 0:
            print('\n**** go run trimming code for ' + field_names[b] +
                  '! ****')
            print('Here are the command(s) to run:')
            for job in job_file_list:
                print('at -f ' + job + ' now')
            return
        else:
            print('all files are trimmed for ' + field_names[b])

        # -----------------
        # 5. make script to fit models
        # -----------------

        print("")
        print("setting up script to fit models")
        print("")

        fit_run_info = setup_batch_beast_fit.setup_batch_beast_fit(
            settings,
            num_percore=1,
            nice=19,
            overwrite_logfile=False,
            prefix="source activate b13",
            use_sd=True,
            nsubs=settings.n_subgrid,
            nprocs=1,
        )

        # check if the fits exist before moving on
        tot_remaining = len(fit_run_info["done"]) - np.sum(
            fit_run_info["done"])
        if tot_remaining > 0:
            print("\n**** go run fitting code for " + field_names[b] +
                  "! ****")
            print("Here are the " + str(len(fit_run_info["files_to_run"])) +
                  " commands to run:")
            for job_file in fit_run_info["files_to_run"]:
                print("at -f ./" + job_file + " now")
            continue
        else:
            print("all fits are complete for " + field_names[b])

        # -----------------
        # 6. plots
        # -----------------

        print('')
        print('making plots')
        print('')

        # grab relevant file names
        file_dict = create_filenames.create_filenames(
            settings,
            use_sd=True,
            nsubs=settings.n_subgrid,
        )

        plot_param_recovery.plot_param_recovery(
            file_dict['photometry_files'],
            file_dict['stats_files'],
            field_names[b] + '_param_recovery.pdf',
            max_nbins=20,
        )

        for stats_file in file_dict['stats_files']:
            plot_param_err.plot(stats_file, n_bins=10)
            plot_triangle.plot_triangle(stats_file)
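Like the production wrapper, this hands trimming and fitting off to queue jobs, so it is meant to be re-run until every stage reports complete:

# repeated calls skip stages whose output files already exist
beast_verification_wrapper()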
Example #6
def run_fitting(
    use_sd=True,
    nsubs=1,
    nprocs=1,
    choose_sd_sub=None,
    choose_subgrid=None,
    pdf2d_param_list=['Av', 'Rv', 'f_A', 'M_ini', 'logA', 'Z', 'distance'],
    resume=False,
):
    """
    Run the fitting.  If nsubs > 1, this will find existing subgrids.
    If use_sd is True, will also incorporate source density info.

    The additional choose_* options are to make queue scripts usable,
    by specifying a given SD+sub and/or subgrid for the fitting run.


    Parameters
    ----------
    use_sd : boolean (default=True)
        If True, fit the catalog files that were split by source density
        (determined by finding matches to datamodel.astfile with SD info)

    nsubs : int (default=1)
        number of subgrids used for the physics model

    nprocs : int (default=1)
        Number of parallel processes to use
        (currently only implemented for subgrids)

    choose_sd_sub : list of two strings (default=None)
        If this is set, the fitting will just be for this combo of SD+sub,
        rather than all of them.  Overrides use_sd.
        format of the list: ['#','#']

    choose_subgrid : int (default=None)
        If this is set, the fitting will just be for this subgrid index.
        If nsubs=1, this is ignored.

    pdf2d_param_list : list of strings or None
        If set, do 2D PDFs of these parameters.  If None, don't make 2D PDFs.

    resume : boolean (default=False)
        choose whether to resume existing run or start over

    """

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)
    # check input parameters
    verify_params.verify_input_format(datamodel)

    # keep track of time
    start_time = time.time()

    # --------------------
    # make lists of file names
    # --------------------

    file_dict = create_filenames.create_filenames(
        use_sd=use_sd,
        nsubs=nsubs,
        choose_sd_sub=choose_sd_sub,
        choose_subgrid=choose_subgrid,
    )

    # input files
    photometry_files = file_dict["photometry_files"]
    # modelsedgrid_files = file_dict["modelsedgrid_files"]
    modelsedgrid_trim_files = file_dict["modelsedgrid_trim_files"]
    # noise_files = file_dict["noise_files"]
    noise_trim_files = file_dict["noise_trim_files"]

    # output files
    stats_files = file_dict["stats_files"]
    pdf_files = file_dict["pdf_files"]
    pdf2d_files = file_dict["pdf2d_files"]
    if pdf2d_param_list is None:
        pdf2d_files = [None for i in range(len(pdf2d_files))]
    lnp_files = file_dict["lnp_files"]

    # total number of files
    n_files = len(photometry_files)

    # other potentially useful info
    sd_sub_info = file_dict["sd_sub_info"]
    # gridsub_info = file_dict['gridsub_info']

    # if using subgrids, make the grid dictionary file:
    # File where the ranges and number of unique values for the grid
    # will be stored (this can take a while to calculate)

    if nsubs > 1:

        gridpickle_files = file_dict["gridpickle_files"]

        for i in range(len(gridpickle_files)):
            if not os.path.isfile(gridpickle_files[i]):

                # list of corresponding SED grids and noise models

                # - with SD+sub: get file list for ALL subgrids at current SD+sub
                if use_sd or (choose_sd_sub is not None):
                    temp = create_filenames.create_filenames(
                        nsubs=nsubs, choose_sd_sub=sd_sub_info[i], choose_subgrid=None
                    )
                    modelsedgrid_trim_list = temp["modelsedgrid_trim_files"]
                    noise_trim_list = temp["noise_trim_files"]

                # - no SD info: get file list for ALL subgrids
                else:
                    temp = create_filenames.create_filenames(
                        use_sd=False, nsubs=nsubs, choose_subgrid=None
                    )
                    modelsedgrid_trim_list = temp["modelsedgrid_trim_files"]
                    noise_trim_list = temp["noise_trim_files"]

                # create the grid info dictionary
                print("creating grid_info_dict for " + gridpickle_files[i])
                grid_info_dict = subgridding_tools.reduce_grid_info(
                    modelsedgrid_trim_list, noise_trim_list, nprocs=nprocs
                )
                # save it
                with open(gridpickle_files[i], "wb") as p:
                    pickle.dump(grid_info_dict, p)
                print("wrote grid_info_dict to " + gridpickle_files[i])

    # --------------------
    # do the fitting!
    # --------------------

    # set up function inputs

    if nsubs == 1:

        input_list = [
            (
                photometry_files[i],
                modelsedgrid_trim_files[i],
                noise_trim_files[i],
                stats_files[i],
                pdf_files[i],
                pdf2d_files[i],
                pdf2d_param_list,
                lnp_files[i],
                None,
                resume,
            )
            for i in range(n_files)
        ]

    if nsubs > 1:

        input_list = [
            (
                photometry_files[i],
                modelsedgrid_trim_files[i],
                noise_trim_files[i],
                stats_files[i],
                pdf_files[i],
                pdf2d_files[i],
                pdf2d_param_list,
                lnp_files[i],
                gridpickle_files[i],
                resume,
            )
            for i in range(n_files)
        ]

    # run the fitting (via parallel wrapper)

    parallel_wrapper(fit_submodel, input_list, nprocs=nprocs)

    # see how long it took!
    new_time = time.time()
    print("time to fit: ", (new_time - start_time) / 60.0, " min")
Example #7
def make_trim_scripts(
    beast_settings_info,
    num_subtrim=1,
    nice=None,
    prefix=None,
):
    """
    `setup_batch_beast_trim.py` needs lists of file names to create batch trim
    files.  This function generates those file names and calls it.

    NOTE: This assumes you're using source density or background dependent noise
    models.

    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    num_subtrim : int (default = 1)
        number of trim batch jobs

    nice : int (default = None)
        set this to an integer (-20 to 20) to prepend a "nice" level
        to the trimming command

    prefix : string (default=None)
        Set this to a string (such as 'source activate astroconda') to prepend
        to each batch file (use '\n's to make multiple lines)

    Returns
    -------
    job_files : list of strings
        Names of the newly created job files
    """

    # process beast settings info
    if isinstance(beast_settings_info, str):
        settings = beast_settings.beast_settings(beast_settings_info)
    elif isinstance(beast_settings_info, beast_settings.beast_settings):
        settings = beast_settings_info
    else:
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # make lists of file names
    file_dict = create_filenames.create_filenames(
        settings,
        use_sd=True,
        nsubs=settings.n_subgrid,
    )
    # extract some useful ones
    photometry_files = file_dict["photometry_files"]
    modelsedgrid_files = file_dict["modelsedgrid_files"]
    noise_files = file_dict["noise_files"]
    modelsedgrid_trim_files = file_dict["modelsedgrid_trim_files"]
    noise_trim_files = file_dict["noise_trim_files"]
    # the unique sets of things
    unique_sedgrid = [
        x for i, x in enumerate(modelsedgrid_files)
        if i == modelsedgrid_files.index(x)
    ]

    # save the list of job files
    job_file_list = []

    # iterate through each model grid
    for i in range(settings.n_subgrid):

        # indices for this model grid
        grid_ind = [
            ind for ind, mod in enumerate(modelsedgrid_files)
            if mod == unique_sedgrid[i]
        ]

        # create corresponding files for each of those
        input_noise = [noise_files[ind] for ind in grid_ind]
        input_phot = [photometry_files[ind] for ind in grid_ind]
        # to get the trim prefix, find the common string between trimmed noise
        # files and trimmed SED files
        input_trim_prefix = []
        for ind in grid_ind:
            str1 = modelsedgrid_trim_files[ind]
            str2 = noise_trim_files[ind]
            # find longest match
            match = SequenceMatcher(None, str1, str2).find_longest_match(
                0, len(str1), 0, len(str2))
            # grab that substring (and remove trailing "_")
            input_trim_prefix.append(str1[match.a:match.a + match.size][:-1])

        # check if the trimmed grids exist before moving on
        check_trim = [
            os.path.isfile(noise_trim_files[ind]) for ind in grid_ind
        ]

        # if any aren't trimmed for this model grid, set up trimming
        if np.sum(check_trim) < len(input_noise):

            job_path = "./{0}/trim_batch_jobs/".format(settings.project)
            if settings.n_subgrid > 1:
                file_prefix = "BEAST_gridsub" + str(i)
            if settings.n_subgrid == 1:
                file_prefix = "BEAST"

            # generate trimming at-queue commands
            setup_batch_beast_trim.generic_batch_trim(
                unique_sedgrid[i],
                input_noise,
                input_phot,
                input_trim_prefix,
                settings.obs_colnames,
                job_path=job_path,
                file_prefix=file_prefix,
                num_subtrim=num_subtrim,
                nice=nice,
                prefix=prefix,
            )

            job_file_list.append(job_path + file_prefix +
                                 "_batch_trim.joblist")

    return job_file_list
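A minimal usage sketch; the settings file name is illustrative:

# create any missing trim job files and print the at-queue commands
job_files = make_trim_scripts("beast_settings.txt", num_subtrim=1, nice=19)
for job in job_files:
    print("at -f " + job + " now")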
Example #8
def generate_files_for_tests(run_beast=True, run_tools=True):
    """
    Use the metal_small example to generate a full set of files for the BEAST
    regression tests.

    Parameters
    ----------
    run_beast : boolean (default=True)
        if True, run the BEAST

    run_tools : boolean (default=True)
        if True, run the code to generate things for tools
    """

    # read in BEAST settings
    settings_orig = beast_settings.beast_settings("beast_settings.txt")
    # also make a version with subgrids
    settings_subgrids = copy.deepcopy(settings_orig)
    settings_subgrids.n_subgrid = 2
    settings_subgrids.project = f"{settings_orig.project}_subgrids"

    # ==========================================
    # run the beast for each set of settings
    # ==========================================

    if run_beast:

        for settings in [settings_orig, settings_subgrids]:

            # -----------------
            # physics model
            # -----------------
            create_physicsmodel.create_physicsmodel(
                settings,
                nsubs=settings.n_subgrid,
                nprocs=1,
            )

            # -----------------
            # ASTs
            # -----------------

            # currently only works for no subgrids
            if settings.n_subgrid == 1:
                make_ast_inputs.make_ast_inputs(settings,
                                                pick_method="flux_bin_method")

            # -----------------
            # obs model
            # -----------------
            create_obsmodel.create_obsmodel(
                settings,
                use_sd=False,
                nsubs=settings.n_subgrid,
                nprocs=1,
                use_rate=True,
            )

            # -----------------
            # trimming
            # -----------------

            # make file names
            file_dict = create_filenames.create_filenames(
                settings, use_sd=False, nsubs=settings.n_subgrid)

            # read in the observed data
            obsdata = Observations(settings.obsfile, settings.filters,
                                   settings.obs_colnames)

            for i in range(settings.n_subgrid):

                # get the modesedgrid on which to generate the noisemodel
                modelsedgridfile = file_dict["modelsedgrid_files"][i]
                modelsedgrid = SEDGrid(modelsedgridfile)

                # read in the noise model just created
                noisemodel_vals = noisemodel.get_noisemodelcat(
                    file_dict["noise_files"][i])

                # trim the model sedgrid
                sed_trimname = file_dict["modelsedgrid_trim_files"][i]
                noisemodel_trimname = file_dict["noise_trim_files"][i]

                trim_grid.trim_models(
                    modelsedgrid,
                    noisemodel_vals,
                    obsdata,
                    sed_trimname,
                    noisemodel_trimname,
                    sigma_fac=3.0,
                )

            # -----------------
            # fitting
            # -----------------

            run_fitting.run_fitting(
                settings,
                use_sd=False,
                nsubs=settings.n_subgrid,
                nprocs=1,
                pdf2d_param_list=["Av", "M_ini", "logT"],
                pdf_max_nbins=200,
            )

            # -----------------
            # merging
            # -----------------

            # it'll automatically skip for no subgrids
            merge_files.merge_files(settings,
                                    use_sd=False,
                                    nsubs=settings.n_subgrid)

            print("\n\n")

    # ==========================================
    # reference files for assorted tools
    # ==========================================

    if run_tools:

        # -----------------
        # compare_spec_type
        # -----------------

        # the input settings
        input = {
            "spec_ra": [72.67213351],
            "spec_dec": [-67.71720515],
            "spec_type": ["A"],
            "spec_subtype": [0],
            "lumin_class": ["IV"],
            "match_radius": 0.2,
        }

        # run it
        output = compare_spec_type.compare_spec_type(
            settings_orig.obsfile,
            "{0}/{0}_stats.fits".format(settings_orig.project),
            **input,
        )

        # save the inputs and outputs
        asdf.AsdfFile({
            "input": input,
            "output": output
        }).write_to("{0}/{0}_compare_spec_type.asdf".format(
            settings_orig.project))

        # -----------------
        # star_type_probability
        # -----------------

        # input settings
        input = {
            "output_filebase": None,
            "ext_O_star_params": {
                "min_M_ini": 10,
                "min_Av": 0.5,
                "max_Av": 5
            },
        }

        # run it
        output = star_type_probability.star_type_probability(
            "{0}/{0}_pdf1d.fits".format(settings_orig.project),
            "{0}/{0}_pdf2d.fits".format(settings_orig.project),
            **input,
        )

        # save the inputs and outputs
        asdf.AsdfFile({
            "input": input,
            "output": output
        }).write_to("{0}/{0}_star_type_probability.asdf".format(
            settings_orig.project))

    # ==========================================
    # asdf file permissions
    # ==========================================

    # for unknown reasons, asdf currently writes files with permissions set
    # to -rw-------.  This changes it to -rw-r--r-- (like the rest of the
    # BEAST files) so Karl can easily copy them over to the cached file
    # website.

    # list of asdf files
    asdf_files = glob.glob("*/*.asdf")
    # go through each one to change permissions
    for fname in asdf_files:
        os.chmod(fname,
                 stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)
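A minimal usage sketch, assuming the working directory holds the metal_small example data and its beast_settings.txt:

# regenerate every regression-test file: the BEAST runs plus the
# reference outputs for the assorted tools
generate_files_for_tests(run_beast=True, run_tools=True)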