示例#1
0
def _metrics_out_of_range(table):
    """Tell whether a metrics table contains implausible values.

    The checks run in a fixed order and stop at the first hit, so the
    'corr' and 'overlap' rows are only looked up when needed.

    :table: metrics DataFrame (rows are looked up by the labels 'corr'
        and 'overlap')
    :returns: truthy if any value is above 500, any 'corr' value lies
        outside [-1, 1], or any 'overlap' value lies outside [0, 1]
    """
    if (table > 500).any().any():
        return True

    corr = table.loc['corr']
    if (corr > 1).any() or (corr < -1).any():
        return True

    overlap = table.loc['overlap']
    return (overlap > 1).any() or (overlap < 0).any()


def check_metrics(models, sites):
    """Scan saved metric CSVs for values that cannot be right.

    For every model/site pair the file
    ``source/models/<model>/metrics/<model>_<site>_metrics.csv`` is read
    (first column as the index) if it exists; pairs without a file are
    silently skipped. Each offending pair is reported via ``print_bad``.

    :models: iterable of model names
    :sites: iterable of site names
    :returns: list of ``(model, site)`` tuples whose metrics look wrong

    """
    path_template = 'source/models/{m}/metrics/{m}_{s}_metrics.csv'
    flagged = []

    for model in models:
        for site in sites:
            metrics_file = path_template.format(m=model, s=site)
            if os.path.exists(metrics_file):
                table = pd.read_csv(metrics_file, index_col=0)
                if _metrics_out_of_range(table):
                    print_bad(
                        "Crazy value for {m} at {s}".format(m=model, s=site))
                    flagged.append((model, site))

    return flagged
示例#2
0
def check_model_data(models, sites):
    """Run the sanity check over every stored model simulation.

    Looks for ``model_data/<model>/<model>_<site>.nc`` for each pair and
    prints a one-character progress marker per site: ``x`` when the file
    is missing, ``.`` when ``model_sanity_check`` passes. A
    ``RuntimeError`` from the check is reported via ``print_bad``.

    :models: list of model names
    :sites: list of site names
    :returns: list of ``(model, site)`` tuples that failed the check
    """
    header = "Checking {nm} models at {ns} sites.".format(nm=len(models),
                                                          ns=len(sites))
    print(header)

    failures = []
    for model in models:
        print('Checking {m}:'.format(m=model))
        for site in sites:
            nc_path = 'model_data/{m}/{m}_{s}.nc'.format(m=model, s=site)
            if os.path.exists(nc_path):
                with xr.open_dataset(nc_path) as dataset:
                    try:
                        model_sanity_check(dataset, model, site)
                    except RuntimeError as err:
                        print_bad('\n' + str(err))
                        failures.append((model, site))
                    else:
                        print('.', end='', flush=True)
            else:
                # No simulation stored for this model/site pair.
                print('x', end='', flush=True)
        # Terminate the row of progress markers for this model.
        print('')

    return failures
示例#3
0
def run_simulation(model,
                   name,
                   site,
                   multivariate=False,
                   overwrite=False,
                   fix_closure=True):
    """Fit a model, run it at a site, and save the result as netcdf.

    The output goes to ``model_data/<name>/<name>_<site>.nc``. The model
    is run up to three times; each run is passed to
    ``model_sanity_check`` and the first one that passes is kept. If all
    three fail, the last result is still saved, with a 'Warning' entry
    added to its attributes.

    :model: sklearn-style model or pipeline (regression estimator)
    :name: name of the model
    :site: PALS site name to run the model at (or 'all', or 'debug')
    :multivariate: forwarded to ``fit_predict``
    :overwrite: re-run and replace the netcdf file if it already exists
    :fix_closure: forwarded to ``fit_predict``
    """
    out_dir = 'model_data/{n}'.format(n=name)
    os.makedirs(out_dir, exist_ok=True)
    nc_file = '{d}/{n}_{s}.nc'.format(d=out_dir, n=name, s=site)

    if not overwrite and os.path.isfile(nc_file):
        print_warn(
            "Sim netcdf already exists for {n} at {s}, use --overwrite to re-run."
            .format(n=name, s=site))
        return

    # Retry a failed run up to twice more, in case of numerical problems.
    for attempt in range(3):
        sim_data = fit_predict(model,
                               name,
                               site,
                               multivariate=multivariate,
                               fix_closure=fix_closure)

        try:
            model_sanity_check(sim_data, name, site)
        except RuntimeError as err:
            failure = err
        else:
            # Sanity check passed: keep this run.
            break

        print_warn(str(failure))
        if attempt == 2:
            # Out of attempts: flag the data but fall through and save it.
            print_bad(
                'Giving up after 3 failed runs. Check your model structres or met data.'
            )
            failure_note = {
                'Warning': 'model failed after 3 attempts, saved anyway'
            }
            sim_data.attrs.update(failure_note)
        else:
            print_warn('Attempting a %s run.' % ['2nd', '3rd'][attempt])

    if os.path.isfile(nc_file):
        report, template = print_warn, "Overwriting sim file at {f}"
    else:
        report, template = print_good, "Writing sim file at {f}"
    report(template.format(f=nc_file))

    sim_data.to_netcdf(nc_file)
示例#4
0
def eval_simulation(name,
                    site,
                    sim_file=None,
                    plots=False,
                    fix_closure=True,
                    qc=True):
    """Main function for evaluating an existing simulation.

    Opens the simulation netcdf, loads the flux data for the site, and
    passes both to ``evaluate_simulation``. When ``sim_file`` is given,
    its contents are first written to the standard location returned by
    ``get_sim_nc_path(name, site)``, replacing any file already there.

    If the simulation file cannot be opened, the problem is reported via
    ``print_bad`` and the function returns without evaluating.

    TODO: skip running if cached, for easier page regeneration

    :name: name of the model
    :site: PALS site name to run the model at
    :sim_file: Path to simulation netcdf (defaults to the standard path
        for this model and site)
    :plots: if true, also call ``diagnostic_plots`` on the simulation
    :fix_closure: forwarded to ``get_flux_data``
    :qc: forwarded to ``evaluate_simulation``
    """
    nc_path = get_sim_nc_path(name, site)
    filename = nc_path if sim_file is None else sim_file

    try:
        sim_data = xr.open_dataset(filename)
    except (OSError, RuntimeError) as e:
        print_bad(
            "Sim file ({f}) doesn't exist. What are you doing? {e}".format(
                f=filename, e=e))
        return

    # Everything after a successful open runs under try/finally so the
    # dataset is closed even when evaluation or plotting raises.
    try:
        if sim_file is not None:
            # WARNING! overwrites any existing sim at the standard path!
            sim_data.to_netcdf(nc_path)

        flux_data = get_flux_data([site], fix_closure=fix_closure)[site]

        evaluate_simulation(sim_data, flux_data, name, qc=qc)

        if plots:
            diagnostic_plots(sim_data, flux_data, name)
    finally:
        sim_data.close()
示例#5
0
def main_rst_gen(name, site):
    """Write the RST output for one model/site from existing results.

    Loads the stored evaluation via ``load_sim_evaluation`` and the
    existing plots via ``get_existing_plots``, then passes both to
    ``model_site_rst_write``. If either lookup raises ``OSError`` or
    ``RuntimeError``, the problem is reported via ``print_bad`` and
    nothing is written.

    :name: name of the model
    :site: PALS site name to run the model at
    """
    try:
        results = load_sim_evaluation(name, site)
        plots = get_existing_plots(name, site)
    except (OSError, RuntimeError) as err:
        message = 'one or more files missing for {n} at {s}: {e}'.format(
            n=name, s=site, e=err)
        print_bad(message)
    else:
        model_site_rst_write(name, site, results, plots)