Example #1
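This excerpt omits its module-level imports. Judging by the other snippets on this page, it needs roughly the following; the module paths for evaluate_simulation and diagnostic_plots are guesses, the rest appear verbatim elsewhere in this listing:

import os
import xarray as xr

from pals_utils.data import get_flux_data
from pals_utils.logging import setup_logger

from empirical_lsm.data import get_sim_nc_path
from empirical_lsm.evaluation import evaluate_simulation  # guessed module path
from empirical_lsm.plots import diagnostic_plots  # guessed module path
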
def eval_simulation(name, site, sim_file=None, plots=False, fix_closure=True,
                    qc=True, overwrite=False):
    """Main function for evaluating an existing simulation.

    Copies simulation data to source directory.

    TODO: skip running if cached, for easier page regeneration

    :name: name of the model
    :site: PALS site name to run the model at
    :sim_file: Path to simulation netcdf. Only required if simulation is at a non-standard place.
    """
    args = locals()
    args_str = '\n'.join([k + ': ' + str(args[k]) for k in sorted(args.keys())])

    logger = setup_logger(__name__, 'logs/eval/{m}/{s}/{m}_{s}.log'.format(m=name, s=site))
    logger.info("Evaluating model.\nArgs:\n{a}".format(a=args_str))

    nc_path = get_sim_nc_path(name, site)

    if sim_file is None:
        filename = nc_path
    else:
        filename = sim_file

    eval_path = 'source/models/{n}/metrics/{n}_{s}_metrics.csv'.format(n=name, s=site)

    if not overwrite and os.path.exists(eval_path):
        if os.path.exists(filename) and \
                os.path.getmtime(filename) > os.path.getmtime(eval_path):
            logger.warning("Re-evaluating because the simulation is newer than the evaluation file")
        else:
            logger.warning("Evaluation file already exists, skipping")
            return

    try:
        sim_data = xr.open_dataset(filename)
    except (OSError, RuntimeError) as e:
        logger.error("Sim file ({f}) doesn't exist. What are you doing? {e}".format(f=filename, e=e))
        return

    if sim_file is not None:
        logger.warning("Copying external sim file to the standard location: {p}".format(p=nc_path))
        sim_data.to_netcdf(nc_path)

    flux_data = get_flux_data([site], fix_closure=fix_closure)[site]

    evaluate_simulation(sim_data, flux_data, name, site=site, qc=qc)

    if plots:
        diagnostic_plots(sim_data, flux_data, name, site=site)

    sim_data.close()

    return
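A minimal usage sketch ('3km27' is a placeholder model name; 'Tumba' is one of the PALS flux sites):

eval_simulation('3km27', 'Tumba', plots=True, overwrite=True)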
Example #2
from docopt import docopt

import os
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt

from mpl_toolkits import basemap
from mpl_toolkits.axes_grid1 import ImageGrid
from matplotlib.colors import LinearSegmentedColormap

from empirical_lsm.gridded_datasets import get_MODIS_data  # , get_GLEAM3a_data, get_MPI_data

from pals_utils.logging import setup_logger
logger = setup_logger(__name__, 'logs/plot_dataset.log')


def get_range(x):
    """Return the [min, max] of x."""
    # np.quantile expects fractions in [0, 1]; for the 0th and 100th
    # percentiles, np.percentile is the right call.
    return np.percentile(x, [0, 100])
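A quick sanity check of the fixed helper (illustrative values):

assert list(get_range(np.array([3.0, 1.0, 2.0]))) == [1.0, 3.0]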


def colormap(x0):
    """custom colourmap for map plots"""

    if x0 > 0:
        x2 = (1 + 2 * x0) / 3.0
        x3 = (2 + x0) / 3.0

        # blue, white, red, yellow, purple
        cdict1 = {
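The dict literal above is cut off in this excerpt. For context, a minimal sketch of how such a cdict feeds matplotlib's LinearSegmentedColormap (illustrative anchor values, not the author's; each tuple is (position, value_below, value_above)):

cdict = {'red':   [(0.0, 0.0, 0.0), (1.0, 1.0, 1.0)],
         'green': [(0.0, 0.0, 0.0), (1.0, 0.0, 0.0)],
         'blue':  [(0.0, 1.0, 1.0), (1.0, 0.0, 0.0)]}
cmap = LinearSegmentedColormap('blue_red', cdict)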
Example #3
    -h, --help    Show this screen and exit.
"""

from docopt import docopt

import os
from glob import glob

from empirical_lsm.data import get_sites
from empirical_lsm.checks import check_model_data, check_metrics

from empirical_lsm.offline_simulation import run_model_site_tuples_mp
from empirical_lsm.offline_eval import eval_simulation

from pals_utils.logging import setup_logger
logger = setup_logger(None, 'logs/check_sanity.log')


def main(args):
    if args['--sites'] is None:
        sites = get_sites('PLUMBER_ext')
    else:
        try:
            sites = get_sites(args['--sites'])
        except Exception:
            # not a named site set; treat the argument as a comma-separated list
            sites = args['--sites'].split(',')

    if args['all']:
        if args['data']:
            models = [os.path.basename(f) for f in glob('model_data/*')]
        else:
Example #4
Usage:
    run_model.py run <name> <site> [--no-mp] [--multivariate] [--overwrite] [--no-fix-closure]

Options:
    -h, --help  Show this screen and exit.
"""

from docopt import docopt

from pals_utils.data import set_config

from empirical_lsm.offline_simulation import run_simulation_mp

from pals_utils.logging import setup_logger

logger = setup_logger(__name__, 'logs/run_model.log')

set_config(['vars', 'flux'], ['NEE', 'Qle', 'Qh'])


def main(args):
    name = args['<name>']
    site = args['<site>']

    run_simulation_mp(name,
                      site,
                      no_mp=args['--no-mp'],
                      multivariate=args['--multivariate'],
                      overwrite=args['--overwrite'],
                      fix_closure=not args['--no-fix-closure'])
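These scripts share the standard docopt entry point; presumably each module ends with something like this, with the usage text coming from the module docstring:

if __name__ == '__main__':
    args = docopt(__doc__)
    main(args)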
Example #5
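Like Example #1, this excerpt omits its imports; a best guess based on the rest of the listing (the module paths for fit_predict and model_sanity_check, and the log path, are assumptions):

import os

from pals_utils.logging import setup_logger

from empirical_lsm.models import fit_predict  # guessed module path
from empirical_lsm.checks import model_sanity_check  # guessed module path

logger = setup_logger(__name__, 'logs/offline_simulation.log')  # guessed log path
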
def run_simulation(model,
                   name,
                   site,
                   multivariate=False,
                   overwrite=False,
                   fix_closure=True):
    """Main function for fitting and running a model.

    :model: sklearn-style model or pipeline (regression estimator)
    :name: name of the model
    :site: PALS site name to run the model at (or 'all', or 'debug')
    """
    args = locals()
    args_str = '\n'.join(
        [k + ': ' + str(args[k]) for k in sorted(args.keys())])

    sim_dir = 'model_data/{n}'.format(n=name)
    os.makedirs(sim_dir, exist_ok=True)

    nc_file = '{d}/{n}_{s}.nc'.format(d=sim_dir, n=name, s=site)

    if os.path.isfile(nc_file) and not overwrite:
        logger.warning(
            "Sim netcdf already exists for {n} at {s}, use --overwrite to re-run."
            .format(n=name, s=site))
        return

    run_logger = setup_logger(
        __name__, 'logs/run/{m}/{s}/{m}_{s}.log'.format(m=name, s=site))
    run_logger.info("Running model.\nArgs:\n{a}".format(a=args_str))

    for i in range(3):
        # Attempt the model run up to 3 times, in case of transient numerical problems
        try:
            sim_data = fit_predict(model,
                                   name,
                                   site,
                                   multivariate=multivariate,
                                   fix_closure=fix_closure)
        except AssertionError as e:
            logger.exception("Model failed: " + str(e))
            return

        try:
            model_sanity_check(sim_data, name, site)
        except RuntimeError as e:
            logger.warning(str(e))

            if i < 2:
                logger.warning('Attempting a %s run.' % ['2nd', '3rd'][i])
                continue
            else:
                logger.error(
                    'Giving up after 3 failed runs. Check your model structures or met data.'
                )
                sim_data.attrs.update(
                    {'Warning': 'model failed after 3 attempts, saved anyway'})
        else:
            # sanity check passed; the model run succeeded
            break

    if os.path.isfile(nc_file):
        logger.warning("Overwriting sim file at {f}".format(f=nc_file))
    else:
        logger.info("Writing sim file at {f}".format(f=nc_file))

    sim_data.to_netcdf(nc_file)

    return
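A hedged usage sketch: any sklearn-style regressor should work, and the model name 'lin' and site 'Tumba' are placeholders:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

run_simulation(make_pipeline(StandardScaler(), LinearRegression()),
               name='lin', site='Tumba', overwrite=True)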
Example #6
from docopt import docopt

import os
import datetime

import numpy as np
import xarray as xr

import pals_utils.data as pud

from empirical_lsm.data import get_sites, get_data_dir
from empirical_lsm.gridded_datasets import get_dataset_data, get_dataset_freq
from empirical_lsm.models import get_model

from pals_utils.logging import setup_logger
logger = setup_logger(__name__, 'logs/gridded_benchmarks.log')


def predict_gridded(model, dataset_data, flux_vars, datafreq=None):
    """Predict model results for gridded data.

    :model: scikit-learn style model/pipeline
    :dataset_data: xarray-style dataset of driving data
    :flux_vars: flux variables to predict (e.g. NEE, Qle, Qh)
    :datafreq: data timestep frequency, if it needs to be specified
    :returns: xarray-style dataset of predictions
    """
    # set prediction metadata
    prediction = dataset_data[list(dataset_data.coords)]

    # Arrays like (var, lon, lat, time)
    result = np.full([
Example #7
    inspect_cluster_regression.py <model_path>
    inspect_cluster_regression.py (-h | --help | --version)

Options:
    -h, --help    Show this screen and exit.
    --option=<n>  Option description [default: 3]
"""

from docopt import docopt

import pickle
import numpy as np
import empirical_lsm

from pals_utils.logging import setup_logger
logger = setup_logger(__name__, 'logs/inspect_cluster_regression.log')


def get_wrapper_vars(wrapper):
    """Flatten a wrapper's var_lags dict into 'var_lag' name strings."""
    return [v + '_' + lag for v, lags in wrapper.var_lags.items() for lag in lags]
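For example (illustrative var_lags contents; real lag labels depend on the wrapper):

# {'SWdown': ['30min', '2d'], 'Tair': ['30min']}
# -> ['SWdown_30min', 'SWdown_2d', 'Tair_30min']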


def get_cluster_regression_model(wrapper):
    """Unwraps a model"""
    if isinstance(wrapper, empirical_lsm.clusterregression.ModelByCluster):
        return wrapper
    else:
        return get_cluster_regression_model(wrapper.model)


def get_cluster_regression_centers(model):
Example #8
    import_sim.py benchmark <name> [<site>...]
    import_sim.py sim <name> <site> <file>

Options:
    -h, --help  Show this screen and exit.
"""

from docopt import docopt
import numpy as np
import xarray as xr

from pals_utils.data import get_flux_data
from empirical_lsm.data import get_sites, get_sim_nc_path

from pals_utils.logging import setup_logger
logger = setup_logger(__name__, 'logs/import_sim.log')


def fix_benchmark(site_data, name, site):
    """Performs checks on broken benchmarks, and fixes them in place.

    :site_data: xarray dataset (will be modified in place)
    :name: benchmark name
    :site: PALS site name
    """

    if name in ['Manabe_Bucket.2', 'Penman_Monteith.1']:
        # drop the circular lat/lon coordinate references, then re-attach
        lon = site_data['longitude']
        lat = site_data['latitude']
        del site_data['longitude'], site_data['latitude']
        del lon['latitude'], lat['longitude']
        site_data['longitude'] = lon
Example #9
Options:
    -h, --help       Show this screen and exit.
    --sites=<sites>  Sites to run the models at [default: PLUMBER_ext]
"""

from docopt import docopt

import subprocess

from empirical_lsm.offline_simulation import run_simulation_mp
from empirical_lsm.offline_eval import eval_simulation_mp, main_rst_gen_mp
from empirical_lsm.model_sets import get_model_set
from empirical_lsm.model_search import model_site_index_rst_mp, model_search_index_rst, get_available_models

from pals_utils.logging import setup_logger
logger = setup_logger(__name__, 'logs/eval_all.log')


def eval_simulation_all(names,
                        sites,
                        run=False,
                        multivariate=True,
                        evalu=False,
                        plots=False,
                        rst=False,
                        html=False,
                        rebuild=False,
                        no_mp=False,
                        overwrite=False,
                        fix_closure=True):
Example #10
Description: Evaluates a model (sim or set of sims) and produces rst output with diagnostics

Usage:
    eval_model.py eval <name> <site> [<file>] [--no-mp] [--plot] [--no-fix-closure] [--no-qc]
    eval_model.py rst-gen <name> <site> [--no-mp]

Options:
    -h, --help  Show this screen and exit.
"""

from docopt import docopt

from empirical_lsm.offline_eval import eval_simulation_mp, main_rst_gen_mp

from pals_utils.logging import setup_logger
logger = setup_logger(__name__, 'logs/eval_model.log')


def main(args):
    name = args['<name>']
    site = args['<site>']
    sim_file = args['<file>']
    plots = args['--plot']

    if args['eval']:
        eval_simulation_mp(name,
                           site,
                           sim_file,
                           plots,
                           no_mp=args['--no-mp'],
                           fix_closure=not args['--no-fix-closure'],
Example #11
    model_combos.py (-h | --help | --version)

Options:
    -h, --help       Show this screen and exit.
    --sites=<sites>  Sites to run the models at [default: PLUMBER_ext]
"""

from docopt import docopt

from empirical_lsm.model_sets import get_combo_model_names

from empirical_lsm.offline_simulation import run_simulation_mp
from empirical_lsm.offline_eval import eval_simulation_mp

from pals_utils.logging import setup_logger
logger = setup_logger(__name__, 'logs/model_combos.log')


def main(sites, run=False, multivariate=True, evalu=False, plots=False,
         no_mp=False, overwrite=False, fix_closure=True):

    names = get_combo_model_names()

    if run:
        for name in names:
            run_simulation_mp(name, sites, no_mp=no_mp, multivariate=multivariate,
                              overwrite=overwrite, fix_closure=fix_closure)

    if evalu:
        for name in names:
            eval_simulation_mp(name, sites, plots=plots, no_mp=no_mp,