Exemplo n.º 1
0
def eval_simulation_mp(name,
                       site,
                       sim_file=None,
                       plots=False,
                       no_mp=False,
                       fix_closure=True,
                       qc=True):
    """Evaluate simulations for `name`, in parallel when `site` is a site set.

    :param name: model name whose simulation output is evaluated
    :param site: a single site name, or a site-set key accepted by get_sites
    :param sim_file: optional explicit simulation file (single-site case only)
    :param plots: generate evaluation plots if True
    :param no_mp: force serial evaluation even for multi-site sets
    :param fix_closure: apply energy-balance closure fix (passed through)
    :param qc: use quality-controlled data (passed through)
    """
    # Will only work if simulations are already run.
    try:
        datasets = get_sites(site)

    except KeyError:  # Unknown site set: treat `site` as a single site.
        eval_simulation(name,
                        site,
                        sim_file,
                        plots=plots,
                        fix_closure=fix_closure,
                        qc=qc)

    else:
        if no_mp:
            for s in datasets:
                eval_simulation(name,
                                s,
                                plots=plots,
                                fix_closure=fix_closure,
                                qc=qc)
        else:
            # sim_file is None per-site: each site resolves its own file.
            f_args = [[name, s, None, plots, fix_closure, qc]
                      for s in datasets]
            # os.cpu_count() may return None; fall back to a single core.
            n_cpus = os.cpu_count() or 1
            ncores = min(n_cpus, 1 + n_cpus // 2)
            with Pool(ncores) as p:
                p.starmap(eval_simulation, f_args)
Exemplo n.º 2
0
def run_simulation_mp(name,
                      site,
                      no_mp=False,
                      multivariate=False,
                      overwrite=False,
                      fix_closure=True):
    """Run simulations for `name`, in parallel for multi-site runs.

    :param name: model name, resolved to a model object via get_model
    :param site: a single site name, or one of 'all'/'PLUMBER_ext'/'PLUMBER'
    :param no_mp: force serial execution even for multi-site runs
    :param multivariate: passed through to run_simulation
    :param overwrite: passed through to run_simulation
    :param fix_closure: passed through to run_simulation
    """
    # TODO: refactor to work with above caller.

    model = get_model(name)

    if site in ['all', 'PLUMBER_ext', 'PLUMBER']:
        logger.info('Running {n} at {s} sites'.format(n=name, s=site))
        datasets = get_sites(site)
        if no_mp:
            for s in datasets:
                run_simulation(model, name, s, multivariate, overwrite,
                               fix_closure)
        else:
            f_args = [(model, name, s, multivariate, overwrite, fix_closure)
                      for s in datasets]
            ncores = get_suitable_ncores()
            # BUGFIX: was `site is not 'debug'` — identity comparison with a
            # string literal is interning-dependent and a SyntaxWarning in
            # Python 3.8+; use inequality.
            if site != 'debug' and hasattr(model, 'memory_requirement'):
                # Cap workers so total memory use stays within half of RAM.
                ncores = max(
                    1,
                    int((psutil.virtual_memory().total / 2) //
                        model.memory_requirement))
            logger.info("Running on %d core(s)" % ncores)

            with Pool(ncores) as p:
                p.starmap(run_simulation, f_args)
    else:
        run_simulation(model, name, site, multivariate, overwrite, fix_closure)

    return
Exemplo n.º 3
0
def get_PLUMBER_metrics(name, site='all', variables=('Qle', 'Qh', 'NEE')):
    """get metrics dataframe from a site, with benchmarks for comparison

    :param name: model name used to locate the per-site metrics csv files
    :param site: a single site name, or 'all' for the PLUMBER_ext site set
    :param variables: flux variables to keep (default: Qle, Qh, NEE).
        Note: a tuple default avoids the mutable-default-argument pitfall;
        any list-like still works.
    :returns: dataframe with ranked metrics for model and benchmarks at site,
        or None if no csv files could be loaded
    """
    csv_file = './source/models/{n}/metrics/{n}_{s}_metrics.csv'

    # benchmark_names = ['1lin', '2lin', '3km27']
    benchmark_names = ['S_lin', 'ST_lin', 'STH_km27']

    if site == 'all':
        sites = get_sites('PLUMBER_ext')
    else:
        sites = [site]

    metric_df = []

    failures = []
    for s in sites:
        try:
            site_metrics = pd.read_csv(csv_file.format(n=name, s=s))
            # Long format: one row per (metric, variable) pair.
            site_metrics = pd.melt(site_metrics, id_vars='metric')
            site_metrics['name'] = name
            site_metrics['site'] = s
            metric_df.append(
                site_metrics[site_metrics.variable.isin(variables)])

            for b in benchmark_names:
                benchmark_metrics = pd.read_csv(csv_file.format(n=b, s=s))
                benchmark_metrics = pd.melt(benchmark_metrics,
                                            id_vars='metric')
                benchmark_metrics['name'] = b
                benchmark_metrics['site'] = s
                metric_df.append(benchmark_metrics[
                    benchmark_metrics.variable.isin(variables)])
        except Exception:
            # Best-effort: a site with missing/broken csv files is skipped
            # and reported below rather than aborting the whole collection.
            failures.append(s)
            continue

    if len(failures) > 0:
        # Use the module logger (was logging.warning — inconsistent with
        # the logger.error call below).
        logger.warning('Skipped {l} sites: {f}'.format(l=len(failures),
                                                       f=', '.join(failures)))

    if len(metric_df) == 0:
        logger.error(
            'Failed to load any csv files for {n} at {s} - skipping plot.'.
            format(n=name, s=site))
        return

    metric_df = pd.concat(metric_df).reset_index(drop=True)

    metric_df = rank_metric_df(metric_df)

    return metric_df
Exemplo n.º 4
0
def get_train_test_data(site,
                        met_vars,
                        flux_vars,
                        use_names,
                        qc=True,
                        fix_closure=True):
    """Gets training and testing data, PLUMBER style (leave one out)

    Set the training set using pals.data.set_config(['datasets', 'train'])

    :param site: test site name, or 'debug' for a fixed tiny train/test pair
    :param met_vars: met forcing variables to load
    :param flux_vars: flux variables to load
    :param use_names: passed through to get_train_data/get_test_data
    :param qc: use quality-controlled data (forced off in debug mode)
    :param fix_closure: apply energy-balance closure fix to training data
    :returns: dict combining training data, test data, and the site name
    """

    if site == 'debug':
        train_sites = ['Amplero']
        test_site = 'Tumba'

        # Use non-quality controlled data, to ensure there's enough to train
        qc = False

    else:
        train_sites = get_sites(get_config(['datasets', 'train']))
        test_site = site
        # Leave-one-out: drop the test site if it is a training site.
        # When it isn't, the filter is a no-op and we train on all sites,
        # so no separate branch is needed.
        train_sites = [s for s in train_sites if s != test_site]
        print("Training with {n} datasets".format(n=len(train_sites)))

    train_dict = get_train_data(train_sites,
                                met_vars,
                                flux_vars,
                                use_names=use_names,
                                qc=qc,
                                fix_closure=fix_closure)

    test_dict = get_test_data(test_site, met_vars, use_names=use_names, qc=qc)

    # Merge train and test dicts; test keys win on collision.
    train_test_data = train_dict
    train_test_data.update(test_dict)
    train_test_data['site'] = site

    # Debug dump of everything that was loaded.
    with open('new_data.txt', 'w') as f:
        for k in sorted(train_test_data.keys()):
            print(k, ':\n', train_test_data[k], file=f)

    return train_test_data
Exemplo n.º 5
0
def main_rst_gen_mp(name, site, sim_file=None, no_mp=False):
    """Generate rst files, in parallel for the PLUMBER_ext site set.

    :param name: model name to generate rst files for
    :param site: a single site name, or 'all'/'PLUMBER_ext' for the full set
    :param sim_file: unused here; kept for interface compatibility with the
        sibling *_mp helpers
    :param no_mp: force serial generation even for multi-site sets
    """
    if site in ['all', 'PLUMBER_ext']:
        # will only work if simulations are already evaluated.
        datasets = get_sites('PLUMBER_ext')

        if no_mp:
            for s in datasets:
                main_rst_gen(name, s)
        else:
            f_args = [[name, s] for s in datasets]
            # os.cpu_count() may return None; fall back to a single core.
            n_cpus = os.cpu_count() or 1
            ncores = min(n_cpus, 2 + n_cpus // 4)
            with Pool(ncores) as p:
                p.starmap(main_rst_gen, f_args)

    else:
        main_rst_gen(name, site)