def estimator_values_df(run_list, estimator_list, **kwargs):
    """Get a dataframe of estimator values.

    NB when parallelised the results will not be produced in order (so results
    from some run number will not necessarily correspond to that number run in
    run_list).

    Parameters
    ----------
    run_list: list of dicts
        List of nested sampling run dicts.
    estimator_list: list of functions
        Estimators to apply to runs.
    estimator_names: list of strs, optional
        Name of each func in estimator_list; must have the same length as
        estimator_list. Defaults to 'est_0', 'est_1', ... (also used when
        None is passed explicitly).
    parallel: bool, optional
        Whether or not to parallelise - see parallel_utils.parallel_apply.
    save_name: str or None, optional
        See nestcheck.io_utils.save_load_result.
    save: bool, optional
        See nestcheck.io_utils.save_load_result.
    load: bool, optional
        See nestcheck.io_utils.save_load_result.
    overwrite_existing: bool, optional
        See nestcheck.io_utils.save_load_result.

    NOTE(review): save_name, save, load and overwrite_existing are not read
    by this function body, which raises TypeError for any keyword other than
    estimator_names and parallel. Presumably they are consumed by a
    nestcheck.io_utils.save_load_result wrapper - confirm it is applied.

    Returns
    -------
    df: pandas DataFrame
        Results table showing calculation values and diagnostics. Rows
        show different runs.
        Columns have titles given by estimator_names and show results for the
        different functions in estimators_list.

    Raises
    ------
    TypeError
        If an unexpected keyword argument reaches this function.
    ValueError
        If estimator_names and estimator_list differ in length.
    """
    estimator_names = kwargs.pop('estimator_names', None)
    parallel = kwargs.pop('parallel', True)
    if kwargs:
        raise TypeError('Unexpected **kwargs: {0}'.format(kwargs))
    if estimator_names is None:
        estimator_names = ['est_' + str(i)
                           for i in range(len(estimator_list))]
    # Fail before doing any (potentially expensive) estimator calculations
    # rather than when pandas rejects the column labels afterwards.
    if len(estimator_names) != len(estimator_list):
        raise ValueError(
            'len(estimator_list) = {0} != len(estimator_names) = {1}'.format(
                len(estimator_list), len(estimator_names)))
    values_list = pu.parallel_apply(nestcheck.ns_run_utils.run_estimators,
                                    run_list,
                                    func_args=(estimator_list, ),
                                    parallel=parallel)
    # One row per run, one column per estimator.
    df = pd.DataFrame(np.stack(values_list, axis=0))
    df.columns = estimator_names
    df.index.name = 'run'
    return df
def thread_values_df(run_list, estimator_list, estimator_names, **kwargs):
    """Calculates estimator values for the constituent threads of the input
    runs.

    Parameters
    ----------
    run_list: list of dicts
        List of nested sampling run dicts.
    estimator_list: list of functions
        Estimators to apply to runs.
    estimator_names: list of strs
        Name of each func in estimator_list.
    kwargs:
        Kwargs to pass to parallel_apply. A 'tqdm_kwargs' entry is popped
        here (default {'desc': 'thread values'}) and forwarded explicitly.

    Returns
    -------
    df: pandas data frame
        Columns represent estimators and rows represent runs.
        Each cell contains a 1d numpy array with length equal to the number
        of threads in the run, containing the results from evaluating the
        estimator on each thread.
        If the frame ends up empty (no results or no estimator names) it is
        returned without the shape check.

    Raises
    ------
    AssertionError
        If estimator_list and estimator_names differ in length, or if a cell
        in the first row does not have shape
        (run_list[0]['thread_min_max'].shape[0],).
    """
    tqdm_kwargs = kwargs.pop('tqdm_kwargs', {'desc': 'thread values'})
    assert len(estimator_list) == len(estimator_names), (
        'len(estimator_list) = {0} != len(estimator_names = {1}'.format(
            len(estimator_list), len(estimator_names)))
    # get thread results
    thread_vals_arrays = pu.parallel_apply(
        nestcheck.error_analysis.run_thread_values,
        run_list,
        func_args=(estimator_list, ),
        tqdm_kwargs=tqdm_kwargs,
        **kwargs)
    df = pd.DataFrame()
    # Row i of each result array holds the values for estimator i.
    for i, name in enumerate(estimator_names):
        df[name] = [arr[i, :] for arr in thread_vals_arrays]
    # With no rows there is no first row to inspect (df.loc[0] would raise
    # KeyError), so there is nothing to verify.
    if df.empty:
        return df
    # Check there are the correct number of thread values in each cell of the
    # first row.
    expected_shape = (run_list[0]['thread_min_max'].shape[0],)
    for vals_shape in df.loc[0].apply(lambda x: x.shape).values:
        assert vals_shape == expected_shape, \
            ('Should be nlive=' + str(expected_shape[0]) +
             ' values in each cell. The cell contains array with shape ' +
             str(vals_shape))
    return df
def bs_values_df(run_list, estimator_list, estimator_names, n_simulate,
                 **kwargs):
    """Computes a data frame of bootstrap resampled values.

    Parameters
    ----------
    run_list: list of dicts
        List of nested sampling run dicts.
    estimator_list: list of functions
        Estimators to apply to runs.
    estimator_names: list of strs
        Name of each func in estimator_list.
    n_simulate: int
        Number of bootstrap replications to use on each run.
    kwargs:
        Kwargs to pass to parallel_apply. A 'tqdm_kwargs' entry is popped
        here (default {'desc': 'bs values'}) and forwarded explicitly.

    Returns
    -------
    bs_values_df: pandas data frame
        Columns represent estimators and rows represent runs.
        Each cell contains a 1d array of bootstrap resampled values for the run
        and estimator.
        If the frame ends up empty (no results or no estimator names) it is
        returned without the shape check.

    Raises
    ------
    AssertionError
        If estimator_list and estimator_names differ in length, or if a cell
        in the first row does not have shape (n_simulate,).
    """
    tqdm_kwargs = kwargs.pop('tqdm_kwargs', {'desc': 'bs values'})
    assert len(estimator_list) == len(estimator_names), (
        'len(estimator_list) = {0} != len(estimator_names = {1}'.format(
            len(estimator_list), len(estimator_names)))
    bs_values_list = pu.parallel_apply(
        nestcheck.error_analysis.run_bootstrap_values,
        run_list,
        func_args=(estimator_list, ),
        func_kwargs={'n_simulate': n_simulate},
        tqdm_kwargs=tqdm_kwargs,
        **kwargs)
    df = pd.DataFrame()
    # Row i of each result array holds the values for estimator i.
    for i, name in enumerate(estimator_names):
        df[name] = [arr[i, :] for arr in bs_values_list]
    # With no rows there is no first row to inspect (df.loc[0] would raise
    # KeyError), so there is nothing to verify.
    if df.empty:
        return df
    # Check there are the correct number of bootstrap replications in each
    # cell of the first row.
    for vals_shape in df.loc[0].apply(lambda x: x.shape).values:
        assert vals_shape == (n_simulate, ), (
            'Should be n_simulate=' + str(n_simulate) + ' values in ' +
            'each cell. The cell contains array with shape ' + str(vals_shape))
    return df
# Example #4
# 0
def get_bootstrap_results(n_run, n_simulate, estimator_list, settings,
                          **kwargs):
    """
    Build a data frame comparing the spread of repeated calculation results
    with sampling error estimates obtained from bootstrap resampling.

    This function was used for Table 5 in 'Dynamic nested sampling: an improved
    algorithm for nested sampling parameter estimation and evidence
    calculation' (Higson et al., 2019). See the paper for more details.

    Parameters
    ----------
    n_run: int
        how many runs to use
    n_simulate: int
        number of resamplings of each nested sampling run used for every
        bootstrap standard deviation estimate.
    estimator_list: list of estimator objects
        Each must provide a latex_name attribute (used for column titles).
    settings: PerfectNSSettings object
    load: bool, optional
        should run data and results be loaded if available?
    save: bool, optional
        should run data and results be saved?
    parallel: bool, optional
    cache_dir: str, optional
        Directory to use for caching.
    ninit_sep: bool, optional
        Only used as part of the results cache file name.
    add_sim_method: bool, optional
        also compute standard deviations with the simulated weights method,
        for comparison with bootstrap resampling? This method is inaccurate
        for parameter estimation.
    n_simulate_ci: int, optional
        number of resamplings of each run used for every bootstrap credible
        interval estimate (defaults to n_simulate). These may require more
        simulations than the standard deviation estimate.
    run_random_seeds: list, optional
        list of random seeds to use for generating runs.
    n_run_ci: int, optional
        number of runs used for the credible interval estimates (defaults to
        n_run). You may want a smaller value if n_simulate_ci is large, as
        otherwise the credible interval estimate may take a long time.
    cred_int: float, optional
        one-tailed credible interval to calculate
    max_workers: int or None, optional
        Number of processes.
        If max_workers is None then concurrent.futures.ProcessPoolExecutor
        defaults to using the number of processors of the machine.
        N.B. If max_workers=None and running on supercomputer clusters with
        multiple nodes, this may default to the number of processors on a
        single node and therefore there will be no speedup from multiple
        nodes (must specify manually in this case).

    Returns
    -------
    results: pandas data frame
        results data frame.
        Contains two columns for each estimator - the second column (with
        '_unc' appended to the title) shows the numerical uncertainty in the
        first column.
        Contains rows:
            true values: analytical values of estimators for this likelihood
                and posterior if available
            repeats mean: mean calculation result
            repeats std: standard deviation of calculation results
            bs std / repeats std: mean bootstrap standard deviation estimate as
                a fraction of the standard deviation of repeated results.
            bs estimate % variation: standard deviation of bootstrap estimates
                as a percentage of the mean estimate.
            [only if add sim method is True]:
                sim std / repeats std: as for 'bs std / repeats std' but with
                    simulation method standard deviation estimates.
                sim estimate % variation: as for 'bs estimate % variation' but
                    with simulation method standard deviation estimates.
            bs [cred_int] CI: mean bootstrap credible interval estimate.
            bs +-1std % coverage: % of calculation results falling within +- 1
                mean bootstrap standard deviation estimate of the mean.
            bs [cred_int] CI % coverage: % of calculation results which are
                less than the mean bootstrap credible interval estimate.

    Raises
    ------
    TypeError
        If an unexpected keyword argument is supplied.
    """
    # Unpack the optional settings; anything left over is a caller error.
    load = kwargs.pop('load', False)
    save = kwargs.pop('save', False)
    max_workers = kwargs.pop('max_workers', None)
    ninit_sep = kwargs.pop('ninit_sep', True)
    parallel = kwargs.pop('parallel', True)
    cache_dir = kwargs.pop('cache_dir', 'cache')
    add_sim_method = kwargs.pop('add_sim_method', False)
    n_simulate_ci = kwargs.pop('n_simulate_ci', n_simulate)
    n_run_ci = kwargs.pop('n_run_ci', n_run)
    cred_int = kwargs.pop('cred_int', 0.95)
    run_random_seeds = kwargs.pop('run_random_seeds', list(range(n_run)))
    if kwargs:
        raise TypeError('Unexpected **kwargs: {0}'.format(kwargs))
    # Name of the results cache file.
    # NOTE(review): the name does not encode add_sim_method, n_simulate_ci,
    # n_run_ci or cred_int, so a cached table made with different values of
    # those options is loaded as is.
    save_root = '_'.join(['bootstrap_results',
                          str(n_simulate) + 'nsim',
                          str(ninit_sep) + 'sep',
                          settings.save_name(),
                          str(n_run) + 'reps'])
    save_file = cache_dir + '/' + save_root + '.pkl'
    if load:
        try:
            return pd.read_pickle(save_file)
        except OSError:
            # No usable cached table: fall through and compute it.
            pass
    # Options shared by every parallel_apply call below.
    par_kwargs = {'max_workers': max_workers, 'parallel': parallel}
    est_names = [est.latex_name for est in estimator_list]
    # Generate (or load) the runs.
    run_list = ns.get_run_data(settings, n_run,
                               save=save, load=load,
                               random_seeds=run_random_seeds,
                               cache_dir=cache_dir,
                               **par_kwargs)
    # Order by random seed so that credible interval results are reproducible
    # even when only the first section of run_list is used.
    run_list = sorted(run_list, key=lambda r: r['random_seed'])
    # Results of the repeated calculations.
    rep_values = pu.parallel_apply(
        nestcheck.ns_run_utils.run_estimators, run_list,
        func_args=(estimator_list,), **par_kwargs)
    results = pf.summary_df_from_list(rep_values, est_names)
    # Prefix the calculation type labels with 'repeats '.
    calc_level = ('repeats ' +
                  results.index.get_level_values('calculation type'))
    type_level = results.index.get_level_values('result type')
    results.set_index([calc_level, type_level], inplace=True)
    results.index.rename('calculation type', level=0, inplace=True)

    def _add_std_rows(label, std_df):
        """Append the ratio and % variation rows for one std estimator."""
        ratio_row = label + ' std / repeats std'
        variation_row = label + ' estimate % variation'
        # Mean std estimate as a fraction of the std measured from repeated
        # calculations, followed by the uncertainty on that ratio.
        results.loc[(ratio_row, 'value'), :] = (
            std_df.loc[('mean', 'value')] /
            results.loc[('repeats std', 'value')])
        results.loc[(ratio_row, 'uncertainty'), :] = pf.array_ratio_std(
            std_df.loc[('mean', 'value')],
            std_df.loc[('mean', 'uncertainty')],
            results.loc[('repeats std', 'value')],
            results.loc[('repeats std', 'uncertainty')])
        # Fractional variation of the std estimates; the factor of 100
        # expresses it as a percentage.
        results.loc[(variation_row, 'value'), :] = (
            100 * std_df.loc[('std', 'value')] /
            std_df.loc[('mean', 'value')])
        results.loc[(variation_row, 'uncertainty'), :] = (
            100 * std_df.loc[('std', 'uncertainty')] /
            std_df.loc[('mean', 'value')])

    # Bootstrap std estimates.
    bs_values = pu.parallel_apply(
        nestcheck.error_analysis.run_std_bootstrap, run_list,
        func_args=(estimator_list,),
        func_kwargs={'n_simulate': n_simulate},
        **par_kwargs)
    bs_df = pf.summary_df_from_list(bs_values, est_names)
    _add_std_rows('bs', bs_df)
    if add_sim_method:
        # Std estimates from the simulated weights method.
        sim_values = pu.parallel_apply(
            nestcheck.error_analysis.run_std_simulate, run_list,
            func_args=(estimator_list,),
            func_kwargs={'n_simulate': n_simulate},
            **par_kwargs)
        sim_df = pf.summary_df_from_list(sim_values, est_names)
        _add_std_rows('sim', sim_df)
    # Bootstrap credible interval estimates (on the first n_run_ci runs).
    ci_row = 'bs ' + str(cred_int) + ' CI'
    bs_cis = pu.parallel_apply(
        nestcheck.error_analysis.run_ci_bootstrap, run_list[:n_run_ci],
        func_args=(estimator_list,),
        func_kwargs={'n_simulate': n_simulate_ci,
                     'cred_int': cred_int,
                     'random_seeds': range(n_simulate_ci)},
        **par_kwargs)
    bs_ci_df = pf.summary_df_from_list(bs_cis, est_names)
    results.loc[(ci_row, 'value'), :] = bs_ci_df.loc[('mean', 'value')]
    results.loc[(ci_row, 'uncertainty'), :] = \
        bs_ci_df.loc[('mean', 'uncertainty')]
    # Coverage of the interval: repeats mean +- 1 mean bootstrap std estimate.
    upper = (results.loc[('repeats mean', 'value')].values
             + bs_df.loc[('mean', 'value')].values)
    lower = (results.loc[('repeats mean', 'value')].values
             - bs_df.loc[('mean', 'value')].values)
    # One row per estimator, one column per run.
    rep_values_array = np.stack(rep_values, axis=1)
    assert rep_values_array.shape == (len(estimator_list), n_run)
    coverage = np.zeros(rep_values_array.shape[0])
    for i, est_values in enumerate(rep_values_array):
        inside = (est_values > lower[i]) & (est_values < upper[i])
        coverage[i] = np.count_nonzero(inside) / rep_values_array.shape[1]
    # The factor of 100 expresses the coverage as a percentage.
    results.loc[('bs +-1std % coverage', 'value'), :] = coverage * 100
    # Coverage of the credible interval: fraction of results below the mean
    # bootstrap credible interval estimate.
    ci_upper = results.loc[(ci_row, 'value')].values
    ci_coverage = np.zeros(len(estimator_list))
    for i, est_values in enumerate(rep_values_array):
        below = est_values < ci_upper[i]
        ci_coverage[i] = np.count_nonzero(below) / rep_values_array.shape[1]
    results.loc[(ci_row + ' % coverage', 'value'), :] = (ci_coverage * 100)
    if save:
        # Store the finished table in the cache.
        print('get_bootstrap_results: results saved to\n' + save_file)
        results.to_pickle(save_file)
    return results
# Example #5
# 0
def get_dynamic_results(n_run, dynamic_goals_in, estimator_list_in,
                        settings_in, **kwargs):
    """
    Build a data frame of the standard deviations of repeated calculation
    results and the efficiency gains (ratios of variances of results
    calculations) for a set of dynamic goals. To keep the comparison fair,
    settings.n_samples_max for dynamic nested sampling is set slightly below
    the mean number of samples used by standard nested sampling.

    This function was used for Tables 1, 2, 3 and 4, as well as to generate the
    results shown in figures 6 and 7 of 'Dynamic nested sampling: an improved
    algorithm for nested sampling parameter estimation and evidence
    calculation' (Higson et al., 2019). See the paper for a more detailed
    description.

    Parameters
    ----------
    n_run: int
        how many runs to use
    dynamic_goals_in: list of floats
        which dynamic goals to test (None is not allowed; a standard nested
        sampling calculation is always added for comparison).
    estimator_list_in: list of estimator objects
    settings_in: PerfectNSSettings object
        A deep copy is edited; the input object is left alone.
    load: bool, optional
        should run data and results be loaded if available?
    save: bool, optional
        should run data and results be saved?
    overwrite_existing: bool, optional
        if a file exists already but we generate new run data, should we
        overwrite the existing file when saved?
    run_random_seeds: list, optional
        list of random seeds to use for generating runs.
    parallel: bool, optional
    cache_dir: str, optional
        Directory to use for caching.
    tuned_dynamic_ps: list of bools, same length as dynamic_goals_in, optional
    max_workers: int or None, optional
        Number of processes.
        If max_workers is None then concurrent.futures.ProcessPoolExecutor
        defaults to using the number of processors of the machine.
        N.B. If max_workers=None and running on supercomputer clusters with
        multiple nodes, this may default to the number of processors on a
        single node and therefore there will be no speedup from multiple
        nodes (must specify manually in this case).

    Returns
    -------
    results: pandas data frame
        results data frame.
        Contains rows:
            mean [dynamic goal]: mean calculation result for standard nested
                sampling and dynamic nested sampling with each input dynamic
                goal.
            std [dynamic goal]: standard deviation of results for standard
                nested sampling and dynamic nested sampling with each input
                dynamic goal.
            gain [dynamic goal]: the efficiency gain (computational speedup)
                from dynamic nested sampling compared to standard nested
                sampling. This equals (variance of standard results) /
                (variance of dynamic results); see the dynamic nested
                sampling paper for more details.

    Raises
    ------
    AssertionError
        If tuned_dynamic_ps and dynamic_goals_in differ in length or a goal
        is None.
    TypeError
        If an unexpected keyword argument is supplied.
    """
    load = kwargs.pop('load', False)
    save = kwargs.pop('save', False)
    max_workers = kwargs.pop('max_workers', None)
    parallel = kwargs.pop('parallel', True)
    cache_dir = kwargs.pop('cache_dir', 'cache')
    overwrite_existing = kwargs.pop('overwrite_existing', True)
    run_random_seeds = kwargs.pop('run_random_seeds', list(range(n_run)))
    tuned_dynamic_ps = kwargs.pop('tuned_dynamic_ps',
                                  [False] * len(dynamic_goals_in))
    assert len(tuned_dynamic_ps) == len(dynamic_goals_in)
    assert all(goal is not None for goal in dynamic_goals_in), \
        'Goals should be dynamic - standard NS already included'
    # Method 0 is always standard nested sampling, used as the baseline.
    dynamic_goals = [None] + dynamic_goals_in
    tuned_dynamic_ps = [False] + tuned_dynamic_ps
    if kwargs:
        raise TypeError('Unexpected **kwargs: {0}'.format(kwargs))
    # Work on a private copy so the caller's settings are never edited.
    settings = copy.deepcopy(settings_in)
    # Name of the results cache file.
    # NOTE(review): tuned_dynamic_ps is not part of the name.
    name_parts = ['dynamic_test']
    name_parts += [str(dg).replace('.', '_') for dg in dynamic_goals_in]
    name_parts += [settings.save_name(include_dg=False), str(n_run) + 'reps']
    save_file = cache_dir + '/' + '_'.join(name_parts) + '.pkl'
    if load:
        try:
            return pd.read_pickle(save_file)
        except OSError:
            print('Could not load file: ' + save_file)
    # Options shared by run generation and estimator evaluation.
    par_kwargs = {'parallel': parallel, 'max_workers': max_workers}
    # Prepend a sample counter so the number of samples in each run is
    # recorded as element 0 of every set of estimator values.
    estimator_list = [e.CountSamples()] + estimator_list_in
    est_names = [est.latex_name for est in estimator_list]
    method_names = []
    method_values = []
    assert dynamic_goals[0] is None, (
        'Need to start with standard ns to calculate efficiency gains')
    for i, dynamic_goal in enumerate(dynamic_goals):
        settings.dynamic_goal = dynamic_goal
        settings.tuned_dynamic_p = tuned_dynamic_ps[i]
        # Once the standard calculation is done, cap the dynamic runs at
        # slightly fewer samples than the standard runs used on average.
        # Otherwise dynamic nested sampling would use more samples, as it
        # only terminates after the sample count exceeds n_samples_max.
        if i != 0 and settings.dynamic_goal is not None:
            assert dynamic_goals[0] is None
            assert isinstance(estimator_list[0], e.CountSamples)
            standard_counts = np.asarray(
                [vals[0] for vals in method_values[0]])
            n_samples_max = np.mean(standard_counts)
            # The reduction depends on the dynamic goal, as evidence
            # calculations typically have longer additional threads than
            # parameter estimation calculations.
            reduce_factor = 1 - ((1.5 - 0.5 * settings.dynamic_goal) *
                                 (settings.nbatch / settings.nlive_const))
            settings.n_samples_max = int(n_samples_max * reduce_factor)
        print('dynamic_goal=' + str(settings.dynamic_goal),
              'n_samples_max=' + str(settings.n_samples_max))
        # Label for this calculation method.
        if dynamic_goal is None:
            method_name = 'standard'
        else:
            method_name = 'dynamic $G=' + str(settings.dynamic_goal) + '$'
            if settings.tuned_dynamic_p is True:
                method_name += ' tuned'
        method_names.append(method_name)
        # Generate (or load) the runs and evaluate the estimators on them.
        run_list = ns.get_run_data(settings, n_run,
                                   random_seeds=run_random_seeds,
                                   load=load, save=save,
                                   cache_dir=cache_dir,
                                   overwrite_existing=overwrite_existing,
                                   **par_kwargs)
        values = pu.parallel_apply(
            nestcheck.ns_run_utils.run_estimators, run_list,
            func_args=(estimator_list,), **par_kwargs)
        method_values.append(values)
    results = pf.efficiency_gain_df(method_names, method_values, est_names)
    if save:
        # Store the finished table in the cache.
        print('get_dynamic_results: saving results to\n' + save_file)
        results.to_pickle(save_file)
    return results
# Example #6
# 0
def get_run_data(settings, n_repeat, **kwargs):
    """
    Tests if runs with the specified settings are already cached. If not
    the runs are generated and saved.

    Parameters
    ----------
    settings: PerfectNSSettings object
    n_repeat: int
        Number of nested sampling runs to generate.
    parallel: bool, optional
        Should runs be generated in parallel?
    max_workers: int or None, optional
        Number of processes.
        If max_workers is None then concurrent.futures.ProcessPoolExecutor
        defaults to using the number of processors of the machine.
        N.B. If max_workers=None and running on supercomputer clusters with
        multiple nodes, this may default to the number of processors on a
        single node and therefore there will be no speedup from multiple
        nodes (must specify manually in this case).
    load: bool, optional
        Should previously saved runs be loaded? If False, new runs are
        generated.
    save: bool, optional
        Should any new runs generated be saved?
    cache_dir: str, optional
        Directory for caching
    overwrite_existing: bool, optional
        if a file exists already but we generate new run data, should we
        overwrite the existing file when saved? Forced to True when loading
        the cached file fails with OSError or EOFError.
    check_loaded_settings: bool, optional
        if we load a cached file, should we check if the loaded file's settings
        match the current settings (and generate fresh runs if they do not)?
    random_seeds: list, optional
        random_seed arguments for each call of generate_ns_run. Must have
        length n_repeat; defaults to [None] * n_repeat.

    Returns
    -------
    run_list
        list of n_repeat nested sampling runs.

    Raises
    ------
    AssertionError
        If len(random_seeds) != n_repeat.
    TypeError
        If an unexpected keyword argument is supplied.
    """
    parallel = kwargs.pop('parallel', True)
    max_workers = kwargs.pop('max_workers', None)
    load = kwargs.pop('load', True)
    save = kwargs.pop('save', True)
    cache_dir = kwargs.pop('cache_dir', 'cache')
    overwrite_existing = kwargs.pop('overwrite_existing', True)
    check_loaded_settings = kwargs.pop('check_loaded_settings', True)
    random_seeds = kwargs.pop('random_seeds', [None] * n_repeat)
    assert len(random_seeds) == n_repeat
    if kwargs:
        raise TypeError('Unexpected **kwargs: {0}'.format(kwargs))
    save_name = cache_dir + '/' + settings.save_name()
    save_name += '_' + str(n_repeat) + 'reps'
    if load:
        try:
            data = iou.pickle_load(save_name)
            if check_loaded_settings:
                # Assume all runs in the loaded list have the same settings, in
                # which case we only need check the first one.
                loaded = copy.deepcopy(data[0]['settings'])
                current = copy.deepcopy(settings.get_settings_dict())
                # If runs are standard nested sampling there is no need to
                # check settings which only affect dynamic ns match
                if loaded['dynamic_goal'] is None and (current['dynamic_goal']
                                                       is None):
                    for key in [
                            'dynamic_goal', 'n_samples_max', 'ninit', 'nbatch',
                            'dynamic_fraction', 'tuned_dynamic_p'
                    ]:
                        # pop rather than del: a settings dict lacking one of
                        # these keys should go through the comparison below
                        # instead of raising an uncaught KeyError.
                        loaded.pop(key, None)
                        current.pop(key, None)
                if loaded != current:
                    #  remove shared keys and only print differences
                    rm = [
                        k for k in set(loaded.keys()) & set(current.keys())
                        if loaded[k] == current[k]
                    ]
                    loaded_diff = {
                        k: v
                        for k, v in loaded.items() if k not in rm
                    }
                    current_diff = {
                        k: v
                        for k, v in current.items() if k not in rm
                    }
                    msg = (('Loaded settings != current settings. Differences '
                            'are: {0} != = {1}. Generating new runs instead.'
                            ).format(loaded_diff, current_diff))
                    warnings.warn(msg, UserWarning)
                    # Discard the mismatched runs and regenerate below.
                    del data
                    load = False
        except (OSError, EOFError) as exception:
            print(('Loading {0} failed due to {1}'.format(
                save_name,
                type(exception).__name__) +
                   ' - try generating new runs instead.'))
            load = False
            # The cached file is missing or unreadable, so replace it.
            overwrite_existing = True
    if not load:
        # Must check cache is up to date before parallel_apply or each process
        # will have to update the cache separately.
        # The prior is identified by class name rather than isinstance.
        if type(settings.prior).__name__ == 'GaussianCached':
            settings.prior.check_cache(settings.n_dim)
        # One run per random seed.
        data = pu.parallel_apply(generate_ns_run,
                                 random_seeds,
                                 func_pre_args=(settings, ),
                                 max_workers=max_workers,
                                 parallel=parallel)
        if save:
            iou.pickle_save(data,
                            save_name,
                            overwrite_existing=overwrite_existing)
    return data