Example #1
def noisepost(data, noise, X_inputs):
    """Predict the mean of the data and the mean of the noise at new inputs, using one emulator fitted to the data and another fitted to the noise.

    Args:
        data (str): Name of configuration file for the data emulator.
        noise (str): Name of configuration file for the noise emulator.
        X_inputs (Numpy array): New input points to predict at.

    Returns:
        (GD_mean, R): (Numpy array, Numpy array): Predictive mean of the data, predictive mean of the noise.

    """

    #### load config files for data and noise

    ## note that inputs and outputs are already in same order for both
    GD = g.setup("config-data-recon", datashuffle=False, scaleinputs=False)
    GN = g.setup("config-noise-recon", datashuffle=False, scaleinputs=False)

    #### prediction of r at known data points 'x'

    ## get r values from the noise emulator
    x = GD.training.inputs
    x1 = __emuc.Data(x, None, GN.basis, GN.par, GN.beliefs, GN.K)
    p1 = __emuc.Posterior(x1,
                          GN.training,
                          GN.par,
                          GN.beliefs,
                          GN.K,
                          predict=True)
    GN_mean, GN_var = p1.mean, p1.var
    r = np.exp(GN_mean + np.diag(GN_var) / 2.0)

    ## set r values in the data emulator
    GD.training.set_r(r)
    GD.training.make_A(s2=GD.par.sigma**2, predict=True)

    #### prediction of r at new data points 'X'

    ## get R values from the noise emulator
    X = X_inputs
    x2 = __emuc.Data(X, None, GN.basis, GN.par, GN.beliefs, GN.K)
    p2 = __emuc.Posterior(x2,
                          GN.training,
                          GN.par,
                          GN.beliefs,
                          GN.K,
                          predict=True)
    GN_mean, GN_var = p2.mean, p2.var
    R = np.exp(GN_mean + np.diag(GN_var) / 2.0)  ## mean of noise prediction
    ## set R values for new points (X) Data object
    xp = __emuc.Data(X, None, GD.basis, GD.par, GD.beliefs, GD.K)
    xp.set_r(R)
    xp.make_A(s2=GD.par.sigma**2, predict=True)
    post = __emuc.Posterior(xp,
                            GD.training,
                            GD.par,
                            GD.beliefs,
                            GD.K,
                            predict=True)
    GD_mean = post.mean  ## mean of mean prediction

    ## return the predictive mean for data and noise
    return GD_mean, R
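A minimal usage sketch follows. The module alias g, the config file names, and an already-completed noise fit are all assumptions here; the config files would come from an earlier noisefit() run (see Example #9).

import numpy as np

# Hypothetical usage: predict the fitted data mean and noise level at 100 new
# 1D input points. File names are assumptions.
X_new = np.linspace(0.0, 1.0, 100).reshape(-1, 1)
data_mean, noise_mean = noisepost("config-data-recon", "config-noise-recon", X_new)
print(data_mean.shape, noise_mean.shape)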
Example #2
def posterior_sample(E, x, predict=True):
    """Return a sample from the posterior given inputs x and emulator E.

    Args:
        E (Emulator): Emulator instance.
        x (Numpy array): Test points (same dimensions as emulator training data).
        predict (bool): Default True. Gives posterior prediction (as opposed to posterior estimation).

    Returns:
        sample (Numpy array): Posterior sample

    """

    # if 1D inputs, store in 2D array with only 1 column
    if x[0].size == 1:
        x = _np.array([x,]).T

    ## tests here to make sure that x is compatible with E
    if x[0,:].size != E.training.inputs[0,:].size:
        print("ERROR: test points have different number of columns"
              "to data in emulator. Exiting.")
        exit()

    xs = __emuc.Data(x, None, E.basis, E.par, E.beliefs, E.K)
    p = __emuc.Posterior(xs, E.training, E.par, E.beliefs, E.K, predict=predict)
    pmean, pvar = p.mean, p.var

    # draw a sample from the posterior distribution and return it
    L = _np.linalg.cholesky(pvar)
    u = _np.random.randn(x[:,0].size)
    sample = pmean + L.dot(u)
    return sample
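Because the sample is drawn as mean + L·u with L the Cholesky factor of the posterior covariance, repeated calls return different draws from the same posterior. A minimal sketch (E is assumed to be a trained Emulator produced by setup() and train(), as in Example #4):

import numpy as np

# Draw three posterior samples over a grid of 1D test inputs; each draw is a
# plausible function consistent with the training data.
x_test = np.linspace(0.0, 1.0, 50)
draws = [posterior_sample(E, x_test) for _ in range(3)]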
Example #3
def posterior(E, x, predict=True):
    """Return the posterior mean and variance given inputs x and emulator E.

    Args:
        E (Emulator): Emulator instance.
        x (Numpy array): Test points (same dimensions as emulator training data).
        predict (bool): Default True. Gives posterior prediction (as opposed to posterior estimation).

    Returns:
        (pmean, pvar): (Numpy array, Numpy array): Posterior mean, Posterior Var

    """

    # if 1D inputs, store in 2D array with only 1 column
    if x[0].size == 1:
        x = _np.array([x,]).T

    ## tests here to make sure that x is compatible with E
    if x[0,:].size != E.training.inputs[0,:].size:
        print("ERROR: test points have different number of columns"
              "to data in emulator. Exiting.")
        exit()

    xs = __emuc.Data(x, None, E.basis, E.par, E.beliefs, E.K)
    p = __emuc.Posterior(xs, E.training, E.par, E.beliefs, E.K, predict=predict)
    pmean, pvar = p.mean, p.var
    return (pmean, pvar)
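A minimal sketch of turning the returned mean and full covariance into pointwise prediction bands (E is again an assumed trained Emulator; the 2-standard-deviation band is a conventional choice, not something this function computes itself):

import numpy as np

x_test = np.linspace(0.0, 1.0, 50)
pmean, pvar = posterior(E, x_test)
sd = np.sqrt(np.diag(pvar))  # pointwise standard deviations
upper, lower = pmean + 2.0 * sd, pmean - 2.0 * sd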
Example #4
def setup(config_file, datashuffle=True, scaleinputs=True):
    """Do initialisation of classes Beliefs, Hyperparams, Basis, TV_config, All_Data, Data, Posterior, Optimize, and K. Return instance of Emulator class.

    Args:
        config_file (str): Name of configuration file.
        datashuffle (bool): Default is True. Randomly orders dataset.
        scaleinputs (bool): Default is True. Scales inputs into range 0 to 1.

    Returns:
        Emulator: Initialised Emulator class.

    """

    # returns instance of configuration
    config = __emuc.Config(config_file)

    # read from beliefs file
    beliefs = __emuc.Beliefs(config.beliefs)
    par = __emuc.Hyperparams(beliefs)
    basis = __emuc.Basis(beliefs)

    # split data T & V ; (k,c,noV) - no.sets, set for first V, no.V.sets
    tv_conf = __emuc.TV_config(*(config.tv_config))
    all_data = __emuc.All_Data(
        config.inputs, config.outputs, tv_conf,
        beliefs, par, datashuffle, scaleinputs)

    # build the kernel
    if beliefs.alt_nugget != 'T':
        K = __emuk.kernel(all_data.x_full[0].size, par)
    else:
        print("\n*** Using alternative nugget ***")
        K = __emuk.kernel_alt_nug(all_data.x_full[0].size, par)

    # build remaining structures
    (x_T, y_T) = all_data.choose_T()
    (x_V, y_V) = all_data.choose_V()
    training = __emuc.Data(x_T, y_T, basis, par, beliefs, K)
    validation = __emuc.Data(x_V, y_V, basis, par, beliefs, K)
    post = __emuc.Posterior(validation, training, par, beliefs, K)
    opt_T = __emuo.Optimize(training, basis, par, beliefs, config)
    
    return __emuc.Emulator(
        config, beliefs, par, basis, tv_conf,
        all_data, training, validation, post, opt_T, K)
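A typical workflow built on setup(). The package import path below is an assumption; the other examples in this section already refer to the core module as g, and g.train is used the same way in Example #9.

import gp_emu_uqsa as g  # assumed import path

E = g.setup("config-file", datashuffle=True, scaleinputs=True)
g.train(E)  # fit the hyperparameters against the validation set(s)
pmean, pvar = posterior(E, E.validation.inputs)  # posterior() from Example #3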
Example #5
def imp_plot(emuls,
             zs,
             cm,
             var_extra,
             maxno=1,
             olhcmult=100,
             grid=10,
             act=[],
             fileStr="",
             plot=True):
    """Create an implausibility and optical depth plot, made of subplots for each pair of active inputs (or only those specified). Implausibility plots in the lower triangle, optical depth plots in the upper triangle. The diagonal is blank, and implausibility plots are paired with optical depth plots across the diagonal.

    Args:
        emuls (Emulator list): list of Emulator instances
        zs (float list): list of output values to match
        cm (float): cut-off for implausibility
        var_extra (float list): extra (non-emulator) variance on outputs
        maxno (int): which maximum implausibility to consider, default 1
        olhcmult (int): option for size of oLHC design across other inputs not in the considered pair, size = olhcmult*(no. active inputs - 2), default 100
        grid (int): divisions of each input range to make, with values of each input for a subplot centred on the gridpoint, default 10
        act (int list): list of active inputs for plot, default [] (all inputs)
        fileStr (str): string to prepend to output files, default ""
        plot (bool): choice to plot (e.g. False for batches), default True

    Returns:
        None

    """

    sets, minmax, orig_minmax = emulsetup(emuls)
    check_act(act, sets)
    act_ref = ref_act(minmax)
    plt_ref = act_ref if act == [] else ref_plt(act)

    num_inputs = len(minmax)  # number of inputs we'll look at
    dim = num_inputs - 2  # dimensions of input that we'll change with oLHC

    maxno = int(maxno)
    IMP, ODP = [], []  ## need an IMP and ODP for each I_max
    for i in range(maxno):
        IMP.append(_np.zeros((grid, grid)))
        ODP.append(_np.zeros((grid, grid)))

    ## space for all plots, and reference index to subplot indices
    print("Creating plot objects... may take some time...")
    plot = bool(plot)
    rc = num_inputs if act == [] else len(act)
    if plot:
        fig, ax = _plt.subplots(nrows=rc, ncols=rc)

    ## reduce sets to only the chosen ones
    less_sets = []
    if act == []:
        less_sets = sets
    else:
        for s in sets:
            if s[0] in act and s[1] in act:
                less_sets.append(s)
    print("HM for input pairs:", less_sets)

    ## calculate plot for each pair of inputs
    for s in less_sets:
        print("\nset:", s)

        ## rows and columns of 2D grid for the {i,j} value of pair of inputs
        X1 = _np.linspace(minmax[str(s[0])][0],
                          minmax[str(s[0])][1],
                          grid,
                          endpoint=False)
        X1 = X1 + 0.5 * (minmax[str(s[0])][1] -
                         minmax[str(s[0])][0]) / float(grid)
        X2 = _np.linspace(minmax[str(s[1])][0],
                          minmax[str(s[1])][1],
                          grid,
                          endpoint=False)
        X2 = X2 + 0.5 * (minmax[str(s[1])][1] -
                         minmax[str(s[1])][0]) / float(grid)
        print("Values of the grid 1:", X1)
        print("Values of the grid 2:", X2)
        x_all = _np.zeros((grid * grid, 2))
        for i in range(0, grid):
            for j in range(0, grid):
                x_all[i * grid + j, 0] = X1[i]
                x_all[i * grid + j, 1] = X2[j]

        ## use an OLHC design for all remaining inputs
        n = dim * int(olhcmult)  # no. of design_points
        N = int(n / 2)  # number of designs from which 1 maximin is chosen
        olhc_range = [it[1] for it in sorted(minmax.items(), key=lambda x: int(x[0])) \
                      if int(it[0])!=s[0] and int(it[0])!=s[1]]
        print("olhc_range:", olhc_range)
        filename = "imp_input_" + str(s[0]) + '_' + str(s[1])
        _gd.optLatinHyperCube(dim, n, N, olhc_range, filename)
        x_other_inputs = _np.loadtxt(filename)  # read generated oLHC file in

        ## enough for ALL inputs - we'll mask any inputs not used by a particular emulator later
        x = _np.empty([n, num_inputs])

        ## stepping over the grid {i,j} to build subplot
        print("\nCalculating Implausibilities...")
        for i in range(0, grid):
            for j in range(0, grid):
                I2 = _np.zeros((n, len(emuls)))

                ## loop over outputs (i.e. over emulators)
                for o in range(len(emuls)):
                    E, z, var_e = emuls[o], zs[o], var_extra[o]
                    Eai = E.beliefs.active_index
                    ind_in_active = s[0] in Eai and s[1] in Eai
                    if ind_in_active:

                        ## set the input pair for this subplot
                        x[:, act_ref[str(s[0])]] = x_all[i * grid + j, 0]
                        x[:, act_ref[str(s[1])]] = x_all[i * grid + j, 1]

                        ## figure out what the other inputs active_indices are
                        other_dim = [
                            act_ref[str(key)] for key in act_ref
                            if int(key) not in s
                        ]
                        if len(other_dim) == 1:
                            x[:, other_dim] = _np.array([
                                x_other_inputs,
                            ]).T
                        else:
                            x[:, other_dim] = x_other_inputs

                        ## inactive inputs are masked
                        act_ind_list = [act_ref[str(l)] for l in Eai]
                        ni = __emuc.Data(x[:, act_ind_list], None, E.basis,
                                         E.par, E.beliefs, E.K)
                        post = __emuc.Posterior(ni,
                                                E.training,
                                                E.par,
                                                E.beliefs,
                                                E.K,
                                                predict=False)
                        mean = post.mean
                        var = _np.diag(post.var)

                        ## calculate implausibility^2 values
                        for r in range(0, n):
                            I2[r, o] = (mean[r] - z)**2 / (var[r] + var_e)

                ## find maximum implausibility across different outputs
                I = _np.sqrt(I2)
                odp_count = _np.zeros(maxno, dtype=_np.uint32)
                Imaxes = _np.empty([n, maxno])
                for r in range(0, n):
                    Imaxes[r, :] = _np.sort(_np.partition(I[r, :], -maxno)[-maxno:])
                    for m in range(maxno):
                        if Imaxes[r, -(m + 1)] < cm:  # check cut-off using this value
                            odp_count[m] = odp_count[m] + 1

                for m in range(maxno):
                    IMP[m][i, j] = _np.amin(Imaxes[:, -(m + 1)])  # minimise across n points
                    ODP[m][i, j] = float(odp_count[m]) / float(n)

        ## save the results to file
        nfileStr = fileStr + "_" if fileStr != "" else fileStr
        for m in range(maxno):  ## different file for each max
            _np.savetxt(
                nfileStr + str(m + 1) + "_" + "IMP_" + str(s[0]) + '_' +
                str(s[1]), IMP[m])
            _np.savetxt(
                nfileStr + str(m + 1) + "_" + "ODP_" + str(s[0]) + '_' +
                str(s[1]), ODP[m])

        if plot:
            make_plots(s, plt_ref, cm, maxno, ax, IMP, ODP, minmax=minmax)

    if plot:
        plot_options(plt_ref, ax, fig, minmax)
        _plt.show()

    return
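For each design point and output, the implausibility is I(x) = |mean(x) − z| / sqrt(var(x) + var_extra); each subplot grid records the minimum implausibility and the fraction of non-implausible points (the optical depth). A hypothetical call, where emul1 and emul2 are assumed trained Emulator instances (Example #4) and the cut-off 3.0 is a conventional choice:

imp_plot([emul1, emul2], zs=[0.1, 0.9], cm=3.0, var_extra=[0.01, 0.01],
         maxno=1, olhcmult=100, grid=10, act=[0, 1, 2], fileStr="wave1")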
Example #6
def new_wave_design(emuls,
                    zs,
                    cm,
                    var_extra,
                    datafiles,
                    maxno=1,
                    olhcmult=100,
                    act=[],
                    fileStr=""):
    """Create a set of non-implausible design inputs to use for more simulations or experiments. Datafiles of non-implausible inputs (and corresponding outputs) should be provided so the design is optimised with respect to this data. An optimised Latin Hypercube design is made and only non-implausible inputs from this are kept. To adjust the design size while fixing cm, try adjusting olhcmult.

    Args:
        emuls (Emulator list): list of Emulator instances
        zs (float list): list of output values to match
        cm (float): cut-off for implausibility
        var_extra (float list): extra (non-emulator) variance on outputs
        datafiles (str list): specify names of inputs and outputs files. These should correspond to non-implausible inputs only; see the nonimp_data() function
        maxno (int): which maximum implausibility to consider, default 1
        olhcmult (int): option for size of oLHC design across all inputs, size = olhcmult*(no. inputs), default 100
        act (int list): list of active inputs for plot, default [] (all inputs)
        fileStr (str): string to prepend to output files, default ""

    Returns:
        nimp_inputs (int): number of non-implausible design points created

    """

    sets, minmax, orig_minmax = emulsetup(emuls)
    act_ref = ref_act(minmax)
    check_act(act, sets)
    num_inputs = len(minmax)
    dim = num_inputs
    maxno = int(maxno)

    sim_x, sim_y = load_datafiles(datafiles, orig_minmax)

    ## use an OLHC design for all remaining inputs
    n = dim * int(olhcmult)  # no. of design_points
    N = int(n / 2)  # number of designs from which 1 maximin is chosen
    olhc_range = [
        it[1] for it in sorted(minmax.items(), key=lambda x: int(x[0]))
    ]
    print("olhc_range:", olhc_range)
    filename = "olhc_des"
    if sim_x is None:
        _gd.optLatinHyperCube(dim, n, N, olhc_range, filename)
    else:
        _gd.optLatinHyperCube(dim, n, N, olhc_range, filename, fextra=sim_x)
    x = _np.loadtxt(filename)  # read generated oLHC file in

    print("\nCalculating Implausibilities...")
    I2 = _np.zeros((n, len(emuls)))

    ## loop over outputs (i.e. over emulators)
    for o in range(len(emuls)):
        E, z, var_e = emuls[o], zs[o], var_extra[o]
        Eai = E.beliefs.active_index
        act_ind_list = [act_ref[str(l)] for l in Eai]

        ni = __emuc.Data(x[:, act_ind_list], None, E.basis, E.par, E.beliefs,
                         E.K)
        post = __emuc.Posterior(ni,
                                E.training,
                                E.par,
                                E.beliefs,
                                E.K,
                                predict=False)
        mean = post.mean
        var = _np.diag(post.var)

        ## calculate implausibility^2 values
        for r in range(0, n):
            I2[r, o] = (mean[r] - z)**2 / (var[r] + var_e)

    ## find maximum implausibility across different outputs
    I = _np.sqrt(I2)
    Imaxes = _np.empty([n, maxno])
    nimp_inputs = []
    for r in range(0, n):
        Imaxes[r, :] = _np.sort(_np.partition(I[r, :], -maxno)[-maxno:])

        m = maxno - 1
        if Imaxes[r, -(m + 1)] < cm:  # check cut-off using this value
            nimp_inputs.append(x[r, :])

    ## save the results to file
    nfileStr = fileStr + "_" if fileStr != "" else fileStr
    _np.savetxt(nfileStr + datafiles[0], nimp_inputs)

    print("Generated", len(nimp_inputs), "new data points")

    return len(nimp_inputs)
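A hypothetical call. The emulator instances and datafile names are assumptions; datafiles would normally be the non-implausible files kept by nonimp_data() (Example #7), and a non-empty fileStr keeps the new design from overwriting them.

n_kept = new_wave_design([emul1, emul2], zs=[0.1, 0.9], cm=3.0,
                         var_extra=[0.01, 0.01],
                         datafiles=["nonimp_inputs", "nonimp_outputs"],
                         olhcmult=200, fileStr="wave2")
# new design points are written to "wave2_nonimp_inputs"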
Example #7
def nonimp_data(emuls,
                zs,
                cm,
                var_extra,
                datafiles,
                maxno=1,
                act=[],
                fileStr=""):
    """Determine which inputs from a specified input file are non-implausible, and output these values (along with the corresponding outputs from a specified output file) to new files.

    Args:
        emuls (Emulator list): list of Emulator instances
        zs (float list): list of output values to match
        cm (float): cut-off for implausibility
        var_extra (float list): extra (non-emulator) variance on outputs
        datafiles (str list): specify names of inputs and outputs files
        maxno (int): which maximum implausibility to consider, default 1
        act (int list): list of active inputs for plot, default [] (all inputs)
        fileStr (str): string to prepend to output files of non-implausible inputs and outputs, default ""

    Returns:
        nimp_inputs (int): number of non-implausible input points in input datafile

    """

    sets, minmax, orig_minmax = emulsetup(emuls)
    act_ref = ref_act(minmax)
    num_inputs = len(minmax)
    check_act(act, sets)
    maxno = int(maxno)

    sim_x, sim_y = load_datafiles(datafiles, orig_minmax)
    n = sim_x[:, 0].size

    print("\nCalculating Implausibilities...")
    I2 = _np.zeros((n, len(emuls)))

    ## loop over outputs (i.e. over emulators)
    for o in range(len(emuls)):
        E, z, var_e = emuls[o], zs[o], var_extra[o]
        Eai = E.beliefs.active_index
        act_ind_list = [act_ref[str(l)] for l in Eai]

        ni = __emuc.Data(sim_x[:, act_ind_list], None, E.basis, E.par,
                         E.beliefs, E.K)
        post = __emuc.Posterior(ni,
                                E.training,
                                E.par,
                                E.beliefs,
                                E.K,
                                predict=False)
        mean = post.mean
        var = _np.diag(post.var)

        ## calculate implausibility^2 values
        for r in range(0, n):
            I2[r, o] = (mean[r] - z)**2 / (var[r] + var_e)

    ## find maximum implausibility across different outputs
    I = _np.sqrt(I2)
    Imaxes = _np.empty([n, maxno])
    nimp_inputs, nimp_outputs = [], []
    for r in range(0, n):
        Imaxes[r, :] = _np.sort(_np.partition(I[r, :], -maxno)[-maxno:])

        m = maxno - 1
        if Imaxes[r, -(m + 1)] < cm:  # check cut-off using this value
            nimp_inputs.append(sim_x[r, :])
            nimp_outputs.append(sim_y[r, :])

    ## save the results to file
    nfileStr = fileStr + "_" if fileStr != "" else fileStr

    _np.savetxt(nfileStr + "nonimp_" + datafiles[0], nimp_inputs)
    _np.savetxt(nfileStr + "nonimp_" + datafiles[1], nimp_outputs)

    print(len(nimp_inputs), "data points were non-implausible")

    return len(nimp_inputs)
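A hypothetical call (emulators and file names are assumptions): filter an existing simulation dataset down to its non-implausible points.

n_ok = nonimp_data([emul1, emul2], zs=[0.1, 0.9], cm=3.0,
                   var_extra=[0.01, 0.01],
                   datafiles=["sim_inputs", "sim_outputs"], fileStr="w1")
# non-implausible points are written to "w1_nonimp_sim_inputs" and
# "w1_nonimp_sim_outputs"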
Example #8
def plot(E, plot_dims, fixed_dims=[], fixed_vals=[], mean_or_var="mean", customLabels=[], points=False, predict=True):
    """Do plot of the Emulator posterior against 1 or 2 input variables, while holding the other inputs at constant values.

    Args:
        E (Emulator): Emulator instance.
        plot_dims (int list): Dimensions of inputs to plot (1 or 2 list items).
        fixed_dims (int list): Dimensions of inputs to hold fixed.
        fixed_vals (float list): Values of the inputs that aren't being plotted.
        mean_or_var (string): Choose to plot mean ("mean") or variance ("var").
        customLabels (string list): Labels ["x","y"] for the x and y axes.
        points (bool): Default False. Plot the training points on mean plots.
        predict (bool): Default True. Gives posterior prediction (as opposed to posterior estimation).

    Returns:
        None

    """

    dim = E.training.inputs[0].size
    minmax = []
    # for plotting training points on a scatter graph
    x = []
    y = []
    print("\n*** Generating plot ***")

    # if we are doing a 1D plot for multidimensional inputs
    if len(plot_dims) == 1 and dim > 1:
        one_d = True

        minmax.append( [_np.amin(E.training.inputs[:,plot_dims[0]]) , _np.amax(E.training.inputs[:,plot_dims[0]])] )

        if points and mean_or_var == "mean":
            x = E.training.inputs[:,plot_dims[0]]
            y = E.training.outputs

        # set labels
        if customLabels == []:
            xlabel = "input " + str(plot_dims[0])
            ylabel = "output " + str(E.beliefs.output)
        else:
            try:
                xlabel = customLabels[0]
            except IndexError:
                xlabel = "input " + str(plot_dims[0])
            try:
                ylabel = customLabels[1]
            except IndexError:
                ylabel = "output " + str(E.beliefs.output)

    else:
        one_d = False

        if points and mean_or_var == "mean":
            x = E.training.inputs[:,plot_dims[0]]
            y = E.training.outputs

        minmax.append( [_np.amin(E.training.inputs[:,plot_dims[0]]) , _np.amax(E.training.inputs[:,plot_dims[0]])] )
        if dim > 1:
            minmax.append( [_np.amin(E.training.inputs[:,plot_dims[1]]) , _np.amax(E.training.inputs[:,plot_dims[1]])] )
        # set labels
        if customLabels == []:
            xlabel = "input " + str(plot_dims[0])
            if dim == 1:
                ylabel = "output "
            else:
                ylabel = "input " + str(plot_dims[1])
        else:
            try:
                xlabel = customLabels[0]
            except IndexError:
                xlabel = "input " + str(plot_dims[0])
            try:
                ylabel = customLabels[1]
            except IndexError:
                if dim == 1:
                    ylabel = "output "
                else:
                    ylabel = "input " + str(plot_dims[1])


    # number of inputs along each prediction dimension
    pn = 30
    # generate range of inputs to make predictions
    full_xrange = __emup.make_inputs(dim, pn, pn,
                                     plot_dims, fixed_dims, fixed_vals, one_d, minmax)
    newinputs = __emuc.Data(full_xrange, None, E.basis, E.par, E.beliefs, E.K)

    if predict:
        print("Prediction (rather than estimation)")
    else:
        print("Estimation (rather than prediction)")

    post = __emuc.Posterior(newinputs, E.training, E.par, E.beliefs, E.K, predict)

    # call the actual plotting routine
    __emup.plotting(dim, post, pn, pn, one_d, mean_or_var, minmax, x, y, labels=[xlabel, ylabel])

    return None
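Hypothetical calls for a 3-input emulator E (a trained Emulator instance is assumed): a 2D plot of the posterior mean against inputs 0 and 1 with input 2 held at 0.5, then a 1D plot of the posterior variance against input 0.

plot(E, [0, 1], fixed_dims=[2], fixed_vals=[0.5], mean_or_var="mean", points=True)
plot(E, [0], fixed_dims=[1, 2], fixed_vals=[0.5, 0.5], mean_or_var="var")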
Example #9
def noisefit(data, noise, stopat=20, olhcmult=100, samples=200, fileStr=""):
    """Try to fit one emualtor to the mean of the data and another emulator to the noise of the data. Results of estimating the noise are saved to the files 'noise-inputs' and 'noise-outputs'.

    Args:
        data (str): Name of configuration file for fitting the input-output data.
        noise (str): Name of configuration file for fitting the input-noise.
        stopat (int): Number of iterations.
        olhcmult (int): Scales the number of data points in the results files.
        samples (int): Number of posterior draws used to estimate the noise at each iteration.
        fileStr (str): String to prepend to output files, default "".

    Returns:
        None

    """

    #### check transform option
    ## if not "log", no transformation will be done

    #### check consistency
    datac, noisec = __read_file(data), __read_file(noise)
    datab, noiseb = __read_file(datac["beliefs"]), __read_file(
        noisec["beliefs"])
    if datac["inputs"] != noisec["inputs"]:
        print("\nWARNING: different inputs files in config files. Exiting.")
        return None
    if datab["mucm"] == 'T':
        print(
            "\nWARNING: data beliefs must have mucm F, "
            "as sigma (presumably) not valid if extra pointwise variance is added. Exiting."
        )
        return None
    if datab["fix_nugget"] == 'T' or noiseb["fix_nugget"] == 'T':
        print("\nWARNING: data and noise beliefs need fix_nugget F. Exiting.")
        return None
    if datac["tv_config"] != noisec["tv_config"]:
        print("\nWARNING: different tv_config in config files. Exiting.")
        return None
    if noisec["outputs"] != "zp-outputs":
        print("\nWARNING: noise config outputs must be 'zp-outputs'. Exiting.")
        return None

    ## setup emulators here
    GD = g.setup(data, datashuffle=True, scaleinputs=False)
    ## create 'zp-outputs' file with zeros
    np.savetxt("zp-outputs", \
      np.zeros(GD.training.outputs.size + GD.validation.outputs.size*GD.tv_conf.noV).T)
    GN = g.setup(noise, datashuffle=True, scaleinputs=False)

    ## if shuffled, fix the inconsistencies
    GN.training.inputs = GD.training.inputs
    GN.validation.inputs = GD.validation.inputs
    GN.training.remake()
    GN.validation.remake()

    ## noise fitting allows at most 1 validation set
    if GD.all_data.tv.noV > 1:
        print(
            "\nWARNING: should have 0 or 1 validation sets for noise fitting. Exiting."
        )
        ## extra validation sets would be totally unused
        exit()
    valsets = False if GD.all_data.tv.noV == 0 else True

    #### step 1 ####
    print("\n****************" "\nTRAIN GP ON DATA" "\n****************")
    #GD = g.setup(data, datashuffle=False, scaleinputs=False)
    x = GD.training.inputs  # values of the inputs
    t = GD.training.outputs  # values of the noisy outputs
    if valsets:
        xv = GD.validation.inputs  # values of the inputs
        tv = GD.validation.outputs

    #print(np.amin(x), np.amax(x))
    g.train(GD, no_retrain=valsets)

    r = np.zeros(t.size)
    if valsets:
        rv = np.zeros(tv.size)

    ## we stay within this loop until done 'stopat' fits
    count = 0
    while True:
        if count == 0:
            xp = __emuc.Data(x, None, GD.basis, GD.par, GD.beliefs, GD.K)
            if valsets:
                xvp = __emuc.Data(xv, None, GD.basis, GD.par, GD.beliefs, GD.K)
        else:
            #### step 5 - return to step 2 if not converged ####
            xp = __emuc.Data(x, None, GD.basis, GD.par, GD.beliefs, GD.K)
            xp.set_r(r)
            xp.make_A(s2=GD.par.sigma**2, predict=True)
            if valsets:
                xvp = __emuc.Data(xv, None, GD.basis, GD.par, GD.beliefs, GD.K)
                xvp.set_r(rv)
                xvp.make_A(s2=GD.par.sigma**2, predict=True)
        count = count + 1

        #### step 2 - generate D'={(xi,zi)} ####
        print("\n***********************"
              "\nESTIMATING NOISE LEVELS " + str(count) +
              "\n***********************")

        post = __emuc.Posterior(xp,
                                GD.training,
                                GD.par,
                                GD.beliefs,
                                GD.K,
                                predict=True)
        L = np.linalg.cholesky(post.var)
        z_prime = np.zeros(t.size)
        s = samples
        for j in range(s):  # predict 's' different values
            u = np.random.randn(t.size)
            tij = post.mean + L.dot(u)
            z_prime = z_prime + 0.5 * (t - tij)**2
        z_prime = __transform(z_prime / float(s))
        np.savetxt('zp-outputs', z_prime)

        # estimate noise levels for validation set
        if valsets:
            post = __emuc.Posterior(xvp,
                                    GD.training,
                                    GD.par,
                                    GD.beliefs,
                                    GD.K,
                                    predict=True)
            L = np.linalg.cholesky(post.var)
            z_prime_V = np.zeros(tv.size)
            s = samples
            for j in range(s):  # predict 's' different values
                u = np.random.randn(tv.size)
                tij = post.mean + L.dot(u)
                z_prime_V = z_prime_V + 0.5 * (tv - tij)**2
            z_prime_V = __transform(z_prime_V / float(s))

        #### step 3 ####
        # train a GP on x and z
        print("\n*****************"
              "\nTRAIN GP ON NOISE " + str(count) + "\n*****************")
        ## update the noise emulator's training outputs with the new z' values
        GN.training.outputs = z_prime.T
        GN.training.remake()
        if valsets:
            GN.validation.outputs = z_prime_V.T
            GN.validation.remake()
        ## fix to allow retraining using same training set against validation
        GN.tv_conf.no_of_trains = 0
        GN.tv_conf.retrain = 'y'
        g.train(GN, no_retrain=valsets)

        #### step 4 - use GN to predict noise values for G3 ####
        print("\n***********************************"
              "\nTRAIN GP ON DATA WITH NOISE FROM GP " + str(count) +
              "\n***********************************")

        xp_GN = __emuc.Data(x, None, GN.basis, GN.par, GN.beliefs, GN.K)
        p_GN = __emuc.Posterior(xp_GN,
                                GN.training,
                                GN.par,
                                GN.beliefs,
                                GN.K,
                                predict=True)
        r = __untransform(p_GN.mean, np.diag(p_GN.var))

        #GD = g.setup(data, datashuffle=False, scaleinputs=False)
        GD.training.set_r(r)

        ## add estimated r to the validation set for better diagnostics
        if valsets:
            v_GN = __emuc.Data(xv, None, GN.basis, GN.par, GN.beliefs, GN.K)
            pv_GN = __emuc.Posterior(v_GN,
                                     GN.training,
                                     GN.par,
                                     GN.beliefs,
                                     GN.K,
                                     predict=True)
            rv = __untransform(pv_GN.mean, np.diag(pv_GN.var))
            GD.validation.set_r(rv)

        ## fix to allow retraining using same training set against validation
        GD.tv_conf.no_of_trains = 0
        GD.tv_conf.retrain = 'y'
        g.train(GD, no_retrain=valsets)

        # break when we've done 'stopat' fits
        if count == stopat:
            print("\nCompleted", count, "fits, stopping here.")

            ## use an OLHC design for x_values of noise guesses we'll save
            print("\nGenerating input points to predict noise values at...")
            n = x[0].size * int(olhcmult)
            N = int(n)
            olhc_range = [[np.amin(col), np.amax(col)] for col in x.T]
            #print("olhc_range:", olhc_range)
            filename = "x_range_input"
            _gd.optLatinHyperCube(x[0].size, n, N, olhc_range, filename)
            x_range = np.loadtxt(filename)  # read generated oLHC file in

            # if 1D inputs, store in 2D array with only 1 column
            if x[0].size == 1:
                x_range = np.array([x_range,]).T

            ## save data to file
            x_plot = __emuc.Data(x_range, None, GN.basis, GN.par, GN.beliefs,
                                 GN.K)
            p_plot = __emuc.Posterior(x_plot,
                                      GN.training,
                                      GN.par,
                                      GN.beliefs,
                                      GN.K,
                                      predict=True)
            mean_plot = p_plot.mean
            var_plot = p_plot.var
            p_plot.interval()
            UI, LI = p_plot.UI, p_plot.LI

            print("\nSaving results to file...")
            nfileStr = fileStr + "_" if fileStr != "" else fileStr
            np.savetxt(nfileStr + 'noise-inputs', x_range)

            np.savetxt(nfileStr + 'noise-outputs', np.transpose(
                [__untransform(mean_plot, np.diag(var_plot)),
                 __untransform(LI, 0.0),
                 __untransform(UI, 0.0)]))

            break

    return None
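A hypothetical end-to-end call (config file names are assumptions; the files must satisfy the consistency checks at the top of noisefit()): alternate between fitting the mean and the noise for 20 iterations, then save the noise estimates.

noisefit("config-data", "config-noise", stopat=20, olhcmult=100,
         samples=200, fileStr="recon")
# writes "recon_noise-inputs" and "recon_noise-outputs"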