Python Campaign示例，easysurrogate.Campaign Python示例

示例#1

0

显示文件

文件： fusion_tutorial.py 项目： wedeling/EasySurrogate

def ann_surrogate_test(samples_c, n_train):
    """Train a vanilla ANN surrogate on ``samples_c`` and report its test error.

    The function also reads module-level names defined outside this block:
    ``theta`` (list of per-feature input arrays), ``test_frac``, ``n_mc``,
    ``order_orig`` and the helper ``ann_derivative_based_sa``.

    Parameters
    ----------
    samples_c : numpy.ndarray
        Output samples, one row per run; ``samples_c.shape[1]`` is used as the
        number of output neurons.
    n_train : int
        Index of the first run that is treated as test data; runs
        ``n_train .. n_mc - 1`` are used for testing.

    Side effects: saves the campaign state, writes
    ``ann_ax_test_data_res.png`` and prints the relative test error.
    """
    campaign = es.Campaign()

    # create a vanilla ANN surrogate
    surrogate_ann = es.methods.ANN_Surrogate()

    # -- If not using all the grid points
    # samples_c = samples_c[:, 0].reshape(-1, 1)  # axial
    # samples_c = samples_c[:, np.arange(0, 100, 5).tolist()]  # sparse

    # number of output neurons
    n_out = samples_c.shape[1]

    # train the surrogate on the data
    n_iter = 20000

    surrogate_ann.train(theta,
                        samples_c,
                        n_iter,
                        test_frac=test_frac,
                        n_layers=2,
                        n_neurons=1)
    campaign.add_app(name='ann_campaign', surrogate=surrogate_ann)
    campaign.save_state()

    # -- If loading a pretrained surrogate
    # campaign = es.Campaign(load_state=True)
    # surrogate = campaign.surrogate

    # Reference data for exactly the runs that are predicted below. Taking it
    # from the ``samples_c`` argument (not a module-level array) and bounding
    # it by ``n_mc`` keeps it aligned with ``test_predictions``.
    test_reference = samples_c[n_train:n_mc]

    # evaluate the surrogate on the test data
    test_predictions = np.zeros([n_mc - n_train, n_out])
    for count, i in enumerate(range(n_train, n_mc)):
        theta_point = [x[i] for x in theta]
        test_predictions[count] = surrogate_ann.predict(theta_point)

    # plot the test predictions and data
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(test_predictions.T, 'b')
    ax.plot(test_reference.T, 'r+')
    plt.tight_layout()
    plt.savefig('ann_ax_test_data_res.png')

    # -- If plotting around y=x for a scalar QoI
    # plot_prediction_results(samples_c[n_train:], test_predictions)

    # print the relative test error, normalised by the reference data so the
    # figure is comparable with the GP surrogate reports
    rel_err_test = np.linalg.norm(test_predictions - test_reference
                                  ) / np.linalg.norm(test_reference)
    print('Relative error on the test set is %.2f percent' %
          (rel_err_test * 100))

    # perform derivative-based sensitivity analysis of the surrogate
    ann_derivative_based_sa(surrogate_ann, order_orig)

示例#2

0

显示文件

def main():
    """Read pickled simulation inputs/outputs and store a subset as HDF5.

    The first ``n_mc`` runs of the inputs and outputs are selected, the input
    features are passed through ``Feature_Engineering.chose_feat_subset`` and
    the result is written to ``sim_data.hdf5`` under the key ``'Te'`` (outputs)
    plus one key per input-parameter name.
    """
    campaign = es.Campaign()
    feat_eng = es.methods.Feature_Engineering()

    # simulation inputs: one column vector per column of the pickled table
    input_table = pd.read_pickle('inputs_2704.pickle').to_numpy()[:, :]
    theta = [
        np.array(column).reshape(-1, 1) for column in input_table.T.tolist()
    ]

    # simulation outputs
    samples = pd.read_pickle('outputs_2704.pickle').to_numpy()[:, :]

    # size of the X, Y subsets to analyse
    ndim_in = 10
    n_mc = 500
    samples_c = samples[:n_mc]
    theta_c = [feature[:n_mc] for feature in theta]

    # current ordering of the parameters by sensitivity
    name_order_orig = [
        "Qe_tot", "H0", "Hw", "chi", "Te_bc",
        "b_pos", "b_height", "b_sol", "b_width", "b_slope",
    ]

    # -- If we change order of the features by their importance
    # order = get_sa_order(order_orig, order_sa_ann)

    order_num = np.arange(len(name_order_orig))
    theta_reod = feat_eng.chose_feat_subset(theta_c, ndim_in, order_num)

    # save reference data as hdf: outputs first, then one entry per parameter
    data_sim = {'Te': samples_c}
    data_sim.update({
        name: theta_reod[position]
        for position, name in enumerate(name_order_orig)
    })
    campaign.store_data_to_hdf5(data_sim, file_path='sim_data.hdf5')

示例#3

0

显示文件

文件： gp_surrogate_predict.py 项目： wedeling/EasySurrogate

def main(order_num=None):
    """Evaluate a saved surrogate on every stored run and save the results.

    Loads the campaign state from ``mogp_10_model_1006_full_100.pickle`` and
    the input features from ``sim_data.hdf5``, calls the surrogate's
    ``predict`` once per run and writes the two values it returns to
    ``sur_data.hdf5`` under ``'Te'`` and ``'Te_sigmasq'``.

    Parameters
    ----------
    order_num : sequence of int, optional
        Feature ordering passed to ``chose_feat_subset``; defaults to
        ``np.arange(n_in)`` of the loaded surrogate.
    """
    campaign = es.Campaign(load_state=True,
                           file_path='mogp_10_model_1006_full_100.pickle')

    # simulation data (inputs and outputs) stored earlier
    data_frame = campaign.load_hdf5_data(file_path='sim_data.hdf5')

    # names of the input features, all of which are used
    feature_names = [
        "Qe_tot", "H0", "Hw", "chi", "Te_bc", "b_pos", "b_height", "b_sol",
        "b_width", "b_slope"
    ]
    theta = [data_frame[name] for name in feature_names]

    surrogate = campaign.surrogate
    ndim_in = surrogate.n_in
    ndim_out = surrogate.n_out
    n_mc_l = theta[0].shape[0]

    # default ordering: features in the order of the name list
    if order_num is None:
        order_num = np.arange(ndim_in)

    # subset of the input data: first ndim_in features given by order_num
    theta_reod = surrogate.feat_eng.chose_feat_subset(theta, ndim_in,
                                                      order_num)

    # predict for the total dataset, one run at a time
    predictions = np.zeros((n_mc_l, ndim_out))
    pred_vars = np.zeros((n_mc_l, ndim_out))
    for run in range(n_mc_l):
        point = [feature[run] for feature in theta_reod]
        mean, variance = surrogate.predict(point)
        predictions[run] = mean
        pred_vars[run] = variance

    data_sur = {'Te': predictions, 'Te_sigmasq': pred_vars}
    campaign.store_data_to_hdf5(data_sur, file_path='sur_data.hdf5')

示例#4

0

显示文件

from itertools import chain
import numpy as np
import easysurrogate as es

# Set up the EasySurrogate campaign and read the HDF5 data frame
campaign = es.Campaign()
data_frame = campaign.load_hdf5_data()

# supervised training pair: features and target
features, target = data_frame['X_data'], data_frame['B_data']

# training configuration: iteration count and time-lagged features
n_iter = 20000
lags = [[1, 10]]
network_options = {
    'lags': lags,
    'n_layers': 4,
    'n_neurons': 256,
    'batch_size': 512,
}

# create the Quantized Softmax Network surrogate and train it on the data
surrogate = es.methods.QSN_Surrogate()
surrogate.train([features], target, n_iter, **network_options)

# register the trained surrogate with the campaign
campaign.add_app(name='test_campaign', surrogate=surrogate)

示例#5

0

显示文件

文件： train_gp_surrogate.py 项目： wedeling/EasySurrogate

def gp_train(order_num=None,
             ndim_in=None,
             ndim_out=None,
             n_train=None,
             test_frac=0.0):
    """Train a GP surrogate on HDF5 simulation data and save the campaign.

    Parameters
    ----------
    order_num : sequence of int, optional
        Feature ordering passed to ``chose_feat_subset``; defaults to
        ``np.arange(ndim_in)``.
    ndim_in : int, optional
        Number of input features to use. Defaults to the number of entries in
        the feature-name list. A caller-supplied value is honoured (it used
        to be silently replaced by the full feature count).
    ndim_out : int, optional
        Output dimensionality; defaults to ``target.shape[1]``.
    n_train : optional
        Not used by this function; kept so existing callers keep working.
    test_frac : float
        Forwarded to ``surrogate.train``.

    Returns
    -------
    float
        Wall-clock time in seconds spent inside ``surrogate.train``.
    """
    # create campaign object
    campaign = es.Campaign()

    # load the training and testing data from simulations
    data_frame = campaign.load_hdf5_data()

    # choose target data from the loaded dictionary
    target = data_frame['Te']

    # -- If choosing data not on every grid point
    # samples = samples_c[:, 0].reshape(-1, 1)
    # samples = samples_c[:, np.arange(0, 100, 20).tolist()]

    # names of the input features, all of which are loaded
    feature_names = [
        "Qe_tot", "H0", "Hw", "chi", "Te_bc", "b_pos", "b_height", "b_sol",
        "b_width", "b_slope"
    ]
    features = [data_frame[name] for name in feature_names]

    # input dimensionality: only fall back to the full feature count when the
    # caller did not ask for a specific number of features
    if ndim_in is None:
        ndim_in = len(features)

    # output dimensionality
    if ndim_out is None:
        ndim_out = target.shape[1]

    # order of input features: indices into the feature name list
    if order_num is None:
        order_num = np.arange(ndim_in)

    # create a surrogate object
    surrogate = es.methods.GP_Surrogate(n_in=ndim_in,
                                        n_out=ndim_out,
                                        backend='scikit-learn')

    # subset of the input data: first ndim_in features given by order_num
    features = surrogate.feat_eng.chose_feat_subset(features, ndim_in,
                                                    order_num)

    # train a surrogate model and time the training
    day = date.today().strftime('%d%m')
    st_time = time.time()
    surrogate.train(features,
                    target,
                    test_frac=test_frac,
                    basekernel='Matern',
                    noize='fit')
    tot_time = time.time() - st_time
    print('Time to train a GP surrogate {:.3}'.format(tot_time))

    # print parameters of resulting surrogate
    surrogate.model.print_model_info()

    # save the app and the surrogate; the file name carries the input
    # dimensionality and today's date (ddmm)
    campaign.add_app(name='gp_campaign', surrogate=surrogate)
    campaign.save_state(
        file_path='mogp_{}_model_{}_full_100.pickle'.format(ndim_in, day))

    return tot_time

示例#6

0

显示文件

文件： lorenz96_analysis.py 项目： wedeling/EasySurrogate

import os
import numpy as np
import matplotlib.pyplot as plt
import easysurrogate as es

# directory containing this script
home = os.path.abspath(os.path.dirname(__file__))

# load the campaign (restores the previously saved state)
campaign = es.Campaign(load_state=True)
# load the training data (from lorenz96.py)
data_frame_ref = campaign.load_hdf5_data()
# load the data from lorenz96_qsn.py here
# NOTE(review): both loads are called without a file path, so which file is
# read is decided inside load_hdf5_data -- confirm the two frames differ.
data_frame_qsn = campaign.load_hdf5_data()

# reference data
X_ref = data_frame_ref['X_data']
B_ref = data_frame_ref['B_data']

# data of the QSN surrogate
X_qsn = data_frame_qsn['X_data']
B_qsn = data_frame_qsn['B_data']

# create QSN analysis object for the campaign's surrogate
analysis = es.analysis.QSN_analysis(campaign.surrogate)

#############
# Plot PDFs #
#############

start_idx = 0
fig = plt.figure(figsize=[8, 4])

示例#7

0

显示文件

文件： fusion_tutorial.py 项目： wedeling/EasySurrogate

def gp_surrogate_test(order=None, ndim_in=None, ndim_out=None, n_train=None):
    """Train a scikit-learn GP surrogate and analyse it on train/test data.

    Besides its arguments the function reads module-level names defined
    outside this block: ``samples_c``, ``theta_c``, ``test_frac``,
    ``order_orig`` and the helpers ``chose_feat_subset``,
    ``plot_prediction_results``, ``plot_prediction_results_vspar``,
    ``plot_prediction_results_vectorqoi``, ``gp_derivative_based_sa`` and
    ``ws_dist``.

    Parameters
    ----------
    order : sequence of int, optional
        Feature ordering passed to ``chose_feat_subset``; defaults to
        ``np.arange(ndim_in)``.
    ndim_in : int, optional
        Number of input features. Defaults to ``len(theta_c)`` when
        ``theta_c`` is a list of per-feature arrays, or ``theta_c.shape[1]``
        when it is an array.
    ndim_out : int, optional
        Passed to ``GP_Surrogate`` as ``n_out``.
    n_train : optional
        Ignored on input; overwritten with ``surrogate_gp.feat_eng.n_train``
        after training.

    Returns
    -------
    tuple
        ``(tot_time, rel_err_test)``: training wall-clock time in seconds and
        the relative error on the test set.

    Side effects: saves the campaign state, writes ``sim_data.hdf5`` and
    ``sur_data.hdf5``, produces several plots and prints diagnostics.
    """
    # TRAINING PHASE
    campaign = es.Campaign()

    # -- If chosing data not on every grid point
    # samples_c= samples_c[:, 0].reshape(-1, 1)
    # samples_c = samples_c[:, np.arange(0, 100, 20).tolist()]

    n_out = samples_c.shape[1]
    n_mc_l = samples_c.shape[0]

    # Resolve the input dimensionality BEFORE building the surrogate so that
    # the surrogate never receives n_in=None. For a list, every element is one
    # feature (it is indexed by run below), so the feature count is the list
    # length, not the length of its first element.
    if ndim_in is None:
        if isinstance(theta_c, list):
            ndim_in = len(theta_c)
        elif isinstance(theta_c, np.ndarray):
            ndim_in = theta_c.shape[1]

    if order is None:
        order = np.arange(ndim_in)

    surrogate_gp = es.methods.GP_Surrogate(n_in=ndim_in,
                                           n_out=ndim_out,
                                           backend='scikit-learn')

    theta_reod = chose_feat_subset(theta_c, ndim_in, order)

    day = date.today().strftime('%d%m')
    st_time = time.time()
    surrogate_gp.train(theta_reod,
                       samples_c,
                       test_frac=test_frac,
                       basekernel='Matern',
                       noize='fit')
    tot_time = time.time() - st_time
    print('Time to train a GP surrogate {:.3}'.format(tot_time))

    campaign.add_app(name='gp_campaign', surrogate=surrogate_gp)
    campaign.save_state(
        file_path='mogp_{}_model_{}_full_100.pickle'.format(ndim_in, day))

    # -- If loading a pretrained surrogate
    # campaign = es.Campaign(load_state=True, file_path='skl_1_model_2505_full_100.pickle')
    # surrogate_gp = campaign.surrogate

    # TRYING SEQUENTIAL OPTIMISATION - passes, but resulting model worsens in its performance
    # surrogate_gp.train_sequentially(theta_reod, samples_axial, n_iter=10, acquisition_function='poi')
    # print('Indices of runs used for training: {}'.format(surrogate_gp.feat_eng.train_indices))
    #####

    surrogate_gp.model.print_model_info()

    # lists of run indices used for training and testing
    train_inds = surrogate_gp.feat_eng.train_indices.tolist()
    test_inds = surrogate_gp.feat_eng.test_indices.tolist()
    n_train = surrogate_gp.feat_eng.n_train  # length of training data set

    # (n_runs, n_features) input matrices, one row per run
    test_features = np.array(
        [theta_feat[test_inds].reshape(-1) for theta_feat in theta_reod]).T
    train_features = np.array(
        [theta_feat[train_inds].reshape(-1) for theta_feat in theta_reod]).T

    # ANALYSIS PHASE
    analysis = es.analysis.GP_analysis(surrogate_gp)

    # Training set
    # evaluate the surrogate on the training data
    training_predictions = np.zeros([n_train, n_out])
    training_pred_vars = np.zeros([n_train, n_out])

    for i, j in enumerate(train_inds):
        theta_point = [x[j] for x in theta_reod]
        training_predictions[i], training_pred_vars[i] = surrogate_gp.predict(
            theta_point)

    # plot the train predictions and original data
    plot_prediction_results(samples_c[train_inds].reshape(-1),
                            training_predictions.reshape(-1),
                            training_pred_vars.reshape(-1), 1.0,
                            'gp_train_{}_data_res.png'.format(ndim_in))

    rel_err_train = np.linalg.norm(training_predictions -
                                   samples_c[train_inds]) / np.linalg.norm(
                                       samples_c[train_inds])
    print('Relative error on the training set is %.2f percent' %
          (rel_err_train * 100))

    # plot prediction against parameter values (scalar input, scalar output)
    if len(theta_reod) == 1 and samples_c.shape[1] == 1:
        plot_prediction_results_vspar(
            theta_reod[0][train_inds].reshape(-1),
            samples_c[train_inds],
            training_predictions.reshape(-1),
            training_pred_vars.reshape(-1),
            name='gp_theta_train_{}_data_res.png'.format(ndim_in))

    # Testing set
    # evaluate on testing data
    test_predictions = np.zeros([n_mc_l - n_train, n_out])
    test_pred_vars = np.zeros([n_mc_l - n_train, n_out])
    for i, j in enumerate(test_inds):
        theta_point = [x[j] for x in theta_reod]
        test_predictions[i], test_pred_vars[i] = surrogate_gp.predict(
            theta_point)

    # plot the test predictions and data
    plot_prediction_results(samples_c[test_inds].reshape(-1),
                            test_predictions.reshape(-1),
                            test_pred_vars.reshape(-1),
                            name='gp_test_{}_data_res.png'.format(ndim_in))

    # plot several chosen test predictions as radial dependency
    plot_prediction_results_vectorqoi(
        samples_c[test_inds],
        test_predictions,
        test_pred_vars,
        name='gp_test_{}_data_res_profiles.png'.format(ndim_in))

    # plot prediction against parameter values for testing data
    if len(theta_reod) == 1 and samples_c.shape[1] == 1:
        plot_prediction_results_vspar(
            theta_reod[0][test_inds].reshape(-1),
            samples_c[test_inds],
            test_predictions.reshape(-1),
            test_pred_vars,
            name='gp_theta_test_{}_data_res.png'.format(ndim_in))

    # print the relative test error
    rel_err_test = np.linalg.norm(test_predictions -
                                  samples_c[test_inds]) / np.linalg.norm(
                                      samples_c[test_inds])
    print('Relative error on the test set is %.2f percent' %
          (rel_err_test * 100))

    # print variance of the predicted means and R2 score on the test set.
    # The score is evaluated on the run-per-row matrix built above; reshaping
    # the feature-major array directly would mix values of different features
    # within one row whenever ndim_in > 1.
    test_pred_var_tot = test_predictions.var()
    print('Variance of predicted result means for the test set %.3f' %
          test_pred_var_tot)
    print('R2 score on testing set: {}'.format(
        surrogate_gp.model.instance.score(test_features,
                                          samples_c[test_inds])))

    # Save simulation and surrogate data to hdf
    data_sim = {}
    data_sim['Te'] = samples_c
    for i, name in enumerate(order_orig):
        data_sim[name] = theta_reod[i]
    campaign.store_data_to_hdf5(data_sim, file_path='sim_data.hdf5')

    predictions = np.zeros([n_mc_l, n_out])
    pred_vars = np.zeros([n_mc_l, n_out])
    for i in range(n_mc_l):
        theta_point = [x[i] for x in theta_reod]
        predictions[i], pred_vars[i] = surrogate_gp.predict(theta_point)

    data_sur = {}
    data_sur['Te'] = predictions
    data_sur['Te_sigmasq'] = pred_vars
    campaign.store_data_to_hdf5(data_sur, file_path='sur_data.hdf5')

    # read both files back so the PDFs below use the stored data
    data_frame_sim = campaign.load_hdf5_data(file_path='sim_data.hdf5')
    data_frame_sur = campaign.load_hdf5_data(file_path='sur_data.hdf5')

    samples = data_frame_sim['Te']
    predictions = data_frame_sur['Te']

    # SENSITIVITY ANALYSIS
    # NOTE(review): this branch is not reached with the 'scikit-learn' backend
    # chosen above; ``theta_reod[:][test_inds]`` indexes the feature container
    # itself with run indices -- confirm the intended argument before
    # enabling the 'mogp' backend.
    if surrogate_gp.backend == 'mogp':
        gp_derivative_based_sa(surrogate_gp,
                               theta_reod[:][test_inds],
                               keys=order_orig)

    # QoI pdfs
    # analyse the QoI (Te(rho=0)) for test set
    te_ax_ts_dat_dom, te_ax_ts_dat_pdf = analysis.get_pdf(
        samples[test_inds][:, 0])
    te_ax_ts_surr_dom, te_ax_ts_surr_pdf = analysis.get_pdf(
        predictions[test_inds][:, 0])
    print('len of test data: {}'.format(samples_c[test_inds][:, 0].shape))

    te_ax_tr_dat_dom, te_ax_tr_dat_pdf = analysis.get_pdf(
        samples_c[train_inds][:, 0])
    te_ax_tr_surr_dom, te_ax_tr_surr_pdf = analysis.get_pdf(
        training_predictions[:, 0])
    print('len of train data: {}'.format(samples_c[train_inds][:, 0].shape))

    te_ax_tt_dat_dom, te_ax_tt_dat_pdf = analysis.get_pdf(samples_c[:][:, 0])
    tot_pred = np.concatenate([training_predictions, test_predictions])
    te_ax_tt_surr_dom, te_ax_tt_surr_pdf = analysis.get_pdf(tot_pred[:, 0])
    print('len of total data: {}'.format(samples_c[:][:, 0].shape))

    analysis.plot_pdfs(te_ax_ts_dat_dom,
                       te_ax_ts_dat_pdf,
                       te_ax_ts_surr_dom,
                       te_ax_ts_surr_pdf,
                       names=[
                           'simulation_test', 'surrogate_test',
                           'simulation_train', 'surrogate_train'
                       ],
                       qoi_names=['Te(r=0)'],
                       filename='pdf_qoi_trts_{}'.format(ndim_in))
    w_d = ws_dist(te_ax_ts_surr_pdf, te_ax_ts_dat_pdf)
    print(
        'Wasserstein distance for distribution of selected QoI produced by simulation and surrogate: {}'
        .format(w_d))

    # plotting errors on a single-case basis, for axial value
    analysis.get_regression_error(test_features, samples_c[test_inds],
                                  train_features, samples_c[train_inds])

    return tot_time, rel_err_test

示例#8

0

显示文件

文件： gp_surrogate_analysis.py 项目： wedeling/EasySurrogate

def main(order_num=None):
    """Analyse stored surrogate predictions against stored simulation data.

    Loads the campaign state from ``mogp_10_model_1006_full_100.pickle``, the
    simulation data from ``sim_data.hdf5`` and the surrogate predictions from
    ``sur_data.hdf5``, then plots and prints train/test diagnostics. The
    helper ``ws_dist`` is expected at module level outside this block.

    Parameters
    ----------
    order_num : sequence of int, optional
        Feature ordering passed to ``chose_feat_subset``; defaults to
        ``np.arange(n_in)`` of the loaded surrogate.

    Returns
    -------
    float
        Relative error of the predictions on the test set.
    """
    campaign = es.Campaign(load_state=True,
                           file_path='mogp_10_model_1006_full_100.pickle')

    # load the training and testing data from simulations
    data_frame_sim = campaign.load_hdf5_data(file_path='sim_data.hdf5')

    # load the predictions from surrogate
    data_frame_sur = campaign.load_hdf5_data(file_path='sur_data.hdf5')

    # names of the input features, all of which are used
    feature_names = [
        "Qe_tot", "H0", "Hw", "chi", "Te_bc", "b_pos", "b_height", "b_sol",
        "b_width", "b_slope"
    ]
    theta = [data_frame_sim[name] for name in feature_names]

    # target data from the loaded dictionary
    samples = data_frame_sim['Te']

    # predicted target and its variance from the loaded dictionary
    predictions = data_frame_sur['Te']
    pred_vars = data_frame_sur['Te_sigmasq']

    # input dimensionality of the stored surrogate
    ndim_in = campaign.surrogate.n_in

    # order of input features: indices into the feature name list
    if order_num is None:
        order_num = np.arange(ndim_in)

    # subset of the input data: first ndim_in features given by order_num
    theta_reod = campaign.surrogate.feat_eng.chose_feat_subset(
        theta, ndim_in, order_num)

    # lists of run indices used for training and testing
    train_inds = campaign.surrogate.feat_eng.train_indices.tolist()
    test_inds = campaign.surrogate.feat_eng.test_indices.tolist()

    training_predictions = predictions[train_inds]
    training_pred_vars = pred_vars[train_inds]
    test_predictions = predictions[test_inds]
    test_pred_vars = pred_vars[test_inds]

    # (n_runs, n_features) input matrices, one row per run
    test_features = np.array(
        [theta_feat[test_inds].reshape(-1) for theta_feat in theta_reod]).T
    train_features = np.array(
        [theta_feat[train_inds].reshape(-1) for theta_feat in theta_reod]).T

    ############
    # Analysis #
    ############

    analysis = es.analysis.GP_analysis(campaign.surrogate)

    # plot the train predictions and original data
    analysis.plot_prediction_results(
        samples[train_inds].reshape(-1), training_predictions.reshape(-1),
        training_pred_vars.reshape(-1), 1.0,
        'gp_train_{}_data_res.png'.format(ndim_in))

    rel_err_train = np.linalg.norm(training_predictions -
                                   samples[train_inds]) / np.linalg.norm(
                                       samples[train_inds])
    print('Relative error on the training set is %.2f percent' %
          (rel_err_train * 100))

    # plot prediction against parameter values (scalar input, scalar output)
    if len(theta_reod) == 1 and samples.shape[1] == 1:
        analysis.plot_prediction_results_vspar(
            theta_reod[0][train_inds].reshape(-1),
            samples[train_inds],
            training_predictions.reshape(-1),
            training_pred_vars.reshape(-1),
            name='gp_theta_train_{}_data_res.png'.format(ndim_in))

    # plot the test predictions and data
    analysis.plot_prediction_results(
        samples[test_inds].reshape(-1),
        test_predictions.reshape(-1),
        test_pred_vars.reshape(-1),
        name='gp_test_{}_data_res.png'.format(ndim_in))

    # plot several chosen test predictions as radial dependency
    analysis.plot_prediction_results_vectorqoi(
        samples[test_inds],
        test_predictions,
        test_pred_vars,
        name='gp_test_{}_data_res_profiles.png'.format(ndim_in))

    # plot prediction against parameter values for testing data
    if len(theta_reod) == 1 and samples.shape[1] == 1:
        analysis.plot_prediction_results_vspar(
            theta_reod[0][test_inds].reshape(-1),
            samples[test_inds],
            test_predictions.reshape(-1),
            test_pred_vars,
            name='gp_theta_test_{}_data_res.png'.format(ndim_in))

    # print the relative test error
    rel_err_test = np.linalg.norm(test_predictions -
                                  samples[test_inds]) / np.linalg.norm(
                                      samples[test_inds])
    print('Relative error on the test set is %.2f percent' %
          (rel_err_test * 100))

    # print variance of the predicted means and R2 score on the test set.
    # The score is evaluated on the run-per-row matrix built above; reshaping
    # the feature-major array directly would mix values of different features
    # within one row whenever ndim_in > 1.
    test_pred_var_tot = test_predictions.var()
    print('Variance of predicted result means for the test set %.3f' %
          test_pred_var_tot)
    print('R2 score on testing set: {}'.format(
        campaign.surrogate.model.instance.score(test_features,
                                                samples[test_inds])))

    ############
    # QoI PDFs #
    ############

    # analyse the QoI (Te(rho=0)) for test set
    te_ax_ts_dat_dom, te_ax_ts_dat_pdf = analysis.get_pdf(
        samples[test_inds][:, 0])
    te_ax_ts_surr_dom, te_ax_ts_surr_pdf = analysis.get_pdf(
        predictions[test_inds][:, 0])
    print('len of test data: {}'.format(samples[test_inds][:, 0].shape))

    te_ax_tr_dat_dom, te_ax_tr_dat_pdf = analysis.get_pdf(
        samples[train_inds][:, 0])
    te_ax_tr_surr_dom, te_ax_tr_surr_pdf = analysis.get_pdf(
        training_predictions[:, 0])
    print('len of train data: {}'.format(samples[train_inds][:, 0].shape))

    te_ax_tt_dat_dom, te_ax_tt_dat_pdf = analysis.get_pdf(samples[:][:, 0])
    tot_pred = np.concatenate([training_predictions, test_predictions])
    te_ax_tt_surr_dom, te_ax_tt_surr_pdf = analysis.get_pdf(tot_pred[:, 0])
    print('len of total data: {}'.format(samples[:][:, 0].shape))

    analysis.plot_pdfs(te_ax_ts_dat_dom,
                       te_ax_ts_dat_pdf,
                       te_ax_ts_surr_dom,
                       te_ax_ts_surr_pdf,
                       names=[
                           'simulation_test', 'surrogate_test',
                           'simulation_train', 'surrogate_train'
                       ],
                       qoi_names=['Te(r=0)'],
                       filename='pdf_qoi_trts_{}'.format(ndim_in))
    w_d = ws_dist(te_ax_ts_surr_pdf, te_ax_ts_dat_pdf)
    print(
        'Wasserstein distance for distribution of selected QoI produced by simulation and surrogate: {}'
        .format(w_d))

    # plotting errors on a single-case basis, for axial value
    analysis.get_regression_error(test_features, samples[test_inds],
                                  train_features, samples[train_inds])

    return rel_err_test

示例#9

0

显示文件

文件： train_surrogate.py 项目： wedeling/EasySurrogate

from itertools import chain
import numpy as np
import easysurrogate as es

# create EasySurrogate campaign (without restoring a saved state)
campaign = es.Campaign(load_state=False)

# load HDF5 data frame
data_frame = campaign.load_hdf5_data()

# supervised training data set
features = data_frame['X_data']
target = data_frame['B_data']

# create Kernel Mixture Network surrogate
surrogate = es.methods.KMN_Surrogate()

# create time-lagged features
lags = [range(11)]

# create the KDE anchor points and standard deviations
n_means = 15
n_stds = 3
kernel_means = []
kernel_stds = []

# one set of anchor points per column of the target: n_means values evenly
# spaced between that column's minimum and maximum, and n_stds standard
# deviations evenly spaced between 0.2 and 0.3
n_softmax = target.shape[1]
for i in range(n_softmax):
    kernel_means.append(
        np.linspace(np.min(target[:, i]), np.max(target[:, i]), n_means))
    kernel_stds.append(np.linspace(0.2, 0.3, n_stds))

示例#10

0

显示文件

文件： retrain_model.py 项目： wedeling/EasySurrogate

import numpy as np
import easysurrogate as es

# names of the input features and of the target quantities in the data frame
features_names = ['te_value', 'ti_value', 'te_ddrho', 'ti_ddrho']
target_names = ['te_transp_flux', 'ti_transp_flux']

# restore the saved GP campaign and read the data set
campaign = es.Campaign(load_state=True, file_path='gem_gp_model.pickle')
data_frame = campaign.load_hdf5_data(file_path='gem_data_625.hdf5')

# prediction for a single test sample before retraining
m, v = campaign.surrogate.predict(campaign.surrogate.X_test[100, :])
print(m, v)

# collect the features and targets that are present in the data frame
features = [data_frame[k] for k in features_names if k in data_frame]
target_columns = [data_frame[k] for k in target_names if k in data_frame]
target = np.concatenate(target_columns, axis=1)

# try to retrain on max uncertain samples of full feature dataset
campaign.surrogate.train_sequentially(n_iter=20)
campaign.save_state()

# prediction for the same test sample after retraining
m, v = campaign.surrogate.predict(campaign.surrogate.X_test[100, :])
print(m, v)

示例#11

0

显示文件

#data_frame_train = campaign.load_hdf5_data(file_path='gem_data_625.hdf5')

# 2) case for data from a MFW production run
#campaign = es.Campaign(load_state=True, file_path='skl_gem_500_wf_1405_opt.pickle')
#data_frame = campaign.load_hdf5_data(file_path='gem_workflow_500.hdf5')
#data_frame_train = campaign.load_hdf5_data(file_path='gem_workflow_500.hdf5')

# 3) case for data generated from single flux tube GEM0 with 4 parameters (LHD, with a wrapper)
#campaign = es.Campaign(load_state=True, file_path='gem0_lhc.pickle')
#data_frame = campaign.load_hdf5_data(file_path='gem_workflow_500.hdf5')
#data_frame_train = campaign.load_hdf5_data(file_path='gem_workflow_500.hdf5')

# 4) case for data from single flux tube GEM0 with 2 parameters (LHD, with a wrapper)
# Only the third and fourth feature names and the second target name are
# used; features_names and target_names are defined outside this excerpt.
features_names_selected = [features_names[2], features_names[3]]
target_name_selected = [target_names[1]]
campaign = es.Campaign(
    load_state=True, file_path='gp_model_10pperctrset_plus1oseqsamples.pickle')
# both frames are read from the same HDF5 file in this case
data_frame = campaign.load_hdf5_data(file_path='gem0_lhc_256.hdf5')
data_frame_train = campaign.load_hdf5_data(file_path='gem0_lhc_256.hdf5')

# getting the data: keep only the selected names present in the frame
features_train = [
    data_frame_train[k] for k in features_names_selected
    if k in data_frame_train
]
# selected targets joined along axis 1
target_train = np.concatenate([
    data_frame_train[k] for k in target_name_selected if k in data_frame_train
],
                              axis=1)

# split into train/test parts using the training indices stored in the
# surrogate's feature-engineering object
feat_train, targ_train, feat_test, targ_test = campaign.surrogate.feat_eng.\
    get_training_data(features_train, target_train, index=campaign.surrogate.feat_eng.train_indices)

示例#12

0

显示文件

文件： coupling_example.py 项目： wedeling/EasySurrogate

# Associate the sampler with the campaign
# (campaign and my_sampler are created earlier in the file, outside this excerpt)
campaign.set_sampler(my_sampler)

# Execute runs and collate their results
campaign.execute().collate()

# get the EasyVVUQ data frame
data_frame = campaign.get_collation_result()

##############################
# EasySurrogate ANN campaign #
##############################

# Create an EasySurrogate campaign
surr_campaign = es.Campaign()

# This is the main point of this test: extract training data from the
# EasyVVUQ campaign, using the output column 'f' as quantity of interest
features, samples = surr_campaign.load_easyvvuq_data(campaign, qoi_cols='f')

# Create artificial neural network surrogate
surrogate = es.methods.ANN_Surrogate()

# Number of training iterations (number of mini batches)
N_ITER = 10000

# The latter fraction of the data to be kept apart for testing
TEST_FRAC = 0.3

# Train the ANN
surrogate.train(features,

示例#13

0

显示文件

文件： train_DAS_surrogate.py 项目： wedeling/EasySurrogate

# campaign identifier, used in the database file name and as campaign name
ID = 'func'
# SQLite database of the EasyVVUQ campaign inside WORK_DIR
# (WORK_DIR is defined earlier in the file, outside this excerpt)
DB_LOCATION = "sqlite:///" + WORK_DIR + "/campaign%s.db" % ID

# reload easyvvuq campaign
campaign = uq.Campaign(name=ID, db_location=DB_LOCATION)
print("===========================================")
print("Reloaded campaign {}".format(ID))
print("===========================================")
# re-attach the campaign's active sampler
sampler = campaign.get_active_sampler()
campaign.set_sampler(sampler, update=True)

# name of the quantity of interest, the column name of the output file
output_columns = ["f"]

# create an EasySurrogate campaign
surr_campaign = es.Campaign(name=ID)

# load the training data
params, samples = surr_campaign.load_easyvvuq_data(campaign,
                                                   qoi_cols=output_columns)
# keep only the samples of the first (and only) output column
samples = samples[output_columns[0]]

# input dimension (15)
D = params.shape[1]
# assumed dimension active subspace
d = 2

# create DAS surrogate object
surrogate = es.methods.DAS_Surrogate()

# train the DAS surrogate