def ann_surrogate_test(samples_c, n_train):
    """Train a vanilla ANN surrogate and report its error on held-out runs.

    Relies on module-level names that are not defined in this function:
    ``theta`` (an iterable of per-feature arrays indexed by run),
    ``test_frac``, ``n_mc`` (index one past the last run to evaluate),
    ``order_orig`` and ``ann_derivative_based_sa``.

    Parameters
    ----------
    samples_c : numpy.ndarray
        Simulation outputs, one row per run; ``samples_c.shape[1]`` is used
        as the number of output neurons.
    n_train : int
        Index of the first run treated as test data; runs
        ``n_train .. n_mc - 1`` are predicted and compared with the matching
        rows of ``samples_c``.

    Side effects
    ------------
    Saves the campaign state, writes ``ann_ax_test_data_res.png`` and prints
    the relative test error.
    """
    campaign = es.Campaign()

    # create a vanilla ANN surrogate
    surrogate_ann = es.methods.ANN_Surrogate()

    # -- If not using all the grid points
    # samples_c = samples_c[:, 0].reshape(-1, 1)  # axial
    # samples_c = samples_c[:, np.arange(0, 100, 5).tolist()]  # sparse

    # number of output neurons
    n_out = samples_c.shape[1]

    # train the surrogate on the data
    n_iter = 20000
    surrogate_ann.train(theta, samples_c, n_iter, test_frac=test_frac,
                        n_layers=2, n_neurons=1)

    campaign.add_app(name='ann_campaign', surrogate=surrogate_ann)
    campaign.save_state()

    # -- If loading a pretrained surrogate
    # campaign = es.Campaign(load_state=True)
    # surrogate = campaign.surrogate

    # Reference outputs for exactly the runs that are predicted below, so
    # the plot and the error are computed on matching rows.
    test_data = samples_c[n_train:n_mc]

    # evaluate the surrogate on the test data
    test_predictions = np.zeros([n_mc - n_train, n_out])
    for count, i in enumerate(range(n_train, n_mc)):
        theta_point = [x[i] for x in theta]
        test_predictions[count] = surrogate_ann.predict(theta_point)

    # plot the test predictions and data
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(test_predictions.T, 'b')
    # Plot the (possibly sub-selected) samples_c argument rather than the
    # global `samples`, so the data shown is the data that was predicted.
    ax.plot(test_data.T, 'r+')
    plt.tight_layout()
    plt.savefig('ann_ax_test_data_res.png')

    # -- If plotting around y=x for a scalar QoI
    # plot_prediction_results(samples_c[I:], test_predictions)

    # print the relative test error, normalised by the reference data
    # (same convention as the GP surrogate tests in this file)
    rel_err_test = (np.linalg.norm(test_predictions - test_data)
                    / np.linalg.norm(test_data))
    print('Relative error on the test set is %.2f percent' %
          (rel_err_test * 100))

    # perform derivative-based sensitivity analysis of the surrogate
    ann_derivative_based_sa(surrogate_ann, order_orig)
def main():
    """Convert pickled simulation inputs/outputs into the reference HDF5 file.

    Reads ``inputs_2704.pickle`` and ``outputs_2704.pickle``, keeps the first
    ``n_mc`` runs, selects ``ndim_in`` input features through
    ``Feature_Engineering.chose_feat_subset`` and stores the outputs (key
    ``'Te'``) together with one entry per named input in ``sim_data.hdf5``.
    """
    campaign = es.Campaign()
    feat_eng = es.methods.Feature_Engineering()

    # Sizes of the X, Y subsets to analyse
    ndim_in = 10
    n_mc = 500

    # Current ordering of parameters by sensitivity
    name_order_orig = [
        "Qe_tot", "H0", "Hw", "chi", "Te_bc",
        "b_pos", "b_height", "b_sol", "b_width", "b_slope",
    ]

    # Inputs: one (n_runs, 1) column array per parameter
    raw_inputs = pd.read_pickle('inputs_2704.pickle').to_numpy()[:, :]
    theta = [np.array(column).reshape(-1, 1)
             for column in raw_inputs.T.tolist()]

    # Outputs of the simulations
    samples = pd.read_pickle('outputs_2704.pickle').to_numpy()[:, :]

    # Restrict both inputs and outputs to the first n_mc runs
    samples_c = samples[:n_mc]
    theta_c = [column[:n_mc] for column in theta]

    # -- If we change order of the features by their importance
    # order = get_sa_order(order_orig, order_sa_ann)
    order_num = np.arange(len(name_order_orig))
    theta_reod = feat_eng.chose_feat_subset(theta_c, ndim_in, order_num)

    # Save reference data as hdf: outputs first, then one entry per input
    data_sim = {'Te': samples_c}
    data_sim.update({
        name: theta_reod[position]
        for position, name in enumerate(name_order_orig)
    })
    campaign.store_data_to_hdf5(data_sim, file_path='sim_data.hdf5')
def main(order_num=None):
    """Evaluate a saved GP surrogate on every simulation run and store the result.

    Loads the campaign from ``mogp_10_model_1006_full_100.pickle`` and the
    inputs from ``sim_data.hdf5``, predicts mean and variance for each run,
    and writes them to ``sur_data.hdf5`` under ``'Te'`` and ``'Te_sigmasq'``.

    Parameters
    ----------
    order_num : sequence of int, optional
        Feature indices passed to ``chose_feat_subset``; defaults to
        ``np.arange(ndim_in)`` with ``ndim_in`` taken from the surrogate.
    """
    campaign = es.Campaign(load_state=True,
                           file_path='mogp_10_model_1006_full_100.pickle')

    # training and testing data from simulations
    data_frame = campaign.load_hdf5_data(file_path='sim_data.hdf5')

    # names of the input features; all of them are used
    feature_names = [
        "Qe_tot", "H0", "Hw", "chi", "Te_bc",
        "b_pos", "b_height", "b_sol", "b_width", "b_slope",
    ]
    theta = [data_frame[name] for name in feature_names]

    surrogate = campaign.surrogate
    ndim_in = surrogate.n_in
    ndim_out = surrogate.n_out
    n_mc_l = theta[0].shape[0]

    # order of input features: numbers corresponding to the feature name list
    if order_num is None:
        order_num = np.arange(ndim_in)

    # take the first ndim_in features defined by the order_num list
    theta_reod = surrogate.feat_eng.chose_feat_subset(theta, ndim_in,
                                                      order_num)

    # predict mean and variance for the total dataset, run by run
    predictions = np.zeros([n_mc_l, ndim_out])
    pred_vars = np.zeros([n_mc_l, ndim_out])
    for run in range(n_mc_l):
        mean, variance = surrogate.predict(
            [feature[run] for feature in theta_reod])
        predictions[run] = mean
        pred_vars[run] = variance

    data_sur = {'Te': predictions, 'Te_sigmasq': pred_vars}
    campaign.store_data_to_hdf5(data_sur, file_path='sur_data.hdf5')
from itertools import chain

import numpy as np
import easysurrogate as es

# Script: train a Quantized Softmax Network (QSN) surrogate on an HDF5
# training set and register it with an EasySurrogate campaign.

# EasySurrogate campaign and its HDF5 data frame
campaign = es.Campaign()
data_frame = campaign.load_hdf5_data()

# Supervised training data set
features = data_frame['X_data']
target = data_frame['B_data']

# Quantized Softmax Network surrogate
surrogate = es.methods.QSN_Surrogate()

# Time-lagged features
lags = [[1, 10]]

# Number of training iterations
n_iter = 20000

# Train the surrogate on the data
surrogate.train(
    [features],
    target,
    n_iter,
    lags=lags,
    n_layers=4,
    n_neurons=256,
    batch_size=512,
)

campaign.add_app(name='test_campaign', surrogate=surrogate)
def gp_train(order_num=None, ndim_in=None, ndim_out=None, n_train=None,
             test_frac=0.0):
    """Train a scikit-learn GP surrogate on HDF5 simulation data and save it.

    The target is the ``'Te'`` entry of the loaded data frame; the inputs are
    the ten named features listed below. The trained surrogate is added to
    the campaign as ``'gp_campaign'`` and the campaign state is saved to
    ``mogp_<ndim_in>_model_<ddmm>_full_100.pickle``.

    Parameters
    ----------
    order_num : sequence of int, optional
        Feature indices handed to ``chose_feat_subset``; defaults to
        ``np.arange(ndim_in)``.
    ndim_in : int, optional
        Number of input features to train on. Defaults to the number of
        available features. An explicit value is honoured (it was previously
        overwritten with the number of available features).
    ndim_out : int, optional
        Number of outputs; defaults to ``target.shape[1]``.
    n_train : int, optional
        Currently unused; kept for interface compatibility.
    test_frac : float, optional
        Passed through to ``surrogate.train``.

    Returns
    -------
    float
        Wall-clock time, in seconds, spent in ``surrogate.train``.
    """
    # create campaign object
    campaign = es.Campaign()

    # load the training and testing data from simulations
    data_frame = campaign.load_hdf5_data()

    # choose target data from the loaded dictionary
    target = data_frame['Te']

    # -- If choosing data not on every grid point
    # samples = samples_c[:, 0].reshape(-1, 1)
    # samples = samples_c[:, np.arange(0, 100, 20).tolist()]

    # names of input features; all of them are loaded
    feature_names = [
        "Qe_tot", "H0", "Hw", "chi", "Te_bc",
        "b_pos", "b_height", "b_sol", "b_width", "b_slope",
    ]
    features = [data_frame[name] for name in feature_names]

    # input dimensionality: `features` is always a list here, so the default
    # is simply its length; a caller-supplied value is kept as given
    if ndim_in is None:
        ndim_in = len(features)

    # output dimensionality
    if ndim_out is None:
        ndim_out = target.shape[1]

    # order of input features: numbers corresponding to the feature name list
    if order_num is None:
        order_num = np.arange(ndim_in)

    # create a surrogate object
    surrogate = es.methods.GP_Surrogate(n_in=ndim_in, n_out=ndim_out,
                                        backend='scikit-learn')

    # take the first ndim_in features defined by the order_num list
    features = surrogate.feat_eng.chose_feat_subset(features, ndim_in,
                                                    order_num)

    # train a surrogate model and time the training
    day = date.today().strftime('%d%m')
    st_time = time.time()
    surrogate.train(features, target, test_frac=test_frac,
                    basekernel='Matern', noize='fit')
    tot_time = time.time() - st_time
    print('Time to train a GP surrogate {:.3}'.format(tot_time))

    # print parameters of resulting surrogate
    surrogate.model.print_model_info()

    # save the app and the surrogate
    campaign.add_app(name='gp_campaign', surrogate=surrogate)
    campaign.save_state(
        file_path='mogp_{}_model_{}_full_100.pickle'.format(ndim_in, day))

    return tot_time
import os

import numpy as np
import matplotlib.pyplot as plt
import easysurrogate as es

# Script: compare reference data with the output of a QSN surrogate.

# directory containing this script
home = os.path.abspath(os.path.dirname(__file__))

# restore the saved campaign
campaign = es.Campaign(load_state=True)

# two data frames are loaded one after the other:
# first the training data (from lorenz96.py) ...
data_frame_ref = campaign.load_hdf5_data()
# ... then the data from lorenz96_qsn.py
data_frame_qsn = campaign.load_hdf5_data()

# reference data
X_ref, B_ref = data_frame_ref['X_data'], data_frame_ref['B_data']

# data of the QSN surrogate
X_qsn, B_qsn = data_frame_qsn['X_data'], data_frame_qsn['B_data']

# QSN analysis object built on the campaign's surrogate
analysis = es.analysis.QSN_analysis(campaign.surrogate)

#############
# Plot PDFs #
#############

start_idx = 0
fig = plt.figure(figsize=[8, 4])
def gp_surrogate_test(order=None, ndim_in=None, ndim_out=None, n_train=None):
    """Train a scikit-learn GP surrogate and analyse it on train and test runs.

    Reads module-level data and helpers that are not defined here:
    ``samples_c`` (outputs, one row per run), ``theta_c`` (inputs),
    ``test_frac``, ``order_orig``, ``chose_feat_subset``, ``ws_dist``,
    ``gp_derivative_based_sa`` and the ``plot_prediction_results*``
    functions.

    Parameters
    ----------
    order : sequence of int, optional
        Feature ordering handed to ``chose_feat_subset``; defaults to
        ``np.arange(ndim_in)``.
    ndim_in : int, optional
        Number of input features. Defaults to ``len(theta_c)`` when
        ``theta_c`` is a list, or ``theta_c.shape[1]`` when it is an array.
    ndim_out : int, optional
        Number of outputs; defaults to ``samples_c.shape[1]``.
    n_train : int, optional
        Ignored on input: overwritten with ``surrogate_gp.feat_eng.n_train``
        after training.

    Returns
    -------
    tuple
        ``(tot_time, rel_err_test)``: wall-clock training time in seconds
        and the relative L2 error on the test runs.
    """
    # TRAINING PHASE
    campaign = es.Campaign()

    # -- If choosing data not on every grid point
    # samples_c = samples_c[:, 0].reshape(-1, 1)
    # samples_c = samples_c[:, np.arange(0, 100, 20).tolist()]

    n_out = samples_c.shape[1]
    n_mc_l = samples_c.shape[0]

    # Resolve the dimensions BEFORE building the surrogate, so it never
    # receives None. For a list of per-feature arrays the input dimension
    # is the number of list entries, not the number of runs.
    if ndim_in is None:
        if isinstance(theta_c, list):
            ndim_in = len(theta_c)
        elif isinstance(theta_c, np.ndarray):
            ndim_in = theta_c.shape[1]
    if ndim_out is None:
        ndim_out = n_out
    if order is None:
        order = np.arange(ndim_in)

    surrogate_gp = es.methods.GP_Surrogate(n_in=ndim_in, n_out=ndim_out,
                                           backend='scikit-learn')

    theta_reod = chose_feat_subset(theta_c, ndim_in, order)

    day = date.today().strftime('%d%m')
    st_time = time.time()
    surrogate_gp.train(theta_reod, samples_c, test_frac=test_frac,
                       basekernel='Matern', noize='fit')
    tot_time = time.time() - st_time
    print('Time to train a GP surrogate {:.3}'.format(tot_time))

    campaign.add_app(name='gp_campaign', surrogate=surrogate_gp)
    campaign.save_state(
        file_path='mogp_{}_model_{}_full_100.pickle'.format(ndim_in, day))

    # -- If loading a pretrained surrogate
    # campaign = es.Campaign(load_state=True, file_path='skl_1_model_2505_full_100.pickle')
    # surrogate_gp = campaign.surrogate

    # TRYING SEQUENTIAL OPTIMISATION - passes, but the resulting model worsens in its performance
    # surrogate_gp.train_sequentially(theta_reod, samples_axial, n_iter=10, acquisition_function='poi')
    # print('Indices of runs used for training: {}'.format(surrogate_gp.feat_eng.train_indices))

    surrogate_gp.model.print_model_info()

    # list of run indices used in training and testing
    train_inds = surrogate_gp.feat_eng.train_indices.tolist()
    test_inds = surrogate_gp.feat_eng.test_indices.tolist()
    n_train = surrogate_gp.feat_eng.n_train  # length of training data set

    # (n_runs, n_features) input matrices; built by stacking the per-feature
    # columns and transposing, which keeps each feature in its own column
    x_train = np.array(
        [theta_feat[train_inds].reshape(-1) for theta_feat in theta_reod]).T
    x_test = np.array(
        [theta_feat[test_inds].reshape(-1) for theta_feat in theta_reod]).T

    # ANALYSIS PHASE
    analysis = es.analysis.GP_analysis(surrogate_gp)

    # Training set
    # evaluate the surrogate on the training data
    training_predictions = np.zeros([n_train, n_out])
    training_pred_vars = np.zeros([n_train, n_out])
    for i, j in enumerate(train_inds):
        theta_point = [x[j] for x in theta_reod]
        training_predictions[i], training_pred_vars[i] = \
            surrogate_gp.predict(theta_point)

    # plot the train predictions and original data
    plot_prediction_results(samples_c[train_inds].reshape(-1),
                            training_predictions.reshape(-1),
                            training_pred_vars.reshape(-1),
                            1.0,
                            'gp_train_{}_data_res.png'.format(ndim_in))

    rel_err_train = (
        np.linalg.norm(training_predictions - samples_c[train_inds])
        / np.linalg.norm(samples_c[train_inds]))
    print('Relative error on the training set is %.2f percent' %
          (rel_err_train * 100))

    # plot prediction against parameter values
    if len(theta_reod) == 1 and samples_c.shape[1] == 1:
        plot_prediction_results_vspar(
            theta_reod[0][train_inds].reshape(-1),
            samples_c[train_inds],
            training_predictions.reshape(-1),
            training_pred_vars.reshape(-1),
            name='gp_theta_train_{}_data_res.png'.format(ndim_in))

    # Testing set
    # evaluate on testing data
    test_predictions = np.zeros([n_mc_l - n_train, n_out])
    test_pred_vars = np.zeros([n_mc_l - n_train, n_out])
    for i, j in enumerate(test_inds):
        theta_point = [x[j] for x in theta_reod]
        test_predictions[i], test_pred_vars[i] = surrogate_gp.predict(
            theta_point)

    # plot the test predictions and data
    plot_prediction_results(samples_c[test_inds].reshape(-1),
                            test_predictions.reshape(-1),
                            test_pred_vars.reshape(-1),
                            name='gp_test_{}_data_res.png'.format(ndim_in))

    # plot several chosen test predictions as radial dependency
    plot_prediction_results_vectorqoi(
        samples_c[test_inds],
        test_predictions,
        test_pred_vars,
        name='gp_test_{}_data_res_profiles.png'.format(ndim_in))

    # plot prediction against parameter values for testing data
    if len(theta_reod) == 1 and samples_c.shape[1] == 1:
        plot_prediction_results_vspar(
            theta_reod[0][test_inds].reshape(-1),
            samples_c[test_inds],
            test_predictions.reshape(-1),
            test_pred_vars,
            name='gp_theta_test_{}_data_res.png'.format(ndim_in))

    # print the relative test error
    rel_err_test = (np.linalg.norm(test_predictions - samples_c[test_inds])
                    / np.linalg.norm(samples_c[test_inds]))
    print('Relative error on the test set is %.2f percent' %
          (rel_err_test * 100))

    # print variance of predicted means and R2 score on the test set
    test_pred_var_tot = test_predictions.var()
    print('Variance of predicted result means for the test set %.3f' %
          test_pred_var_tot)
    # x_test is used here instead of reshaping the stacked
    # (n_features, n_runs) array to (n_runs, n_features): such a reshape
    # mixes values of different features whenever ndim_in > 1.
    print('R2 score on testing set: {}'.format(
        surrogate_gp.model.instance.score(x_test, samples_c[test_inds])))

    # Save simulation and surrogate data to hdf
    data_sim = {}
    data_sim['Te'] = samples_c
    for i, name in enumerate(order_orig):
        data_sim[name] = theta_reod[i]
    campaign.store_data_to_hdf5(data_sim, file_path='sim_data.hdf5')

    predictions = np.zeros([n_mc_l, n_out])
    pred_vars = np.zeros([n_mc_l, n_out])
    for i in range(n_mc_l):
        theta_point = [x[i] for x in theta_reod]
        predictions[i], pred_vars[i] = surrogate_gp.predict(theta_point)

    data_sur = {}
    data_sur['Te'] = predictions
    data_sur['Te_sigmasq'] = pred_vars
    campaign.store_data_to_hdf5(data_sur, file_path='sur_data.hdf5')

    # read both files back and use the stored arrays from here on
    data_frame_sim = campaign.load_hdf5_data(file_path='sim_data.hdf5')
    data_frame_sur = campaign.load_hdf5_data(file_path='sur_data.hdf5')
    samples = data_frame_sim['Te']
    predictions = data_frame_sur['Te']

    # SENSITIVITY ANALYSIS
    if surrogate_gp.backend == 'mogp':
        # NOTE(review): if theta_reod is a plain list, indexing it with the
        # list test_inds raises TypeError - confirm what
        # gp_derivative_based_sa expects before enabling the mogp backend.
        gp_derivative_based_sa(surrogate_gp,
                               theta_reod[:][test_inds],
                               keys=order_orig)

    # QoI pdfs
    # analyse the QoI (Te(rho=0)) for test set
    te_ax_ts_dat_dom, te_ax_ts_dat_pdf = analysis.get_pdf(
        samples[test_inds][:, 0])
    te_ax_ts_surr_dom, te_ax_ts_surr_pdf = analysis.get_pdf(
        predictions[test_inds][:, 0])
    print('len of test data: {}'.format(samples_c[test_inds][:, 0].shape))

    te_ax_tr_dat_dom, te_ax_tr_dat_pdf = analysis.get_pdf(
        samples_c[train_inds][:, 0])
    te_ax_tr_surr_dom, te_ax_tr_surr_pdf = analysis.get_pdf(
        training_predictions[:, 0])
    print('len of train data: {}'.format(samples_c[train_inds][:, 0].shape))

    te_ax_tt_dat_dom, te_ax_tt_dat_pdf = analysis.get_pdf(samples_c[:][:, 0])
    tot_pred = np.concatenate([training_predictions, test_predictions])
    te_ax_tt_surr_dom, te_ax_tt_surr_pdf = analysis.get_pdf(tot_pred[:, 0])
    print('len of total data: {}'.format(samples_c[:][:, 0].shape))

    analysis.plot_pdfs(te_ax_ts_dat_dom,
                       te_ax_ts_dat_pdf,
                       te_ax_ts_surr_dom,
                       te_ax_ts_surr_pdf,
                       names=[
                           'simulation_test', 'surrogate_test',
                           'simulation_train', 'surrogate_train'
                       ],
                       qoi_names=['Te(r=0)'],
                       filename='pdf_qoi_trts_{}'.format(ndim_in))

    w_d = ws_dist(te_ax_ts_surr_pdf, te_ax_ts_dat_pdf)
    print(
        'Wasserstein distance for distribution of selected QoI produced by simulation and surrogate: {}'
        .format(w_d))

    # plotting errors on a single-case basis, for axial value
    analysis.get_regression_error(x_test, samples_c[test_inds],
                                  x_train, samples_c[train_inds])

    return tot_time, rel_err_test
def main(order_num=None):
    """Analyse a saved GP surrogate against stored simulation data.

    Loads the campaign from ``mogp_10_model_1006_full_100.pickle``, the
    simulation data from ``sim_data.hdf5`` and the surrogate predictions
    from ``sur_data.hdf5``. It then produces prediction plots, prints the
    relative errors and the R2 score, compares the PDFs of the first output
    column, and calls ``analysis.get_regression_error``.

    Uses the module-level name ``ws_dist``, which is not defined here.

    Parameters
    ----------
    order_num : sequence of int, optional
        Feature indices passed to ``chose_feat_subset``; defaults to
        ``np.arange(ndim_in)`` with ``ndim_in`` taken from the surrogate.

    Returns
    -------
    float
        Relative L2 error of the predictions on the test runs.
    """
    campaign = es.Campaign(load_state=True,
                           file_path='mogp_10_model_1006_full_100.pickle')

    # load the training and testing data from simulations
    data_frame_sim = campaign.load_hdf5_data(file_path='sim_data.hdf5')
    # load the predictions from surrogate
    data_frame_sur = campaign.load_hdf5_data(file_path='sur_data.hdf5')

    # names of input features; all of them are used
    feature_names = [
        "Qe_tot", "H0", "Hw", "chi", "Te_bc",
        "b_pos", "b_height", "b_sol", "b_width", "b_slope",
    ]
    theta = [data_frame_sim[name] for name in feature_names]

    # target data and predicted target from the loaded dictionaries
    samples = data_frame_sim['Te']
    predictions = data_frame_sur['Te']
    pred_vars = data_frame_sur['Te_sigmasq']

    # data size parameters
    ndim_in = campaign.surrogate.n_in

    # order of input features: numbers corresponding to the feature name list
    if order_num is None:
        order_num = np.arange(ndim_in)

    # take the first ndim_in features defined by the order_num list
    theta_reod = campaign.surrogate.feat_eng.chose_feat_subset(
        theta, ndim_in, order_num)

    # list of run indices used in training and testing
    train_inds = campaign.surrogate.feat_eng.train_indices.tolist()
    test_inds = campaign.surrogate.feat_eng.test_indices.tolist()

    training_predictions = predictions[train_inds]
    training_pred_vars = pred_vars[train_inds]
    test_predictions = predictions[test_inds]
    test_pred_vars = pred_vars[test_inds]

    # (n_runs, n_features) input matrices; built by stacking the per-feature
    # columns and transposing, which keeps each feature in its own column
    x_train = np.array(
        [theta_feat[train_inds].reshape(-1) for theta_feat in theta_reod]).T
    x_test = np.array(
        [theta_feat[test_inds].reshape(-1) for theta_feat in theta_reod]).T

    ############
    # Analysis #
    ############

    analysis = es.analysis.GP_analysis(campaign.surrogate)

    # plot the train predictions and original data
    analysis.plot_prediction_results(
        samples[train_inds].reshape(-1),
        training_predictions.reshape(-1),
        training_pred_vars.reshape(-1),
        1.0,
        'gp_train_{}_data_res.png'.format(ndim_in))

    rel_err_train = (
        np.linalg.norm(training_predictions - samples[train_inds])
        / np.linalg.norm(samples[train_inds]))
    print('Relative error on the training set is %.2f percent' %
          (rel_err_train * 100))

    # plot prediction against parameter values
    if len(theta_reod) == 1 and samples.shape[1] == 1:
        analysis.plot_prediction_results_vspar(
            theta_reod[0][train_inds].reshape(-1),
            samples[train_inds],
            training_predictions.reshape(-1),
            training_pred_vars.reshape(-1),
            name='gp_theta_train_{}_data_res.png'.format(ndim_in))

    # plot the test predictions and data
    analysis.plot_prediction_results(
        samples[test_inds].reshape(-1),
        test_predictions.reshape(-1),
        test_pred_vars.reshape(-1),
        name='gp_test_{}_data_res.png'.format(ndim_in))

    # plot several chosen test predictions as radial dependency
    analysis.plot_prediction_results_vectorqoi(
        samples[test_inds],
        test_predictions,
        test_pred_vars,
        name='gp_test_{}_data_res_profiles.png'.format(ndim_in))

    # plot prediction against parameter values for testing data
    if len(theta_reod) == 1 and samples.shape[1] == 1:
        analysis.plot_prediction_results_vspar(
            theta_reod[0][test_inds].reshape(-1),
            samples[test_inds],
            test_predictions.reshape(-1),
            test_pred_vars,
            name='gp_theta_test_{}_data_res.png'.format(ndim_in))

    # print the relative test error
    rel_err_test = (np.linalg.norm(test_predictions - samples[test_inds])
                    / np.linalg.norm(samples[test_inds]))
    print('Relative error on the test set is %.2f percent' %
          (rel_err_test * 100))

    # print variance of predicted means and R2 score on the test set
    test_pred_var_tot = test_predictions.var()
    print('Variance of predicted result means for the test set %.3f' %
          test_pred_var_tot)
    # x_test is used here instead of reshaping the stacked
    # (n_features, n_runs) array to (n_runs, n_features): such a reshape
    # mixes values of different features whenever ndim_in > 1.
    print('R2 score on testing set: {}'.format(
        campaign.surrogate.model.instance.score(x_test, samples[test_inds])))

    ############
    # QoI PDFs #
    ############

    # analyse the QoI (Te(rho=0)) for test set
    te_ax_ts_dat_dom, te_ax_ts_dat_pdf = analysis.get_pdf(
        samples[test_inds][:, 0])
    te_ax_ts_surr_dom, te_ax_ts_surr_pdf = analysis.get_pdf(
        predictions[test_inds][:, 0])
    print('len of test data: {}'.format(samples[test_inds][:, 0].shape))

    te_ax_tr_dat_dom, te_ax_tr_dat_pdf = analysis.get_pdf(
        samples[train_inds][:, 0])
    te_ax_tr_surr_dom, te_ax_tr_surr_pdf = analysis.get_pdf(
        training_predictions[:, 0])
    print('len of train data: {}'.format(samples[train_inds][:, 0].shape))

    te_ax_tt_dat_dom, te_ax_tt_dat_pdf = analysis.get_pdf(samples[:][:, 0])
    tot_pred = np.concatenate([training_predictions, test_predictions])
    te_ax_tt_surr_dom, te_ax_tt_surr_pdf = analysis.get_pdf(tot_pred[:, 0])
    print('len of total data: {}'.format(samples[:][:, 0].shape))

    analysis.plot_pdfs(te_ax_ts_dat_dom,
                       te_ax_ts_dat_pdf,
                       te_ax_ts_surr_dom,
                       te_ax_ts_surr_pdf,
                       names=[
                           'simulation_test', 'surrogate_test',
                           'simulation_train', 'surrogate_train'
                       ],
                       qoi_names=['Te(r=0)'],
                       filename='pdf_qoi_trts_{}'.format(ndim_in))

    w_d = ws_dist(te_ax_ts_surr_pdf, te_ax_ts_dat_pdf)
    print(
        'Wasserstein distance for distribution of selected QoI produced by simulation and surrogate: {}'
        .format(w_d))

    # plotting errors on a single-case basis, for axial value
    analysis.get_regression_error(x_test, samples[test_inds],
                                  x_train, samples[train_inds])

    return rel_err_test
from itertools import chain

import numpy as np
import easysurrogate as es

# Script: set up a Kernel Mixture Network (KMN) surrogate and its kernel
# anchor points from an HDF5 training set.

# EasySurrogate campaign (fresh, no saved state) and its HDF5 data frame
campaign = es.Campaign(load_state=False)
data_frame = campaign.load_hdf5_data()

# supervised training data set
features = data_frame['X_data']
target = data_frame['B_data']

# Kernel Mixture Network surrogate
surrogate = es.methods.KMN_Surrogate()

# time-lagged features
lags = [range(11)]

# KDE anchor points and standard deviations, one set per target column
n_means = 15
n_stds = 3
n_softmax = target.shape[1]

# anchor points span the observed range of each target column
kernel_means = [
    np.linspace(np.min(target[:, col]), np.max(target[:, col]), n_means)
    for col in range(n_softmax)
]
# the same spread of standard deviations is used for every column
kernel_stds = [np.linspace(0.2, 0.3, n_stds) for _ in range(n_softmax)]
import numpy as np
import easysurrogate as es

# Script: reload a trained GP surrogate, retrain it sequentially and compare
# the prediction for one test sample before and after.

features_names = ['te_value', 'ti_value', 'te_ddrho', 'ti_ddrho']
target_names = ['te_transp_flux', 'ti_transp_flux']

campaign = es.Campaign(load_state=True, file_path='gem_gp_model.pickle')
data_frame = campaign.load_hdf5_data(file_path='gem_data_625.hdf5')


def _predict_and_print_test_sample(sample_index=100):
    """Predict one row of ``X_test``, print mean and variance, return both."""
    mean, variance = campaign.surrogate.predict(
        campaign.surrogate.X_test[sample_index, :])
    print(mean, variance)
    return mean, variance


# just a test on single sample
m, v = _predict_and_print_test_sample()

# features and targets present in the data frame
features = [data_frame[k] for k in features_names if k in data_frame]
target = np.concatenate(
    [data_frame[k] for k in target_names if k in data_frame],
    axis=1)

# try to retrain on max uncertain samples of full feature dataset
campaign.surrogate.train_sequentially(n_iter=20)
campaign.save_state()

# same single-sample test after retraining
m, v = _predict_and_print_test_sample()
#data_frame_train = campaign.load_hdf5_data(file_path='gem_data_625.hdf5')

# Alternative data sources kept for reference (disabled):
# 2) case for data from a MFW production run
#campaign = es.Campaign(load_state=True, file_path='skl_gem_500_wf_1405_opt.pickle')
#data_frame = campaign.load_hdf5_data(file_path='gem_workflow_500.hdf5')
#data_frame_train = campaign.load_hdf5_data(file_path='gem_workflow_500.hdf5')

# 3) case for data generated from single flux tube GEM0 with 4 parameters (LHD, with a wrapper)
#campaign = es.Campaign(load_state=True, file_path='gem0_lhc.pickle')
#data_frame = campaign.load_hdf5_data(file_path='gem_workflow_500.hdf5')
#data_frame_train = campaign.load_hdf5_data(file_path='gem_workflow_500.hdf5')

# 4) case for data from single flux tube GEM0 with 2 parameters (LHD, with a wrapper)
# Only the third and fourth feature and the second target are used.
features_names_selected = [features_names[i] for i in (2, 3)]
target_name_selected = [target_names[1]]

campaign = es.Campaign(
    load_state=True,
    file_path='gp_model_10pperctrset_plus1oseqsamples.pickle')
data_frame = campaign.load_hdf5_data(file_path='gem0_lhc_256.hdf5')
data_frame_train = campaign.load_hdf5_data(file_path='gem0_lhc_256.hdf5')

# getting the data: selected features as a list, selected targets
# concatenated column-wise
features_train = [
    data_frame_train[k]
    for k in features_names_selected
    if k in data_frame_train
]
target_train = np.concatenate(
    [
        data_frame_train[k]
        for k in target_name_selected
        if k in data_frame_train
    ],
    axis=1)

# split using the train indices stored with the surrogate
feat_train, targ_train, feat_test, targ_test = (
    campaign.surrogate.feat_eng.get_training_data(
        features_train,
        target_train,
        index=campaign.surrogate.feat_eng.train_indices))
# Associate the sampler with the campaign campaign.set_sampler(my_sampler) # Execute runs campaign.execute().collate() # get the EasyVVUQ data frame data_frame = campaign.get_collation_result() ############################## # EasySurrogate ANN campaign # ############################## # Create an EasySurrogate campaign surr_campaign = es.Campaign() # This is the main point of this test: extract training data from EasyVVUQ data frame features, samples = surr_campaign.load_easyvvuq_data(campaign, qoi_cols='f') # Create artificial neural network surrogate surrogate = es.methods.ANN_Surrogate() # Number of training iterations (number of mini batches) N_ITER = 10000 # The latter fraction of the data to be kept apart for testing TEST_FRAC = 0.3 # Train the ANN surrogate.train(features,
# Identifier shared by the EasyVVUQ and EasySurrogate campaigns
ID = 'func'
DB_LOCATION = "sqlite:///" + WORK_DIR + "/campaign{}.db".format(ID)

# reload easyvvuq campaign
campaign = uq.Campaign(name=ID, db_location=DB_LOCATION)
banner = "==========================================="
print(banner)
print("Reloaded campaign {}".format(ID))
print(banner)

# re-attach the campaign's active sampler
sampler = campaign.get_active_sampler()
campaign.set_sampler(sampler, update=True)

# name of the quantity of interest, the column name of the output file
output_columns = ["f"]

# create an EasySurrogate campaign
surr_campaign = es.Campaign(name=ID)

# load the training data and keep only the first output column
params, samples = surr_campaign.load_easyvvuq_data(
    campaign, qoi_cols=output_columns)
samples = samples[output_columns[0]]

# input dimension (15)
D = params.shape[1]

# assumed dimension active subspace
d = 2

# create DAS surrogate object
surrogate = es.methods.DAS_Surrogate()

# train the DAS surrogate