def __init__(self, task='yacht', n_units=50, activation='tanh', seed=0):
    # ------------ saving paths ------------
    self.saving_model_path = None

    # ------------ generate train and test data ------------
    np.random.seed(seed)
    if task in ['yacht', 'concrete', 'wine-quality-red', 'bostonHousing']:
        X_train, y_train, X_test, y_test = load_uci_data(
            data_directory=f'exps_tasks/datasets/{task}', split_number=seed)
    else:
        maths_f, x_bounds, _, true_fmin = get_function(task)
        n_train = 5000
        n_test = 2000
        X_train, y_train = get_init_data(obj_func=maths_f, noise_var=1e-6,
                                         n_init=n_train, bounds=x_bounds)
        X_test, y_test = get_init_data(obj_func=maths_f, noise_var=1e-6,
                                       n_init=n_test, bounds=x_bounds)
    # X_train, y_train, X_test, y_test = load_uci_data(data_directory=f'./datasets/{task}', split_number=seed)

    self.X_train = X_train
    self.y_train = y_train.flatten()
    self.X_test = X_test
    self.y_test = y_test.flatten()

    # ------------ bnn hyperparameters ------------
    self.num_samples = 300
    self.keep_every = 3
    self.activation = activation
    self.seed = seed

    # bounds for the hyperparameters to be tuned
    self.true_bnds = np.array([
        [2, 50.0],      # num_steps_per_sample
        [100, 5000],    # tau_out
        [1e-3, 1e-1],   # length_scale
        [1e-4, 5e-2],   # step_size
    ])
    self.d = len(self.true_bnds)
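# Illustrative sketch (an assumption, not part of the original class): bounds
# like `true_bnds` are typically used to map a BO query from the unit cube
# back onto the true hyperparameter ranges before training the BNN. A minimal
# affine rescaling looks like this:
import numpy as np

true_bnds = np.array([[2, 50.0],      # num_steps_per_sample
                      [100, 5000],    # tau_out
                      [1e-3, 1e-1],   # length_scale
                      [1e-4, 5e-2]])  # step_size
x_unit = np.random.rand(len(true_bnds))  # a query point in [0, 1]^d
# element-wise rescaling: lower + x * (upper - lower)
x_true = true_bnds[:, 0] + x_unit * (true_bnds[:, 1] - true_bnds[:, 0])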
def test_gp():
    # Specify the objective function and parameters (noise variance, input
    # dimension, number of initial observations)
    np.random.seed(3)
    x_ob, y_ob = get_init_data(obj_func=f, noise_var=var_noise,
                               n_init=n_init, bounds=x_bounds)

    # ------ Test grid ------ #
    if d == 2:
        x1, x2 = np.mgrid[-1:1:50j, -1:1:50j]
        X = np.vstack((x1.flatten(), x2.flatten())).T
        y = f(X)
    else:
        X = np.linspace(-1, 1, 100)[:, None]
        y = f(X)

    # ------ GP model ------ #
    # kern = GPy.kern.Matern52(2, variance=1., ARD=True)
    GP = GPModel(exact_feval=True, ARD=True)
    GP._update_model(x_ob, y_ob)
    m, s = GP.predict(X)

    # ------ Plot figures ------ #
    if d == 2:
        figure, axes = plt.subplots(2, 1, figsize=(10, 10))
        axes[0].contourf(x1, x2, y.reshape(50, 50))
        axes[0].plot(x_ob[:, 0], x_ob[:, 1], 'rx')
        axes[0].set_title('objective func')
        axes[1].contourf(x1, x2, m.reshape(50, 50))
        axes[1].plot(x_ob[:, 0], x_ob[:, 1], 'rx')
        axes[1].set_title('prediction by GP')
        plt.show()
    else:
        figure, axes = plt.subplots(1, 1, figsize=(10, 10))
        axes.plot(x_ob, y_ob, 'rx')
        axes.plot(X, y, 'r--')
        axes.plot(X, m, 'b')
        axes.fill_between(X.flatten(), (m - s).flatten(), (m + s).flatten(),
                          color='blue', alpha=0.30)
        axes.set_title('1D Regression')
        plt.show()

    # ------ Save figures ------ #
    saving_path = 'data/syntheticFns/'
    if not os.path.exists(saving_path):
        os.makedirs(saving_path)
    fig_name = func_name + '_gp.png'
    print(fig_name)
    # complete the save step implied by the comment above
    figure.savefig(os.path.join(saving_path, fig_name))
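# Hedged sketch of the module-level setup these test scripts rely on: f,
# x_bounds, var_noise, n_init, d and func_name are globals. The values below
# mirror the convention used in BNN_BO_Exps further down; the function name
# 'egg-2d' is a placeholder assumption, not necessarily a key known to
# get_function.
func_name = 'egg-2d'  # hypothetical synthetic-function name
f, x_bounds, _, true_fmin = get_function(func_name)
var_noise = 1.0e-10
d = x_bounds.shape[0]
n_init = d * 10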
def test_compare_gp_with_dropnet():
    # Specify the objective function and parameters (noise variance, input
    # dimension, number of initial observations)
    np.random.seed(3)
    x_ob, y_ob = get_init_data(obj_func=f, noise_var=var_noise,
                               n_init=n_init, bounds=x_bounds)

    # ------ Test grid ------ #
    if d == 2:
        x1, x2 = np.mgrid[-1:1:50j, -1:1:50j]
        X = np.vstack((x1.flatten(), x2.flatten())).T
        y = f(X)
        # Dropout NN configuration
        dropout = 0.05
        T = 100
        tau = 1.0
        bs = 10
        tbs = 50
        n_hidden = [50, 50, 50]
    else:
        X = np.linspace(-1, 1, 100)[:, None]
        y = f(X)
        # Dropout NN configuration
        dropout = 0.05
        T = 100
        tau = 20
        bs = 10
        tbs = 50
        n_hidden = [50, 50, 50]
        # n_hidden = [100]

    # ------ GP model ------ #
    # kern = GPy.kern.Matern52(2, variance=1., ARD=True)
    GP = GPModel(exact_feval=True, ARD=True)
    GP._update_model(x_ob, y_ob)
    m_gp, s_gp = GP.predict(X)

    # ------ DropoutNet model ------ #
    DropNet = DropoutNet(n_epochs=40, n_hidden=n_hidden, dropout=dropout,
                         T=T, tau=tau, batch_size=bs)
    DropNet._update_model(x_ob, y_ob)
    m_drop, s_drop = DropNet.predict(X, test_batch_size=tbs)

    # ------ Plot figures ------ #
    if d == 2:
        figure, axes = plt.subplots(3, 1, figsize=(6, 18))
        axes[0].contourf(x1, x2, y.reshape(50, 50))
        axes[0].plot(x_ob[:, 0], x_ob[:, 1], 'rx')
        axes[0].set_title('objective func')
        axes[1].contourf(x1, x2, m_gp.reshape(50, 50))
        axes[1].plot(x_ob[:, 0], x_ob[:, 1], 'rx')
        axes[1].set_title('prediction by GP')
        axes[2].contourf(x1, x2, m_drop.reshape(50, 50))
        axes[2].plot(x_ob[:, 0], x_ob[:, 1], 'rx')
        axes[2].set_title(f'prediction by NN: dropout={dropout}, T={T}, '
                          f'tau={tau}, BS={bs}, TBS={tbs}')
        plt.show()
    else:
        figure, axes = plt.subplots(2, 1, figsize=(10, 8))
        axes[0].plot(x_ob, y_ob, 'rx')
        axes[0].plot(X, y, 'r--')
        axes[0].plot(X, m_gp, 'b')
        axes[0].fill_between(X.flatten(), (m_gp - s_gp).flatten(),
                             (m_gp + s_gp).flatten(), color='blue', alpha=0.30)
        axes[0].set_title('1D GP Regression')
        axes[1].plot(x_ob, y_ob, 'rx')
        axes[1].plot(X, y, 'r--')
        axes[1].plot(X, m_drop, 'b')
        axes[1].fill_between(X.flatten(), (m_drop - s_drop).flatten(),
                             (m_drop + s_drop).flatten(), color='blue', alpha=0.30)
        axes[1].set_title(f'prediction by NN: dropout={dropout}, T={T}, '
                          f'tau={tau}, BS={bs}, TBS={tbs}')
        plt.show()

    # ------ Save figures ------ #
    saving_path = 'data/syntheticFns/'
    if not os.path.exists(saving_path):
        os.makedirs(saving_path)
    fig_name = 'compare_gp_dropnet.png'
    # complete the save step implied by the comment above
    figure.savefig(os.path.join(saving_path, fig_name))
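# Background sketch (an assumption about DropoutNet.predict internals,
# following the standard MC-dropout estimator of Gal & Ghahramani): the
# predictive mean averages T stochastic forward passes, and the predictive
# variance adds the inverse model precision 1/tau to the sample variance of
# those passes. The helper below is illustrative only.
import numpy as np

def mc_dropout_moments(stochastic_preds, tau):
    """stochastic_preds: array of shape (T, n_points) from T dropout passes."""
    m = stochastic_preds.mean(axis=0)
    v = stochastic_preds.var(axis=0) + 1.0 / tau
    return m, np.sqrt(v)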
def BayesOpt_attack(obj_func, model_type, acq_type, batch_size, low_dim,
                    sparse, seed, img_offset, n_init=50, num_iter=40,
                    ntargets=9, target_label=0, dim_reduction='BILI',
                    cost_metric=None, obj_metric=1, update_freq=10,
                    nsubspaces=1):
    # Specify code directory
    directory = './'

    if obj_func == 'mnist':
        high_dim = 784
        nchannel = 1
        epsilon = 0.3
    elif obj_func == 'cifar10':
        high_dim = int(32 * 32)
        nchannel = 3
        epsilon = 0.05
    elif obj_func == 'imagenet':
        high_dim = int(96 * 96)
        nchannel = 3
        epsilon = 0.05
        ntargets = 1

    if 'LDR' in model_type:
        low_dim = high_dim

    if dim_reduction == 'NONE':
        x_bounds = np.vstack([[-1, 1]] * high_dim * nchannel)
    else:
        x_bounds = np.vstack([[-1, 1]] * low_dim * nchannel)

    # Specify the directory for saving experiment results
    results_data_folder = f'{directory}exp_results/{obj_func}_tf_{model_type}_ob{obj_metric}_' \
                          f'_freq{update_freq}_ld{low_dim}_{dim_reduction}/'
    if not os.path.exists(results_data_folder):
        os.makedirs(results_data_folder)

    # Define the model and the original images to be attacked
    cnn = CNN(dataset_name=obj_func, img_offset=img_offset, epsilon=epsilon,
              dim_reduction=dim_reduction, low_dim=low_dim, high_dim=high_dim,
              obj_metric=obj_metric, results_folder=results_data_folder,
              directory=directory)

    # For each image, define the target classes
    if ntargets > 1:
        target_list = list(range(ntargets))
    else:
        target_list = [target_label]

    # Attack each target in sequence
    for tg in target_list:
        cnn.get_data_sample(tg)
        input_label = cnn.input_label
        img_id = cnn.orig_img_id
        target_label = cnn.target_label[0]
        print(f'id={img_offset}, origin={input_label}, target={target_label}, '
              f'eps={epsilon}, dr={low_dim}')

        # Define the BO objective function
        if obj_func == 'imagenet':
            if 'LDR' in model_type or dim_reduction == 'NONE':
                f = lambda x: cnn.np_evaluate_bili(x)
            else:
                f = lambda x: cnn.np_upsample_evaluate_bili(x)
        else:
            if 'LDR' in model_type or dim_reduction == 'NONE':
                f = lambda x: cnn.np_evaluate(x)
            else:
                f = lambda x: cnn.np_upsample_evaluate(x)

        # Define the names of the results file and the failure file
        # (for debugging or resuming)
        results_file_name = os.path.join(
            results_data_folder,
            f'{model_type}{acq_type}{batch_size}_{dim_reduction}_'
            f'd{low_dim}_i{input_label}_t{target_label}_id{img_id}')
        failed_file_name = os.path.join(
            results_data_folder,
            f'failed_{model_type}{acq_type}{batch_size}_{dim_reduction}_'
            f'd{low_dim}_i{input_label}_t{target_label}_id{img_id}')

        X_opt_all_slices = []
        Y_opt_all_slices = []
        X_query_all_slices = []
        Y_query_all_slices = []
        X_reduced_opt_all_slices = []
        X_reduced_query_all_slices = []

        seed_list = [seed]  # can be modified to do BO over multiple seeds
        for seed in seed_list:
            # Specify the random seed
            np.random.seed(seed)

            # Generate initial observation data for BO
            if os.path.exists(results_file_name) and 'LDR' not in model_type:
                print('load old init data')
                with open(results_file_name, 'rb') as pre_file:
                    previous_bo_results = pickle.load(pre_file)
                x_init = previous_bo_results['X_reduced_query'][0]
                y_init = previous_bo_results['Y_query'][0]
            else:
                print('generate new init data')
                x_init, y_init = get_init_data(obj_func=f, n_init=n_init,
                                               bounds=x_bounds)
            print(f'X init shape {x_init.shape}')

            # Initialise BO
            bayes_opt = Bayes_opt(func=f, bounds=x_bounds,
                                  saving_path=failed_file_name)
            bayes_opt.initialise(X_init=x_init, Y_init=y_init,
                                 model_type=model_type, acq_type=acq_type,
                                 sparse=sparse, nsubspaces=nsubspaces,
                                 batch_size=batch_size, update_freq=update_freq,
                                 nchannel=nchannel, high_dim=high_dim,
                                 dim_reduction=dim_reduction,
                                 cost_metric=cost_metric, seed=seed)

            # Run BO
            X_query_full, Y_query, X_opt_full, Y_opt, time_record = bayes_opt.run(
                total_iterations=num_iter)

            # Reduce the memory needed for storing results
            if 'LDR' in model_type:
                X_query = X_query_full[-2:]
                X_opt = X_opt_full[-2:]
            else:
                X_query = X_query_full
                X_opt = X_opt_full[-2:]

            # Store the results
            Y_opt_all_slices.append(Y_opt)
            Y_query_all_slices.append(Y_query)
            opt_dr_list = bayes_opt.opt_dr_list

            if dim_reduction == 'NONE':
                X_reduced_opt_all_slices.append(X_opt.astype(np.float16))
                X_reduced_query_all_slices.append(X_query.astype(np.float16))
                X_query_all_slices.append(X_query)
                X_opt_all_slices.append(X_opt)
                print(f'Y_opt={Y_opt[-1]}, X_opt shape{X_opt.shape}, '
                      f'X_h_opt shape{X_opt.shape}, X_query shape{X_query.shape}, '
                      f'X_h_query shape{X_query.shape}, opt_dr={opt_dr_list[-1]}')
            else:
                X_reduced_opt_all_slices.append(X_opt.astype(np.float16))
                X_reduced_query_all_slices.append(X_query.astype(np.float16))
                # Transform data from the reduced search space back to the
                # original high-dimensional input space
                X_h_query = upsample_projection(dim_reduction, X_query,
                                                low_dim=low_dim, high_dim=high_dim,
                                                nchannel=nchannel)
                X_query_all_slices.append(X_h_query[-2:])
                X_h_opt = upsample_projection(dim_reduction, X_opt,
                                              low_dim=low_dim, high_dim=high_dim,
                                              nchannel=nchannel)
                X_opt_all_slices.append(X_h_opt)
                print(f'Y_opt={Y_opt[-1]}, X_opt shape{X_opt.shape}, '
                      f'X_h_opt shape{X_h_opt.shape}, X_query shape{X_query.shape}, '
                      f'X_h_query shape{X_h_query.shape}')

                # For ImageNet images, save only the L_inf and L2 norms
                # instead of the adversarial image
                if 'imagenet' in obj_func:
                    l_inf_sum = np.abs(X_h_opt[-1, :]).sum()
                    l_2_norm = np.sqrt(np.sum((epsilon * X_h_opt[-1, :].ravel()) ** 2))
                    X_opt_all_slices = [l_inf_sum]
                    X_query_all_slices = [l_2_norm]

            # Save the results locally
            results = {'X_opt': X_opt_all_slices,
                       'Y_opt': Y_opt_all_slices,
                       'X_query': X_query_all_slices,
                       'Y_query': Y_query_all_slices,
                       'X_reduced_opt': X_reduced_opt_all_slices,
                       'X_reduced_query': X_reduced_query_all_slices,
                       'dr_opt_list': opt_dr_list,
                       'runtime': time_record}
            with open(results_file_name, 'wb') as file:
                pickle.dump(results, file)
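# Hedged usage sketch (not in the original file): a minimal direct call to
# BayesOpt_attack on MNIST. All argument values below are illustrative
# assumptions; in the repo this function is presumably driven by a separate
# command-line script with its own defaults.
if __name__ == '__main__':
    BayesOpt_attack(obj_func='mnist', model_type='GP', acq_type='LCB',
                    batch_size=1, low_dim=196, sparse=None, seed=0,
                    img_offset=0, n_init=50, num_iter=40,
                    ntargets=1, target_label=0, dim_reduction='BILI')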
def test_compare_gp_with_dropnet():
    # Specify the objective function and parameters (noise variance, input
    # dimension, number of initial observations)
    np.random.seed(3)
    x_ob, y_ob = get_init_data(obj_func=f, noise_var=var_noise,
                               n_init=n_init, bounds=x_bounds)

    # ------ Test grid ------ #
    if d == 2:
        x1, x2 = np.mgrid[-1:1:50j, -1:1:50j]
        X = np.vstack((x1.flatten(), x2.flatten())).T
        y = f(X)
        # Dropout NN configuration (used only in the plot titles below)
        dropout = 0.05
        T = 100
        tau = 1.0
        bs = 10
        tbs = 50
        n_hidden = [50, 50, 50]
    else:
        X = np.linspace(-1, 1, 100)[:, None]
        y = f(X)
        # Dropout NN configuration (used only in the plot titles below)
        dropout = 0.05
        T = 100
        tau = 20
        bs = 10
        tbs = 50
        n_hidden = [50, 50, 50]
        # n_hidden = [100]

    # ------ GP model ------ #
    GP = GPModel(exact_feval=True, ARD=True)
    GP._update_model(x_ob, y_ob)
    m_gp, s_gp = GP.predict(X)

    # ------ MCDropoutNet / DNGO / BOHAMIANN model ------ #
    Net = DNGOWrap()
    # Net = MCDROPWarp()
    # Net = BOHAMIANNWarp(num_samples=600)
    Net._update_model(x_ob, y_ob)
    m_net, s_net = Net.predict(X)

    # ------ Plot figures ------ #
    if d == 2:
        figure, axes = plt.subplots(3, 1, figsize=(6, 18))
        axes[0].contourf(x1, x2, y.reshape(50, 50))
        axes[0].plot(x_ob[:, 0], x_ob[:, 1], 'rx')
        axes[0].set_title('objective func')
        axes[1].contourf(x1, x2, m_gp.reshape(50, 50))
        axes[1].plot(x_ob[:, 0], x_ob[:, 1], 'rx')
        axes[1].set_title('prediction by GP')
        axes[2].contourf(x1, x2, m_net.reshape(50, 50))
        axes[2].plot(x_ob[:, 0], x_ob[:, 1], 'rx')
        axes[2].set_title(f'prediction by NN: dropout={dropout}, T={T}, '
                          f'tau={tau}, BS={bs}, TBS={tbs}')
        plt.show()
    else:
        figure, axes = plt.subplots(2, 1, figsize=(10, 8))
        axes[0].plot(x_ob, y_ob, 'ro')
        axes[0].plot(X, y, 'k--')
        axes[0].plot(X, m_gp, 'b')
        # 1, 2 and 3 standard-deviation bands
        axes[0].fill_between(X.flatten(), (m_gp - s_gp).flatten(),
                             (m_gp + s_gp).flatten(), color='orange', alpha=0.80)
        axes[0].fill_between(X.flatten(), (m_gp - 2 * s_gp).flatten(),
                             (m_gp + 2 * s_gp).flatten(), color='orange', alpha=0.60)
        axes[0].fill_between(X.flatten(), (m_gp - 3 * s_gp).flatten(),
                             (m_gp + 3 * s_gp).flatten(), color='orange', alpha=0.40)
        axes[0].set_title('1D GP Regression')
        axes[1].plot(x_ob, y_ob, 'ro')
        axes[1].plot(X, y, 'k--')
        axes[1].plot(X, m_net, 'b')
        axes[1].fill_between(X.flatten(), (m_net - s_net).flatten(),
                             (m_net + s_net).flatten(), color='orange', alpha=0.80)
        axes[1].fill_between(X.flatten(), (m_net - 2 * s_net).flatten(),
                             (m_net + 2 * s_net).flatten(), color='orange', alpha=0.60)
        axes[1].fill_between(X.flatten(), (m_net - 3 * s_net).flatten(),
                             (m_net + 3 * s_net).flatten(), color='orange', alpha=0.40)
        axes[1].set_title(f'prediction by NN: dropout={dropout}, T={T}, '
                          f'tau={tau}, BS={bs}, TBS={tbs}')
        plt.show()

    # ------ Save figures ------ #
    saving_path = 'data/syntheticFns/'
    if not os.path.exists(saving_path):
        os.makedirs(saving_path)
    fig_name = 'compare_gp_dropnet.png'
    # complete the save step implied by the comment above
    figure.savefig(os.path.join(saving_path, fig_name))
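# Optional hedged sketch: a quantitative check to complement the visual
# comparison above. `rmse` is a hypothetical helper, not part of the original
# script; it could be called inside test_compare_gp_with_dropnet() on
# (m_gp, y) and (m_net, y).
import numpy as np

def rmse(mean_pred, target):
    """Root-mean-square error between a predictive mean and the true values."""
    mean_pred = np.asarray(mean_pred).flatten()
    target = np.asarray(target).flatten()
    return np.sqrt(np.mean((mean_pred - target) ** 2))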
def BNN_BO_Exps(obj_func, model_type, bo_method, batch_option, batch_size,
                num_iter=40, seed_size=20, util_type='se_y',
                activation='tanh'):
    # Specify the objective function and parameters (noise variance, input
    # dimension, number of initial observations)
    f, x_bounds, _, true_fmin = get_function(obj_func)
    var_noise = 1.0e-10
    d = x_bounds.shape[0]
    n_init = d * 10

    saving_path = 'data/' + obj_func
    if not os.path.exists(saving_path):
        os.makedirs(saving_path)

    if model_type == 'LCBNN' or model_type == 'LCCD':
        results_file_name = (saving_path + '/' + model_type + activation +
                             util_type + bo_method + str(batch_size))
    else:
        results_file_name = (saving_path + '/' + model_type + activation +
                             bo_method + str(batch_size))

    # Resume from existing results if available
    if os.path.exists(results_file_name):
        with open(results_file_name, 'rb') as exist_data_file:
            existing_results = pickle.load(exist_data_file)
        X_opt_all_seeds = existing_results['X_opt']
        Y_opt_all_seeds = existing_results['Y_opt']
        X_query_all_seeds = existing_results['X_query']
        Y_query_all_seeds = existing_results['Y_query']
        time_all_seeds = existing_results['runtime']
        if not isinstance(X_query_all_seeds, list):
            X_query_all_seeds = list(X_query_all_seeds)
            Y_query_all_seeds = list(Y_query_all_seeds)
            time_all_seeds = list(time_all_seeds)
        s_start = len(Y_opt_all_seeds)
        print(f'Using existing data from seed {s_start} onwards')
    else:
        s_start = 0
        X_opt_all_seeds = []
        Y_opt_all_seeds = []
        X_query_all_seeds = []
        Y_query_all_seeds = []
        time_all_seeds = []

    for j in range(s_start, seed_size):
        # Specify the random seed and generate initial observation data
        seed = j
        np.random.seed(seed)
        x_init, y_init = get_init_data(obj_func=f, noise_var=var_noise,
                                       n_init=n_init, bounds=x_bounds)

        # Run Bayesian optimisation
        bayes_opt = Bayes_opt(func=f, bounds=x_bounds, noise_var=var_noise)
        # model_type: GP, MCDROP, DNGO or BOHAM
        bayes_opt.initialise(X_init=x_init, Y_init=y_init,
                             model_type=model_type, bo_method=bo_method,
                             batch_option=batch_option, batch_size=batch_size,
                             seed=seed, util_type=util_type,
                             actv_func=activation)

        # Output of Bayesian optimisation:
        # X_query, Y_query - query points selected by BO
        # X_opt, Y_opt     - guesses of the global optimum/optimiser
        #                    (= optimum point of the surrogate posterior mean)
        X_query, Y_query, X_opt, Y_opt, time_record = bayes_opt.iteration_step(
            iterations=num_iter)

        # Store the data for this seed and save the accumulated results
        X_opt_all_seeds.append(X_opt)
        Y_opt_all_seeds.append(Y_opt)
        X_query_all_seeds.append(X_query)
        Y_query_all_seeds.append(Y_query)
        time_all_seeds.append(time_record)

        results = {'X_opt': X_opt_all_seeds,
                   'Y_opt': Y_opt_all_seeds,
                   'X_query': X_query_all_seeds,
                   'Y_query': Y_query_all_seeds,
                   'runtime': time_all_seeds}
        with open(results_file_name, 'wb') as file:
            pickle.dump(results, file)
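# Hedged usage sketch (not in the original file): running the BNN-based BO
# experiment loop on a synthetic objective. The option strings below
# ('egg-2d', 'EI', 'CL') are illustrative assumptions about valid settings,
# not confirmed values from the repo.
if __name__ == '__main__':
    BNN_BO_Exps(obj_func='egg-2d', model_type='MCDROP', bo_method='EI',
                batch_option='CL', batch_size=1, num_iter=40, seed_size=3,
                util_type='se_y', activation='tanh')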