Exemplo n.º 1
0
        # (fragment — enclosing loop starts outside this view)
        # Accumulate the newly acquired sample indices into the running set.
        indices = np.concatenate((indices, new_indices))
    # generate new loader based on updated indices
    print(indices.shape)
    train_dataset = generate_full_dataset()
    train_dataset = modify_dataset(train_dataset, indices)
    # Rebuild the DataLoader so the next round trains on the enlarged subset.
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=train_batch_size,
                                               shuffle=True)
    print('Selection: {}, Round: {}, Size: {}'.format(
        args.selection, r + 1, len(train_loader.dataset)))

# Plot the generalization-error and epsilon-criterion curves for the
# Bayesian DNN / MNIST experiment and save each figure as a PDF.
draw_result.draw_gene_error(test_loss_list, criteria, args.initial_size,
                            args.acquisition_size, color, font_size)
plt.tight_layout()
plt.savefig(save_dir + "BDNN_gene_error_MNIST.pdf")
draw_result.draw_epsilon(criteria, args.initial_size, args.acquisition_size,
                         color, font_size)
plt.tight_layout()
plt.savefig(save_dir + "BDNN_epsilon_MNIST.pdf")

# Correlate test loss with the error-stability ratio, restricted to the
# index subset returned by utils.calc_min_list — presumably the running
# minima of the ratio; confirm against the helper's definition.
indecies = utils.calc_min_list(error_stability1.error_ratio)
corr = np.corrcoef(test_loss_list[indecies],
                   error_stability1.error_ratio[indecies])[1, 0]
np.savetxt(save_dir + "corr.txt", np.array([corr]))

print(test_loss_list[indecies].shape)
print(error_stability1.error_ratio[indecies].shape)
draw_result.draw_correlation(test_loss_list, error_stability1.error_ratio,
                             "BDNN", "purple", font_size_corr)
plt.tight_layout()
plt.savefig(save_dir + "BDNN_correlation_MNIST.pdf")
Exemplo n.º 2
0
def main():
    """Run the active-learning experiment for Bayesian logistic regression.

    For each binary-classification dataset, a BLR model starts from a
    random initial sample and acquires one new training point per round.
    After every acquisition the KL divergences between the old and new
    posteriors feed three error-stability stopping criteria (one per
    threshold), while the test cross-entropy is recorded.  Plots and the
    loss/criterion correlation are written under ``result/BLR/``.
    """
    np.random.seed(1)
    random.seed(1)
    data_names = ["grid_stability_c", "skin", "HTRU2"]
    corr_list = np.zeros((len(data_names)))

    # Per-dataset regularization strength for the classifier.
    C_list = [100, 0.001, 1]
    fontsize = 24
    fontsize_corr = 40
    batch_size = 1

    for i, data_name in enumerate(data_names):
        save_dir = "result/BLR/"
        os.makedirs(save_dir, exist_ok=True)
        [X, y] = get_dataset.get_dataset(data_name)
        [whole_size, dim] = X.shape
        y = y[:, 0].astype(int)
        test_size = 5000
        train_size = whole_size - test_size
        init_sample_size = 100
        sample_size = min([3000, train_size - init_sample_size])
        X_test = X[:test_size]
        y_test = y[:test_size]
        X_train = X[test_size:test_size + train_size]
        y_train = y[test_size:test_size + train_size]

        pool_indecies = set(range(train_size))
        # FIX: random.sample() no longer accepts a set (TypeError on
        # Python >= 3.11, deprecated since 3.9).  Sampling from the sorted
        # list keeps the draw valid and reproducible under the fixed seed.
        sampled_indecies = set(
            random.sample(sorted(pool_indecies), init_sample_size))
        pool_indecies = list(pool_indecies - sampled_indecies)
        sampled_indecies = list(sampled_indecies)

        X_sampled = X_train[sampled_indecies]
        y_sampled = y_train[sampled_indecies]

        basis_size = 5
        x_range = [X_train.min(), X_train.max()]
        blr = active_BLR(basis_size=basis_size,
                         x_range=x_range,
                         C=C_list[i],
                         solver="newton-cg")

        # Three identical criteria, differing only in their thresholds.
        validate_size = 10
        threshold = 0.2
        error_stability1 = error_stability_criterion(threshold, validate_size)
        threshold = 0.15
        error_stability2 = error_stability_criterion(threshold, validate_size)
        threshold = 0.1
        error_stability3 = error_stability_criterion(threshold, validate_size)
        criteria = [error_stability1, error_stability2, error_stability3]

        test_error = np.empty(0, float)
        blr.fit(X_sampled, y_sampled)
        color = {
            error_stability1.criterion_name: "r",
            error_stability2.criterion_name: "g",
            error_stability3.criterion_name: "b"
        }
        for e in tqdm(range(sample_size)):
            # Acquire one point and move it from the pool to the sample.
            new_data_index = blr.data_acquire(X_train, pool_indecies)
            sampled_indecies.append(new_data_index)
            pool_indecies.remove(new_data_index)

            X_sampled = X_train[sampled_indecies]
            y_sampled = y_train[sampled_indecies]

            # Refit (warm-started from the current coefficients) and compare
            # the posteriors before/after adding the new point.
            pos_old = blr.get_pos()
            blr.fit(X_sampled, y_sampled, blr.coef_[0])
            pos_new = blr.get_pos()
            KL_pq = utils.calcKL_gauss(pos_old, pos_new)
            KL_qp = utils.calcKL_gauss(pos_new, pos_old)
            error = utils.calc_cross_entropy(y_test,
                                             blr.predict_proba(X_test)[:, 1])
            test_error = np.append(test_error, error)

            error_stability1.check_threshold(KL_pq, KL_qp, e)
            error_stability2.check_threshold(KL_pq, KL_qp, e)
            error_stability3.check_threshold(KL_pq, KL_qp, e)

        draw_result.draw_gene_error(test_error, criteria, init_sample_size,
                                    batch_size, color, fontsize)
        plt.tight_layout()
        plt.savefig(save_dir + "BLR_gene_error_" + data_name + ".pdf")
        draw_result.draw_correlation(
            test_error[validate_size:],
            error_stability1.error_ratio[validate_size:], "BLR", "b",
            fontsize_corr)
        plt.tight_layout()
        plt.savefig(save_dir + "BLR_correlation_" + data_name + ".pdf")
        draw_result.draw_epsilon(criteria, init_sample_size, batch_size, color,
                                 fontsize)
        plt.tight_layout()
        plt.savefig(save_dir + "BLR_criterion_" + data_name + ".pdf")

        # Correlate test error with the stability ratio, restricted to the
        # index subset returned by utils.calc_min_list.
        indecies = utils.calc_min_list(
            error_stability1.error_ratio[validate_size:])
        corr_list[i] = np.corrcoef(
            test_error[validate_size:][indecies],
            error_stability1.error_ratio[validate_size:][indecies])[1, 0]

    np.savetxt(save_dir + "corr_list.txt", corr_list)
    # NOTE: these two files record only the LAST dataset's trajectories.
    np.savetxt(save_dir + "loss.txt", np.array(test_error))
    np.savetxt(save_dir + "lambda.txt", error_stability1.error_ratio)
Exemplo n.º 3
0
def main():
    """Run the active-learning experiment for Gaussian-process regression.

    For each regression dataset: fit kernel hyperparameters once on an
    initial chunk, freeze the optimizer, then acquire points one at a
    time.  Fast KL estimates between successive posteriors drive three
    error-stability stopping criteria while the expected squared test
    error is tracked.  Plots and correlations go under ``result/GPR/``.
    """
    np.random.seed(1)
    random.seed(1)
    data_names = ["power_plant", "protein", "gas_emission", "grid_stability"]
    # One (lower, upper) noise bound per dataset; the trailing fifth entry
    # is unused with the current four data_names.
    noise_level_bounds_list = [(1e-3, 1e3), (1e-3, 1e3), (1e-1, 1e3),
                               (1e-1, 1e3), (1e-3, 1e3)]
    corr_list = np.zeros((len(data_names)))
    fontsize = 24
    fontsize_corr = 40
    batch_size = 1

    for i, data_name in enumerate(data_names):
        save_dir = "result/GPR/"
        os.makedirs(save_dir, exist_ok=True)
        [X, y] = get_dataset.get_dataset(data_name)
        [whole_size, dim] = X.shape
        print(X.shape)
        test_size = 2000
        sample_size = 1000
        train_size = min([5000, whole_size - test_size])
        X_test = X[:test_size]
        y_test = y[:test_size]
        X_train = X[test_size:test_size + train_size]
        y_train = y[test_size:test_size + train_size]

        # RBF + white-noise kernel; hyperparameters are tuned on the first
        # sample_size points, then frozen for the acquisition loop.
        length_scale_bounds = (1e-3, 1e3)
        noise_level_bounds = noise_level_bounds_list[i]
        kernel = RBF(length_scale=1.0,
                     length_scale_bounds=length_scale_bounds) + WhiteKernel(
                         noise_level=1.0,
                         noise_level_bounds=noise_level_bounds)
        gp = active_GPR(kernel=kernel,
                        alpha=0.0,
                        optimizer="fmin_l_bfgs_b",
                        n_restarts_optimizer=5)
        # gp = active_GPR(kernel=kernel, alpha=0.0, optimizer=None)
        gp.fit(X_train[:sample_size], y_train[:sample_size])
        gp.change_optimizer(None)
        init_sample_size = 10
        validate_size = 10
        # Three identical criteria, differing only in their thresholds.
        threshold = 0.05
        error_stability1 = error_stability_criterion(threshold, validate_size)
        threshold = 0.04
        error_stability2 = error_stability_criterion(threshold, validate_size)
        threshold = 0.03
        error_stability3 = error_stability_criterion(threshold, validate_size)
        criteria = [error_stability1, error_stability2, error_stability3]

        pool_indecies = set(range(train_size))
        # FIX: random.sample() no longer accepts a set (TypeError on
        # Python >= 3.11, deprecated since 3.9).  Sampling from the sorted
        # list keeps the draw valid and reproducible under the fixed seed.
        sampled_indecies = set(
            random.sample(sorted(pool_indecies), init_sample_size))
        pool_indecies = list(pool_indecies - sampled_indecies)
        sampled_indecies = list(sampled_indecies)
        gp.fit(X_train[sampled_indecies], y_train[sampled_indecies])
        test_error = np.empty(0, float)

        color = {
            error_stability1.criterion_name: "r",
            error_stability2.criterion_name: "g",
            error_stability3.criterion_name: "b"
        }
        # Fitted kernel hyperparameters; params[1] is used below as the
        # noise level in the fast KL computations.
        params = np.exp(gp.kernel_.theta)
        print(params)
        for e in tqdm(range(sample_size)):
            new_data_index = gp.data_aquire(X_train, pool_indecies)
            sampled_indecies.append(new_data_index)
            pool_indecies.remove(new_data_index)

            X_sampled = X_train[sampled_indecies]
            y_sampled = y_train[sampled_indecies]

            # Predictive mean/covariance before refitting with the new point.
            pos_old = gp.predict(X_sampled, return_cov=True)
            gp.fit(X_sampled, y_sampled)

            error = utils.calc_expected_squre_error(
                y_test, gp.predict(X_test, return_std=True))
            test_error = np.append(test_error, error)

            KL_pq = utils.calcKL_pq_fast(pos_old, y_sampled[-1], 1 / params[1])
            KL_qp = utils.calcKL_qp_fast(pos_old, y_sampled[-1], 1 / params[1])

            error_stability1.check_threshold(KL_pq, KL_qp, e)
            error_stability2.check_threshold(KL_pq, KL_qp, e)
            error_stability3.check_threshold(KL_pq, KL_qp, e)

        draw_result.draw_gene_error(test_error, criteria, init_sample_size,
                                    batch_size, color, fontsize)
        plt.tight_layout()
        plt.savefig(save_dir + "GPR_gene_error_" + data_name + ".pdf")
        draw_result.draw_correlation(
            test_error[validate_size:],
            error_stability1.error_ratio[validate_size:], "GPR", "r",
            fontsize_corr)
        plt.tight_layout()
        plt.savefig(save_dir + "GPR_correlation_" + data_name + ".pdf")
        draw_result.draw_epsilon(criteria, init_sample_size, batch_size, color,
                                 fontsize)
        plt.tight_layout()
        plt.savefig(save_dir + "GPR_criterion_" + data_name + ".pdf")

        # Correlate test error with the stability ratio, restricted to the
        # index subset returned by utils.calc_min_list.
        indecies = utils.calc_min_list(
            error_stability1.error_ratio[validate_size:])
        corr_list[i] = np.corrcoef(
            test_error[validate_size:][indecies],
            error_stability1.error_ratio[validate_size:][indecies])[1, 0]

    np.savetxt(save_dir + "corr_list.txt", corr_list)
    # NOTE: these two files record only the LAST dataset's trajectories.
    np.savetxt(save_dir + "loss.txt", np.array(test_error))
    np.savetxt(save_dir + "lambda.txt", error_stability1.error_ratio)
        print(data[0].shape)
    print('Selection: {}, Round: {}, Size: {}'.format(
        args.selection, r + 1, len(train_loader.dataset)))

# Per-round training-loss curves, one rainbow color per round.
print(loss_lists.shape)
for i in range(args.round):
    plt.plot(range(epochs), loss_lists[i], c=cm.rainbow(i / args.round))
plt.xlabel("epochs", fontsize=24)
plt.ylabel("train loss", fontsize=24)
plt.savefig("result/losses.pdf")
plt.pause(0.01)
plt.clf()
# Final-epoch training loss as a function of the acquired data size.
plt.plot(range(args.round), loss_lists[:, -1], c="k")
plt.xlabel("data size", fontsize=24)
plt.ylabel("train loss", fontsize=24)
plt.savefig("result/loss.pdf")
plt.pause(0.01)
# NOTE(review): these draw_* calls use a different argument list than the
# other examples (no initial/acquisition size split) — confirm which
# draw_result API version this snippet targets.
draw_result.draw_accuracy(test_accs_list, criteria, color,
                          args.acquisition_size)
plt.tight_layout()
plt.savefig("result/BayesianDNN_accuracy_MNIST.pdf")
draw_result.draw_mse(test_mses_list, criteria, color, args.acquisition_size)
plt.tight_layout()
plt.savefig("result/BayesianDNN_MSE_MNIST.pdf")
draw_result.draw_upper_bound(criteria, color, args.acquisition_size)
plt.tight_layout()
plt.savefig("result/BayesianDNN_upper_bound_MNIST.pdf")
draw_result.draw_epsilon(criteria, color, args.acquisition_size)
plt.tight_layout()
plt.savefig("result/BayesianDNN_epsilon_MNIST.pdf")
Exemplo n.º 5
0
def main():
    """Run the active-learning experiment for Bayesian ridge regression.

    For each regression dataset: fit BRR hyperparameters once on an
    initial chunk, then acquire points one at a time with hyperparameters
    fixed.  Symmetric KL divergences between successive posteriors drive
    three error-stability stopping criteria while the expected squared
    test error is tracked.  Outputs go under ``result/BRR/``.
    """
    np.random.seed(1)
    random.seed(1)
    data_names = ["power_plant", "protein", "gas_emission", "grid_stability"]
    # Per-dataset cap on the number of acquisition rounds.
    data_sizes = [2000, 2000, 1000, 8000]
    corr_list = np.zeros((len(data_names)))
    fontsize = 24
    fontsize_corr = 40
    batch_size = 1

    for i, data_name in enumerate(data_names):
        save_dir = "result/BRR/"
        os.makedirs(save_dir, exist_ok=True)
        [X, y] = get_dataset.get_dataset(data_name)
        [whole_size, dim] = X.shape
        print(X.shape)
        test_size = 2000
        train_size = min([10000, whole_size - test_size])
        init_sample_size = 10
        sample_size = min([data_sizes[i], train_size - init_sample_size])
        print(train_size)
        X_test = X[:test_size]
        y_test = y[:test_size]
        X_train = X[test_size:test_size + train_size]
        y_train = y[test_size:test_size + train_size]

        basis_size = 10
        x_range = [X_train.min(), X_train.max()]
        brr = active_BRR(beta=0.1,
                         alpha=1.0,
                         basis_size=basis_size,
                         x_range=x_range)
        # Tune hyperparameters once on the leading chunk; later fits keep
        # them fixed (fix_hyper_param=True below).
        brr.fit(X_train[:sample_size],
                y_train[:sample_size],
                fix_hyper_param=False)
        validate_size = 10
        # Three identical criteria, differing only in their thresholds.
        # threshold = 0.02
        threshold = 0.05
        error_stability1 = error_stability_criterion(threshold, validate_size)
        threshold = 0.04
        # threshold = 0.015
        error_stability2 = error_stability_criterion(threshold, validate_size)
        threshold = 0.03
        # threshold = 0.01
        error_stability3 = error_stability_criterion(threshold, validate_size)
        criteria = [error_stability1, error_stability2, error_stability3]

        pool_indecies = set(range(train_size))
        # FIX: random.sample() no longer accepts a set (TypeError on
        # Python >= 3.11, deprecated since 3.9).  Sampling from the sorted
        # list keeps the draw valid and reproducible under the fixed seed.
        sampled_indecies = set(
            random.sample(sorted(pool_indecies), init_sample_size))
        pool_indecies = list(pool_indecies - sampled_indecies)
        sampled_indecies = list(sampled_indecies)
        test_error = np.empty(0, float)

        brr.fit(X_train[sampled_indecies],
                y_train[sampled_indecies],
                fix_hyper_param=True)
        color = {
            error_stability1.criterion_name: "r",
            error_stability2.criterion_name: "g",
            error_stability3.criterion_name: "b"
        }
        epsilon = 0
        for e in tqdm(range(sample_size)):
            # new_data_index = random.sample(pool_indecies,1)[0]
            new_data_index = brr.data_aquire(X_train, pool_indecies)
            sampled_indecies.append(new_data_index)
            pool_indecies.remove(new_data_index)

            X_sampled = X_train[sampled_indecies]
            y_sampled = y_train[sampled_indecies]

            # Compare posteriors before/after adding the new point.
            pos_old = brr.get_pos()
            brr.fit(X_sampled, y_sampled)
            # brr.fit(X_sampled, y_sampled, fix_hyper_param=False)
            pos_new = brr.get_pos()
            error = utils.calc_expected_squre_error(
                y_test, brr.predict(X_test, return_std=True))
            test_error = np.append(test_error, error)

            KL_pq = utils.calcKL_gauss(pos_old, pos_new, epsilon)
            KL_qp = utils.calcKL_gauss(pos_new, pos_old, epsilon)

            error_stability1.check_threshold(KL_pq, KL_qp, e)
            error_stability2.check_threshold(KL_pq, KL_qp, e)
            error_stability3.check_threshold(KL_pq, KL_qp, e)

        draw_result.draw_gene_error(test_error, criteria, init_sample_size,
                                    batch_size, color, fontsize)
        plt.tight_layout()
        plt.savefig(save_dir + "BRR_gene_error_" + data_name + ".pdf")
        draw_result.draw_correlation(
            test_error[validate_size:],
            error_stability1.error_ratio[validate_size:], "BRR", "g",
            fontsize_corr)
        plt.tight_layout()
        plt.savefig(save_dir + "BRR_correlation_" + data_name + ".pdf")
        draw_result.draw_epsilon(criteria, init_sample_size, batch_size, color,
                                 fontsize)
        plt.tight_layout()
        plt.savefig(save_dir + "BRR_criterion_" + data_name + ".pdf")

        # Correlate test error with the stability ratio, restricted to the
        # index subset returned by utils.calc_min_list.
        indecies = utils.calc_min_list(
            error_stability1.error_ratio[validate_size:])
        corr_list[i] = np.corrcoef(
            test_error[validate_size:][indecies],
            error_stability1.error_ratio[validate_size:][indecies])[1, 0]

    np.savetxt(save_dir + "corr_list.txt", corr_list)
    # NOTE: these two files record only the LAST dataset's trajectories.
    np.savetxt(save_dir + "loss.txt", np.array(test_error))
    np.savetxt(save_dir + "lambda.txt", error_stability1.error_ratio)