# NOTE(review): the keyword arguments below are the tail of a call whose
# opening lies before this chunk. Presumably it constructs `inception_model`
# (the next statement calls .train() on it) -- confirm against the full file.
    max_lbfgs_iter=max_lbfgs_iter,
    num_classes=num_classes,
    batch_size=batch_size,
    data_sets=data_sets,
    initial_learning_rate=initial_learning_rate,
    keep_probs=keep_probs,
    decay_epochs=decay_epochs,
    mini_batch=False,
    train_dir='output',
    log_dir='log',
    model_name='%s_inception_onlytop' % dataset_name)

inception_model.train()

# Influence of every training index (0 .. number of training labels - 1) on
# the loss at the single test example `test_idx`.
# force_refresh=True presumably bypasses any cached result -- confirm.
inception_predicted_loss_diffs = inception_model.get_influence_on_test_loss(
    [test_idx],
    np.arange(len(inception_model.data_sets.train.labels)),
    force_refresh=True)

# The selected test example: its feature row and its label.
x_test = X_test[test_idx, :]
y_test = Y_test[test_idx]

# Distances between that test example and X_train, as computed by
# dataset.find_distances (metric not visible here).
distances = dataset.find_distances(x_test, X_train)

# Marks training examples whose label differs from the test example's label
# (an elementwise boolean mask if Y_train is a NumPy array -- TODO confirm).
flipped_idx = Y_train != y_test

# Evaluate the RBF model's `margin` tensor under its all-test and all-train
# feed dicts.
rbf_margins_test = rbf_model.sess.run(rbf_model.margin, feed_dict=rbf_model.all_test_feed_dict)
rbf_margins_train = rbf_model.sess.run(rbf_model.margin, feed_dict=rbf_model.all_train_feed_dict)

inception_Y_pred_correct = get_Y_pred_correct_inception(inception_model)

# Save the results to 'output/rbf_results'; the argument list of this call
# continues beyond this chunk.
np.savez('output/rbf_results', test_idx=test_idx,
# Retraining experiment for a single test example: compare the actual loss
# differences returned by experiments.test_retraining with two predicted
# versions (one returned alongside them, one computed separately below).
test_idx = 8

# test_retraining returns three values, unpacked here as the actual loss
# differences, the conjugate-gradient (CG) predictions, and the training
# indices that were removed (500 of them, chosen with remove_type='maxinf').
actual_loss_diffs, predicted_loss_diffs_cg, indices_to_remove = experiments.test_retraining(
    tf_model,
    test_idx,
    iter_to_load=0,
    force_refresh=False,
    num_to_remove=500,
    remove_type='maxinf',
    random_seed=0)

# LiSSA
# Seed NumPy's global RNG so any sampling it drives is reproducible.
np.random.seed(17)
# The approximation type must be 'lissa' here: the original passed 'cg',
# which contradicts the LiSSA-style parameters below (scale, recursion_depth,
# num_samples) and would store a second CG estimate under the LiSSA name.
# NOTE(review): confirm 'lissa' is the exact string accepted by
# get_influence_on_test_loss in this codebase.
predicted_loss_diffs_lissa = tf_model.get_influence_on_test_loss(
    [test_idx],
    indices_to_remove,
    approx_type='lissa',
    approx_params={
        'scale': 25,
        'recursion_depth': 5000,
        'damping': 0,
        'batch_size': 1,
        'num_samples': 10,
    },
    force_refresh=True)

# Save all four arrays in one .npz archive for later analysis.
np.savez(
    'output/spam_logreg_lbfgs_retraining-500.npz',
    actual_loss_diffs=actual_loss_diffs,
    predicted_loss_diffs_cg=predicted_loss_diffs_cg,
    predicted_loss_diffs_lissa=predicted_loss_diffs_lissa,
    indices_to_remove=indices_to_remove)
# plt.title("centroid 9")
# plt.show()

# closest_7_idx is a position among the test examples labelled -1 only.
# Convert it to an index into the full test set by taking the
# closest_7_idx-th True entry of the mask. This replaces a manual
# counting loop; int() keeps the result a plain Python int, as before.
mask_neg = Y_test == -1
test_idx_7 = int(np.flatnonzero(mask_neg)[closest_7_idx])

# Influence of every training example on the loss at test_idx_7, multiplied
# by the number of training examples.
num_train = len(tf_model.data_sets.train.labels)
influences_7 = tf_model.get_influence_on_test_loss(
    [test_idx_7],
    np.arange(num_train),
    force_refresh=True) * num_train

# Cluster the rows of class_wise_1 with KMeans and keep each row's cluster
# assignment and the cluster centres.
fittedKmeans_1 = KMeans(n_clusters=numClusters, random_state=randomState).fit(class_wise_1)
clusters_1 = fittedKmeans_1.predict(class_wise_1)
centroids_1 = fittedKmeans_1.cluster_centers_
#pcaVis(class_wise_1, clusters_1, centroids_1, "for fours")

# Per-cluster distance scores plus the min/max distance, then the first
# entry of getClosest(...) as the index of the example to use.
# NOTE(review): presumably this is the example nearest a centroid -- confirm
# against getDistancesbyCluster / getClosest.
distanceScoresByCluster_1, minDist_1, maxDist_1 = getDistancesbyCluster(
    class_wise_1, clusters_1, centroids_1)
closest_1_idx = getClosest(distanceScoresByCluster_1)[0]

# NOTE(review): the disabled plot titles mention digits 4 and 9 while the
# variables are named *_1 / *_7 -- confirm which digit pair is intended.
# plt.figure(2)
# plt.imshow(class_wise_1[closest_1_idx, :].reshape(28,-1))
# plt.title("centroid 4")
# plt.show()
# top_model.update_train_x(X_train) # top_model.train() # if attack_success: # break ### Try attacking all test examples step_size = 0.005 test_indices = np.arange(num_test) test_description = 'all_%s' % dataset_name ## Find idx to poison? # Use top model to quickly generate inverse HVP with top_graph.as_default(): top_model.get_influence_on_test_loss(test_indices, [0], test_description=test_description, force_refresh=True) copyfile( 'output/%s-cg-normal_loss-test-%s.npz' % (top_model_name, test_description), 'output/%s-cg-normal_loss-test-%s.npz' % (full_model_name, test_description)) with full_graph.as_default(): grad_influence_wrt_input_val = full_model.get_grad_of_influence_wrt_input( np.arange(num_train), test_indices, force_refresh=False, test_description=test_description, loss_type='normal_loss') indices_to_poison = select_examples_to_attack(full_model,
# NOTE(review): the keyword arguments below are the tail of a call whose
# opening lies before this chunk. Presumably it constructs `tf_model`
# (the next statement calls .train() on it) -- confirm against the full file.
    data_sets=lr_data_sets,
    initial_learning_rate=initial_learning_rate,
    keep_probs=keep_probs,
    decay_epochs=decay_epochs,
    mini_batch=False,
    train_dir='output',
    log_dir='log',
    model_name='mnist-17_logreg')

tf_model.train()

# Single test example studied below. Every influence vector is multiplied
# by the number of training examples.
test_idx = 30
num_train = len(tf_model.data_sets.train.labels)

# Baseline: influence of every training index on the loss at test_idx.
influences = tf_model.get_influence_on_test_loss(
    [test_idx],
    np.arange(len(tf_model.data_sets.train.labels)),
    force_refresh=True) * num_train

# Same query with ignore_training_error=True.
# force_refresh=False presumably reuses the cached result of the baseline
# call -- confirm.
influences_without_train_error = tf_model.get_influence_on_test_loss(
    [test_idx],
    np.arange(len(tf_model.data_sets.train.labels)),
    force_refresh=False,
    ignore_training_error=True) * num_train

# Same query with ignore_hessian=True.
influences_without_hessian = tf_model.get_influence_on_test_loss(
    [test_idx],
    np.arange(len(tf_model.data_sets.train.labels)),
    force_refresh=False,
    ignore_hessian=True) * num_train

# The argument list of this call continues beyond this chunk.
influences_without_both = tf_model.get_influence_on_test_loss(