    def test_fix_relabel_poison(self):
        (x_train, y_train), (_, _), (_, _) = self.mnist
        x_poison = x_train[:100]
        y_fix = y_train[:100]

        test_set_split = 0.7
        n_train = int(len(x_poison) * test_set_split)
        x_test = x_poison[n_train:]
        y_test = y_fix[n_train:]

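        # Baseline misclassification rate on the held-out split, before relabeling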
        predictions = np.argmax(self.classifier.predict(x_test), axis=1)
        ini_miss = 1 - np.sum(predictions == np.argmax(y_test, axis=1)) / y_test.shape[0]

        improvement, new_classifier = ActivationDefence.relabel_poison_ground_truth(self.classifier, x_poison, y_fix,
                                                                                    test_set_split=test_set_split,
                                                                                    tolerable_backdoor=0.01,
                                                                                    max_epochs=5, batch_epochs=10)

        predictions = np.argmax(new_classifier.predict(x_test), axis=1)
        final_miss = 1 - np.sum(predictions == np.argmax(y_test, axis=1)) / y_test.shape[0]

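        # relabel_poison_ground_truth reports improvement as the exact drop in the
        # misclassification rate, which the assertion below checks.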
        self.assertEqual(improvement, ini_miss - final_miss)

        # Other method (since it uses cross-validation, we cannot assert a concrete number).
        improvement, _ = ActivationDefence.relabel_poison_cross_validation(self.classifier, x_poison, y_fix,
                                                                           n_splits=2, tolerable_backdoor=0.01,
                                                                           max_epochs=5, batch_epochs=10)
        self.assertGreaterEqual(improvement, 0)
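

# main() below relies on a generate_backdoor helper that is defined elsewhere in
# this script. As a rough, hypothetical sketch of what such a helper could look
# like (numpy-only; the trigger pattern and the +1 label shift are assumptions,
# not the real implementation):
def generate_backdoor_sketch(x_clean, y_clean, percent_poison, target_shift=1):
    n_poison = int(percent_poison * len(x_clean))
    poison_idx = np.random.choice(len(x_clean), n_poison, replace=False)
    x_poisoned = np.copy(x_clean)
    y_poisoned = np.copy(y_clean)
    is_poison = np.zeros(len(x_clean), dtype=bool)
    # Trigger: stamp a bright 2x2 square near the bottom-right corner
    x_poisoned[poison_idx, -3:-1, -3:-1] = 255
    # Backdoor behaviour: relabel triggered images to (label + shift) mod 10
    y_poisoned[poison_idx] = (y_poisoned[poison_idx] + target_shift) % 10
    is_poison[poison_idx] = True
    return is_poison, x_poisoned, y_poisoned

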
def main():
    try:
        print('Checking whether a poisoned model has already been trained...')
        import pickle
        with open('my_poison_classifier.p', 'rb') as f:
            classifier = pickle.load(f)
        print('Loaded model from pickle.')

        data_train = np.load('data_training.npz')
        x_train = data_train['x_train']
        y_train = data_train['y_train']
        is_poison_train = data_train['is_poison_train']

        data_test = np.load('data_testing.npz')
        x_test = data_test['x_test']
        y_test = data_test['y_test']
        is_poison_test = data_test['is_poison_test']

    except IOError:
        # No saved model or data was found, so train a poisoned model from scratch.
        # Read MNIST dataset (x_raw contains the original images):
        (x_raw, y_raw), (x_raw_test, y_raw_test), min_, max_ = load_mnist(raw=True)

        n_train = np.shape(x_raw)[0]
        num_selection = n_train
        # Sample indices without replacement so no image is selected twice
        random_selection_indices = np.random.choice(n_train, num_selection, replace=False)
        x_raw = x_raw[random_selection_indices]
        y_raw = y_raw[random_selection_indices]

        # Poison training data
        perc_poison = .33
        (is_poison_train, x_poisoned_raw, y_poisoned_raw) = generate_backdoor(x_raw, y_raw, perc_poison)
        x_train, y_train = preprocess(x_poisoned_raw, y_poisoned_raw)
        # Add channel axis:
        x_train = np.expand_dims(x_train, axis=3)

        # Poison test data
        (is_poison_test, x_poisoned_raw_test, y_poisoned_raw_test) = generate_backdoor(x_raw_test, y_raw_test,
                                                                                       perc_poison)
        x_test, y_test = preprocess(x_poisoned_raw_test, y_poisoned_raw_test)
        # Add channel axis:
        x_test = np.expand_dims(x_test, axis=3)

        # Shuffle training data so poison is not together
        n_train = np.shape(y_train)[0]
        shuffled_indices = np.arange(n_train)
        np.random.shuffle(shuffled_indices)
        x_train = x_train[shuffled_indices]
        y_train = y_train[shuffled_indices]
        is_poison_train = is_poison_train[shuffled_indices]

        # Save data used for training and testing split:
        np.savez('data_training.npz', x_train=x_train, y_train=y_train, is_poison_train=is_poison_train,
                 x_raw=x_poisoned_raw)
        np.savez('data_testing.npz', x_test=x_test, y_test=y_test, is_poison_test=is_poison_test,
                 x_raw_test=x_poisoned_raw_test)

        # Create Keras convolutional neural network - basic architecture from Keras examples
        # Source here: https://github.com/keras-team/keras/blob/master/examples/mnist_cnn.py
        k.set_learning_phase(1)
        model = Sequential()
        model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=x_train.shape[1:]))
        model.add(Conv2D(64, (3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
        model.add(Flatten())
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(10, activation='softmax'))

        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

        classifier = KerasClassifier((min_, max_), model=model)

        classifier.fit(x_train, y_train, nb_epochs=50, batch_size=128)

        print('Saving poisoned model:')
        with open('my_poison_classifier.p', 'wb') as f:
            pickle.dump(classifier, f)

        # Also save the model architecture and weights in HDF5 and JSON formats:
        file_name = 'anu_poison_mnist'
        model.save(file_name + '.hdf5')
        model_json = model.to_json()
        with open(file_name + '.json', "w") as json_file:
            json_file.write(model_json)

    # Evaluate the classifier on the test set
    preds = np.argmax(classifier.predict(x_test), axis=1)
    acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
    print("\nTest accuracy: %.2f%%" % (acc * 100))

    # Evaluate the classifier on poisonous data
    preds = np.argmax(classifier.predict(x_test[is_poison_test]), axis=1)
    acc_poison = np.sum(preds == np.argmax(y_test[is_poison_test], axis=1)) / y_test[is_poison_test].shape[0]
    print("\nPoisonous test set accuracy (i.e. effectiveness of poison): %.2f%%" % (acc_poison * 100))

    # Evaluate the classifier on clean data
    preds = np.argmax(classifier.predict(x_test[is_poison_test == 0]), axis=1)
    acc = np.sum(preds == np.argmax(y_test[is_poison_test == 0], axis=1)) / y_test[is_poison_test == 0].shape[0]
    print("\nClean test set accuracy: %.2f%%" % (acc * 100))

    # Calling poisoning defence:
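    # ActivationDefence implements activation clustering (Chen et al., 2018): it
    # clusters the network's activations over the training data, where poisoned
    # samples tend to separate into their own, smaller cluster.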
    defence = ActivationDefence(classifier, x_train, y_train)

    # End-to-end method:
    print("------------------- Results using size metric -------------------")
    print(defence.get_params())
    defence.detect_poison(n_clusters=2, ndims=10, reduce="PCA")

    # Now fix the model
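    # (correct_poisoned_labels is defined elsewhere in this example; it is assumed
    # to swap the backdoor labels back to ground truth for the samples flagged in
    # is_poison_train.)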
    x_new, y_fix = correct_poisoned_labels(x_train, y_train, is_poison_train)

    improvement, classifier = ActivationDefence.relabel_poison_ground_truth(classifier, x_new, y_fix,
                                                                            test_set_split=0.7,
                                                                            tolerable_backdoor=0.001,
                                                                            max_epochs=5, batch_epochs=10)

    # Evaluate the classifier on poisonous data after backdoor fix:
    preds = np.argmax(classifier.predict(x_test[is_poison_test]), axis=1)
    acc_after = np.sum(preds == np.argmax(y_test[is_poison_test], axis=1)) / y_test[is_poison_test].shape[0]
    print("\nPoisonous test set accuracy (i.e. effectiveness of poison) after backdoor fix: %.2f%%" % (acc_after * 100))

    print("\n Improvement after training: ", improvement)
    print('before: ', acc, ' after: ', acc_after)

    print("done :) ")