Пример #1
0
def test_clf(clf, ts):
    """Return the accuracy of `clf` on the dataset `ts`.

    The classifier is not trained here; only its `predict` method is called.

    Parameters
    ----------
    clf : object exposing ``predict(X)``
    ts : object exposing ``X`` (samples) and ``Y`` (true labels)

    Returns
    -------
    The value produced by ``CMetricAccuracy().performance_score``.

    """
    predicted = clf.predict(ts.X)
    # Accuracy is the metric used for the evaluation
    return CMetricAccuracy().performance_score(y_true=ts.Y, y_pred=predicted)
Пример #2
0
    def test_multiclass(self):
        """Compare a one-vs-all SVM wrapper with a plain SVM on MNIST digits.

        Both classifiers are built from the same parameters and trained on
        the same data; their accuracy and their gradient at one test sample
        are required to be (almost) equal.
        """
        self.logger.info("Testing multiclass SVM.")

        n_tr, n_ts = 100, 200  # Training / test set sizes
        digits = tuple(range(10))

        mnist = CDataLoaderMNIST()
        tr = mnist.load('training', digits=digits, num_samples=n_tr)
        ts = mnist.load('testing', digits=digits, num_samples=n_ts)

        # Rescale the features into `[0, 1]`
        tr.X /= 255
        ts.X /= 255

        svm_params = dict(
            kernel=CKernelRBF(gamma=0.1),
            C=10,
            class_weight={0: 1, 1: 1},
            n_jobs=2,
        )
        classifiers = (
            CClassifierMulticlassOVA(CClassifierSVM, **svm_params),
            CClassifierSVM(**svm_params),
        )

        accuracies, gradients = [], []
        for model in classifiers:
            model.verbose = 1
            model.fit(tr.X, tr.Y)
            # The decision scores are requested but only the labels are used
            labels, _ = model.predict(ts.X, return_decision_function=True)
            accuracies.append(
                CMetricAccuracy().performance_score(y_true=ts.Y, y_pred=labels))
            # Gradient at the test sample with index 1; the second argument
            # is presumably the class label (TODO confirm)
            gradients.append(model.grad_f_x(ts.X[1, :], 1))

        self.assertAlmostEqual(accuracies[0], accuracies[1])
        self.assert_array_almost_equal(gradients[0], gradients[1])
Пример #3
0
    def test_classification(self):
        """Predict on `self.test` with `self.knn` and require accuracy > 0.9."""
        self.logger.info("Check the classification method... ")

        # The decision scores are requested but not used afterwards
        predicted, _ = self.knn.predict(
            self.test.X, return_decision_function=True)

        metric = CMetricAccuracy()
        accuracy = metric.performance_score(self.test.Y, predicted)

        # Log true labels, predicted labels and the resulting accuracy
        self.logger.info("Real label:\n{:}".format(self.test.Y.tolist()))
        self.logger.info("Predicted label:\n{:}".format(predicted.tolist()))
        self.logger.info("Accuracy: {:}".format(accuracy))

        self.assertGreater(accuracy, 0.9)
Пример #4
0
    def _test_accuracy(self, clf, ts):
        """Assert that a fitted classifier exceeds 0.80 accuracy on `ts`.

        Parameters
        ----------
        clf : CClassifierPyTorch
            Must already be fitted; this is asserted via ``clf.is_fitted()``.
        ts : CDataset
            Provides the samples (``ts.X``) and true labels (``ts.Y``).

        """
        self.assertTrue(clf.is_fitted())

        # The decision scores are requested but only the labels are scored
        predicted, _ = clf.predict(ts.X, return_decision_function=True)

        metric = CMetricAccuracy()
        accuracy = metric.performance_score(ts.Y, predicted)

        self.logger.info("Accuracy of PyTorch Model: {:}".format(accuracy))
        self.assertGreater(accuracy, 0.80)
Пример #5
0
# acc = metric.performance_score(y_true = ds_te_secml.Y, y_pred = preds)
# print("Accuracy on test set: {:.2%}".format(acc))
# probs = secml_sklearn_clf.predict_proba(ds_te_secml.X)       #Doesn't work
#
# #sklearn here isn't supported for performing adversarial attacks, only the native SVM of secml supports adversarial attacks
# ###############################################################
#
# =============================================================================
# Samples and labels of the whole test set (full slice on both axes).
# Author's remark: this will not work if a target class has to be specified
# for each example.
x, y = ds_te_secml[:, :].X, ds_te_secml[:, :].Y

# Disabled alternative: one-vs-all wrapper around the same SVM.
#secml_clf = CClassifierMulticlassOVA(CClassifierSVM, kernel = CKernelRBF(gamma = 10), C = 1)
# Train an RBF-kernel SVM (gamma=10, C=1) on the training dataset
secml_clf = CClassifierSVM(kernel=CKernelRBF(gamma=10), C=1)
secml_clf.fit(ds_tr_secml)
# Predict on the test samples and report the accuracy as a percentage
preds = secml_clf.predict(ds_te_secml.X)
metric = CMetricAccuracy()
acc = metric.performance_score(y_true=ds_te_secml.Y, y_pred=preds)
print("Accuracy on test set: {:.2%}".format(acc))

# Attack configuration.
# NOTE(review): the attack object that consumes these values is outside this
# excerpt; their meaning below follows the author's original remarks.
noise_type = 'l2'
dmax = 0.4
# Author's remark: with bounds 0, 1 it goes out of bounds, hence no bounds here
lb, ub = None, None
# `y_target` can be set to a class to indicate which class is expected for
# the adversarial example; None leaves it unspecified
y_target = None

#solver_params = {
#    'eta': 0.3,
#    'max_iter': 100,
#    'eps': 1e-4
#}
solver_params = {
# Split the dataset into a training and a test set.
# NOTE(review): `n_tr`, `n_ts`, `random_state` and `dataset` are defined
# outside this excerpt.
from secml.data.splitter import CTrainTestSplit
splitter = CTrainTestSplit(
    train_size=n_tr, test_size=n_ts, random_state=random_state)
tr, ts = splitter.split(dataset)

# Normalize the data: the min-max normalizer is fitted on the training
# samples only and then applied unchanged to the test samples
from secml.ml.features import CNormalizerMinMax
nmz = CNormalizerMinMax()
tr.X = nmz.fit_transform(tr.X)
ts.X = nmz.transform(ts.X)

# Metric to use for training and performance evaluation
from secml.ml.peval.metrics import CMetricAccuracy
metric = CMetricAccuracy()

# Create the multiclass classifier: one-vs-all wrapper around an SVM with
# an RBF kernel (kernel parameters left at their defaults here)
from secml.ml.classifiers import CClassifierSVM
from secml.ml.classifiers.multiclass import CClassifierMulticlassOVA
from secml.ml.kernel import CKernelRBF
clf = CClassifierMulticlassOVA(CClassifierSVM, kernel=CKernelRBF())

# Candidate values for the cross-validation procedure: `C` of the SVM and
# `gamma` of its kernel
xval_params = {'C': [1e-2, 0.1, 1], 'kernel.gamma': [10, 100, 1e3]}

# Create a 3-fold data splitter, seeded with the same `random_state`
from secml.data.splitter import CDataSplitterKFold
xval_splitter = CDataSplitterKFold(num_folds=3, random_state=random_state)

# Select and set the best training parameters for the classifier
Пример #7
0
    def setUpClass(cls):
        """Store the MNIST datasets and an accuracy metric on the class.

        Calls ``CUnitTest.setUpClass()`` first, then keeps the three objects
        returned by ``cls._create_mnist_dataset()`` as ``_tr``, ``_val`` and
        ``_ts`` (presumably training / validation / test — TODO confirm).

        NOTE(review): no ``@classmethod`` decorator is visible above this
        definition in the excerpt — confirm it is present in the full file.
        """
        CUnitTest.setUpClass()

        splits = cls._create_mnist_dataset()
        cls._tr, cls._val, cls._ts = splits
        cls._metric = CMetricAccuracy()
def attack_keras_model(X, Y, S, nb_attack=25, dmax=0.1):
    """
    Generates an adversarial attack on a general model.

    The data is split into an internal training set (about 66% of the rows)
    and an internal test set (the rest). A one-vs-all RBF SVM is tuned with
    3-fold cross-validation and fitted on the training part, then a PGD-LS
    evasion attack is run from `nb_attack` test points drawn at random
    (with replacement).

    :param X: Original inputs on which the model is trained; converted to a
        float array, and indexed as (n_samples, n_features)
    :param Y: Original outputs on which the model is trained
    :param S: Original protected attributes on which the model is trained
        (not used by this function)
    :param nb_attack: Number of attack runs, i.e. rows in the returned arrays
    :param dmax: Value passed as `dmax` to the attack (maximum perturbation)
    :return: Tuple ``(result_pts, result_class, y_true)`` where `result_pts`
        is a ``(nb_attack, n_features)`` numpy array of adversarial points,
        `result_class` is a ``(nb_attack, 1)`` numpy array with the class
        predicted for each adversarial point, and `y_true` holds the true
        labels of the attacked starting points, in the same row order.
        Rows whose attack raised ``ValueError`` are left as NaN in
        `result_pts` and `result_class`.
    """

    from secml.data import CDataset
    from secml.array import CArray

    # secML wants all dimensions to be homogeneous (we had previously float and int in X)
    data_set_encoded_secML = CArray(X, dtype=float, copy=True)
    data_set_encoded_secML = CDataset(data_set_encoded_secML, Y)

    n_tr = round(0.66 * X.shape[0])
    n_ts = X.shape[0] - n_tr

    logger.debug(X.shape)
    logger.debug(n_tr)
    logger.debug(n_ts)

    from secml.data.splitter import CTrainTestSplit
    splitter = CTrainTestSplit(train_size=n_tr, test_size=n_ts)

    # Use training set for the classifier and then pick points from an internal test set.
    tr_set_secML, ts_set_secML = splitter.split(data_set_encoded_secML)

    # Create a surrogate classifier

    # Creation of the multiclass classifier
    from secml.ml.classifiers import CClassifierSVM
    from secml.ml.classifiers.multiclass import CClassifierMulticlassOVA
    from secml.ml.kernel import CKernelRBF
    clf = CClassifierMulticlassOVA(CClassifierSVM, kernel=CKernelRBF())

    # Parameters for the Cross-Validation procedure
    xval_params = {'C': [1e-4, 1e-3, 1e-2, 0.1, 1], 'kernel.gamma': [0.01, 0.1, 1, 10, 100, 1e3]}

    # Let's create a 3-Fold data splitter
    random_state = 999

    from secml.data.splitter import CDataSplitterKFold
    xval_splitter = CDataSplitterKFold(num_folds=3, random_state=random_state)

    # Select and set the best training parameters for the classifier
    logger.debug("Estimating the best training parameters...")
    best_params = clf.estimate_parameters(
        dataset=tr_set_secML,
        parameters=xval_params,
        splitter=xval_splitter,
        metric='accuracy',
        perf_evaluator='xval'
    )
    # The message needs a placeholder: passing an argument without one makes
    # the logging module report a formatting error when the record is emitted
    logger.debug("The best training parameters are: %s", best_params)

    logger.debug(clf.get_params())
    logger.debug(clf.num_classifiers)

    # Metric to use for training and performance evaluation
    from secml.ml.peval.metrics import CMetricAccuracy
    metric = CMetricAccuracy()

    # Train the classifier
    clf.fit(tr_set_secML)
    logger.debug(clf.num_classifiers)

    # Compute predictions on a test set
    y_pred = clf.predict(ts_set_secML.X)

    # Evaluate the accuracy of the classifier
    acc = metric.performance_score(y_true=ts_set_secML.Y, y_pred=y_pred)

    logger.debug("Accuracy on test set: {:.2%}".format(acc))

    # Prepare attack configuration

    noise_type = 'l2'   # Type of perturbation 'l1' or 'l2'
    lb, ub = 0, 1       # Bounds of the attack space. Can be set to `None` for unbounded
    y_target = None     # None if `error-generic` or a class label for `error-specific`

    # Should be chosen depending on the optimization problem
    solver_params = {
        'eta': 0.1,         # grid search resolution
        'eta_min': 0.1,
        'eta_max': None,    # None should be ok
        'max_iter': 1000,
        'eps': 1e-2         # Tolerance on the stopping crit.
    }

    # Run attack

    from secml.adv.attacks.evasion import CAttackEvasionPGDLS
    pgd_ls_attack = CAttackEvasionPGDLS(
        classifier=clf,
        surrogate_classifier=clf,
        surrogate_data=tr_set_secML,
        distance=noise_type,
        dmax=dmax,
        lb=lb, ub=ub,
        solver_params=solver_params,
        y_target=y_target)

    nb_feat = X.shape[1]

    # NaN-filled so that rows of failed attacks are recognisable instead of
    # containing whatever happened to be in uninitialised memory
    result_pts = np.full([nb_attack, nb_feat], np.nan)
    result_class = np.full([nb_attack, 1], np.nan)

    # take a point at random being the starting point of the attack and run the attack
    import random
    attacked_idx = []   # test-set index of the starting point of each run
    for nb_iter in range(nb_attack):
        rn = random.randint(0, ts_set_secML.num_samples - 1)
        attacked_idx.append(rn)
        x0, y0 = ts_set_secML[rn, :].X, ts_set_secML[rn, :].Y

        try:
            y_pred_pgdls, _, adv_ds_pgdls, _ = pgd_ls_attack.run(x0, y0)
            adv_pt = adv_ds_pgdls.X.get_data()
            result_pts[nb_iter] = adv_pt
            result_class[nb_iter] = y_pred_pgdls.get_data()[0]
        except ValueError:
            # Best effort: keep going, the row for this run stays NaN
            logger.warning("value error on {}".format(nb_iter))

    # Return the true labels of the points that were actually attacked, so
    # that row i of every returned array refers to the same sample
    return result_pts, result_class, ts_set_secML.Y[attacked_idx]
# First split, using a splitter created outside this excerpt.
# NOTE(review): `trainingValidation` is not used again in the visible code;
# the second split below is taken from `dataset`, not from
# `trainingValidation`, so `training`/`validation` may overlap `test` —
# confirm whether `splitter.split(trainingValidation)` was intended.
trainingValidation, test = splitter.split(dataset)

# Second splitter: training vs. validation sizes, same random state
splitter = CTrainTestSplit(train_size=setSamplesTrainingNumber,
                           test_size=setSamplesValidationNumber,
                           random_state=random_state)

training, validation = splitter.split(dataset)

# Normalize the data: the min-max normalizer is fitted on the training
# samples only and then applied unchanged to validation and test samples
normalizer = CNormalizerMinMax()
training.X = normalizer.fit_transform(training.X)
validation.X = normalizer.transform(validation.X)
test.X = normalizer.transform(test.X)

# Metric to use for training and performance evaluation
metric = CMetricAccuracy()

# Create the classifier: an SVM with RBF kernel (gamma=10) and C=1
classifier = CClassifierSVM(kernel=CKernelRBF(gamma=10), C=1)

# We can now fit the classifier
classifier.fit(training.X, training.Y)
print("Training of classifier complete!")

# Compute predictions on the test set
predictionY = classifier.predict(test.X)

# Bounds of the attack space, taken from the smallest and largest value of
# the validation samples. Can be set to `None` for unbounded
lowerBound, upperBound = validation.X.min(), validation.X.max()

# Should be chosen depending on the optimization problem
#)
# Show the parameters selected for the linear classifier.
# NOTE(review): `best_lin_params`, `clf_lin`, `data_smp_encoded_secML` and
# `raw_data_encoded_secML` are defined outside this excerpt.
print(best_lin_params)

# Train the classifier on the `data_smp_encoded_secML` dataset
print("start training")
clf_lin.fit(data_smp_encoded_secML)
#print("linear training ended, begining rbf")
#clf_rbf.fit(tr)
#print("start linear classif")
#clf_l.fit(data_smp_encoded_secML)

print("Classifiers trained")

# Metric to use for training and performance evaluation
from secml.ml.peval.metrics import CMetricAccuracy
metric = CMetricAccuracy()

# Compute predictions on `raw_data_encoded_secML`, which is a different
# object from the one used for fitting above
y_lin_pred = clf_lin.predict(raw_data_encoded_secML.X)
#y_rbf_pred = clf_rbf.predict(ts.X)
#y_l_pred = clf_l.predict(raw_data_encoded_secML.X)

# Evaluate the accuracy of the classifier against the labels of that dataset
acc_lin = metric.performance_score(y_true=raw_data_encoded_secML.Y,
                                   y_pred=y_lin_pred)
#acc_rbf = metric.performance_score(y_true=ts.Y, y_pred=y_rbf_pred)
#acc_rbf = 0.0
#acc_l = metric.performance_score(y_true=raw_data_encoded_secML.Y, y_pred=y_l_pred)

print("Performance evaluations ended:")
print(acc_lin)
Пример #11
0
        # Combine data
        x_use = np.concatenate((x_train, x_poison), 0)
        y_use = np.concatenate((y_train, y_poison), 0)
    else:
        x_use, y_use = x_train, y_train

    # Convert to CArray
    x_use, y_use = CArray(x_use), CArray(y_use)
    x_test, y_test = CArray(x_test), CArray(y_test)

    print("Poison rato: %.2f" % (len(x_poison) / len(x_train)))

    # Fit classifier
    random_state = 2021

    metric = CMetricAccuracy()
    clf = CClassifierLogistic(C=1)
    clf.fit(x_use, y_use)
    print("Training of classifier complete!")

    # Compute predictions on a test set
    y_pred_tr = clf.predict(x_use)
    y_pred = clf.predict(x_test)

    # Evaluate the accuracy of the original classifier
    tr_acc = metric.performance_score(y_true=y_use, y_pred=y_pred_tr)
    te_acc = metric.performance_score(y_true=y_test, y_pred=y_pred)

    # Report metrics using poisoned model
    print("Poisoned | Train accuracy {:.1%}".format(tr_acc))
    print("Poisoned | Test accuracy {:.1%}".format(te_acc))
Пример #12
0
    return (x_train, y_train), (x_test, y_test), (0, 1)


# Script entry point.
# Usage: <script> POISON_RATIO NUM_REPS
if __name__ == "__main__":
    import sys
    # First CLI argument is parsed as a float, second as an int; how they
    # are used lies beyond this excerpt
    poison_ratio = float(sys.argv[1])
    num_reps = int(sys.argv[2])

    random_state = 2021

    # Step 1: Load dataset (train pair, test pair, and a (min, max) pair)
    (x_train, y_train), (x_test, y_test), (min_val, max_val) = load_data()

    # Metric to use for training and performance evaluation
    metric = CMetricAccuracy()

    # Create the logistic classifier with C=1
    clf = CClassifierLogistic(C=1)

    # Make train-val split from train data for attack purposes: 30% of the
    # training data is held out, stratified by label.
    # NOTE(review): `random_state` is not passed to `train_test_split` here,
    # so this call is not seeded by it — confirm whether that is intended.
    x_train_red, x_val, y_train_red, y_val = train_test_split(
        x_train, y_train, stratify=y_train, test_size=0.3)

    # Convert every split to CArray
    x_train, y_train = CArray(x_train), CArray(y_train)
    x_test, y_test = CArray(x_test), CArray(y_test)
    x_train_red, y_train_red = CArray(x_train_red), CArray(y_train_red)
    x_val, y_val = CArray(x_val), CArray(y_val)

    # We can now fit the classifier