Example #1
class TestCRoc(CUnitTest):
    """Unit test for CRoc."""
    def setUp(self):

        self.dl1 = CDLRandom(n_features=1000,
                             n_redundant=200,
                             n_informative=250,
                             n_clusters_per_class=2,
                             random_state=0)
        self.dl2 = CDLRandom(n_features=1000,
                             n_redundant=200,
                             n_informative=250,
                             n_clusters_per_class=2,
                             random_state=1000)
        self.ds1 = self.dl1.load()
        self.ds2 = self.dl2.load()

        self.svm = CClassifierSVM(C=1e-7).fit(self.ds1.X, self.ds1.Y)

        self.y1, self.s1 = self.svm.predict(self.ds1.X,
                                            return_decision_function=True)
        self.y2, self.s2 = self.svm.predict(self.ds2.X,
                                            return_decision_function=True)

        self.roc = CRoc()

    def test_roc_1sample(self):

        self.roc.compute(CArray([1]), CArray([0]))
        self.roc.average()

        # Expecting 3 points and not 1, as the ROC is bounded (a first and a last point are added)
        self.assertEqual(self.roc.fpr.size, 3)
        self.assertEqual(self.roc.tpr.size, 3)

    def test_compute(self):

        self.roc.compute(self.ds1.Y, self.s1[:, 1].ravel())

        fig = CFigure()
        fig.sp.semilogx(self.roc.fpr, self.roc.tpr)
        fig.sp.grid()
        fig.show()

    def test_mean(self):

        self.roc.compute([self.ds1.Y, self.ds2.Y],
                         [self.s1[:, 1].ravel(), self.s2[:, 1].ravel()])
        mean_fp, mean_tp, mean_std = self.roc.average(return_std=True)
        fig = CFigure(linewidth=2)
        fig.sp.errorbar(self.roc.mean_fpr, self.roc.mean_tpr, yerr=mean_std)
        for rep in range(self.roc.n_reps):
            fig.sp.semilogx(self.roc.fpr[rep], self.roc.tpr[rep])
        fig.sp.semilogx(mean_fp, mean_tp)
        fig.sp.grid()
        fig.show()
Example #2
    def test_performance(self):
        """ Compare the classifiers performance"""
        self.logger.info("Testing error performance of the "
                         "classifiers on the training set")

        for ridge in self.ridges:
            self.logger.info("RIDGE kernel: {:}".format(ridge.preprocess))

            if ridge.preprocess is not None:
                svm_kernel = ridge.preprocess.deepcopy()
            else:
                svm_kernel = None

            svm = CClassifierSVM(kernel=svm_kernel)
            svm.fit(self.dataset.X, self.dataset.Y)

            label_svm, y_svm = svm.predict(
                self.dataset.X, return_decision_function=True)
            label_ridge, y_ridge = ridge.predict(
                self.dataset.X, return_decision_function=True)

            acc_svm = CMetric.create('f1').performance_score(
                self.dataset.Y, label_svm)
            acc_ridge = CMetric.create('f1').performance_score(
                self.dataset.Y, label_ridge)

            self.logger.info("Accuracy of SVM: {:}".format(acc_svm))
            self.assertGreater(acc_svm, 0.90,
                               "Accuracy of SVM: {:}".format(acc_svm))
            self.logger.info("Accuracy of ridge: {:}".format(acc_ridge))
            self.assertGreater(acc_ridge, 0.90,
                               "Accuracy of ridge: {:}".format(acc_ridge))
Example #3
    def test_performance(self):
        """ Compare the classifiers performance"""
        self.logger.info("Testing error performance of the "
                         "classifiers on the training set")

        for sgd in self.sgds:

            self.logger.info("SGD kernel: {:}".format(sgd.preprocess))

            if sgd.preprocess is not None:
                k = sgd.preprocess.deepcopy()
            else:
                k = None
            svm = CClassifierSVM(kernel=k)

            svm.fit(self.dataset.X, self.dataset.Y)

            label_svm, y_svm = svm.predict(self.dataset.X,
                                           return_decision_function=True)
            label_sgd, y_sgd = sgd.predict(self.dataset.X,
                                           return_decision_function=True)

            acc_svm = CMetric.create('f1').performance_score(
                self.dataset.Y, label_svm)
            acc_sgd = CMetric.create('f1').performance_score(
                self.dataset.Y, label_sgd)

            self.logger.info("Accuracy of SVM: {:}".format(acc_svm))
            self.assertGreater(acc_svm, 0.90,
                               "Accuracy of SVM: {:}".format(acc_svm))
            self.logger.info("Accuracy of SGD: {:}".format(acc_sgd))
            self.assertGreater(acc_sgd, 0.90,
                               "Accuracy of SGD: {:}".format(acc_sgd))
Example #4
    def test_linear_svm(self):
        """Performs tests on linear SVM."""
        self.logger.info("Testing SVM linear variants (kernel and not)")

        # Instantiating a linear SVM and an SVM with a linear kernel
        linear_svm = CClassifierSVM(kernel=None)
        kernel_linear_svm = self.svms[0]

        self.logger.info("SVM w/ linear kernel in the primal")
        self.assertIsNone(linear_svm.kernel)

        self.logger.info("Training both classifiers on dense data")
        linear_svm.fit(self.dataset.X, self.dataset.Y)
        kernel_linear_svm.fit(self.dataset.X, self.dataset.Y)

        linear_svm_pred_y, linear_svm_pred_score = linear_svm.predict(
            self.dataset.X, return_decision_function=True)
        kernel_linear_svm_pred_y, \
        kernel_linear_svm_pred_score = kernel_linear_svm.predict(
            self.dataset.X, return_decision_function=True)

        # check prediction
        self.assert_array_equal(linear_svm_pred_y, kernel_linear_svm_pred_y)

        self.logger.info("Training both classifiers on sparse data")
        linear_svm.fit(self.dataset_sparse.X, self.dataset_sparse.Y)
        kernel_linear_svm.fit(self.dataset_sparse.X, self.dataset_sparse.Y)

        self.assertTrue(
            linear_svm.w.issparse, "Weights vector is not sparse even "
            "if training data is sparse")

        linear_svm_pred_y, linear_svm_pred_score = linear_svm.predict(
            self.dataset_sparse.X, return_decision_function=True)
        kernel_linear_svm_pred_y, \
        kernel_linear_svm_pred_score = kernel_linear_svm.predict(
            self.dataset_sparse.X, return_decision_function=True)

        # check prediction
        self.assert_array_equal(linear_svm_pred_y, kernel_linear_svm_pred_y)
Example #5
class TestCPerfEvaluator(CUnitTest):
    """Unit test for CKernel."""
    def setUp(self):

        # Create dummy dataset (we want a test set different from the training set)
        loader = CDLRandom(random_state=50000)
        self.training_dataset = loader.load()
        self.test_dataset = loader.load()

        # CREATE CLASSIFIERS
        kernel = CKernel.create('rbf')
        self.svm = CClassifierSVM(kernel=kernel)
        self.svm.verbose = 1

        self.logger.info("Using kernel {:}".format(self.svm.kernel.class_type))

    def test_parameters_setting(self):

        # Changing default parameters to be sure they are not used
        self.svm.set_params({'C': 25, 'kernel.gamma': 1e-1, 'n_jobs': 2})

        xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [1, 50]}

        # Run cross-validation to choose the best parameters
        xval_splitter = CDataSplitter.create('kfold',
                                             num_folds=5,
                                             random_state=50000)

        # Set the best parameters inside the classifier
        self.svm.estimate_parameters(self.training_dataset, xval_parameters,
                                     xval_splitter, 'accuracy')

        self.logger.info("SVM has now the following parameters: {:}".format(
            self.svm.get_params()))

        self.assertEqual(self.svm.get_params()['C'], 1)
        self.assertEqual(self.svm.get_params()['kernel.gamma'], 50)

        # Now we compare the parameters chosen before with a new evaluator
        perf_eval = CPerfEvaluatorXVal(xval_splitter,
                                       CMetric.create('accuracy'))
        perf_eval.verbose = 1

        best_params, best_score = perf_eval.evaluate_params(
            self.svm, self.training_dataset, xval_parameters)

        for param in xval_parameters:
            self.logger.info("Best '{:}' is: {:}".format(
                param, best_params[param]))
            self.assertEqual(best_params[param], self.svm.get_params()[param])

        self.svm.verbose = 0

        parameters_combination = [[1, 1], [1, 50], [10, 1], [10, 50], [100, 1],
                                  [100, 50]]
        par_comb_score = CArray.zeros(len(parameters_combination))
        for comb in range(len(parameters_combination)):

            this_fold_score = []
            num_xval_fold = len(xval_splitter.tr_idx)

            for f in range(num_xval_fold):
                self.svm.set("C", parameters_combination[comb][0])
                self.svm.kernel.gamma = parameters_combination[comb][1]

                self.svm.fit(
                    self.training_dataset[xval_splitter.tr_idx[f], :].X,
                    self.training_dataset[xval_splitter.tr_idx[f], :].Y)

                this_fold_predicted = self.svm.predict(
                    self.training_dataset[xval_splitter.ts_idx[f], :].X)

                this_fold_accuracy = skm.accuracy_score(
                    self.training_dataset[
                        xval_splitter.ts_idx[f], :].Y.get_data(),
                    this_fold_predicted.get_data())
                this_fold_score.append(this_fold_accuracy)

            par_comb_score[comb] = (np.mean(this_fold_score))
            self.logger.info("this fold mean: {:}".format(
                par_comb_score[comb]))

        max_combination_score = par_comb_score.max()
        better_param_comb = parameters_combination[par_comb_score.argmax()]
        self.logger.info("max combination score founded here: {:}".format(
            max_combination_score))
        self.logger.info(
            "max comb score founded during xval {:}".format(best_score))

        self.assertEqual(max_combination_score, best_score)

        # Set parameters found by xval and check that they match those chosen here
        self.logger.info("the parameters selected by our xval are:")
        self.svm.set_params(best_params)
        self.logger.info("C: {:}".format(self.svm.C))
        self.logger.info("kernel.gamma: {:}".format(self.svm.kernel.gamma))
        # check c
        self.assertEqual(better_param_comb[0], self.svm.C)
        # check gamma
        self.assertEqual(better_param_comb[1], self.svm.kernel.gamma)

    def test_nan_metric_value(self):

        # Changing default parameters to be sure they are not used
        self.svm.set_params({'C': 25, 'kernel.gamma': 1e-1})
        xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [1, 50]}

        # Run cross-validation to choose the best parameters
        xval_splitter = CDataSplitter.create('kfold',
                                             num_folds=5,
                                             random_state=50000)

        self.logger.info("Testing metric with some nan")

        some_nan_metric = CMetricFirstNan()

        # Now we compare the parameters chosen before with a new evaluator
        perf_eval = CPerfEvaluatorXVal(xval_splitter, some_nan_metric)
        perf_eval.verbose = 1

        best_params, best_score = perf_eval.evaluate_params(
            self.svm, self.training_dataset, xval_parameters, pick='last')

        self.logger.info("best score : {:}".format(best_score))

        # The xval should select the only actual value (the others are nan)
        self.assertEqual(best_score, 1.)

        self.logger.info("Testing metric with all nan")

        # This test case involves an all-nan slice
        self.logger.filterwarnings(action="ignore",
                                   message="All-NaN slice encountered",
                                   category=RuntimeWarning)

        all_nan_metric = CMetricAllNan()

        # Now we compare the parameters chosen before with a new evaluator
        perf_eval = CPerfEvaluatorXVal(xval_splitter, all_nan_metric)
        perf_eval.verbose = 1

        with self.assertRaises(ValueError):
            perf_eval.evaluate_params(self.svm,
                                      self.training_dataset,
                                      xval_parameters,
                                      pick='last')

    def _run_multiclass(self, tr, multiclass, xval_params, expected_best):

        xval_splitter = CDataSplitter.create('kfold',
                                             num_folds=3,
                                             random_state=50000)

        # Set the best parameters inside the classifier
        best_params = multiclass.estimate_parameters(tr, xval_params,
                                                     xval_splitter, 'accuracy')

        self.logger.info(
            "Multiclass SVM has now the following parameters: {:}".format(
                multiclass.get_params()))

        for clf_idx, clf in enumerate(multiclass._binary_classifiers):
            self.assertEqual(clf.C, expected_best['C'])
            self.assertEqual(clf.kernel.gamma, expected_best['kernel.gamma'])

        # Final test: fit using best parameters
        multiclass.fit(tr.X, tr.Y)

        for clf in multiclass._binary_classifiers:
            for param in best_params:
                self.assertEqual(clf.get_params()[param], best_params[param])

    def test_params_multiclass(self):
        """Parameter estimation for multiclass classifiers."""
        # Create a dummy training dataset
        tr = CDLRandom(n_classes=4, n_clusters_per_class=1,
                       random_state=50000).load()

        kernel = CKernel.create('rbf')
        multiclass = CClassifierMulticlassOVA(CClassifierSVM,
                                              C=1,
                                              kernel=kernel)
        multiclass.verbose = 1

        xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [0.1, 1]}

        expected = {'C': 10.0, 'kernel.gamma': 0.1}

        self._run_multiclass(tr, multiclass, xval_parameters, expected)

        self.logger.info("Testing with preprocessor")

        kernel = CKernel.create('rbf')
        multiclass = CClassifierMulticlassOVA(CClassifierSVM,
                                              C=1,
                                              kernel=kernel,
                                              preprocess='min-max')
        multiclass.verbose = 1

        xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [0.1, 1]}

        expected = {'C': 10.0, 'kernel.gamma': 0.1}

        self._run_multiclass(tr, multiclass, xval_parameters, expected)
Example #6
# metric = CMetricAccuracy()
# acc = metric.performance_score(y_true = ds_te_secml.Y, y_pred = preds)
# print("Accuracy on test set: {:.2%}".format(acc))
# probs = secml_sklearn_clf.predict_proba(ds_te_secml.X)       #Doesn't work
#
# # sklearn classifiers are not supported here for adversarial attacks; only secml's native SVM is
# ###############################################################
#
# =============================================================================
# This won't work if we want to specify the target class for each example
x, y = ds_te_secml[:, :].X, ds_te_secml[:, :].Y

#secml_clf = CClassifierMulticlassOVA(CClassifierSVM, kernel = CKernelRBF(gamma = 10), C = 1)
secml_clf = CClassifierSVM(kernel=CKernelRBF(gamma=10), C=1)
secml_clf.fit(ds_tr_secml)
preds = secml_clf.predict(ds_te_secml.X)
metric = CMetricAccuracy()
acc = metric.performance_score(y_true=ds_te_secml.Y, y_pred=preds)
print("Accuracy on test set: {:.2%}".format(acc))

# Performing the attack
noise_type = 'l2'
dmax = 0.4
lb, ub = None, None  # with 0, 1 it goes out of bounds
y_target = None  # y_target can be set to a class label to craft a targeted adversarial example

#solver_params = {
#    'eta': 0.3,
#    'max_iter': 100,
#    'eps': 1e-4
#}
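A sketch of how the parameters above could feed a PGD-LS evasion attack. The constructor arguments (in particular the double_init_ds name) and the solver settings, which mirror the commented-out values above, are assumptions and may differ across secml versions.

from secml.adv.attacks.evasion import CAttackEvasionPGDLS

# Solver settings mirroring the commented-out values above (illustrative)
solver_params = {'eta': 0.3, 'max_iter': 100, 'eps': 1e-4}

attack = CAttackEvasionPGDLS(classifier=secml_clf,
                             double_init_ds=ds_tr_secml,  # assumed argument name
                             distance=noise_type,
                             dmax=dmax,
                             lb=lb, ub=ub,
                             y_target=y_target,
                             solver_params=solver_params)

# Run the attack on a single test sample
x0, y0 = ds_te_secml[0, :].X, ds_te_secml[0, :].Y
y_adv, _, adv_ds, _ = attack.run(x0, y0)
print("Original class: {:}, class after attack: {:}".format(
    y0.item(), y_adv.item()))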
Example #7
class TestCLossRegression(CUnitTest):
    """Unittests for CLossRegression and subclasses."""
    def setUp(self):

        self.ds = CDLRandom(n_samples=50, random_state=0).load()

        self.logger.info("Train an SVM and classify dataset...")
        self.svm = CClassifierSVM()
        self.svm.fit(self.ds.X, self.ds.Y)
        self.labels, self.scores = self.svm.predict(
            self.ds.X, return_decision_function=True)

    def test_in_out(self):
        """Unittest for input and output to loss classes"""
        def _check_loss(l, n_samples):

            self.assertIsInstance(l, CArray)
            self.assertTrue(l.isdense)
            self.assertEqual(1, l.ndim)
            self.assertEqual(n_samples, l.size)
            self.assertIsSubDtype(l.dtype, float)

        for loss_id in ('e-insensitive', 'e-insensitive-squared', 'quadratic'):

            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)

            loss_pos = loss_class.loss(self.ds.Y, self.scores[:, 1].ravel())
            loss_mean_pos = loss_pos.mean()
            self.logger.info(
                "{:}.loss(y_true, scores[:, 1].ravel()).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean_pos))
            _check_loss(loss_pos, self.ds.Y.size)

            loss = loss_class.loss(self.ds.Y[0], self.scores[0, 1].ravel())
            loss_mean = loss.mean()
            self.logger.info(
                "{:}.loss(y_true[0], scores[0,:]).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean))
            _check_loss(loss, 1)

            with self.assertRaises(ValueError):
                loss_class.loss(self.ds.Y, self.scores[:, 1])

    def test_draw(self):
        """Drawing the loss functions.

        Inspired by: https://en.wikipedia.org/wiki/Loss_functions_for_classification

        """
        fig = CFigure()
        x = CArray.arange(-1, 3.01, 0.01)

        for loss_id in ('e-insensitive', 'e-insensitive-squared', 'quadratic'):

            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)
            fig.sp.plot(x, loss_class.loss(CArray([1]), x), label=loss_id)

        fig.sp.grid()
        fig.sp.legend()

        fig.show()
training.X = normalizer.fit_transform(training.X)
validation.X = normalizer.transform(validation.X)
test.X = normalizer.transform(test.X)

# Metric to use for training and performance evaluation
metric = CMetricAccuracy()

# Creation of the multiclass classifier
classifier = CClassifierSVM(kernel=CKernelRBF(gamma=10), C=1)

# We can now fit the classifier
classifier.fit(training.X, training.Y)
print("Training of classifier complete!")

# Compute predictions on a test set
predictionY = classifier.predict(test.X)
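The metric instantiated above is never applied in this snippet; a minimal evaluation of the predictions, using the same CMetricAccuracy API as the other examples, could read:

# Evaluate the accuracy of the classifier on the (clean) test set
accuracy = metric.performance_score(y_true=test.Y, y_pred=predictionY)
print("Accuracy on test set: {:.2%}".format(accuracy))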

# Bounds of the attack space. Can be set to `None` for unbounded
lowerBound, upperBound = validation.X.min(), validation.X.max()

# Should be chosen depending on the optimization problem
solver_params = {
    'eta': 0.05,
    'eta_min': 0.05,
    'eta_max': None,
    'max_iter': 100,
    'eps': 1e-6
}

poisonAttack = CAttackPoisoningSVM(classifier=classifier,
                                   training_data=training,
                                   # The remaining arguments are assumptions;
                                   # the original snippet is truncated here
                                   val=validation,
                                   lb=lowerBound, ub=upperBound,
                                   solver_params=solver_params)
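A minimal sketch of running the poisoning attack above, assuming the n_points attribute and run method of CAttackPoisoningSVM and an illustrative number of poisoning points:

poisonAttack.n_points = 20  # number of poisoning points to craft (illustrative)

# Run the attack: returns predictions of the poisoned classifier on the test
# set, their scores, the crafted poisoning points and the objective value
pois_y_pred, pois_scores, pois_ds, f_opt = poisonAttack.run(test.X, test.Y)

pois_acc = metric.performance_score(y_true=test.Y, y_pred=pois_y_pred)
print("Accuracy after poisoning: {:.2%}".format(pois_acc))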
# Train the classifier
print("start training")
clf_lin.fit(data_smp_encoded_secML)
#print("linear training ended, beginning rbf")
#clf_rbf.fit(tr)
#print("start linear classif")
#clf_l.fit(data_smp_encoded_secML)

print("Classifiers trained")

# Metric to use for training and performance evaluation
from secml.ml.peval.metrics import CMetricAccuracy
metric = CMetricAccuracy()

# Compute predictions on a test set
y_lin_pred = clf_lin.predict(raw_data_encoded_secML.X)
#y_rbf_pred = clf_rbf.predict(ts.X)
#y_l_pred = clf_l.predict(raw_data_encoded_secML.X)

# Evaluate the accuracy of the classifier
acc_lin = metric.performance_score(y_true=raw_data_encoded_secML.Y,
                                   y_pred=y_lin_pred)
#acc_rbf = metric.performance_score(y_true=ts.Y, y_pred=y_rbf_pred)
#acc_rbf = 0.0
#acc_l = metric.performance_score(y_true=raw_data_encoded_secML.Y, y_pred=y_l_pred)

print("Performance evaluations ended:")
print(acc_lin)
#print(acc_rbf)
#print(acc_l)
clf_lin.fit(tr_set)

## Select and set the best training parameters for the linear classifier
#print("Estimating the best training parameters for linear kernel...")
#best_lin_params = clf_l.estimate_parameters(
#    dataset=tr_set,
#    parameters=xval_lin_params,
#    splitter=xval_splitter,
#    metric='accuracy',
#    perf_evaluator='xval'
#)

#clf_l.fit(tr_set)

# Compute predictions on a test set
y_pred = clf_lin.predict(ts_set.X)

# Metric to use for training and performance evaluation
from secml.ml.peval.metrics import CMetricAccuracy
metric = CMetricAccuracy()

# Evaluate the accuracy of the classifier
acc = metric.performance_score(y_true=ts_set.Y, y_pred=y_pred)

print("Accuracy on test set: {:.2%}".format(acc))

import random
from secml.adv.attacks.evasion import CAttackEvasionPGD
# Perform adversarial attacks
noise_type = 'l2'  # Type of perturbation 'l1' or 'l2'
#dmax = 20  # Maximum perturbation
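A hedged sketch of completing the PGD evasion attack with the classifier and data above; the perturbation budget, solver settings, and the double_init_ds argument name are assumptions.

dmax = 0.5  # maximum perturbation (illustrative)
lb, ub = None, None  # unbounded attack space
y_target = None  # error-generic (untargeted) attack
solver_params = {'eta': 0.5, 'max_iter': 100, 'eps': 1e-4}  # illustrative

pgd_attack = CAttackEvasionPGD(classifier=clf_lin,
                               double_init_ds=tr_set,  # assumed argument name
                               distance=noise_type,
                               dmax=dmax,
                               lb=lb, ub=ub,
                               y_target=y_target,
                               solver_params=solver_params)

# Attack a random test sample
idx = random.randrange(ts_set.num_samples)
x0, y0 = ts_set[idx, :].X, ts_set[idx, :].Y
y_adv, _, adv_ds, _ = pgd_attack.run(x0, y0)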
Example #11
class TestCLossClassification(CUnitTest):
    """Unittests for CLossClassification and subclasses."""
    def setUp(self):

        self.ds = CDLRandom(n_samples=50, random_state=0).load()

        self.logger.info("Train an SVM and classify dataset...")
        self.svm = CClassifierSVM()
        self.svm.fit(self.ds.X, self.ds.Y)
        self.labels, self.scores = self.svm.predict(
            self.ds.X, return_decision_function=True)

    def test_one_at_zero(self):
        """Testing that classification loss return 1 for input 0."""

        for loss_id in ('hinge', 'hinge-squared', 'square', 'log'):

            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)

            self.assertEqual(CArray([1.0]),
                             loss_class.loss(CArray([1]), CArray([0])))

    def test_in_out(self):
        """Unittest for input and output to loss classes"""
        def _check_loss(l, n_samples):

            self.assertIsInstance(l, CArray)
            self.assertTrue(l.isdense)
            self.assertEqual(1, l.ndim)
            self.assertEqual(n_samples, l.size)
            self.assertEqual(l.dtype, float)

        for loss_id in ('hinge', 'hinge-squared', 'square', 'log'):

            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)

            loss = loss_class.loss(self.ds.Y, self.scores)
            loss_mean = loss.mean()
            self.logger.info("{:}.loss(y_true, scores).mean():\n{:}".format(
                loss_class.__class__.__name__, loss_mean))
            _check_loss(loss, self.ds.Y.size)

            loss_pos = loss_class.loss(self.ds.Y, self.scores[:, 1].ravel())
            loss_mean_pos = loss_pos.mean()
            self.logger.info(
                "{:}.loss(y_true, scores[:, 1].ravel()).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean_pos))
            _check_loss(loss_pos, self.ds.Y.size)

            self.assertEqual(loss_mean, loss_mean_pos)

            loss = loss_class.loss(self.ds.Y, self.scores, pos_label=0)
            loss_mean = loss.mean()
            self.logger.info(
                "{:}.loss(y_true, scores, pos_label=0).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean))
            _check_loss(loss, self.ds.Y.size)

            loss_neg = loss_class.loss(self.ds.Y, self.scores[:, 0].ravel())
            loss_mean_neg = loss_neg.mean()
            self.logger.info(
                "{:}.loss(y_true, scores[:,0].ravel()).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean_neg))
            _check_loss(loss_neg, self.ds.Y.size)

            self.assertEqual(loss_mean, loss_mean_neg)

            loss = loss_class.loss(self.ds.Y[0], self.scores[0, :])
            loss_mean = loss.mean()
            self.logger.info(
                "{:}.loss(y_true[0], scores[0,:]).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean))
            _check_loss(loss, 1)

    def test_draw(self):
        """Drawing the loss functions.

        Inspired by: https://en.wikipedia.org/wiki/Loss_functions_for_classification

        """
        fig = CFigure()
        x = CArray.arange(-1, 3.01, 0.01)

        fig.sp.plot(x,
                    CArray([1 if i <= 0 else 0 for i in x]),
                    label='0-1 indicator')

        for loss_id in ('hinge', 'hinge-squared', 'square', 'log'):

            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)
            fig.sp.plot(x, loss_class.loss(CArray([1]), x), label=loss_id)

        fig.sp.grid()
        fig.sp.legend()

        fig.show()

    def test_grad(self):
        """Compare analytical gradients with its numerical approximation."""
        def _loss_wrapper(scores, loss, true_labels):
            return loss.loss(true_labels, scores)

        def _dloss_wrapper(scores, loss, true_labels):
            return loss.dloss(true_labels, scores)

        for loss_id in ('hinge', 'hinge-squared', 'square', 'log'):
            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)

            n_elems = 1
            y_true = CArray.randint(0, 2, n_elems).todense()
            score = CArray.randn((n_elems, ))

            check_grad_val = CFunction(
                _loss_wrapper, _dloss_wrapper).check_grad(score,
                                                          1e-8,
                                                          loss=loss_class,
                                                          true_labels=y_true)
            self.logger.info(
                "Gradient difference between analytical loss "
                "gradient and numerical gradient: %s", str(check_grad_val))
            self.assertLess(
                check_grad_val, 1e-4,
                "the gradient is wrong {:} for {:} loss".format(
                    check_grad_val, loss_id))
Example #12
class TestCRoc(CUnitTest):
    """Unit test for CPlotMetric (ROC plots)."""
    def setUp(self):

        self.ds_loader = CDLRandom(n_features=1000,
                                   n_redundant=200,
                                   n_informative=250,
                                   n_clusters_per_class=2)
        self.ds1 = self.ds_loader.load()
        self.ds2 = self.ds_loader.load()

        self.y1 = self.ds1.Y
        self.y2 = self.ds2.Y

        self.svm = CClassifierSVM(C=1e-7).fit(self.ds1.X, self.ds1.Y)

        _, self.s1 = self.svm.predict(self.ds1.X,
                                      return_decision_function=True)
        _, self.s2 = self.svm.predict(self.ds2.X,
                                      return_decision_function=True)

        self.s1 = self.s1[:, 1].ravel()
        self.s2 = self.s2[:, 1].ravel()

        # ROC without averaging (2 repetitions)
        self.roc_nomean = CRoc()
        self.roc_nomean.compute([self.y1, self.y2], [self.s1, self.s2])

        # Roc with average (2 repetitions)
        self.roc_wmean = CRoc()
        self.roc_wmean.compute([self.y1, self.y2], [self.s1, self.s2])
        self.roc_wmean.average()

    def test_standard(self):
        """Plot of standard ROC."""

        # Testing without input CFigure
        roc_plot = CFigure()
        roc_plot.sp.title('ROC Curve Standard')
        # Plot the average ROC curve
        roc_plot.sp.plot_roc(self.roc_wmean.mean_fpr, self.roc_wmean.mean_tpr)

        roc_plot.show()

    def test_mean(self):
        """Plot of average ROC."""

        # Testing without input CFigure
        roc_plot = CFigure()
        roc_plot.sp.title('ROC Curve')
        # Plotting 2 times (to show 2 curves)
        roc_plot.sp.plot_roc_mean(self.roc_wmean,
                                  label='roc1 mean',
                                  plot_std=True)
        roc_plot.sp.plot_roc_reps(self.roc_wmean, label='roc1')

        roc_plot.show()

        # Testing mean plot with no average
        with self.assertRaises(ValueError):
            roc_plot.sp.plot_roc_mean(self.roc_nomean)

    def test_custom_params(self):
        """Plot of ROC altering default parameters."""

        # Testing without input CFigure
        roc_plot = CFigure()
        roc_plot.sp.title('ROC Curve - Custom')
        roc_plot.sp.xlim(0.1, 100)
        roc_plot.sp.ylim(30, 100)
        roc_plot.sp.yticks([70, 80, 90, 100])
        roc_plot.sp.yticklabels(['70', '80', '90', '100'])
        # Plotting 2 times (to show 2 curves)
        roc_plot.sp.plot_roc_mean(self.roc_wmean, label='roc1')
        roc_plot.sp.plot_roc_mean(self.roc_wmean, label='roc2')

        roc_plot.show()

    def test_single(self):
        """Plot of ROC repetitions."""

        # Testing without input CFigure
        roc_plot = CFigure()
        roc_plot.sp.title('ROC Curve Repetitions')
        # Plotting 2 times (to show multiple curves)
        # add one curve per repetition (rep 0 and rep 1 of roc 1)
        roc_plot.sp.plot_roc_reps(self.roc_nomean, label='roc1')
        # add one curve per repetition (rep 0 and rep 1 of roc 2)
        roc_plot.sp.plot_roc_reps(self.roc_nomean, label='roc2')

        roc_plot.show()

    def test_compare_sklearn(self):

        import numpy as np

        from sklearn import svm, datasets
        from sklearn.metrics import roc_curve, auc
        from sklearn.model_selection import StratifiedKFold

        from secml.figure import CFigure
        roc_fig = CFigure(width=12)

        # import some data to play with
        iris = datasets.load_iris()
        X = iris.data
        y = iris.target
        X, y = X[y != 2], y[y != 2]
        n_samples, n_features = X.shape

        # Add noisy features
        random_state = np.random.RandomState(0)
        X = np.c_[X, random_state.randn(n_samples, 200 * n_features)]

        # Classification and ROC analysis

        # Run classifier with cross-validation and plot ROC curves
        classifier = svm.SVC(kernel='linear',
                             probability=True,
                             random_state=random_state)

        roc_fig.subplot(1, 2, 1)

        mean_tpr = 0.0
        mean_fpr = np.linspace(0, 1, 1000)

        cv = StratifiedKFold(n_splits=6)
        for i, (train, test) in enumerate(cv.split(X, y)):
            probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
            # Compute ROC curve and area under the curve
            fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
            mean_tpr += np.interp(mean_fpr, fpr, tpr)
            mean_tpr[0] = 0.0
            roc_auc = auc(fpr, tpr)
            roc_fig.sp.plot(fpr,
                            tpr,
                            linewidth=1,
                            label='ROC fold %d (area = %0.2f)' % (i, roc_auc))

        roc_fig.sp.plot([0, 1], [0, 1],
                        '--',
                        color=(0.6, 0.6, 0.6),
                        label='Luck')

        mean_tpr /= cv.get_n_splits()
        mean_tpr[-1] = 1.0
        mean_auc = auc(mean_fpr, mean_tpr)

        roc_fig.sp.plot(mean_fpr,
                        mean_tpr,
                        'k--',
                        label='Mean ROC (area = %0.2f)' % mean_auc,
                        linewidth=2)

        roc_fig.sp.xlim([-0.05, 1.05])
        roc_fig.sp.ylim([-0.05, 1.05])
        roc_fig.sp.xlabel('False Positive Rate')
        roc_fig.sp.ylabel('True Positive Rate')
        roc_fig.sp.title('Sklearn Receiver operating characteristic example')
        roc_fig.sp.legend(loc="lower right")
        roc_fig.sp.grid()

        self.logger.info("Plotting using our CPLotRoc")

        roc_fig.subplot(1, 2, 2)

        score = []
        true_y = []
        for i, (train, test) in enumerate(cv.split(X, y)):
            probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
            true_y.append(CArray(y[test]))
            score.append(CArray(probas_[:, 1]))

        self.roc_wmean = CRoc()
        self.roc_wmean.compute(true_y, score)
        fp, tp = self.roc_wmean.average()

        roc_fig.sp.plot([0, 100], [0, 100],
                        '--',
                        color=(0.6, 0.6, 0.6),
                        label='Luck')

        roc_fig.sp.xticks([0, 20, 40, 60, 80, 100])
        roc_fig.sp.xticklabels(['0', '20', '40', '60', '80', '100'])

        roc_fig.sp.plot_roc_mean(self.roc_wmean,
                                 plot_std=True,
                                 logx=False,
                                 style='go-',
                                 label='Mean ROC (area = %0.2f)' %
                                 (auc(fp.tondarray(), tp.tondarray())))

        roc_fig.sp.xlim([-0.05 * 100, 1.05 * 100])
        roc_fig.sp.ylim([-0.05 * 100, 1.05 * 100])
        roc_fig.sp.title('SecML Receiver operating characteristic example')
        roc_fig.sp.legend(loc="lower right")
        roc_fig.show()
xval_splitter = CDataSplitterKFold(num_folds=3, random_state=random_state)

# Select and set the best training parameters for the classifier
print("Estimating the best training parameters...")
best_params = clf.estimate_parameters(dataset=tr,
                                      parameters=xval_params,
                                      splitter=xval_splitter,
                                      metric='accuracy',
                                      perf_evaluator='xval')
print("The best training parameters are: ", best_params)

# We can now fit the classifier
clf.fit(tr)

# Compute predictions on a test set
y_pred = clf.predict(ts.X)

# Evaluate the accuracy of the classifier
acc = metric.performance_score(y_true=ts.Y, y_pred=y_pred)

print("Accuracy on test set: {:.2%}".format(acc))

x0, y0 = ts[5, :].X, ts[5, :].Y  # Initial sample; add randomness?
print(x0.dtype)
print(y0.dtype)

noise_type = 'l2'  # Type of perturbation 'l1' or 'l2'
dmax = 0.4  # Maximum perturbation
lb, ub = 0, 1  # Bounds of the attack space. Can be set to `None` for unbounded
y_target = None  # None if `error-generic` or a class label for `error-specific`
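The snippet stops before constructing the attack itself; a sketch of the remaining steps, assuming CAttackEvasionPGDLS with illustrative solver parameters (argument names such as double_init_ds may differ across secml versions):

from secml.adv.attacks.evasion import CAttackEvasionPGDLS

# Illustrative solver parameters (assumed values)
solver_params = {
    'eta': 0.3,
    'eta_min': 0.1,
    'eta_max': None,
    'max_iter': 100,
    'eps': 1e-4
}

attack = CAttackEvasionPGDLS(classifier=clf,
                             double_init_ds=tr,  # assumed argument name
                             distance=noise_type,
                             dmax=dmax,
                             lb=lb, ub=ub,
                             y_target=y_target,
                             solver_params=solver_params)

y_adv, _, adv_ds, _ = attack.run(x0, y0)
print("Original class: {:}, class after attack: {:}".format(
    y0.item(), y_adv.item()))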
Example #14
training_data = CDataset(x_train, y)
validation_data = CDataset(x_val, y_val)
test_data = CDataset(xtt, ytt)

del xtr
del ytr

metric = CMetricAccuracy()

clf = CClassifierSVM(kernel=CKernelRBF(gamma=GAMMA), C=C)

# We can now fit the classifier
clf.fit(training_data.X, training_data.Y)
print("Training of classifier complete!")
# Compute predictions on a test set
y_pred = clf.predict(test_data.X)
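The predictions above are never scored in this snippet; a minimal accuracy check using the metric defined earlier could read:

acc = metric.performance_score(y_true=test_data.Y, y_pred=y_pred)
print("Accuracy on clean test set: {:.2%}".format(acc))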

# Bounds of the attack space. Can be set to `None` for unbounded
lb, ub = validation_data.X.min(), validation_data.X.max()
n_poisoning_points = int(
    n_tr * poison_percentage)  # Number of poisoning points to generate

# Should be chosen depending on the optimization problem
solver_params = {
    'eta': 0.05,
    'eta_min': 0.05,
    'eta_max': None,
    'max_iter': 100,
    'eps': 1e-6
}
# Non-adaptive attacker #################################################################################
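A sketch of constructing and running the (non-adaptive) poisoning attack with the values above, assuming the CAttackPoisoningSVM API used in secml's poisoning examples; the import path and argument names are assumptions.

from secml.adv.attacks import CAttackPoisoningSVM

pois_attack = CAttackPoisoningSVM(classifier=clf,
                                  training_data=training_data,
                                  val=validation_data,  # assumed argument name
                                  lb=lb, ub=ub,
                                  solver_params=solver_params)
pois_attack.n_points = n_poisoning_points

# Run the attack and evaluate the poisoned classifier on the test set
pois_y_pred, pois_scores, pois_ds, f_opt = pois_attack.run(test_data.X,
                                                           test_data.Y)
pois_acc = metric.performance_score(y_true=test_data.Y, y_pred=pois_y_pred)
print("Accuracy after poisoning attack: {:.2%}".format(pois_acc))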