Exemplo n.º 1
0
    def _create_clf(dnn):
        """Build a `CClassifierDNR` that uses one shared layer classifier.

        The combiner and the layer classifier are two separate RBF-kernel
        SVMs created with the same settings (`gamma=1`, `C=1`). The DNR is
        attached to the `conv2` and `relu` layers of `dnn`.

        Parameters
        ----------
        dnn
            Network forwarded as-is to `CClassifierDNR`.

        Returns
        -------
        CClassifierDNR

        """
        def _new_rbf_svm():
            # A fresh instance on every call: combiner and layer classifier
            # must not be the same object.
            return CClassifierSVM(kernel=CKernelRBF(gamma=1), C=1)

        monitored_layers = ['conv2', 'relu']
        dnr_combiner = _new_rbf_svm()
        dnr_layer_clf = _new_rbf_svm()

        return CClassifierDNR(
            dnr_combiner, dnr_layer_clf, dnn, monitored_layers, -inf)
Exemplo n.º 2
0
    def test_grad_tr_params_linear(self):
        """Check `grad_tr_params` for an SVM created without a kernel.

        The check is repeated with no preprocessor and with a min-max
        normalizer configured with `(-10, 10)`.
        """
        preprocessors = (None, CNormalizerMinMax((-10, 10)))

        for preprocess in preprocessors:
            svm = CClassifierSVM(store_dual_vars=True, preprocess=preprocess)
            svm.fit(self.ds.X, self.ds.Y)
            self._test_grad_tr_params(svm)
Exemplo n.º 3
0
    def test_alignment(self):
        """Compare a Sec-SVM against a standard SVM on a random dataset.

        Both classifiers are trained on the same data, `_compute_alignment`
        is run on the pair, and their predictions on the training samples
        are then required to be (almost) equal.
        """
        ds = CDLRandom(n_samples=100,
                       n_features=500,
                       n_redundant=0,
                       n_informative=10,
                       n_clusters_per_class=1,
                       random_state=0).load()

        self.logger.info("Train Sec SVM")
        sec_svm = CClassifierSecSVM(C=1, eta=0.1, eps=1e-2, lb=-0.1, ub=0.5)
        sec_svm.verbose = 2
        sec_svm.fit(ds.X, ds.Y)

        self.logger.info("Train SVM")
        svm = CClassifierSVM(C=1)
        svm.fit(ds.X, ds.Y)

        self._compute_alignment(ds, sec_svm, svm)

        # Each prediction must come from its own classifier: comparing the
        # Sec-SVM output against itself would make the final check vacuous.
        svm_pred = svm.predict(ds.X)
        secsvm_pred = sec_svm.predict(ds.X)

        self.logger.info("SVM pred:\n{:}".format(svm_pred))
        self.logger.info("Sec-SVM pred:\n{:}".format(secsvm_pred))

        self.assert_array_almost_equal(secsvm_pred, svm_pred)
Exemplo n.º 4
0
    def test_grad_tr_params_nonlinear(self):
        """Check `grad_tr_params` for an SVM created with `kernel='rbf'`.

        The check is repeated with no preprocessor and with a min-max
        normalizer configured with `(-10, 10)`.
        """
        preprocessors = (None, CNormalizerMinMax((-10, 10)))

        for preprocess in preprocessors:
            svm = CClassifierSVM(kernel='rbf', preprocess=preprocess)
            svm.fit(self.ds.X, self.ds.Y)
            self._test_grad_tr_params(svm)
Exemplo n.º 5
0
    def test_performance(self):
        """Check that SVM and SGD classifiers score well on the training set.

        For every classifier in `self.sgds` an SVM is built from a copy of
        its `preprocess` attribute and fitted on `self.dataset`. Both
        classifiers then predict the training samples and each score,
        computed with the `'f1'` metric, must exceed 0.90.
        """
        self.logger.info("Testing error performance of the "
                         "classifiers on the training set")

        for sgd in self.sgds:

            self.logger.info("SGD kernel: {:}".format(sgd.preprocess))

            sgd_kernel = sgd.preprocess
            svm = CClassifierSVM(
                kernel=None if sgd_kernel is None else sgd_kernel.deepcopy())

            samples, y_true = self.dataset.X, self.dataset.Y
            svm.fit(samples, y_true)

            # Only the predicted labels are needed; scores are discarded
            label_svm, _ = svm.predict(samples, return_decision_function=True)
            label_sgd, _ = sgd.predict(samples, return_decision_function=True)

            score_svm = CMetric.create('f1').performance_score(
                y_true, label_svm)
            score_sgd = CMetric.create('f1').performance_score(
                y_true, label_sgd)

            svm_msg = "Accuracy of SVM: {:}".format(score_svm)
            self.logger.info(svm_msg)
            self.assertGreater(score_svm, 0.90, svm_msg)

            sgd_msg = "Accuracy of SGD: {:}".format(score_sgd)
            self.logger.info(sgd_msg)
            self.assertGreater(score_sgd, 0.90, sgd_msg)
Exemplo n.º 6
0
    def setUp(self):
        """Prepare two ROC objects computed over two repetitions.

        Two datasets are loaded from the same loader, an SVM is trained on
        the first one and used to score both. `roc_nomean` holds the plain
        ROC of the two repetitions, `roc_wmean` additionally has `average()`
        called on it.
        """
        self.ds_loader = CDLRandom(n_features=1000,
                                   n_redundant=200,
                                   n_informative=250,
                                   n_clusters_per_class=2)
        self.ds1 = self.ds_loader.load()
        self.ds2 = self.ds_loader.load()

        self.y1, self.y2 = self.ds1.Y, self.ds2.Y

        self.svm = CClassifierSVM(C=1e-7).fit(self.ds1.X, self.ds1.Y)

        _, scores_ds1 = self.svm.predict(
            self.ds1.X, return_decision_function=True)
        _, scores_ds2 = self.svm.predict(
            self.ds2.X, return_decision_function=True)

        # Keep only the column with index 1 of the score matrices
        self.s1 = scores_ds1[:, 1].ravel()
        self.s2 = scores_ds2[:, 1].ravel()

        # ROC of the 2 repetitions, average not computed
        self.roc_nomean = CRoc()
        self.roc_nomean.compute([self.y1, self.y2], [self.s1, self.s2])

        # ROC of the 2 repetitions, average computed
        self.roc_wmean = CRoc()
        self.roc_wmean.compute([self.y1, self.y2], [self.s1, self.s2])
        self.roc_wmean.average()
Exemplo n.º 7
0
class TestCPlot(CUnitTest):
    """Unit tests for the function-plotting methods of a figure subplot."""

    def setUp(self):
        """Train an SVM on a min-max normalized random 2-feature dataset."""
        self.clf = CClassifierSVM()

        random_ds = CDLRandom(n_features=2,
                              n_redundant=0,
                              n_informative=1,
                              n_clusters_per_class=1).load()
        self.dataset = random_ds
        self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X)

        self.clf.fit(self.dataset.X, self.dataset.Y)

    def test_fun(self):
        """Test for CPlotFunction.plot_fun method."""
        figure = CFigure()
        figure.sp.plot_ds(self.dataset)

        # Decision function evaluated with `y=1`
        figure.sp.plot_fun(self.clf.decision_function, y=1)
        figure.show()

    def test_fgrads(self):
        """Test for CPlotFunction.plot_fgrads method."""
        def grad_wrt_x(x):
            # Gradient of the classifier evaluated with `y=1`
            return self.clf.grad_f_x(x, y=1)

        figure = CFigure()
        figure.sp.plot_ds(self.dataset)

        figure.sp.plot_fun(self.clf.decision_function, y=1)
        figure.sp.plot_fgrads(grad_wrt_x)
        figure.show()
Exemplo n.º 8
0
    def test_performance(self):
        """Check that SVM and ridge classifiers score well on the training set.

        For every classifier in `self.ridges` an SVM is built from a copy of
        its `preprocess` attribute and fitted on `self.dataset`. Both
        classifiers then predict the training samples and each score,
        computed with the `'f1'` metric, must exceed 0.90.
        """
        self.logger.info("Testing error performance of the "
                         "classifiers on the training set")

        for ridge in self.ridges:
            self.logger.info("RIDGE kernel: {:}".format(ridge.preprocess))

            ridge_kernel = ridge.preprocess
            svm = CClassifierSVM(
                kernel=(None if ridge_kernel is None
                        else ridge_kernel.deepcopy()))

            samples, y_true = self.dataset.X, self.dataset.Y
            svm.fit(samples, y_true)

            # Only the predicted labels are needed; scores are discarded
            label_svm, _ = svm.predict(
                samples, return_decision_function=True)
            label_ridge, _ = ridge.predict(
                samples, return_decision_function=True)

            score_svm = CMetric.create('f1').performance_score(
                y_true, label_svm)
            score_ridge = CMetric.create('f1').performance_score(
                y_true, label_ridge)

            svm_msg = "Accuracy of SVM: {:}".format(score_svm)
            self.logger.info(svm_msg)
            self.assertGreater(score_svm, 0.90, svm_msg)

            ridge_msg = "Accuracy of ridge: {:}".format(score_ridge)
            self.logger.info(ridge_msg)
            self.assertGreater(score_ridge, 0.90, ridge_msg)
Exemplo n.º 9
0
 def setUp(self):
     """Train an SVM on a min-max normalized random 2-feature dataset."""
     self.clf = CClassifierSVM()

     random_ds = CDLRandom(n_features=2,
                           n_redundant=0,
                           n_informative=1,
                           n_clusters_per_class=1).load()
     self.dataset = random_ds
     self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X)

     self.clf.fit(self.dataset.X, self.dataset.Y)
Exemplo n.º 10
0
    def setUp(self):
        """Train an SVM on digits 0 and 1 and wrap it in a gradient explainer."""
        self.clf = CClassifierSVM()

        # Two-class subset of the digits dataset (classes 0 and 1)
        digits_loader = CDLDigits(class_list=[0, 1], zero_one=True)
        self.ds = digits_loader.load()

        # The explainer is built on the already-trained classifier
        self.clf.fit(self.ds.X, self.ds.Y)
        self.explainer = CExplainerGradient(self.clf)
Exemplo n.º 11
0
    def setUp(self):
        """Train an SVM on a random dataset and store its predictions.

        After this call `self.labels` and `self.scores` hold the two values
        returned by `predict` on the training samples.
        """
        random_loader = CDLRandom(n_samples=50, random_state=0)
        self.ds = random_loader.load()

        self.logger.info("Train an SVM and classify dataset...")
        self.svm = CClassifierSVM()
        self.svm.fit(self.ds.X, self.ds.Y)

        predictions = self.svm.predict(
            self.ds.X, return_decision_function=True)
        self.labels, self.scores = predictions
Exemplo n.º 12
0
class TestCRoc(CUnitTest):
    """Unit tests for CRoc."""

    def setUp(self):
        """Score two random datasets with an SVM trained on the first one.

        The two loaders differ only in `random_state` (0 and 1000).
        """
        shared_params = dict(n_features=1000,
                             n_redundant=200,
                             n_informative=250,
                             n_clusters_per_class=2)
        self.dl1 = CDLRandom(random_state=0, **shared_params)
        self.dl2 = CDLRandom(random_state=1000, **shared_params)
        self.ds1 = self.dl1.load()
        self.ds2 = self.dl2.load()

        self.svm = CClassifierSVM(C=1e-7).fit(self.ds1.X, self.ds1.Y)

        self.y1, self.s1 = self.svm.predict(
            self.ds1.X, return_decision_function=True)
        self.y2, self.s2 = self.svm.predict(
            self.ds2.X, return_decision_function=True)

        self.roc = CRoc()

    def test_roc_1sample(self):
        """ROC computed from a single sample must have 3 points."""
        self.roc.compute(CArray([1]), CArray([0]))
        self.roc.average()

        # 3 and not 1: the roc is bounded (a first and a last point are added)
        for rate in (self.roc.fpr, self.roc.tpr):
            self.assertEqual(rate.size, 3)

    def test_compute(self):
        """Compute the ROC on the first dataset and plot it."""
        scores_ds1 = self.s1[:, 1].ravel()
        self.roc.compute(self.ds1.Y, scores_ds1)

        figure = CFigure()
        figure.sp.semilogx(self.roc.fpr, self.roc.tpr)
        figure.sp.grid()
        figure.show()

    def test_mean(self):
        """Compute the average ROC over both datasets and plot it."""
        true_labels = [self.ds1.Y, self.ds2.Y]
        scores = [self.s1[:, 1].ravel(), self.s2[:, 1].ravel()]
        self.roc.compute(true_labels, scores)

        mean_fp, mean_tp, mean_std = self.roc.average(return_std=True)

        figure = CFigure(linewidth=2)
        figure.sp.errorbar(
            self.roc.mean_fpr, self.roc.mean_tpr, yerr=mean_std)
        # One curve for each repetition, then the averaged one
        for rep_idx in range(self.roc.n_reps):
            figure.sp.semilogx(self.roc.fpr[rep_idx], self.roc.tpr[rep_idx])
        figure.sp.semilogx(mean_fp, mean_tp)
        figure.sp.grid()
        figure.show()
    def setUp(self):
        """Prepare one security evaluation for each available attack.

        A linear SVM is trained on the first 20 samples of a 2-feature blobs
        dataset; the remaining 10 samples are the attack dataset. For each
        attack factory a `CSecEval` is appended to `self.sec_eval` and the
        matching dataset to `self.attack_ds`.

        If `cleverhans` cannot be imported every factory is skipped, so both
        lists stay empty.
        """
        self.classifier = CClassifierSVM(kernel='linear', C=1.0)

        self.lb, self.ub = -2, +2

        n_tr, n_ts = 20, 10
        n_features = 2
        n_reps = 1

        self.sec_eval = []
        self.attack_ds = []
        for rep_i in range(n_reps):
            self.logger.info(
                "Loading `random_blobs` with seed: {:}".format(rep_i))
            ds = CDLRandomBlobs(n_samples=n_tr + n_ts,
                                n_features=n_features,
                                centers=[(-0.5, -0.5), (+0.5, +0.5)],
                                center_box=(-0.5, 0.5),
                                cluster_std=0.5,
                                random_state=rep_i * 100 + 10).load()

            # First `n_tr` samples for training, the rest for testing
            self.tr, self.ts = ds[:n_tr, :], ds[n_tr:, :]

            self.classifier.fit(self.tr.X, self.tr.Y)

            # only manipulate positive samples, targeting negative ones
            self.y_target = None
            self.attack_classes = CArray([1])

            attack_factories = (self._attack_pgd_ls, self._attack_cleverhans)
            for create_fn in attack_factories:
                # TODO: REFACTOR THESE UNITTESTS REMOVING THE FOR LOOP

                try:
                    import cleverhans  # noqa: F401 (availability check only)
                except ImportError:
                    continue

                self.attack_ds.append(self.ts)
                attack, param_name, param_values = create_fn()
                # set sec eval object
                evaluation = CSecEval(attack=attack,
                                      param_name=param_name,
                                      param_values=param_values)
                self.sec_eval.append(evaluation)
Exemplo n.º 14
0
    def setUp(self):
        """Load training/test datasets and create a verbose RBF-kernel SVM."""
        # Dummy datasets: both come from two `load()` calls on one loader
        # NOTE(review): the intent is a test set different from the training
        # one; confirm that repeated loads with a fixed seed actually differ.
        random_loader = CDLRandom(random_state=50000)
        self.training_dataset = random_loader.load()
        self.test_dataset = random_loader.load()

        # Classifier under test
        self.svm = CClassifierSVM(kernel=CKernel.create('rbf'))
        self.svm.verbose = 1

        kernel_type = self.svm.kernel.class_type
        self.logger.info("Using kernel {:}".format(kernel_type))
Exemplo n.º 15
0
    def _prepare_tree_nonlinear_svm(self, sparse, seed):
        """Prepare the data required for attacking a TREE classifier with
        surrogate NONLINEAR SVM.

        - load a blob 2D dataset
        - create a decision tree classifier
        - create a surrogate SVM with RBF kernel (C=1, kernel defaults)

        None of the returned classifiers is trained here.

        Parameters
        ----------
        sparse : bool
            Forwarded to `_load_blobs`.
        seed : int or None
            Forwarded to `_load_blobs` and used as the tree `random_state`.

        Returns
        -------
        ds : CDataset
        clf : CClassifierDecisionTree
        clf_surr : CClassifierSVM

        """
        blobs = self._load_blobs(n_feats=2,
                                 n_clusters=2,
                                 sparse=sparse,
                                 seed=seed)

        target_tree = CClassifierDecisionTree(random_state=seed)
        surrogate_svm = CClassifierSVM(kernel='rbf', C=1)

        return blobs, target_tree, surrogate_svm
Exemplo n.º 16
0
    def test_explanation_svm_rbf(self):
        """Run the simple-classifier explanation test on an RBF-kernel SVM."""
        rbf_gamma = 0.01

        rbf_svm = CClassifierSVM(kernel=CKernelRBF(gamma=rbf_gamma), C=10)
        self._clf = rbf_svm
        # Gamma is also set explicitly on the kernel attached to the SVM
        self._clf.kernel.gamma = rbf_gamma
        self._clf.store_dual_vars = True
        self._clf_idx = 'rbf-svm'

        self._test_explanation_simple_clf()
Exemplo n.º 17
0
    def setUp(self):
        """Create a dense/sparse dataset pair and one SVM per kernel type.

        `self.svms` follows the order no kernel, linear, RBF, polynomial.
        Every classifier has `verbose` set to 2.
        """
        # Synthetic 2-feature data, in dense and sparse form
        random_loader = CDLRandom(n_features=2,
                                  n_redundant=0,
                                  n_informative=1,
                                  n_clusters_per_class=1,
                                  random_state=1)
        self.dataset = random_loader.load()
        self.dataset_sparse = self.dataset.tosparse()

        kernel_types = (None, CKernelLinear, CKernelRBF, CKernelPoly)
        svm_list = []
        for kernel_cls in kernel_types:
            # `None` means the SVM is created without a kernel object
            kernel_obj = None if kernel_cls is None else kernel_cls()
            svm_list.append(CClassifierSVM(kernel=kernel_obj))
        self.svms = svm_list

        self.logger.info("Testing SVM with kernel functions: %s",
                         str(kernel_types))

        # Debug output enabled on every classifier
        for classifier in self.svms:
            classifier.verbose = 2

        self.logger.info("." * 50)
        self.logger.info("Number of Patterns: %s",
                         str(self.dataset.num_samples))
        self.logger.info("Features: %s", str(self.dataset.num_features))
Exemplo n.º 18
0
def train_clf(tr):
    """Train a `LogisticClassifier` wrapping a linear SVM (C=100).

    Parameters
    ----------
    tr
        Training data, passed unchanged to `LogisticClassifier.fit`.

    Returns
    -------
    The classifier on which `fit(tr)` has been called.

    """
    linear_svm = CClassifierSVM(C=100, kernel="linear")
    wrapped_clf = LogisticClassifier(linear_svm)

    print("Training of classifier...")
    wrapped_clf.fit(tr)

    return wrapped_clf
Exemplo n.º 19
0
    def _prepare_linear_svm(self, sparse, seed):
        """Prepare the data required for attacking a LINEAR SVM.

        - load a blob 2D dataset
        - create a SVM (C=1) with a min-max preprocessor scaling to (-1, 1)

        The returned classifier is not trained here.

        Parameters
        ----------
        sparse : bool
            Forwarded to `_load_blobs`.
        seed : int or None
            Forwarded to `_load_blobs`.

        Returns
        -------
        ds : CDataset
        clf : CClassifierSVM

        """
        blobs = self._load_blobs(n_feats=2,
                                 n_clusters=2,
                                 sparse=sparse,
                                 seed=seed)

        svm = CClassifierSVM(
            C=1.0, preprocess=CNormalizerMinMax(feature_range=(-1, 1)))

        return blobs, svm
Exemplo n.º 20
0
    def test_time(self):
        """Compare execution time of ridge and SVM.

        For every classifier in `self.ridges` an SVM is built from a copy of
        that classifier's `preprocess` attribute. Both are then fitted on
        `self.dataset` and the measured training intervals are logged.
        Nothing is asserted on the timings.
        """
        self.logger.info("Testing training speed of ridge compared to SVM ")

        for ridge in self.ridges:
            self.logger.info("RIDGE kernel: {:}".format(ridge.preprocess))

            # Pass the kernel by keyword: a bare positional argument binds to
            # whichever parameter comes first in the constructor signature.
            # A copy is used so that fitting the SVM cannot alter the object
            # still attached to `ridge`.
            if ridge.preprocess is not None:
                svm_kernel = ridge.preprocess.deepcopy()
            else:
                svm_kernel = None
            svm = CClassifierSVM(kernel=svm_kernel)

            with self.timer() as t_svm:
                svm.fit(self.dataset.X, self.dataset.Y)
            self.logger.info(
                "Execution time of SVM: {:}".format(t_svm.interval))
            with self.timer() as t_ridge:
                ridge.fit(self.dataset.X, self.dataset.Y)
            self.logger.info(
                "Execution time of ridge: {:}".format(t_ridge.interval))
Exemplo n.º 21
0
    def setUp(self):
        """Train an SVM (C=1.0) on the first 40 samples of a blobs dataset.

        The dataset has 10 features and is loaded dense with seed 0; the
        samples after the first 40 are stored in `self.ts`.
        """
        self.clf = CClassifierSVM(C=1.0)

        self.n_tr, self.n_features, self.seed = 40, 10, 0

        self.logger.info(
            "Loading `random_blobs` with seed: {:}".format(self.seed))
        self.ds = self._load_blobs(
            self.n_features, 2, sparse=False, seed=self.seed)

        # Training / test split by sample position
        split_at = self.n_tr
        self.tr = self.ds[:split_at, :]
        self.ts = self.ds[split_at:, :]

        self.clf.fit(self.tr.X, self.tr.Y)
Exemplo n.º 22
0
def _test_model_clf():
    """Return the model used to test the `load_model` functionality.

    The classifier is created with default parameters. The pre-saved state
    sets "C=100", so a correctly restored state can be told apart from the
    freshly created model.

    """
    default_svm = CClassifierSVM()
    return default_svm
Exemplo n.º 23
0
    def test_plot(self):
        """Plot the predictions of an SVM and a Sec-SVM side by side.

        Both classifiers are trained on the same random 2-feature dataset
        and `_compute_alignment` is run on the pair. The figure has one
        panel per classifier, showing the dataset points over the
        background of the classifier's `predict` output.
        """
        ds = CDLRandom(n_samples=100,
                       n_features=2,
                       n_redundant=0,
                       random_state=100).load()

        self.logger.info("Train Sec SVM")
        sec_svm = CClassifierSecSVM(C=1, eta=0.1, eps=1e-3, lb=-0.1, ub=0.5)
        sec_svm.verbose = 2
        sec_svm.fit(ds.X, ds.Y)

        self.logger.info("Train SVM")
        svm = CClassifierSVM(C=1)
        svm.fit(ds.X, ds.Y)

        self._compute_alignment(ds, sec_svm, svm)

        fig = CFigure(height=5, width=8)

        # (panel position, classifier, panel title)
        panels = ((1, svm, "SVM"), (2, sec_svm, "Sec-SVM"))
        for position, classifier, title in panels:
            fig.subplot(1, 2, position)
            # Dataset points first, then the prediction background
            fig.sp.plot_ds(ds)
            fig.sp.plot_fun(classifier.predict,
                            multipoint=True,
                            plot_background=True,
                            plot_levels=False,
                            n_grid_points=100,
                            grid_limits=ds.get_bounds())
            fig.sp.title(title)

        fig.show()
class TestCFigure(CUnitTest):
    """Unittest for CFigure."""

    def test_svm(self):
        """Plot a dataset, a path and an SVM decision function on one figure.

        An RBF-kernel SVM is trained on a 4-sample, 2-feature dataset. The
        plot limits are the per-feature min/max of the samples widened by 1
        on each side.
        """
        self.X = CArray([[1, 2], [3, 4], [5, 6], [7, 8]])
        self.Y = CArray([[0], [1], [1], [0]]).ravel()
        self.dataset = CDataset(self.X, self.Y)

        self.classifier = CClassifierSVM(kernel=CKernelRBF())
        self.classifier.fit(self.dataset)

        # Limits of the first feature (horizontal axis)
        self.x_min = self.X[:, [0]].min() - 1
        self.x_max = self.X[:, [0]].max() + 1
        # Limits of the second feature (vertical axis)
        self.y_min = self.X[:, [1]].min() - 1
        self.y_max = self.X[:, [1]].max() + 1

        self.fig = CFigure(height=7,
                           width=10,
                           linewidth=5,
                           fontsize=24,
                           markersize=20)
        subplot = self.fig.sp
        subplot.title("Svm Test")

        self.logger.info("Test plot dataset method...")
        self.fig.sp.plot_ds(self.dataset)

        self.logger.info("Test plot path method...")
        sample_path = CArray([[1, 2], [1, 3], [1.5, 5]])
        self.fig.sp.plot_path(sample_path)

        self.logger.info("Test plot function method...")
        grid_bounds = [(self.x_min, self.x_max), (self.y_min, self.y_max)]
        self.fig.sp.plot_fun(self.classifier.decision_function,
                             plot_levels=False,
                             grid_limits=grid_bounds,
                             y=1)

        self.fig.sp.xlim(self.x_min, self.x_max)
        self.fig.sp.ylim(self.y_min, self.y_max)

        self.fig.show()
class TestCExplainerGradient(CUnitTest):
    """Unittests for CExplainerGradient"""
    def setUp(self):
        """Train an SVM on digits 0 and 1 and wrap it in the explainer."""
        self.clf = CClassifierSVM()
        # Two-class subset of the digits dataset (classes 0 and 1); samples
        # are 64-feature vectors, shown as 8x8 images in `test_explain`
        self.ds = CDLDigits(class_list=[0, 1], zero_one=True).load()

        # Training classifier
        self.clf.fit(self.ds)

        self.explainer = CExplainerGradient(self.clf)

    def test_explain(self):
        """Unittest for explain method.

        Explains one sample with `y=1`, checks the type and the size of the
        attributions and plots the sample next to them.
        """
        i = 67
        x = self.ds.X[i, :]

        attr = self.explainer.explain(x, y=1)

        self.logger.info("Attributions:\n{:}".format(attr.tolist()))

        self.assertIsInstance(attr, CArray)
        # One attribution per input feature. The check is on the number of
        # elements so it holds whether or not the result is flattened.
        self.assertEqual(attr.size, x.size)

        fig = CFigure(height=3, width=6)

        # Plotting original image
        fig.subplot(1, 2, 1)
        fig.sp.imshow(x.reshape((8, 8)), cmap='gray')

        # Symmetric color range centered on zero
        th = max(abs(attr.min()), abs(attr.max()))

        # Plotting attributions
        fig.subplot(1, 2, 2)
        fig.sp.imshow(attr.reshape((8, 8)),
                      cmap='seismic',
                      vmin=-1 * th,
                      vmax=th)

        fig.show()
Exemplo n.º 26
0
    def test_draw(self):
        """Plot the decision functions of an SVM and of the first SGD.

        Both classifiers are trained on a min-max normalized random
        2-feature dataset. The figure is saved as
        `test_c_classifier_sgd1.pdf` under the `figs` folder.
        """
        self.logger.info("Testing classifiers graphically")

        # 2D synthetic data, normalized
        dataset = CDLRandom(n_features=2,
                            n_redundant=1,
                            n_informative=1,
                            n_clusters_per_class=1).load()
        dataset.X = CNormalizerMinMax().fit_transform(dataset.X)

        sgd = self.sgds[0]
        sgd.fit(dataset.X, dataset.Y)

        svm = CClassifierSVM()
        svm.fit(dataset.X, dataset.Y)

        fig = CFigure(width=10, markersize=8)

        # (panel row, classifier, panel title)
        panels = ((1, svm, 'SVM'), (2, sgd, 'SGD Classifier'))
        for row, classifier, title in panels:
            fig.subplot(2, 1, row)
            # Dataset points first, then the decision function for `y=1`
            fig.sp.plot_ds(dataset)
            fig.sp.plot_fun(classifier.decision_function,
                            grid_limits=dataset.get_bounds(),
                            y=1)
            fig.sp.title(title)

        output_path = fm.join(
            fm.abspath(__file__), 'figs', 'test_c_classifier_sgd1.pdf')
        fig.savefig(output_path)
Exemplo n.º 27
0
    def setUp(self):
        """Score two random datasets with an SVM trained on the first one.

        The two loaders differ only in `random_state` (0 and 1000).
        """
        shared_params = dict(n_features=1000,
                             n_redundant=200,
                             n_informative=250,
                             n_clusters_per_class=2)
        self.dl1 = CDLRandom(random_state=0, **shared_params)
        self.dl2 = CDLRandom(random_state=1000, **shared_params)
        self.ds1 = self.dl1.load()
        self.ds2 = self.dl2.load()

        self.svm = CClassifierSVM(C=1e-7).fit(self.ds1.X, self.ds1.Y)

        self.y1, self.s1 = self.svm.predict(
            self.ds1.X, return_decision_function=True)
        self.y2, self.s2 = self.svm.predict(
            self.ds2.X, return_decision_function=True)

        self.roc = CRoc()
Exemplo n.º 28
0
    def test_preprocess(self):
        """Test classifier with preprocessors inside."""
        def _linear_chain():
            # Fresh lists on every call, so the two checks share no object
            names = ['min-max', 'mean-std']
            params = [{'feature_range': (-1, 1)}, {}]
            return names, params

        ds = CDLRandom().load()
        clf = CClassifierSVM()

        # All linear transformations with gradient implemented
        names, params = _linear_chain()
        self._test_preprocess(ds, clf, names, params)
        names, params = _linear_chain()
        self._test_preprocess_grad(ds, clf, names, params)

        # Mixed linear/nonlinear transformations without gradient
        self._test_preprocess(ds, clf, ['pca', 'unit-norm'], [{}, {}])
Exemplo n.º 29
0
    def test_multiclass(self):
        """Test multiclass SVM on MNIST digits.

        An explicit one-vs-all wrapper around `CClassifierSVM` and a plain
        `CClassifierSVM` are created with the same parameters. They must
        reach the same accuracy on the test set and return the same
        gradient for one test sample.
        """
        self.logger.info("Testing multiclass SVM.")

        digits = tuple(range(0, 10))
        n_tr = 100  # Number of training set samples
        n_ts = 200  # Number of test set samples

        mnist = CDataLoaderMNIST()
        tr = mnist.load('training', digits=digits, num_samples=n_tr)
        ts = mnist.load('testing', digits=digits, num_samples=n_ts)

        # Normalize the features in `[0, 1]`
        tr.X /= 255
        ts.X /= 255

        svm_params = {
            'kernel': CKernelRBF(gamma=0.1),
            'C': 10,
            'class_weight': {0: 1, 1: 1},
            'n_jobs': 2,
        }
        ova_svm = CClassifierMulticlassOVA(CClassifierSVM, **svm_params)
        plain_svm = CClassifierSVM(**svm_params)

        accuracies = []
        gradients = []
        for classifier in (ova_svm, plain_svm):
            classifier.verbose = 1
            classifier.fit(tr.X, tr.Y)
            # Predictions on the test set; the scores are not used
            y_pred, _ = classifier.predict(
                ts.X, return_decision_function=True)
            accuracy = CMetricAccuracy().performance_score(
                y_true=ts.Y, y_pred=y_pred)
            accuracies.append(accuracy)
            # Gradient for the test sample at index 1, with 1 as second arg
            gradients.append(classifier.grad_f_x(ts.X[1, :], 1))

        self.assertAlmostEqual(accuracies[0], accuracies[1])
        self.assert_array_almost_equal(gradients[0], gradients[1])
Exemplo n.º 30
0
    def test_linear_svm(self):
        """Performs tests on linear SVM.

        An SVM created with `kernel=None` and the first classifier of
        `self.svms` are trained on the same data, first dense and then
        sparse, and must predict the same labels each time. After the
        sparse training the weights of the `kernel=None` SVM must be sparse.
        """
        self.logger.info("Testing SVM linear variants (kernel and not)")

        # Instancing a linear SVM and an SVM with linear kernel
        linear_svm = CClassifierSVM(kernel=None)
        kernel_linear_svm = self.svms[0]

        def _fit_both(data):
            linear_svm.fit(data.X, data.Y)
            kernel_linear_svm.fit(data.X, data.Y)

        def _predicted_labels(data):
            # Scores are requested as well but only the labels are returned
            labels_linear, _ = linear_svm.predict(
                data.X, return_decision_function=True)
            labels_kernel, _ = kernel_linear_svm.predict(
                data.X, return_decision_function=True)
            return labels_linear, labels_kernel

        self.logger.info("SVM w/ linear kernel in the primal")
        self.assertIsNone(linear_svm.kernel)

        self.logger.info("Training both classifiers on dense data")
        _fit_both(self.dataset)
        labels_linear, labels_kernel = _predicted_labels(self.dataset)

        # check prediction
        self.assert_array_equal(labels_linear, labels_kernel)

        self.logger.info("Training both classifiers on sparse data")
        _fit_both(self.dataset_sparse)

        self.assertTrue(
            linear_svm.w.issparse, "Weights vector is not sparse even "
            "if training data is sparse")

        labels_linear, labels_kernel = _predicted_labels(self.dataset_sparse)

        # check prediction
        self.assert_array_equal(labels_linear, labels_kernel)