Example #1
0
    def setUp(self):
        """Create the dataset and the ridge classifiers under test.

        Builds a synthetic random dataset, rescales it to [0, 1] and fits
        one CClassifierRidge per kernel type (including no kernel).
        """
        # TODO: remove this filter when `kernel` parameter is removed from Ridge Classifier
        self.logger.filterwarnings("ignore",
                                   message="`kernel` parameter.*",
                                   category=DeprecationWarning)
        # generate synthetic data
        self.dataset = CDLRandom(n_features=100,
                                 n_redundant=20,
                                 n_informative=25,
                                 n_clusters_per_class=2,
                                 random_state=0).load()

        self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X)

        kernel_types = (None, CKernelLinear, CKernelRBF, CKernelPoly)
        self.ridges = [
            CClassifierRidge(kernel=kernel() if kernel is not None else None)
            for kernel in kernel_types
        ]
        # Fixed typo in log message: "unctions" -> "functions"
        self.logger.info("Testing RIDGE with kernel functions: %s",
                         str(kernel_types))

        for ridge in self.ridges:
            ridge.verbose = 2  # Enabling debug output for each classifier
            ridge.fit(self.dataset)
Example #2
0
    def setUp(self):
        """Prepare a 3-class dataset (plus one carved-out extra class)
        and a fitted one-vs-all multiclass SVM."""
        import numpy as np
        np.random.seed(12345678)

        # Synthetic 2D data with 3 well-separated classes
        self.ds = CDLRandom(n_classes=3, n_features=2, n_redundant=0,
                            n_clusters_per_class=1, class_sep=1,
                            random_state=0).load()

        # Carve a new class out of one of the existing clusters
        new_class_mask = (self.ds.X[:, 0] > 0).logical_and(
            self.ds.X[:, 1] > 1).ravel()
        self.ds.Y[new_class_mask] = self.ds.num_classes

        # self.kernel = None
        self.kernel = CKernelRBF(gamma=10)

        # Rescale features to [0, 1]
        self.normalizer = CNormalizerMinMax()
        self.ds.X = self.normalizer.fit_transform(self.ds.X)

        self.multiclass = CClassifierMulticlassOVA(classifier=CClassifierSVM,
                                                   class_weight='balanced',
                                                   preprocess=None,
                                                   kernel=self.kernel)
        self.multiclass.verbose = 0

        # Fit on the whole dataset, then classify it back
        self.multiclass.fit(self.ds.X, self.ds.Y)
        self.y_pred, self.score_pred = self.multiclass.predict(
            self.ds.X, return_decision_function=True)
Example #3
0
    def setUp(self):
        """Build two random datasets, score them with a weak SVM and
        compute ROC curves with and without averaging."""
        self.ds_loader = CDLRandom(n_features=1000,
                                   n_redundant=200,
                                   n_informative=250,
                                   n_clusters_per_class=2)
        self.ds1 = self.ds_loader.load()
        self.ds2 = self.ds_loader.load()

        self.y1 = self.ds1.Y
        self.y2 = self.ds2.Y

        # Deliberately under-regularized SVM (C=1e-7) -> weak classifier
        self.svm = CClassifierSVM(C=1e-7).fit(self.ds1.X, self.ds1.Y)

        _, scores1 = self.svm.predict(self.ds1.X,
                                      return_decision_function=True)
        _, scores2 = self.svm.predict(self.ds2.X,
                                      return_decision_function=True)

        # Keep only the scores of the positive class
        self.s1 = scores1[:, 1].ravel()
        self.s2 = scores2[:, 1].ravel()

        # Roc with not computed average (2 repetitions)
        self.roc_nomean = CRoc()
        self.roc_nomean.compute([self.y1, self.y2], [self.s1, self.s2])

        # Roc with average (2 repetitions)
        self.roc_wmean = CRoc()
        self.roc_wmean.compute([self.y1, self.y2], [self.s1, self.s2])
        self.roc_wmean.average()
Example #4
0
    def setUp(self):
        """Create two 4-class dummy datasets with different seeds so the
        test set differs from the training set."""
        self.tr, self.ts = (
            CDLRandom(n_classes=4, n_clusters_per_class=1,
                      random_state=seed).load()
            for seed in (50000, 10000))
Example #5
0
    def setUp(self):
        """Create a simple 2D dataset (dense and sparse) plus one SVM
        per kernel type, then log dataset statistics."""
        self.dataset = CDLRandom(n_features=2, n_redundant=0,
                                 n_informative=1, n_clusters_per_class=1,
                                 random_state=1).load()

        self.dataset_sparse = self.dataset.tosparse()

        kernel_types = (None, CKernelLinear, CKernelRBF, CKernelPoly)
        self.svms = []
        for kernel in kernel_types:
            svm = CClassifierSVM(
                kernel=None if kernel is None else kernel())
            svm.verbose = 2  # Enabling debug output for each classifier
            self.svms.append(svm)
        self.logger.info("Testing SVM with kernel functions: %s",
                         str(kernel_types))

        self.logger.info("." * 50)
        self.logger.info("Number of Patterns: %s",
                         str(self.dataset.num_samples))
        self.logger.info("Features: %s", str(self.dataset.num_features))
    def test_plot_decision_function(self):
        """Test plot of multiclass classifier decision function."""
        # generate synthetic data
        ds = CDLRandom(n_classes=3, n_features=2, n_redundant=0,
                       n_clusters_per_class=1, class_sep=1,
                       random_state=0).load()

        multiclass = CClassifierMulticlassOVA(
            classifier=CClassifierSVM,
            class_weight='balanced',
            preprocess='min-max')

        # Training and classification
        multiclass.fit(ds.X, ds.Y)
        y_pred, score_pred = multiclass.predict(
            ds.X, return_decision_function=True)

        def plot_hyperplane(img, clf, min_v, max_v, linestyle, label):
            """Plot the hyperplane associated to the OVA clf."""
            xx = CArray.linspace(
                min_v - 5, max_v + 5)  # make sure the line is long enough
            # get the separating hyperplane: solve w0*x + w1*y + b = 0 for y
            yy = -(clf.w[0] * xx + clf.b) / clf.w[1]
            img.sp.plot(xx, yy, linestyle, label=label)

        fig = CFigure(height=7, width=8)
        fig.sp.title('{:} ({:})'.format(multiclass.__class__.__name__,
                                        multiclass.classifier.__name__))

        x_bounds, y_bounds = ds.get_bounds()

        # One matplotlib-style format string per class
        styles = ['go-', 'yp--', 'rs-.', 'bD--', 'c-.', 'm-', 'y-.']

        for c_idx, c in enumerate(ds.classes):
            # Plot boundary and predicted label for each OVA classifier

            plot_hyperplane(fig, multiclass._binary_classifiers[c_idx],
                            x_bounds[0], x_bounds[1], styles[c_idx],
                            'Boundary\nfor class {:}'.format(c))

            # True samples of class c (small filled markers)
            fig.sp.scatter(ds.X[ds.Y == c, 0],
                           ds.X[ds.Y == c, 1],
                           s=40, c=styles[c_idx][0])
            # Samples predicted as class c (large hollow markers)
            fig.sp.scatter(ds.X[y_pred == c, 0], ds.X[y_pred == c, 1], s=160,
                           edgecolors=styles[c_idx][0],
                           facecolors='none', linewidths=2)

        # Plotting multiclass decision function
        fig.sp.plot_decision_regions(multiclass, n_grid_points=100,
                                     grid_limits=ds.get_bounds(offset=5))

        # Margin proportional to the upper bound on each axis
        fig.sp.xlim(x_bounds[0] - .5 * x_bounds[1],
                    x_bounds[1] + .5 * x_bounds[1])
        fig.sp.ylim(y_bounds[0] - .5 * y_bounds[1],
                    y_bounds[1] + .5 * y_bounds[1])

        fig.sp.legend(loc=4)  # lower, right

        fig.show()
Example #7
0
 def test_plot(self):
     """ Compare the classifiers graphically"""
     # 2D dataset so the decision regions can be drawn
     ds = CDLRandom(n_features=2, n_redundant=0, n_informative=2,
                    n_clusters_per_class=1, random_state=0).load()
     ds.X = CNormalizerMinMax().fit_transform(ds.X)
     # Plot only the first ridge classifier and save the figure to disk
     fig = self._test_plot(self.ridges[0], ds)
     fig.savefig(fm.join(fm.abspath(__file__), 'figs',
                         'test_c_classifier_ridge.pdf'))
    def setUp(self):
        """Test for init and fit methods."""
        # Simple 2D dataset, rescaled to [0, 1]
        dataset = CDLRandom(n_features=2, n_redundant=0, n_informative=1,
                            n_clusters_per_class=1).load()
        dataset.X = CNormalizerMinMax().fit_transform(dataset.X)
        self.dataset = dataset

        self.nc = CClassifierNearestCentroid()
Example #9
0
File: test_roc.py  Project: pralab/secml
class TestCRoc(CUnitTest):
    """Unit test for CRoc."""
    def setUp(self):
        """Load two random datasets, score them with a weak SVM and
        create the CRoc instance under test."""
        self.dl1 = CDLRandom(n_features=1000, n_redundant=200,
                             n_informative=250, n_clusters_per_class=2,
                             random_state=0)
        self.dl2 = CDLRandom(n_features=1000, n_redundant=200,
                             n_informative=250, n_clusters_per_class=2,
                             random_state=1000)
        self.ds1 = self.dl1.load()
        self.ds2 = self.dl2.load()

        # Deliberately under-regularized SVM (C=1e-7) -> weak classifier
        self.svm = CClassifierSVM(C=1e-7).fit(self.ds1.X, self.ds1.Y)

        self.y1, self.s1 = self.svm.predict(self.ds1.X,
                                            return_decision_function=True)
        self.y2, self.s2 = self.svm.predict(self.ds2.X,
                                            return_decision_function=True)

        self.roc = CRoc()

    def test_roc_1sample(self):
        """A ROC computed on a single sample must still be well-formed."""
        self.roc.compute(CArray([1]), CArray([0]))
        self.roc.average()

        # Testing 3 and not 1 as roc is bounded (we add a first and last point)
        for rates in (self.roc.fpr, self.roc.tpr):
            self.assertEqual(rates.size, 3)

    def test_compute(self):
        """Compute a single ROC curve and plot it on a log x-axis."""
        positive_scores = self.s1[:, 1].ravel()
        self.roc.compute(self.ds1.Y, positive_scores)

        fig = CFigure()
        fig.sp.semilogx(self.roc.fpr, self.roc.tpr)
        fig.sp.grid()
        fig.show()

    def test_mean(self):
        """Average two ROC repetitions and plot the mean with errorbars."""
        scores = [self.s1[:, 1].ravel(), self.s2[:, 1].ravel()]
        self.roc.compute([self.ds1.Y, self.ds2.Y], scores)
        mean_fp, mean_tp, mean_std = self.roc.average(return_std=True)

        fig = CFigure(linewidth=2)
        fig.sp.errorbar(self.roc.mean_fpr, self.roc.mean_tpr, yerr=mean_std)
        # One curve per repetition, then the averaged one on top
        for rep in range(self.roc.n_reps):
            fig.sp.semilogx(self.roc.fpr[rep], self.roc.tpr[rep])
        fig.sp.semilogx(mean_fp, mean_tp)
        fig.sp.grid()
        fig.show()
    def setUp(self):
        """Create a 3-class, 5-feature random dataset and log its size."""
        self.n_classes = 3
        self.n_features = 5

        loader = CDLRandom(n_classes=self.n_classes,
                           n_features=self.n_features,
                           n_informative=self.n_features,
                           n_redundant=0)
        self.ds = loader.load()
        self.logger.info("num_samples: {}, num_classes: {:}".format(
            self.ds.num_samples, self.ds.num_classes))
    def setUp(self):
        """Test for init and fit methods."""
        # Simple 2D dataset, rescaled to [0, 1]
        dataset = CDLRandom(n_features=2, n_redundant=0, n_informative=1,
                            n_clusters_per_class=1, random_state=99).load()
        dataset.X = CNormalizerMinMax().fit_transform(dataset.X)
        self.dataset = dataset

        self.logger.info("Testing classifier creation ")

        self.log = CClassifierLogistic(random_state=99)
Example #12
0
    def setUp(self):
        """Create train/test dummy datasets and an RBF-kernel SVM."""
        # Create dummy dataset (we want a test different from train)
        loader = CDLRandom(random_state=50000)
        self.training_dataset = loader.load()
        self.test_dataset = loader.load()

        # CREATE CLASSIFIERS
        self.svm = CClassifierSVM(kernel=CKernel.create('rbf'))
        self.svm.verbose = 1

        self.logger.info("Using kernel {:}".format(self.svm.kernel.class_type))
Example #13
0
    def test_alignment(self):
        """Sec-SVM trained with loose bounds should agree with a plain SVM.

        Trains both classifiers on the same data, compares their weight
        alignment and checks that their predictions match.
        """
        ds = CDLRandom(n_samples=100,
                       n_features=500,
                       n_redundant=0,
                       n_informative=10,
                       n_clusters_per_class=1,
                       random_state=0).load()

        self.logger.info("Train Sec SVM")
        sec_svm = CClassifierSecSVM(C=1, eta=0.1, eps=1e-2, lb=-0.1, ub=0.5)
        sec_svm.verbose = 2
        sec_svm.fit(ds.X, ds.Y)

        self.logger.info("Train SVM")
        svm = CClassifierSVM(C=1)
        svm.fit(ds.X, ds.Y)

        self._compute_alignment(ds, sec_svm, svm)

        # BUG FIX: the standard-SVM predictions were previously taken from
        # `sec_svm` too, which made the final assertion trivially compare
        # sec_svm with itself.
        svm_pred = svm.predict(ds.X)
        secsvm_pred = sec_svm.predict(ds.X)

        self.logger.info("SVM pred:\n{:}".format(svm_pred))
        self.logger.info("Sec-SVM pred:\n{:}".format(secsvm_pred))

        self.assert_array_almost_equal(secsvm_pred, svm_pred)
Example #14
0
    def test_params_multiclass(self):
        """Parameter estimation for multiclass classifiers."""
        # Create dummy dataset (we want a test different from train)
        tr = CDLRandom(n_classes=4, n_clusters_per_class=1,
                       random_state=50000).load()

        # First run: no preprocessing
        ova = CClassifierMulticlassOVA(CClassifierSVM, C=1,
                                       kernel=CKernel.create('rbf'))
        ova.verbose = 1

        xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [0.1, 1]}
        expected = {'C': 10.0, 'kernel.gamma': 0.1}

        self._run_multiclass(tr, ova, xval_parameters, expected)

        self.logger.info("Testing with preprocessor")

        # Second run: same estimation with min-max scaling inside
        ova = CClassifierMulticlassOVA(CClassifierSVM, C=1,
                                       kernel=CKernel.create('rbf'),
                                       preprocess='min-max')
        ova.verbose = 1

        xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [0.1, 1]}
        expected = {'C': 10.0, 'kernel.gamma': 0.1}

        self._run_multiclass(tr, ova, xval_parameters, expected)
    def test_openworldkfold(self):
        """Check CDataSplitterOpenWorldKFold against known fold indices."""
        ds = CDLRandom(
            n_classes=3, n_samples=14, n_informative=3, random_state=0).load()

        self.logger.info("Testing Open World K-Fold")
        kf = CDataSplitterOpenWorldKFold(
            num_folds=2, n_train_samples=4,
            random_state=5000).compute_indices(ds)

        # Expected indices for the fixed random_state above
        tr_idx_expected = [CArray([0, 4, 8, 12]), CArray([1, 3, 9, 13])]
        ts_idx_expected = [CArray([1, 2, 3, 5, 6, 7, 9, 10, 11, 13]),
                           CArray([0, 2, 4, 5, 6, 7, 8, 10, 11, 12])]

        self.assertEqual(len(kf.tr_idx), 2)
        self.assertEqual(len(kf.ts_idx), 2)

        self.logger.info("DS classes:\n{:}".format(ds.Y))

        for fold_idx in range(kf.num_folds):

            self.logger.info(
                "{:} fold:\nTR CLASSES {:}\nTR {:} {:}\nTS {:} {:}".format(
                    fold_idx, kf.tr_classes[fold_idx],
                    kf.tr_idx[fold_idx], ds.Y[kf.tr_idx[fold_idx]],
                    kf.ts_idx[fold_idx], ds.Y[kf.ts_idx[fold_idx]]))
            self.assert_array_equal(
                tr_idx_expected[fold_idx], kf.tr_idx[fold_idx])
            self.assert_array_equal(
                ts_idx_expected[fold_idx], kf.ts_idx[fold_idx])
    def test_stratifiedkfold(self):
        """Check CDataSplitterStratifiedKFold against known fold indices."""
        ds = CDLRandom(n_samples=10, random_state=0).load()

        self.logger.info("Testing Stratified K-Fold")
        kf = CDataSplitterStratifiedKFold(
            num_folds=2, random_state=5000).compute_indices(ds)

        import sklearn
        # Compare (major, minor) numerically: plain string comparison of
        # version numbers is lexicographic and breaks for versions such
        # as '0.100' vs '0.22'.
        sk_version = tuple(
            int(v) for v in sklearn.__version__.split('.')[:2])
        if sk_version < (0, 22):  # TODO: REMOVE AFTER BUMPING DEPS
            # v0.22 changed the model to fix an issue related test set size
            # https://github.com/scikit-learn/scikit-learn/pull/14704
            tr_idx_expected = [CArray([4, 5, 6, 9]), CArray([0, 1, 2, 3, 7, 8])]
            ts_idx_expected = [CArray([0, 1, 2, 3, 7, 8]), CArray([4, 5, 6, 9])]
        else:
            tr_idx_expected = [CArray([1, 2, 7, 8, 9]), CArray([0, 3, 4, 5, 6])]
            ts_idx_expected = [CArray([0, 3, 4, 5, 6]), CArray([1, 2, 7, 8, 9])]

        self.assertEqual(len(kf.tr_idx), 2)
        self.assertEqual(len(kf.ts_idx), 2)

        self.logger.info("DS classes:\n{:}".format(ds.Y))

        for fold_idx in range(kf.num_folds):
            self.logger.info("{:} fold: \nTR {:} \nTS {:}"
                             "".format(fold_idx, kf.tr_idx[fold_idx],
                                       kf.ts_idx[fold_idx]))
            self.assert_array_equal(
                tr_idx_expected[fold_idx], kf.tr_idx[fold_idx])
            self.assert_array_equal(
                ts_idx_expected[fold_idx], kf.ts_idx[fold_idx])
    def test_shuffle(self):
        """Check CDataSplitterShuffle against known fold indices."""
        ds = CDLRandom(n_samples=10, random_state=0).load()

        self.logger.info("Testing Shuffle ")
        kf = CDataSplitterShuffle(
            num_folds=2, train_size=0.2,
            random_state=5000).compute_indices(ds)

        tr_idx_expected = [CArray([1, 2]), CArray([9, 3])]
        ts_idx_expected = [CArray([6, 4, 7, 0, 3, 9, 5, 8]),
                           CArray([7, 5, 4, 0, 8, 2, 6, 1])]

        self.assertEqual(len(kf.tr_idx), 2)
        self.assertEqual(len(kf.ts_idx), 2)

        self.logger.info("DS classes:\n{:}".format(ds.Y))

        for fold_idx in range(kf.num_folds):
            self.logger.info("{:} fold: \nTR {:} \nTS {:}"
                             "".format(fold_idx, kf.tr_idx[fold_idx],
                                       kf.ts_idx[fold_idx]))
            # Check both train and test indices of this fold
            for expected, computed in (
                    (tr_idx_expected[fold_idx], kf.tr_idx[fold_idx]),
                    (ts_idx_expected[fold_idx], kf.ts_idx[fold_idx])):
                self.assert_array_equal(expected, computed)
Example #18
0
    def _dataset_creation_blobs(self):
        """Create train/test splits, normalize them and build the
        corresponding PyTorch data loaders."""
        self.logger.info("\tTest dataset creation")
        # generate synthetic data
        dataset = CDLRandom(n_samples=self.n_samples_tr + self.n_samples_ts,
                            n_classes=self.n_classes,
                            n_features=self.n_features,
                            n_redundant=0, n_clusters_per_class=1,
                            class_sep=2, random_state=0).load()

        # Split in training and test
        self.tr, self.ts = CTrainTestSplit(
            train_size=self.n_samples_tr,
            test_size=self.n_samples_ts,
            random_state=0).split(dataset)

        # Normalize the data (fit on train only, apply to both)
        nmz = CNormalizerMinMax()
        self.tr.X = nmz.fit_transform(self.tr.X)
        self.ts.X = nmz.transform(self.ts.X)

        # Data loaders: shuffle only the training data
        self._tr_loader = CDataLoaderPyTorch(
            self.tr.X, self.tr.Y, self.batch_size,
            shuffle=True, transform=None).get_loader()
        self._ts_loader = CDataLoaderPyTorch(
            self.ts.X, self.ts.Y, self.batch_size,
            shuffle=False, transform=None).get_loader()
Example #19
0
 def setUp(self):
     """Fit an SVM on a simple 2D dataset rescaled to [0, 1]."""
     self.clf = CClassifierSVM()
     # generate synthetic data
     self.dataset = CDLRandom(n_features=2,
                              n_redundant=0,
                              n_informative=1,
                              n_clusters_per_class=1).load()
     self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X)
     self.clf.fit(self.dataset.X, self.dataset.Y)
    def test_train_test_split(self):
        """Check CTrainTestSplit indices and the resulting datasets."""
        ds = CDLRandom(n_samples=10, random_state=0).load()

        # No shuffle -> the split is deterministic and ordered
        tts = CTrainTestSplit(train_size=0.5, random_state=0, shuffle=False)

        tr_idx, ts_idx = tts.compute_indices(ds)

        self.logger.info("TR IDX:\n{:}".format(tr_idx))
        self.logger.info("TS IDX:\n{:}".format(ts_idx))

        # 10 samples, train_size=0.5 -> first half train, second half test
        tr_idx_expected = CArray([0, 1, 2, 3, 4])
        ts_idx_expected = CArray([5, 6, 7, 8, 9])

        self.assertIsInstance(tr_idx, CArray)
        self.assertIsInstance(ts_idx, CArray)

        self.assertFalse((tr_idx != tr_idx_expected).any())
        self.assertFalse((ts_idx != ts_idx_expected).any())

        tr, ts = tts.split(ds)

        # split() must return exactly the rows selected by compute_indices()
        tr_expected = ds[tr_idx, :]
        ts_expected = ds[ts_idx, :]

        self.assertIsInstance(tr, CDataset)
        self.assertIsInstance(ts, CDataset)

        self.assertFalse((tr.X != tr_expected.X).any())
        self.assertFalse((tr.Y != tr_expected.Y).any())
        self.assertFalse((ts.X != ts_expected.X).any())
        self.assertFalse((ts.Y != ts_expected.Y).any())

        self.logger.info("Testing splitting of sparse dataset")
        ds = CDLRandom(n_samples=10, random_state=0).load()

        ds = ds.tosparse()

        tts = CTrainTestSplit(train_size=0.25, random_state=0, shuffle=False)
        tr, ts = tts.split(ds)

        # 25% of 10 samples -> 2 train, 8 test
        self.assertEqual(2, tr.num_samples)
        self.assertEqual(8, ts.num_samples)

        # Sparseness must be preserved by the split
        self.assertTrue(tr.issparse)
        self.assertTrue(ts.issparse)
    def _set_up(self, kernel_name):
        """Create small dense/sparse datasets, two reference points and
        the kernel instance under test."""
        self.d_dense = CDLRandom(n_samples=10,
                                 n_features=5,
                                 n_redundant=0,
                                 n_informative=3,
                                 n_clusters_per_class=1,
                                 random_state=100).load()
        self.d_sparse = self.d_dense.tosparse()

        # First two patterns of each representation
        self.p1_dense, self.p2_dense = (
            self.d_dense.X[0, :], self.d_dense.X[1, :])
        self.p1_sparse, self.p2_sparse = (
            self.d_sparse.X[0, :], self.d_sparse.X[1, :])

        self.kernel = CKernel.create(kernel_name)
Example #22
0
    def setUp(self):
        """Fit a one-vs-all SVM on a small 3-class dataset."""
        self.ds = CDLRandom(n_classes=3, n_samples=50, random_state=0,
                            n_informative=3).load()

        self.logger.info("Fit an SVM and classify dataset...")
        self.ova = CClassifierMulticlassOVA(CClassifierSVM)
        self.ova.fit(self.ds.X, self.ds.Y)
        predictions = self.ova.predict(
            self.ds.X, return_decision_function=True)
        self.labels, self.scores = predictions
Example #23
0
    def setUp(self):
        """Fit a binary SVM on a small random dataset."""
        self.ds = CDLRandom(n_samples=50, random_state=0).load()

        self.logger.info("Train an SVM and classify dataset...")
        self.svm = CClassifierSVM()
        self.svm.fit(self.ds.X, self.ds.Y)
        predictions = self.svm.predict(
            self.ds.X, return_decision_function=True)
        self.labels, self.scores = predictions
Example #24
0
    def test_plot(self):
        """Visually compare a standard SVM and a Sec-SVM trained on the
        same 2D dataset."""
        ds = CDLRandom(n_samples=100,
                       n_features=2,
                       n_redundant=0,
                       random_state=100).load()

        self.logger.info("Train Sec SVM")
        sec_svm = CClassifierSecSVM(C=1, eta=0.1, eps=1e-3, lb=-0.1, ub=0.5)
        sec_svm.verbose = 2
        sec_svm.fit(ds.X, ds.Y)

        self.logger.info("Train SVM")
        svm = CClassifierSVM(C=1)
        svm.fit(ds.X, ds.Y)

        # Compare weight alignment between the two classifiers
        self._compute_alignment(ds, sec_svm, svm)

        fig = CFigure(height=5, width=8)
        # Left subplot: standard SVM
        fig.subplot(1, 2, 1)
        # Plot dataset points
        fig.sp.plot_ds(ds)
        # Plot objective function
        fig.sp.plot_fun(svm.predict,
                        multipoint=True,
                        plot_background=True,
                        plot_levels=False,
                        n_grid_points=100,
                        grid_limits=ds.get_bounds())
        fig.sp.title("SVM")

        # Right subplot: Sec-SVM
        fig.subplot(1, 2, 2)
        # Plot dataset points
        fig.sp.plot_ds(ds)
        # Plot objective function
        fig.sp.plot_fun(sec_svm.predict,
                        multipoint=True,
                        plot_background=True,
                        plot_levels=False,
                        n_grid_points=100,
                        grid_limits=ds.get_bounds())
        fig.sp.title("Sec-SVM")

        fig.show()
    def setUpClass(cls):
        """Create the shared dense/sparse 2D datasets once per test class.

        NOTE(review): takes `cls`, so it is expected to be decorated with
        @classmethod by the enclosing class — decorator not visible in
        this fragment; confirm.
        """
        CUnitTest.setUpClass()

        # Fixed seed so the datasets are reproducible across runs
        cls.seed = 2

        cls.ds = CDLRandom(n_features=2, n_redundant=0,
                           n_informative=2, n_clusters_per_class=1,
                           random_state=cls.seed).load()
        cls.ds_sparse = cls.ds.tosparse()
Example #26
0
    def _dataset_creation(self):
        """Create a 2D dataset with an extra carved-out class
        (normalization is currently disabled, see note below)."""
        # generate synthetic data
        self.ds = CDLRandom(n_samples=100, n_classes=3, n_features=2,
                            n_redundant=0, n_clusters_per_class=1,
                            class_sep=1, random_state=0).load()

        # Add a new class modifying one of the existing clusters
        self.ds.Y[(self.ds.X[:, 0] > 0).logical_and(
            self.ds.X[:, 1] > 1).ravel()] = self.ds.num_classes

        # Feature bounds used by the (disabled) normalizer
        self.lb = 0
        self.ub = 1

        # Data normalization
        self.normalizer = CNormalizerMinMax(
            feature_range=(self.lb, self.ub))
        # NOTE(review): the normalizer created above is immediately
        # discarded, so normalization is effectively disabled and the
        # branch below never runs — looks like a debug toggle; confirm.
        self.normalizer = None
        if self.normalizer is not None:
            self.ds.X = self.normalizer.fit_transform(self.ds.X)
    def test_preprocess(self):
        """Test classifier with preprocessors inside."""
        ds = CDLRandom().load()

        # All linear transformations
        linear_pre = ['min-max', 'mean-std']
        linear_args = [{'feature_range': (-1, 1)}, {}]
        self._test_preprocess(ds, self.nc, linear_pre, linear_args)

        # Mixed linear/nonlinear transformations
        self._test_preprocess(ds, self.nc, ['pca', 'unit-norm'], [{}, {}])
Example #28
0
    def setUp(self):
        """Test for init and fit methods."""

        # generate synthetic data
        self.dataset = CDLRandom(n_features=100, n_redundant=20,
                                 n_informative=25,
                                 n_clusters_per_class=2,
                                 random_state=0).load()

        self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X)

        kernel_types = (None, CKernelLinear, CKernelRBF, CKernelPoly)
        # NOTE(review): the kernels are passed as `preprocess`, not as
        # `kernel` — presumably intentional after the deprecation of the
        # Ridge `kernel` parameter, but worth confirming.
        self.ridges = [CClassifierRidge(
            preprocess=kernel() if kernel is not None else None)
            for kernel in kernel_types]
        self.logger.info(
            "Testing RIDGE with kernel functions: %s", str(kernel_types))

        for ridge in self.ridges:
            ridge.verbose = 2  # Enabling debug output for each classifier
            ridge.fit(self.dataset.X, self.dataset.Y)
    def setUp(self):
        """Split a random 3-class dataset in half and fit a 3-NN classifier."""
        ds = CDLRandom(n_samples=100, n_classes=3, n_features=2,
                       n_redundant=0, n_informative=2, n_clusters_per_class=1,
                       random_state=10000).load()

        # First half for training, second half kept aside for testing
        self.dataset, self.test = ds[:50, :], ds[50:, :]

        self.logger.info("Initializing KNeighbors Classifier... ")
        self.knn = CClassifierKNN(n_neighbors=3)
        self.knn.fit(self.dataset)
class TestCClassifierNearestCentroid(CClassifierTestCases):
    """Unit test for CClassifierNearestCentroid."""

    def setUp(self):
        """Test for init and fit methods."""
        # Simple 2D dataset, rescaled to [0, 1]
        dataset = CDLRandom(n_features=2, n_redundant=0, n_informative=1,
                            n_clusters_per_class=1).load()
        dataset.X = CNormalizerMinMax().fit_transform(dataset.X)
        self.dataset = dataset

        self.nc = CClassifierNearestCentroid()

    def test_plot(self):
        """ Compare the classifiers graphically"""
        # Three well-separated blobs in 2D
        ds = CDLRandomBlobs(n_samples=100, centers=3, n_features=2,
                            random_state=1).load()
        fig = self._test_plot(self.nc, ds, [-10])
        out_path = fm.join(fm.abspath(__file__), 'figs',
                           'test_c_classifier_nearest_centroid.pdf')
        fig.savefig(out_path)

    def test_fun(self):
        """Test for decision_function() and predict() methods."""
        # Dense and sparse representations must yield the same scores
        dense_scores = self._test_fun(self.nc, self.dataset.todense())
        sparse_scores = self._test_fun(self.nc, self.dataset.tosparse())
        self.assert_array_almost_equal(dense_scores, sparse_scores)

    def test_preprocess(self):
        """Test classifier with preprocessors inside."""
        ds = CDLRandom().load()

        # All linear transformations
        linear_pre = ['min-max', 'mean-std']
        linear_args = [{'feature_range': (-1, 1)}, {}]
        self._test_preprocess(ds, self.nc, linear_pre, linear_args)

        # Mixed linear/nonlinear transformations
        self._test_preprocess(ds, self.nc, ['pca', 'unit-norm'], [{}, {}])