import numpy as np
import pytest
from sklearn.datasets import load_iris, make_classification
from sklearn.preprocessing import StandardScaler
from sklearn.utils.estimator_checks import check_estimator

from metric_learn import SCML_Supervised
from metric_learn.constraints import Constraints

# (class_separation and the build_* helpers are defined elsewhere in the suite)


@pytest.mark.parametrize('basis', ('lda', 'triplet_diffs'))
def test_iris(self, basis):
  X, y = load_iris(return_X_y=True)
  scml = SCML_Supervised(basis=basis, n_basis=85, k_genuine=7, k_impostor=5,
                         random_state=42)
  scml.fit(X, y)
  csep = class_separation(scml.transform(X), y)
  assert csep < 0.24

def test_big_n_features(self):
  X, y = make_classification(n_samples=100, n_classes=3, n_features=60,
                             n_informative=60, n_redundant=0, n_repeated=0,
                             random_state=42)
  X = StandardScaler().fit_transform(X)
  scml = SCML_Supervised(random_state=42)
  scml.fit(X, y)
  csep = class_separation(scml.transform(X), y)
  assert csep < 0.7

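# A minimal end-to-end sketch of the API exercised by the tests above. It is
# an illustration only (the helper name _scml_usage_sketch is not part of the
# suite): SCML_Supervised builds k-NN triplets from the labels, learns its
# metric as a sparse combination of bases, and `transform` then embeds the
# data so that plain Euclidean distances follow the learned metric.
def _scml_usage_sketch():
  from sklearn.datasets import load_iris
  from metric_learn import SCML_Supervised

  X, y = load_iris(return_X_y=True)
  scml = SCML_Supervised(n_basis=85, k_genuine=7, k_impostor=5,
                         random_state=42)
  scml.fit(X, y)             # triplet generation and optimization happen here
  return scml.transform(X)   # embedding usable by any Euclidean-based model
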
# 'name' ranges over SCML_Supervised's integer hyper-parameters; this
# parametrization is representative.
@pytest.mark.parametrize('name', ['k_genuine', 'k_impostor', 'n_basis'])
def test_int_inputs_supervised(self, name):
  value = 1.0
  d = {name: value}
  scml = SCML_Supervised(**d)
  X = np.array([[0, 0], [1, 1], [3, 3], [4, 4]])
  y = np.array([1, 1, 0, 0])
  msg = ("%s should be an integer, instead it is of type"
         " %s" % (name, type(value)))
  with pytest.raises(ValueError) as raised_error:
    scml.fit(X, y)
  assert msg == raised_error.value.args[0]

def test_small_n_basis_lda(self):
  X = np.array([[0, 0], [1, 1], [2, 2], [3, 3]])
  y = np.array([0, 0, 1, 1])
  n_class = 2
  scml = SCML_Supervised(n_basis=n_class - 1)
  msg = ("The number of basis is less than the number of classes, which may"
         " lead to poor discriminative performance.")
  with pytest.warns(UserWarning) as raised_warning:
    scml.fit(X, y)
  assert msg == raised_warning[0].message.args[0]

def test_lda_toy(self):
  expected_n_basis = 7
  model = SCML_Supervised(n_basis=expected_n_basis)
  X = np.array([[0, 0], [1, 1], [2, 2], [3, 3]])
  y = np.array([0, 0, 1, 1])
  basis, n_basis = model._generate_bases_LDA(X, y)
  # All points are along the same line, so the only possible basis will be
  # the vector along that line, normalized. In this case it is possible to
  # obtain it with positive or negative orientations.
  expected_basis = np.ones((expected_n_basis, 2)) / np.sqrt(2)
  assert n_basis == expected_n_basis
  np.testing.assert_allclose(np.abs(basis), expected_basis)

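# Why (1, 1) / sqrt(2): every toy point lies on the line y = x, so the only
# direction available to discriminate along is (1, 1); normalized to unit
# length it becomes [1/sqrt(2), 1/sqrt(2)] ~= [0.7071, 0.7071]. A basis
# vector is only determined up to sign, hence the np.abs() in the comparison.
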
def test_triplet_diffs_toy(self):
  expected_n_basis = 10
  model = SCML_Supervised(n_basis=expected_n_basis)
  X = np.array([[0, 0], [1, 1], [2, 2], [3, 3]])
  triplets = np.array([[0, 1, 2], [0, 1, 3], [1, 0, 2], [1, 0, 3],
                       [2, 3, 1], [2, 3, 0], [3, 2, 1], [3, 2, 0]])
  basis, n_basis = model._generate_bases_dist_diff(triplets, X)
  # All points are along the same line, so the only possible basis will be
  # the vector along that line, normalized.
  expected_basis = np.ones((expected_n_basis, 2)) / np.sqrt(2)
  assert n_basis == expected_n_basis
  np.testing.assert_allclose(basis, expected_basis)

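# Reading the toy triplets above, assuming the (anchor, genuine, impostor)
# convention implied by the k_genuine / k_impostor parameters: e.g. [0, 1, 2]
# asks that point 0 end up closer to point 1 (same class) than to point 2
# (other class). Each basis is then built from distance differences along
# these triplets, which for collinear points again can only pick out the
# (1, 1) / sqrt(2) direction.
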
@pytest.mark.parametrize('basis', ('lda', 'triplet_diffs'))
def test_iris(self, basis):
  """
  SCML applied to the Iris dataset should give better (i.e. lower) class
  separation than the raw features.
  """
  X, y = load_iris(return_X_y=True)
  before = class_separation(X, y)
  scml = SCML_Supervised(basis=basis, n_basis=85, k_genuine=7, k_impostor=5,
                         random_state=42)
  scml.fit(X, y)
  after = class_separation(scml.transform(X), y)
  assert before > after + 0.03  # It's better by a margin of 0.03

# Problem sizes below are representative values for the lost parametrization.
@pytest.mark.parametrize('n_samples, n_features, n_classes',
                         [(100, 10, 3), (200, 20, 5)])
def test_lda(self, n_samples, n_features, n_classes):
  X, y = make_classification(n_samples=n_samples, n_classes=n_classes,
                             n_features=n_features, n_informative=n_features,
                             n_redundant=0, n_repeated=0)
  X = StandardScaler().fit_transform(X)
  model = SCML_Supervised()
  basis, n_basis = model._generate_bases_LDA(X, y)
  num_eig = min(n_classes - 1, n_features)
  expected_n_basis = min(20 * n_features, n_samples * 2 * num_eig - 1)
  assert n_basis == expected_n_basis
  assert basis.shape == (expected_n_basis, n_features)

def test_big_n_basis_lda(self):
  X = np.array([[0, 0], [1, 1], [3, 3]])
  y = np.array([1, 2, 3])
  n_class = 3
  num_eig = min(n_class - 1, X.shape[1])
  n_basis = X.shape[0] * 2 * num_eig
  scml = SCML_Supervised(n_basis=n_basis)
  # The expected message is compared verbatim with the estimator's error
  # string (note: no space between the two concatenated literals).
  msg = ("Not enough samples to generate %d LDA bases, n_basis"
         "should be smaller than %d" % (n_basis, n_basis))
  with pytest.raises(ValueError) as raised_error:
    scml.fit(X, y)
  assert msg == raised_error.value.args[0]

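# The arithmetic behind the failure above: X has 3 samples in 2 dimensions,
# so num_eig = min(3 - 1, 2) = 2 and the requested n_basis = 3 * 2 * 2 = 12
# exactly reaches the cap of n_samples * 2 * num_eig. Since n_basis must be
# strictly smaller than that cap, the LDA strategy raises the ValueError.
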
# Problem sizes below are representative values for the lost parametrization.
@pytest.mark.parametrize('n_samples, n_features, n_classes',
                         [(100, 10, 3), (200, 20, 5)])
def test_triplet_diffs(self, n_samples, n_features, n_classes):
  X, y = make_classification(n_samples=n_samples, n_classes=n_classes,
                             n_features=n_features, n_informative=n_features,
                             n_redundant=0, n_repeated=0)
  X = StandardScaler().fit_transform(X)
  model = SCML_Supervised()
  constraints = Constraints(y)
  triplets = constraints.generate_knntriplets(X, model.k_genuine,
                                              model.k_impostor)
  basis, n_basis = model._generate_bases_dist_diff(triplets, X)
  expected_n_basis = n_features * 80
  assert n_basis == expected_n_basis
  assert basis.shape == (expected_n_basis, n_features)

# Problem sizes below are representative values for the lost parametrization.
@pytest.mark.parametrize('n_samples, n_features, n_classes',
                         [(100, 10, 3), (200, 20, 5)])
def test_lda(self, n_samples, n_features, n_classes):
  """
  Test that when n_basis=None the correct n_basis is generated, for
  SCML_Supervised and different values of n_samples, n_features and
  n_classes.
  """
  X, y = make_classification(n_samples=n_samples, n_classes=n_classes,
                             n_features=n_features, n_informative=n_features,
                             n_redundant=0, n_repeated=0)
  X = StandardScaler().fit_transform(X)
  msg = "As no value for `n_basis` was selected, "
  with pytest.warns(UserWarning) as raised_warning:
    model = SCML_Supervised(n_basis=None)  # Explicit n_basis=None
    basis, n_basis = model._generate_bases_LDA(X, y)
  assert msg in str(raised_warning[0].message)
  num_eig = min(n_classes - 1, n_features)
  expected_n_basis = min(20 * n_features, n_samples * 2 * num_eig - 1)
  assert n_basis == expected_n_basis
  assert basis.shape == (expected_n_basis, n_features)

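# Worked instance of the default-n_basis rule checked above (values are
# illustrative): with n_samples=100, n_features=10 and n_classes=3,
#   num_eig = min(3 - 1, 10) = 2
#   n_basis = min(20 * 10, 100 * 2 * 2 - 1) = min(200, 399) = 200
# so the LDA strategy is capped by the 20-bases-per-feature rule here, and by
# the sample budget only when the dataset is small relative to n_features.
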
# Problem sizes below are representative values for the lost parametrization.
@pytest.mark.parametrize('n_samples, n_features, n_classes',
                         [(100, 10, 3), (200, 20, 5)])
def test_triplet_diffs(self, n_samples, n_features, n_classes):
  """
  Test that the correct value of n_basis is generated with different
  triplet constraints.
  """
  X, y = make_classification(n_samples=n_samples, n_classes=n_classes,
                             n_features=n_features, n_informative=n_features,
                             n_redundant=0, n_repeated=0)
  X = StandardScaler().fit_transform(X)
  model = SCML_Supervised(n_basis=None)  # Explicit n_basis=None
  constraints = Constraints(y)
  triplets = constraints.generate_knntriplets(X, model.k_genuine,
                                              model.k_impostor)
  msg = "As no value for `n_basis` was selected, "
  with pytest.warns(UserWarning) as raised_warning:
    basis, n_basis = model._generate_bases_dist_diff(triplets, X)
  assert msg in str(raised_warning[0].message)
  expected_n_basis = n_features * 80
  assert n_basis == expected_n_basis
  assert basis.shape == (expected_n_basis, n_features)

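# For the triplet-differences strategy the default checked above is simply
# proportional to the input dimensionality: with n_features=10 it selects
# 10 * 80 = 800 basis vectors, independently of n_samples and n_classes.
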
def test_scml(self):
  msg = "As no value for `n_basis` was selected, "
  with pytest.warns(UserWarning) as raised_warning:
    check_estimator(SCML_Supervised())
  assert msg in str(raised_warning[0].message)

                  (SDML(prior='identity', balance_param=1e-5), build_pairs)]
ids_pairs_learners = list(map(lambda x: x.__class__.__name__,
                              [learner for (learner, _) in pairs_learners]))

classifiers = [(Covariance(), build_classification),
               (LFDA(), build_classification),
               (LMNN(), build_classification),
               (NCA(), build_classification),
               (RCA(), build_classification),
               (ITML_Supervised(max_iter=5), build_classification),
               (LSML_Supervised(), build_classification),
               (MMC_Supervised(max_iter=5), build_classification),
               (RCA_Supervised(num_chunks=5), build_classification),
               (SDML_Supervised(prior='identity', balance_param=1e-5),
                build_classification),
               (SCML_Supervised(), build_classification)]
ids_classifiers = list(map(lambda x: x.__class__.__name__,
                           [learner for (learner, _) in classifiers]))

regressors = [(MLKR(init='pca'), build_regression)]
ids_regressors = list(map(lambda x: x.__class__.__name__,
                          [learner for (learner, _) in regressors]))

WeaklySupervisedClasses = (_PairsClassifierMixin,
                           _TripletsClassifierMixin,
                           _QuadrupletsClassifierMixin)

tuples_learners = pairs_learners + triplets_learners + quadruplets_learners
ids_tuples_learners = ids_pairs_learners + ids_triplets_learners \
    + ids_quadruplets_learners

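# A minimal sketch of how these (estimator, builder) lists are consumed:
# pytest parametrizes a test over the list and labels the generated cases
# with the matching ids_* names. The clonability check below is only an
# illustrative example, not one of the suite's own tests.
@pytest.mark.parametrize('estimator, build_dataset', classifiers,
                         ids=ids_classifiers)
def test_estimators_are_clonable(estimator, build_dataset):
  from sklearn.base import clone

  clone(estimator)  # scikit-learn compatibility requires cloneable estimators
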
def test_scml(self):
  check_estimator(SCML_Supervised())