Example #1
 def test_iris(self, basis):
   """SCML with the parametrized `basis` should reach a low
   class-separation ratio on the Iris dataset."""
   X, y = load_iris(return_X_y=True)
   scml = SCML_Supervised(basis=basis, n_basis=85, k_genuine=7, k_impostor=5,
                          random_state=42)
   scml.fit(X, y)
   csep = class_separation(scml.transform(X), y)
   assert csep < 0.24
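
These snippets read like excerpts from the metric-learn SCML test suite, so their module-level setup is omitted. The sketch below collects the imports the SCML snippets appear to rely on, plus a plausible `class_separation` helper consistent with how the tests use it (lower means better-separated classes); treat the helper as an assumption, since the real one lives in the test suite's shared module.

import numpy as np
import pytest
from sklearn.datasets import load_iris, make_classification
from sklearn.metrics import pairwise_distances
from sklearn.preprocessing import StandardScaler
from sklearn.utils.estimator_checks import check_estimator

from metric_learn import SCML_Supervised
from metric_learn.constraints import Constraints


def class_separation(X, labels):
  # Assumed helper: mean within-class pairwise distance divided by mean
  # between-class pairwise distance, averaged over classes. Lower is
  # better, matching the `csep < ...` assertions in these tests.
  unique_labels, label_inds = np.unique(labels, return_inverse=True)
  ratio = 0
  for li in range(len(unique_labels)):
    Xc = X[label_inds == li]
    Xnc = X[label_inds != li]
    ratio += pairwise_distances(Xc).mean() / pairwise_distances(Xc, Xnc).mean()
  return ratio / len(unique_labels)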
Example #2
 def test_big_n_features(self):
   """SCML should still separate classes reasonably when the number of
   features (60) is large relative to the number of samples (100)."""
   X, y = make_classification(n_samples=100, n_classes=3, n_features=60,
                              n_informative=60, n_redundant=0, n_repeated=0,
                              random_state=42)
   X = StandardScaler().fit_transform(X)
   scml = SCML_Supervised(random_state=42)
   scml.fit(X, y)
   csep = class_separation(scml.transform(X), y)
   assert csep < 0.7
Example #3
 def test_int_inputs_supervised(self, name):
     """A float passed for the integer-only parameter `name` should make
     fit() raise a ValueError with an explicit message."""
     value = 1.0
     d = {name: value}
     scml = SCML_Supervised(**d)
     X = np.array([[0, 0], [1, 1], [3, 3], [4, 4]])
     y = np.array([1, 1, 0, 0])
     msg = ("%s should be an integer, instead it is of type"
            " %s" % (name, type(value)))
     with pytest.raises(ValueError) as raised_error:
         scml.fit(X, y)
     assert msg == raised_error.value.args[0]
Example #4
  def test_small_n_basis_lda(self):
    """Choosing n_basis below the number of classes should emit a
    UserWarning about poor discriminative performance."""
    X = np.array([[0, 0], [1, 1], [2, 2], [3, 3]])
    y = np.array([0, 0, 1, 1])

    n_class = 2
    scml = SCML_Supervised(n_basis=n_class-1)
    msg = ("The number of basis is less than the number of classes, which may"
           " lead to poor discriminative performance.")
    with pytest.warns(UserWarning) as raised_warning:
      scml.fit(X, y)
    assert msg == raised_warning[0].message.args[0]
Example #5
 def test_lda_toy(self):
     """_generate_bases_LDA on collinear toy data should return the
     normalized direction of the line (up to sign) for every basis."""
     expected_n_basis = 7
     model = SCML_Supervised(n_basis=expected_n_basis)
     X = np.array([[0, 0], [1, 1], [2, 2], [3, 3]])
     y = np.array([0, 0, 1, 1])
     basis, n_basis = model._generate_bases_LDA(X, y)
     # All points are along the same line, so the only possible basis will be
     # the vector along that line normalized. In this case it is possible to
     # obtain it with positive or negative orientations.
     expected_basis = np.ones((expected_n_basis, 2)) / np.sqrt(2)
     assert n_basis == expected_n_basis
     np.testing.assert_allclose(np.abs(basis), expected_basis)
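
Why (1, 1)/√2: all four toy points lie on the line y = x, so every pairwise difference is a scalar multiple of (1, 1), and normalizing that direction is the only basis the generator can produce (up to sign, in the LDA case). A quick standalone check of the expected value:

import numpy as np

d = np.array([1.0, 1.0])
d /= np.linalg.norm(d)  # normalize the shared line direction
np.testing.assert_allclose(d, np.ones(2) / np.sqrt(2))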
Example #6
 def test_triplet_diffs_toy(self):
     """_generate_bases_dist_diff on collinear toy data should return the
     normalized direction of the line for every basis."""
     expected_n_basis = 10
     model = SCML_Supervised(n_basis=expected_n_basis)
     X = np.array([[0, 0], [1, 1], [2, 2], [3, 3]])
     triplets = np.array([[0, 1, 2], [0, 1, 3], [1, 0, 2], [1, 0, 3],
                          [2, 3, 1], [2, 3, 0], [3, 2, 1], [3, 2, 0]])
     basis, n_basis = model._generate_bases_dist_diff(triplets, X)
     # All points are along the same line, so the only possible basis will be
     # the vector along that line normalized.
     expected_basis = np.ones((expected_n_basis, 2)) / np.sqrt(2)
     assert n_basis == expected_n_basis
     np.testing.assert_allclose(basis, expected_basis)
Example #7
 def test_iris(self, basis):
   """
   SCML applied to Iris dataset should give better results when
   computing class separation.
   """
   X, y = load_iris(return_X_y=True)
   before = class_separation(X, y)
   scml = SCML_Supervised(basis=basis, n_basis=85, k_genuine=7, k_impostor=5,
                          random_state=42)
   scml.fit(X, y)
   after = class_separation(scml.transform(X), y)
   assert before > after + 0.03  # It's better by a margin of 0.03
Example #8
  def test_lda(self, n_samples, n_features, n_classes):
    """_generate_bases_LDA should produce the expected number of bases,
    with the expected shape, across dataset sizes."""
    X, y = make_classification(n_samples=n_samples, n_classes=n_classes,
                               n_features=n_features, n_informative=n_features,
                               n_redundant=0, n_repeated=0)
    X = StandardScaler().fit_transform(X)

    model = SCML_Supervised()
    basis, n_basis = model._generate_bases_LDA(X, y)

    num_eig = min(n_classes - 1, n_features)
    expected_n_basis = min(20 * n_features, n_samples * 2 * num_eig - 1)
    assert n_basis == expected_n_basis
    assert basis.shape == (expected_n_basis, n_features)
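
To make the bound concrete, here is the arithmetic for one hypothetical parametrize setting (the actual @pytest.mark.parametrize grid is not shown in these excerpts): with 100 samples, 5 features and 3 classes, the 20-bases-per-feature cap is the binding one.

# Hypothetical values; the real parametrize grid is not shown above.
n_samples, n_features, n_classes = 100, 5, 3
num_eig = min(n_classes - 1, n_features)             # min(2, 5) -> 2
expected_n_basis = min(20 * n_features,              # 20 * 5 -> 100
                       n_samples * 2 * num_eig - 1)  # 100 * 2 * 2 - 1 -> 399
assert expected_n_basis == 100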
Example #9
    def test_big_n_basis_lda(self):
        """Requesting n_samples * 2 * num_eig or more LDA bases should
        raise a ValueError, since the data cannot support that many."""
        X = np.array([[0, 0], [1, 1], [3, 3]])
        y = np.array([1, 2, 3])

        n_class = 3
        num_eig = min(n_class - 1, X.shape[1])
        n_basis = X.shape[0] * 2 * num_eig

        scml = SCML_Supervised(n_basis=n_basis)
        msg = ("Not enough samples to generate %d LDA bases, n_basis"
               "should be smaller than %d" % (n_basis, n_basis))
        with pytest.raises(ValueError) as raised_error:
            scml.fit(X, y)
        assert msg == raised_error.value.args[0]
Example #10
  def test_triplet_diffs(self, n_samples, n_features, n_classes):
    """_generate_bases_dist_diff should produce n_features * 80 bases of
    the expected shape from kNN triplets."""
    X, y = make_classification(n_samples=n_samples, n_classes=n_classes,
                               n_features=n_features, n_informative=n_features,
                               n_redundant=0, n_repeated=0)
    X = StandardScaler().fit_transform(X)

    model = SCML_Supervised()
    constraints = Constraints(y)
    triplets = constraints.generate_knntriplets(X, model.k_genuine,
                                                model.k_impostor)
    basis, n_basis = model._generate_bases_dist_diff(triplets, X)

    expected_n_basis = n_features * 80
    assert n_basis == expected_n_basis
    assert basis.shape == (expected_n_basis, n_features)
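
The triplet-difference path, by contrast, sizes the basis set from the feature count alone; for the same hypothetical n_features=5, the two rules give:

# Hypothetical n_features; contrasts the two sizing rules asserted above.
n_features = 5
lda_cap = 20 * n_features    # LDA path, before the sample-count cap
diff_cap = 80 * n_features   # distance-difference path
assert (lda_cap, diff_cap) == (100, 400)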
Example #11
  def test_lda(self, n_samples, n_features, n_classes):
    """
    Test that when n_basis=None, the correct n_basis is generated,
    for SCML_Supervised and different values of n_samples, n_features
    and n_classes.
    """
    X, y = make_classification(n_samples=n_samples, n_classes=n_classes,
                               n_features=n_features, n_informative=n_features,
                               n_redundant=0, n_repeated=0)
    X = StandardScaler().fit_transform(X)

    msg = "As no value for `n_basis` was selected, "
    with pytest.warns(UserWarning) as raised_warning:
      model = SCML_Supervised(n_basis=None)  # Explicit n_basis=None
      basis, n_basis = model._generate_bases_LDA(X, y)
    assert msg in str(raised_warning[0].message)

    num_eig = min(n_classes - 1, n_features)
    expected_n_basis = min(20 * n_features, n_samples * 2 * num_eig - 1)
    assert n_basis == expected_n_basis
    assert basis.shape == (expected_n_basis, n_features)
Example #12
  def test_triplet_diffs(self, n_samples, n_features, n_classes):
    """
    Test that the correct value of n_basis is being generated with
    different triplet constraints.
    """
    X, y = make_classification(n_samples=n_samples, n_classes=n_classes,
                               n_features=n_features, n_informative=n_features,
                               n_redundant=0, n_repeated=0)
    X = StandardScaler().fit_transform(X)
    model = SCML_Supervised(n_basis=None)  # Explicit n_basis=None
    constraints = Constraints(y)
    triplets = constraints.generate_knntriplets(X, model.k_genuine,
                                                model.k_impostor)

    msg = "As no value for `n_basis` was selected, "
    with pytest.warns(UserWarning) as raised_warning:
      basis, n_basis = model._generate_bases_dist_diff(triplets, X)
    assert msg in str(raised_warning[0].message)

    expected_n_basis = n_features * 80
    assert n_basis == expected_n_basis
    assert basis.shape == (expected_n_basis, n_features)
Example #13
 def test_scml(self):
     """SCML_Supervised should pass scikit-learn's check_estimator suite,
     emitting only the expected default-n_basis warning."""
     msg = "As no value for `n_basis` was selected, "
     with pytest.warns(UserWarning) as raised_warning:
         check_estimator(SCML_Supervised())
     assert msg in str(raised_warning[0].message)
Example #14
    (SDML(prior='identity', balance_param=1e-5), build_pairs)
]
ids_pairs_learners = [learner.__class__.__name__
                      for (learner, _) in pairs_learners]

classifiers = [(Covariance(), build_classification),
               (LFDA(), build_classification), (LMNN(), build_classification),
               (NCA(), build_classification), (RCA(), build_classification),
               (ITML_Supervised(max_iter=5), build_classification),
               (LSML_Supervised(), build_classification),
               (MMC_Supervised(max_iter=5), build_classification),
               (RCA_Supervised(num_chunks=5), build_classification),
               (SDML_Supervised(prior='identity',
                                balance_param=1e-5), build_classification),
               (SCML_Supervised(), build_classification)]
ids_classifiers = [learner.__class__.__name__
                   for (learner, _) in classifiers]

regressors = [(MLKR(init='pca'), build_regression)]
ids_regressors = [learner.__class__.__name__
                  for (learner, _) in regressors]

WeaklySupervisedClasses = (_PairsClassifierMixin, _TripletsClassifierMixin,
                           _QuadrupletsClassifierMixin)

tuples_learners = pairs_learners + triplets_learners + quadruplets_learners
ids_tuples_learners = ids_pairs_learners + ids_triplets_learners \
                      + ids_quadruplets_learners
Example #15
 def test_scml(self):
     """SCML_Supervised should pass scikit-learn's check_estimator suite."""
     check_estimator(SCML_Supervised())
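
Outside the test harness, the flow the Iris tests exercise is just fit-then-transform. A minimal usage sketch mirroring Example #1 (assumes metric-learn and scikit-learn are installed):

from sklearn.datasets import load_iris
from metric_learn import SCML_Supervised

X, y = load_iris(return_X_y=True)
scml = SCML_Supervised(n_basis=85, k_genuine=7, k_impostor=5, random_state=42)
scml.fit(X, y)              # learn the sparse compositional Mahalanobis metric
X_scml = scml.transform(X)  # embed points into the learned metric space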