示例#1
0
    def test_clear(self):
        """
        Tests KNN clearing (:func:`~fatf.utils.models.models.KNN.clear`).

        Clearing an unfitted model is expected to raise
        ``UnfittedModelError``, whereas a fitted model that has been cleared
        has to pass the unfitted-internals check again.
        """
        neighbours = 2
        model = fumm.KNN(k=neighbours)
        self._test_unfitted_internals(
            model, init_k=neighbours, init_is_classifier=True)

        # An unfitted model refuses to be cleared
        with pytest.raises(UnfittedModelError) as error:
            model.clear()
        assert self.unfitted_model_error == str(error.value)

        # A fitted model is reset by clearing
        model.fit(self.X, self.y)
        fitted_arguments = (
            False, self.X, self.y, self.X_n, self.majority_label,
            self.X_categorical_indices, self.X_numerical_indices,
            self.unique_y, self.unique_y_counts, self.unique_y_probabilities)
        self._test_fitted_internals(model, *fitted_arguments)
        model.clear()
        self._test_unfitted_internals(
            model, init_k=neighbours, init_is_classifier=True)
示例#2
0
    def test_predict_proba(self):
        """
        Tests probas (:func:`~fatf.utils.models.models.KNN.predict_proba`).

        The error conditions are checked first (regressor mode, unfitted
        model and malformed query arrays). Afterwards the predicted
        probabilities are compared with reference values for numerical and
        categorical targets, on unstructured and structured data, with ``k``
        set to 3 and to 10.
        """
        # pylint: disable=too-many-statements
        def new_classifier(neighbours):
            # Builds a classifier and runs the unfitted-internals check on it.
            model = fumm.KNN(k=neighbours)
            self._test_unfitted_internals(
                model, init_k=neighbours, init_is_classifier=True)
            return model

        def fit_and_check(model, setup):
            # ``setup`` holds the positional arguments of the
            # fitted-internals check; items 1 and 2 are the training data
            # and the training target.
            data, target = setup[1:3]
            model.fit(data, target)
            self._test_fitted_internals(model, *setup)

        class_statistics = (self.unique_y_counts, self.unique_y_probabilities)
        numerical_plain = (
            False, self.X, self.y, self.X_n, self.majority_label,
            self.X_categorical_indices, self.X_numerical_indices,
            self.unique_y) + class_statistics
        numerical_struct = (
            True, self.X_struct, self.y, self.X_n, self.majority_label,
            self.X_struct_categorical_indices,
            self.X_struct_numerical_indices, self.unique_y) + class_statistics
        categorical_plain = (
            False, self.X, self.y_categorical, self.X_n,
            self.majority_label_categorical, self.X_categorical_indices,
            self.X_numerical_indices,
            self.unique_y_categorical) + class_statistics
        categorical_struct = (
            True, self.X_struct, self.y_categorical, self.X_n,
            self.majority_label_categorical,
            self.X_struct_categorical_indices,
            self.X_struct_numerical_indices,
            self.unique_y_categorical) + class_statistics

        # A regressor cannot output probabilities
        neighbours = 3
        regressor = fumm.KNN(k=neighbours, mode='r')
        self._test_unfitted_internals(
            regressor, init_k=neighbours, init_is_classifier=False)
        regressor.fit(self.X, self.y)
        self._test_fitted_internals(
            regressor, False, self.X, self.y, self.X_n,
            self.majority_label_regressor, self.X_categorical_indices,
            self.X_numerical_indices)
        with pytest.raises(RuntimeError) as error:
            regressor.predict_proba(self.X_test)
        assert str(error.value) == self.runtime_error

        # The remaining errors are tested with a classifier
        model = new_classifier(neighbours)

        # Unfitted model
        with pytest.raises(UnfittedModelError) as error:
            model.predict_proba(self.X_test)
        assert self.unfitted_model_error == str(error.value)

        fit_and_check(model, numerical_plain)

        # X is not 2D
        for malformed in (self.X_3D, self.y):
            with pytest.raises(IncorrectShapeError) as error:
                model.predict_proba(malformed)
            assert self.incorrect_shape_error_singular == str(error.value)

        # dtype is not similar to the training data
        for incompatible in (self.X_cat, self.X_cat_struct, self.X_struct):
            with pytest.raises(ValueError) as error:
                model.predict_proba(incompatible)
            assert self.value_error_dtype == str(error.value)

        # Predict 0 examples
        no_rows = np.ones((0, 2), dtype=int)
        with pytest.raises(IncorrectShapeError) as error:
            model.predict_proba(no_rows)
        assert self.incorrect_shape_error_rows == str(error.value)

        # The number of features disagrees...
        # ...unstructured
        with pytest.raises(IncorrectShapeError) as error:
            model.predict_proba(self.X_distances)
        message = str(error.value)
        assert message.startswith(self.incorrect_shape_error_columns)
        # ...structured
        model.clear()
        self._test_unfitted_internals(
            model, init_k=neighbours, init_is_classifier=True)
        fit_and_check(model, numerical_struct)
        with pytest.raises(ValueError) as error:
            model.predict_proba(self.X_test_struct[['a']])
        assert self.value_error_dtype == str(error.value)

        # Numerical and categorical classifiers, each of them on
        # unstructured and structured data
        prediction_cases = (
            (numerical_plain, self.X_test),
            (numerical_struct, self.X_test_struct),
            (categorical_plain, self.X_test),
            (categorical_struct, self.X_test_struct),
        )
        for setup, queries in prediction_cases:
            # k = 3: compare with the reference probabilities
            model = new_classifier(3)
            fit_and_check(model, setup)
            probabilities = model.predict_proba(queries)
            assert np.isclose(
                probabilities, self.y_test_3_proba, atol=1e-3).all()

            # k = 10: every query row is expected to get the very same
            # probability vector
            model = new_classifier(10)
            fit_and_check(model, setup)
            probabilities = model.predict_proba(queries)
            training_proba = self.y_test_3_trainig_proba
            expected = np.full(
                (probabilities.shape[0], training_proba.shape[0]),
                fill_value=training_proba)
            assert np.isclose(probabilities, expected, atol=1e-3).all()
示例#3
0
    def test_predict(self):
        """
        Tests KNN predictions (:func:`~fatf.utils.models.models.KNN.predict`).

        The error conditions are checked first (unfitted model and malformed
        query arrays). Afterwards predictions of regressors, numerical
        classifiers and categorical classifiers are compared with reference
        values on unstructured and structured data, with ``k`` set to 3 and
        to 10. The last check covers a tie between two classes.
        """
        # pylint: disable=too-many-statements
        def new_model(neighbours, classifier=True):
            # Builds a model and runs the unfitted-internals check on it.
            if classifier:
                model = fumm.KNN(k=neighbours)
            else:
                model = fumm.KNN(k=neighbours, mode='r')
            self._test_unfitted_internals(
                model, init_k=neighbours, init_is_classifier=classifier)
            return model

        def fit_and_check(model, setup):
            # ``setup`` holds the positional arguments of the
            # fitted-internals check; items 1 and 2 are the training data
            # and the training target.
            data, target = setup[1:3]
            model.fit(data, target)
            self._test_fitted_internals(model, *setup)

        def is_close(left, right):
            # Numerical comparison with the tolerance used in this test.
            return np.isclose(left, right, atol=1e-3).all()

        plain_indices = (
            self.X_categorical_indices, self.X_numerical_indices)
        struct_indices = (
            self.X_struct_categorical_indices,
            self.X_struct_numerical_indices)
        class_statistics = (self.unique_y_counts, self.unique_y_probabilities)

        regressor_plain = (
            False, self.X, self.y, self.X_n,
            self.majority_label_regressor) + plain_indices
        regressor_struct = (
            True, self.X_struct, self.y, self.X_n,
            self.majority_label_regressor) + struct_indices
        numerical_plain = (
            (False, self.X, self.y, self.X_n, self.majority_label)
            + plain_indices + (self.unique_y, ) + class_statistics)
        numerical_struct = (
            (True, self.X_struct, self.y, self.X_n, self.majority_label)
            + struct_indices + (self.unique_y, ) + class_statistics)
        categorical_plain = (
            (False, self.X, self.y_categorical, self.X_n,
             self.majority_label_categorical)
            + plain_indices + (self.unique_y_categorical, )
            + class_statistics)
        categorical_struct = (
            (True, self.X_struct, self.y_categorical, self.X_n,
             self.majority_label_categorical)
            + struct_indices + (self.unique_y_categorical, )
            + class_statistics)

        neighbours = 2
        model = new_model(neighbours)

        # Unfitted model
        with pytest.raises(UnfittedModelError) as error:
            model.predict(self.X_test)
        assert self.unfitted_model_error == str(error.value)

        # X is not 2D
        model.fit(self.X, self.y)
        for malformed in (self.X_3D, self.y):
            with pytest.raises(IncorrectShapeError) as error:
                model.predict(malformed)
            assert self.incorrect_shape_error_singular == str(error.value)

        # dtype is not similar to the training data
        for incompatible in (self.X_cat, self.X_cat_struct, self.X_struct):
            with pytest.raises(ValueError) as error:
                model.predict(incompatible)
            assert self.value_error_dtype == str(error.value)

        # Predict 0 examples
        no_rows = np.ones((0, 2), dtype=int)
        with pytest.raises(IncorrectShapeError) as error:
            model.predict(no_rows)
        assert self.incorrect_shape_error_rows == str(error.value)

        # The number of features disagrees...
        # ...unstructured
        with pytest.raises(IncorrectShapeError) as error:
            model.predict(self.X_distances)
        message = str(error.value)
        assert message.startswith(self.incorrect_shape_error_columns)
        # ...structured
        model.clear()
        self._test_unfitted_internals(
            model, init_k=neighbours, init_is_classifier=True)
        fit_and_check(model, numerical_struct)
        with pytest.raises(ValueError) as error:
            model.predict(self.X_test_struct[['a']])
        assert self.value_error_dtype == str(error.value)

        # Each row: is it a classifier, the fitting set-up, the query
        # array, the reference predictions for k = 3 and the comparison
        # function. Order: regressor, numerical classifier, categorical
        # classifier -- each on unstructured and then on structured data.
        prediction_cases = (
            (False, regressor_plain, self.X_test,
             self.y_test_3_regression, is_close),
            (False, regressor_struct, self.X_test_struct,
             self.y_test_3_regression, is_close),
            (True, numerical_plain, self.X_test,
             self.y_test_3_classification, is_close),
            (True, numerical_struct, self.X_test_struct,
             self.y_test_3_classification, is_close),
            (True, categorical_plain, self.X_test,
             self.y_test_3_classification_categorical, np.array_equal),
            (True, categorical_struct, self.X_test_struct,
             self.y_test_3_classification_categorical, np.array_equal),
        )
        for classifier, setup, queries, reference, matches in prediction_cases:
            # k = 3: compare with the reference predictions
            model = new_model(3, classifier)
            fit_and_check(model, setup)
            predictions = model.predict(queries)
            assert matches(predictions, reference)

            # k = 10: every query is expected to get the majority label,
            # which is the fifth item of the set-up
            model = new_model(10, classifier)
            fit_and_check(model, setup)
            predictions = model.predict(queries)
            expected = np.array(predictions.shape[0] * [setup[4]])
            assert matches(predictions, expected)

        # Test when the majority class is ambiguous (3 vs. 3 labels), k = 4
        tied_target = np.array([0, 1, 0, 1, 0, 1])
        tied_setup = (
            (False, self.X, tied_target, self.X_n, 0) + plain_indices
            + (np.array([0, 1]), np.array([3, 3]), np.array([.5, .5])))
        tied_queries = np.array([[0, 0], [2, 0]])
        tied_expected = np.array([0, 0])
        #
        model = new_model(4)
        fit_and_check(model, tied_setup)
        predictions = model.predict(tied_queries)
        assert np.array_equal(predictions, tied_expected)
示例#4
0
    def test_get_distances(self):
        """
        Tests distances (:func:`~fatf.utils.models.models.KNN._get_distances`).

        A single model instance is reused for all of the data sets: it is
        cleared and fitted anew before each distance matrix is compared with
        its reference.
        """
        neighbours = 2
        model = fumm.KNN(k=neighbours)

        # Each row: is the data structured, the training data, its
        # categorical and numerical indices, the query array and the
        # reference distances.
        scenarios = (
            # Numerical distances on unstructured
            (False, self.X, self.X_categorical_indices,
             self.X_numerical_indices, self.X_test, self.X_distances),
            # Categorical distances on unstructured
            (False, self.X_cat, self.X_cat_categorical_indices,
             self.X_cat_numerical_indices, self.X_cat_test,
             self.X_cat_distances),
            # Numerical distances on structured
            (True, self.X_struct, self.X_struct_categorical_indices,
             self.X_struct_numerical_indices, self.X_test_struct,
             self.X_distances),
            # Categorical distances on structured
            (True, self.X_cat_struct, self.X_cat_struct_categorical_indices,
             self.X_cat_struct_numerical_indices, self.X_cat_struct_test,
             self.X_cat_distances),
            # Numerical-categorical distances on structured
            (True, self.X_mix, self.X_mix_categorical_indices,
             self.X_mix_numerical_indices, self.X_test_mix,
             self.X_mix_distances),
        )
        for position, scenario in enumerate(scenarios):
            (is_structured, data, categorical_indices, numerical_indices,
             queries, reference) = scenario

            if position:
                # The model still holds the previous fit -- drop it first
                model.clear()
            self._test_unfitted_internals(
                model, init_k=neighbours, init_is_classifier=True)

            model.fit(data, self.y)
            self._test_fitted_internals(
                model, is_structured, data, self.y, self.X_n,
                self.majority_label, categorical_indices, numerical_indices,
                self.unique_y, self.unique_y_counts,
                self.unique_y_probabilities)

            distances = model._get_distances(queries)
            assert np.isclose(distances, reference, atol=1e-3).all()
示例#5
0
    def test_fit(self):
        """
        Tests KNN fitting (:func:`~fatf.utils.models.models.KNN.fit`).

        Covers fitting an already fitted model, malformed ``X``/``y`` inputs,
        fitting a regressor to string labels, and the internal state of
        classifiers and regressors fitted to a range of data sets.

        Input arrays are always built outside of the ``pytest.raises`` blocks
        so that the expected exception can only originate from ``fit``.
        """
        # pylint: disable=too-many-statements
        k = 2
        clf = fumm.KNN(k=k)
        self._test_unfitted_internals(clf, init_k=k)
        clf.fit(self.X, self.y)
        self._test_fitted_internals(clf, False, self.X, self.y, self.X_n,
                                    self.majority_label,
                                    self.X_categorical_indices,
                                    self.X_numerical_indices, self.unique_y,
                                    self.unique_y_counts,
                                    self.unique_y_probabilities)

        # Fitting a pre-fitted model
        with pytest.raises(PrefittedModelError) as exception_info:
            clf.fit(self.X, self.y)
        assert self.prefitted_model_error == str(exception_info.value)

        clf = fumm.KNN(k=k)
        self._test_unfitted_internals(clf, init_k=k)
        # X is not 2D
        with pytest.raises(IncorrectShapeError) as exception_info:
            clf.fit(self.X_3D, self.y)
        assert self.incorrect_shape_error_2d == str(exception_info.value)
        with pytest.raises(IncorrectShapeError) as exception_info:
            clf.fit(self.X_3D, self.X)
        assert self.incorrect_shape_error_2d == str(exception_info.value)
        # y is not 1D
        with pytest.raises(IncorrectShapeError) as exception_info:
            clf.fit(self.X, self.X_3D)
        assert self.incorrect_shape_error_1d == str(exception_info.value)

        # 0 examples
        no_rows = np.ndarray((0, 5))
        with pytest.raises(IncorrectShapeError) as exception_info:
            clf.fit(no_rows, self.y)
        assert self.incorrect_shape_error_X0 == str(exception_info.value)
        no_rows_struct = np.ndarray((0, ), dtype=[('a', str), ('b', int)])
        with pytest.raises(IncorrectShapeError) as exception_info:
            clf.fit(no_rows_struct, self.y)
        assert self.incorrect_shape_error_X0 == str(exception_info.value)

        # 0 features
        no_columns = np.ndarray((5, 0))
        with pytest.raises(IncorrectShapeError) as exception_info:
            clf.fit(no_columns, self.y)
        assert self.incorrect_shape_error_X1 == str(exception_info.value)

        # Test whether the shape of X agrees with the shape of y
        with pytest.raises(IncorrectShapeError) as exception_info:
            clf.fit(self.X, self.X_numerical_indices)
        assert self.incorrect_shape_error_Xy == str(exception_info.value)

        # Fitting regressor to a categorical label vector
        clf = fumm.KNN(k=k, mode='r')
        self._test_unfitted_internals(clf, init_k=k, init_is_classifier=False)
        # One string label per training row
        y_pred = np.array(self.X.shape[0] * ['a'])
        with pytest.raises(TypeError) as exception_info:
            clf.fit(self.X, y_pred)
        assert self.type_error_regressor == str(exception_info.value)

        # Fitting to a structured numerical array
        clf = fumm.KNN(k=k)
        self._test_unfitted_internals(clf, init_k=k, init_is_classifier=True)
        clf.fit(self.X_struct, self.y)
        self._test_fitted_internals(clf, True, self.X_struct, self.y, self.X_n,
                                    self.majority_label,
                                    self.X_struct_categorical_indices,
                                    self.X_struct_numerical_indices,
                                    self.unique_y, self.unique_y_counts,
                                    self.unique_y_probabilities)

        # Fitting to a structured mixed numerical-categorical array
        clf = fumm.KNN(k=k)
        self._test_unfitted_internals(clf, init_k=k, init_is_classifier=True)
        clf.fit(self.X_mix, self.y)
        self._test_fitted_internals(clf, True, self.X_mix, self.y, self.X_n,
                                    self.majority_label,
                                    self.X_mix_categorical_indices,
                                    self.X_mix_numerical_indices,
                                    self.unique_y, self.unique_y_counts,
                                    self.unique_y_probabilities)

        # Fit a regressor to a numerical data and check internal parameters
        clf = fumm.KNN(k=k, mode='regressor')
        self._test_unfitted_internals(clf, init_k=k, init_is_classifier=False)
        clf.fit(self.X_short, self.y_short_numerical)
        self._test_fitted_internals(
            clf, False, self.X_short, self.y_short_numerical, self.X_short_n,
            self.short_numerical_majority_label_regressor,
            self.X_short_categorical_indices, self.X_short_numerical_indices)

        # Fit a classifier to a numerical data and check internal parameters
        clf = fumm.KNN(k=k, mode='classifier')
        self._test_unfitted_internals(clf, init_k=k, init_is_classifier=True)
        clf.fit(self.X_short, self.y_short_numerical)
        self._test_fitted_internals(
            clf, False, self.X_short, self.y_short_numerical, self.X_short_n,
            self.short_numerical_majority_label_classifier,
            self.X_short_categorical_indices, self.X_short_numerical_indices,
            self.short_numerical_unique_y,
            self.short_numerical_unique_y_counts,
            self.short_numerical_unique_y_probabilities)

        # Fit a classifier to a categorical data and check internal parameters
        clf = fumm.KNN(k=k, mode='classifier')
        self._test_unfitted_internals(clf, init_k=k, init_is_classifier=True)
        clf.fit(self.X_short, self.y_short)
        self._test_fitted_internals(
            clf, False, self.X_short, self.y_short, self.X_short_n,
            self.short_majority_label, self.X_short_categorical_indices,
            self.X_short_numerical_indices, self.short_unique_y,
            self.short_unique_y_counts, self.short_unique_y_probabilities)
示例#6
0
    def test_knn(self):
        """
        Tests KNN initialisation.

        Invalid ``k`` and ``mode`` arguments have to be rejected with the
        expected messages, whereas valid ones have to yield a model that
        passes the unfitted-internals check for the requested mode.
        """
        # k is not an integer
        for wrong_type_k in (None, 'k', -5.5):
            with pytest.raises(TypeError) as error:
                fumm.KNN(k=wrong_type_k)
            assert str(error.value) == self.type_error_k

        # k is a negative integer
        with pytest.raises(ValueError) as error:
            fumm.KNN(k=-5)
        assert str(error.value) == self.value_error_k

        # mode specifier is wrong
        for wrong_mode in (object(), 3, 'C'):
            with pytest.raises(ValueError) as error:
                fumm.KNN(k=5, mode=wrong_mode)
            assert str(error.value) == self.value_error_mode

        # No arguments: k is expected to equal self.k
        model = fumm.KNN()
        self._test_unfitted_internals(
            model, init_k=self.k, init_is_classifier=True)

        # Only k is given
        neighbours = 8
        model = fumm.KNN(k=neighbours)
        self._test_unfitted_internals(
            model, init_k=neighbours, init_is_classifier=True)

        # Every valid mode specifier paired with the expected model kind
        valid_modes = (
            (None, True),
            ('c', True),
            ('classifier', True),
            ('r', False),
            ('regressor', False),
        )
        for mode, is_classifier in valid_modes:
            model = fumm.KNN(k=neighbours, mode=mode)
            self._test_unfitted_internals(
                model, init_k=neighbours, init_is_classifier=is_classifier)
示例#7
0
def test_validate_input_local_fidelity():
    """
    Tests the ``_validate_input_local_fidelity`` function.

    Each invalid argument combination must raise the expected exception type
    with the expected message (or, in one case, emit a single warning), and
    the final, fully valid call must return a truthy value. The function
    under test is ``_validate_input_local_fidelity`` from the
    :mod:`fatf.utils.transparency.surrogate_evaluation` module.
    """
    # Expected messages -- dataset and data row.
    msg_dataset_shape = ('The input dataset must be a 2-dimensional '
                         'numpy array.')
    msg_dataset_type = ('The input dataset must be of a base type -- '
                        'numbers and/or strings.')
    msg_row_shape = ('The data_row must either be a 1-dimensional '
                     'numpy array or a numpy void object for '
                     'structured data rows.')
    msg_row_dtype = ('The dtype of the data_row is too different from '
                     'the dtype of the dataset array.')
    msg_row_features = ('The data_row must contain the same number of '
                        'features as the dataset.')

    # Expected messages -- global and local predictive functions.
    msg_global_signature = ('The global predictive function must have '
                            'exactly *one* required parameter to work '
                            'with this metric.')
    msg_global_callable = ('The global_predictive_function should be a Python '
                           'callable, e.g., a Python function.')
    msg_local_signature = ('The local predictive function must have '
                           'exactly *one* required parameter to work '
                           'with this metric.')
    msg_local_callable = ('The local_predictive_function should be a Python '
                          'callable, e.g., a Python function.')

    # Expected messages -- metric function.
    msg_metric_signature = ('The metric_function must take exactly *two* '
                            'required parameters.')
    msg_metric_callable = ('The metric_function should be a Python callable, '
                           'e.g., a Python function.')

    # Expected messages -- explained class index.
    msg_class_value = ('The explained_class_index parameter is '
                       'negative or larger than the number of '
                       'classes output by the global '
                       'probabilistic model.')
    msg_class_type = ('For probabilistic global models, i.e., '
                      'global predictive functions, the '
                      'explained_class_index parameter has to be '
                      'an integer or None.')
    msg_class_ignored = ('The explained_class_index parameter is not '
                         'None and will be ignored since the global '
                         'model is not probabilistic.')

    # Expected messages -- explained feature indices.
    msg_features_index = ('The following column indices are invalid for '
                          'the input dataset: {}.')
    msg_features_type = ('The explained_feature_indices parameter must be '
                         'a Python list or None.')

    # Expected messages -- fidelity radius (type and value errors share text).
    msg_radius_type = ('The fidelity_radius_percentage must be an '
                       'integer between 1 and 100.')
    msg_radius_value = ('The fidelity_radius_percentage must be an '
                        'integer between 1 and 100.')

    # Expected messages -- number of samples.
    msg_samples_value = ('The samples_number must be a positive '
                         'integer.')
    msg_samples_type = 'The samples_number must be an integer.'

    validate = futs._validate_input_local_fidelity
    data = NUMERICAL_NP_ARRAY

    def expect_error(error_type, message, *leading_args):
        # Every call in this test passes nine positional arguments; the ones
        # not given explicitly are filled in with trailing ``None``s.
        padded_args = leading_args + (None,) * (9 - len(leading_args))
        with pytest.raises(error_type) as caught:
            validate(*padded_args)
        assert str(caught.value) == message

    # Malformed dataset.
    expect_error(IncorrectShapeError, msg_dataset_shape, data[0])
    expect_error(TypeError, msg_dataset_type, np.array([[None]]))

    # Malformed data row.
    expect_error(IncorrectShapeError, msg_row_shape, data, data)
    expect_error(TypeError, msg_row_dtype, data, np.array(['0']))
    expect_error(IncorrectShapeError, msg_row_features, data, data[0][0:2])

    def predict(x):
        return np.ones(x.shape[0])

    def predict_invalid(x_1, x_2):
        pass  # pragma: no cover

    def predict_proba(x):
        return np.ones((x.shape[0], 3))

    def predict_proba_invalid():
        pass  # pragma: no cover

    # Global predictive function: missing, then wrong signature.
    expect_error(TypeError, msg_global_callable, data, data[0])
    expect_error(IncompatibleModelError, msg_global_signature, data, data[0],
                 predict_invalid)

    # Local predictive function: missing, then wrong signature.
    expect_error(TypeError, msg_local_callable, data, data[0], predict)
    expect_error(IncompatibleModelError, msg_local_signature, data, data[0],
                 predict_proba, predict_proba_invalid)

    def invalid_metric(x):
        pass  # pragma: no cover

    def metric(x_1, x_2):
        pass  # pragma: no cover

    # Metric function: missing, then wrong signature.
    expect_error(TypeError, msg_metric_callable, data, data[0], predict,
                 predict)
    expect_error(TypeError, msg_metric_signature, data, data[0], predict,
                 predict, invalid_metric)

    # Explained class index with a probabilistic global model.
    expect_error(TypeError, msg_class_type, data, data[0], predict_proba,
                 predict, metric, '1')
    for bad_class_index in (-1, 3):
        expect_error(ValueError, msg_class_value, data, data[0],
                     predict_proba, predict, metric, bad_class_index)
    # ...and with a non-probabilistic one, where it only triggers a warning.
    with pytest.warns(UserWarning) as warning_log:
        validate(data, data[0], predict, predict, metric, 3, None, 1, 1)
    assert len(warning_log) == 1
    assert str(warning_log[0].message) == msg_class_ignored

    # Explained feature indices.
    expect_error(TypeError, msg_features_type, data, data[0], predict_proba,
                 predict, metric, None, np.array([10, 11]))
    expect_error(IndexError, msg_features_index.format(np.array([10, 11])),
                 data, data[0], predict, predict, metric, None, [10, 11])

    # Fidelity radius: wrong type...
    for bad_radius in ('a', None, 55.0):
        expect_error(TypeError, msg_radius_type, data, data[0], predict_proba,
                     predict, metric, 1, [1, 2], bad_radius)
    # ...and out-of-range value.
    for bad_radius in (0, 101):
        expect_error(ValueError, msg_radius_value, data, data[0], predict,
                     predict, metric, None, [1, 2], bad_radius)

    # Number of samples: wrong type...
    for bad_samples in (None, 55.0):
        expect_error(TypeError, msg_samples_type, data, data[0], predict,
                     predict, metric, None, None, 100, bad_samples)
    # ...and non-positive value.
    for bad_samples in (0, -42):
        expect_error(ValueError, msg_samples_value, data, data[0], predict,
                     predict, metric, None, None, 100, bad_samples)

    clf = fumm.KNN(k=3)
    clf.fit(data, NUMERICAL_NP_ARRAY_TARGET)

    # Class index out of range for a fitted probabilistic model.
    expect_error(ValueError, msg_class_value, data, data[0],
                 clf.predict_proba, predict, metric, 10, None, 10, 1)

    # All OK
    is_valid = validate(data, data[0], clf.predict_proba, predict, metric, 1,
                        [0, 1], 10, 1)
    assert is_valid
示例#8
0
def test_local_fidelity_score():
    """
    Tests the ``local_fidelity_score`` function.

    After seeding the random number generator, the score is computed for
    several pairings of global and local KNN models (probabilistic,
    classifier and regressor outputs) and compared against fixed expected
    values. The function under test is
    :func:`fatf.utils.transparency.surrogate_evaluation.local_fidelity_score`.
    """
    accuracy_warning = ('Some of the given labels are not present in either '
                        'of the input arrays: {}.')
    fatf.setup_random_seed()

    def accuracy(global_predictions, local_predictions):
        # Threshold both arrays at 0.5; note that this overwrites the input
        # arrays in place (global first, then local).
        for predictions in (global_predictions, local_predictions):
            predictions[predictions >= 0.5] = 1
            predictions[predictions < 0.5] = 0

        return fummet.accuracy(
            fumt.get_confusion_matrix(global_predictions,
                                      local_predictions,
                                      labels=[0, 1]))

    def accuracy_prob(global_predictions,
                      local_predictions,
                      global_proba=True,
                      local_proba=True):
        # Probabilistic outputs are reduced to the index of the top class.
        global_labels = (np.argmax(global_predictions, axis=1)
                         if global_proba else global_predictions)
        local_labels = (np.argmax(local_predictions, axis=1)
                        if local_proba else local_predictions)

        return fummet.accuracy(
            fumt.get_confusion_matrix(global_labels,
                                      local_labels,
                                      labels=[0, 1, 2]))

    def accuracy_proba_np(global_predictions, local_predictions):
        # Global: class labels; local: probabilities.
        return accuracy_prob(global_predictions, local_predictions,
                             global_proba=False, local_proba=True)

    def accuracy_proba_nn(global_predictions, local_predictions):
        # Global and local: class labels.
        return accuracy_prob(global_predictions, local_predictions,
                             global_proba=False, local_proba=False)

    def reg_dist(global_predictions, local_predictions):
        difference = global_predictions - local_predictions
        return difference.sum()

    def score_with_label_warning(global_function, local_function, metric):
        # Scores the first row of the numerical array and checks that exactly
        # one warning -- about label 1 being absent -- was emitted.
        with pytest.warns(UserWarning) as w:
            score = futs.local_fidelity_score(NUMERICAL_NP_ARRAY,
                                              NUMERICAL_NP_ARRAY[0],
                                              global_function,
                                              local_function,
                                              metric)
        assert len(w) == 1
        assert str(w[0].message) == accuracy_warning.format({1})
        return score

    predictor = fumm.KNN(k=3)
    predictor.fit(NUMERICAL_NP_ARRAY, NUMERICAL_NP_ARRAY_TARGET)

    regressor = fumm.KNN(k=3, mode='regressor')
    regressor.fit(NUMERICAL_NP_ARRAY_LOCAL, NUMERICAL_NP_ARRAY_LOCAL_TARGET)

    # Regressor fitted on columns 2 and 3 only.
    regressor_23 = fumm.KNN(k=3, mode='regressor')
    regressor_23.fit(NUMERICAL_NP_ARRAY_LOCAL[:, [2, 3]],
                     NUMERICAL_NP_ARRAY_LOCAL_TARGET)

    # Structured-array counterparts.
    predictor_struct = fumm.KNN(k=3)
    predictor_struct.fit(NUMERICAL_STRUCT_ARRAY, NUMERICAL_NP_ARRAY_TARGET)
    # Regressor fitted on fields 'c' and 'd' only.
    regressor_struct_cd = fumm.KNN(k=3, mode='regressor')
    regressor_struct_cd.fit(NUMERICAL_STRUCT_ARRAY_LOCAL[['c', 'd']],
                            NUMERICAL_NP_ARRAY_LOCAL_TARGET)

    # Global model: probabilistic
    # -- local model: regressor
    comparison = futs.local_fidelity_score(
        NUMERICAL_NP_ARRAY, NUMERICAL_NP_ARRAY[0], predictor.predict_proba,
        regressor.predict, accuracy, 2)
    assert np.isclose(comparison, 0.26)
    # -- local model: classifier
    comparison = futs.local_fidelity_score(
        NUMERICAL_NP_ARRAY, NUMERICAL_NP_ARRAY[0], predictor.predict_proba,
        predictor.predict, accuracy, 2)
    assert np.isclose(comparison, 1.0)
    # -- local model: probabilistic
    comparison = score_with_label_warning(predictor.predict_proba,
                                          predictor.predict_proba,
                                          accuracy_prob)
    assert np.isclose(comparison, 1.0)

    # Global model: classifier
    # -- local model: probabilistic
    comparison = score_with_label_warning(predictor.predict,
                                          predictor.predict_proba,
                                          accuracy_proba_np)
    assert np.isclose(comparison, 1.0)
    # -- local model: classifier
    comparison = score_with_label_warning(predictor.predict,
                                          predictor.predict,
                                          accuracy_proba_nn)
    assert np.isclose(comparison, 1.0)

    # Global model: regressor
    # -- local model: regressor
    comparison = futs.local_fidelity_score(
        NUMERICAL_NP_ARRAY, NUMERICAL_NP_ARRAY[0], regressor.predict,
        regressor_23.predict, reg_dist, explained_feature_indices=[2, 3])
    assert np.isclose(comparison, 0)

    # Structured array
    # Global model: probabilistic
    # -- local model: regressor
    comparison = futs.local_fidelity_score(
        NUMERICAL_STRUCT_ARRAY, NUMERICAL_STRUCT_ARRAY[0],
        predictor_struct.predict_proba, regressor_struct_cd.predict,
        accuracy, 0, explained_feature_indices=['c', 'd'])
    assert np.isclose(comparison, 0.94)