def test_clear(self):
    """
    Tests KNN clearing (:func:`~fatf.utils.models.models.KNN.clear`).

    Clearing an unfitted model is expected to raise an
    ``UnfittedModelError``; after clearing a fitted model the unfitted
    internals check is run again.
    """
    neighbours = 2
    model = fumm.KNN(k=neighbours)

    def assert_unfitted():
        """Runs the shared unfitted-internals check on ``model``."""
        self._test_unfitted_internals(
            model, init_k=neighbours, init_is_classifier=True)

    assert_unfitted()

    # Clearing an unfitted model
    with pytest.raises(UnfittedModelError) as exception_info:
        model.clear()
    assert self.unfitted_model_error == str(exception_info.value)

    # Clearing a fitted model
    model.fit(self.X, self.y)
    self._test_fitted_internals(
        model, False, self.X, self.y, self.X_n, self.majority_label,
        self.X_categorical_indices, self.X_numerical_indices,
        self.unique_y, self.unique_y_counts, self.unique_y_probabilities)
    model.clear()
    assert_unfitted()
def test_predict_proba(self):
    """
    Tests probas (:func:`~fatf.utils.models.models.KNN.predict_proba`).

    The error paths are exercised first (regressor, unfitted model and
    malformed query arrays). Afterwards the predicted probabilities are
    compared for numerical and categorical labels, on unstructured and
    structured data, with ``k`` set to 3 and then to 10.
    """
    # pylint: disable=too-many-statements
    def new_classifier(neighbours):
        """Builds a KNN classifier and runs the unfitted check on it."""
        model = fumm.KNN(k=neighbours)
        self._test_unfitted_internals(
            model, init_k=neighbours, init_is_classifier=True)
        return model

    # Regressor error
    k = 3
    regressor = fumm.KNN(k=k, mode='r')
    self._test_unfitted_internals(
        regressor, init_k=k, init_is_classifier=False)
    regressor.fit(self.X, self.y)
    self._test_fitted_internals(
        regressor, False, self.X, self.y, self.X_n,
        self.majority_label_regressor, self.X_categorical_indices,
        self.X_numerical_indices)
    with pytest.raises(RuntimeError) as exception_info:
        regressor.predict_proba(self.X_test)
    assert str(exception_info.value) == self.runtime_error

    # Test other errors...
    clf = new_classifier(k)
    # Unfitted model
    with pytest.raises(UnfittedModelError) as exception_info:
        clf.predict_proba(self.X_test)
    assert self.unfitted_model_error == str(exception_info.value)
    # ...
    clf.fit(self.X, self.y)
    self._test_fitted_internals(
        clf, False, self.X, self.y, self.X_n, self.majority_label,
        self.X_categorical_indices, self.X_numerical_indices,
        self.unique_y, self.unique_y_counts, self.unique_y_probabilities)

    # X is not 2D
    for query in (self.X_3D, self.y):
        with pytest.raises(IncorrectShapeError) as exception_info:
            clf.predict_proba(query)
        raised = str(exception_info.value)
        assert self.incorrect_shape_error_singular == raised

    # dtype is not similar to the training data
    for query in (self.X_cat, self.X_cat_struct, self.X_struct):
        with pytest.raises(ValueError) as exception_info:
            clf.predict_proba(query)
        assert self.value_error_dtype == str(exception_info.value)

    # Predict 0 examples
    with pytest.raises(IncorrectShapeError) as exception_info:
        clf.predict_proba(np.ones((0, 2), dtype=int))
    assert self.incorrect_shape_error_rows == str(exception_info.value)

    # The number of features disagrees...
    # ...unstructured
    with pytest.raises(IncorrectShapeError) as exception_info:
        clf.predict_proba(self.X_distances)
    assert str(exception_info.value).startswith(
        self.incorrect_shape_error_columns)
    # ...structured
    clf.clear()
    self._test_unfitted_internals(clf, init_k=k, init_is_classifier=True)
    clf.fit(self.X_struct, self.y)
    self._test_fitted_internals(
        clf, True, self.X_struct, self.y, self.X_n, self.majority_label,
        self.X_struct_categorical_indices,
        self.X_struct_numerical_indices, self.unique_y,
        self.unique_y_counts, self.unique_y_probabilities)
    with pytest.raises(ValueError) as exception_info:
        clf.predict_proba(self.X_test_struct[['a']])
    assert self.value_error_dtype == str(exception_info.value)

    # (is structured, training data, query data, categorical indices,
    #  numerical indices)
    unstructured = (False, self.X, self.X_test,
                    self.X_categorical_indices, self.X_numerical_indices)
    structured = (True, self.X_struct, self.X_test_struct,
                  self.X_struct_categorical_indices,
                  self.X_struct_numerical_indices)
    # (labels, majority label, unique labels)
    numerical = (self.y, self.majority_label, self.unique_y)
    categorical = (self.y_categorical, self.majority_label_categorical,
                   self.unique_y_categorical)

    # Numerical classifier on unstructured, then on structured data;
    # categorical classifier on unstructured, then on structured data.
    scenarios = (
        (unstructured, numerical),
        (structured, numerical),
        (unstructured, categorical),
        (structured, categorical),
    )
    for data_setup, label_setup in scenarios:
        is_struct, train, query, cat_idx, num_idx = data_setup
        labels, majority, unique = label_setup

        # k=3: "sample smaller than k"; k=10: "sample bigger than k"
        for neighbours in (3, 10):
            model = new_classifier(neighbours)
            model.fit(train, labels)
            self._test_fitted_internals(
                model, is_struct, train, labels, self.X_n, majority,
                cat_idx, num_idx, unique, self.unique_y_counts,
                self.unique_y_probabilities)

            y_hat = model.predict_proba(query)
            if neighbours == 3:
                y_true = self.y_test_3_proba
            else:
                # Every query row is compared against the same vector.
                y_true = np.full(
                    (y_hat.shape[0],
                     self.y_test_3_trainig_proba.shape[0]),
                    fill_value=self.y_test_3_trainig_proba)
            assert np.isclose(y_hat, y_true, atol=1e-3).all()
def test_predict(self):
    """
    Tests KNN predictions (:func:`~fatf.utils.models.models.KNN.predict`).

    The error paths come first (unfitted model and malformed query
    arrays), followed by predictions of a regressor, a numerical
    classifier and a categorical classifier -- each on unstructured and
    structured data with ``k`` set to 3 and then to 10. The last part
    checks a training set in which both classes are equally frequent.
    """
    # pylint: disable=too-many-statements
    k = 2
    clf = fumm.KNN(k=k)
    self._test_unfitted_internals(clf, init_k=k, init_is_classifier=True)

    # Unfitted model
    with pytest.raises(UnfittedModelError) as exception_info:
        clf.predict(self.X_test)
    assert self.unfitted_model_error == str(exception_info.value)

    # X is not 2D
    clf.fit(self.X, self.y)
    for query in (self.X_3D, self.y):
        with pytest.raises(IncorrectShapeError) as exception_info:
            clf.predict(query)
        raised = str(exception_info.value)
        assert self.incorrect_shape_error_singular == raised

    # dtype is not similar to the training data
    for query in (self.X_cat, self.X_cat_struct, self.X_struct):
        with pytest.raises(ValueError) as exception_info:
            clf.predict(query)
        assert self.value_error_dtype == str(exception_info.value)

    # Predict 0 examples
    with pytest.raises(IncorrectShapeError) as exception_info:
        clf.predict(np.ones((0, 2), dtype=int))
    assert self.incorrect_shape_error_rows == str(exception_info.value)

    # The number of features disagrees...
    # ...unstructured
    with pytest.raises(IncorrectShapeError) as exception_info:
        clf.predict(self.X_distances)
    assert str(exception_info.value).startswith(
        self.incorrect_shape_error_columns)
    # ...structured
    clf.clear()
    self._test_unfitted_internals(clf, init_k=k, init_is_classifier=True)
    clf.fit(self.X_struct, self.y)
    self._test_fitted_internals(
        clf, True, self.X_struct, self.y, self.X_n, self.majority_label,
        self.X_struct_categorical_indices,
        self.X_struct_numerical_indices, self.unique_y,
        self.unique_y_counts, self.unique_y_probabilities)
    with pytest.raises(ValueError) as exception_info:
        clf.predict(self.X_test_struct[['a']])
    assert self.value_error_dtype == str(exception_info.value)

    def approx_equal(predicted, expected):
        """Element-wise comparison with an absolute tolerance of 1e-3."""
        return np.isclose(predicted, expected, atol=1e-3).all()

    # (is structured, training data, query data, categorical indices,
    #  numerical indices)
    data_setups = (
        (False, self.X, self.X_test, self.X_categorical_indices,
         self.X_numerical_indices),
        (True, self.X_struct, self.X_test_struct,
         self.X_struct_categorical_indices,
         self.X_struct_numerical_indices),
    )
    # (is classifier, labels, majority label, unique labels,
    #  expected predictions for k=3, comparison function)
    label_setups = (
        # Regressor
        (False, self.y, self.majority_label_regressor, None,
         self.y_test_3_regression, approx_equal),
        # Numerical classifier
        (True, self.y, self.majority_label, self.unique_y,
         self.y_test_3_classification, approx_equal),
        # Categorical classifier
        (True, self.y_categorical, self.majority_label_categorical,
         self.unique_y_categorical,
         self.y_test_3_classification_categorical, np.array_equal),
    )

    for label_setup in label_setups:
        (is_classifier, labels, majority, unique, k3_answer,
         matches) = label_setup
        for is_struct, train, query, cat_idx, num_idx in data_setups:
            # k=3: "sample smaller than k"; k=10: "sample bigger than k"
            for neighbours in (3, 10):
                if is_classifier:
                    model = fumm.KNN(k=neighbours)
                    class_stats = (unique, self.unique_y_counts,
                                   self.unique_y_probabilities)
                else:
                    model = fumm.KNN(k=neighbours, mode='r')
                    # The regressor check is called without label stats.
                    class_stats = ()
                self._test_unfitted_internals(
                    model, init_k=neighbours,
                    init_is_classifier=is_classifier)

                model.fit(train, labels)
                self._test_fitted_internals(
                    model, is_struct, train, labels, self.X_n, majority,
                    cat_idx, num_idx, *class_stats)

                y_hat = model.predict(query)
                if neighbours == 3:
                    y_true = k3_answer
                else:
                    y_true = np.array(y_hat.shape[0] * [majority])
                assert matches(y_hat, y_true)

    # Test when the majority class is ambiguous -- sample smaller than k
    tied_y = np.array([0, 1, 0, 1, 0, 1])
    tied_majority = 0
    tied_unique = np.array([0, 1])
    tied_counts = np.array([3, 3])
    tied_probabilities = np.array([.5, .5])
    tied_query = np.array([[0, 0], [2, 0]])
    tied_answer = np.array([0, 0])
    #
    k = 4
    clf = fumm.KNN(k=k)
    self._test_unfitted_internals(clf, init_k=k, init_is_classifier=True)
    clf.fit(self.X, tied_y)
    self._test_fitted_internals(
        clf, False, self.X, tied_y, self.X_n, tied_majority,
        self.X_categorical_indices, self.X_numerical_indices,
        tied_unique, tied_counts, tied_probabilities)
    y_hat = clf.predict(tied_query)
    assert np.array_equal(y_hat, tied_answer)
def test_get_distances(self):
    """
    Tests distances (:func:`~fatf.utils.models.models.KNN._get_distances`).

    A single model is fitted, queried and cleared in turn for five data
    sets: numerical and categorical unstructured arrays, numerical and
    categorical structured arrays, and a mixed structured array.
    """
    k = 2
    clf = fumm.KNN(k=k)

    def is_unfitted():
        return self._test_unfitted_internals(
            clf, init_k=k, init_is_classifier=True)

    # (is structured, training data, categorical indices,
    #  numerical indices, query data, expected distances)
    scenarios = (
        # Numerical distances on unstructured
        (False, self.X, self.X_categorical_indices,
         self.X_numerical_indices, self.X_test, self.X_distances),
        # Categorical distances on unstructured
        (False, self.X_cat, self.X_cat_categorical_indices,
         self.X_cat_numerical_indices, self.X_cat_test,
         self.X_cat_distances),
        # Numerical distances on structured
        (True, self.X_struct, self.X_struct_categorical_indices,
         self.X_struct_numerical_indices, self.X_test_struct,
         self.X_distances),
        # Categorical distances on structured
        (True, self.X_cat_struct, self.X_cat_struct_categorical_indices,
         self.X_cat_struct_numerical_indices, self.X_cat_struct_test,
         self.X_cat_distances),
        # Numerical-categorical distances on structured
        (True, self.X_mix, self.X_mix_categorical_indices,
         self.X_mix_numerical_indices, self.X_test_mix,
         self.X_mix_distances),
    )

    for index, scenario in enumerate(scenarios):
        is_struct, train, cat_idx, num_idx, query, expected = scenario

        # The model is reused, so it is cleared before every refit.
        if index > 0:
            clf.clear()
        is_unfitted()

        clf.fit(train, self.y)
        self._test_fitted_internals(
            clf, is_struct, train, self.y, self.X_n, self.majority_label,
            cat_idx, num_idx, self.unique_y, self.unique_y_counts,
            self.unique_y_probabilities)

        dist = clf._get_distances(query)
        assert np.isclose(dist, expected, atol=1e-3).all()
def test_fit(self):
    """
    Tests KNN fitting (:func:`~fatf.utils.models.models.KNN.fit`).

    Covers refitting a fitted model, malformed ``X`` and ``y`` arrays,
    fitting a regressor to string labels, fitting to structured arrays
    and the internal parameters stored after fitting the short data set.
    """
    # pylint: disable=too-many-statements
    k = 2

    def check_classifier_fit(model, is_struct, data, cat_idx, num_idx):
        """Checks a classifier fitted to ``data`` with ``self.y``."""
        self._test_fitted_internals(
            model, is_struct, data, self.y, self.X_n, self.majority_label,
            cat_idx, num_idx, self.unique_y, self.unique_y_counts,
            self.unique_y_probabilities)

    clf = fumm.KNN(k=k)
    self._test_unfitted_internals(clf, init_k=k)
    clf.fit(self.X, self.y)
    check_classifier_fit(clf, False, self.X, self.X_categorical_indices,
                         self.X_numerical_indices)

    # Fitting a pre-fitted model
    with pytest.raises(PrefittedModelError) as exception_info:
        clf.fit(self.X, self.y)
    assert self.prefitted_model_error == str(exception_info.value)

    clf = fumm.KNN(k=k)
    self._test_unfitted_internals(clf, init_k=k)

    # (training data, labels, expected error message)
    malformed_inputs = (
        # X is not 2D
        (self.X_3D, self.y, self.incorrect_shape_error_2d),
        (self.X_3D, self.X, self.incorrect_shape_error_2d),
        # y is not 1D
        (self.X, self.X_3D, self.incorrect_shape_error_1d),
        # 0 examples
        (np.ndarray((0, 5)), self.y, self.incorrect_shape_error_X0),
        (np.ndarray((0, ), dtype=[('a', str), ('b', int)]), self.y,
         self.incorrect_shape_error_X0),
        # 0 features
        (np.ndarray((5, 0)), self.y, self.incorrect_shape_error_X1),
        # Test whether the shape of X agrees with the shape of y
        (self.X, self.X_numerical_indices, self.incorrect_shape_error_Xy),
    )
    for data, labels, message in malformed_inputs:
        with pytest.raises(IncorrectShapeError) as exception_info:
            clf.fit(data, labels)
        assert message == str(exception_info.value)

    # Fitting regressor to a categorical label vector
    clf = fumm.KNN(k=k, mode='r')
    self._test_unfitted_internals(clf, init_k=k, init_is_classifier=False)
    with pytest.raises(TypeError) as exception_info:
        string_labels = np.array(self.X.shape[0] * ['a'])
        clf.fit(self.X, string_labels)
    assert self.type_error_regressor == str(exception_info.value)

    # Fitting to a structured numerical array and then to a structured
    # mixed numerical-categorical array
    structured_inputs = (
        (self.X_struct, self.X_struct_categorical_indices,
         self.X_struct_numerical_indices),
        (self.X_mix, self.X_mix_categorical_indices,
         self.X_mix_numerical_indices),
    )
    for data, cat_idx, num_idx in structured_inputs:
        clf = fumm.KNN(k=k)
        self._test_unfitted_internals(
            clf, init_k=k, init_is_classifier=True)
        clf.fit(data, self.y)
        check_classifier_fit(clf, True, data, cat_idx, num_idx)

    # Fit a regressor to a numerical data and check internal parameters
    clf = fumm.KNN(k=k, mode='regressor')
    self._test_unfitted_internals(clf, init_k=k, init_is_classifier=False)
    clf.fit(self.X_short, self.y_short_numerical)
    self._test_fitted_internals(
        clf, False, self.X_short, self.y_short_numerical, self.X_short_n,
        self.short_numerical_majority_label_regressor,
        self.X_short_categorical_indices, self.X_short_numerical_indices)

    # Fit a classifier to a numerical data and check internal parameters
    clf = fumm.KNN(k=k, mode='classifier')
    self._test_unfitted_internals(clf, init_k=k, init_is_classifier=True)
    clf.fit(self.X_short, self.y_short_numerical)
    self._test_fitted_internals(
        clf, False, self.X_short, self.y_short_numerical, self.X_short_n,
        self.short_numerical_majority_label_classifier,
        self.X_short_categorical_indices, self.X_short_numerical_indices,
        self.short_numerical_unique_y,
        self.short_numerical_unique_y_counts,
        self.short_numerical_unique_y_probabilities)

    # Fit a classifier to a categorical data and check internal parameters
    clf = fumm.KNN(k=k, mode='classifier')
    self._test_unfitted_internals(clf, init_k=k, init_is_classifier=True)
    clf.fit(self.X_short, self.y_short)
    self._test_fitted_internals(
        clf, False, self.X_short, self.y_short, self.X_short_n,
        self.short_majority_label, self.X_short_categorical_indices,
        self.X_short_numerical_indices, self.short_unique_y,
        self.short_unique_y_counts, self.short_unique_y_probabilities)
def test_knn(self):
    """
    Tests KNN initialisation.

    Invalid ``k`` and ``mode`` arguments are expected to raise, and the
    unfitted internals are checked for every accepted ``mode`` value.
    """
    # k is not an integer
    for invalid_k in (None, 'k', -5.5):
        with pytest.raises(TypeError) as exception_info:
            fumm.KNN(k=invalid_k)
        assert str(exception_info.value) == self.type_error_k

    # k is a negative integer
    with pytest.raises(ValueError) as exception_info:
        fumm.KNN(k=-5)
    assert str(exception_info.value) == self.value_error_k

    # mode specifier is wrong
    for invalid_mode in (object(), 3, 'C'):
        with pytest.raises(ValueError) as exception_info:
            fumm.KNN(k=5, mode=invalid_mode)
        assert str(exception_info.value) == self.value_error_mode

    # Default initialisation
    clf = fumm.KNN()
    self._test_unfitted_internals(
        clf, init_k=self.k, init_is_classifier=True)

    # Custom k without a mode
    k = 8
    clf = fumm.KNN(k=k)
    self._test_unfitted_internals(clf, init_k=k, init_is_classifier=True)

    # Custom k with every accepted mode: (mode, is classifier)
    accepted_modes = (
        (None, True),
        ('c', True),
        ('classifier', True),
        ('r', False),
        ('regressor', False),
    )
    for mode, is_classifier in accepted_modes:
        clf = fumm.KNN(k=k, mode=mode)
        self._test_unfitted_internals(
            clf, init_k=k, init_is_classifier=is_classifier)
def test_validate_input_local_fidelity():
    """
    Tests the ``_validate_input_local_fidelity`` function.

    This function tests the ``_validate_input_local_fidelity`` function
    of the ``fatf.utils.transparency.surrogate_evaluation`` module.
    """
    incorrect_shape_dataset = (
        'The input dataset must be a 2-dimensional '
        'numpy array.')
    type_error_dataset = (
        'The input dataset must be of a base type -- '
        'numbers and/or strings.')
    incorrect_shape_datarow = (
        'The data_row must either be a 1-dimensional '
        'numpy array or a numpy void object for '
        'structured data rows.')
    incorrect_dtype_data = (
        'The dtype of the data_row is too different from '
        'the dtype of the dataset array.')
    datarow_features_error = (
        'The data_row must contain the same number of '
        'features as the dataset.')
    global_model_incompatible = (
        'The global predictive function must have '
        'exactly *one* required parameter to work '
        'with this metric.')
    global_model_type = (
        'The global_predictive_function should be a Python '
        'callable, e.g., a Python function.')
    local_model_incompatible = (
        'The local predictive function must have '
        'exactly *one* required parameter to work '
        'with this metric.')
    local_model_type = (
        'The local_predictive_function should be a Python '
        'callable, e.g., a Python function.')
    metric_param_error = (
        'The metric_function must take exactly *two* '
        'required parameters.')
    metric_type_error = (
        'The metric_function should be a Python callable, '
        'e.g., a Python function.')
    explained_class_value_error = (
        'The explained_class_index parameter is '
        'negative or larger than the number of '
        'classes output by the global '
        'probabilistic model.')
    explained_class_type_error = (
        'For probabilistic global models, i.e., '
        'global predictive functions, the '
        'explained_class_index parameter has to be '
        'an integer or None.')
    explained_class_warning = (
        'The explained_class_index parameter is not '
        'None and will be ignored since the global '
        'model is not probabilistic.')
    features_index_error = (
        'The following column indices are invalid for '
        'the input dataset: {}.')
    features_type_error = (
        'The explained_feature_indices parameter must be '
        'a Python list or None.')
    fidelity_radius_type_error = (
        'The fidelity_radius_percentage must be an '
        'integer between 1 and 100.')
    fidelity_radius_value_error = (
        'The fidelity_radius_percentage must be an '
        'integer between 1 and 100.')
    samples_number_value_error = (
        'The samples_number must be a positive '
        'integer.')
    samples_number_type_error = 'The samples_number must be an integer.'

    def predict(x):
        return np.ones(x.shape[0])

    def predict_invalid(x_1, x_2):
        pass  # pragma: no cover

    def predict_proba(x):
        return np.ones((x.shape[0], 3))

    def predict_proba_invalid():
        pass  # pragma: no cover

    def invalid_metric(x):
        pass  # pragma: no cover

    def metric(x_1, x_2):
        pass  # pragma: no cover

    def validate(*leading_args):
        """
        Calls the validator with nine positional arguments.

        Every call in this test passes nine positional arguments; the
        ones that are not given explicitly are filled in with ``None``.
        """
        padded = leading_args + (None, ) * (9 - len(leading_args))
        return futs._validate_input_local_fidelity(*padded)

    def expect(error_type, message, *leading_args):
        """Asserts that validation raises ``error_type`` with ``message``."""
        with pytest.raises(error_type) as exin:
            validate(*leading_args)
        assert str(exin.value) == message

    data = NUMERICAL_NP_ARRAY
    row = NUMERICAL_NP_ARRAY[0]

    # Data set and data row
    expect(IncorrectShapeError, incorrect_shape_dataset, row)
    expect(TypeError, type_error_dataset, np.array([[None]]))
    expect(IncorrectShapeError, incorrect_shape_datarow, data, data)
    expect(TypeError, incorrect_dtype_data, data, np.array(['0']))
    expect(IncorrectShapeError, datarow_features_error, data, row[0:2])

    # Global and local predictive functions
    expect(TypeError, global_model_type, data, row)
    expect(IncompatibleModelError, global_model_incompatible, data, row,
           predict_invalid)
    expect(TypeError, local_model_type, data, row, predict)
    expect(IncompatibleModelError, local_model_incompatible, data, row,
           predict_proba, predict_proba_invalid)

    # Metric function
    expect(TypeError, metric_type_error, data, row, predict, predict)
    expect(TypeError, metric_param_error, data, row, predict, predict,
           invalid_metric)

    # Explained class index
    expect(TypeError, explained_class_type_error, data, row,
           predict_proba, predict, metric, '1')
    expect(ValueError, explained_class_value_error, data, row,
           predict_proba, predict, metric, -1)
    expect(ValueError, explained_class_value_error, data, row,
           predict_proba, predict, metric, 3)
    #
    with pytest.warns(UserWarning) as w:
        validate(data, row, predict, predict, metric, 3, None, 1, 1)
    assert len(w) == 1
    assert str(w[0].message) == explained_class_warning

    # Explained feature indices
    expect(TypeError, features_type_error, data, row, predict_proba,
           predict, metric, None, np.array([10, 11]))
    expect(IndexError, features_index_error.format(np.array([10, 11])),
           data, row, predict, predict, metric, None, [10, 11])

    # Fidelity radius percentage
    expect(TypeError, fidelity_radius_type_error, data, row,
           predict_proba, predict, metric, 1, [1, 2], 'a')
    expect(TypeError, fidelity_radius_type_error, data, row,
           predict_proba, predict, metric, 1, [1, 2], None)
    expect(TypeError, fidelity_radius_type_error, data, row,
           predict_proba, predict, metric, 1, [1, 2], 55.0)
    #
    expect(ValueError, fidelity_radius_value_error, data, row, predict,
           predict, metric, None, [1, 2], 0)
    expect(ValueError, fidelity_radius_value_error, data, row, predict,
           predict, metric, None, [1, 2], 101)

    # Samples number
    expect(TypeError, samples_number_type_error, data, row, predict,
           predict, metric, None, None, 100, None)
    expect(TypeError, samples_number_type_error, data, row, predict,
           predict, metric, None, None, 100, 55.0)
    #
    expect(ValueError, samples_number_value_error, data, row, predict,
           predict, metric, None, None, 100, 0)
    expect(ValueError, samples_number_value_error, data, row, predict,
           predict, metric, None, None, 100, -42)

    # Explained class index checked against a fitted KNN model
    clf = fumm.KNN(k=3)
    clf.fit(NUMERICAL_NP_ARRAY, NUMERICAL_NP_ARRAY_TARGET)
    expect(ValueError, explained_class_value_error, data, row,
           clf.predict_proba, predict, metric, 10, None, 10, 1)

    # All OK
    assert validate(data, row, clf.predict_proba, predict, metric, 1,
                    [0, 1], 10, 1)
def test_local_fidelity_score():
    """
    Tests the ``local_fidelity_score`` function.

    This function tests the
    :func:`fatf.utils.transparency.surrogate_evaluation.local_fidelity_score`
    function. The score is computed for the first row of the data set with
    different pairings of global and local predictive functions
    (probabilistic, classifier and regressor), first for
    ``NUMERICAL_NP_ARRAY`` and then for ``NUMERICAL_STRUCT_ARRAY``.
    """
    missing_labels_warning = ('Some of the given labels are not present in '
                              'either of the input arrays: {}.')

    # The scores asserted below are hard-coded, so the seed is set up before
    # anything else is executed.
    fatf.setup_random_seed()

    def _binarise_in_place(values):
        # Thresholds at 0.5; the input array itself is modified and returned.
        values[values >= 0.5] = 1
        values[values < 0.5] = 0
        return values

    def accuracy(global_predictions, local_predictions):
        # Accuracy of the two prediction vectors after binarising both.
        matrix = fumt.get_confusion_matrix(
            _binarise_in_place(global_predictions),
            _binarise_in_place(local_predictions),
            labels=[0, 1])
        return fummet.accuracy(matrix)

    def accuracy_prob(global_predictions,
                      local_predictions,
                      global_proba=True,
                      local_proba=True):
        # Probabilistic inputs are collapsed to the most probable class index.
        global_labels = (
            np.argmax(global_predictions, axis=1)
            if global_proba else global_predictions)  # yapf: disable
        local_labels = (
            np.argmax(local_predictions, axis=1)
            if local_proba else local_predictions)  # yapf: disable

        matrix = fumt.get_confusion_matrix(
            global_labels, local_labels, labels=[0, 1, 2])
        return fummet.accuracy(matrix)

    def accuracy_proba_np(global_predictions, local_predictions):
        # Global: class labels; local: probabilities.
        return accuracy_prob(
            global_predictions, local_predictions, global_proba=False)

    def accuracy_proba_nn(global_predictions, local_predictions):
        # Global: class labels; local: class labels.
        return accuracy_prob(
            global_predictions,
            local_predictions,
            global_proba=False,
            local_proba=False)

    def reg_dist(global_predictions, local_predictions):
        # Sum of the signed differences between the two prediction vectors.
        difference = global_predictions - local_predictions
        return difference.sum()

    def _fit_knn(data, target, **knn_kwargs):
        # Builds a 3-nearest-neighbours model and fits it to the given data.
        model = fumm.KNN(k=3, **knn_kwargs)
        model.fit(data, target)
        return model

    # Models for the classic array...
    predictor = _fit_knn(NUMERICAL_NP_ARRAY, NUMERICAL_NP_ARRAY_TARGET)
    regressor = _fit_knn(
        NUMERICAL_NP_ARRAY_LOCAL,
        NUMERICAL_NP_ARRAY_LOCAL_TARGET,
        mode='regressor')
    regressor_23 = _fit_knn(
        NUMERICAL_NP_ARRAY_LOCAL[:, [2, 3]],
        NUMERICAL_NP_ARRAY_LOCAL_TARGET,
        mode='regressor')
    # ...and for the structured array.
    predictor_struct = _fit_knn(NUMERICAL_STRUCT_ARRAY,
                                NUMERICAL_NP_ARRAY_TARGET)
    regressor_struct_cd = _fit_knn(
        NUMERICAL_STRUCT_ARRAY_LOCAL[['c', 'd']],
        NUMERICAL_NP_ARRAY_LOCAL_TARGET,
        mode='regressor')

    # Global: probabilistic (class index 2 is explained); no warning expected.
    thresholded_cases = (
        # ...local: regressor
        (regressor.predict, 0.26),
        # ...local: classifier
        (predictor.predict, 1.0),
    )
    for local_function, expected_score in thresholded_cases:
        score = futs.local_fidelity_score(
            NUMERICAL_NP_ARRAY, NUMERICAL_NP_ARRAY[0],
            predictor.predict_proba, local_function, accuracy, 2)
        assert np.isclose(score, expected_score)

    # Each of these pairings is expected to emit exactly one warning, which
    # reports label 1 as missing, and to yield a perfect score.
    warning_cases = (
        # Global: probabilistic; local: probabilistic
        (predictor.predict_proba, predictor.predict_proba, accuracy_prob),
        # Global: classifier; local: probabilistic
        (predictor.predict, predictor.predict_proba, accuracy_proba_np),
        # Global: classifier; local: classifier
        (predictor.predict, predictor.predict, accuracy_proba_nn),
    )
    for global_function, local_function, metric in warning_cases:
        with pytest.warns(UserWarning) as warning_log:
            score = futs.local_fidelity_score(
                NUMERICAL_NP_ARRAY, NUMERICAL_NP_ARRAY[0], global_function,
                local_function, metric)
        assert len(warning_log) == 1
        assert (str(warning_log[0].message)
                == missing_labels_warning.format({1}))  # yapf: disable
        assert np.isclose(score, 1.0)

    # Global: regressor; local: regressor fitted to features 2 and 3 only.
    score = futs.local_fidelity_score(
        NUMERICAL_NP_ARRAY,
        NUMERICAL_NP_ARRAY[0],
        regressor.predict,
        regressor_23.predict,
        reg_dist,
        explained_feature_indices=[2, 3])
    assert np.isclose(score, 0)

    # Structured array -- global: probabilistic (class index 0 is explained);
    # local: regressor fitted to the 'c' and 'd' fields only.
    score = futs.local_fidelity_score(
        NUMERICAL_STRUCT_ARRAY,
        NUMERICAL_STRUCT_ARRAY[0],
        predictor_struct.predict_proba,
        regressor_struct_cd.predict,
        accuracy,
        0,
        explained_feature_indices=['c', 'd'])
    assert np.isclose(score, 0.94)