def create_iris_pandas():
    """Return the iris train/test split with features as DataFrames.

    The two feature matrices produced by ``create_iris_data`` are wrapped
    in ``pandas.DataFrame`` objects whose columns are ``feature_names``.
    The label arrays are passed through untouched and the sixth value
    returned by ``create_iris_data`` is discarded.

    :return: ``(X_train, X_test, y_train, y_test, feature_names)``.
    """
    train_x, test_x, train_y, test_y, columns, _ = create_iris_data()
    # Convert both splits the same way, train first and then test.
    train_frame, test_frame = (
        pd.DataFrame(split, columns=columns) for split in (train_x, test_x)
    )
    return train_frame, test_frame, train_y, test_y, columns
def test_weird_predict_proba_function(self):
    """RAIInsights must reject a model whose predict_proba mutates input.

    The wrapped model writes a ``TARGET`` column into the frame it is
    given whenever that column is absent, and the constructor is expected
    to raise ``UserConfigValidationException`` with the message below.
    """

    class WeirdModelPredictProbaWrapper():
        """A weird model that adds the target column back to its input."""

        def __init__(self, model):
            self.model = model

        def predict(self, test_data_pandas):
            return self.model.predict(test_data_pandas)

        def predict_proba(self, test_data_pandas):
            # In-place write on the caller's frame: this is the
            # misbehaviour the test wants RAIInsights to detect.
            if TARGET not in test_data_pandas.columns:
                test_data_pandas[TARGET] = 0
            features_only = test_data_pandas.drop(columns=TARGET)
            return self.model.predict_proba(features_only)

    features_train, features_test, labels_train, labels_test, _, _ = \
        create_iris_data()
    wrapped_model = WeirdModelPredictProbaWrapper(
        create_lightgbm_classifier(features_train, labels_train))

    # Work on copies so the frames from create_iris_data stay untouched.
    train_frame = features_train.copy()
    test_frame = features_test.copy()
    train_frame[TARGET] = labels_train
    test_frame[TARGET] = labels_test

    message = ('Calling model predict_proba function modifies '
               'input dataset features. Please check if '
               'predict function is defined correctly.')
    with pytest.raises(UserConfigValidationException, match=message):
        RAIInsights(
            model=wrapped_model,
            train=train_frame,
            test=test_frame,
            target_column=TARGET,
            task_type='classification')
def test_rai_insights_iris(self, manager_type):
    """Run ``run_rai_insights`` on iris for every classification model.

    :param manager_type: Manager type forwarded unchanged to
        ``run_rai_insights``.
    """
    (train_frame, test_frame, train_labels, test_labels,
     feature_names, classes) = create_iris_data()

    # The models are fitted before the label column is attached below.
    candidate_models = create_models_classification(
        train_frame, train_labels)

    train_frame[LABELS] = train_labels
    test_frame[LABELS] = test_labels

    first_feature = feature_names[0]
    manager_args = {}
    manager_args[ManagerParams.TREATMENT_FEATURES] = [first_feature]
    manager_args[ManagerParams.DESIRED_CLASS] = 0
    manager_args[ManagerParams.FEATURE_IMPORTANCE] = True

    for candidate in candidate_models:
        run_rai_insights(
            candidate, train_frame, test_frame, LABELS, None,
            manager_type, manager_args, classes)
def test_validate_bad_target_name(self):
    """A target column missing from train/test data must be rejected."""
    train_frame, test_frame, train_labels, test_labels, _, _ = \
        create_iris_data()
    classifier = create_lightgbm_classifier(train_frame, train_labels)

    train_frame[TARGET] = train_labels
    test_frame[TARGET] = test_labels

    expected = "Target name bad_target not present in train/test data"
    with pytest.raises(UserConfigValidationException) as raised:
        RAIInsights(
            model=classifier,
            train=train_frame,
            test=test_frame,
            target_column='bad_target',
            task_type='classification')
    assert expected in str(raised.value)
def test_validate_unsupported_task_type(self):
    """An unknown ``task_type`` value must be rejected."""
    train_frame, test_frame, train_labels, test_labels, _, _ = \
        create_iris_data()
    classifier = create_lightgbm_classifier(train_frame, train_labels)

    train_frame[TARGET] = train_labels
    test_frame[TARGET] = test_labels

    # ``match`` is a regular expression, hence the escaped brackets.
    message = ("Unsupported task type 'regre'. "
               "Should be one of \\['classification', 'regression'\\]")
    with pytest.raises(UserConfigValidationException, match=message):
        RAIInsights(
            model=classifier,
            train=train_frame,
            test=test_frame,
            target_column=TARGET,
            task_type='regre')
def test_validate_categorical_features_not_having_train_features(self):
    """Categorical feature names absent from train data are rejected."""
    train_frame, test_frame, train_labels, test_labels, _, _ = \
        create_iris_data()
    classifier = create_lightgbm_classifier(train_frame, train_labels)

    train_frame[TARGET] = train_labels
    test_frame[TARGET] = test_labels

    unknown_features = ['not_a_feature']
    # ``match`` is a regular expression, hence the escaped brackets.
    message = ("Feature names in categorical_features "
               "do not exist in train data: \\['not_a_feature'\\]")
    with pytest.raises(UserConfigValidationException, match=message):
        RAIInsights(
            model=classifier,
            train=train_frame,
            test=test_frame,
            target_column=TARGET,
            task_type='classification',
            categorical_features=unknown_features)
def test_validate_categorical_features_having_target(self):
    """Listing the target column as a categorical feature is rejected."""
    train_frame, test_frame, train_labels, test_labels, _, _ = \
        create_iris_data()
    classifier = create_lightgbm_classifier(train_frame, train_labels)

    train_frame[TARGET] = train_labels
    test_frame[TARGET] = test_labels

    expected = 'Found target name target in categorical feature list'
    with pytest.raises(UserConfigValidationException) as raised:
        RAIInsights(
            model=classifier,
            train=train_frame,
            test=test_frame,
            target_column=TARGET,
            task_type='classification',
            categorical_features=[TARGET])
    assert expected in str(raised.value)
def test_validate_test_data_size(self):
    """Test data larger than ``maximum_rows_for_test`` must be rejected.

    The row limit is set to one less than the number of test rows, so
    constructing ``RAIInsights`` is expected to raise
    ``UserConfigValidationException`` naming both the actual row count
    and the limit.
    """
    X_train, X_test, y_train, y_test, _, _ = create_iris_data()
    model = create_lightgbm_classifier(X_train, y_train)

    X_train[TARGET] = y_train
    X_test[TARGET] = y_test

    # Derive the counts from the data instead of hard-coding them, so the
    # test stays correct if the train/test split ever changes.
    test_rows = len(y_test)
    row_limit = test_rows - 1

    with pytest.raises(UserConfigValidationException) as ucve:
        RAIInsights(
            model=model,
            train=X_train,
            test=X_test,
            # Use the valid target so the row limit is the only problem
            # and the test does not depend on validation order.
            target_column=TARGET,
            task_type='classification',
            maximum_rows_for_test=row_limit)

    error_text = str(ucve.value)
    size_message = (
        "The test data has {0} rows, but limit is set to {1} rows"
    ).format(test_rows, row_limit)
    assert size_message in error_text
    assert "Please resample the test data or " + \
        "adjust maximum_rows_for_test" in error_text
def test_desired_class_opposite_multi_classification(self):
    """``desired_class='opposite'`` is rejected by counterfactual.add."""
    train_frame, test_frame, train_labels, test_labels, _, _ = \
        create_iris_data()
    classifier = create_lightgbm_classifier(train_frame, train_labels)

    train_frame[TARGET] = train_labels
    test_frame[TARGET] = test_labels

    insights = RAIInsights(
        model=classifier,
        train=train_frame,
        test=test_frame,
        target_column=TARGET,
        task_type='classification')

    expected = ('The desired_class attribute should not be \'opposite\''
                ' It should be the class value for multiclass'
                ' classification scenario.')
    with pytest.raises(UserConfigValidationException, match=expected):
        insights.counterfactual.add(
            total_CFs=10,
            method='random',
            desired_class='opposite')
def test_feature_importance_with_less_counterfactuals(self):
    """Adding counterfactuals with ``total_CFs=5`` must be rejected.

    The expected error states that at least 10 counterfactuals are
    required when counterfactual feature importances are used.
    """
    train_frame, test_frame, train_labels, test_labels, _, _ = \
        create_iris_data()
    classifier = create_lightgbm_classifier(train_frame, train_labels)

    train_frame[TARGET] = train_labels
    test_frame[TARGET] = test_labels

    insights = RAIInsights(
        model=classifier,
        train=train_frame,
        test=test_frame,
        target_column=TARGET,
        task_type='classification')

    expected = ("A total_CFs value of at least 10 is required to "
                "use counterfactual feature importances. "
                "Either increase total_CFs to at least 10 or "
                "set feature_importance to False.")
    with pytest.raises(UserConfigValidationException, match=expected):
        insights.counterfactual.add(
            total_CFs=5,
            method='random',
            desired_class=2)