def test_eval_data_having_new_categories(self):
        """Counterfactual setup rejects categories seen only in test data.

        The categorical feature 'c2' takes the value 1 in the test frame
        but only 2 and 3 in the train frame, so adding the counterfactual
        component is expected to raise UserConfigValidationException
        naming that category.
        """
        column_names = ['c1', 'c2', TARGET]
        train_data = pd.DataFrame(
            data=[[1, 2, 0], [2, 3, 1], [3, 3, 0]],
            columns=column_names)
        test_data = pd.DataFrame(data=[[1, 1, 0]], columns=column_names)

        # Fit the classifier on the feature columns only.
        features = train_data.drop(columns=[TARGET])
        labels = train_data[TARGET]
        model = create_lightgbm_classifier(features, labels)

        insights_kwargs = {
            'model': model,
            'train': train_data,
            'test': test_data,
            'target_column': TARGET,
            'task_type': 'classification',
            'categorical_features': ['c2'],
        }
        rai_insights = RAIInsights(**insights_kwargs)

        # pytest treats ``match`` as a regular expression, hence the
        # escaped square brackets at the end of the pattern.
        message = ("Counterfactual example generation requires "
                   "that every category of "
                   "categorical features present in the test data be "
                   "also present in the train data. "
                   "Categories missing from train data: "
                   "{'c2': \\[1\\]}")
        with pytest.raises(UserConfigValidationException, match=message):
            rai_insights.counterfactual.add(
                total_CFs=10, method='random', desired_class='opposite')
    def test_weird_predict_proba_function(self):
        """RAIInsights rejects a model whose predict_proba mutates input.

        The wrapper defined below adds the target column to the frame it
        is given before predicting; constructing RAIInsights with it is
        expected to raise UserConfigValidationException.
        """
        features_train, features_test, y_train, y_test, _, _ = \
            create_iris_data()

        # A weird model that modifies the input dataset by
        # adding back the target column
        class WeirdModelPredictProbaWrapper():
            def __init__(self, model):
                self.model = model

            def predict(self, test_data_pandas):
                return self.model.predict(test_data_pandas)

            def predict_proba(self, test_data_pandas):
                # Deliberate side effect: write the target column onto
                # the caller's frame when it is absent.
                target_missing = TARGET not in test_data_pandas.columns
                if target_missing:
                    test_data_pandas[TARGET] = 0
                features_only = test_data_pandas.drop(columns=TARGET)
                return self.model.predict_proba(features_only)

        base_model = create_lightgbm_classifier(features_train, y_train)
        model = WeirdModelPredictProbaWrapper(base_model)

        # Work on copies so the frames returned by the data helper are
        # not the ones that receive the target column.
        train_df = features_train.copy()
        test_df = features_test.copy()
        train_df[TARGET] = y_train
        test_df[TARGET] = y_test

        message = ('Calling model predict_proba function modifies '
                   'input dataset features. Please check if '
                   'predict function is defined correctly.')
        with pytest.raises(UserConfigValidationException, match=message):
            RAIInsights(
                model=model,
                train=train_df,
                test=test_df,
                target_column=TARGET,
                task_type='classification')
    def test_validate_bad_target_name(self):
        """RAIInsights rejects a target column absent from the data.

        'bad_target' is not a column of either frame, so construction is
        expected to raise UserConfigValidationException.
        """
        train_df, test_df, y_train, y_test, _, _ = create_iris_data()
        model = create_lightgbm_classifier(train_df, y_train)

        # Attach the labels under the real target name.
        train_df[TARGET] = y_train
        test_df[TARGET] = y_test

        insights_kwargs = {
            'model': model,
            'train': train_df,
            'test': test_df,
            'target_column': 'bad_target',
            'task_type': 'classification',
        }
        with pytest.raises(UserConfigValidationException) as excinfo:
            RAIInsights(**insights_kwargs)

        error_text = str(excinfo.value)
        assert "Target name bad_target not present in train/test data" in \
            error_text
    def test_validate_unsupported_task_type(self):
        """RAIInsights rejects the unknown task type 'regre'.

        Construction is expected to raise UserConfigValidationException
        with a message listing the supported task types.
        """
        train_df, test_df, y_train, y_test, _, _ = create_iris_data()
        model = create_lightgbm_classifier(train_df, y_train)

        train_df[TARGET] = y_train
        test_df[TARGET] = y_test

        # ``match`` is a regular expression, so the list brackets in the
        # expected text are escaped.
        message = ("Unsupported task type 'regre'. "
                   "Should be one of \\['classification', 'regression'\\]")
        insights_kwargs = {
            'model': model,
            'train': train_df,
            'test': test_df,
            'target_column': TARGET,
            'task_type': 'regre',
        }
        with pytest.raises(UserConfigValidationException, match=message):
            RAIInsights(**insights_kwargs)
    def test_classes_passes(self):
        """Classes stored on a valid RAIInsights are in ascending order.

        Builds RAIInsights without passing ``classes`` and checks that
        every entry of ``_classes`` is <= the entry that follows it.
        """
        train_df, test_df, y_train, y_test, _, _ = create_cancer_data()
        model = create_lightgbm_classifier(train_df, y_train)

        train_df[TARGET] = y_train
        test_df[TARGET] = y_test

        insights = RAIInsights(
            model=model,
            train=train_df,
            test=test_df,
            target_column=TARGET,
            task_type='classification')

        # validate classes are always sorted: compare each element with
        # its successor.
        classes = insights._classes
        earlier, later = classes[:-1], classes[1:]
        assert np.all(earlier <= later)
    def test_mismatch_train_test_features(self):
        """RAIInsights rejects train/test frames with different columns.

        The labels are stored under TARGET in the train frame but under
        'bad_target' in the test frame, so construction is expected to
        raise UserConfigValidationException.
        """
        train_df, test_df, y_train, y_test, _, _ = create_cancer_data()
        model = create_lightgbm_classifier(train_df, y_train)

        train_df[TARGET] = y_train
        # Deliberately use a different column name on the test side.
        test_df['bad_target'] = y_test

        with pytest.raises(UserConfigValidationException) as excinfo:
            RAIInsights(
                model=model,
                train=train_df,
                test=test_df,
                target_column=TARGET,
                task_type='classification')

        error_text = str(excinfo.value)
        assert 'The features in train and test data do not match' in \
            error_text
    def test_classes_exceptions(self):
        """RAIInsights rejects ``classes`` that disagree with the labels.

        Three scenarios are exercised in order, each expected to raise
        UserConfigValidationException:

        1. ``classes=[0, 1, 2]`` with the labels as loaded.
        2. ``classes=[0, 1]`` after one train label is set to 2.
        3. ``classes=[0, 1]`` after that train label is reset to 0 and
           one test label is set to 2.
        """
        train_df, test_df, y_train, y_test, _, _ = create_cancer_data()
        model = create_lightgbm_classifier(train_df, y_train)

        def construction_error(classes):
            # Refresh the target columns from the (possibly mutated)
            # label containers, then return the text of the error raised
            # while constructing RAIInsights.
            train_df[TARGET] = y_train
            test_df[TARGET] = y_test
            with pytest.raises(UserConfigValidationException) as excinfo:
                RAIInsights(model=model,
                            train=train_df,
                            test=test_df,
                            target_column=TARGET,
                            task_type='classification',
                            classes=classes)
            return str(excinfo.value)

        # Scenario 1: labels as loaded, three classes supplied.
        error_text = construction_error([0, 1, 2])
        assert 'The train labels and distinct values in ' + \
            'target (train data) do not match' in error_text

        # Scenario 2: a train label outside the supplied classes.
        y_train[0] = 2
        error_text = construction_error([0, 1])
        assert 'The train labels and distinct values in target ' + \
            '(train data) do not match' in error_text

        # Scenario 3: train label restored, a test label outside the
        # supplied classes.
        y_train[0] = 0
        y_test[0] = 2
        error_text = construction_error([0, 1])
        assert 'The train labels and distinct values in target ' + \
            '(test data) do not match' in error_text
    def test_validate_categorical_features_having_target(self):
        """RAIInsights rejects the target listed as a categorical feature.

        Passing ``categorical_features=[TARGET]`` is expected to raise
        UserConfigValidationException.
        """
        train_df, test_df, y_train, y_test, _, _ = create_iris_data()
        model = create_lightgbm_classifier(train_df, y_train)

        train_df[TARGET] = y_train
        test_df[TARGET] = y_test

        insights_kwargs = {
            'model': model,
            'train': train_df,
            'test': test_df,
            'target_column': TARGET,
            'task_type': 'classification',
            'categorical_features': [TARGET],
        }
        with pytest.raises(UserConfigValidationException) as excinfo:
            RAIInsights(**insights_kwargs)

        error_text = str(excinfo.value)
        assert 'Found target name target in categorical feature list' in \
            error_text
    def test_unsupported_train_test_types(self):
        """RAIInsights rejects train/test data passed via ``.values``.

        The ``.values`` of each frame is handed over instead of the
        DataFrame itself, and construction is expected to raise
        UserConfigValidationException.
        """
        train_df, test_df, y_train, y_test, _, _ = create_cancer_data()
        model = create_lightgbm_classifier(train_df, y_train)

        train_df[TARGET] = y_train
        test_df[TARGET] = y_test

        # Strip the DataFrame wrapper from both datasets.
        raw_train = train_df.values
        raw_test = test_df.values

        with pytest.raises(UserConfigValidationException) as excinfo:
            RAIInsights(
                model=model,
                train=raw_train,
                test=raw_test,
                target_column=TARGET,
                task_type='classification')

        error_text = str(excinfo.value)
        assert "Unsupported data type for either train or test. " + \
            "Expecting pandas DataFrame for train and test." in error_text
    def test_model_analysis_incorrect_task_type(self):
        """RAIInsights rejects task_type='regression' for this classifier.

        The model comes from create_lightgbm_classifier and the expected
        error text refers to the model having a predict_proba function.
        """
        train_df, test_df, y_train, y_test, _, _ = create_cancer_data()
        model = create_lightgbm_classifier(train_df, y_train)

        train_df[TARGET] = y_train
        test_df[TARGET] = y_test

        # NOTE(review): there is no space between 'model' and 'provided'
        # in the expected text; presumably this mirrors the message
        # raised by the library verbatim -- confirm before changing it.
        err_msg = ('The regression model'
                   'provided has a predict_proba function. '
                   'Please check the task_type.')
        insights_kwargs = {
            'model': model,
            'train': train_df,
            'test': test_df,
            'target_column': TARGET,
            'task_type': 'regression',
        }
        with pytest.raises(UserConfigValidationException, match=err_msg):
            RAIInsights(**insights_kwargs)
    def test_validate_categorical_features_not_having_train_features(self):
        """RAIInsights rejects categorical features missing from train.

        'not_a_feature' is not a column of the train frame, so
        construction is expected to raise UserConfigValidationException
        naming it.
        """
        train_df, test_df, y_train, y_test, _, _ = create_iris_data()
        model = create_lightgbm_classifier(train_df, y_train)

        train_df[TARGET] = y_train
        test_df[TARGET] = y_test

        # Square brackets are escaped because ``match`` is a regex.
        message = ("Feature names in categorical_features "
                   "do not exist in train data: \\['not_a_feature'\\]")
        unknown_features = ['not_a_feature']
        with pytest.raises(UserConfigValidationException, match=message):
            RAIInsights(
                model=model,
                train=train_df,
                test=test_df,
                target_column=TARGET,
                task_type='classification',
                categorical_features=unknown_features)
    def test_treatment_features_list_not_having_train_features(self):
        """Causal setup rejects treatment features missing from train.

        RAIInsights itself is built with valid inputs; adding the causal
        component with 'not_a_feature' as a treatment feature is expected
        to raise UserConfigValidationException.
        """
        train_df, y_train, test_df, y_test, _ = \
            create_binary_classification_dataset()
        model = create_lightgbm_classifier(train_df, y_train)

        train_df[TARGET] = y_train
        test_df[TARGET] = y_test

        insights_kwargs = {
            'model': model,
            'train': train_df,
            'test': test_df,
            'target_column': TARGET,
            'task_type': 'classification',
        }
        rai_insights = RAIInsights(**insights_kwargs)

        # Square brackets are escaped because ``match`` is a regex.
        message = ("Feature names in treatment_features "
                   "do not exist in train data: \\['not_a_feature'\\]")
        unknown_features = ['not_a_feature']
        with pytest.raises(UserConfigValidationException, match=message):
            rai_insights.causal.add(treatment_features=unknown_features)
    def test_feature_metadata(self):
        """RAIInsights rejects an identity feature name not in the data.

        FeatureMetadata is created with identity_feature_name='id', and
        construction is expected to raise UserConfigValidationException
        saying that name is not present in user features.
        """
        from responsibleai.feature_metadata import FeatureMetadata

        train_df, test_df, y_train, y_test, _, _ = create_cancer_data()
        model = create_lightgbm_classifier(train_df, y_train)

        train_df[TARGET] = y_train
        test_df[TARGET] = y_test

        feature_metadata = FeatureMetadata(identity_feature_name='id')

        err_msg = ('The given identity feature name id is not present'
                   ' in user features.')
        insights_kwargs = {
            'model': model,
            'train': train_df,
            'test': test_df,
            'target_column': TARGET,
            'task_type': 'classification',
            'feature_metadata': feature_metadata,
        }
        with pytest.raises(UserConfigValidationException, match=err_msg):
            RAIInsights(**insights_kwargs)
    def test_permitted_range_not_having_train_features(self):
        """Counterfactual setup rejects permitted_range for unknown names.

        RAIInsights itself is built with valid inputs; adding the
        counterfactual component with a permitted_range keyed by
        'not_a_feature' is expected to raise
        UserConfigValidationException.
        """
        train_df, y_train, test_df, y_test, _ = \
            create_binary_classification_dataset()
        model = create_lightgbm_classifier(train_df, y_train)

        train_df[TARGET] = y_train
        test_df[TARGET] = y_test

        rai_insights = RAIInsights(
            model=model,
            train=train_df,
            test=test_df,
            target_column=TARGET,
            task_type='classification')

        # Square brackets are escaped because ``match`` is a regex.
        message = ("Feature names in permitted_range do "
                   "not exist in train data: \\['not_a_feature'\\]")
        bad_range = {'not_a_feature': [20, 40]}
        with pytest.raises(UserConfigValidationException, match=message):
            rai_insights.counterfactual.add(
                total_CFs=10, permitted_range=bad_range)
    def test_desired_class_not_set(self):
        """Counterfactual setup rejects a call without desired_class.

        RAIInsights itself is built with valid inputs; adding the
        counterfactual component without ``desired_class`` is expected to
        raise UserConfigValidationException.
        """
        train_df, y_train, test_df, y_test, _ = \
            create_binary_classification_dataset()
        model = create_lightgbm_classifier(train_df, y_train)

        train_df[TARGET] = y_train
        test_df[TARGET] = y_test

        insights_kwargs = {
            'model': model,
            'train': train_df,
            'test': test_df,
            'target_column': TARGET,
            'task_type': 'classification',
        }
        rai_insights = RAIInsights(**insights_kwargs)

        expected_message = (
            'The desired_class attribute should be '
            'either \'opposite\' for binary classification or '
            'the class value for multi-classification scenarios.')
        with pytest.raises(UserConfigValidationException,
                           match=expected_message):
            rai_insights.counterfactual.add(total_CFs=10, method='random')
    def test_validate_test_data_size(self):
        """RAIInsights rejects test data larger than maximum_rows_for_test.

        The row limit is set to one fewer than the number of test labels,
        so construction is expected to raise
        UserConfigValidationException with the size-limit message.
        """
        X_train, X_test, y_train, y_test, _, _ = \
            create_iris_data()

        model = create_lightgbm_classifier(X_train, y_train)
        X_train[TARGET] = y_train
        X_test[TARGET] = y_test

        # Use the real target column so that the row limit is the only
        # invalid input; the test then does not depend on the order in
        # which RAIInsights runs its validations.
        with pytest.raises(UserConfigValidationException) as ucve:
            RAIInsights(model=model,
                        train=X_train,
                        test=X_test,
                        target_column=TARGET,
                        task_type='classification',
                        maximum_rows_for_test=len(y_test) - 1)

        error_message = str(ucve.value)
        # NOTE(review): the literal 31/30 presume create_iris_data yields
        # a 31-row test split -- confirm if that helper ever changes.
        assert "The test data has 31 rows, but limit is set to 30 rows" in \
            error_message
        assert "Please resample the test data or " +\
            "adjust maximum_rows_for_test" in \
            error_message
    def test_desired_class_opposite_multi_classification(self):
        """Counterfactual setup rejects 'opposite' on the iris data.

        RAIInsights itself is built with valid inputs; adding the
        counterfactual component with ``desired_class='opposite'`` is
        expected to raise UserConfigValidationException with the
        multiclass message.
        """
        # The last two values returned by the helper are not needed here.
        X_train, X_test, y_train, y_test, _, _ = \
            create_iris_data()
        model = create_lightgbm_classifier(X_train, y_train)
        X_train[TARGET] = y_train
        X_test[TARGET] = y_test

        rai_insights = RAIInsights(model=model,
                                   train=X_train,
                                   test=X_test,
                                   target_column=TARGET,
                                   task_type='classification')

        # NOTE(review): the expected text has no period between
        # "'opposite'" and "It"; presumably this mirrors the library's
        # message verbatim -- confirm before changing it.
        with pytest.raises(
                UserConfigValidationException,
                match='The desired_class attribute should not be \'opposite\''
                ' It should be the class value for multiclass'
                ' classification scenario.'):
            rai_insights.counterfactual.add(total_CFs=10,
                                            method='random',
                                            desired_class='opposite')
    def test_feature_importance_with_less_counterfactuals(self):
        """Counterfactual setup rejects total_CFs below 10 by default.

        RAIInsights itself is built with valid inputs; adding the
        counterfactual component with ``total_CFs=5`` and no explicit
        ``feature_importance`` argument is expected to raise
        UserConfigValidationException.
        """
        # The last two values returned by the helper are not needed here.
        X_train, X_test, y_train, y_test, _, _ = \
            create_iris_data()
        model = create_lightgbm_classifier(X_train, y_train)
        X_train[TARGET] = y_train
        X_test[TARGET] = y_test

        rai_insights = RAIInsights(model=model,
                                   train=X_train,
                                   test=X_test,
                                   target_column=TARGET,
                                   task_type='classification')

        with pytest.raises(
                UserConfigValidationException,
                match="A total_CFs value of at least 10 is required to "
                "use counterfactual feature importances. "
                "Either increase total_CFs to at least 10 or "
                "set feature_importance to False."):
            rai_insights.counterfactual.add(total_CFs=5,
                                            method='random',
                                            desired_class=2)
    def test_validate_serializer(self):
        """RAIInsights rejects serializers that fail its validation.

        Three serializers are tried in order, each expected to raise
        UserConfigValidationException:

        1. one that defines ``load`` but not ``save``;
        2. one that defines ``save`` but not ``load``;
        3. one that defines both and is given a logger, for which the
           expected message says it should be serializable via pickle.

        Only the RAIInsights construction is placed inside each
        ``pytest.raises`` block, so the expected exception can only come
        from that call and not from the test's own setup code.
        """
        X_train, X_test, y_train, y_test, _, _ = \
            create_cancer_data()
        model = create_lightgbm_classifier(X_train, y_train)

        X_train[TARGET] = y_train
        X_test[TARGET] = y_test

        class LoadOnlySerializer:
            def __init__(self, logger=None):
                self._logger = logger

            def load(self):
                pass

        serializer = LoadOnlySerializer()
        with pytest.raises(UserConfigValidationException) as ucve:
            RAIInsights(model=model,
                        train=X_train,
                        test=X_test,
                        target_column=TARGET,
                        task_type='classification',
                        serializer=serializer)
        assert 'The serializer does not implement save()' in str(ucve.value)

        class SaveOnlySerializer:
            def __init__(self, logger=None):
                self._logger = logger

            def save(self):
                pass

        serializer = SaveOnlySerializer()
        with pytest.raises(UserConfigValidationException) as ucve:
            RAIInsights(model=model,
                        train=X_train,
                        test=X_test,
                        target_column=TARGET,
                        task_type='classification',
                        serializer=serializer)
        assert 'The serializer does not implement load()' in str(ucve.value)

        # NOTE(review): presumably this instance cannot be pickled
        # because its class is defined locally inside this test -- keep
        # the class definition within the function body.
        class Serializer:
            def __init__(self, logger=None):
                self._logger = logger

            def save(self):
                pass

            def load(self):
                pass

        serializer = Serializer(logger=logging.getLogger('some logger'))
        with pytest.raises(UserConfigValidationException) as ucve:
            RAIInsights(model=model,
                        train=X_train,
                        test=X_test,
                        target_column=TARGET,
                        task_type='classification',
                        serializer=serializer)
        assert 'The serializer should be serializable via pickle' in \
            str(ucve.value)