Пример #1
0
    def test_invalid_outcome_name(self, data_type):
        """Check that ``dice_ml.Data`` rejects a bad or missing ``outcome_name``.

        ``data_type`` selects the scenario: a non-string name (``Incorrect``),
        an explicit ``None`` (``AsNone``), or the argument left out entirely.
        Every scenario must raise ``ValueError`` with the expected message.
        """
        iris = load_iris(as_frame=True)
        feature_names = iris.feature_names
        dataset = iris.frame

        if data_type == DataTypeCombinations.Incorrect:
            with pytest.raises(ValueError) as ve:
                dice_ml.Data(dataframe=dataset,
                             continuous_features=feature_names,
                             outcome_name=1)

            # Check the raised exception itself: str() of the ExceptionInfo
            # wrapper is its repr, not the error message.
            assert "should provide the name of outcome feature as a string" in str(
                ve.value)
        elif data_type == DataTypeCombinations.AsNone:
            with pytest.raises(ValueError) as ve:
                dice_ml.Data(dataframe=dataset,
                             continuous_features=feature_names,
                             outcome_name=None)

            assert "should provide the name of outcome feature as a string" in str(
                ve.value)
        else:
            # outcome_name omitted altogether
            with pytest.raises(ValueError) as ve:
                dice_ml.Data(dataframe=dataset,
                             continuous_features=feature_names)

            assert "should provide the name of outcome feature" in str(ve.value)
Пример #2
0
 def _get_exp(self,
              backend,
              method="random",
              is_public_data_interface=True):
     """Build a ``dice_ml.Dice`` explainer for the adult income model.

     :param backend: backend identifier passed to the model-path helper and
         to ``dice_ml.Model``.
     :param method: explanation method forwarded to ``dice_ml.Dice``.
     :param is_public_data_interface: when truthy the data object is built
         from the full dataframe; otherwise only from feature metadata
         (ranges for numeric features, category lists for the others).
     :return: the constructed ``dice_ml.Dice`` object.
     """
     if not is_public_data_interface:
         # Private interface: only feature metadata is supplied.
         feature_metadata = {
             'age': [17, 90],
             'workclass':
             ['Government', 'Other/Unknown', 'Private', 'Self-Employed'],
             'education': [
                 'Assoc', 'Bachelors', 'Doctorate', 'HS-grad', 'Masters',
                 'Prof-school', 'School', 'Some-college'
             ],
             'marital_status':
             ['Divorced', 'Married', 'Separated', 'Single', 'Widowed'],
             'occupation': [
                 'Blue-Collar', 'Other/Unknown', 'Professional', 'Sales',
                 'Service', 'White-Collar'
             ],
             'race': ['Other', 'White'],
             'gender': ['Female', 'Male'],
             'hours_per_week': [1, 99]
         }
         data_interface = dice_ml.Data(features=feature_metadata,
                                       outcome_name='income')
     else:
         # Public interface: the whole training dataframe is supplied.
         adult_income = helpers.load_adult_income_dataset()
         data_interface = dice_ml.Data(
             dataframe=adult_income,
             continuous_features=['age', 'hours_per_week'],
             outcome_name='income')

     model_path = helpers.get_adult_income_modelpath(backend=backend)
     model_interface = dice_ml.Model(model_path=model_path, backend=backend)
     return dice_ml.Dice(data_interface, model_interface, method=method)
Пример #3
0
    def test_invalid_continuous_features(self, data_type):
        """Check that ``dice_ml.Data`` rejects bad ``continuous_features``.

        ``data_type`` selects the scenario: a numpy array instead of a list
        (``Incorrect``), an explicit ``None`` (``AsNone``), or the argument
        left out entirely. Every scenario must raise ``ValueError`` with the
        expected message.
        """
        iris = load_iris(as_frame=True)
        dataset = iris.frame

        if data_type == DataTypeCombinations.Incorrect:
            with pytest.raises(ValueError) as ve:
                dice_ml.Data(dataframe=dataset,
                             continuous_features=np.array(iris.feature_names),
                             outcome_name='target')

            # Check the raised exception itself: str() of the ExceptionInfo
            # wrapper is its repr, not the error message.
            assert "should provide the name(s) of continuous features in the data as a list" in str(
                ve.value)
        elif data_type == DataTypeCombinations.AsNone:
            with pytest.raises(ValueError) as ve:
                dice_ml.Data(dataframe=dataset,
                             continuous_features=None,
                             outcome_name='target')

            assert "should provide the name(s) of continuous features in the data as a list" in str(
                ve.value)
        else:
            # continuous_features omitted altogether
            with pytest.raises(ValueError) as ve:
                dice_ml.Data(dataframe=dataset, outcome_name='target')

            assert 'continuous_features should be provided' in str(ve.value)
Пример #4
0
    def test_invalid_dataframe(self, data_type):
        """Check that ``dice_ml.Data`` rejects a non-DataFrame ``dataframe``.

        For ``DataTypeCombinations.Incorrect`` the raw numpy values of the
        iris frame are passed; for any other ``data_type`` ``None`` is passed.
        Both must raise ``ValueError`` mentioning a pandas dataframe.
        """
        iris = load_iris(as_frame=True)
        feature_names = iris.feature_names
        dataset = iris.frame

        passes_wrong_type = data_type == DataTypeCombinations.Incorrect
        bad_dataframe = dataset.values if passes_wrong_type else None

        with pytest.raises(ValueError, match="should provide a pandas dataframe"):
            dice_ml.Data(dataframe=bad_dataframe,
                         continuous_features=feature_names,
                         outcome_name='target')
Пример #5
0
    def test_numeric_categories(self, desired_range, method,
                                create_boston_data):
        """Smoke-test counterfactual generation for a regressor when ``CHAS``
        is left out of the continuous features.

        A small random forest regressor is fitted on the fixture's training
        split, wrapped in DiCE data/model objects, and asked for 10
        counterfactuals for the first test row within ``desired_range``.
        The test only checks that an explanation object is returned.
        """
        x_train, x_test, y_train, y_test, feature_names = \
            create_boston_data

        regressor = RandomForestRegressor(n_estimators=10,
                                          max_depth=4,
                                          random_state=777)
        model = regressor.fit(x_train, y_train)

        dataset_train = x_train.copy()
        dataset_train['Outcome'] = y_train

        # Build a filtered copy instead of calling feature_names.remove():
        # the list comes from a fixture, and mutating it in place would leak
        # into (or break) other parametrized runs if the fixture is shared.
        continuous_features = [
            name for name in feature_names if name != 'CHAS'
        ]

        d = dice_ml.Data(dataframe=dataset_train,
                         continuous_features=continuous_features,
                         outcome_name='Outcome')
        m = dice_ml.Model(model=model,
                          backend='sklearn',
                          model_type='regressor')
        exp = dice_ml.Dice(d, m, method=method)

        cf_explanation = exp.generate_counterfactuals(
            query_instances=x_test.iloc[0:1],
            total_CFs=10,
            desired_range=desired_range)

        assert cf_explanation is not None
Пример #6
0
    def on_button_clicked(b):
        """Generate counterfactual explanations when the button is clicked.

        Builds the DiCE data, model and explainer objects from values in the
        enclosing scope, runs ``generate_counterfactuals`` for the currently
        selected query values, and stores the result on ``explore.dice_exp``.
        Everything runs inside the ``button_output`` context manager.

        :param b: argument supplied by the click callback (unused here).
        :raises ValueError: if the weight dropdown holds an option other than
            the two handled below.
        """
        with button_output:
            print("Generating explanations may take a few minutes...")
            print()

            # Setting up
            d = dice_ml.Data(dataframe=dataname, continuous_features=cont_feat, outcome_name=outcome_name)

            # Backend name is 'TF' + major version (e.g. 'TF2'); split on the
            # dot so a multi-digit major version is not truncated.
            backend = 'TF' + tf.__version__.split('.')[0]
            m = dice_ml.Model(model=modelname, backend=backend)

            exp = dice_ml.Dice(d, m)

            # Generating CFs
            query_instance = dict(zip(feature_names, explore.queryvaluestouse))

            weight_mode = f.weightdropdown.value
            if weight_mode == 'Use Default Weights':
                weight_kwargs = {}
            elif weight_mode == 'Choose Your Own Weights':
                # Pair each varied feature with its user-chosen weight.
                weight_kwargs = {
                    'feature_weights': dict(zip(f.useusing, f.weightvaluestouse))
                }
            else:
                # Previously this fell through and failed later with an
                # UnboundLocalError on dice_exp; fail with a clear message.
                raise ValueError(
                    "Unexpected weight option: {!r}".format(weight_mode))

            dice_exp = exp.generate_counterfactuals(
                query_instance, total_CFs=num_exp.value, desired_class="opposite",
                features_to_vary=f.useusing,
                proximity_weight=prox.value, diversity_weight=div.value,
                **weight_kwargs)

            explore.dice_exp = dice_exp
Пример #7
0
    def __init__(self, mlmodel: MLModel, hyperparams: Optional[Dict] = None) -> None:
        """Set up the DiCE data, model and explainer objects around ``mlmodel``.

        :param mlmodel: model wrapper; its ``backend`` must be ``"tensorflow"``
            or ``"pytorch"`` and its ``data`` attribute supplies the dataframe,
            continuous/categorical feature names and the target name.
        :param hyperparams: optional overrides merged with
            ``self._DEFAULT_HYPERPARAMS``; the keys ``"num"``,
            ``"desired_class"`` and ``"posthoc_sparsity_param"`` are read.
        :raises ValueError: if ``mlmodel.backend`` is not supported.
        """
        supported_backends = ["tensorflow", "pytorch"]
        backend_is_supported = mlmodel.backend in supported_backends
        if not backend_is_supported:
            raise ValueError(
                f"{mlmodel.backend} is not in supported backends {supported_backends}"
            )

        super().__init__(mlmodel)

        model_data = mlmodel.data
        self._continuous = model_data.continuous
        self._categorical = model_data.categorical
        self._target = model_data.target
        self._model = mlmodel

        params = merge_default_parameters(hyperparams, self._DEFAULT_HYPERPARAMS)

        # Prepare data for dice data structure
        self._dice_data = dice_ml.Data(
            dataframe=mlmodel.data.df,
            continuous_features=self._continuous,
            outcome_name=self._target,
        )

        # The wrapped model is handed to DiCE under its "sklearn" backend.
        self._dice_model = dice_ml.Model(model=mlmodel, backend="sklearn")

        self._dice = dice_ml.Dice(self._dice_data, self._dice_model, method="random")

        self._num = params["num"]
        self._desired_class = params["desired_class"]
        self._post_hoc_sparsity_param = params["posthoc_sparsity_param"]
Пример #8
0
 def test_user_data_corruption(self):
     """Building ``dice_ml.Data`` must leave the caller's dataframe unchanged."""
     original = helpers.load_adult_income_dataset()
     snapshot = original.copy()

     # Only the side effects on `original` matter; the result is discarded.
     dice_ml.Data(
         dataframe=original,
         continuous_features=['age', 'hours_per_week'],
         outcome_name='income',
         permitted_range={'age': [45, 60]},
         continuous_features_precision={'hours_per_week': 2})

     pd.testing.assert_frame_equal(original, snapshot)
Пример #9
0
def data_object():
    """Return a ``dice_ml.Data`` object built from the adult income dataframe.

    ``age`` is given the permitted range [45, 60] and ``hours_per_week`` a
    precision of 2.
    """
    adult_income = helpers.load_adult_income_dataset()
    data_interface = dice_ml.Data(
        dataframe=adult_income,
        continuous_features=['age', 'hours_per_week'],
        outcome_name='income',
        permitted_range={'age': [45, 60]},
        continuous_features_precision={'hours_per_week': 2})
    return data_interface
Пример #10
0
 def _get_exp(self, backend, method="random"):
     """Build a ``dice_ml.Dice`` explainer for the adult income model.

     :param backend: backend identifier used to locate and load the model.
     :param method: explanation method forwarded to ``dice_ml.Dice``.
     :return: the constructed ``dice_ml.Dice`` object.
     """
     adult_income = helpers.load_adult_income_dataset()
     data_interface = dice_ml.Data(
         dataframe=adult_income,
         continuous_features=['age', 'hours_per_week'],
         outcome_name='income')
     model_path = helpers.get_adult_income_modelpath(backend=backend)
     model_interface = dice_ml.Model(model_path=model_path, backend=backend)
     return dice_ml.Dice(data_interface, model_interface, method=method)
Пример #11
0
 def test_min_max_equal(self):
     """Normalizing the min-max-equal dataset must give 0 for every
     ``Numerical`` value."""
     frame = helpers.load_min_max_equal_dataset()
     dice_data = dice_ml.Data(dataframe=frame,
                              continuous_features=['Numerical'],
                              outcome_name='Outcome')

     normalized = dice_data.normalize_data(dice_data.data_df)
     numerical_is_zero = normalized['Numerical'] == 0
     assert all(numerical_is_zero)
Пример #12
0
def regression_exp_object(method="random"):
    """Return a ``dice_ml.Dice`` explainer for the custom regression pipeline.

    :param method: explanation method forwarded to ``dice_ml.Dice``.
    """
    regression_frame = helpers.load_custom_testing_dataset_regression()
    data_interface = dice_ml.Data(
        dataframe=regression_frame,
        continuous_features=['Numerical'],
        outcome_name='Outcome')
    model_interface = dice_ml.Model(
        model_path=helpers.get_custom_dataset_modelpath_pipeline_regression(),
        backend='sklearn',
        model_type='regressor')
    return dice_ml.Dice(data_interface, model_interface, method=method)
Пример #13
0
def random_binary_classification_exp_object():
    """Return a ``dice_ml.Dice`` explainer using the ``random`` method for
    the custom binary-classification pipeline."""
    custom_frame = helpers.load_custom_testing_dataset()
    data_interface = dice_ml.Data(
        dataframe=custom_frame,
        continuous_features=['Numerical'],
        outcome_name='Outcome')
    model_interface = dice_ml.Model(
        model_path=helpers.get_custom_dataset_modelpath_pipeline(),
        backend='sklearn')
    return dice_ml.Dice(data_interface, model_interface, method='random')
Пример #14
0
def pyt_exp_object():
    """Return a ``dice_ml.Dice`` explainer for the adult income model loaded
    with the ``'PYT'`` backend."""
    pyt_backend = 'PYT'
    adult_income = helpers.load_adult_income_dataset()
    data_interface = dice_ml.Data(
        dataframe=adult_income,
        continuous_features=['age', 'hours_per_week'],
        outcome_name='income')
    model_path = helpers.get_adult_income_modelpath(backend=pyt_backend)
    model_interface = dice_ml.Model(model_path=model_path, backend=pyt_backend)
    return dice_ml.Dice(data_interface, model_interface)
Пример #15
0
def public_data_object():
    """
    Build the public data object (backed by the full dataframe) for the
    adult income dataset.
    """
    adult_income = helpers.load_adult_income_dataset()
    public_data = dice_ml.Data(
        dataframe=adult_income,
        continuous_features=['age', 'hours_per_week'],
        outcome_name='income')
    return public_data
Пример #16
0
def binary_classification_exp_object_out_of_order(method="random"):
    """Return a ``dice_ml.Dice`` explainer for the binary pipeline model,
    using the dataset whose outcome is not the last column.

    :param method: explanation method forwarded to ``dice_ml.Dice``.
    """
    out_of_order_frame = helpers.load_outcome_not_last_column_dataset()
    data_interface = dice_ml.Data(
        dataframe=out_of_order_frame,
        continuous_features=['Numerical'],
        outcome_name='Outcome')
    model_interface = dice_ml.Model(
        model_path=helpers.get_custom_dataset_modelpath_pipeline_binary(),
        backend='sklearn')
    return dice_ml.Dice(data_interface, model_interface, method=method)
Пример #17
0
    def test_not_found_outcome_name(self):
        """An ``outcome_name`` absent from the dataframe must raise
        ``UserConfigValidationException``."""
        iris = load_iris(as_frame=True)
        expected_error = "outcome_name invalid not found in"

        with pytest.raises(UserConfigValidationException, match=expected_error):
            dice_ml.Data(dataframe=iris.frame,
                         continuous_features=iris.feature_names,
                         outcome_name='invalid')
Пример #18
0
    def test_unseen_permitted_range(self):
        """A ``permitted_range`` key that is not a dataframe column must raise
        ``UserConfigValidationException``."""
        iris = load_iris(as_frame=True)
        # 'age' is not a column of the iris frame.
        unknown_range = {'age': [45, 60]}
        expected_error = ("permitted_range contains some feature names "
                          "which are not part of columns in dataframe")

        with pytest.raises(UserConfigValidationException, match=expected_error):
            dice_ml.Data(dataframe=iris.frame,
                         continuous_features=iris.feature_names,
                         outcome_name='target',
                         permitted_range=unknown_range)
Пример #19
0
    def test_invalid_dataframe(self):
        """Passing raw numpy values as ``dataframe`` must raise ``ValueError``
        asking for a pandas dataframe."""
        iris = load_iris(as_frame=True)
        feature_names = iris.feature_names
        dataset = iris.frame

        with pytest.raises(ValueError) as ve:
            dice_ml.Data(dataframe=dataset.values,
                         continuous_features=feature_names,
                         outcome_name='target')

        # Check the raised exception itself: str() of the ExceptionInfo
        # wrapper is its repr, not the error message.
        assert "should provide a pandas dataframe" in str(ve.value)
Пример #20
0
    def test_unseen_continuous_feature_names(self):
        """A continuous feature name that is not a dataframe column must raise
        ``UserConfigValidationException``."""
        iris = load_iris(as_frame=True)
        # Real iris columns plus one name the frame does not contain.
        names_with_unknown = [*iris.feature_names, "new feature"]
        expected_error = ("continuous_features contains some feature names "
                          "which are not part of columns in dataframe")

        with pytest.raises(UserConfigValidationException, match=expected_error):
            dice_ml.Data(dataframe=iris.frame,
                         continuous_features=names_with_unknown,
                         outcome_name='target')
Пример #21
0
    def test_invalid_outcome_name(self):
        """A non-string ``outcome_name`` must raise ``ValueError`` asking for
        a string."""
        iris = load_iris(as_frame=True)
        feature_names = iris.feature_names
        dataset = iris.frame

        with pytest.raises(ValueError) as ve:
            dice_ml.Data(dataframe=dataset,
                         continuous_features=feature_names,
                         outcome_name=1)

        # Check the raised exception itself: str() of the ExceptionInfo
        # wrapper is its repr, not the error message.
        assert "should provide the name of outcome feature as a string" in str(
            ve.value)
Пример #22
0
    def test_invalid_continuous_features(self):
        """Passing ``continuous_features`` as a numpy array instead of a list
        must raise ``ValueError``."""
        iris = load_iris(as_frame=True)
        feature_names = np.array(iris.feature_names)
        dataset = iris.frame

        with pytest.raises(ValueError) as ve:
            dice_ml.Data(dataframe=dataset,
                         continuous_features=feature_names,
                         outcome_name='target')

        # Check the raised exception itself: str() of the ExceptionInfo
        # wrapper is its repr, not the error message.
        assert "should provide the name(s) of continuous features in the data as a list" in str(
            ve.value)
Пример #23
0
    def test_unseen_continuous_features_precision(self):
        """A ``continuous_features_precision`` key that is not a dataframe
        column must raise ``UserConfigValidationException``."""
        iris = load_iris(as_frame=True)
        # 'hours_per_week' is not a column of the iris frame.
        unknown_precision = {'hours_per_week': 2}
        expected_error = ("continuous_features_precision contains some feature names which"
                          " are not part of columns in dataframe")

        with pytest.raises(UserConfigValidationException, match=expected_error):
            dice_ml.Data(dataframe=iris.frame,
                         continuous_features=iris.feature_names,
                         outcome_name='target',
                         continuous_features_precision=unknown_precision)
def data_object():
    """Return a ``dice_ml.Data`` object built only from feature metadata for
    the adult income features.

    Numeric features are given as ``[min, max]`` pairs, the others as lists
    of categories. ``hours_per_week`` is declared as a float with precision
    2, and a ``mad`` value of 10 is supplied for ``age``.
    """
    # An OrderedDict is used so this also works on Python < 3.6.
    features_dict = OrderedDict()
    features_dict['age'] = [17, 90]
    features_dict['workclass'] = [
        'Government', 'Other/Unknown', 'Private', 'Self-Employed']
    features_dict['education'] = [
        'Assoc', 'Bachelors', 'Doctorate', 'HS-grad', 'Masters',
        'Prof-school', 'School', 'Some-college']
    features_dict['marital_status'] = [
        'Divorced', 'Married', 'Separated', 'Single', 'Widowed']
    features_dict['occupation'] = [
        'Blue-Collar', 'Other/Unknown', 'Professional', 'Sales', 'Service',
        'White-Collar']
    features_dict['race'] = ['Other', 'White']
    features_dict['gender'] = ['Female', 'Male']
    features_dict['hours_per_week'] = [1, 99]

    return dice_ml.Data(
        features=features_dict,
        outcome_name='income',
        type_and_precision={'hours_per_week': ['float', 2]},
        mad={'age': 10})
Пример #25
0
    def test_invalid_dataframe(self, data_type):
        """Check that ``dice_ml.Data`` rejects a bad or missing ``dataframe``.

        ``data_type`` selects the scenario: raw numpy values (``Incorrect``),
        an explicit ``None`` (``AsNone``), or the argument left out entirely.
        Every scenario must raise ``ValueError`` with the expected message.
        """
        iris = load_iris(as_frame=True)
        feature_names = iris.feature_names
        dataset = iris.frame

        if data_type == DataTypeCombinations.Incorrect:
            with pytest.raises(ValueError) as ve:
                dice_ml.Data(dataframe=dataset.values,
                             continuous_features=feature_names,
                             outcome_name='target')
            # Check the raised exception itself: str() of the ExceptionInfo
            # wrapper is its repr, not the error message.
            assert "should provide a pandas dataframe" in str(ve.value)
        elif data_type == DataTypeCombinations.AsNone:
            with pytest.raises(ValueError) as ve:
                dice_ml.Data(dataframe=None,
                             continuous_features=feature_names,
                             outcome_name='target')
            assert "should provide a pandas dataframe" in str(ve.value)
        else:
            # dataframe omitted altogether
            with pytest.raises(ValueError) as ve:
                dice_ml.Data(continuous_features=feature_names,
                             outcome_name='target')
            assert "dataframe not found in params" in str(ve.value)
Пример #26
0
def private_data_object():
    """
    Build a private data object that holds only meta information (ranges and
    category lists) about the adult income dataset.
    """
    # An OrderedDict is used so this also works on Python <= 3.6.
    feature_metadata = [
        ('age', [17, 90]),
        ('workclass', ['Government', 'Other/Unknown', 'Private', 'Self-Employed']),
        ('education', ['Assoc', 'Bachelors', 'Doctorate', 'HS-grad', 'Masters',
                       'Prof-school', 'School', 'Some-college']),
        ('marital_status', ['Divorced', 'Married', 'Separated', 'Single', 'Widowed']),
        ('occupation', ['Blue-Collar', 'Other/Unknown', 'Professional', 'Sales',
                        'Service', 'White-Collar']),
        ('race', ['Other', 'White']),
        ('gender', ['Female', 'Male']),
        ('hours_per_week', [1, 99]),
    ]
    features_dict = OrderedDict(feature_metadata)
    return dice_ml.Data(features=features_dict, outcome_name='income')
Пример #27
0
    def test_check_features_to_vary(self, features_to_vary):
        """``check_features_to_vary`` must accept ``None`` and ``'all'`` and
        raise ``UserConfigValidationException`` for any other value supplied
        by the parametrization."""
        iris = load_iris(as_frame=True)
        dice_data = dice_ml.Data(dataframe=iris.frame,
                                 continuous_features=iris.feature_names,
                                 outcome_name='target')

        expects_error = features_to_vary is not None and features_to_vary != 'all'
        if not expects_error:
            dice_data.check_features_to_vary(features_to_vary=features_to_vary)
            return

        expected_error = "Got features {'not_a_feature'} which are not present in training data"
        with pytest.raises(UserConfigValidationException, match=expected_error):
            dice_data.check_features_to_vary(features_to_vary=features_to_vary)
Пример #28
0
    def _create_diceml_explainer(self, method, continuous_features):
        """Create a ``Dice`` explainer for this object's training data and model.

        :param method: explanation method forwarded to ``Dice``.
        :param continuous_features: names of the continuous features in
            ``self._train``.
        :return: the constructed ``Dice`` explainer.
        """
        data_interface = dice_ml.Data(dataframe=self._train,
                                      continuous_features=continuous_features,
                                      outcome_name=self._target_column)

        # Classification tasks use the classifier model type; any other task
        # type is treated as regression.
        if self._task_type == ModelTask.CLASSIFICATION:
            model_type = CounterfactualConstants.CLASSIFIER
        else:
            model_type = CounterfactualConstants.REGRESSOR

        model_interface = dice_ml.Model(model=self._model,
                                        backend=CounterfactualConstants.SKLEARN,
                                        model_type=model_type)

        return Dice(data_interface, model_interface, method=method)
Пример #29
0
    def test_check_permitted_range_with_unknown_feature(self, permitted_range):
        """``check_permitted_range`` must accept ``None`` and raise
        ``UserConfigValidationException`` for any other value supplied by the
        parametrization."""
        iris = load_iris(as_frame=True)
        dice_data = dice_ml.Data(dataframe=iris.frame,
                                 continuous_features=iris.feature_names,
                                 outcome_name='target')

        if permitted_range is None:
            dice_data.check_permitted_range(permitted_range=permitted_range)
            return

        expected_error = "Got features {'not_a_feature'} which are not present in training data"
        with pytest.raises(UserConfigValidationException, match=expected_error):
            dice_data.check_permitted_range(permitted_range=permitted_range)
Пример #30
0
    def test_check_permitted_range_with_unknown_categorical_value(self):
        """A permitted category that never occurs in the training data must
        raise ``UserConfigValidationException`` naming the category, the
        feature and the allowed categories."""
        iris = load_iris(as_frame=True)
        permitted_range = {'new_feature': ['unknown_category']}
        feature_names = iris.feature_names
        dataset = iris.frame
        # Add a column holding a single known category; it is not listed in
        # continuous_features.
        dataset['new_feature'] = np.repeat(['known_category'], dataset.shape[0])

        dice_data = dice_ml.Data(dataframe=dataset, continuous_features=feature_names,
                                 outcome_name='target')

        with pytest.raises(
                UserConfigValidationException) as ucve:
            dice_data.check_permitted_range(permitted_range=permitted_range)

        # Check the raised exception itself: str() of the ExceptionInfo
        # wrapper is its repr, where quotes in the message may be escaped or
        # the text truncated.
        assert 'The category {0} does not occur in the training data for feature {1}. Allowed categories are {2}'.format(
            'unknown_category', 'new_feature', ['known_category']) in str(ucve.value)