Example #1
    def test_argument_types(self, transformX, transformY, transformA):
        # This is an expanded-out version of one of the smoke tests
        expgrad = ExponentiatedGradient(self.learner,
                                        constraints=DemographicParity(),
                                        eps=0.1)
        expgrad.fit(transformX(self.X),
                    transformY(self.y),
                    sensitive_features=transformA(self.A))

        res = expgrad._expgrad_result._as_dict()
        Q = res["best_classifier"]
        res["n_classifiers"] = len(res["classifiers"])

        disp = DemographicParity()
        disp.load_data(self.X, self.y, sensitive_features=self.A)
        error = ErrorRate()
        error.load_data(self.X, self.y, sensitive_features=self.A)
        res["disp"] = disp.gamma(Q).max()
        res["error"] = error.gamma(Q)[0]

        assert res["best_gap"] == pytest.approx(0.0000, abs=self._PRECISION)
        assert res["last_t"] == 5
        assert res["best_t"] == 5
        assert res["disp"] == pytest.approx(0.1, abs=self._PRECISION)
        assert res["error"] == pytest.approx(0.25, abs=self._PRECISION)
        assert res["n_oracle_calls"] == 32
        assert res["n_classifiers"] == 3
Example #2
    def test_argument_types(self, transformX, transformY, transformA,
                            A_two_dim):
        # This is an expanded-out version of one of the smoke tests
        X, y, A = _get_data(A_two_dim)
        merged_A = _map_into_single_column(A)

        expgrad = ExponentiatedGradient(LeastSquaresBinaryClassifierLearner(),
                                        constraints=DemographicParity(),
                                        eps=0.1)
        expgrad.fit(transformX(X),
                    transformY(y),
                    sensitive_features=transformA(A))

        Q = expgrad._best_classifier
        n_classifiers = len(expgrad._classifiers)

        disparity_moment = DemographicParity()
        disparity_moment.load_data(X, y, sensitive_features=merged_A)
        error = ErrorRate()
        error.load_data(X, y, sensitive_features=merged_A)
        disparity = disparity_moment.gamma(Q).max()
        error = error.gamma(Q)[0]

        assert expgrad._best_gap == pytest.approx(0.0000, abs=_PRECISION)
        assert expgrad._last_t == 5
        assert expgrad._best_t == 5
        assert disparity == pytest.approx(0.1, abs=_PRECISION)
        assert error == pytest.approx(0.25, abs=_PRECISION)
        assert expgrad._n_oracle_calls == 32
        assert n_classifiers == 3
Example #3
    def test_argument_types(self, transformX, transformY, transformA,
                            A_two_dim):
        # This is an expanded-out version of one of the smoke tests
        X, y, A = _get_data(A_two_dim)
        merged_A = _map_into_single_column(A)

        expgrad = ExponentiatedGradient(LeastSquaresBinaryClassifierLearner(),
                                        constraints=DemographicParity(),
                                        eps=0.1)
        expgrad.fit(transformX(X),
                    transformY(y),
                    sensitive_features=transformA(A))

        res = expgrad._expgrad_result._as_dict()
        Q = res["best_classifier"]
        res["n_classifiers"] = len(res["classifiers"])

        disp = DemographicParity()
        disp.load_data(X, y, sensitive_features=merged_A)
        error = ErrorRate()
        error.load_data(X, y, sensitive_features=merged_A)
        res["disp"] = disp.gamma(Q).max()
        res["error"] = error.gamma(Q)[0]

        assert res["best_gap"] == pytest.approx(0.0000, abs=_PRECISION)
        assert res["last_t"] == 5
        assert res["best_t"] == 5
        assert res["disp"] == pytest.approx(0.1, abs=_PRECISION)
        assert res["error"] == pytest.approx(0.25, abs=_PRECISION)
        assert res["n_oracle_calls"] == 32
        assert res["n_classifiers"] == 3
Example #4
def gridSearch(model, X_train, Y_train, A_train, grid_size):
    """
    Generates a sequence of relabellings and reweightings, and trains a predictor for each. 
    Only applicable for binary feature.
    
    Parameters:
    x_train: input data for training model
    y_train: list of ground truths
    model: the unmitigated algorthmic model
    
    Returns a dataframe of the different predictors and its accuracy scores and disparity scores.
    
    """
    sweep = GridSearch(model,
                       constraints=DemographicParity(),
                       grid_size=grid_size)

    sweep.fit(X_train, Y_train, sensitive_features=A_train)

    # Extract the full set of predictors from the `GridSearch` object
    predictors = sweep._predictors
    """
    Remove the predictors which are dominated in the error-disparity space by others from the sweep 
    (note that the disparity will only be calculated for the protected attribute; 
    other potentially protected attributes will not be mitigated)
   
    In general, one might not want to do this, since there may be other considerations beyond the strict 
    optimisation of error and disparity (of the given protected attribute).
    """
    errors, disparities = [], []
    for m in predictors:
        def classifier(X):
            return m.predict(X)

        error = ErrorRate()
        error.load_data(X_train,
                        pd.Series(Y_train),
                        sensitive_features=A_train)
        disparity = DemographicParity()
        disparity.load_data(X_train,
                            pd.Series(Y_train),
                            sensitive_features=A_train)

        errors.append(error.gamma(classifier)[0])
        disparities.append(disparity.gamma(classifier).max())

    all_results = pd.DataFrame({
        "predictor": predictors,
        "error": errors,
        "disparity": disparities
    })

    non_dominated = []
    for row in all_results.itertuples():
        errors_for_lower_or_eq_disparity = all_results["error"][
            all_results["disparity"] <= row.disparity]
        if row.error <= errors_for_lower_or_eq_disparity.min():
            non_dominated.append(row.predictor)

    return non_dominated
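
A minimal usage sketch for the helper above, on small synthetic data; the estimator choice, the seed, and the synthetic feature names are illustrative assumptions, not part of the original example:

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression

# Hypothetical data: two numeric features and a binary sensitive feature.
rng = np.random.default_rng(0)
X_train = pd.DataFrame({"f0": rng.normal(size=200), "f1": rng.normal(size=200)})
A_train = pd.Series(rng.integers(0, 2, size=200))
Y_train = ((X_train["f0"] + 0.5 * A_train
            + rng.normal(scale=0.5, size=200)) > 0).astype(int)

# Sweep a small grid around an unmitigated logistic regression.
unmitigated = LogisticRegression(solver="liblinear")
non_dominated_predictors = gridSearch(unmitigated, X_train, Y_train, A_train,
                                      grid_size=11)
print("Non-dominated predictors kept:", len(non_dominated_predictors))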
Example #5
    def test_argument_types_ratio_bound(self, transformX, transformY,
                                        transformA, A_two_dim):
        # This is an expanded-out version of one of the smoke tests
        X, y, A = _get_data(A_two_dim)
        merged_A = _map_into_single_column(A)

        transformed_X = transformX(X)
        transformed_y = transformY(y)
        transformed_A = transformA(A)
        eps = 0.1
        ratio = 1.0

        expgrad = ExponentiatedGradient(
            LeastSquaresBinaryClassifierLearner(),
            constraints=DemographicParity(ratio_bound_slack=eps,
                                          ratio_bound=ratio),
            eps=eps,
        )
        expgrad.fit(transformed_X,
                    transformed_y,
                    sensitive_features=transformed_A)

        def Q(X):
            return expgrad._pmf_predict(X)[:, 1]

        n_predictors = len(expgrad.predictors_)

        disparity_moment = DemographicParity(ratio_bound_slack=eps,
                                             ratio_bound=ratio)
        disparity_moment.load_data(X, y, sensitive_features=merged_A)
        error = ErrorRate()
        error.load_data(X, y, sensitive_features=merged_A)
        disparity = disparity_moment.gamma(Q).max()
        disp = disparity_moment.gamma(Q)
        disp_eps = disparity_moment.gamma(Q) - disparity_moment.bound()
        error = error.gamma(Q)[0]

        assert expgrad.best_gap_ == pytest.approx(0.0000, abs=_PRECISION)
        assert expgrad.last_iter_ == 5
        assert expgrad.best_iter_ == 5
        assert disparity == pytest.approx(0.1, abs=_PRECISION)
        assert np.all(np.isclose(disp - eps, disp_eps))
        assert error == pytest.approx(0.25, abs=_PRECISION)
        assert expgrad.n_oracle_calls_ == 32
        assert n_predictors == 3
Example #6
    def __remove_predictors_dominated_error_disparity_by_sweep(self, predictors, X_train, Y_train, A_train):
        errors, disparities = [], []
        for m in predictors:
            def classifier(X):
                return m.predict(X)

            error = ErrorRate()
            error.load_data(X_train, pd.Series(Y_train),
                            sensitive_features=A_train.diabetic)
            disparity = DemographicParity()
            disparity.load_data(X_train, pd.Series(Y_train),
                                sensitive_features=A_train.diabetic)

            errors.append(error.gamma(classifier)[0])
            disparities.append(disparity.gamma(classifier).max())

        return pd.DataFrame({"predictor": predictors, "error": errors, "disparity": disparities})
Example #7
errors, disparities = [], []
for m in predictors:

    def classifier(X):
        return m.predict(X)

    error = ErrorRate()
    error.load_data(X_train, pd.Series(Y_train), sensitive_features=A_train)
    disparity = DemographicParity()
    disparity.load_data(X_train,
                        pd.Series(Y_train),
                        sensitive_features=A_train)

    errors.append(error.gamma(classifier)[0])
    disparities.append(disparity.gamma(classifier).max())

all_results = pd.DataFrame({
    "predictor": predictors,
    "error": errors,
    "disparity": disparities
})

non_dominated = []
for row in all_results.itertuples():
    errors_for_lower_or_eq_disparity = all_results["error"][
        all_results["disparity"] <= row.disparity]
    if row.error <= errors_for_lower_or_eq_disparity.min():
        non_dominated.append(row.predictor)

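A possible follow-up, sketched under the assumption that matplotlib is available and that all_results and non_dominated are still in scope; the plotting code below is not part of the original example:

import matplotlib.pyplot as plt

# Mark which rows of the sweep survived the dominance filter above.
is_non_dominated = all_results["predictor"].isin(non_dominated)

plt.scatter(all_results["error"][~is_non_dominated],
            all_results["disparity"][~is_non_dominated], label="dominated")
plt.scatter(all_results["error"][is_non_dominated],
            all_results["disparity"][is_non_dominated], label="non-dominated")
plt.xlabel("error")
plt.ylabel("disparity")
plt.legend()
plt.show()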