Exemplo n.º 1
0
def test_linear_dependence_pd():
    """Check CorrelationRemover on a DataFrame with a dependent column.

    Column ``b`` is exactly ``a + 1``. With ``a`` marked as the sensitive
    feature, the test expects two output columns and expects the first of
    them to be (numerically) 1.5 in every row; 1.5 is the mean of ``b``.
    """
    col_a = [0, 0, 1, 1]
    col_b = [1, 1, 2, 2]
    col_c = [0.1, 0.2, 1.2, 1.1]
    # Rows are listed per feature, so transpose to get samples x features.
    data = np.array([col_a, col_b, col_c]).T

    frame = pd.DataFrame(data, columns=["a", "b", "c"])

    remover = CorrelationRemover(sensitive_feature_ids=["a"])
    fitted = remover.fit(frame)
    transformed = fitted.transform(frame)

    assert transformed.shape[1] == 2
    assert np.allclose(transformed[:, 0], 1.5)
Exemplo n.º 2
0
def test_linear_dependence():
    """Check CorrelationRemover on an ndarray with a dependent column.

    Column 1 is exactly column 0 plus one. With column 0 marked as the
    sensitive feature, the test expects two output columns and expects the
    first of them to be (numerically) 1.5 in every row; 1.5 is the mean of
    column 1.
    """
    sensitive = [0, 0, 1, 1]
    dependent = [1, 1, 2, 2]
    other = [0.1, 0.2, 1.2, 1.1]
    # Rows are listed per feature, so transpose to get samples x features.
    data = np.array([sensitive, dependent, other]).T

    remover = CorrelationRemover(sensitive_feature_ids=[0])
    fitted = remover.fit(data)
    transformed = fitted.transform(data)

    assert transformed.shape[1] == 2
    assert np.allclose(transformed[:, 0], 1.5)
Exemplo n.º 3
0
def test_estimator_checks(test_fn):
    """Apply ``test_fn`` to two CorrelationRemover configurations.

    ``test_fn`` is called as ``test_fn(name, estimator)``, first with an
    estimator that has no sensitive features and then with one whose
    sensitive feature is column 0.
    """
    for sensitive_ids in ([], [0]):
        estimator = CorrelationRemover(sensitive_feature_ids=sensitive_ids)
        test_fn(CorrelationRemover.__name__, estimator)
Exemplo n.º 4
0
# Copyright (c) Microsoft Corporation and Fairlearn contributors.
# Licensed under the MIT License.

import numpy as np
import pandas as pd
from sklearn.utils.estimator_checks import parametrize_with_checks

from fairlearn.preprocessing import CorrelationRemover


@parametrize_with_checks(
    [
        CorrelationRemover(sensitive_feature_ids=[]),
        CorrelationRemover(sensitive_feature_ids=[0]),
    ]
)
def test_sklearn_compatible_estimator(estimator, check):
    """Run a single estimator check against a single CorrelationRemover.

    The ``estimator`` / ``check`` arguments are supplied by the
    ``parametrize_with_checks`` decorator, once per combination.
    """
    check(estimator)


def test_linear_dependence():
    X = np.array([
        [
            0,
            0,
            1,
            1,
        ],
        [
            1,
            1,
            2,
            2,
    "race_Asian", 'race_Caucasian', 'race_Hispanic', 'race_Other',
    'race_Unknown', 'had_inpatient_days_False', 'medicare_False'
],
                   axis=1)

# Keep only the three features of interest plus the sensitive column.
# The names are held in variables because they are reused below when the
# transformed arrays are turned back into DataFrames.
kept_features = ['time_in_hospital', 'had_inpatient_days_True', 'medicare_True']
sensitive_feature = 'race_AfricanAmerican'
X_raw = X_raw[kept_features + [sensitive_feature]]

# %%
# We are now going to fit the CorrelationRemover to the data,
# and transform it. The transformed array will be placed back
# in a Pandas DataFrame, for plotting purposes.

cr = CorrelationRemover(sensitive_feature_ids=[sensitive_feature])
X_cr = cr.fit_transform(X_raw)
# Reuse X_raw's index: the column assignment below aligns on index labels,
# so a fresh default index would misalign rows (or yield NaN) whenever
# X_raw is not indexed 0..n-1.
X_cr = pd.DataFrame(X_cr, columns=kept_features, index=X_raw.index)
X_cr[sensitive_feature] = X_raw[sensitive_feature]

cr_alpha = CorrelationRemover(sensitive_feature_ids=[sensitive_feature],
                              alpha=0.5)
X_cr_alpha = cr_alpha.fit_transform(X_raw)
# Same index handling as for X_cr above.
X_cr_alpha = pd.DataFrame(X_cr_alpha,
                          columns=kept_features,
                          index=X_raw.index)
X_cr_alpha[sensitive_feature] = X_raw[sensitive_feature]

# %%