Example no. 1
0
    def explain(self, request: Dict) -> Dict:
        """Compute AIF360 fairness metrics for a batch of predictions.

        Args:
            request: Dict carrying "instances" (feature rows) and
                "outputs" (the model's predictions for those rows).

        Returns:
            Dict with the raw predictions plus a set of fairness
            metrics computed over the predicted dataset.
        """
        feature_rows = request["instances"]
        outputs = np.array(request["outputs"])

        # Assemble features into a dataframe and append the predicted label.
        df = pd.DataFrame(feature_rows, columns=self.feature_names)
        df[self.label_names[0]] = outputs

        # NOTE(review): the protected attribute is hard-coded to 'age' here.
        predicted_dataset = BinaryLabelDataset(
            favorable_label=self.favorable_label,
            unfavorable_label=self.unfavorable_label,
            df=df,
            label_names=self.label_names,
            protected_attribute_names=['age'])

        metric = BinaryLabelDatasetMetric(
            predicted_dataset,
            unprivileged_groups=self.unprivileged_groups,
            privileged_groups=self.privileged_groups)

        fairness = {
            "base_rate": metric.base_rate(),
            "consistency": metric.consistency().tolist(),
            "disparate_impact": metric.disparate_impact(),
            "num_instances": metric.num_instances(),
            "num_negatives": metric.num_negatives(),
            "num_positives": metric.num_positives(),
            "statistical_parity_difference":
                metric.statistical_parity_difference(),
        }
        return {"predictions": outputs.tolist(), "metrics": fairness}
Example no. 2
0
def main() -> None:
    """Load the German Credit data, build an AIF360 StandardDataset, and
    report fairness metrics for the configured protected attribute."""

    # read from inventory
    data_path = ait_input.get_inventory_path('Data')

    # column names as given by german.doc
    german_columns = [
        'status', 'month', 'credit_history', 'purpose', 'credit_amount',
        'savings', 'employment', 'investment_as_income_percentage',
        'personal_status', 'other_debtors', 'residence_since', 'property',
        'age', 'installment_plans', 'housing', 'number_of_credits',
        'skill_level', 'people_liable_for', 'telephone', 'foreign_worker',
        'credit',
    ]

    # load the raw data into a dataframe
    credit_df = data_loading(
        filepath=data_path, column_names=german_columns, na_values=None)

    # human-readable value maps for the label and protected attributes
    metadata_maps = {
        'label_maps': [{1.0: 'Good Credit', 2.0: 'Bad Credit'}],
        'protected_attribute_maps': [
            {1.0: 'Male', 0.0: 'Female'},
            {1.0: 'Old', 0.0: 'Young'},
        ],
    }

    # columns treated as categorical features
    categorical_columns = [
        'status', 'credit_history', 'purpose', 'savings', 'employment',
        'other_debtors', 'property', 'installment_plans', 'housing',
        'skill_level', 'telephone', 'foreign_worker',
    ]

    # method parameters
    protected_attribute = ait_input.get_method_param_value(
        'protected_attribute')
    privileged_classes = ait_input.get_method_param_value('privileged_classes')

    # validate the parameters before using them
    ait_input_check(protected_attribute, privileged_classes)

    # build the AIF360 structured dataset from the dataframe
    dataset = StandardDataset(
        df=credit_df,
        label_name='credit',
        favorable_classes=[1],
        protected_attribute_names=[protected_attribute],
        # values at or above the threshold count as privileged
        privileged_classes=[lambda value: value >= privileged_classes],
        instance_weights_name=None,
        categorical_features=categorical_columns,
        features_to_keep=None,
        features_to_drop=['personal_status', 'sex'],
        na_values=None,
        custom_preprocessing=preprocessing,
        metadata=metadata_maps)

    # 1 marks the privileged group, 0 the unprivileged one
    priv_groups = [{protected_attribute: 1}]
    unpriv_groups = [{protected_attribute: 0}]

    # fairness metric on the original training dataset
    fairness = BinaryLabelDatasetMetric(
        dataset,
        unprivileged_groups=unpriv_groups,
        privileged_groups=priv_groups)

    print("Original training dataset: German Credit Data")
    print(
        "Difference in mean outcomes between unprivileged and privileged groups = %f"
        % fairness.mean_difference())
    print("unprivileged groups = %f" % fairness.base_rate(privileged=False))
    print("privileged groups = %f" % fairness.base_rate(privileged=True))

    # resource observed_predicted_plot
    save_metric_fairness_plot(fairness, protected_attribute)

    # measures
    measure_mean_difference(fairness.mean_difference())

    # ait.log
    move_log()
        unprivileged_groups=unpriv_group,
        privileged_groups=priv_group,
    )
    tpr_difference = metric_test_data.true_positive_rate_difference()
    tpr_priviledged = metric_test_data.true_positive_rate(True)
    tpr_unpriviledged = metric_test_data.true_positive_rate(False)
    return tpr_difference, tpr_priviledged, tpr_unpriviledged


# Statistical parity of the test split's ORIGINAL labels — presumably the
# base-rate gap before any model is applied; verify against the helper's body.
compute_statistical_parity(test_data.convert_to_dataframe()[0], unpriv_group,
                           priv_group)
# %% [markdown]
# To be clear, because we're looking at the original label distribution this is the base rate difference between the two groups

# %%
metric_test_data.base_rate(False)  # Base rate of the unprivileged group

# %%
metric_test_data.base_rate(True)  # Base rate of the privileged group

# %% [markdown]
# To explore the data, it can also help to convert it to a dataframe.
# Note that we get the same numbers as the reported base rates above;
# however, since the base rate is computed with respect to the favorable label (which here is actually 0), the value shown is 1 minus the raw dataframe mean.

# %% [markdown]
# **Report**
#
# Report basic statistics in your report, such as the size of the training and test set.
#
# Now let's explore the *training* data further.