Пример #1
0
def get_adult(shuffle=True, seed=None):
    """Load the Adult dataset as float arrays with a binary target.

    Column 9 of the feature matrix is swapped with the last column, rows
    that contain any NaN feature are dropped, and the target is encoded as
    ``0.0`` for the label ``"<=50K"`` and ``1.0`` for every other label.

    Args:
        shuffle: When true, apply one random permutation to the rows of
            both ``X`` and ``y`` (the same permutation for each).
        seed: Value handed to ``np.random.seed``. Note that the global
            NumPy RNG is reseeded on every call, even when ``shuffle`` is
            false.

    Returns:
        A tuple ``(X, y)`` of float arrays with matching first dimension.
    """
    np.random.seed(seed)
    adult = fetch_adult()
    X = adult["data"]
    y = adult["target"]

    # The fancy-indexed right-hand side is a copy, so this is a clean swap.
    X[:, [9, -1]] = X[:, [-1, 9]]  # puts sex into the last column

    # Drop rows with any NaN feature; build the mask once and reuse it so
    # X and y are guaranteed to be filtered by the same rows.
    keep = ~np.isnan(X).any(axis=1)
    y = y[keep]
    X = X[keep]

    # shuffle data
    if shuffle:
        permute = np.random.permutation(len(y))
        y = y[permute]
        X = X[permute]

    # transform to binary and set proper type
    X = X.astype('float')
    # Viewing the labels as objects forces an element-wise comparison with
    # the string regardless of the stored dtype.
    is_low_income = np.asarray(y, dtype=object) == "<=50K"
    y = np.where(is_low_income, 0.0, 1.0)
    return X, y
Пример #2
0
def get_adult():
    """Load the Adult dataset as float arrays with a binary target.

    Column 9 of the feature matrix is swapped with the last column, rows
    that contain any NaN feature are dropped, and the target is encoded as
    ``0.0`` for the label ``"<=50K"`` and ``1.0`` for every other label.

    Returns:
        A tuple ``(X, y)`` of float arrays with matching first dimension.
    """
    adult = fetch_adult()
    X = adult["data"]
    y = adult["target"]

    # The fancy-indexed right-hand side is a copy, so this is a clean swap.
    X[:, [9, -1]] = X[:, [-1, 9]]  # puts sex into the last column

    # Drop rows with any NaN feature; build the mask once and reuse it so
    # X and y are guaranteed to be filtered by the same rows.
    keep = ~np.isnan(X).any(axis=1)
    y = y[keep]
    X = X[keep]

    # transform to binary and set proper type
    X = X.astype('float')
    # Viewing the labels as objects forces an element-wise comparison with
    # the string regardless of the stored dtype.
    is_low_income = np.asarray(y, dtype=object) == "<=50K"
    y = np.where(is_low_income, 0.0, 1.0)
    return X, y
Пример #3
0
# Copyright (c) Microsoft Corporation and Fairlearn contributors.
# Licensed under the MIT License.
"""Produce plot of selection rates for the quickstart guide."""
from bokeh.plotting import figure, show
from fairlearn.metrics import MetricFrame, selection_rate
from fairlearn.datasets import fetch_adult

# Load the Adult dataset as a DataFrame and binarize the target:
# 1 where the label is '>50K', 0 otherwise.
data = fetch_adult(as_frame=True)
X = data.data
y_true = (data.target == '>50K') * 1
sex = X['sex']

# y_true is passed in both positions after the metric, so the per-group
# selection rate is computed from the true labels themselves (this matches
# the plot title below) rather than from any model's predictions.
selection_rates = MetricFrame(selection_rate,
                              y_true,
                              y_true,
                              sensitive_features=sex)

# One bar per group of the sensitive feature: group labels on the x axis,
# the group's selection rate as the bar height.
xs = list(selection_rates.by_group.index)
ys = [selection_rates.by_group[s] for s in xs]

p = figure(x_range=xs,
           plot_height=480,
           plot_width=640,
           title="Fraction earning over $50,000",
           toolbar_location=None,
           tools="")

p.vbar(x=xs, top=ys, width=0.9)

# Anchor the y axis at zero and hide the vertical grid lines.
p.y_range.start = 0
p.xgrid.grid_line_color = None