def get_adult(shuffle=True, seed=None):
    """Load the Adult data as float arrays, optionally shuffled.

    Rows of ``X`` that contain any NaN are dropped together with their
    labels, column 9 is swapped with the last column (this puts ``sex`` into
    the last column), and the string target is encoded as ``0.0`` for
    ``"<=50K"`` and ``1.0`` for every other label.

    Parameters
    ----------
    shuffle : bool, default=True
        If True, apply one random permutation to the rows of ``X`` and ``y``.
    seed : int or None, default=None
        Passed to ``np.random.seed``. The global NumPy random state is
        reseeded on every call, even when ``shuffle`` is False.

    Returns
    -------
    X : numpy.ndarray
        Feature matrix cast to float, without NaN rows.
    y : numpy.ndarray
        Float labels aligned with the rows of ``X``.
    """
    np.random.seed(seed)

    adult = fetch_adult()
    X = adult["data"]
    y = adult["target"]

    # Erase nan values. The mask is computed once and reused for both arrays.
    # Boolean indexing returns copies, so the column swap below is applied to
    # our own copy instead of writing into the array owned by the dataset.
    keep = ~np.isnan(X).any(axis=1)
    X = X[keep]
    y = y[keep]

    # Puts sex into the last column. Swapping after the filter gives the same
    # result because the NaN mask does not depend on column order.
    X[:, [9, -1]] = X[:, [-1, 9]]

    # shuffle data
    if shuffle:
        permute = np.random.permutation(len(y))
        X = X[permute]
        y = y[permute]

    # transform to binary and set proper type
    X = X.astype('float')
    y = np.where(y == "<=50K", 0., 1.)
    return X, y
def get_adult():
    """Load the Adult data as float arrays with ``sex`` in the last column.

    Rows of ``X`` that contain any NaN are dropped together with their
    labels, column 9 is swapped with the last column, and the string target
    is encoded as ``0.0`` for ``"<=50K"`` and ``1.0`` for every other label.

    Returns
    -------
    X : numpy.ndarray
        Feature matrix cast to float, without NaN rows.
    y : numpy.ndarray
        Float labels aligned with the rows of ``X``.
    """
    adult = fetch_adult()
    X = adult["data"]
    y = adult["target"]

    # Erase nan values. The mask is computed once and reused for both arrays.
    # Boolean indexing returns copies, so the column swap below is applied to
    # our own copy instead of writing into the array owned by the dataset.
    keep = ~np.isnan(X).any(axis=1)
    X = X[keep]
    y = y[keep]

    # Puts sex into the last column. Swapping after the filter gives the same
    # result because the NaN mask does not depend on column order.
    X[:, [9, -1]] = X[:, [-1, 9]]

    # transform to binary and set proper type
    X = X.astype('float')
    y = np.where(y == "<=50K", 0., 1.)
    return X, y
# Copyright (c) Microsoft Corporation and Fairlearn contributors.
# Licensed under the MIT License.

"""Produce plot of selection rates for the quickstart guide."""
from bokeh.plotting import figure, show
from fairlearn.metrics import MetricFrame, selection_rate
from fairlearn.datasets import fetch_adult

# Load the Adult data as a DataFrame so columns can be addressed by name.
data = fetch_adult(as_frame=True)
X = data.data
# Binary target: 1 where the label is '>50K', 0 otherwise.
y_true = (data.target == '>50K') * 1
sex = X['sex']

# The true labels are passed as both y_true and y_pred, so the "selection
# rate" per group is the fraction of that group whose label is '>50K'.
selection_rates = MetricFrame(selection_rate,
                              y_true, y_true,
                              sensitive_features=sex)

# One bar per group of the sensitive feature.
xs = list(selection_rates.by_group.index)
ys = [selection_rates.by_group[s] for s in xs]

# NOTE(review): plot_height/plot_width appear to have been renamed to
# height/width in newer Bokeh releases - confirm against the pinned version.
p = figure(x_range=xs,
           plot_height=480,
           plot_width=640,
           title="Fraction earning over $50,000",
           toolbar_location=None,
           tools="")

p.vbar(x=xs, top=ys, width=0.9)

# Anchor the y-axis at zero and hide the vertical grid lines.
p.y_range.start = 0
p.xgrid.grid_line_color = None