def persist_model(clf, dataset_name, flipped=False):
    """Pickle ``clf`` to ``./pickled_models/<dataset_name>/<name>.pkl``.

    ``<name>`` is ``clf.filename()``, with ``_flipped`` appended when
    ``flipped`` is truthy.  Both directory levels are handed to
    ``out.create_dir`` before the model is written with ``joblib.dump``.
    """
    out.create_dir('./pickled_models')
    out.create_dir('./pickled_models/{}'.format(dataset_name))

    if flipped:
        stem = clf.filename() + '_flipped'
    else:
        stem = clf.filename()
    target = "./pickled_models/{}/{}.pkl".format(dataset_name, stem)
    joblib.dump(clf, target)
def check_data_file(dataset, base_url, fname):
    """Return the local path of a data file, downloading it when missing.

    The file is looked up in ``<directory of this module>/data/<dataset>/``
    (the directory is passed to ``output.create_dir`` first).  When ``fname``
    is not listed there, it is fetched from ``base_url + quote(fname)`` and
    stored under the same name.

    Args:
        dataset: Sub-directory name below ``data/``.
        base_url: URL prefix; the URL-quoted ``fname`` is appended to it.
        fname: File name to look for / download.

    Returns:
        The path ``"<files_dir>/<fname>"`` as a string.

    Any exception raised by ``urllib.request.urlopen`` or by the file write
    propagates to the caller.
    """
    # Data lives next to this module, not in the process working directory.
    files_dir = os.path.dirname(os.path.realpath(__file__)) + '/data/' + dataset
    output.create_dir(files_dir)
    files = os.listdir(files_dir)
    print("Looking for file '%s' in the current directory..." % fname)
    full_file = "{}/{}".format(files_dir, fname)

    if fname in files:
        print("File found in current directory..")
        return full_file

    print("'{}' not found! Downloading ...".format(fname))
    url = base_url + urllib.parse.quote(fname)
    # The context manager guarantees the HTTP response is closed even when
    # reading fails (the response used to be left open).
    with urllib.request.urlopen(url) as response:
        content_charset = response.info().get_content_charset()
        payload = response.read()

    if content_charset is not None:
        # A declared charset means a text file: decode it (dropping
        # undecodable bytes) and write it in text mode.
        data = payload.decode(content_charset, 'ignore')
        write_spec = "w"
    else:
        # No charset: treat the payload as opaque bytes.
        data = payload
        write_spec = "wb"

    with open(full_file, write_spec) as file_out:
        file_out.write(data)
    print("'%s' download and saved locally.." % fname)
    return full_file
def __init__(self, dataset, val_split=0.):
    """Store the dataset name and validation split.

    Also records where hyper-parameters are kept
    (``hyperparams/prob_class_params``), passes the ``hyperparams`` directory
    to ``output.create_dir`` and initialises ``self.X`` to ``None``.
    """
    store_location = 'hyperparams/prob_class_params'
    self.hyperparam_store_loc = store_location
    output.create_dir('hyperparams')
    self.ds_name, self.val_split = dataset, val_split
    self.X = None
def plot_one_var_vs_other_together(res_dir, sens_dict, nosens_dict, x_label, y_label, format='png'):
    """Plot one curve pair per model into a single figure and save it.

    For every ``model`` in ``sens_dict`` a solid line (labelled "Women") is
    drawn from ``sens_dict[model]`` and a dotted line (labelled "Men") from
    ``nosens_dict[model]``; both share the same colour.  Each dict value is
    indexed as ``[0]`` for x values and ``[1]`` for y values.

    The figure is written to ``<res_dir>/disparity_plots`` in ``format`` and
    additionally as PDF.

    Args:
        res_dir: Base results directory.
        sens_dict: Mapping model -> (x values, y values).
        nosens_dict: Mapping with the same model keys as ``sens_dict``.
        x_label: X axis label; also used (whitespace -> ``_``) in the file name.
        y_label: Y axis label; also used (whitespace -> ``_``) in the file name.
        format: Image format of the primary output file.

    Returns:
        Two wiki links separated by a blank line: one for the figure and one
        for ``<filename>_legend.<format>``.
    """
    plt.rcParams.update({
        'font.size': 24,
        'pdf.fonttype': 42,
        'ps.fonttype': 42,
        'axes.labelsize': 22,
        'axes.labelweight': 'bold',
        'axes.titlesize': 15,
        'axes.linewidth': 3,
        'xtick.labelsize': 16,
        'ytick.labelsize': 16,
        'legend.fontsize': 10,
        'figure.titlesize': 28,
        'lines.linewidth': 3.0,
    })

    plots_dir = res_dir + "/disparity_plots"
    out.create_dir(plots_dir)
    filename = '{}_vs_{}_together'.format('_'.join(x_label.split()),
                                          '_'.join(y_label.split()))
    figpath = plots_dir + '/' + filename + '.' + format

    fig = plt.figure(figsize=(4, 4))
    ax = fig.add_subplot(111)
    for idx, (model, vals) in enumerate(sens_dict.items()):
        ax.plot(vals[0], vals[1], color=colors[idx],
                label='{} (Women)'.format(model.shortfilename()))
        ax.plot(nosens_dict[model][0], nosens_dict[model][1], linestyle=':',
                color=colors[idx],
                label='{} (Men)'.format(model.shortfilename()))

    ax.set_xlabel(x_label + ' (' + r'$\delta$' + ')')
    ax.set_ylabel(y_label)
    ax.legend(loc='lower right')

    fig.savefig(figpath, format=format, bbox_inches='tight')
    fig.savefig(plots_dir + '/' + filename + '.pdf', format='pdf',
                bbox_inches='tight')
    # Release the figure; it used to stay registered with pyplot forever.
    plt.close(fig)

    # NOTE(review): no "_legend" image is written by this function (the legend
    # is drawn inside the main figure), so the second link may dangle.  The
    # return value is kept unchanged for existing callers.
    return "{}\n\n{}".format(
        get_wiki_link(figpath),
        get_wiki_link(plots_dir + '/' + filename + "_legend." + format))
def find_neighbourhoods(self, X, Y, tau_sens, tau_nosens, model=None):
    """Cluster ``X`` with KMeans using cached or freshly searched parameters.

    Parameters are looked up via ``aeio.load_params`` under ``./params/KMeans``
    with the key ``<model.filename()>_<tau_sens>_<tau_nosens>`` (or
    ``<tau_sens>_<tau_nosens>`` when ``model`` is ``None``).  When nothing is
    stored, ``self.find_best_params`` is run and its result is saved under
    the same key.

    Args:
        X: Data to cluster.
        Y: Passed to ``self.find_best_params`` together with ``X``.
        tau_sens: Used only to build the parameter cache key.
        tau_nosens: Used only to build the parameter cache key.
        model: Optional object providing ``filename()`` for the cache key.

    Returns:
        Tuple ``(cluster_centers_, labels_, k_means_params)``.
    """
    out.create_dir('./params')

    # Build the cache key once so that loading and saving always agree.
    # Previously the save path called model.filename() unconditionally and
    # raised AttributeError when model was None.
    if model is not None:
        params_key = '{}_{}_{}'.format(model.filename(), tau_sens, tau_nosens)
    else:
        params_key = '{}_{}'.format(tau_sens, tau_nosens)

    k_means_params = aeio.load_params('./params/KMeans', params_key)
    if k_means_params is None:
        k_means_params = self.find_best_params(X, Y, kmeans_param_grid,
                                               adjusted_mutual_info_score,
                                               KMeans)
        aeio.save_params('./params/KMeans', params_key, k_means_params)

    # NOTE(review): `n_jobs` is not accepted by KMeans in newer scikit-learn
    # releases -- confirm against the pinned version before upgrading.
    alg = KMeans(**k_means_params, n_jobs=-1, random_state=42)
    alg.fit(X)
    return alg.cluster_centers_, alg.labels_, k_means_params
def __init__(self):
    """Set up dataset metadata, result paths and segregation indices.

    The dataset name and model list come from ``exp.base_exp``; the
    prediction task and sensitive-group description are read from
    ``exp.dataset_info``.  ``results/<dataset>`` is passed to
    ``out.create_dir``.
    """
    self.dataset, self.models_other_than_rules = exp.base_exp(return_vars=True)
    self.prediction_task = exp.dataset_info[self.dataset]['prediction_task']

    results_dir = 'results/{}'.format(self.dataset)
    self.res_dir = results_dir
    out.create_dir(self.res_dir)
    self.res_file_path = self.res_dir + '/res_utilities_thresholds.txt'
    self.seg_file_path = self.res_dir + '/res_segregation.txt'
    self.wiki_parent_path = (
        "Actionable-Explanations/Simple-Explanations-{}".format(self.dataset))

    self.sens_group_desc = exp.dataset_info[self.dataset]['sens_f']
    # Human-readable name per cost group: the "one group" sentinel plus the
    # two entries of the sensitive-group description.
    group_names = {cf.ONE_GROUP_IND: "all"}
    group_names[0] = self.sens_group_desc[0]
    group_names[1] = self.sens_group_desc[1]
    self.cost_groups = group_names
    self.segregation_indices = [si.Atkinson, si.Centralization, si.Clustering]
def plot_dtree(res_dir, clf, feature_info):
    """Render the tree wrapped by ``clf`` to a PNG and return its wiki link.

    ``clf.clf`` is exported with ``export_graphviz`` (feature names taken from
    ``get_feature_names(feature_info)``), converted with pydotplus and written
    to ``<res_dir>/disparity_plots/<clf.filename()>_viz.png``.
    """
    plots_dir = res_dir + "/disparity_plots"
    out.create_dir(plots_dir)
    figpath = '{}/{}.png'.format(plots_dir, "{}_viz".format(clf.filename()))

    dot_buffer = StringIO()
    export_graphviz(
        clf.clf,
        out_file=dot_buffer,
        filled=True,
        rounded=True,
        special_characters=True,
        feature_names=get_feature_names(feature_info),
    )
    pydotplus.graph_from_dot_data(dot_buffer.getvalue()).write_png(figpath)
    return get_wiki_link(figpath)
def __init__(self, subsample_size_test=None, subsample_size_train=None):
    """Set up dataset metadata, result paths and subsample sizes.

    Results go to ``results/<dataset>``, or to its ``FC`` sub-directory when
    ``exp.FAIRNESS_CONSTRAINTS`` is truthy.  ``out.create_dir`` is called for
    the base directory and again for the final one.
    """
    self.dataset, self.models_other_than_rules = exp.base_exp(return_vars=True)

    self.res_dir = 'results/{}'.format(self.dataset)
    out.create_dir(self.res_dir)
    if exp.FAIRNESS_CONSTRAINTS:
        self.res_dir = '{}/FC'.format(self.res_dir)
    out.create_dir(self.res_dir)

    self.res_file_path = self.res_dir + '/res_lti.txt'
    self.wiki_parent_path = (
        "Actionable-Explanations/Simple-Explanations-{}".format(self.dataset))
    self.subsample_size_test = subsample_size_test
    self.subsample_size_train = subsample_size_train

    self.sens_group_desc = exp.dataset_info[self.dataset]['sens_f']
    self.prediction_task = exp.dataset_info[self.dataset]['prediction_task']
    # Human-readable name per cost group: the "one group" sentinel plus the
    # two entries of the sensitive-group description.
    group_names = {cf.ONE_GROUP_IND: "all"}
    group_names[0] = self.sens_group_desc[0]
    group_names[1] = self.sens_group_desc[1]
    self.cost_groups = group_names
def plot_covar_matrix(res_dir, X, feature_info, format='png'):
    """Save an annotated heatmap of the feature covariance matrix of ``X``.

    The covariance is computed with ``np.cov(X, rowvar=False)``, i.e. columns
    of ``X`` are treated as variables.  The heatmap is written to
    ``<res_dir>/disparity_plots/training_set_feature_covar_mat.<format>`` and
    additionally as PDF.

    Args:
        res_dir: Base results directory.
        X: 2-D array; its second dimension must match the covariance size.
        feature_info: Passed to ``get_feature_names`` to obtain tick labels.
        format: Image format of the primary output file.

    Returns:
        The wiki link for the primary image.
    """
    plt.rcParams.update({
        'font.size': 12,
        'pdf.fonttype': 42,
        'ps.fonttype': 42,
        'axes.labelsize': 16,
        'axes.labelweight': 'bold',
        'axes.titlesize': 10,
        'axes.linewidth': 1,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'legend.fontsize': 16,
        'figure.titlesize': 15,
        'lines.linewidth': 1.0,
    })

    covar_mat = np.cov(X, rowvar=False)
    # Internal sanity check: one row and one column per feature.
    assert covar_mat.shape[0] == X.shape[1] and covar_mat.shape[1] == X.shape[1]

    plots_dir = res_dir + "/disparity_plots"
    out.create_dir(plots_dir)
    filename = 'training_set_feature_covar_mat'
    figpath = plots_dir + '/' + filename + '.' + format

    fig = plt.figure(figsize=(20, 18))
    import seaborn as sns

    # Same labels on both axes; compute them once.
    feature_names = get_feature_names(feature_info)
    ax = sns.heatmap(covar_mat, annot=True, xticklabels=feature_names,
                     yticklabels=feature_names)
    ax.xaxis.tick_top()  # x axis on top
    ax.xaxis.set_label_position('top')
    ax.tick_params(length=0)
    plt.xticks(rotation=1)
    plt.title('Covariance Matrix')

    plt.savefig(figpath, format=format, bbox_inches='tight')
    plt.savefig(plots_dir + '/' + filename + '.pdf', format='pdf',
                bbox_inches='tight')
    # Release the (large) figure; it used to stay registered with pyplot.
    plt.close(fig)
    return get_wiki_link(figpath)
def get_segregation_plots_new(res_dir, outer_seg_index_mapping, fc, format='png'):
    """Yield wiki links to segregation plots, one per segregation index.

    Only entries of ``outer_seg_index_mapping`` whose key ``tau_nosens`` is
    not greater than 0 are plotted.  For every index type the value stored
    under ``'0.00'`` ("Impacted Population") is compared with the value stored
    under ``'Original Population'`` ("Initial Population"):

    * ``fc`` truthy: line plot over ``model.tau``, saved as
      ``segregation_<shortname>_fc``.
    * ``fc`` falsy: grouped bar plot over ``model.shortfilename()``, saved as
      ``segregation_<shortname>``.

    Each plot is written to ``<res_dir>/segregation_plots`` in ``format`` and
    additionally as PDF.

    Args:
        res_dir: Base results directory.
        outer_seg_index_mapping: Nested mapping
            ``{tau_nosens: {index_type: {model: {key: value}}}}``.
            It is only read, never modified.
        fc: Selects the line-plot (truthy) or bar-plot (falsy) variant.
        format: Image format of the primary output files.

    Yields:
        The wiki link of each primary image.
    """
    plt.rcParams.update({
        'font.size': 24,
        'pdf.fonttype': 42,
        'ps.fonttype': 42,
        'axes.labelsize': 22,
        'axes.labelweight': 'bold',
        'axes.titlesize': 15,
        'axes.linewidth': 3,
        'xtick.labelsize': 12,
        'ytick.labelsize': 12,
        'legend.fontsize': 10,
        'figure.titlesize': 28,
        'lines.linewidth': 3.0,
    })

    plots_dir = res_dir + "/segregation_plots"
    out.create_dir(plots_dir)
    x_title = r'$\tau$'
    print("Initially passed dict: {}".format(outer_seg_index_mapping))

    for tau_nosens, seg_index_mapping in outer_seg_index_mapping.items():
        if tau_nosens > 0:
            continue
        count = 3  # colour offset used by the line-plot variant

        if fc:
            for index_type, mapping in seg_index_mapping.items():
                plot_title = ''
                fig = plt.figure(figsize=(4, 4))
                ax = fig.add_subplot(111)
                ax.get_yaxis().get_major_formatter().set_useOffset(False)

                x_vals, y_vals, y_vals_old = [], [], []
                for model, tau_mapping in mapping.items():
                    if '0.00' not in tau_mapping:
                        continue
                    x_vals.append(model.tau)
                    # Read without pop(): the caller's mapping must stay
                    # intact so the data can be plotted again.
                    y_vals.append(tau_mapping['0.00'])
                    for inner_model, inner_tau_mapping in mapping.items():
                        if ('Original Population' in inner_tau_mapping
                                and model.tau == inner_model.tau):
                            y_vals_old.append(
                                inner_tau_mapping['Original Population'])
                            # One baseline per model (as in the bar-plot
                            # variant); otherwise y_vals_old drifts out of
                            # step with x_vals when several models share tau.
                            break
                print(x_vals, y_vals_old, y_vals)

                x_vals, y_vals, y_vals_old = list(
                    zip(*sorted(zip(x_vals, y_vals, y_vals_old),
                                key=operator.itemgetter(0))))
                ax.plot(x_vals, y_vals, color=colors[count], marker='o',
                        label='Impacted Population')
                ax.plot(x_vals, y_vals_old, color=colors[count + 1],
                        marker='o', label='Initial Population')
                ax.set_title(plot_title)
                ax.set_ylabel(str(index_type))
                ax.set_xlabel(x_title)
                ax.set_xticks(x_vals)
                ax.set_xticklabels([str(x_val) for x_val in x_vals])

                # Legend placement depends on the index type.
                shortname_lower = index_type.shortname().lower()
                if 'atkinson' in shortname_lower:
                    ax.legend(loc='lower left')
                elif 'aci' in shortname_lower:
                    ax.legend(loc='upper right')
                else:
                    ax.legend(loc='lower right')

                filename = "segregation_{}_fc".format(index_type.shortname())
                figpath = plots_dir + '/' + filename + '.' + format
                fig.savefig(figpath, format=format, bbox_inches='tight')
                fig.savefig(plots_dir + '/' + filename + '.pdf', format='pdf',
                            bbox_inches='tight')
                plt.close(fig)
                yield get_wiki_link(figpath)
        else:
            for index_type, mapping in seg_index_mapping.items():
                x_labels, y_old, y_new = [], [], []
                for model, tau_mapping in mapping.items():
                    if '0.00' not in tau_mapping:
                        continue
                    print("Outer: {}".format(model.filename()))
                    x_labels.append(model.shortfilename())
                    y_new.append(tau_mapping['0.00'])
                    for inner_model, inner_tau_mapping in mapping.items():
                        if ('Original Population' in inner_tau_mapping
                                and model.filename() == inner_model.filename()):
                            print("Added")
                            y_old.append(
                                inner_tau_mapping['Original Population'])
                            break

                fig = plt.figure(figsize=(4, 4))
                ax = fig.add_subplot(111)
                ax.get_yaxis().get_major_formatter().set_useOffset(False)

                ind = np.arange(len(y_old))
                width = 0.25
                min_y = min(min(y_new), min(y_old))
                max_y = max(max(y_new), max(y_old))
                range_y = max_y - min_y
                # Pad the y axis by one full data range on either side.
                ax.set_ylim(min_y - range_y, max_y + range_y)
                ax.bar(ind + width, y_new, width=width, color=colors[0],
                       label='Impacted Population')
                ax.bar(ind, y_old, width=width, color=colors[1],
                       label='Initial Population')
                ax.set_xticks(ind + width / 2)
                ax.set_ylabel(str(index_type))
                ax.set_xticklabels(x_labels)
                ax.legend(loc='upper right')

                filename = "segregation_{}".format(index_type.shortname())
                figpath = plots_dir + '/' + filename + '.' + format
                fig.savefig(figpath, format=format, bbox_inches='tight')
                fig.savefig(plots_dir + '/' + filename + '.pdf', format='pdf',
                            bbox_inches='tight')
                plt.close(fig)
                yield get_wiki_link(figpath)
def get_segregation_plots(res_dir, outer_seg_index_mapping, format='png'):
    """Plot segregation indices per threshold and return wiki markup lines.

    For every ``tau_nosens`` and every index type a 2-D line plot is written
    (one line per model, plus a dashed "Original Population" line when that
    value is not ``None``) together with a separate legend image.  Afterwards
    one 3-D wireframe plot per index type combines the data of all
    ``tau_nosens`` values.  All images go to ``<res_dir>/segregation_plots``
    in ``format`` and additionally as PDF.

    Args:
        res_dir: Base results directory.
        outer_seg_index_mapping: Nested mapping
            ``{tau_nosens: {index_type: {model: {key: value}}}}`` where every
            innermost dict holds ``'Original Population'`` plus keys that are
            convertible to ``float``.  It is only read, never modified.
        format: Image format of the primary output files.

    Returns:
        List of strings: section headings and wiki links, in write order.

    Raises:
        KeyError: If an innermost dict lacks ``'Original Population'``.
    """
    plt.rcParams.update({
        'font.size': 24,
        'pdf.fonttype': 42,
        'ps.fonttype': 42,
        'axes.labelsize': 22,
        'axes.labelweight': 'bold',
        'axes.titlesize': 15,
        'axes.linewidth': 3,
        'xtick.labelsize': 10,
        'ytick.labelsize': 12,
        'legend.fontsize': 18,
        'figure.titlesize': 28,
        'lines.linewidth': 3.0,
    })

    plots_dir = res_dir + "/segregation_plots"
    out.create_dir(plots_dir)
    x_title, y_title = r'$C_s$', r'$C$_~s'
    # {index shortname: {model or 'Original Population': [xs, ys, zs]}}
    threed_plot_mapping = {}
    # {tau_nosens: {index shortname: [wiki strings]}}
    plot_strings_mapping = {}
    strings_to_write = []
    print("Initially passed dict: {}".format(outer_seg_index_mapping))

    for tau_nosens, seg_index_mapping in outer_seg_index_mapping.items():
        plot_strings_mapping[tau_nosens] = {}
        for index_type, mapping in seg_index_mapping.items():
            shortname = index_type.shortname()
            per_index = threed_plot_mapping.setdefault(shortname, {})

            plot_title = ''
            fig = plt.figure(figsize=(4, 4))
            ax = fig.add_subplot(111)
            ax.get_yaxis().get_major_formatter().set_useOffset(False)

            # Defaults keep the axis set-up below valid for an empty mapping.
            x_vals, x_labels = [], []
            for count, (model, tau_mapping) in enumerate(mapping.items()):
                # Work on a copy: popping from the caller's dict made a second
                # call on the same data fail with KeyError.
                tau_values = dict(tau_mapping)
                original_value = tau_values.pop('Original Population')

                x_labels, y_vals = list(zip(*tau_values.items()))
                x_labels = list(map(float, x_labels))
                x_labels, y_vals = list(
                    zip(*sorted(zip(x_labels, y_vals),
                                key=operator.itemgetter(0))))
                if original_value is not None:
                    y_vals_old = [original_value] * len(x_labels)
                else:
                    y_vals_old = None
                x_vals = np.arange(1, len(x_labels) + 1, 1)

                ax.plot(x_vals, y_vals, color=colors[count], marker='o',
                        label=model.filename())
                if y_vals_old is not None:
                    ax.plot(x_vals, y_vals_old, color=colors[-1],
                            linestyle='dashed', marker='o',
                            label='Original Population')

                # Accumulate X (tau_sens), Y (tau_nosens), Z (index value)
                # for the 3-D plots.
                xs = list(map(float, x_labels))
                ys = [tau_nosens] * len(x_labels)
                model_seen = model in per_index
                if model_seen:
                    per_index[model][0] += xs
                    per_index[model][1] += list(ys)
                    per_index[model][2] += list(y_vals)
                else:
                    per_index[model] = [xs, list(ys), list(y_vals)]

                # A missing baseline (None) used to crash in list(None);
                # it is now simply left out of the 3-D data.
                if y_vals_old is not None:
                    if model_seen and 'Original Population' in per_index:
                        per_index['Original Population'][0] += list(xs)
                        per_index['Original Population'][1] += list(ys)
                        per_index['Original Population'][2] += list(y_vals_old)
                    else:
                        per_index['Original Population'] = [
                            list(xs), list(ys), list(y_vals_old)
                        ]

            ax.set_title(plot_title)
            ax.set_ylabel(str(index_type))
            ax.set_xlabel(x_title)
            ax.set_xticks(x_vals)
            ax.set_xticklabels(x_labels)

            filename = "segregation_{}_{}".format(shortname, tau_nosens)
            figpath = plots_dir + '/' + filename + '.' + format
            legend_path = plots_dir + '/' + filename + "_legend." + format
            fig.savefig(figpath, format=format, bbox_inches='tight')
            fig.savefig(plots_dir + '/' + filename + '.pdf', format='pdf',
                        bbox_inches='tight')

            # The legend is rendered into its own small figure.
            fig_legend = plt.figure(figsize=(4, 3))
            handles, labels = ax.get_legend_handles_labels()
            # `loc` as keyword: positional use is deprecated in recent
            # Matplotlib releases.
            fig_legend.legend(handles, labels, loc='center', ncol=1)
            fig_legend.savefig(legend_path, format=format, bbox_inches='tight')
            fig_legend.savefig(plots_dir + '/' + filename + "_legend.pdf",
                               format='pdf', bbox_inches='tight')
            # Close both figures; the legend figure used to leak.
            plt.close(fig_legend)
            plt.close(fig)

            plot_strings_mapping[tau_nosens].setdefault(shortname, []).append(
                "{}\n\n{}".format(get_wiki_link(figpath),
                                  get_wiki_link(legend_path)))

    print("3D plot mapping: {}".format(threed_plot_mapping))
    print("Plot string mapping: {}".format(plot_strings_mapping))

    for seg_index, model_mapping in threed_plot_mapping.items():
        strings_to_write.append("== {} ==".format(str(seg_index)))
        for tau_nosens, seg_index_mapping in plot_strings_mapping.items():
            strings_to_write.append(
                "=== Tau for non-sens: {} ===".format(tau_nosens))
            strings_to_write += seg_index_mapping[seg_index]

        plot_title = ''
        filename = "segregation_{}_3d".format(seg_index)
        figpath = plots_dir + '/' + filename + '.' + format
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        for count, (model, x_y_z) in enumerate(model_mapping.items()):
            x_meshgrid, y_meshgrid = np.meshgrid(x_y_z[0], x_y_z[1])
            _, z_meshgrid = np.meshgrid(x_y_z[0], x_y_z[2])
            # String keys denote the baseline entry and are drawn dashed.
            is_baseline = isinstance(model, str)
            ax.plot_wireframe(
                x_meshgrid, y_meshgrid, z_meshgrid,
                label=model if is_baseline else model.filename(),
                linestyle='dashed' if is_baseline else 'solid',
                color=colors[count])
        ax.set_title(plot_title)
        ax.set_xlabel(x_title)
        ax.set_ylabel(y_title)
        # Shrink the axes to make room for the legend on the right.
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        fig.savefig(figpath, format=format, bbox_inches='tight')
        fig.savefig(plots_dir + '/' + filename + '.pdf', format='pdf',
                    bbox_inches='tight')
        plt.close(fig)
        strings_to_write.append(get_wiki_link(figpath))

    return strings_to_write
def get_pdf_plots(res_dir, X, sens_group, taus_for_sens, feature_info, data_for_pdf, tau_nosens, separate_legend=False, y_title='', combine_all_plots=False, plot_title='', filename='', format='png'):
    """Yield wiki markup for per-feature value-frequency plots.

    For every feature column and every model in ``data_for_pdf`` two figures
    are written to ``<res_dir>/pdf_before_after_plots`` (in ``format`` and as
    PDF): one with side-by-side panels for ``X[sens_group]`` and
    ``X[~sens_group]``, and one for the complete population.  Each figure
    shows the relative frequency of every distinct value of the original
    column (dashed) and of each population in ``data_for_pdf[model]`` (solid,
    coloured by the position of its key in ``taus_for_sens``).

    The ``plot_title`` and ``filename`` arguments are overwritten per plot;
    ``y_title`` and ``combine_all_plots`` are not used.

    Yields:
        One string per (feature, model) pair holding both wiki links.
    """
    assert len(feature_info) == X.shape[1]

    rc_settings = (
        ('font.size', 24),
        ('pdf.fonttype', 42),
        ('ps.fonttype', 42),
        ('axes.labelsize', 22),
        ('axes.labelweight', 'bold'),
        ('axes.titlesize', 15),
        ('axes.linewidth', 3),
        ('xtick.labelsize', 10),
        ('ytick.labelsize', 12),
        ('legend.fontsize', 18),
        ('figure.titlesize', 28),
        ('lines.linewidth', 3.0),
    )
    for rc_key, rc_value in rc_settings:
        plt.rcParams[rc_key] = rc_value

    def _shares(samples, column, support):
        # Fraction of rows in `samples` whose `column` equals each value.
        return [
            np.count_nonzero(samples[:, column] == value) / len(samples)
            for value in support
        ]

    plots_dir = res_dir + "/pdf_before_after_plots"
    out.create_dir(plots_dir)
    sens_rows = X[sens_group]
    nosens_rows = X[~sens_group]

    for col in range(len(feature_info)):
        for model, populations in data_for_pdf.items():
            title = "{}, feature name: {} ({}), C_~s = {}".format(
                model.filename(), feature_info[col][0], feature_info[col][1],
                tau_nosens)
            stem = "pdf_{}_{}_{}".format(model.filename(), tau_nosens,
                                         feature_info[col][0])
            figpath = plots_dir + '/' + stem + '.' + format
            figpath_full = (plots_dir + '/' + stem + '_complete_population.'
                            + format)

            group_fig, (sens_ax, nosens_ax) = plt.subplots(
                1, 2, sharey=True, sharex=True, figsize=(6, 6))
            full_fig = plt.figure(figsize=(4, 4))
            full_ax = full_fig.add_subplot(111)
            group_fig.suptitle(title)
            sens_ax.set_title("Sens Group")
            nosens_ax.set_title("Non-Sens Group")

            # The support is fixed to the values seen in the original data.
            support = sorted(set(X[:, col]))
            base_sens = _shares(sens_rows, col, support)
            base_nosens = _shares(nosens_rows, col, support)
            base_all = _shares(X, col, support)
            sens_ax.plot(support, base_sens, color=colors[0], marker='',
                         linestyle='dashed')
            nosens_ax.plot(support, base_nosens, color=colors[0], marker='',
                           linestyle='dashed', label='Original Population')
            full_ax.plot(support, base_all, color=colors[0], marker='',
                         linestyle='dashed', label='Original Population')

            for tau_sens, population in populations.items():
                colour = colors[taus_for_sens.index(tau_sens) + 1]
                pop_sens = population[sens_group]
                pop_nosens = population[~sens_group]
                new_sens = _shares(pop_sens, col, support)
                new_nosens = _shares(pop_nosens, col, support)
                new_all = _shares(population, col, support)
                sens_ax.plot(support, new_sens, color=colour, marker='')
                nosens_ax.plot(support, new_nosens, color=colour, marker='',
                               label='{}'.format(tau_sens))
                full_ax.plot(support, new_all, color=colour, marker='',
                             label='{}'.format(tau_sens))

            if not separate_legend:
                # Shrink the axes to make room for a legend on the right.
                nosens_box = nosens_ax.get_position()
                full_box = full_ax.get_position()
                nosens_ax.set_position([nosens_box.x0, nosens_box.y0,
                                        nosens_box.width * 0.8,
                                        nosens_box.height])
                nosens_ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
                full_ax.set_position([full_box.x0, full_box.y0,
                                      full_box.width * 0.8, full_box.height])
                full_ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

            group_fig.savefig(figpath, format=format, bbox_inches='tight')
            group_fig.savefig(plots_dir + '/' + stem + '.pdf', format='pdf',
                              bbox_inches='tight')
            full_fig.savefig(figpath_full, format=format, bbox_inches='tight')
            full_fig.savefig(
                plots_dir + '/' + stem + '_complete_population.pdf',
                format='pdf', bbox_inches='tight')
            plt.close(group_fig)
            plt.close(full_fig)

            yield "\n{}\n\n{}\n\n".format(get_wiki_link(figpath),
                                          get_wiki_link(figpath_full))
def get_abs_clustering_plots(res_dir, thresholds, new_abs_clustering_index, old_abs_clustering_index, tau_nosens, y_title, plot_title='', filename='abs_index_utility_threshold', format='png'):
    """Plot a clustering index against utility thresholds for every model.

    One solid line per key of ``new_abs_clustering_index`` is drawn; when
    ``old_abs_clustering_index`` is given, the key's old value is added as a
    dashed horizontal line in the same colour.  The figure is written to
    ``<res_dir>/disparity_plots/<filename>.<format>`` and additionally as PDF.

    Args:
        res_dir: Base results directory.
        thresholds: Numeric thresholds, one per plotted point.
        new_abs_clustering_index: Mapping model -> list of values.
        old_abs_clustering_index: Mapping model -> single value, or ``None``.
        tau_nosens: Shown in the plot title.
        y_title: Y axis label.
        plot_title: Ignored; the title is always built from ``tau_nosens``.
        filename: Output file stem.
        format: Image format of the primary output file.

    Returns:
        The wiki link of the primary image wrapped in newlines.
    """
    plt.rcParams.update({
        'font.size': 24,
        'pdf.fonttype': 42,
        'ps.fonttype': 42,
        'axes.labelsize': 22,
        'axes.labelweight': 'bold',
        'axes.titlesize': 15,
        'axes.linewidth': 3,
        'xtick.labelsize': 10,
        'ytick.labelsize': 12,
        'legend.fontsize': 18,
        'figure.titlesize': 28,
        'lines.linewidth': 3.0,
    })

    plots_dir = res_dir + "/disparity_plots"
    out.create_dir(plots_dir)
    x_title, plot_title = r'$C_s$', r'$C$_~s = ' + str(tau_nosens)

    # Tick positions and labels depend only on `thresholds`, so they are built
    # once up front (this also keeps them defined when no model is given).
    # Only multiples of 0.01 get a label.  The scaled value is rounded before
    # the modulo test because e.g. 10000 * 0.07 is not exactly 700 in binary
    # floating point, which used to drop such labels.
    x_labels = [
        "{:.2f}".format(float(x)) if round(float(x) * 10000) % 100 == 0 else ''
        for x in thresholds
    ]
    x_vals = np.arange(1, len(x_labels) + 1, 1)

    fig = plt.figure(figsize=(6, 6))
    ax = fig.add_subplot(111)
    for count, (model, y_vals) in enumerate(new_abs_clustering_index.items()):
        ax.plot(x_vals, y_vals, color=colors[count], marker='o',
                label=model.filename())
        if old_abs_clustering_index is not None:
            y_vals_old = [old_abs_clustering_index[model]] * len(thresholds)
            ax.plot(x_vals, y_vals_old, color=colors[count],
                    linestyle='dashed', marker='o')

    ax.set_title(plot_title)
    ax.set_xlabel(x_title)
    ax.set_ylabel(y_title)
    ax.set_xticks(x_vals)
    ax.set_xticklabels(x_labels)
    # Shrink the axes to make room for the legend on the right.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

    figpath = plots_dir + '/' + filename + '.' + format
    fig.savefig(figpath, format=format, bbox_inches='tight')
    fig.savefig(plots_dir + '/' + filename + '.pdf', format='pdf',
                bbox_inches='tight')
    plt.close(fig)

    return "\n{}\n\n".format(get_wiki_link(figpath))
def get_utility_threshold_plots(res_dir, models, col_headings, values, values_old, tau_nosens, plot_title='', filename='utility_threshold', format='png'):
    """Plot a table column against utility thresholds, one line per model.

    Rows of ``values`` / ``values_old`` are selected per model by comparing
    column 0 with ``str(model)``.  Column 1 supplies the tick labels
    (formatted with two decimals) and column 2 the y values; only the text in
    front of the first ``'('`` of a cell is parsed as a float.  New values are
    drawn solid, old values dashed in the same colour.

    The plot and a separate legend image are written to
    ``<res_dir>/disparity_plots`` in ``format`` and additionally as PDF.

    Args:
        res_dir: Base results directory.
        models: Iterable of models; ``str(model)`` must match column 0.
        col_headings: Headings; entry 2 (text before ``<<BR>>``) is the y label.
        values: Table rows (sequence or array) with the new values.
        values_old: Table rows (sequence or array) with the old values, laid
            out like ``values``.
        tau_nosens: Not used by the current implementation.
        plot_title: Ignored; the title is always empty.
        filename: Output file stem.
        format: Image format of the primary output files.

    Returns:
        Wiki links for the plot and for its legend image.
    """
    plt.rcParams.update({
        'font.size': 24,
        'pdf.fonttype': 42,
        'ps.fonttype': 42,
        'axes.labelsize': 20,
        'axes.labelweight': 'bold',
        'axes.titlesize': 15,
        'axes.linewidth': 1,
        'xtick.labelsize': 10,
        'ytick.labelsize': 12,
        'legend.fontsize': 14,
        'figure.titlesize': 28,
        'lines.linewidth': 1.0,
    })

    plots_dir = res_dir + "/disparity_plots"
    out.create_dir(plots_dir)
    values = np.array(values)
    # `values_old` is fancy-indexed below just like `values`; converting it
    # as well lets callers pass a plain list of rows.
    values_old = np.array(values_old)
    x_title, y_title = r'$C_s$', col_headings[2].split("<<BR>>")[0].strip()
    plot_title = ''

    def _leading_floats(cells):
        # "0.12 (0.01)" -> 0.12: keep only the number before the parenthesis.
        return [float(cell.split('(')[0].strip()) for cell in cells]

    fig = plt.figure(figsize=(4, 4))
    ax = fig.add_subplot(111)
    # Defaults keep the axis set-up below valid when `models` is empty.
    x_vals, x_labels = [], []
    for i, model_name in enumerate(str(model) for model in models):
        mask = np.where(values[:, 0] == model_name)[0]
        y_vals = _leading_floats(values[mask, 2].flatten())
        y_vals_old = _leading_floats(values_old[mask, 2].flatten())
        x_labels = ["{:.2f}".format(float(x))
                    for x in values[mask, 1].flatten()]
        x_vals = np.arange(1, len(x_labels) + 1, 1)
        ax.plot(x_vals, y_vals, color=colors[i], marker='o',
                label=model_name.split("<<BR>>")[0])
        ax.plot(x_vals, y_vals_old, color=colors[i], linestyle='dashed',
                marker='o')

    ax.set_title(plot_title)
    ax.set_xlabel(x_title)
    ax.set_ylabel(y_title)
    ax.set_xticks(x_vals)
    ax.set_xticklabels(x_labels)

    figpath = plots_dir + '/' + filename + '.' + format
    legend_path = plots_dir + '/' + filename + "_legend." + format
    fig.savefig(figpath, format=format, bbox_inches='tight')
    fig.savefig(plots_dir + '/' + filename + '.pdf', format='pdf',
                bbox_inches='tight')

    # The legend is rendered into its own small figure.
    fig_legend = plt.figure(figsize=(3, 3))
    handles, labels = ax.get_legend_handles_labels()
    # `loc` as keyword: positional use is deprecated in recent Matplotlib
    # releases.
    fig_legend.legend(handles, labels, loc='center', ncol=1)
    fig_legend.savefig(legend_path, format=format, bbox_inches='tight')
    fig_legend.savefig(plots_dir + '/' + filename + "_legend.pdf",
                       format='pdf', bbox_inches='tight')

    # Close both figures explicitly.  A bare plt.close() only closes the
    # current figure (the legend), which left the main figure open.
    plt.close(fig_legend)
    plt.close(fig)

    return "\n{}\n\n{}\n\n".format(get_wiki_link(figpath),
                                   get_wiki_link(legend_path))
def get_disparity_plots(res_dir, col_headings, values, plot_title='', filename='all_disp_in_one', format='png'):
    """Draw all disparity columns of a table as one grouped bar chart.

    Column 0 of ``values`` holds the model label (text before ``<<BR>>``);
    every further column is one disparity type.  Columns are reordered with
    ``get_column_heading_order`` and drawn on a logarithmic y axis.  Only the
    text in front of the first ``'('`` of a cell is parsed as a float.

    The chart is written to ``<res_dir>/disparity_plots/<filename>.<format>``
    and additionally as PDF.

    Args:
        res_dir: Base results directory.
        col_headings: Headings matching the columns of ``values``.
        values: Table rows (sequence or array) of strings.
        plot_title: Title of the chart.
        filename: Output file stem.
        format: Image format of the primary output file.

    Returns:
        The wiki link of the primary image wrapped in newlines.
    """
    plt.rcParams.update({
        'font.size': 24,
        'pdf.fonttype': 42,
        'ps.fonttype': 42,
        'axes.labelsize': 22,
        'axes.labelweight': 'bold',
        'axes.titlesize': 15,
        'axes.linewidth': 3,
        'xtick.labelsize': 14,
        'ytick.labelsize': 14,
        'legend.fontsize': 18,
        'figure.titlesize': 28,
        'lines.linewidth': 3.0,
    })

    plots_dir = res_dir + "/disparity_plots"
    out.create_dir(plots_dir)
    values = np.array(values)

    # Break long model names over two lines.  Each name is handled on its
    # own: previously a missing "LogReg" raised inside a bare try/except and
    # silently skipped the "NeuralNet" replacement as well.
    two_line_names = {"LogReg": "Log\nReg", "NeuralNet": "Neural\nNet"}
    x_labels = [label.split("<<BR>>")[0].strip() for label in values[:, 0]]
    x_labels = [two_line_names.get(label, label) for label in x_labels]

    x_title, y_title = "Model", "Disparity"
    width = 0.2
    # Bar groups sit at 1, 3, 5, ... to leave room for several bars each.
    x_vals = np.array(range(1, 2 * len(x_labels), 2))

    order_of_cols = get_column_heading_order(values[0, 1:].flatten())
    values[:, 1:] = values[:, 1:][:, order_of_cols]
    col_headings = np.array(col_headings)
    print(col_headings, type(col_headings), order_of_cols)
    col_headings[1:] = col_headings[1:][order_of_cols]
    col_headings = list(col_headings)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    all_rects, all_disp_types = [], []
    for i in range(1, len(col_headings)):
        if 'statistical' in col_headings[i].lower():
            disparity_type = "Statistical\nDisparity"
        else:
            disparity_type = col_headings[i].split("<<BR>>")[0].replace(
                'Disparity ', 'Disparity\n').replace(' (', '\n(')
        y_vals = [float(val.split('(')[0].strip())
                  for val in values[:, i].flatten()]
        rect = ax.bar(x_vals + (i - 1) * width, y_vals, width,
                      color=colors[i - 1])
        all_rects.append(rect)
        all_disp_types.append(disparity_type)

    ax.set_title(plot_title)
    ax.set_yscale('log')
    ax.set_xlabel(x_title)
    ax.set_ylabel(y_title)
    # Centre each tick under its group of bars.
    ax.set_xticks(x_vals + (len(col_headings) - 2) * width / 2)
    ax.set_xticklabels(x_labels)
    # Shrink the axes to make room for the legend on the right.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    ax.legend(all_rects, all_disp_types, loc='center left',
              bbox_to_anchor=(1, 0.5))

    figpath = plots_dir + '/' + filename + '.' + format
    fig.savefig(figpath, format=format, bbox_inches='tight')
    fig.savefig(plots_dir + '/' + filename + '.pdf', format='pdf',
                bbox_inches='tight')
    plt.close(fig)

    return "\n{}\n\n".format(get_wiki_link(figpath))
def run(self):
    """Simulate users acting on explanations and record the outcome.

    For every utility threshold of the non-sensitive group
    (``self.taus_for_nosens``), every model in ``self.models_other_than_rules``
    and every utility threshold of the sensitive group
    (``self.taus_for_sens``), the users whose utility exceeds their group's
    threshold are replaced by their flipped feature vectors and labels. On the
    resulting populations the method

    * evaluates every segregation index and stores the values in
      ``self.seg_index_mapping`` (skipped when a pickled mapping from an
      earlier run is found and loaded instead),
    * collects disparity measures in ``self.disparity_table_values`` and
      ``self.disparity_table_values_old``,
    * appends flipped-user counts to the report at ``self.res_file_path``.

    Finally the segregation mapping is pickled, the segregation plots are
    written to ``self.seg_file_path`` and the plot directories are uploaded.

    Raises:
        ValueError: If a model has not been persisted yet.
    """
    learning_env = dec_rule_env.DecRuleEnv(self.dataset, self.sens_group_desc)
    learning_env.load_data(feature_engineering=True)
    self.initialize_variables(learning_env)

    # The cache file name depends on whether fairness constraints are active.
    pickle_dir = self.res_dir + '/plots_pickled_data'
    if not exp.FAIRNESS_CONSTRAINTS:
        seg_pickle_path = pickle_dir + '/seg_index_mapping.pkl'
    else:
        seg_pickle_path = pickle_dir + '/seg_index_mapping_fc.pkl'

    pred_dtype = bool if self.prediction_task == exp.CLASSIFICATION else float
    dataset_info = exp.dataset_info[self.dataset]

    seg_index_mapping_loaded = os.path.exists(seg_pickle_path)
    if seg_index_mapping_loaded:
        # NOTE: joblib.load unpickles arbitrary objects; only use result
        # directories that this project produced itself.
        self.seg_index_mapping = joblib.load(seg_pickle_path)

    all_models = self.models_other_than_rules
    with open(self.res_file_path, 'w') as group_res_file:
        group_res_file.write("= Disparity in effort analysis vs different utility thresholds =\n\n")
    with open(self.seg_file_path, 'w') as seg_res_file:
        seg_res_file.write("= Measuring Long Term Impact through Segregation =\n\n")

    for tau_nosens in self.taus_for_nosens:
        self.prev_labels = None
        self.disparity_table_values, self.disparity_table_values_old = [], []
        self.number_flipped_sens = {}  # model -> fraction of flipped users per tau_sens
        self.number_flipped_nosens = {}  # model -> fraction of flipped users per tau_sens
        self.new_abs_clustering_index = {}  # model -> list of new abs_clustering_index
        if not seg_index_mapping_loaded:
            # seg_index -> {model: {'Original Population': val, '<tau_sens>': val, ...}}
            self.seg_index_mapping[tau_nosens] = {
                k(self.sens_group_train, self.feature_info): {}
                for k in self.segregation_indices
            }
        self.new_abs_clustering_index_params = {}  # model -> list of clustering params
        self.old_abs_clustering_index = {}  # model -> initial abs_clustering index
        self.data_for_pdf = {}  # model -> {tau_sens: new training population}

        with open(self.res_file_path, 'a') as group_res_file:
            group_res_file.write("== Utility threshold for non sensitive people = {:.2f} ==\n\n".format(tau_nosens))

        for model in all_models:
            self.number_flipped_sens[model], self.number_flipped_nosens[model] = [], []
            self.new_abs_clustering_index[model] = []
            self.new_abs_clustering_index_params[model] = []
            self.data_for_pdf[model] = {}

            all_flipped_x_test, all_flipped_y_test = self.get_flipped_dataset(model, 'test')
            all_flipped_x_train, all_flipped_y_train = self.get_flipped_dataset(model, 'train')
            utilities, utilities_train = self.get_utilities(model, 'test'), self.get_utilities(model, 'train')

            clf = model
            exists, loaded_clf = aeio.load_model(clf, self.dataset)
            if exists:
                clf = loaded_clf
                print ("Loaded {}...".format(str(clf)))
            else:
                raise ValueError("Run experiment.py first")

            if isinstance(clf, (lm.LinReg, lm.LogReg)):
                with open(self.res_file_path, 'a') as group_res_file:
                    group_res_file.write("\n{}\n".format(
                        out.get_table(**aeio.get_regression_weights(self.feature_info, clf))))
            elif isinstance(clf, (lm.DTReg, lm.DT)):
                with open(self.res_file_path, 'a') as group_res_file:
                    group_res_file.write("\n{}\n\n".format(
                        aeio.plot_dtree(self.res_dir, clf, self.feature_info)))
                    group_res_file.write("{}\n\n".format(
                        aeio.plot_covar_matrix(self.res_dir, self.x_train, self.feature_info)))

            users_preds = clf.predict(self.x_test).astype(pred_dtype)
            users_preds_train = clf.predict(self.x_train).astype(pred_dtype)

            cost_funcs, _ = dataset_info['cost_funcs'](
                self.feature_info, self.x_train, self.sens_group_train,
                dataset_info['variable_constraints'])
            cost_funcs_rev, _ = dataset_info['cost_funcs'](
                self.feature_info, self.x_train, self.sens_group_train,
                dataset_info['variable_constraints_rev'])

            if not seg_index_mapping_loaded:
                for k, v in self.seg_index_mapping[tau_nosens].items():
                    v[model] = {'Original Population': k.val(
                        X=self.x_train, y=self.y_train, cost_funcs=cost_funcs,
                        cost_funcs_rev=cost_funcs_rev,
                        anchor_indices=self.get_anchor_indices(model, 'train'),
                        y_pred=users_preds_train)}

            # Explanations are given to users predicted below their true label.
            underpredicted_test = users_preds < self.y_test
            underpredicted_train = users_preds_train < self.y_train
            num_underpredicted_train = np.count_nonzero(underpredicted_train)

            sub_filter_sens = np.zeros(len(self.x_test), dtype=bool)
            sub_filter_sens_train = np.zeros(len(self.x_train), dtype=bool)
            sub_filter_nosens = np.zeros(len(self.x_test), dtype=bool)
            sub_filter_nosens_train = np.zeros(len(self.x_train), dtype=bool)
            sub_filter_sens[np.where(np.logical_and(self.sens_group_test, underpredicted_test))[0]] = 1
            sub_filter_nosens[np.where(np.logical_and(~self.sens_group_test, underpredicted_test))[0]] = 1
            sub_filter_sens_train[np.where(np.logical_and(self.sens_group_train, underpredicted_train))[0]] = 1
            sub_filter_nosens_train[np.where(np.logical_and(~self.sens_group_train, underpredicted_train))[0]] = 1
            print (set(list(utilities)))

            with open(self.res_file_path, 'a') as group_res_file:
                group_res_file.write(" * For {}, # test explanations given = {} ({} sens, {} non-sens)\n\n".format(
                    str(model),
                    len(utilities[sub_filter_sens]) + len(utilities[sub_filter_nosens]),
                    len(utilities[sub_filter_sens]),
                    len(utilities[sub_filter_nosens])))
                group_res_file.write(" * For {}, # train explanations given = {} ({} sens, {} non-sens)\n\n".format(
                    str(model),
                    len(utilities_train[sub_filter_sens_train]) + len(utilities_train[sub_filter_nosens_train]),
                    len(utilities_train[sub_filter_sens_train]),
                    len(utilities_train[sub_filter_nosens_train])))

            for tau_sens in self.taus_for_sens:
                # A user follows the explanation when the utility of doing so
                # exceeds the threshold of the user's group.
                sens_flipped = np.where(np.logical_and(utilities > tau_sens, self.sens_group_test))[0]
                nonsens_flipped = np.where(np.logical_and(utilities > tau_nosens, ~self.sens_group_test))[0]
                sens_flipped_train = np.where(np.logical_and(utilities_train > tau_sens, self.sens_group_train))[0]
                nonsens_flipped_train = np.where(np.logical_and(utilities_train > tau_nosens, ~self.sens_group_train))[0]

                sens_utility_old = np.mean(utilities[sens_flipped])
                nosens_utility_old = np.mean(utilities[nonsens_flipped])

                # Build the populations that result from the flips.
                new_x_test, new_y_test = self.x_test.copy(), self.y_test.copy()
                new_x_test[sens_flipped, :], new_y_test[sens_flipped] = \
                    all_flipped_x_test[sens_flipped, :], all_flipped_y_test[sens_flipped]
                new_x_test[nonsens_flipped, :], new_y_test[nonsens_flipped] = \
                    all_flipped_x_test[nonsens_flipped, :], all_flipped_y_test[nonsens_flipped]

                new_x_train, new_y_train = self.x_train.copy(), self.y_train.copy()
                new_x_train[sens_flipped_train, :], new_y_train[sens_flipped_train] = \
                    all_flipped_x_train[sens_flipped_train, :], all_flipped_y_train[sens_flipped_train]
                new_x_train[nonsens_flipped_train, :], new_y_train[nonsens_flipped_train] = \
                    all_flipped_x_train[nonsens_flipped_train, :], all_flipped_y_train[nonsens_flipped_train]

                self.data_for_pdf[model][tau_sens] = new_x_train

                try:
                    new_y_pred = model.predict(new_x_test).astype(pred_dtype)
                    new_y_pred_train = model.predict(new_x_train).astype(pred_dtype)
                except Exception:
                    # The in-memory model could not predict; fall back to the
                    # persisted one. Catching Exception (instead of a bare
                    # except) keeps Ctrl-C and SystemExit working.
                    _, clf = aeio.load_model(model, self.dataset)
                    new_y_pred = clf.predict(new_x_test).astype(pred_dtype)
                    new_y_pred_train = clf.predict(new_x_train).astype(pred_dtype)

                cost_funcs, _ = dataset_info['cost_funcs'](
                    self.feature_info, new_x_train, self.sens_group_train,
                    dataset_info['variable_constraints'])
                cost_funcs_rev, _ = dataset_info['cost_funcs'](
                    self.feature_info, new_x_train, self.sens_group_train,
                    dataset_info['variable_constraints_rev'])

                if not seg_index_mapping_loaded:
                    for k, v in self.seg_index_mapping[tau_nosens].items():
                        v[model]['{:.2f}'.format(tau_sens)] = k.val(
                            X=new_x_train, y=new_y_train, cost_funcs=cost_funcs,
                            cost_funcs_rev=cost_funcs_rev,
                            anchor_indices=self.get_anchor_indices(model, 'train'),
                            y_pred=new_y_pred_train)

                with open(self.res_file_path, 'a') as group_res_file:
                    group_res_file.write(" * For {} test set, with sens tau = {:.2f}, # flipped users = {} ({} sens, {} non-sens)\n\n".format(
                        str(model), tau_sens, len(sens_flipped) + len(nonsens_flipped),
                        len(sens_flipped), len(nonsens_flipped)))
                    group_res_file.write(" * For {} train set, with sens tau = {:.2f}, # flipped users = {} ({} sens, {} non-sens)\n\n".format(
                        str(model), tau_sens, len(sens_flipped_train) + len(nonsens_flipped_train),
                        len(sens_flipped_train), len(nonsens_flipped_train)))

                # Flipped users as a fraction of all under-predicted training users.
                self.number_flipped_sens[model].append(len(sens_flipped_train) / num_underpredicted_train)
                self.number_flipped_nosens[model].append(len(nonsens_flipped_train) / num_underpredicted_train)

                double_flipped_utilities = self.get_double_flipped_utilities(model, tau_sens, tau_nosens)
                # NOTE(review): everything below needs the double-flipped
                # utilities, so it is kept under this guard - confirm that
                # this matches the intended nesting.
                if double_flipped_utilities is not None:
                    sens_flipped_new = np.where(np.logical_and(double_flipped_utilities != 0, self.sens_group_test))[0]
                    nonsens_flipped_new = np.where(np.logical_and(double_flipped_utilities != 0, ~self.sens_group_test))[0]
                    sens_utility_new = np.mean(double_flipped_utilities[sens_flipped_new])
                    nosens_utility_new = np.mean(double_flipped_utilities[nonsens_flipped_new])

                    # The mean of an empty selection is NaN; report it as 0.
                    sens_new = sens_utility_new if not np.isnan(sens_utility_new) else 0.
                    nosens_new = nosens_utility_new if not np.isnan(nosens_utility_new) else 0.
                    sens_old = sens_utility_old if not np.isnan(sens_utility_old) else 0.
                    nosens_old = nosens_utility_old if not np.isnan(nosens_utility_old) else 0.

                    if len(self.disparity_table_heading) <= 2:
                        # First evaluation: also fetch the column headings
                        # and formats of the disparity table.
                        heading, formats, values = eval_formula.get_disparity_measures(
                            new_y_test, new_y_pred, self.sens_group_test,
                            sens_new, nosens_new, self.prediction_task,
                            return_heading_and_formats=True)
                        self.disparity_table_heading += heading
                        self.disparity_table_formats += formats
                    else:
                        values = eval_formula.get_disparity_measures(
                            new_y_test, new_y_pred, self.sens_group_test,
                            sens_new, nosens_new, self.prediction_task,
                            return_heading_and_formats=False)
                    old_values = eval_formula.get_disparity_measures(
                        self.y_test, users_preds, self.sens_group_test,
                        sens_old, nosens_old, self.prediction_task,
                        return_heading_and_formats=False)

                    self.disparity_table_values.append([str(model), "{:.2f}".format(tau_sens)] + values)
                    self.disparity_table_values_old.append([str(model), "{:.2f}".format(tau_sens)] + old_values)

        # The disparity tables and flipped-user fractions are only collected
        # on self here; this method does not write them to the report.

    out.create_dir(pickle_dir)
    joblib.dump(self.seg_index_mapping, seg_pickle_path)

    with open(self.seg_file_path, 'a') as seg_res_file:
        for wiki_path in aeio.get_segregation_plots_new(self.res_dir, self.seg_index_mapping, exp.FAIRNESS_CONSTRAINTS):
            seg_res_file.write("\n{}\n\n".format(wiki_path))

    # Upload all PNGs first, then all PDFs, one plot directory at a time.
    for extension in ('.png', '.pdf'):
        for plot_subdir in ('/disparity_plots', '/pdf_before_after_plots', '/segregation_plots'):
            out.upload_results([self.res_dir + plot_subdir], 'results', aeio.SERVER_PROJECT_PATH, extension)
def run(self, test_or_train):
    """Plot effort, reward and utility as functions of one another.

    For every model in ``self.models_other_than_rules`` (loaded from disk, or
    trained and persisted when no stored copy exists) the method looks at the
    users predicted below their ground truth, separately for the sensitive
    and the non-sensitive group, and computes

    * the average maximal reward under each effort bound in
      ``self.effort_deltas`` (and from it the average utility, reward minus
      effort bound),
    * the average (NaN-ignoring) effort under each reward bound in
      ``self.reward_deltas``.

    Per-model plots and one combined plot per relation are appended to the
    report at ``self.res_file_path``; the plotted curves are pickled to
    ``<res_dir>/plots_pickled_data``.

    Args:
        test_or_train: ``'test'`` to explain ``self.users`` using the
            predictions made for them; any other value uses the predictions
            made for ``self.role_model_users``. The value is also forwarded
            to ``self.set_vars``.
    """
    learning_env = dec_rule_env.DecRuleEnv(self.dataset, self.sens_group_desc)
    learning_env.load_data(feature_engineering=True)
    self.initialize_variables(learning_env)
    self.set_vars(test_or_train)

    with open(self.res_file_path, 'w') as res_file:
        res_file.write(
            "= Effort, Reward and Utilities as functions of one another =\n\n"
        )

    model_to_utility_sens, model_to_utility_nosens = {}, {}
    model_to_reward_sens, model_to_reward_nosens = {}, {}
    model_to_effort_sens, model_to_effort_nosens = {}, {}

    if exp.dataset_info[self.dataset]['prediction_task'] == exp.CLASSIFICATION:
        pred_dtype = bool
    else:
        pred_dtype = float

    for model in self.models_other_than_rules:
        exists, loaded_clf = aeio.load_model(model, self.dataset)
        if exists:
            model = loaded_clf
            print("Loaded {}...".format(str(model)))
        else:
            print("Training {}...".format(str(model)))
            model.train(self.x_train, self.y_train)
            aeio.persist_model(model, self.dataset)

        y_test_pred = model.predict(self.users).astype(pred_dtype)
        y_train_pred = model.predict(self.role_model_users).astype(pred_dtype)
        print("Model: {}, MAE: {}, MSE: {}".format(
            model, mean_absolute_error(self.users_gt, y_test_pred),
            mean_squared_error(self.users_gt, y_test_pred)))

        self.role_model_users_pred = y_train_pred  # This should not change
        # Which predictions are explained depends on the requested group.
        self.users_preds = y_test_pred if test_or_train == 'test' else y_train_pred

        sens_users = self._underpredicted_subsample(self.users_sens_group)
        nosens_users = self._underpredicted_subsample(~self.users_sens_group)

        # Maximal reward reachable within each effort bound.
        sens_reward_with_effort, nosens_reward_with_effort = [], []
        for delta in self.effort_deltas:
            sens_rewards = self._collect_role_model_metric(
                model, sens_users, 1, 'Sens', 'reward', 'effort', delta)
            nosens_rewards = self._collect_role_model_metric(
                model, nosens_users, 0, 'Nosens', 'reward', 'effort', delta)
            sens_reward_with_effort.append(np.mean(sens_rewards))
            nosens_reward_with_effort.append(np.mean(nosens_rewards))

        sens_reward_with_effort = np.array(sens_reward_with_effort)
        nosens_reward_with_effort = np.array(nosens_reward_with_effort)
        # Utility is reported as reward minus the effort bound.
        sens_utils_with_effort = sens_reward_with_effort - self.effort_deltas
        nosens_utils_with_effort = nosens_reward_with_effort - self.effort_deltas

        model_to_utility_sens[model] = [self.effort_deltas, sens_utils_with_effort]
        model_to_utility_nosens[model] = [self.effort_deltas, nosens_utils_with_effort]
        model_to_reward_sens[model] = [self.effort_deltas, sens_reward_with_effort]
        model_to_reward_nosens[model] = [self.effort_deltas, nosens_reward_with_effort]

        # Effort of the selected role model under each reward bound.
        sens_effort_with_reward, nosens_effort_with_reward = [], []
        for delta in self.reward_deltas:
            sens_efforts = self._collect_role_model_metric(
                model, sens_users, 1, 'Sens', 'effort', 'reward', delta)
            nosens_efforts = self._collect_role_model_metric(
                model, nosens_users, 0, 'Nosens', 'effort', 'reward', delta)
            sens_effort_with_reward.append(np.nanmean(sens_efforts))
            nosens_effort_with_reward.append(np.nanmean(nosens_efforts))

        model_to_effort_sens[model] = [self.reward_deltas, sens_effort_with_reward]
        model_to_effort_nosens[model] = [self.reward_deltas, nosens_effort_with_reward]

        with open(self.res_file_path, 'a') as res_file:
            res_file.write("== {} ==\n\n".format(str(model)))
            res_file.write("{}\n\n{}\n\n{}\n\n".format(
                aeio.plot_one_var_vs_other(self.res_dir, model,
                                           self.effort_deltas,
                                           sens_utils_with_effort,
                                           nosens_utils_with_effort,
                                           'Effort', 'Average Utility'),
                aeio.plot_one_var_vs_other(self.res_dir, model,
                                           self.effort_deltas,
                                           sens_reward_with_effort,
                                           nosens_reward_with_effort,
                                           'Effort', 'Average Reward'),
                aeio.plot_one_var_vs_other(self.res_dir, model,
                                           self.reward_deltas,
                                           sens_effort_with_reward,
                                           nosens_effort_with_reward,
                                           'Reward', 'Average Effort')))

    with open(self.res_file_path, 'a') as res_file:
        res_file.write("== All Models in One ==\n\n")
        res_file.write("{}\n\n{}\n\n{}\n\n".format(
            aeio.plot_one_var_vs_other_together(self.res_dir,
                                                model_to_utility_sens,
                                                model_to_utility_nosens,
                                                'Effort', 'Average Utility'),
            aeio.plot_one_var_vs_other_together(self.res_dir,
                                                model_to_reward_sens,
                                                model_to_reward_nosens,
                                                'Effort', 'Average Reward'),
            aeio.plot_one_var_vs_other_together(self.res_dir,
                                                model_to_effort_sens,
                                                model_to_effort_nosens,
                                                'Reward', 'Average Effort')))

    # Persist the curves; runs with fairness constraints get an "_fc" suffix
    # so both variants can live side by side.
    pickle_dir = self.res_dir + '/plots_pickled_data'
    out.create_dir(pickle_dir)
    suffix = '' if not exp.FAIRNESS_CONSTRAINTS else '_fc'
    curves = (
        ('model_to_utility_sens', model_to_utility_sens),
        ('model_to_utility_nosens', model_to_utility_nosens),
        ('model_to_reward_sens', model_to_reward_sens),
        ('model_to_reward_nosens', model_to_reward_nosens),
        ('model_to_effort_nosens', model_to_effort_nosens),
        ('model_to_effort_sens', model_to_effort_sens),
    )
    for curve_name, curve in curves:
        joblib.dump(curve, '{}/{}{}.pkl'.format(pickle_dir, curve_name, suffix))


def _underpredicted_subsample(self, group_membership):
    """Select the users of one group that are predicted below ground truth.

    At most ``self.subsample_size`` users are kept, in index order; ``None``
    keeps all of them.

    Returns:
        Tuple ``(indices, users, gt_labels, predicted_labels)`` of the
        selected users, where ``indices`` are positions in ``self.users``.
    """
    candidates = np.where(
        np.logical_and(group_membership, self.users_preds < self.users_gt))[0]
    selected = np.zeros(len(self.users), dtype=bool)
    # Slicing with [:None] keeps every candidate, so no special case is needed.
    selected[candidates[:self.subsample_size]] = 1
    return (np.where(selected)[0], self.users[selected],
            self.users_gt[selected], self.users_preds[selected])


def _collect_role_model_metric(self, model, group, sens_group, group_tag,
                               variable_to_optimize, variable_to_threshold,
                               threshold_value):
    """Run the role-model search for every user of ``group``.

    Args:
        model: Model being explained; only used in the progress output.
        group: Tuple as returned by ``_underpredicted_subsample``.
        sens_group: 1 for the sensitive group, 0 for the non-sensitive one.
        group_tag: Group name shown in the progress output.
        variable_to_optimize: ``'reward'`` or ``'effort'``; this is also the
            quantity that is collected.
        variable_to_threshold: Quantity bounded by ``threshold_value``.
        threshold_value: The bound passed to the search.

    Returns:
        List with one reward (when optimizing reward) or one effort
        (otherwise) per user, in the order of ``group``.
    """
    user_indices, users, gt_labels, predicted_labels = group
    collected = []
    for i, user in enumerate(users):
        print("Computing for user", user_indices[i])
        user = np.array([user])
        role_model, role_model_effort, role_model_reward, role_model_utility = \
            self.sampling_based_explanations(
                user,
                self.role_model_users,
                self.role_model_users_gt_labels,
                self.role_model_users_pred,
                gt_labels[i],
                predicted_labels[i],
                user_sens_group=sens_group,
                cost_sens_group=np.array([sens_group]),
                variable_to_optimize=variable_to_optimize,
                variable_to_threshold=variable_to_threshold,
                threshold_value=threshold_value
            )
        if variable_to_optimize == 'reward':
            assert role_model_utility == role_model_reward - role_model_effort
            collected.append(role_model_reward)
            print(
                "[{}] Model: {}, Effort threshold: {}, Effort value: {}, Max Reward: {}"
                .format(group_tag, model, threshold_value, role_model_effort,
                        role_model_reward))
        else:
            collected.append(role_model_effort)

        # Sanity check: features that may only increase must not be lower in
        # the role model than in the user.
        dir_up_cols_generator = cf.get_up_cols(
            exp.dataset_info[self.dataset]['variable_constraints'],
            self.feature_info)
        for dir_up_cols in dir_up_cols_generator:
            assert np.all(
                role_model[dir_up_cols] >= user.flatten()[dir_up_cols])
    return collected
def evaluate_population_splits(datasets, methods):
    """Evaluate how much between-group unfairness contributes per feature split.

    For each dataset, every method except 'Oracle' is set up on a
    ``ProbClassEnv`` and ``eval_seed_benefits`` is mapped over ``seeds``.
    The per-seed inter- and intragroup fractions are aggregated with
    ``para.aggregate_results``, plotted (one bar chart comparing the methods,
    one pie chart per method and feature combination) and a description is
    appended to the dataset's wiki file.

    Args:
        datasets: Iterable of ``(dataset_name, sens_features)`` pairs. Only
            the name is used here; it must be a key of the local
            ``feature_split_order`` table (currently only 'Compas'),
            otherwise a ``KeyError`` is raised.
        methods: Names of the methods to evaluate. Entries equal to 'Oracle'
            are skipped. The list itself is not modified.
    """
    feature_split_order = {'Compas': ['sex', 'race', 'age']}
    #feature_split_order = {'Compas': ['sex', 'race', 'age', 'c'],
    #    #'Adult': ['marital', 'relationship', 'workclass', 'education']}
    #    'Adult': ['marital', 'relationship']}#, 'workclass']}#, 'education']}

    # Filter into a local list instead of calling methods.remove('Oracle'):
    # remove() mutated the caller's list and raised ValueError when 'Oracle'
    # was absent (e.g. on a second call with the same list).
    eval_methods = [method for method in methods if method != 'Oracle']

    for dataset, sens_features in datasets:
        print("Evaluating dataset {}".format(dataset))
        out_dir = 'results/intergroup_splits/' + dataset + '/'
        output.create_dir(out_dir)

        split_features = feature_split_order[dataset]
        class_env = ProbClassEnv(dataset, val_split=0.7)
        class_env.load_data()

        x_labels = None
        intergroup_inequality_fracs = {}
        intragroup_inequality_fracs = {}
        for method in eval_methods:
            print("Training method", method)
            class_env.setup_model(method)

            eval_func = para.wrap_function(eval_seed_benefits)
            results = para.map_parallel(
                eval_func, seeds,
                invariant_data=(class_env, split_features))

            # Every result is a 3-tuple; the first entry of the first result
            # provides the x-axis labels (the feature combinations).
            x_labels = results[0][0]

            intergroup_fracs = []
            intragroup_fracs = defaultdict(lambda: defaultdict(list))
            for _, intergroup_frac, intragroup_frac in results:
                intergroup_fracs.append(intergroup_frac)
                # TODO: relies on ordered dicts
                for sens_comb, frac in zip(x_labels, intragroup_frac):
                    for group, group_share in frac.items():
                        intragroup_fracs[sens_comb][group].append(group_share)

            #intergroup_inequality_fracs[method] = para.mean_with_conf(intergroup_fracs, axis=0)
            intergroup_inequality_fracs[method] = para.aggregate_results(
                intergroup_fracs, axis=0)
            intragroup_inequality_fracs[method] = {
                sens_comb: {
                    group: para.aggregate_results(group_fracs, np.mean, axis=0)
                    for group, group_fracs in fracs.items()
                }
                for sens_comb, fracs in intragroup_fracs.items()
            }

        iu.plot_curves(iu.FIG_TYPE_INTERGROUP_SPLITS, "comparison",
                       x_labels, "Feature combinations",
                       intergroup_inequality_fracs, "Contribution (%)",
                       bars=True, colors=method_colors)

        # Plot the intragroup inequalities for the various groups
        for method in eval_methods:
            per_feature_comb = zip(
                x_labels,
                intergroup_inequality_fracs[method],
                intragroup_inequality_fracs[method].values())
            for feature_comb, intergroup_ineq, intragroup_ineqs in per_feature_comb:
                # An aggregated value may arrive as a tuple; only its first
                # entry is plotted.
                if isinstance(intergroup_ineq, tuple):
                    intergroup_ineq = intergroup_ineq[0]
                # TODO: relies on dict order
                iu.plot_pie(
                    iu.FIG_TYPE_INTERGROUP_SPLITS,
                    "{}_{}_breakdown".format(method, feature_comb),
                    [intergroup_ineq] + list(intragroup_ineqs.values()),
                    ["between-group"] + list(intragroup_ineqs.keys()))

        iu.plot_results(out_dir, dataset)  #, output_channel="show")

        wiki_file_loc = iu.get_wiki_file(out_dir)
        with open(wiki_file_loc, 'a') as wiki_file:
            iu.emit_curves(
                wiki_file, out_dir, dataset, iu.FIG_TYPE_INTERGROUP_SPLITS,
                "Contribution of between-group unfairness to the overall individual unfairness. The numbers in parentheses after the feature combinations denote the number of population subgroups obtained from splitting the population on all the features."
            )
def evaluate_inequality_decomposition(datasets, methods):
    """Plot between-group unfairness and accuracy against the rejection rate.

    For each dataset every method is trained on a ``ProbClassEnv`` and
    ``evaluate_inequality_decomp`` is called for every combination of
    sensitive features returned by ``iu.powerset``. Two families of figures
    are produced: one per method (all feature combinations in one figure) and
    one per feature combination (all methods in one figure). The figures are
    rendered with ``iu.plot_results`` and referenced from the dataset's wiki
    file, after which the registered figures are cleared.

    Args:
        datasets: Iterable of ``(dataset_name, sens_features)`` pairs.
        methods: Names of the methods to evaluate.
    """
    # Loop-invariant plot settings. The backslashes are doubled so that no
    # invalid escape sequence ("\m") is left in the literal; the resulting
    # strings are unchanged.
    x_label = "Fraction of rejected users ($\\tau$)"
    y1_label = "Between-group\nunfairness ($\\mathcal{E}^2_\\beta$)"
    y2_label = "Accuracy"
    linestyles = {'(Unfairness)': '-', '(Accuracy)': ':'}

    for dataset, sens_features in datasets:
        print("Evaluating dataset {}".format(dataset))
        out_dir = 'results/class_ineq_decomp/' + dataset + '/'
        output.create_dir(out_dir)

        class_env = ProbClassEnv(dataset, val_split=.7)
        class_env.load_data()

        # One x position per possible number of rejected test users,
        # expressed as a fraction of the test set size.
        num_test = len(class_env.y_test)
        x_range = np.arange(num_test + 1) / num_test

        sens_feature_combs = iu.powerset(sens_features)
        # Per feature combination: curves of all methods.
        method_plots = {
            ",".join(sens_feature_comb): {'intergroup_ineq': {}, 'accuracy': {}}
            for sens_feature_comb in sens_feature_combs
        }

        for method in methods:
            print("Evaluating method", method)
            class_env.setup_model(method)
            class_env.train_model()
            #class_env.calibrate_probabilities(calibration_method)

            # Per method: curves of all feature combinations.
            sens_feature_plots = {'intergroup_ineq': {}}
            for sens_feature_comb in sens_feature_combs:
                print("Evaluating sens feature combination", sens_feature_comb)
                decomp_res = class_env.evaluate_inequality_decomp(
                    sens_feature_comb)

                # method plots
                method_key = ','.join(sens_feature_comb)
                method_plots[method_key]['intergroup_ineq'][
                    (method, '(Unfairness)')] = decomp_res['intergroup_ineq']
                method_plots[method_key]['accuracy'][
                    (method, '(Accuracy)')] = decomp_res['accuracy']

                # sens_feature plots
                sens_feature_label = ', '.join(sens_feature_comb) + " inequality"
                sens_feature_plots['intergroup_ineq'][sens_feature_label] = \
                    decomp_res['intergroup_ineq']
                # Only the accuracy of the first feature combination is kept
                # for the per-method figure.
                if 'accuracy' not in sens_feature_plots:
                    sens_feature_plots['accuracy'] = decomp_res['accuracy']

            iu.plot_curves(iu.FIG_TYPE_INTERGROUP_INEQ,
                           "method_{}".format(method),
                           x_range, x_label,
                           sens_feature_plots['intergroup_ineq'], y1_label,
                           {"Accuracy": sens_feature_plots['accuracy']},
                           y2_label)

        for sens_feature_comb, method_results in method_plots.items():
            iu.plot_curves(iu.FIG_TYPE_INTERGROUP_INEQ,
                           "feature_{}".format(sens_feature_comb),
                           x_range, x_label,
                           method_results['intergroup_ineq'], y1_label,
                           method_results['accuracy'], y2_label,
                           colors=method_colors, linestyles=linestyles)

        iu.plot_results(out_dir, dataset)

        wiki_file_loc = iu.get_wiki_file(out_dir)
        with open(wiki_file_loc, 'a') as wiki_file:
            iu.emit_curves(
                wiki_file, out_dir, dataset, iu.FIG_TYPE_INEQ_DECOMP,
                "Inequality decomposition of the overall GE_2 into intergroup- and intragroup-inequality"
            )
            iu.emit_curves(
                wiki_file, out_dir, dataset, iu.FIG_TYPE_INTERGROUP_INEQ,
                "== Intergroup inequalities for methods and feature combinations =="
            )
        iu.clear_figures()
def _aggregate_rejection_curves(curves):
    """Group per-seed rejection curves by metric and aggregate each metric."""
    curves_by_metric = defaultdict(list)
    for rejection_curves in curves:
        for metric, metric_res in rejection_curves.items():
            curves_by_metric[metric].append(metric_res)
    return {metric: para.aggregate_results(metric_res)
            for metric, metric_res in curves_by_metric.items()}


def evaluate_prob_classification(datasets, methods):
    """Evaluate probabilistic classifiers and write a results table and curves.

    For each dataset, every method is set up on a ``ProbClassEnv`` and
    ``eval_prob_class_kernel`` is mapped over ``seeds`` for every entry of
    ``calibration_methods``. Accuracies and inequality measures are
    aggregated into one table row per method, rejection curves are averaged
    and plotted, and the table plus curve references are appended to the
    dataset's wiki file. Registered figures are cleared after each dataset.

    Args:
        datasets: Iterable of ``(dataset_name, sens_features)`` pairs.
        methods: Names of the methods to evaluate; each must be a key of
            ``method_colors``.
    """
    for dataset, sens_features in datasets:
        print("Evaluating dataset {}".format(dataset))
        out_dir = 'results/prob_class/' + dataset + '/'
        output.create_dir(out_dir)

        class_env = ProbClassEnv(dataset, val_split=0.7)
        class_env.load_data()
        sens_feature_combs = iu.powerset(sens_features)

        method_results = {}
        hyperparams = {}
        col_names = []
        for method in methods:
            print('\nEvaluating {}'.format(method))
            params = class_env.setup_model(method)
            hyperparams[method] = params

            for calibration_method in calibration_methods:
                eval_kernel = para.wrap_function(eval_prob_class_kernel)
                results = para.map_parallel(
                    eval_kernel, seeds,
                    invariant_data=(class_env, calibration_method, method,
                                    sens_feature_combs),
                    run_parallel=True)

                accuracies, ineqs, col_names, rejection_res = \
                    para.extract_positions(results, range(4))
                avg_ineqs = para.aggregate_results(ineqs, axis=0)
                table_row = [para.aggregate_results(accuracies, axis=0)] + \
                    list(avg_ineqs)
                # The column names of the first seed are used for the table.
                col_names = ["Acc"] + col_names[0]

                curve_types = ['overall'] + [
                    ','.join(feature_comb)
                    for feature_comb in sens_feature_combs
                ]
                rejection_curves = dict(zip(
                    curve_types,
                    para.extract_positions(rejection_res, curve_types)))
                avg_rejection_curves = {
                    curve_type: _aggregate_rejection_curves(type_curves)
                    for curve_type, type_curves in rejection_curves.items()
                }

                for curve_type, curves in avg_rejection_curves.items():
                    color = method_colors[method]
                    iu.plot_rejection_curves(curves, method,
                                             fig_name=curve_type, color=color)

                # The row is keyed by method only, so with several
                # calibration methods the last one wins.
                method_name = method  #+ '_' + calibration_method
                method_results[method_name] = table_row

        iu.plot_results(out_dir, dataset)

        wiki_file_loc = iu.get_wiki_file(out_dir)
        with open(wiki_file_loc, 'a') as wiki_file:
            col_format = ['3'] * len(col_names)
            # col_names stays empty when no method was evaluated; guard the
            # index so that case no longer raises IndexError.
            if col_format:
                col_format[0] = '2'
            iu.write_wiki_results(wiki_file, col_names, method_results,
                                  col_format, hyperparams,
                                  regression_methods_info)
            iu.emit_acc_fairness_curves(wiki_file, out_dir, dataset)
            lorenz_desc = "error (y_hat - y)"
            iu.emit_lorenz_curves(wiki_file, out_dir, dataset, lorenz_desc)
        iu.clear_figures()
def _load_or_train_model(model, dataset, x_train, y_train, x_control_train):
    """Return the persisted classifier for ``model`` or train and persist it."""
    exists, loaded_clf = aeio.load_model(model, dataset)
    if exists:
        print("Loaded {}...".format(str(loaded_clf)))
        return loaded_clf

    print("Training {}...".format(str(model)))
    # TODO: ugly
    if isinstance(model, lm.FCLogReg):
        model.train(x_train, y_train, x_control_train)
    else:
        model.train(x_train, y_train)
    aeio.persist_model(model, dataset)
    return model


def _explain_user_group(clf, dataset, feature_info, cost_funcs, cost_funcs_rev,
                        scaler, users, users_gt, users_preds, group_mask,
                        group_id, investigation_users, role_model_users,
                        role_model_users_gt_labels, role_model_users_pred,
                        all_users_flipped, all_users_flipped_labels):
    """Generate explanations for every user selected by ``group_mask``.

    ``group_id`` is 1 for the sensitive group and 0 otherwise. The rows of
    ``all_users_flipped`` / ``all_users_flipped_labels`` belonging to the
    selected users are overwritten in place with the suggested feature vector
    and the role model's ground-truth label.

    Returns:
        Tuple ``(utilities, anchor_indices, fp_count, explanations)``: two
        arrays with one entry per selected user, the number of users for
        which ``false_positive`` was reported, and the explanation strings of
        the users listed in ``investigation_users``.
    """
    variable_constraints = dataset_info[dataset]['variable_constraints']
    is_regression = dataset_info[dataset]['prediction_task'] == REGRESSION

    # Positions of the selected users within ``users``; computed once rather
    # than via np.where(...) for every single user.
    user_indices = np.where(group_mask)[0]
    group_users = users[group_mask]
    group_gt_labels = users_gt[group_mask]
    group_pred_labels = users_preds[group_mask]

    utilities, anchor_indices, explanations = [], [], []
    fp_count = 0
    for i, user in enumerate(group_users):
        index_in_users = user_indices[i]
        print("Computing for user", index_in_users)
        user = np.array([user])

        optimizer = ge.SamplingMethod(
            np.array([group_id]), feature_info, cost_funcs, cost_funcs_rev,
            variable_constraints, clf, dataset)
        (new_feature_vector, utility, effort, false_positive, role_model_gt,
         anchor_index) = optimizer.sampling_based_explanations(
            user, role_model_users, role_model_users_gt_labels,
            role_model_users_pred, group_gt_labels[i], group_pred_labels[i],
            user_sens_group=group_id)

        for dir_up_cols in cf.get_up_cols(variable_constraints, feature_info):
            # sanity check: these columns must not decrease
            assert np.all(new_feature_vector[dir_up_cols] >=
                          user.flatten()[dir_up_cols])

        new_predicted_label = clf.predict([new_feature_vector])[0]
        if not is_regression:
            new_predicted_label = bool(new_predicted_label)

        all_users_flipped[index_in_users] = new_feature_vector
        all_users_flipped_labels[index_in_users] = role_model_gt
        if false_positive:
            fp_count += 1

        # Features whose value differs between the user and the suggestion.
        tar_nec_vars = np.where(new_feature_vector != user[0])[0]
        tar_vals = new_feature_vector[tar_nec_vars]
        old_vals = user[0][tar_nec_vars]

        utilities.append(utility)
        anchor_indices.append(anchor_index)

        if index_in_users in investigation_users:
            feature_wise_effort, explanation = aeio.get_feature_wise_effort(
                feature_info, user[0], tar_nec_vars, tar_vals, cost_funcs,
                cost_funcs_rev, bool(group_id), role_model_gt,
                new_predicted_label, group_gt_labels[i], group_pred_labels[i])
            individual_feature_costs_str = aeio.get_feature_wise_str(
                feature_wise_effort, explanation)
            # Users are reported by their position in the investigation list.
            investigation_rank = np.where(
                index_in_users == investigation_users)[0][0]
            explanations.append(
                " * User {} explanation, utility: {:.3f}, effort: {:.3f}\n{}\n * User gt label: {}, user_predicted_label:{}, role model gt label: {}, role model predicted label: {}; explanation:\n{}\n * Old feature vals for user {}:\n{}\n".format(
                    investigation_rank, utility, effort,
                    individual_feature_costs_str,
                    group_gt_labels[i], group_pred_labels[i],
                    role_model_gt, new_predicted_label,
                    aeio.get_conditions_str(feature_info, tar_nec_vars,
                                            tar_vals, scaler=scaler, level=3),
                    investigation_rank,
                    aeio.get_conditions_str(feature_info, tar_nec_vars,
                                            old_vals, scaler=scaler, level=3)))

    return (np.array(utilities), np.array(anchor_indices), fp_count,
            explanations)


def evaluate_models(dataset, models, test_or_train, subsample_size=None,
                    num_investigation_users=None):
    """
    Main analysis function. Loads the selected dataset, generates
    explanations for individuals and analyzes efforts.

    Results are written below ``results/<dataset>`` (``.../FC`` when
    ``FAIRNESS_CONSTRAINTS`` is set); flipped datasets, utilities and anchors
    are saved below ``./flipped_datasets/<dataset>``.

    Args:
        dataset: Key into ``dataset_info``; also used in output paths.
        models: Sequence of model objects. It is iterated more than once.
            Each model is loaded via ``aeio.load_model`` or, if no persisted
            copy exists, trained on the training split and persisted.
        test_or_train: ``'test'`` generates explanations for the test split,
            any other value for the training split.
        subsample_size: Per group, at most this many users with
            ``prediction < ground truth`` (lowest indices first) receive an
            explanation; ``None`` means all of them.
        num_investigation_users: How many of the stored common negative users
            get their explanation written to the report; ``None`` means all.
    """
    # global mapping
    models_to_individual_explanation_strings_sens = {}
    models_to_individual_explanation_strings_nosens = {}

    res_dir = 'results/{}'.format(dataset)
    out.create_dir(res_dir)
    res_dir = res_dir if not FAIRNESS_CONSTRAINTS else '{}/FC'.format(res_dir)
    out.create_dir(res_dir)
    res_file_path = res_dir + '/res.txt'

    sens_group_desc = dataset_info[dataset]['sens_f']
    learning_env = dec_rule_env.DecRuleEnv(dataset, sens_group_desc)
    learning_env.load_data()
    feature_info = learning_env.feature_info
    print("\n\nfeature_info original:{}\n\n".format(learning_env.feature_info))

    prediction_task = dataset_info[dataset]['prediction_task']
    label_dtype = bool if prediction_task == CLASSIFICATION else float

    x_test_original = learning_env.x_test
    y_test = learning_env.y_test.astype(label_dtype)
    x_train_original = learning_env.x_train
    y_train = learning_env.y_train.astype(label_dtype)

    # Features are min-max scaled with statistics of the training split only.
    scaler = MinMaxScaler()
    scaler.fit(x_train_original)
    x_train = scaler.transform(x_train_original)
    # NOTE(review): file name and label column 'G3' are hard-coded although
    # this runs for every dataset - confirm this dump is still wanted.
    with open('processed_student_data.csv', 'w') as fp:
        pd.DataFrame(
            data=np.append(x_train, y_train.reshape(x_train.shape[0], 1),
                           axis=1),
            columns=aeio.get_feature_names(feature_info) + ['G3']).to_csv(
                fp, index=False)
    x_test = scaler.transform(x_test_original)

    sens_group = ~learning_env.x_control[sens_group_desc[-1]]
    sens_group_train = ~learning_env.x_control_train[sens_group_desc[-1]]
    sens_group_test = ~learning_env.x_control_test[sens_group_desc[-1]]
    ds_statistics = get_dataset_statistics_temp(learning_env.y, sens_group,
                                                prediction_task)

    ##Use these for the remaining analysis; in case you want to change analysis from test to train or vice versa this is the place to change the var assignments
    role_model_users = x_train  # This should not change
    role_model_users_gt_labels = y_train  # This should not change
    role_model_users_sens_group = sens_group_train  # This should not change
    # change these based on which group's explanations are needed (test or train)
    use_test = test_or_train == 'test'
    users = x_test if use_test else x_train
    users_sens_group = sens_group_test if use_test else sens_group_train
    users_gt = y_test if use_test else y_train

    # If not already found, search for common negative users for in-depth analysis
    fc_suffix = '_fc' if FAIRNESS_CONSTRAINTS else ''
    common_negative_users_sens_filename = \
        "./common_negative_users/{}/random_{}_users_sens{}.txt".format(
            dataset, test_or_train, fc_suffix)
    common_negative_users_nosens_filename = \
        "./common_negative_users/{}/random_{}_users_nosens{}.txt".format(
            dataset, test_or_train, fc_suffix)

    if not os.path.exists(common_negative_users_sens_filename) or \
            not os.path.exists(common_negative_users_nosens_filename):
        out.create_dir('./common_negative_users')
        out.create_dir('./common_negative_users/{}'.format(dataset))
        overall_negative_sens, overall_negative_nosens = None, None
        for m in models:
            # Same load-or-train path as the main loop below. Training here
            # without the FCLogReg special case would persist a model that
            # the main loop then loads instead of training it properly.
            clf = _load_or_train_model(m, dataset, role_model_users,
                                       role_model_users_gt_labels,
                                       learning_env.x_control_train)
            y_pred = clf.predict(users).astype(label_dtype)
            m_negatives_sens = set(
                np.where(np.logical_and(y_pred < users_gt,
                                        users_sens_group))[0])
            m_negatives_nosens = set(
                np.where(np.logical_and(y_pred < users_gt,
                                        ~users_sens_group))[0])
            if overall_negative_nosens is None:
                overall_negative_nosens = m_negatives_nosens
                overall_negative_sens = m_negatives_sens
            else:
                overall_negative_sens = overall_negative_sens.intersection(
                    m_negatives_sens)
                overall_negative_nosens = overall_negative_nosens.intersection(
                    m_negatives_nosens)
        print("\n{}\n".format(np.array(list(overall_negative_sens))))
        print("\n{}\n".format(np.array(list(overall_negative_nosens))))
        np.savetxt(common_negative_users_sens_filename,
                   np.array(list(overall_negative_sens)))
        np.savetxt(common_negative_users_nosens_filename,
                   np.array(list(overall_negative_nosens)))

    with open(res_file_path, 'w') as res_file:
        res_file.write("Sensitive group: {}\n\n{}".format(
            sens_group_desc[1], ds_statistics))
        # feature_desc = du.get_feature_descriptions(feature_info)
        feature_desc = get_feature_descriptions_temp(feature_info)
        res_file.write(feature_desc)

    analysis = [
        ("group-efforts", role_model_users_sens_group)
    ]  #[("overall-efforts", None)] # Only run for individual users for now

    # TODO: keep an eye on this mapping
    cost_groups = {
        cf.ONE_GROUP_IND: "all",
        0: sens_group_desc[0],
        1: sens_group_desc[1]
    }  # cf.ONE_GROUP_IN = -1

    for (analysis_name, cost_sens_group) in analysis:
        group_res_file_path = "{}/{}_res_{}.txt".format(
            res_dir, analysis_name, test_or_train)
        cost_funcs, feature_val_costs = dataset_info[dataset]['cost_funcs'](
            feature_info, role_model_users, cost_sens_group,
            dataset_info[dataset]['variable_constraints'])
        cost_funcs_rev, feature_val_costs_rev = dataset_info[dataset][
            'cost_funcs'](feature_info, role_model_users, cost_sens_group,
                          dataset_info[dataset]['variable_constraints_rev'])
        print("{}\n".format(feature_val_costs))
        print("{}\n".format(cost_funcs))

        with open(group_res_file_path, 'w') as group_res_file:
            group_res_file.write(
                "== Cost functions ==\n\nCosts are computed as (fraction < new value) - (fraction < old value), where values are ordered in the direction of increasing effort it takes to reach them.\n\n"
            )
            # Code never goes into this loop for generic cost func
            for sens_group_val, costs in feature_val_costs.items():
                # Separate name so the dataset-level sens_group_desc is not
                # overwritten by the per-group description.
                cost_group_desc = cost_groups[sens_group_val]
                group_res_file.write(
                    "=== Costs for group {}: ===\n\n{}\n".format(
                        cost_group_desc, aeio.get_cost_func_desc(costs)))
                group_res_file.write(
                    "=== Reverse costs for group {}: ===\n\n{}\n".format(
                        cost_group_desc,
                        aeio.get_cost_func_desc(
                            feature_val_costs_rev[sens_group_val])))

        # Users whose feature values and explanations are reported in detail.
        # They come from the stored common negative users so that they are
        # the same across all models.
        np.random.seed(seed)
        # np.loadtxt yields a 0-d array for a file with a single value;
        # atleast_1d keeps slicing and indexing valid in that case. Slicing
        # with [:None] keeps all entries.
        investigation_users_sens = np.atleast_1d(
            np.loadtxt(common_negative_users_sens_filename)).astype(
                int)[:num_investigation_users]
        investigation_users_nosens = np.atleast_1d(
            np.loadtxt(common_negative_users_nosens_filename)).astype(
                int)[:num_investigation_users]

        # Values for individual users
        disparity_table_heading = ["model"]
        disparity_table_formats = [None]
        disparity_table_values = []
        with open(group_res_file_path, 'a') as group_res_file:
            group_res_file.write("=== Individual explanations: ===\n\n")
            group_res_file.write(
                "All disparities are calculated as abs(sens_val - nosens_val)\n\n"
            )

        for model in models:
            model_start = time.time()
            clf = _load_or_train_model(model, dataset, role_model_users,
                                       role_model_users_gt_labels,
                                       learning_env.x_control_train)
            model_end = time.time()

            performance_stats = eval_formula.eval_model(
                clf, users, users_gt, prediction_task)
            # When analysing the training split, users is role_model_users,
            # so both predictions then describe the same data.
            users_preds = clf.predict(users).astype(label_dtype)
            role_model_users_pred = clf.predict(role_model_users).astype(
                label_dtype)

            with open(res_file_path, 'a') as res_file:
                res_file.write("Performance of {}\n\n{}\n\n".format(
                    str(clf), aeio.get_dict_listing(performance_stats)))
                res_file.write("Training {} took {:.2f} secs".format(
                    str(clf), model_end - model_start))

            # Select, per group, the users predicted below their ground
            # truth (optionally subsampled) plus the investigation users.
            negatives = users_preds < users_gt
            sub_filter_sens = np.zeros(len(users), dtype=bool)
            sub_filter_nosens = np.zeros(len(users), dtype=bool)
            sub_filter_sens[np.where(
                np.logical_and(users_sens_group,
                               negatives))[0][:subsample_size]] = True
            sub_filter_nosens[np.where(
                np.logical_and(~users_sens_group,
                               negatives))[0][:subsample_size]] = True
            sub_filter_sens[investigation_users_sens] = True
            sub_filter_nosens[investigation_users_nosens] = True
            num_users_sens = int(np.count_nonzero(sub_filter_sens))
            num_users_nosens = int(np.count_nonzero(sub_filter_nosens))

            all_users_flipped = users.copy()
            all_users_flipped_labels = users_gt.copy()

            ind_start = time.time()
            (user_utility_sens, anchor_indices_sens, fp_count_sens,
             investigation_explanations_sens) = _explain_user_group(
                clf, dataset, feature_info, cost_funcs, cost_funcs_rev,
                scaler, users, users_gt, users_preds, sub_filter_sens, 1,
                investigation_users_sens, role_model_users,
                role_model_users_gt_labels, role_model_users_pred,
                all_users_flipped, all_users_flipped_labels)
            (user_utility_nosens, anchor_indices_nosens, fp_count_nosens,
             investigation_explanations_nosens) = _explain_user_group(
                clf, dataset, feature_info, cost_funcs, cost_funcs_rev,
                scaler, users, users_gt, users_preds, sub_filter_nosens, 0,
                investigation_users_nosens, role_model_users,
                role_model_users_gt_labels, role_model_users_pred,
                all_users_flipped, all_users_flipped_labels)

            useful_sens = np.count_nonzero(user_utility_sens > 0)
            useful_nosens = np.count_nonzero(user_utility_nosens > 0)
            summary_of_useful_explanations = "Total explanations given: {} ({} sens group, {} non-sens group); Useful explanations (utility > 0): {} ({} sens group, {} non-sens group".format(
                num_users_sens + num_users_nosens, num_users_sens,
                num_users_nosens, useful_sens + useful_nosens, useful_sens,
                useful_nosens)

            # Users without an explanation keep utility 0 and anchor NaN.
            all_users_utilities = np.zeros(len(users))
            all_users_utilities[sub_filter_sens] = user_utility_sens
            all_users_utilities[sub_filter_nosens] = user_utility_nosens
            all_user_anchors = np.full(len(users), fill_value=np.nan)
            all_user_anchors[sub_filter_sens] = anchor_indices_sens
            all_user_anchors[sub_filter_nosens] = anchor_indices_nosens

            out.create_dir('./flipped_datasets')
            out.create_dir('./flipped_datasets/{}'.format(dataset))
            np.savetxt(
                './flipped_datasets/{}/{}_all_x_{}.txt'.format(
                    dataset, clf.filename(), test_or_train),
                all_users_flipped)
            np.savetxt(
                './flipped_datasets/{}/{}_all_y_{}.txt'.format(
                    dataset, clf.filename(), test_or_train),
                all_users_flipped_labels)
            np.savetxt(
                './flipped_datasets/{}/{}_utilities_{}.txt'.format(
                    dataset, clf.filename(), test_or_train),
                all_users_utilities)
            np.savetxt(
                './flipped_datasets/{}/{}_anchors_{}.txt'.format(
                    dataset, clf.filename(), test_or_train),
                all_user_anchors)

            models_to_individual_explanation_strings_sens[str(clf)] = \
                "".join(investigation_explanations_sens)
            models_to_individual_explanation_strings_nosens[str(clf)] = \
                "".join(investigation_explanations_nosens)
            ind_end = time.time()

            # Heading and formats are only requested for the first model.
            if len(disparity_table_heading) <= 1:
                heading, formats, values = eval_formula.get_disparity_measures(
                    users_gt, users_preds, users_sens_group,
                    np.nanmean(user_utility_sens),
                    np.nanmean(user_utility_nosens),
                    prediction_task,
                    return_heading_and_formats=True)
                disparity_table_heading += heading
                disparity_table_formats += formats
            else:
                values = eval_formula.get_disparity_measures(
                    users_gt, users_preds, users_sens_group,
                    np.nanmean(user_utility_sens),
                    np.nanmean(user_utility_nosens),
                    prediction_task,
                    return_heading_and_formats=False)
            disparity_table_values.append([str(clf)] + values)

            with open(group_res_file_path, 'a') as group_res_file:
                group_res_file.write(" * For {}:\n\n".format(str(model)))
                # Adjacent literals instead of a backslash continuation
                # inside the string, which embedded the source indentation
                # in the written text.
                group_res_file.write(
                    " * Computation of explanations, for {} ({} sens, {} nonsens) "
                    "individual users took {:.2f} seconds.\n\n * # False positive role models = {} sens, {} "
                    "nonsens.\n\n".format(
                        num_users_sens + num_users_nosens, num_users_sens,
                        num_users_nosens, ind_end - ind_start, fp_count_sens,
                        fp_count_nosens))
                group_res_file.write(
                    " * {}\n\n".format(summary_of_useful_explanations))

        with open(group_res_file_path, 'a') as group_res_file:
            group_res_file.write(
                "Disparity measures of different models:\n\n{}\n".format(
                    out.get_table(disparity_table_heading,
                                  disparity_table_values,
                                  val_format=disparity_table_formats)))
            group_res_file.write(
                aeio.get_disparity_plots(
                    res_dir,
                    disparity_table_heading,
                    disparity_table_values,
                    filename='all_disp_in_one_{}'.format(test_or_train)))
            if num_investigation_users is None or num_investigation_users > 0:
                for k, v in models_to_individual_explanation_strings_sens.items():
                    group_res_file.write(
                        "Randomly chosen sens users' explanations for {}:\n\n{}\n\n"
                        .format(k, v))
                for k, v in models_to_individual_explanation_strings_nosens.items():
                    group_res_file.write(
                        "Randomly chosen nonsens users' explanations for {}:\n\n{}\n\n"
                        .format(k, v))
def generate_explanations_for_test(self, model, x_train_flipped,
                                   y_train_flipped, x_test_flipped,
                                   y_test_flipped, threshold_sens,
                                   threshold_nosens, cost_funcs,
                                   cost_funcs_rev):
    """Generate explanations for test users predicted below ``self.y_test``.

    Every selected test user gets an explanation via
    ``self.get_explanations_test``. The suggested feature vector and the role
    model's label replace the user's row in copies of ``x_test_flipped`` /
    ``y_test_flipped``. The twice-flipped data and the per-user utilities
    (0 for users without an explanation) are saved below
    ``./flipped_datasets/<dataset>``.

    Args:
        model: Trained model providing ``predict`` and ``filename``.
        x_train_flipped, y_train_flipped: Training data handed to
            ``get_explanations_test``; ``x_train_flipped`` is also used for
            the training predictions.
        x_test_flipped, y_test_flipped: Test data to generate explanations
            for; neither array is modified.
        threshold_sens, threshold_nosens: Only used in the output file names.
        cost_funcs, cost_funcs_rev: Cost functions passed to
            ``ge.SamplingMethod``.
    """
    label_dtype = bool if self.prediction_task == exp.CLASSIFICATION else float
    y_test_pred = model.predict(x_test_flipped).astype(label_dtype)
    y_train_pred = model.predict(x_train_flipped).astype(label_dtype)

    # Users predicted below their label, limited to the first
    # subsample_size_test of them (a None limit keeps all).
    explanations_mask = np.zeros(len(x_test_flipped), dtype=bool)
    explanations_mask[np.where(
        y_test_pred < self.y_test)[0][:self.subsample_size_test]] = True
    # Positions of the selected users in the test set, computed once instead
    # of via np.where(...) on every iteration.
    user_indices = np.where(explanations_mask)[0]

    filtered_users = x_test_flipped[explanations_mask]
    filtered_users_sens_group = self.sens_group_test[explanations_mask]
    filtered_users_gt_labels = y_test_flipped[explanations_mask]
    filtered_users_pred_labels = y_test_pred[explanations_mask]

    user_utilities = np.zeros(len(filtered_users))
    x_test_flipped_twice = x_test_flipped.copy()
    y_test_flipped_twice = y_test_flipped.copy()

    # Explanations are computed sequentially; the unused process list and
    # queue of an earlier multiprocessing attempt were removed.
    for i, user in enumerate(filtered_users):
        user = np.array([user])
        index_in_users = user_indices[i]
        optimizer = ge.SamplingMethod(
            np.array([filtered_users_sens_group[i]]), self.feature_info,
            cost_funcs, cost_funcs_rev,
            exp.dataset_info[self.dataset]['variable_constraints'], model,
            self.dataset)
        # The call hands back the user position and index alongside the
        # explanation; those returned values are the ones used below.
        (i, index_in_users, user, new_feature_vector, utility, effort,
         false_positive, role_model_gt, anchor) = self.get_explanations_test(
            i, index_in_users, optimizer, user, y_train_pred,
            filtered_users_gt_labels, filtered_users_pred_labels,
            filtered_users_sens_group, x_train_flipped, y_train_flipped)
        print("User {} retrieved".format(i))
        user_utilities[i] = utility
        x_test_flipped_twice[index_in_users] = new_feature_vector
        y_test_flipped_twice[index_in_users] = role_model_gt

    all_utilities = np.zeros(len(self.x_test))
    all_utilities[explanations_mask] = user_utilities

    out.create_dir('./flipped_datasets')
    out.create_dir('./flipped_datasets/{}'.format(self.dataset))
    np.savetxt(
        './flipped_datasets/{}/{}_x_test_twice_{}_{}.txt'.format(
            self.dataset, model.filename(), threshold_sens,
            threshold_nosens), x_test_flipped_twice)
    np.savetxt(
        './flipped_datasets/{}/{}_y_test_twice_{}_{}.txt'.format(
            self.dataset, model.filename(), threshold_sens,
            threshold_nosens), y_test_flipped_twice)
    np.savetxt(
        './flipped_datasets/{}/{}_new_utilities_test_{}_{}.txt'.format(
            self.dataset, model.filename(), threshold_sens,
            threshold_nosens), all_utilities)