import configparser
import time
from collections import defaultdict

import numpy as np

# NOTE: project-specific helpers used below (read_list, read_data, clean_data,
# subsample_data, scale_data, create_prob_vecs, sample_colors,
# sample_colors_ml_model, perturb_2_color, weighted_size, weighted_size_by_idx,
# take_by_key, vanilla_clustering, vanilla_clustering_weighted,
# fair_partial_assignment, fair_partial_assignment_2_color,
# fair_partial_assignment_large_cluster, violating_lp_clustering,
# write_fairness_trial) are assumed to be imported from this repository's
# utility and LP-solver modules.


def fair_clustering_large_cluster(dataset, config_file, data_dir, num_clusters,
                                  deltas, max_points, L=0, p_acc=1.0,
                                  ml_model_flag=False):
    config = configparser.ConfigParser(converters={'list': read_list})
    config.read(config_file)

    # Read data in from a given csv_file found in config
    # df (pd.DataFrame) : holds the data
    df = read_data(config, dataset)

    # Subsample data if needed
    if max_points and len(df) > max_points:
        df = df.head(max_points)

    # Clean the data (bucketize text data)
    df, _ = clean_data(df, config, dataset)

    # variable_of_interest (list[str]) : variables that we would like to collect statistics for
    variable_of_interest = config[dataset].getlist("fairness_variable")

    # NOTE: this code only handles one color per vertex
    assert len(variable_of_interest) == 1

    # Assign each data point to a color, based on config file
    # attributes (dict[str -> defaultdict[int -> list[int]]]) : holds indices of points for each color class
    # color_flag (dict[str -> list[int]]) : holds map from point to color class it belongs to (reverse of `attributes`)
    attributes, color_flag, prob_vecs, prob_thresh = {}, {}, {}, {}
    for variable in variable_of_interest:
        colors = defaultdict(list)
        this_color_flag = [0] * len(df)

        condition_str = variable + "_conditions"
        bucket_conditions = config[dataset].getlist(condition_str)

        # For each row, if the row passes the bucket condition,
        # then the row is added to that color class
        for i, row in df.iterrows():
            for bucket_idx, bucket in enumerate(bucket_conditions):
                if eval(bucket)(row[variable]):
                    colors[bucket_idx].append(i)      # add the point to the list of its colors
                    this_color_flag[i] = bucket_idx   # record the color for this given point

        # NOTE: colors is a dict, this_color_flag is a list
        attributes[variable] = colors
        color_flag[variable] = this_color_flag

        if ml_model_flag == False:
            prob_vecs[variable] = create_prob_vecs(len(df), p_acc, len(colors), this_color_flag)
        else:
            ml_model_path = 'MLModels' + '/' + dataset
            prob_vecs_path = ml_model_path + '_prob_vecs.npy'
            n = len(df)
            prob_vecs[variable] = np.load(prob_vecs_path)[0:n, :]

    # representation (dict[str -> dict[int -> float]]) : representation of each color compared to the whole dataset
    representation = {}
    for var in variable_of_interest:
        color_proportions = np.sum(prob_vecs[var], axis=0) / len(df)
        dict_ = {}
        for j in range(color_proportions.shape[0]):
            dict_.update({j: color_proportions[j]})
        representation[var] = dict_

    # Select only the desired columns
    selected_columns = config[dataset].getlist("columns")
    df = df[[col for col in selected_columns]]

    # NOTE: this code only handles one membership criterion
    (_, fair_vals), = representation.items()

    # NOTE: this handles the case when a color is missing in the sampled vertices
    num_colors = max(fair_vals.keys()) + 1

    # Scale data if desired
    scaling = config["DEFAULT"].getboolean("scaling")
    if scaling:
        df = scale_data(df)

    # Cluster the data -- using the objective specified by clustering_method
    clustering_method = config["DEFAULT"]["clustering_method"]

    t1 = time.monotonic()
    initial_score, pred, cluster_centers = vanilla_clustering(df, num_clusters, clustering_method)
    t2 = time.monotonic()
    cluster_time = t2 - t1
    print("Clustering time: {}".format(cluster_time))

    # sizes (list[int]) : sizes of clusters
    sizes = [0 for _ in range(num_clusters)]
    for p in pred:
        sizes[p] += 1

    # dataset_ratio : Ratios for colors in the dataset
    dataset_ratio = {}
    for attr, color_dict in attributes.items():
        dataset_ratio[attr] = {
            int(color): len(points_in_color) / len(df)
            for color, points_in_color in color_dict.items()
        }

    # fairness_vars (list[str]) : Variables to perform fairness balancing on
    fairness_vars = config[dataset].getlist("fairness_variable")

    # NOTE: here is where you set the upper and lower bounds
    # NOTE: across all different values within the same attribute you have the same multipliers up and down
    for delta in deltas:
        # alpha_i = a_val * (representation of color i in dataset)
        # beta_i = b_val * (representation of color i in dataset)
        alpha, beta = {}, {}
        a_val, b_val = 1 / (1 - delta), 1 - delta
        for var, bucket_dict in attributes.items():
            alpha[var] = {k: a_val * representation[var][k] for k in bucket_dict.keys()}
            beta[var] = {k: b_val * representation[var][k] for k in bucket_dict.keys()}

            # NOTE: Sample color values
            if ml_model_flag == False:
                color_flag[var] = sample_colors(color_flag[var], num_colors, p_acc)
            else:
                color_flag[var] = sample_colors_ml_model(prob_vecs[var], num_colors)

        fp_color_flag, fp_alpha, fp_beta = (take_by_key(color_flag, fairness_vars),
                                            take_by_key(alpha, fairness_vars),
                                            take_by_key(beta, fairness_vars))

        # Solves partial assignment and then performs rounding to get integral assignment
        t1 = time.monotonic()
        res, nf_time = fair_partial_assignment_large_cluster(df, cluster_centers, fp_alpha, fp_beta,
                                                             fp_color_flag, clustering_method,
                                                             num_colors, L)
        t2 = time.monotonic()
        lp_time = t2 - t1

        ### Output / Writing data to a file
        # output is a dictionary which will hold the data to be written to the
        # outfile as key-value pairs. Outfile will be written in JSON format.
        output = {}

        # num_clusters for re-running trial
        output["num_clusters"] = num_clusters

        # Whether or not the LP found a solution
        # (nonzero status -> error occurred)
        output["partial_success"] = res["partial_success"]

        output["dataset_distribution"] = dataset_ratio

        # Save alphas and betas from trials
        output['prob_proportions'] = representation
        output["alpha"] = alpha
        output["beta"] = beta

        # Save original clustering score
        output["unfair_score"] = initial_score

        # Original Color Blind Assignments
        output["unfair_assignments"] = pred.tolist()

        # Clustering score after addition of fairness
        output["objective"] = res["objective"]

        # Clustering score after initial LP
        output["partial_fair_score"] = res["partial_objective"]

        # Save size of each cluster
        output["sizes"] = sizes

        output["attributes"] = attributes

        # These included at end because their data is large
        # Save points, colors for re-running trial
        # Partial assignments -- list bc. ndarray not serializable
        output["centers"] = [list(center) for center in cluster_centers]
        output["points"] = [list(point) for point in df.values]
        output["assignment"] = res["assignment"]
        output["partial_assignment"] = res["partial_assignment"]

        output["name"] = dataset
        output["clustering_method"] = clustering_method
        output["scaling"] = scaling
        output["delta"] = delta
        output["time"] = lp_time
        output["cluster_time"] = cluster_time

        # NOTE: record proportions
        output['partial_proportions'] = res['partial_proportions']
        output['proportions'] = res['proportions']
        output['partial_proportions_normalized'] = res['partial_proportions_normalized']
        output['proportions_normalized'] = res['proportions_normalized']

        # Record Lower Bound L
        output['Cluster_Size_Lower_Bound'] = L

        # Record Classifier Accuracy
        output['p_acc'] = p_acc

        # output['nf_time'] = nf_time

        # Record probability vecs
        # (only one fairness variable is present -- see the assert above; bind it to a
        #  separate name so the prob_vecs dict is not clobbered between delta iterations)
        (_, prob_vecs_arr), = prob_vecs.items()
        output['prob_vecs'] = prob_vecs_arr.ravel().tolist()

        # Record Probability Vector
        # NOTE: TODO

        # Writes the data in `output` to a file in data_dir
        write_fairness_trial(output, data_dir)

        # Added because sometimes the LP for the next iteration solves so
        # fast that `write_fairness_trial` cannot write to disk
        time.sleep(1)

    return output
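
# A minimal usage sketch (not part of the original pipeline). The dataset key,
# config path, and output directory below are hypothetical placeholders and
# must match entries in your own config file.
def _example_large_cluster_trial():
    return fair_clustering_large_cluster(
        dataset="example_dataset",            # hypothetical section name in the config
        config_file="config/example.ini",     # hypothetical config path
        data_dir="output",                    # directory for the JSON trial file
        num_clusters=5,
        deltas=[0.1],
        max_points=1000,
        L=20,                                 # lower bound on cluster sizes
        p_acc=0.9,                            # simulated classifier accuracy
        ml_model_flag=False)
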
def fair_clustering_metric_membership(dataset, config_file, data_dir, num_clusters,
                                      deltas, max_points, L=0):
    num_colors = 2
    config = configparser.ConfigParser(converters={'list': read_list})
    config.read(config_file)

    # Read data in from a given csv_file found in config
    df = read_data(config, dataset)

    # Subsample data if needed
    if max_points and len(df) > max_points:
        df = df.head(max_points)
        # below if you wish to shuffle
        # df = df.sample(frac=1, random_state=1).reset_index(drop=True)

    # Clean the data (bucketize text data)
    df, _ = clean_data(df, config, dataset)

    # variable_of_interest (list[str]) : variables that we would like to collect statistics for
    variable_of_interest = config[dataset].getlist("fairness_variable")

    # NOTE: this code only handles one color per vertex
    assert len(variable_of_interest) == 1

    # Assign each data point to a color, based on config file
    # attributes (dict[str -> defaultdict[int -> list[int]]]) : holds indices of points for each color class
    # color_flag (dict[str -> list[int]]) : holds map from point to color class it belongs to (reverse of `attributes`)
    attributes, color_flag, prob_vecs, prob_vals, prob_thresh = {}, {}, {}, {}, {}

    for variable in variable_of_interest:
        prob_vals[variable] = df[variable].tolist()

    (_, prob_vals_process), = prob_vals.items()
    min_val = min(prob_vals_process)
    prob_vals_process = [(p - min_val) for p in prob_vals_process]
    R_max = max(prob_vals_process)
    assert min(prob_vals_process) == 0

    # put the data back in
    for k, v in prob_vals.items():
        prob_vals[k] = prob_vals_process

    representation = {}
    for var in variable_of_interest:
        representation[var] = sum(prob_vals[var]) / len(df)

    # NOTE: this code only handles one membership criterion
    (_, fair_vals), = representation.items()

    # drop unneeded columns
    selected_columns = config[dataset].getlist("columns")
    df = df[[col for col in selected_columns]]

    # Scale data if desired
    scaling = config["DEFAULT"].getboolean("scaling")
    if scaling:
        df = scale_data(df)

    # Cluster the data -- using the objective specified by clustering_method
    clustering_method = config["DEFAULT"]["clustering_method"]

    t1 = time.monotonic()
    # NOTE: initial_score is the value of the objective at the solution
    # NOTE: this is where the color-blind algorithm is run
    if type(num_clusters) is list:
        num_clusters = num_clusters[0]
    initial_score, pred, cluster_centers = vanilla_clustering(df, num_clusters, clustering_method)
    t2 = time.monotonic()
    cluster_time = t2 - t1
    print("Clustering time: {}".format(cluster_time))

    # For each point in the dataset, assign it to the cluster and color it belongs to
    cluster_color_proportions = np.zeros((num_clusters, num_colors))

    # sizes (list[int]) : sizes of clusters
    sizes = [0 for _ in range(num_clusters)]
    for p in pred:
        sizes[p] += 1

    # dataset_ratio : Ratios for colors in the dataset
    # fairness_vars (list[str]) : Variables to perform fairness balancing on
    fairness_vars = config[dataset].getlist("fairness_variable")

    # NOTE: here is where you set the upper and lower bounds
    # NOTE: across all different values within the same attribute you have the same multipliers up and down
    for delta in deltas:
        # alpha_i = a_val * (representation of color i in dataset)
        # beta_i = b_val * (representation of color i in dataset)
        alpha, beta = {}, {}
        a_val, b_val = 1 / (1 - delta), 1 - delta
        # a_val, b_val = 1, 1

        # NOTE: 2 color case
        for var in variable_of_interest:
            alpha[var] = a_val * representation[var]
            beta[var] = b_val * representation[var]

        fp_color_flag = prob_vals
        fp_alpha = alpha
        fp_beta = beta

        # Solves partial assignment and then performs rounding to get integral assignment
        t1 = time.monotonic()
        res = fair_partial_assignment_2_color(df, cluster_centers, fp_alpha, fp_beta, fp_color_flag,
                                              clustering_method, num_colors, L)
        t2 = time.monotonic()
        lp_time = t2 - t1

        ### Output / Writing data to a file
        # output is a dictionary which will hold the data to be written to the
        # outfile as key-value pairs. Outfile will be written in JSON format.
        output = {}

        # num_clusters for re-running trial
        output["num_clusters"] = num_clusters

        # Whether or not the LP found a solution
        output["partial_success"] = res["partial_success"]

        # Nonzero status -> error occurred
        output["status"] = res["partial_status"]

        # output["dataset_distribution"] = dataset_ratio

        # Save alphas and betas from trials
        output['prob_proportions'] = representation
        output["alpha"] = alpha
        output["beta"] = beta

        # Save size of each cluster
        output["sizes"] = sizes

        output["attributes"] = attributes

        # These included at end because their data is large
        # Save points, colors for re-running trial
        # Partial assignments -- list bc. ndarray not serializable
        ''' IMPORTANT '''
        output["centers"] = [list(center) for center in cluster_centers]
        output["points"] = [list(point) for point in df.values]

        # Save original clustering score
        output["unfair_score"] = initial_score

        # Original Color Blind Assignments
        if type(pred) is not list:
            pred = pred.tolist()
        output["unfair_assignments"] = pred

        # Record Assignments
        output["partial_assignment"] = res["partial_assignment"]
        output["assignment"] = res["assignment"]

        # Clustering score after addition of fairness
        output["objective"] = res["objective"]

        # Clustering score after initial LP
        output["partial_objective"] = res["partial_objective"]

        output['prob_values'] = prob_vals

        # Record Lower Bound L
        output['Cluster_Size_Lower_Bound'] = L

        # Record probability vecs
        for k, v in prob_vecs.items():
            prob_vecs = v

        # Record Probability Vector
        # output['prob_vecs'] = prob_vecs.ravel().tolist()  # NOTE: TODO

        output["name"] = dataset
        output["clustering_method"] = clustering_method
        output["scaling"] = scaling
        output["delta"] = delta
        output["time"] = lp_time
        output["cluster_time"] = cluster_time

        # Record R_max
        output["R_max"] = R_max

        # Writes the data in `output` to a file in data_dir
        write_fairness_trial(output, data_dir)

        # Added because sometimes the LP for the next iteration solves so
        # fast that `write_fairness_trial` cannot write to disk
        time.sleep(1)

    return output
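
# Worked example of the bound computation in the delta loop above (illustrative
# numbers only, not taken from any dataset): with delta = 0.2 and a color whose
# representation in the dataset is 0.30,
#     a_val = 1 / (1 - 0.2) = 1.25  ->  alpha = 1.25 * 0.30 = 0.375
#     b_val = 1 - 0.2       = 0.80  ->  beta  = 0.80 * 0.30 = 0.24
# so the LP is asked to keep each cluster's share of that color between 0.24 and 0.375.
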
def fair_clustering(dataset, config_file, data_dir, num_clusters, deltas,
                    max_points, violating, violation):
    config = configparser.ConfigParser(converters={'list': read_list})
    config.read(config_file)

    # Read data in from a given csv_file found in config
    # df (pd.DataFrame) : holds the data
    df = read_data(config, dataset)

    # Subsample data if needed
    if max_points and len(df) > max_points:
        df = subsample_data(df, max_points)

    # Clean the data (bucketize text data)
    df, _, weight = clean_data(df, config, dataset)
    # print(weight)

    # variable_of_interest (list[str]) : variables that we would like to collect statistics for
    variable_of_interest = config[dataset].getlist("variable_of_interest")
    print("DATA READ")
    T0 = time.monotonic()

    # Assign each data point to a color, based on config file
    # attributes (dict[str -> defaultdict[int -> list[int]]]) : holds indices of points for each color class
    # color_flag (dict[str -> list[int]]) : holds map from point to color class it belongs to (reverse of `attributes`)
    attributes, color_flag = {}, {}
    for variable in variable_of_interest:
        colors = defaultdict(list)
        this_color_flag = [0] * len(df)

        condition_str = variable + "_conditions"
        bucket_conditions = config[dataset].getlist(condition_str)

        # For each row, if the row passes the bucket condition,
        # then the row is added to that color class
        for i, row in df.iterrows():
            for bucket_idx, bucket in enumerate(bucket_conditions):
                if eval(bucket)(row[variable]):
                    colors[bucket_idx].append(i)
                    this_color_flag[i] = bucket_idx

        attributes[variable] = colors
        color_flag[variable] = this_color_flag
    print("COLOR BUILT")

    # representation (dict[str -> dict[int -> float]]) : representation of each color compared to the whole dataset
    representation = {}
    for var, bucket_dict in attributes.items():
        # representation[var] = {k: (len(bucket_dict[k]) / len(df)) for k in bucket_dict.keys()}
        representation[var] = {
            k: (weighted_size_by_idx(weight, bucket_dict[k]) / weighted_size(weight))
            for k in bucket_dict.keys()
        }

    # Select only the desired columns
    selected_columns = config[dataset].getlist("columns")
    df = df[[col for col in selected_columns]]

    # Scale data if desired
    scaling = config["DEFAULT"].getboolean("scaling")
    if scaling:
        df = scale_data(df)

    # Cluster the data -- using the objective specified by clustering_method
    clustering_method = config["DEFAULT"]["clustering_method"]
    print("READY TO CLUSTER")

    if not violating:
        t1 = time.monotonic()
        initial_score, pred, cluster_centers = vanilla_clustering_weighted(df, weight, num_clusters,
                                                                           clustering_method)
        t2 = time.monotonic()
        cluster_time = t2 - t1
        print("Clustering time: {}".format(cluster_time))

        ### Calculate fairness statistics
        # fairness ( dict[str -> defaultdict[int -> defaultdict[int -> int]]] )
        # fairness : is used to hold how much of each color belongs to each cluster
        # fairness = {}

        # For each point in the dataset, assign it to the cluster and color it belongs to
        # for attr, colors in attributes.items():
        #     fairness[attr] = defaultdict(partial(defaultdict, int))
        #     for i, row in enumerate(df.iterrows()):
        #         cluster = pred[i]
        #         for color in colors:
        #             if i in colors[color]:
        #                 fairness[attr][cluster][color] += 1
        #                 continue
        # print("FAIRNESS BUILT")

        # sizes (list[int]) : sizes of clusters
        sizes = [0 for _ in range(num_clusters)]
        for i, p in enumerate(pred):
            sizes[p] += weight[i]

        # ratios (dict[str -> dict[int -> list[float]]]) : Ratios for colors in a cluster
        # ratios = {}
        # for attr, colors in attributes.items():
        #     attr_ratio = {}
        #     for cluster in range(num_clusters):
        #         attr_ratio[cluster] = [fairness[attr][cluster][color] / sizes[cluster]
        #                                for color in sorted(colors.keys())]
        #     ratios[attr] = attr_ratio
    else:
        # These added so that output format is consistent among violating and
        # non-violating trials
        cluster_time, initial_score = 0, 0
        fairness, ratios = {}, {}
        sizes, cluster_centers = [], []

    # dataset_ratio : Ratios for colors in the dataset
    # dataset_ratio = {}
    # for attr, color_dict in attributes.items():
    #     dataset_ratio[attr] = {int(color): len(points_in_color) / len(df)
    #                            for color, points_in_color in color_dict.items()}

    # fairness_vars (list[str]) : Variables to perform fairness balancing on
    fairness_vars = config[dataset].getlist("fairness_variable")

    for delta in deltas:
        # alpha_i = a_val * (representation of color i in dataset)
        # beta_i = b_val * (representation of color i in dataset)
        alpha, beta = {}, {}
        a_val, b_val = 1 / (1 - delta), 1 - delta
        for var, bucket_dict in attributes.items():
            alpha[var] = {k: a_val * representation[var][k] for k in bucket_dict.keys()}
            beta[var] = {k: b_val * representation[var][k] for k in bucket_dict.keys()}

        # Only include the entries for the variables we want to perform fairness on
        # (in `fairness_vars`). The others are kept for statistics.
        fp_color_flag, fp_alpha, fp_beta = (take_by_key(color_flag, fairness_vars),
                                            take_by_key(alpha, fairness_vars),
                                            take_by_key(beta, fairness_vars))

        # Solves partial assignment and then performs rounding to get integral assignment
        if not violating:
            t1 = time.monotonic()
            res = fair_partial_assignment(df, weight, cluster_centers, fp_alpha, fp_beta,
                                          fp_color_flag, clustering_method)
            t2 = time.monotonic()
            lp_time = t2 - t1
        else:
            t1 = time.monotonic()
            res = violating_lp_clustering(df, num_clusters, fp_alpha, fp_beta,
                                          fp_color_flag, clustering_method, violation)
            t2 = time.monotonic()
            lp_time = t2 - t1

            # Added so that output formatting is consistent among violating
            # and non-violating trials
            res["partial_objective"] = 0
            res["partial_assignment"] = []

        TOT_TIME = time.monotonic() - T0
        print(TOT_TIME)

        ### Output / Writing data to a file
        # output is a dictionary which will hold the data to be written to the
        # outfile as key-value pairs. Outfile will be written in JSON format.
        output = {}

        # num_clusters for re-running trial
        # output["num_clusters"] = num_clusters

        # Whether or not the LP found a solution
        output["success"] = res["success"]

        # Nonzero status -> error occurred
        output["status"] = res["status"]

        # output["dataset_distribution"] = dataset_ratio

        # Save alphas and betas from trials
        output["alpha"] = alpha
        output["beta"] = beta

        # Save original clustering score
        output["unfair_score"] = initial_score

        # Clustering score after addition of fairness
        output["fair_score"] = res["objective"]

        # Clustering score after initial LP
        output["partial_fair_score"] = res["partial_objective"]

        # Save size of each cluster
        output["sizes"] = sizes

        # output["attributes"] = attributes

        # Save the ratio of each color in its cluster
        # output["ratios"] = ratios

        # These included at end because their data is large
        # Save points, colors for re-running trial
        # Partial assignments -- list bc. ndarray not serializable
        output["centers"] = [list(center) for center in cluster_centers]
        # output["points"] = [list(point) for point in df.values]
        # output["assignment"] = res["assignment"]
        # output["partial_assignment"] = res["partial_assignment"]

        output["name"] = dataset
        output["clustering_method"] = clustering_method
        output["scaling"] = scaling
        output["delta"] = delta
        output["time"] = lp_time
        output["total_time"] = TOT_TIME
        output["cluster_time"] = cluster_time
        output["violating"] = violating
        output["violation"] = violation

        # Writes the data in `output` to a file in data_dir
        write_fairness_trial(output, data_dir, dataset)

        # Added because sometimes the LP for the next iteration solves so
        # fast that `write_fairness_trial` cannot write to disk
        time.sleep(1)
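
# Sketch only: `weighted_size_by_idx` and `weighted_size` come from this
# repository's utilities and are not defined here. The toy helper below shows
# the quantity the weighted `representation` computed above is assumed to be:
# the total weight of a color's points divided by the total weight of the data.
def _weighted_color_proportion(weight, color_indices):
    total = float(sum(weight))
    return sum(weight[i] for i in color_indices) / total
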
def fair_clustering_2_color(dataset, config_file, data_dir, num_clusters,
                            deltas, max_points, L=0, p_acc=1.0):
    # NOTE: this code works for 2 colors
    num_colors = 2
    config = configparser.ConfigParser(converters={'list': read_list})
    config.read(config_file)

    # Read data in from a given csv_file found in config
    df = read_data(config, dataset)

    # Subsample data if needed
    if max_points and len(df) > max_points:
        # NOTE: to exclude randomization effects, uncomment the fixed-row block
        # below and comment out the `df.head(max_points)` line
        # rows = [0, 1, 2, 3, 4, 5, 20, 21, 23, 50, 126, 134, 135]
        # df = df.iloc[rows, :]
        # df = df.reset_index()
        df = df.head(max_points)
        # below if you wish to shuffle
        # df = df.sample(frac=1, random_state=1).reset_index(drop=True)

    # Clean the data (bucketize text data)
    df, _ = clean_data(df, config, dataset)

    # variable_of_interest (list[str]) : variables that we would like to collect statistics for
    variable_of_interest = config[dataset].getlist("fairness_variable")

    # NOTE: this code only handles one color per vertex
    assert len(variable_of_interest) == 1

    # Assign each data point to a color, based on config file
    # attributes (dict[str -> defaultdict[int -> list[int]]]) : holds indices of points for each color class
    # color_flag (dict[str -> list[int]]) : holds map from point to color class it belongs to (reverse of `attributes`)
    attributes, color_flag, prob_vecs, prob_vals, prob_vals_thresh, prob_thresh = {}, {}, {}, {}, {}, {}
    for variable in variable_of_interest:
        colors = defaultdict(list)
        this_color_flag = [0] * len(df)

        condition_str = variable + "_conditions"
        bucket_conditions = config[dataset].getlist(condition_str)

        # For each row, if the row passes the bucket condition,
        # then the row is added to that color class
        for i, row in df.iterrows():
            for bucket_idx, bucket in enumerate(bucket_conditions):
                if eval(bucket)(row[variable]):
                    colors[bucket_idx].append(i)      # add the point to the list of its colors
                    this_color_flag[i] = bucket_idx   # record the color for this given point

        attributes[variable] = colors
        color_flag[variable] = this_color_flag

        # NOTE: generate probabilities according to the perturbation described in section 5.2
        prob_vals[variable] = [perturb_2_color(color, p_acc) for color in this_color_flag]

    # representation (dict[str -> dict[int -> float]]) : representation of each color compared to the whole dataset
    representation = {}
    for var in variable_of_interest:
        representation[var] = sum(prob_vals[var]) / len(df)

    (_, fair_vals), = representation.items()

    # drop unneeded columns
    selected_columns = config[dataset].getlist("columns")
    df = df[[col for col in selected_columns]]

    # Scale data if desired
    scaling = config["DEFAULT"].getboolean("scaling")
    if scaling:
        df = scale_data(df)

    # Cluster the data -- using the objective specified by clustering_method
    clustering_method = config["DEFAULT"]["clustering_method"]

    t1 = time.monotonic()
    # NOTE: initial_score is the value of the objective at the solution
    # NOTE: this is where the color-blind algorithm is run
    if type(num_clusters) is list:
        num_clusters = num_clusters[0]
    initial_score, pred, cluster_centers = vanilla_clustering(df, num_clusters, clustering_method)
    t2 = time.monotonic()
    cluster_time = t2 - t1
    print("Clustering time: {}".format(cluster_time))

    # sizes (list[int]) : sizes of clusters
    sizes = [0 for _ in range(num_clusters)]
    for p in pred:
        sizes[p] += 1

    # fairness_vars (list[str]) : Variables to perform fairness balancing on
    fairness_vars = config[dataset].getlist("fairness_variable")

    # NOTE: here is where you set the upper and lower bounds
    # NOTE: across all different values within the same attribute you have the same multipliers up and down
    for delta in deltas:
        alpha, beta = {}, {}
        a_val, b_val = 1 / (1 - delta), 1 - delta

        # NOTE: 2 color case
        for var, bucket_dict in attributes.items():
            alpha[var] = a_val * representation[var]
            beta[var] = b_val * representation[var]

        fp_color_flag = prob_vals
        fp_alpha = alpha
        fp_beta = beta

        # Solves partial assignment and then performs rounding to get integral assignment
        t1 = time.monotonic()
        res = fair_partial_assignment_2_color(df, cluster_centers, fp_alpha, fp_beta, fp_color_flag,
                                              clustering_method, num_colors, L)
        t2 = time.monotonic()
        lp_time = t2 - t1

        ### Output / Writing data to a file
        # output is a dictionary which will hold the data to be written to the
        # outfile as key-value pairs. Outfile will be written in JSON format.
        output = {}

        # num_clusters for re-running trial
        output["num_clusters"] = num_clusters

        # Whether or not the LP found a solution
        output["partial_success"] = res["partial_success"]

        # Nonzero status -> error occurred
        output["partial_status"] = res["partial_status"]

        # output["dataset_distribution"] = dataset_ratio

        # Save alphas and betas from trials
        output['prob_proportions'] = representation
        output["alpha"] = alpha
        output["beta"] = beta

        # Save size of each cluster
        output["sizes"] = sizes

        output["attributes"] = attributes

        # These included at end because their data is large
        # Save points, colors for re-running trial
        # Partial assignments -- list bc. ndarray not serializable
        output["centers"] = [list(center) for center in cluster_centers]
        output["points"] = [list(point) for point in df.values]

        # Save original clustering score
        output["unfair_score"] = initial_score

        # Original Color Blind Assignments
        if type(pred) is not list:
            pred = pred.tolist()
        output["unfair_assignments"] = pred

        # Record Assignments
        output["partial_assignment"] = res["partial_assignment"]
        output["assignment"] = res["assignment"]

        # Clustering score after addition of fairness
        output["objective"] = res["objective"]

        # Clustering score after initial LP
        output["partial_objective"] = res["partial_objective"]

        output['prob_values'] = prob_vals

        # Record Lower Bound L
        output['Cluster_Size_Lower_Bound'] = L

        # Record Classifier Accuracy
        output['p_acc'] = p_acc

        # Record probability vecs
        output["name"] = dataset
        output["clustering_method"] = clustering_method
        output["scaling"] = scaling
        output["delta"] = delta
        output["time"] = lp_time
        output["cluster_time"] = cluster_time

        # Writes the data in `output` to a file in data_dir
        write_fairness_trial(output, data_dir)

        # Added because sometimes the LP for the next iteration solves so
        # fast that `write_fairness_trial` cannot write to disk
        time.sleep(1)

    return output
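
# Sketch (not part of the original pipeline): given the color-blind assignment
# `pred` and the per-point color values built above (e.g. `prob_vals[variable]`),
# the helper below computes each cluster's average color value -- the quantity
# the [beta, alpha] bounds are meant to constrain in the fair assignment.
def _cluster_color_proportions(pred, color_values, num_clusters):
    totals = [0.0] * num_clusters
    counts = [0] * num_clusters
    for cluster, value in zip(pred, color_values):
        totals[cluster] += value
        counts[cluster] += 1
    return [t / c if c else 0.0 for t, c in zip(totals, counts)]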