def generate_counterfactuals(self, query_instance, total_CFs, desired_class="opposite",
                             proximity_weight=0.5, diversity_weight=1.0,
                             categorical_penalty=0.1, algorithm="DiverseCF",
                             features_to_vary="all", permitted_range=None,
                             yloss_type="log_loss",
                             diversity_loss_type="dpp_style:inverse_dist",
                             feature_weights="inverse_mad", stopping_threshold=0.5,
                             posthoc_sparsity_param=0.1,
                             posthoc_sparsity_algorithm="linear", verbose=True):
    """Generate diverse counterfactual explanations for one query instance.

    The method validates the inputs, (re)initializes the search parameters,
    runs the search and wraps the outcome in a CounterfactualExamples object.

    :param query_instance: Test point of interest, a dictionary of feature
        names and values.
    :param total_CFs: Number of counterfactuals requested.
    :param desired_class: Target class of the counterfactuals (0 or 1). The
        default "opposite" means the class opposite to the prediction for
        query_instance (binary classification).
    :param proximity_weight: Positive float; larger values keep the
        counterfactuals closer to query_instance.
    :param diversity_weight: Positive float; larger values make the
        counterfactuals more diverse.
    :param categorical_penalty: Positive float weighting the constraint that
        all levels of a categorical variable sum to 1.
    :param algorithm: Generation algorithm, "DiverseCF" or "RandomInitCF".
    :param features_to_vary: "all" or a list of feature names allowed to vary.
    :param permitted_range: Dictionary mapping continuous feature names to a
        [min, max] list. If None, the values initialized in data_interface
        are used.
    :param yloss_type: y-loss metric: "l2_loss", "log_loss" or "hinge_loss".
    :param diversity_loss_type: Diversity loss metric: "avg_dist" or
        "dpp_style:inverse_dist".
    :param feature_weights: "inverse_mad" or a dictionary of feature names to
        weights. With "inverse_mad" a continuous feature is weighted by the
        inverse of its Median Absolute Deviation (MAD) in the training set and
        a categorical feature gets weight 1.
    :param stopping_threshold: Minimum target-class probability required of a
        counterfactual.
    :param posthoc_sparsity_param: Parameter of the post-hoc sparsity
        enhancement applied to continuous features.
    :param posthoc_sparsity_algorithm: "linear" or "binary" search. Binary
        search is preferable for wide feature ranges, and only when features
        relate monotonically to the predicted outcome.
    :param verbose: Whether to print 'Diverse Counterfactuals found!'.

    :return: A CounterfactualExamples object holding the resulting
        counterfactual explanations (see diverse_counterfactuals.py).
    """
    # Input validation happens before any state is (re)initialized.
    self.check_mad_validity(feature_weights)
    self.check_permitted_range(permitted_range)

    init_args = (total_CFs, algorithm, features_to_vary, yloss_type,
                 diversity_loss_type, feature_weights, proximity_weight,
                 diversity_weight, categorical_penalty)
    self.do_param_initializations(*init_args)

    search_args = (query_instance, desired_class, stopping_threshold,
                   posthoc_sparsity_param, posthoc_sparsity_algorithm, verbose)
    prepared_query, query_pred = self.find_counterfactuals(*search_args)

    # The counterfactual attributes are read from self only after the search ran.
    return exp.CounterfactualExamples(
        self.data_interface, prepared_query, query_pred,
        self.final_cfs, self.cfs_preds,
        self.final_cfs_sparse, self.cfs_preds_sparse,
        posthoc_sparsity_param, desired_class, encoding='label')
def generate_counterfactuals(self, query_instance, total_CFs, desired_class="opposite",
                             feature_weights="inverse_mad"):
    """Generate diverse counterfactual explanations for one query instance.

    :param query_instance: Test point of interest, a dictionary of feature
        names and values.
    :param total_CFs: Number of counterfactuals requested.
    :param desired_class: Target class of the counterfactuals (0 or 1). The
        default "opposite" means the class opposite to the prediction for
        query_instance (binary classification).
    :param feature_weights: When equal to "inverse_mad", the MAD values of the
        data interface are validated (with warnings) before searching.

    :return: A CounterfactualExamples object holding the resulting
        counterfactual explanations (see diverse_counterfactuals.py).
    """
    uses_mad_weights = feature_weights == "inverse_mad"
    if uses_mad_weights:
        # Called for its warnings only; the MAD values are not returned.
        self.data_interface.get_valid_mads(display_warnings=True, return_mads=False)

    search_result = self.find_counterfactuals(query_instance, desired_class, total_CFs)
    prepared_query, query_pred, counterfactuals, counterfactual_preds = search_result

    return exp.CounterfactualExamples(
        self.data_interface, prepared_query, query_pred,
        counterfactuals, counterfactual_preds, desired_class=desired_class)
def generate_counterfactuals(self, query_instance, total_CFs, desired_class="opposite",
                             proximity_weight=0.5, diversity_weight=1.0,
                             categorical_penalty=0.1, algorithm="DiverseCF",
                             features_to_vary="all", yloss_type="hinge_loss",
                             diversity_loss_type="dpp_style:inverse_dist",
                             feature_weights="inverse_mad", optimizer="tensorflow:adam",
                             learning_rate=0.05, min_iter=500, max_iter=5000,
                             project_iter=0, loss_diff_thres=1e-5,
                             loss_converge_maxiter=1, verbose=False,
                             init_near_query_instance=True, tie_random=False,
                             stopping_threshold=0.5, posthoc_sparsity_param=0.1):
    """Generates diverse counterfactual explanations

    :param query_instance: A dictionary of feature names and values. Test point of interest.
    :param total_CFs: Total number of counterfactuals required.
    :param desired_class: Desired counterfactual class - can take 0 or 1. Default value is
        "opposite" to the outcome class of query_instance for binary classification.
    :param proximity_weight: A positive float. Larger this weight, more close the
        counterfactuals are to the query_instance.
    :param diversity_weight: A positive float. Larger this weight, more diverse the
        counterfactuals are.
    :param categorical_penalty: A positive float. A weight to ensure that all levels of a
        categorical variable sums to 1.
    :param algorithm: Counterfactual generation algorithm. Either "DiverseCF" or "RandomInitCF".
    :param features_to_vary: Either a string "all" or a list of feature names to vary.
    :param yloss_type: Metric for y-loss of the optimization function. Takes "l2_loss" or
        "log_loss" or "hinge_loss".
    :param diversity_loss_type: Metric for diversity loss of the optimization function.
        Takes "avg_dist" or "dpp_style:inverse_dist".
    :param feature_weights: Either "inverse_mad" or a dictionary with feature names as keys
        and corresponding weights as values. Default option is "inverse_mad" where the weight
        for a continuous feature is the inverse of the Median Absolute Deviation (MAD) of the
        feature's values in the training set; the weight for a categorical feature is equal
        to 1 by default.
    :param optimizer: Tensorflow optimization algorithm. Currently tested only with
        "tensorflow:adam".
    :param learning_rate: Learning rate for optimizer.
    :param min_iter: Min iterations to run gradient descent for.
    :param max_iter: Max iterations to run gradient descent for.
    :param project_iter: Project the gradients at an interval of these many iterations.
    :param loss_diff_thres: Minimum difference between successive loss values to check
        convergence.
    :param loss_converge_maxiter: Maximum number of iterations for loss_diff_thres to hold to
        declare convergence. Defaults to 1, but we assigned a more conservative value of 2 in
        the paper.
    :param verbose: Print intermediate loss value.
    :param init_near_query_instance: Boolean to indicate if counterfactuals are to be
        initialized near query_instance.
    :param tie_random: Used in rounding off CFs and intermediate projection.
    :param stopping_threshold: Minimum threshold for counterfactuals target class probability.
    :param posthoc_sparsity_param: Parameter for the post-hoc operation on continuous
        features to enhance sparsity.

    :return: A CounterfactualExamples object to store and visualize the resulting
        counterfactual explanations (see diverse_counterfactuals.py).
    """
    # check feature MAD validity and throw warnings
    if feature_weights == "inverse_mad":
        self.data_interface.get_valid_mads(display_warnings=True, return_mads=False)

    # Rebuild all three graph components whenever any structural setting differs
    # from the one the current graph was built with.
    requested_setup = [total_CFs, algorithm, features_to_vary, yloss_type,
                       diversity_loss_type, feature_weights, optimizer]
    current_setup = self.cf_init_weights + self.loss_weights + self.optimizer_weights
    if requested_setup != current_setup:
        self.do_cf_initializations(total_CFs, algorithm, features_to_vary)
        self.do_loss_initializations(yloss_type, diversity_loss_type, feature_weights)
        self.do_optimizer_initializations(optimizer)

    # Future Support: We have three main components in our tensorflow graph:
    # (1) initialization of tf.variables, (2) defining ops for loss function
    # initializations, and (3) defining ops for optimizer initializations. Need to
    # define methods to delete some nodes from a tensorflow graph or update
    # variables/ops in a tensorflow graph dynamically, so that only those components
    # corresponding to the variables that are updated change.

    # check if hyperparameters are to be updated
    # The comparison must be positional: a multiset comparison would treat
    # (proximity=0.5, diversity=1.0) and (proximity=1.0, diversity=0.5) as equal and
    # silently keep the stale weights.
    requested_hyperparameters = [proximity_weight, diversity_weight, categorical_penalty]
    if requested_hyperparameters != list(self.hyperparameters):
        self.update_hyperparameters(proximity_weight, diversity_weight, categorical_penalty)

    query_instance, test_pred = self.find_counterfactuals(
        query_instance, desired_class, learning_rate, min_iter, max_iter, project_iter,
        loss_diff_thres, loss_converge_maxiter, verbose, init_near_query_instance,
        tie_random, stopping_threshold, posthoc_sparsity_param)

    return exp.CounterfactualExamples(
        self.data_interface, query_instance, test_pred, self.final_cfs, self.cfs_preds,
        self.final_cfs_sparse, self.cfs_preds_sparse, posthoc_sparsity_param)
def generate_countefactuals(self, query_instance, total_CFs, desired_class="opposite"):
    """Sample counterfactuals for one query instance from the generative model.

    The (misspelled) method name is kept unchanged so existing callers keep working.

    :param query_instance: Test point of interest; it is passed to
        data_interface.prepare_query_instance for encoding.
    :param total_CFs: Number of counterfactuals to sample.
    :param desired_class: Not used by the sampling itself: the counterfactual
        target is always ``1.0 - predicted class`` of the query instance.

    :return: A CounterfactualExamples object built from the original query
        instance, its predicted class, and the sampled counterfactuals with
        their labels.
    """
    # Converting query_instance into numpy array
    query_instance_org = query_instance
    query_instance = self.data_interface.prepare_query_instance(
        query_instance=query_instance, encode=True)
    query_instance = np.array([query_instance.iloc[0].values])

    if query_instance.shape[0] > self.batch_size:
        test_dataset = np.array_split(
            query_instance, query_instance.shape[0] // self.batch_size, axis=0)
    else:
        test_dataset = [query_instance]

    final_gen_cf = []
    final_cf_pred = []
    final_test_pred = []
    # Iterate over the batches themselves so the loop can never index past the
    # number of batches that were actually produced.
    for batch in test_dataset:
        train_x = torch.tensor(batch).float()
        train_y = torch.argmax(self.pred_model(train_x), dim=1)

        curr_gen_cf = []
        curr_cf_pred = []
        for _ in range(total_CFs):
            # NOTE(review): the generative model is taken from self.cf_vae, as the
            # other CFVAE-based generate_counterfactuals methods do - confirm that
            # this class stores it under the same attribute.
            _recon_err, _kl_err, _x_true, x_pred, cf_label = self.cf_vae.compute_elbo(
                train_x, 1.0 - train_y, self.pred_model)
            # detach() first: a tensor that still requires grad cannot be
            # converted with .numpy().
            curr_gen_cf.append(x_pred.detach().numpy())
            curr_cf_pred.append(cf_label.detach().numpy())

        final_gen_cf.append(curr_gen_cf)
        final_cf_pred.append(curr_cf_pred)
        final_test_pred.append(train_y.numpy())

    # Predicted class of the (single) query row.
    test_pred = final_test_pred[0][0]

    return exp.CounterfactualExamples(self.data_interface, query_instance_org, test_pred,
                                      final_gen_cf[0], final_cf_pred[0])
def generate_counterfactuals(self, query_instance, total_CFs, desired_class="opposite",
                             features_to_vary="all", permitted_range=None,
                             training_points_only=True, feature_weights="inverse_mad",
                             stopping_threshold=0.5, posthoc_sparsity_param=0.1,
                             posthoc_sparsity_algorithm="linear", verbose=True):
    """Generate diverse counterfactual explanations for one query instance.

    :param query_instance: Test point of interest, a dictionary of feature
        names and values.
    :param total_CFs: Number of counterfactuals requested.
    :param desired_class: Target class of the counterfactuals (0 or 1). The
        default "opposite" means the class opposite to the prediction for
        query_instance (binary classification).
    :param features_to_vary: "all" or a list of feature names allowed to vary.
        "all" is expanded to data_interface.feature_names before searching.
    :param permitted_range: Dictionary mapping continuous feature names to a
        [min, max] list. If None, the values initialized in data_interface
        are used.
    :param training_points_only: Whether the returned counterfactuals should
        be a subset of the training data points.
    :param feature_weights: "inverse_mad" or a dictionary of feature names to
        weights. With "inverse_mad" a continuous feature is weighted by the
        inverse of its Median Absolute Deviation (MAD) in the training set and
        a categorical feature gets weight 1.
    :param stopping_threshold: Minimum target-class probability required of a
        counterfactual.
    :param posthoc_sparsity_param: Parameter of the post-hoc sparsity
        enhancement applied to continuous features.
    :param posthoc_sparsity_algorithm: "linear" or "binary" search. Binary
        search is preferable for wide feature ranges, and only when features
        relate monotonically to the predicted outcome.
    :param verbose: Whether to print 'Diverse Counterfactuals found!'.

    :return: A CounterfactualExamples object holding the resulting
        counterfactual explanations (see diverse_counterfactuals.py).
    """
    if feature_weights == "inverse_mad":
        # Called for its warnings only; the MAD values are not returned.
        self.data_interface.get_valid_mads(display_warnings=True, return_mads=False)

    varying = self.data_interface.feature_names if features_to_vary == 'all' else features_to_vary

    search_result = self.find_counterfactuals(
        query_instance, desired_class, total_CFs, varying, permitted_range,
        training_points_only, stopping_threshold, posthoc_sparsity_param,
        posthoc_sparsity_algorithm, verbose)
    prepared_query, query_pred, counterfactuals, counterfactual_preds = search_result

    # The sparse results are read from self only after the search ran.
    return exp.CounterfactualExamples(
        self.data_interface, prepared_query, query_pred,
        counterfactuals, counterfactual_preds,
        self.final_cfs_sparse, self.cfs_preds_sparse,
        posthoc_sparsity_param, desired_class)
def generate_counterfactuals(self, query_instance, total_CFs, desired_class="opposite"):
    """Sample counterfactuals for one query instance from the trained CFVAE.

    :param query_instance: Test point of interest; it is converted with
        data_interface.get_ohe_min_max_normalized_data and only its first row
        is used.
    :param total_CFs: Number of counterfactuals to sample.
    :param desired_class: Forwarded to the returned CounterfactualExamples. The
        sampling itself always targets ``1.0 - predicted class``.

    :return: A CounterfactualExamples object holding the counterfactuals and
        the query instance as dataframes in the original user-fed format; no
        sparse counterfactuals are produced.
    """
    # Restore the most recently saved CFVAE weights and switch to eval mode.
    self.cf_vae.load_state_dict(torch.load(self.save_path))
    self.cf_vae.eval()

    normalized_query = self.data_interface.get_ohe_min_max_normalized_data(query_instance)
    query_instance = np.array([normalized_query.iloc[0].values])

    n_rows = query_instance.shape[0]
    if n_rows > self.batch_size:
        test_dataset = np.array_split(query_instance, n_rows // self.batch_size, axis=0)
    else:
        test_dataset = [query_instance]

    final_gen_cf, final_cf_pred, final_test_pred = [], [], []
    for batch_no in range(n_rows):
        train_x = torch.tensor(test_dataset[batch_no]).float()
        train_y = torch.argmax(self.pred_model(train_x), dim=1)
        curr_test_pred = train_y.numpy()

        curr_gen_cf = []
        curr_cf_pred = []
        for _ in range(total_CFs):
            # Resample until the generated label differs from the predicted class.
            while True:
                _recon_err, _kl_err, _x_true, x_pred, cf_label = \
                    self.cf_vae.compute_elbo(train_x, 1.0-train_y, self.pred_model)
                if not (cf_label == train_y):
                    break
                print(cf_label, train_y)

            x_pred = x_pred.detach().numpy()

            # Turn the mixed scores of every categorical feature into a one-hot
            # vector: the first column holding the largest score becomes 1.
            for group in self.cf_vae.encoded_categorical_feature_indexes:
                winner = group[0]
                best = x_pred[:, winner]
                for col in group:
                    if best < x_pred[:, col]:
                        best = x_pred[:, col]
                        winner = col
                for col in group:
                    x_pred[:, col] = 1 if col == winner else 0

            cf_label = cf_label.detach().numpy()
            cf_label = np.reshape(cf_label, (cf_label.shape[0], 1))

            curr_gen_cf.append(x_pred)
            curr_cf_pred.append(cf_label)

        final_gen_cf.append(curr_gen_cf)
        final_cf_pred.append(curr_cf_pred)
        final_test_pred.append(curr_test_pred)

    # Only the first (and only) batch is reported.
    query_pred = final_test_pred[0][0]
    generated = final_gen_cf[0]
    generated_preds = final_cf_pred[0]

    # do inverse transform of CFs to original user-fed format
    cfs = np.array([cf_rows[0] for cf_rows in generated])
    final_cfs_df = self.data_interface.get_inverse_ohe_min_max_normalized_data(cfs)
    cfs_preds = [value
                 for preds in generated_preds
                 for value in np.round(preds.flatten().tolist(), 3)]
    final_cfs_df[self.data_interface.outcome_name] = np.array(cfs_preds)

    test_instance_df = self.data_interface.get_inverse_ohe_min_max_normalized_data(
        query_instance)
    test_instance_df[self.data_interface.outcome_name] = np.array(np.round(query_pred, 3))

    # No sparse counterfactuals and no post-hoc sparsity coefficient here.
    return exp.CounterfactualExamples(data_interface=self.data_interface,
                                      final_cfs_df=final_cfs_df,
                                      test_instance_df=test_instance_df,
                                      final_cfs_df_sparse=None,
                                      posthoc_sparsity_param=None,
                                      desired_class=desired_class)
def generate_counterfactuals(self, query_instance, total_CFs, desired_class="opposite",
                             proximity_weight=0.5, diversity_weight=1.0,
                             categorical_penalty=0.1, algorithm="DiverseCF",
                             features_to_vary="all", permitted_range=None,
                             yloss_type="hinge_loss",
                             diversity_loss_type="dpp_style:inverse_dist",
                             feature_weights="inverse_mad", optimizer="tensorflow:adam",
                             learning_rate=0.05, min_iter=500, max_iter=5000,
                             project_iter=0, loss_diff_thres=1e-5,
                             loss_converge_maxiter=1, verbose=False,
                             init_near_query_instance=True, tie_random=False,
                             stopping_threshold=0.5, posthoc_sparsity_param=0.1,
                             posthoc_sparsity_algorithm="linear"):
    """Generate diverse counterfactual explanations for one query instance.

    :param query_instance: Test point of interest, a dictionary of feature
        names and values or a single row dataframe.
    :param total_CFs: Number of counterfactuals requested.
    :param desired_class: Target class of the counterfactuals (0 or 1). The
        default "opposite" means the class opposite to the prediction for
        query_instance (binary classification).
    :param proximity_weight: Positive float; larger values keep the
        counterfactuals closer to query_instance.
    :param diversity_weight: Positive float; larger values make the
        counterfactuals more diverse.
    :param categorical_penalty: Positive float weighting the constraint that
        all levels of a categorical variable sum to 1.
    :param algorithm: Generation algorithm, "DiverseCF" or "RandomInitCF".
    :param features_to_vary: "all" or a list of feature names allowed to vary.
    :param permitted_range: Dictionary mapping continuous feature names to a
        [min, max] list. If None, the values initialized in data_interface
        are used.
    :param yloss_type: y-loss metric: "l2_loss", "log_loss" or "hinge_loss".
    :param diversity_loss_type: Diversity loss metric: "avg_dist" or
        "dpp_style:inverse_dist".
    :param feature_weights: "inverse_mad" or a dictionary of feature names to
        weights. With "inverse_mad" a continuous feature is weighted by the
        inverse of its Median Absolute Deviation (MAD) in the training set and
        a categorical feature gets weight 1.
    :param optimizer: Tensorflow optimization algorithm. Currently tested only
        with "tensorflow:adam".
    :param learning_rate: Learning rate of the optimizer.
    :param min_iter: Minimum number of gradient descent iterations.
    :param max_iter: Maximum number of gradient descent iterations.
    :param project_iter: Interval, in iterations, at which gradients are
        projected.
    :param loss_diff_thres: Minimum difference between successive loss values
        used to check convergence.
    :param loss_converge_maxiter: Maximum number of iterations for
        loss_diff_thres to hold before declaring convergence. Defaults to 1; a
        more conservative value of 2 was used in the paper.
    :param verbose: Print intermediate loss values.
    :param init_near_query_instance: Whether counterfactuals are initialized
        near query_instance.
    :param tie_random: Used when rounding off counterfactuals and in the
        intermediate projection.
    :param stopping_threshold: Minimum target-class probability required of a
        counterfactual.
    :param posthoc_sparsity_param: Parameter of the post-hoc sparsity
        enhancement applied to continuous features.
    :param posthoc_sparsity_algorithm: "linear" or "binary" search. Binary
        search is preferable for wide feature ranges, and only when features
        relate monotonically to the predicted outcome.

    :return: A CounterfactualExamples object holding the resulting
        counterfactual explanations (see diverse_counterfactuals.py).
    """
    if feature_weights == "inverse_mad":
        # Called for its warnings only; the MAD values are not returned.
        self.data_interface.get_valid_mads(display_warnings=True, return_mads=False)

    if permitted_range is not None:
        # NOTE: validating the range against the original feature range
        # (data_interface.check_features_range) is currently disabled here.
        self.data_interface.permitted_range = permitted_range
        self.minx, self.maxx = self.data_interface.get_minx_maxx(normalized=True)
        active_ranges = self.data_interface.permitted_range
        continuous_names = self.data_interface.continuous_feature_names
        self.cont_minx = [active_ranges[name][0] for name in continuous_names]
        self.cont_maxx = [active_ranges[name][1] for name in continuous_names]

    # The counterfactual variables are always re-initialized; comparing against
    # self.cf_init_weights first is currently disabled.
    self.do_cf_initializations(total_CFs, algorithm, features_to_vary)

    loss_config = [yloss_type, diversity_loss_type, feature_weights]
    if loss_config != self.loss_weights:
        self.do_loss_initializations(yloss_type, diversity_loss_type, feature_weights)

    hyperparameter_config = [proximity_weight, diversity_weight, categorical_penalty]
    if hyperparameter_config != self.hyperparameters:
        self.update_hyperparameters(proximity_weight, diversity_weight, categorical_penalty)

    search_result = self.find_counterfactuals(
        query_instance, desired_class, optimizer, learning_rate, min_iter, max_iter,
        project_iter, loss_diff_thres, loss_converge_maxiter, verbose,
        init_near_query_instance, tie_random, stopping_threshold,
        posthoc_sparsity_param, posthoc_sparsity_algorithm)
    final_cfs_df, test_instance_df, final_cfs_df_sparse = search_result

    return exp.CounterfactualExamples(
        data_interface=self.data_interface,
        final_cfs_df=final_cfs_df,
        test_instance_df=test_instance_df,
        final_cfs_df_sparse=final_cfs_df_sparse,
        posthoc_sparsity_param=posthoc_sparsity_param,
        desired_class=desired_class)
def generate_counterfactuals(self, query_instance, total_CFs, desired_class="opposite",
                             permitted_range=None, features_to_vary="all",
                             stopping_threshold=0.5, posthoc_sparsity_param=0.1,
                             posthoc_sparsity_algorithm="linear", sample_size=1000,
                             random_seed=17):
    """Generate counterfactuals by randomly sampling features.

    :param query_instance: A dictionary of feature names and values. Test point of interest.
    :param total_CFs: Total number of counterfactuals required.
    :param desired_class: Desired counterfactual class - can take 0 or 1. Default value is
        "opposite" to the outcome class of query_instance for binary classification.
    :param permitted_range: Dictionary with feature names as keys and permitted range in
        list as values. Defaults to the range inferred from training data. If None, uses
        the parameters initialized in data_interface.
    :param features_to_vary: Either a string "all" or a list of feature names to vary.
    :param stopping_threshold: Minimum threshold for counterfactuals target class
        probability.
    :param posthoc_sparsity_param: Parameter for the post-hoc operation on continuous
        features to enhance sparsity.
    :param posthoc_sparsity_algorithm: Perform either linear or binary search. Takes
        "linear" or "binary". Prefer binary search when a feature range is large (for
        instance, income varying from 10k to 1000k) and only if the features share a
        monotonic relationship with predicted outcome in the model.
    :param sample_size: Sampling size
    :param random_seed: Random seed for reproducibility

    :return: A CounterfactualExamples object holding the query instance, its prediction
        and the selected counterfactuals. When fewer valid counterfactuals than total_CFs
        are found, the result is padded with invalid samples (as many as are available).
    :raises ValueError: If permitted_range is given and the data interface reports that
        the feature ranges are not within their original range.
    """
    # permitted range for continuous features
    if permitted_range is not None:
        # NOTE(review): check_features_range() is called without arguments and before
        # the new range is stored on the data interface - confirm that this validates
        # the range that was just passed in.
        if not self.data_interface.check_features_range():
            raise ValueError(
                "permitted range of features should be within their original range")
        self.data_interface.permitted_range = permitted_range
        self.minx, self.maxx = self.data_interface.get_minx_maxx(normalized=True)
        self.cont_minx = []
        self.cont_maxx = []
        for feature in self.data_interface.continuous_feature_names:
            self.cont_minx.append(self.data_interface.permitted_range[feature][0])
            self.cont_maxx.append(self.data_interface.permitted_range[feature][1])

    # fixing features that are to be fixed
    self.total_CFs = total_CFs
    self.fixed_features_values = {}
    if features_to_vary != "all":
        for feature in self.data_interface.feature_names:
            if feature not in features_to_vary:
                self.fixed_features_values[feature] = query_instance[feature]

    # number of output nodes of ML model
    temp_input = np.random.rand(1, len(self.data_interface.encoded_feature_names))
    self.num_ouput_nodes = len(self.model.get_output(temp_input))

    # Prepares user defined query_instance for DiCE.
    query_instance = self.data_interface.prepare_query_instance(
        query_instance=query_instance, encode=True)
    query_instance = np.array([query_instance.iloc[0].values], dtype=np.float32)

    # find the predicted value of query_instance
    test_pred = self.predict_fn(query_instance)[0]
    if desired_class == "opposite":
        desired_class = 1.0 - round(test_pred)
    self.target_cf_class = desired_class

    # A threshold on the wrong side of 0.5 for the target class is replaced by a
    # fixed value on the correct side.
    self.stopping_threshold = stopping_threshold
    if self.target_cf_class == 0 and self.stopping_threshold > 0.5:
        self.stopping_threshold = 0.25
    elif self.target_cf_class == 1 and self.stopping_threshold < 0.5:
        self.stopping_threshold = 0.75

    # get random samples for each feature independently
    start_time = timeit.default_timer()
    samples = get_samples(self, self.fixed_features_values,
                          sampling_random_seed=random_seed, sampling_size=sample_size)

    cfs = self.data_interface.prepare_query_instance(
        query_instance=samples, encode=True).values
    cf_preds = self.predict_fn(cfs)
    cfs_df = pd.DataFrame(
        np.append(cfs, np.array([cf_preds]).T, axis=1),
        columns=self.data_interface.encoded_feature_names + [self.data_interface.outcome_name])

    # check validity of CFs
    cfs_df['validity'] = cfs_df[self.data_interface.outcome_name].apply(
        lambda pred: 1 if ((self.target_cf_class == 0 and pred <= self.stopping_threshold) or
                           (self.target_cf_class == 1 and pred >= self.stopping_threshold))
        else 0)
    valid_df = cfs_df[cfs_df['validity'] == 1]
    self.total_CFs_found = valid_df.shape[0]

    if self.total_CFs_found >= self.total_CFs:
        cfs_df = valid_df.sample(n=self.total_CFs, random_state=random_seed)
        self.valid_cfs_found = True
    else:
        # Pad with invalid samples. The number requested is clamped to what is
        # available, because sampling more rows than exist raises a ValueError.
        invalid_df = cfs_df[cfs_df['validity'] == 0]
        shortfall = self.total_CFs - self.total_CFs_found
        temp_df = invalid_df.sample(n=min(shortfall, len(invalid_df)),
                                    random_state=random_seed)
        cfs_df = pd.concat([valid_df, temp_df], ignore_index=True)
        self.valid_cfs_found = False

    # convert to the format that is consistent with dice_tensorflow
    temp = cfs_df[self.data_interface.encoded_feature_names].values
    self.final_cfs = [np.array([arr]) for arr in temp]
    temp = cfs_df[[self.data_interface.outcome_name]].values
    self.cfs_preds = [np.array([arr]) for arr in temp]

    # post-hoc operation on continuous features to enhance sparsity - only for public data
    if (posthoc_sparsity_param is not None and posthoc_sparsity_param > 0
            and 'data_df' in self.data_interface.__dict__):
        final_cfs_sparse = copy.deepcopy(self.final_cfs)
        cfs_preds_sparse = copy.deepcopy(self.cfs_preds)
        self.final_cfs_sparse, self.cfs_preds_sparse = self.do_posthoc_sparsity_enhancement(
            final_cfs_sparse, cfs_preds_sparse, query_instance,
            posthoc_sparsity_param, posthoc_sparsity_algorithm)
    else:
        self.final_cfs_sparse = None
        self.cfs_preds_sparse = None

    self.elapsed = timeit.default_timer() - start_time
    m, s = divmod(self.elapsed, 60)
    if self.valid_cfs_found:
        print('Diverse Counterfactuals found! total time taken: %02d' % m,
              'min %02d' % s, 'sec')
    else:
        print('Only %d (required %d) Diverse Counterfactuals found for the given configuation, perhaps try with different values of proximity (or diversity) weights or learning rate...' % (self.total_CFs_found, self.total_CFs),
              '; total time taken: %02d' % m, 'min %02d' % s, 'sec')

    return exp.CounterfactualExamples(
        self.data_interface, query_instance, test_pred, self.final_cfs, self.cfs_preds,
        self.final_cfs_sparse, self.cfs_preds_sparse, posthoc_sparsity_param, desired_class)
def _generate_counterfactuals(self, query_instance, total_CFs, initialization="kdtree",
                              desired_range=None, desired_class="opposite",
                              proximity_weight=0.2, sparsity_weight=0.2,
                              diversity_weight=5.0, categorical_penalty=0.1,
                              algorithm="DiverseCF", features_to_vary="all",
                              permitted_range=None, yloss_type="hinge_loss",
                              diversity_loss_type="dpp_style:inverse_dist",
                              feature_weights="inverse_mad", stopping_threshold=0.5,
                              posthoc_sparsity_param=0.1,
                              posthoc_sparsity_algorithm="binary", maxiterations=500,
                              thresh=1e-2, verbose=False):
    """Generate diverse counterfactual explanations with the genetic algorithm.

    :param query_instance: Test point of interest, a dictionary of feature
        names and values.
    :param total_CFs: Number of counterfactuals requested. The population size
        is set to ten times this value.
    :param initialization: Method used to initialize the population of the
        genetic algorithm.
    :param desired_range: For regression problems: the outcome range in which
        counterfactuals are generated.
    :param desired_class: For classification problems: target class of the
        counterfactuals (0 or 1). The default "opposite" means the class
        opposite to the prediction for query_instance (binary classification).
    :param proximity_weight: Positive float; larger values keep the
        counterfactuals closer to query_instance.
    :param sparsity_weight: Positive float; larger values change fewer features
        of query_instance.
    :param diversity_weight: Positive float; larger values make the
        counterfactuals more diverse.
    :param categorical_penalty: Positive float weighting the constraint that
        all levels of a categorical variable sum to 1.
    :param algorithm: Generation algorithm, "DiverseCF" or "RandomInitCF".
    :param features_to_vary: "all" or a list of feature names allowed to vary.
    :param permitted_range: Dictionary mapping continuous feature names to a
        [min, max] list. If None, the values initialized in data_interface
        are used.
    :param yloss_type: y-loss metric: "l2_loss", "log_loss" or "hinge_loss".
    :param diversity_loss_type: Diversity loss metric: "avg_dist" or
        "dpp_style:inverse_dist".
    :param feature_weights: "inverse_mad" or a dictionary of feature names to
        weights. With "inverse_mad" a continuous feature is weighted by the
        inverse of its Median Absolute Deviation (MAD) in the training set and
        a categorical feature gets weight 1.
    :param stopping_threshold: Minimum target-class probability required of a
        counterfactual.
    :param posthoc_sparsity_param: Parameter of the post-hoc sparsity
        enhancement applied to continuous features.
    :param posthoc_sparsity_algorithm: "linear" or "binary" search. Binary
        search is preferable for wide feature ranges, and only when features
        relate monotonically to the predicted outcome.
    :param maxiterations: Maximum number of iterations of the genetic algorithm.
    :param thresh: The genetic algorithm stops when the difference between the
        previous and the current best loss is smaller than this value.
    :param verbose: Whether to print 'Diverse Counterfactuals found!'.

    :return: A CounterfactualExamples object holding the resulting
        counterfactual explanations (see diverse_counterfactuals.py).
    """
    self.population_size = 10 * total_CFs
    self.start_time = timeit.default_timer()

    features_to_vary = self.setup(features_to_vary, permitted_range, query_instance,
                                  feature_weights)

    # Bring the user-supplied query into the label-encoded array form; the
    # untouched input is kept for the dummy encoding further below.
    prepared_df = self.data_interface.prepare_query_instance(query_instance=query_instance)
    encoded_df = self.label_encode(prepared_df)
    encoded_query = np.array(encoded_df.values[0])
    self.x1 = encoded_query

    # Prediction for the query instance.
    self.test_pred = self.predict_fn(encoded_query)
    desired_class = self.misc_init(stopping_threshold, desired_class, desired_range,
                                   self.test_pred)

    # Dummy-encode the original query and add, filled with 0, every dummy
    # column of the training features that the query does not produce itself.
    query_instance_df_dummies = pd.get_dummies(query_instance)
    training_features = self.data_interface.data_df[self.data_interface.feature_names]
    for dummy_col in pd.get_dummies(training_features).columns:
        if dummy_col not in query_instance_df_dummies.columns:
            query_instance_df_dummies[dummy_col] = 0

    self.do_param_initializations(
        total_CFs, initialization, desired_range, desired_class, encoded_query,
        query_instance_df_dummies, algorithm, features_to_vary, permitted_range,
        yloss_type, diversity_loss_type, feature_weights, proximity_weight,
        sparsity_weight, diversity_weight, categorical_penalty, verbose)

    query_instance_df = self.find_counterfactuals(
        encoded_query, desired_range, desired_class, features_to_vary,
        maxiterations, thresh, verbose)

    return exp.CounterfactualExamples(
        data_interface=self.data_interface,
        test_instance_df=query_instance_df,
        final_cfs_df=self.final_cfs_df,
        final_cfs_df_sparse=self.final_cfs_df_sparse,
        posthoc_sparsity_param=posthoc_sparsity_param,
        desired_range=desired_range,
        desired_class=desired_class,
        model_type=self.model.model_type)
def generate_counterfactuals(self, query_instance, total_CFs, desired_class="opposite"):
    """Generate counterfactuals for a query instance with the trained CF-VAE.

    :param query_instance: Test point of interest. It is passed to
        ``data_interface.prepare_query_instance`` with ``encode=True`` and only
        the first row of the prepared frame is used.
    :param total_CFs: Total number of counterfactuals required.
    :param desired_class: Accepted for interface compatibility but not read by
        this method; the target label handed to the VAE is always
        ``1.0 - predicted label`` of the query instance.

    :return: A CounterfactualExamples object built from the encoded query
        instance, its predicted label, the generated counterfactuals and their
        predicted labels. No sparse counterfactuals are produced, so the sparse
        slots are ``None`` and the sparsity coefficient is 0.
    """
    # Load the latest trained CFVAE weights and switch to inference mode.
    self.cf_vae.load_state_dict(torch.load(self.save_path))
    self.cf_vae.eval()

    # Convert the query instance into a (1, n_features) numpy array.
    query_instance = self.data_interface.prepare_query_instance(
        query_instance=query_instance, encode=True)
    query_instance = np.array([query_instance.iloc[0].values])

    if query_instance.shape[0] > self.batch_size:
        test_dataset = np.array_split(
            query_instance, query_instance.shape[0] // self.batch_size, axis=0)
    else:
        test_dataset = [query_instance]

    final_gen_cf = []
    final_cf_pred = []
    final_test_pred = []

    # Iterate over the batches themselves: the number of batches is not the
    # number of rows once array_split has chunked the data.
    for batch in test_dataset:
        train_x = torch.tensor(batch).float()
        train_y = torch.argmax(self.pred_model(train_x), dim=1)

        curr_gen_cf = []
        curr_cf_pred = []
        curr_test_pred = train_y.numpy()

        for _ in range(total_CFs):
            # Resample until the generated point is predicted as a class
            # different from the query's.
            # NOTE(review): this retry has no upper bound, and the tensor
            # truth test assumes a single-row batch - confirm before batching.
            while True:
                _, _, _, x_pred, cf_label = self.cf_vae.compute_elbo(
                    train_x, 1.0 - train_y, self.pred_model)
                if not (cf_label == train_y):
                    break

            x_pred = x_pred.detach().numpy()

            # Convert mixed scores into one-hot representations: within each
            # categorical group the highest-scoring column becomes 1 and the
            # others 0 (first column wins on ties, as np.argmax does).
            row_ids = np.arange(x_pred.shape[0])
            for group in self.cf_vae.encoded_categorical_feature_indexes:
                cols = np.asarray(group)
                winners = cols[np.argmax(x_pred[:, cols], axis=1)]
                x_pred[:, cols] = 0
                x_pred[row_ids, winners] = 1

            cf_label = cf_label.detach().numpy()
            cf_label = np.reshape(cf_label, (cf_label.shape[0], 1))

            curr_gen_cf.append(x_pred)
            curr_cf_pred.append(cf_label)

        final_gen_cf.append(curr_gen_cf)
        final_cf_pred.append(curr_cf_pred)
        final_test_pred.append(curr_test_pred)

    # Only the first batch / first row is reported. Sparse CFs and their
    # predictions are None, and the sparsity coefficient is 0.
    return exp.CounterfactualExamples(self.data_interface,
                                      query_instance[0],
                                      final_test_pred[0][0],
                                      final_gen_cf[0],
                                      final_cf_pred[0],
                                      None,
                                      None,
                                      0)
def _generate_counterfactuals(self, query_instance, total_CFs, desired_range=None, desired_class="opposite",
                              features_to_vary="all", permitted_range=None, sparsity_weight=1,
                              feature_weights="inverse_mad", stopping_threshold=0.5, posthoc_sparsity_param=0.1,
                              posthoc_sparsity_algorithm="linear", verbose=False):
    """Generates diverse counterfactual explanations

    :param query_instance: A dictionary of feature names and values. Test point of interest.
    :param total_CFs: Total number of counterfactuals required.
    :param desired_range: For regression problems. Contains the outcome range to generate counterfactuals in.
    :param desired_class: Desired counterfactual class - can take 0 or 1. Default value is "opposite" to the
                          outcome class of query_instance for binary classification.
    :param features_to_vary: Either a string "all" or a list of feature names to vary.
    :param permitted_range: Dictionary with continuous feature names as keys and permitted min-max range in
                            list as values. Defaults to the range inferred from training data. If None, uses
                            the parameters initialized in data_interface.
    :param sparsity_weight: Parameter to determine how much importance to give to sparsity
    :param feature_weights: Either "inverse_mad" or a dictionary with feature names as keys and corresponding
                            weights as values. Default option is "inverse_mad" where the weight for a
                            continuous feature is the inverse of the Median Absolute Deviation (MAD) of the
                            feature's values in the training set; the weight for a categorical feature is
                            equal to 1 by default.
    :param stopping_threshold: Minimum threshold for counterfactuals target class probability.
    :param posthoc_sparsity_param: Parameter for the post-hoc operation on continuous features to enhance
                                   sparsity.
    :param posthoc_sparsity_algorithm: Perform either linear or binary search. Takes "linear" or "binary".
                                       Prefer binary search when a feature range is large (for instance,
                                       income varying from 10k to 1000k) and only if the features share a
                                       monotonic relationship with predicted outcome in the model.
    :param verbose: Parameter to determine whether to print 'Diverse Counterfactuals found!'

    :raises ValueError: If desired_range has its lower bound above its upper bound, if "opposite" is
                        requested for a classifier with more than two output nodes, or if an integer
                        desired_class exceeds num_output_nodes - 1.

    :return: A CounterfactualExamples object to store and visualize the resulting counterfactual
             explanations (see diverse_counterfactuals.py).
    """
    # Work on a copy so the KD-tree construction cannot alter the interface's training frame.
    data_df_copy = self.data_interface.data_df.copy()

    features_to_vary = self.setup(features_to_vary, permitted_range, query_instance, feature_weights)

    # Prepares user defined query_instance for DiCE.
    query_instance_orig = query_instance.copy()
    query_instance = self.data_interface.prepare_query_instance(query_instance=query_instance)

    # find the predicted value of query_instance
    test_pred = self.predict_fn(query_instance)[0]

    query_instance[self.data_interface.outcome_name] = test_pred
    desired_class = self.misc_init(stopping_threshold, desired_class, desired_range, test_pred)

    # Identity check: `!= None` is an equality comparison, which array-like ranges
    # evaluate element-wise and then fail on in a boolean context.
    if desired_range is not None:
        if desired_range[0] > desired_range[1]:
            raise ValueError("Invalid Range!")

    if desired_class == "opposite" and self.model.model_type == 'classifier':
        if self.num_output_nodes == 2:
            desired_class = 1.0 - test_pred

        elif self.num_output_nodes > 2:
            raise ValueError("Desired class can't be opposite if the number of classes is more than 2.")

    if isinstance(desired_class, int) and desired_class > self.num_output_nodes - 1:
        raise ValueError("Desired class should be within 0 and num_classes-1.")

    # Partitioned dataset and KD Tree for each class (binary) of the dataset
    self.dataset_with_predictions, self.KD_tree, self.predictions = \
        self.build_KD_tree(data_df_copy, desired_range, desired_class, self.predicted_outcome_name)

    query_instance, cfs_preds = self.find_counterfactuals(data_df_copy,
                                                          query_instance, query_instance_orig,
                                                          desired_range,
                                                          desired_class,
                                                          total_CFs, features_to_vary,
                                                          permitted_range,
                                                          sparsity_weight,
                                                          stopping_threshold,
                                                          posthoc_sparsity_param,
                                                          posthoc_sparsity_algorithm,
                                                          verbose)
    self.cfs_preds = cfs_preds

    return exp.CounterfactualExamples(data_interface=self.data_interface,
                                      final_cfs_df=self.final_cfs_df,
                                      test_instance_df=query_instance,
                                      final_cfs_df_sparse=self.final_cfs_df_sparse,
                                      posthoc_sparsity_param=posthoc_sparsity_param,
                                      desired_range=desired_range,
                                      desired_class=desired_class,
                                      model_type=self.model.model_type)
def _generate_counterfactuals(self, query_instance, total_CFs, desired_range, desired_class,
                              permitted_range, features_to_vary, stopping_threshold=0.5, posthoc_sparsity_param=0.1,
                              posthoc_sparsity_algorithm="linear", sample_size=1000, random_seed=None, verbose=False):
    """Generate counterfactuals by randomly sampling features.

    :param query_instance: Test point of interest. A dictionary of feature names and values or a single row
                           dataframe.
    :param total_CFs: Total number of counterfactuals required.
    :param desired_range: For regression problems. Contains the outcome range to generate counterfactuals in.
    :param desired_class: Desired counterfactual class - can take 0 or 1. Default value is "opposite" to the
                          outcome class of query_instance for binary classification.
    :param permitted_range: Dictionary with feature names as keys and permitted range in list as values.
                            Defaults to the range inferred from training data. If None, uses the parameters
                            initialized in data_interface.
    :param features_to_vary: Either a string "all" or a list of feature names to vary.
    :param stopping_threshold: Minimum threshold for counterfactuals target class probability.
    :param posthoc_sparsity_param: Parameter for the post-hoc operation on continuous features to enhance
                                   sparsity.
    :param posthoc_sparsity_algorithm: Perform either linear or binary search. Takes "linear" or "binary".
                                       Prefer binary search when a feature range is large (for instance,
                                       income varying from 10k to 1000k) and only if the features share a
                                       monotonic relationship with predicted outcome in the model.
    :param sample_size: Sampling size
    :param random_seed: Random seed for reproducibility
    :param verbose: If True, print a message when the requested number of counterfactuals is found.
                    Messages about missing or too few counterfactuals are printed regardless.

    :returns: A CounterfactualExamples object that contains the dataframe of generated counterfactuals as an
              attribute.
    """
    if permitted_range is None:
        # use the precomputed default
        self.feature_range = self.data_interface.permitted_range
    else:
        # compute the new ranges based on user input
        self.feature_range, _ = self.data_interface.get_features_range(permitted_range)

    # Do predictions once on the query_instance and reuse across to reduce the number
    # of inferences.
    model_predictions = self.predict_fn(query_instance)

    # number of output nodes of ML model
    self.num_output_nodes = None
    if self.model.model_type == ModelTypes.Classifier:
        self.num_output_nodes = model_predictions.shape[1]

    # query_instance need no transformation for generating CFs using random sampling.
    # find the predicted value of query_instance
    test_pred = model_predictions[0]
    if self.model.model_type == ModelTypes.Classifier:
        self.target_cf_class = self.infer_target_cfs_class(
            desired_class, test_pred, self.num_output_nodes)
    elif self.model.model_type == ModelTypes.Regressor:
        self.target_cf_range = self.infer_target_cfs_range(desired_range)

    # fixing features that are to be fixed
    self.total_CFs = total_CFs
    self.features_to_vary = features_to_vary
    if features_to_vary == "all":
        self.features_to_vary = self.data_interface.feature_names
        self.fixed_features_values = {}
    else:
        self.fixed_features_values = {}
        for feature in self.data_interface.feature_names:
            if feature not in features_to_vary:
                self.fixed_features_values[feature] = query_instance[feature].iat[0]

    self.stopping_threshold = stopping_threshold
    if self.model.model_type == ModelTypes.Classifier:
        # TODO Generalize this for multi-class
        if self.target_cf_class == 0 and self.stopping_threshold > 0.5:
            self.stopping_threshold = 0.25
        elif self.target_cf_class == 1 and self.stopping_threshold < 0.5:
            self.stopping_threshold = 0.75

    # get random samples for each feature independently
    start_time = timeit.default_timer()
    random_instances = self.get_samples(self.fixed_features_values,
                                        self.feature_range,
                                        sampling_random_seed=random_seed,
                                        sampling_size=sample_size)

    # Generate copies of the query instance that will be changed one feature
    # at a time to encourage sparsity.
    cfs_df = None
    candidate_cfs = pd.DataFrame(np.repeat(query_instance.values, sample_size, axis=0),
                                 columns=query_instance.columns)

    # Loop to change one feature at a time, then two features, and so on.
    for num_features_to_vary in range(1, len(self.features_to_vary) + 1):
        selected_features = np.random.choice(self.features_to_vary, (sample_size, 1), replace=True)
        for k in range(sample_size):
            feature = selected_features[k][0]
            candidate_cfs.at[k, feature] = random_instances.at[k, feature]

        scores = self.predict_fn(candidate_cfs)
        validity = self.decide_cf_validity(scores)
        if sum(validity) > 0:
            rows_to_add = candidate_cfs[validity == 1]

            if cfs_df is None:
                cfs_df = rows_to_add.copy()
            else:
                # DataFrame.append was removed in pandas 2.0; concat keeps the
                # original row labels exactly as append did.
                cfs_df = pd.concat([cfs_df, rows_to_add])
            cfs_df.drop_duplicates(inplace=True)

        # Always change at least 2 features before stopping. cfs_df stays None
        # until the first valid candidate appears, so guard before len().
        if num_features_to_vary >= 2 and cfs_df is not None and len(cfs_df) >= total_CFs:
            break

    self.total_cfs_found = 0
    self.valid_cfs_found = False
    if cfs_df is not None and len(cfs_df) > 0:
        if len(cfs_df) > total_CFs:
            cfs_df = cfs_df.sample(total_CFs)
        cfs_df.reset_index(inplace=True, drop=True)
        self.cfs_pred_scores = self.predict_fn(cfs_df)
        cfs_df[self.data_interface.outcome_name] = self.get_model_output_from_scores(self.cfs_pred_scores)

        self.total_cfs_found = len(cfs_df)
        self.valid_cfs_found = self.total_cfs_found >= self.total_CFs

        final_cfs_df = cfs_df[self.data_interface.feature_names + [self.data_interface.outcome_name]]
        final_cfs_df[self.data_interface.outcome_name] = \
            final_cfs_df[self.data_interface.outcome_name].round(self.outcome_precision)
        self.cfs_preds = final_cfs_df[[self.data_interface.outcome_name]].values
        self.final_cfs = final_cfs_df[self.data_interface.feature_names].values
    else:
        final_cfs_df = None
        self.cfs_preds = None
        self.cfs_pred_scores = None
        self.final_cfs = None

    test_instance_df = self.data_interface.prepare_query_instance(query_instance)
    test_instance_df[self.data_interface.outcome_name] = \
        np.array(np.round(self.get_model_output_from_scores((test_pred,)), self.outcome_precision))

    # post-hoc operation on continuous features to enhance sparsity - only for public data
    if posthoc_sparsity_param is not None and posthoc_sparsity_param > 0 and \
            self.final_cfs is not None and 'data_df' in self.data_interface.__dict__:
        final_cfs_df_sparse = final_cfs_df.copy()
        final_cfs_df_sparse = self.do_posthoc_sparsity_enhancement(final_cfs_df_sparse,
                                                                   test_instance_df,
                                                                   posthoc_sparsity_param,
                                                                   posthoc_sparsity_algorithm)
    else:
        final_cfs_df_sparse = None

    self.elapsed = timeit.default_timer() - start_time
    m, s = divmod(self.elapsed, 60)
    if self.valid_cfs_found:
        if verbose:
            print('Diverse Counterfactuals found! total time taken: %02d' % m,
                  'min %02d' % s, 'sec')
    else:
        if self.total_cfs_found == 0:
            print('No Counterfactuals found for the given configuration, perhaps try with different parameters...',
                  '; total time taken: %02d' % m, 'min %02d' % s, 'sec')
        else:
            print('Only %d (required %d) ' % (self.total_cfs_found, self.total_CFs),
                  'Diverse Counterfactuals found for the given configuration, perhaps try with different parameters...',
                  '; total time taken: %02d' % m, 'min %02d' % s, 'sec')

    return exp.CounterfactualExamples(data_interface=self.data_interface,
                                      final_cfs_df=final_cfs_df,
                                      test_instance_df=test_instance_df,
                                      final_cfs_df_sparse=final_cfs_df_sparse,
                                      posthoc_sparsity_param=posthoc_sparsity_param,
                                      desired_class=desired_class,
                                      desired_range=desired_range,
                                      model_type=self.model.model_type)