def recourse_builder(request, classifier, action_set):
    action_set.align(classifier)
    rb = RecourseBuilder(solver=request.param,
                         action_set=action_set,
                         clf=classifier)
    return rb

def recourse_builder_cbc(classifier, action_set):
    action_set.align(classifier)
    rb = RecourseBuilder(solver=_SOLVER_TYPE_CBC,
                         action_set=action_set,
                         clf=classifier)
    return rb

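# Both builders above read like pytest fixture bodies (`request.param` implies a
# parametrized fixture whose decorator was stripped during extraction). A minimal
# sketch of how such a fixture would be registered, assuming pytest and existing
# `classifier` / `action_set` fixtures; the name and parameter list here are
# illustrative, not from the original:
import pytest

@pytest.fixture(params=[_SOLVER_TYPE_CBC])
def recourse_builder_param(request, classifier, action_set):
    action_set.align(classifier)
    return RecourseBuilder(solver=request.param, action_set=action_set, clf=classifier)
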
def my_recourse_builder_cplex_fake(my_actionset_fake):
    """CPLEX RecourseBuilder with fake values."""
    x = [1, 1, 1]
    coefficients = [1, 2, 3]
    return RecourseBuilder(solver="cplex",
                           action_set=my_actionset_fake,
                           coefficients=coefficients,
                           x=x)

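# A runnable sketch of building and solving a fake problem like the one above,
# assuming the ActionSet/RecourseBuilder API used throughout this file and a
# CPLEX install; the data and variable names are illustrative, not from the
# original (the intercept defaults to 0.0 when no classifier is given):
import numpy as np
import pandas as pd

X_fake = pd.DataFrame(np.random.randint(0, 5, size=(20, 3)), columns=['f0', 'f1', 'f2'])
actionset_fake = ActionSet(X_fake)
actionset_fake.align(np.array([1.0, 2.0, 3.0]))
rb_fake = RecourseBuilder(solver="cplex",
                          action_set=actionset_fake,
                          coefficients=[1, 2, 3],
                          x=[1, 1, 1])
info = rb_fake.fit()  # dict with keys such as 'actions', 'total_cost', 'feasible'
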
def get_flipset_solutions(model, data, action_set, mip_cost_type='max', scaler=None, print_flag=True):
    """
    Run a basic audit of a model on the training dataset.

    :param model: fitted scikit-learn linear classifier
    :param data: dict containing the feature matrix 'X' (and 'X_scaled' when a scaler is used)
    :param action_set: ActionSet for the features
    :param mip_cost_type: cost function used by the MIP ('max' by default)
    :param scaler: fitted scaler used to train the model, or None
    :param print_flag: True to print the cost of each solved IP
    :return: list of dicts, one per audited point, containing the output of RecourseBuilder.fit()
    """
    if scaler is not None:
        yhat = model.predict(data['X_scaled'])
        coefficients, intercept = undo_coefficient_scaling(
            coefficients=np.array(model.coef_).flatten(),
            intercept=model.intercept_[0],
            scaler=scaler)
    else:
        yhat = model.predict(data['X'])
        coefficients, intercept = np.array(model.coef_).flatten(), model.intercept_[0]

    action_set.align(coefficients)

    # audit each point that is currently assigned a negative prediction
    audit_results = []
    predicted_neg = np.flatnonzero(yhat < 1)
    if predicted_neg.size > 0:  # any(predicted_neg) fails when 0 is the only matching index
        U = data['X'].iloc[predicted_neg].values
        fb = RecourseBuilder(coefficients=coefficients,
                             intercept=intercept,
                             action_set=action_set,
                             x=U[0],
                             mip_cost_type=mip_cost_type)

        # basic audit
        start_time = time.time()
        if print_flag:
            for i, u in enumerate(U):
                fb.x = u
                info = fb.fit()
                audit_results.append(info)
                print_log('cost[%06d] = %1.2f' % (i, info['total_cost']))
        else:
            for i, u in enumerate(U):
                fb.x = u
                audit_results.append(fb.fit())

        print_log('runtime: solved %i IPs in %1.1f seconds' % (i + 1, time.time() - start_time))

    return audit_results

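# A runnable sketch of get_flipset_solutions on synthetic data, assuming a MIP
# solver is installed and that fit() reports 'total_cost' as in the audit loop
# above; all variable names here are illustrative, not from the original:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
X_demo = pd.DataFrame(rng.normal(size=(100, 3)), columns=['u', 'v', 'w'])
y_demo = (X_demo.sum(axis=1) > 0).astype(int).replace({0: -1})
data_demo = {'X': X_demo, 'y': y_demo}

model_demo = LogisticRegression().fit(X_demo, y_demo)
action_set_demo = ActionSet(X=X_demo)
results = get_flipset_solutions(model_demo, data_demo, action_set_demo, mip_cost_type='max')
print('mean cost: %1.2f' % np.mean([r['total_cost'] for r in results]))
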
def test_rb_onehot_encoding(data, solver):
    if len(data['categorical_names']) == 1:
        # pick only the indicator variables
        names = data['onehot_names']
        k = len(names)
        X = data['X'][names]
        assert np.all(X.sum(axis=1) == 1)

        # setup classifier of the form
        # w = [3, -1, -1, ..., -1], t = -1
        # score(x[0] = 1) = w[0] + t =  2 -> yhat = +1
        # score(x[j] = 1) = w[j] + t = -2 -> yhat = -1 for j = 1, 2, ..., k-1
        coefs = -np.ones(k)
        coefs[0] = 3.0
        intercept = -1.0

        # setup action set
        a = ActionSet(X)
        a.add_constraint('subset_limit', names=names, lb=0, ub=1)
        a.set_alignment(coefficients=coefs, intercept=intercept)
        rb = RecourseBuilder(action_set=a, coefficients=coefs, intercept=intercept, solver=solver)

        for j in range(1, k):
            x = np.zeros(k)
            x[j] = 1.0
            assert rb.score(x) < 0

            # set point
            rb.x = x

            # find optimal action
            info = rb.fit()
            a_opt = info['actions']

            # validate solution: the optimal action turns off x[j] and turns on x[0]
            x_new = x + a_opt
            assert rb.score(x_new) > 0
            assert np.isclose(a_opt[j], -1.0)
            assert np.isclose(np.sum(x_new), 1.0)

class RecourseAuditor(object):
    """
    Compute the feasibility and cost of recourse over a sample of points that
    were denied the desired outcome (i.e., the audit is not run on points that
    already qualify, e.g., y_pred > 0).
    """
    _default_print_flag = True

    def __init__(self, action_set, **kwargs):
        """
        :param action_set: ActionSet for features
        :param clf: scikit-learn linear classifier
        :param coefficients: vector of coefficients (only used when clf is not specified)
        :param intercept: set to 0.0 by default (only used when clf is not specified)
        :param solver: valid MIP solver
        """
        # action set
        assert isinstance(action_set, ActionSet)
        self.action_set = action_set

        # attach coefficients
        self.coefficients, self.intercept = parse_classifier_args(**kwargs)

        # align coefficients to action set
        self.action_set.align(self.coefficients)

        # set solver
        self.solver = kwargs.get('solver', DEFAULT_SOLVER)

        # setup recourse problem
        self.builder = RecourseBuilder(coefficients=self.coefficients,
                                       intercept=self.intercept,
                                       action_set=self.action_set,
                                       solver=self.solver)

        self._print_flag = kwargs.get('print_flag', self._default_print_flag)

    @property
    def print_flag(self):
        return self._print_flag

    @print_flag.setter
    def print_flag(self, flag):
        if flag is None:
            self._print_flag = bool(self._default_print_flag)
        elif isinstance(flag, bool):
            self._print_flag = bool(flag)
        else:
            raise AttributeError('print_flag must be boolean or None')

    def audit(self, X, y_desired=1):
        """
        Evaluate the cost and feasibility of recourse for each point in X that
        is not assigned the desired outcome.

        :param X: feature matrix (np.ndarray or pd.DataFrame)
        :param y_desired: desired label (+1 by default)
        :return: pd.DataFrame containing the feasibility and cost of recourse for each point in X.
                 Rows that already attain the desired outcome have entries: feasible = NaN & cost = NaN.
                 Rows that are certified to have no recourse have entries: feasible = False & cost = Inf.
        """
        if isinstance(X, pd.DataFrame):
            raw_index = X.index.tolist()
            X = X.values
        else:
            raw_index = list(range(X.shape[0]))

        assert isinstance(X, np.ndarray)
        assert X.ndim == 2
        assert X.shape[0] >= 1
        assert X.shape[1] == len(self.coefficients)
        assert np.isfinite(X).all()
        assert float(y_desired) in {1.0, -1.0, 0.0}

        U, distinct_idx = np.unique(X, axis=0, return_inverse=True)
        scores = U.dot(self.coefficients)
        if y_desired > 0:
            audit_idx = np.less(scores, -self.intercept)
        else:
            audit_idx = np.greater_equal(scores, -self.intercept)
        audit_idx = np.flatnonzero(audit_idx)

        # solve the recourse problem for each distinct point without the desired outcome
        output = []
        pbar = tqdm(total=len(audit_idx))  # stop tqdm from playing badly in ipython notebook
        for idx in audit_idx:
            self.builder.x = U[idx, :]
            info = self.builder.fit()
            info['idx'] = idx
            output.append({k: info[k] for k in ['feasible', 'cost', 'idx']})
            pbar.update(1)
        pbar.close()

        # add in points that were not denied recourse
        df = pd.DataFrame(output)
        df = df.set_index('idx')

        # include unique points that already attain the desired label
        df = df.reindex(range(U.shape[0]))

        # include duplicates of original points
        df = df.iloc[distinct_idx]
        df = df.reset_index(drop=True)
        df.index = raw_index
        return df

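# A minimal usage sketch for RecourseAuditor, assuming a fitted linear
# classifier `clf`, a feature matrix `X`, and an `action_set` as in the script
# below (names illustrative, not from the original):
auditor = RecourseAuditor(action_set, clf=clf)
audit_df = auditor.audit(X, y_desired=1)
# rows with feasible == NaN already attain the desired outcome;
# rows with feasible == False and cost == Inf are certified to have no recourse
print(audit_df[['feasible', 'cost']].describe())
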
action_set[immutable_attributes].mutable = False
action_set['CriticalAccountOrLoansElsewhere'].step_direction = -1
action_set['CheckingAccountBalance_geq_0'].step_direction = 1

# fit classifier, get median score, and get denied individuals
clf = LogisticRegression(max_iter=1000, solver='lbfgs')
clf.fit(X, y)
coefficients = clf.coef_[0]
intercept = clf.intercept_[0]
scores = pd.Series(clf.predict_proba(X)[:, 1])
p = scores.median()
denied_individuals = scores.loc[lambda s: s <= p].index
idx = denied_individuals[0]
x = X.values[idx]

## CPLEX
fb_cplex = RecourseBuilder(solver=_SOLVER_TYPE_CPX,
                           coefficients=coefficients,
                           intercept=intercept - np.log(p / (1. - p)),
                           action_set=action_set,
                           x=x)
fb_cplex.fit()

## CBC
fb_cbc = RecourseBuilder(solver=_SOLVER_TYPE_CBC,
                         coefficients=coefficients,
                         intercept=intercept - np.log(p / (1. - p)),
                         action_set=action_set,
                         x=x)
fb_cbc.fit()

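# Why the intercept is shifted by log(p / (1 - p)): for a logistic model,
# sigma(w.x + b) >= p  <=>  w.x + b >= log(p / (1 - p)), so subtracting the
# log-odds of p from the intercept makes "score >= 0" equivalent to
# "predicted probability >= p". A standalone check of that equivalence
# (values here are illustrative, not from the original script):
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

w, b, p = np.array([1.0, -2.0]), 0.5, 0.75
x_chk = np.array([2.0, 0.25])
score_shifted = w.dot(x_chk) + b - np.log(p / (1.0 - p))
assert (score_shifted >= 0) == (sigmoid(w.dot(x_chk) + b) >= p)
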
class Flipset(object):
    """
    List of actions that will flip the predicted value of a classifier from x
    """

    _valid_enumeration_types = VALID_ENUMERATION_TYPES
    _valid_cost_types = VALID_MIP_COST_TYPES
    df_column_names = [
        'cost', 'size', 'features', 'feature_idx', 'x', 'x_new', 'score_new',
        'yhat_new', 'feasible', 'flipped'
    ]

    def __init__(self, x, action_set, solver=DEFAULT_SOLVER, **kwargs):

        # attach action set
        assert isinstance(action_set, ActionSet)
        self.action_set = action_set
        self._n_variables = len(action_set)
        self._variable_names = action_set.name
        self._solver = solver

        # attach feature vector
        assert isinstance(x, (list, np.ndarray))
        self._x = np.array(x, dtype=np.float_).flatten()

        # attach coefficients
        self._coefs, self._intercept = parse_classifier_args(**kwargs)

        # initialize Flipset attributes
        self._builder = kwargs.get('builder')
        self._items = []
        self._df = pd.DataFrame(columns=Flipset.df_column_names, dtype=object)
        self._sort_args = {'by': ['size', 'cost', 'score_new'], 'inplace': True, 'axis': 0}

    def __len__(self):
        """
        :return: # of items in the flipset
        """
        return len(self._items)

    def __str__(self):
        return str(self._df[['cost', 'size', 'features', 'x', 'x_new']])

    def __repr__(self):
        s = [
            'Flipset with %d items' % len(self),
            'x: %r' % self._x,
            'w: (%s)' % self._coefs,
            'items: %r' % self._items
        ]
        return '\n'.join(s)

    ### properties ###

    @property
    def x(self):
        """
        :return: feature vector
        """
        return self._x

    @property
    def items(self):
        """
        Dictionary representation of Flipset.
        This is a list containing mildly processed output from recourse.builder.
        """
        return self._items

    @property
    def df(self):
        """
        Pandas DataFrame representation of Flipset.
        Each row represents a different action to flip the prediction.
        Rows are sorted according to the last arguments passed to Flipset.sort().

        DESCRIPTION OF COLUMNS
        ----------------------
        cost:        cost of action
        size:        # of altered features
        features:    names of altered variables
        feature_idx: column indices of altered features
        x:           values of x[j] for j in feature_idx
        x_new:       values of x[j] + a[j] for j in feature_idx
        score_new:   value of the score function at x_new = x + a, i.e. w0 + w.dot(x + a)
        yhat_new:    value of the prediction at x_new = x + a, i.e. f(score_new)
        feasible:    True if x + a is a feasible action
        flipped:     True if f(x) != f(x + a)
        """
        return self._df

    @property
    def yhat(self):
        # note: returns the raw score at x, not its sign
        return self._intercept + np.dot(self._coefs, self._x)

    def predict(self, actions=None):
        return np.sign(self.score(actions=actions))

    def score(self, actions=None):
        if actions is not None:
            return self._intercept + np.dot(self._coefs, self._x + actions)
        else:
            return self._intercept + np.dot(self._coefs, self._x)

    #### API functions ####

    def populate(self, total_items=10, enumeration_type='distinct_subsets', cost_type='local',
                 time_limit=None, node_limit=None, display_flag=None):
        """
        Generates a list of actions to flip the predicted value of the linear
        classifier from feature vector x.

        :param total_items: maximum # of actions to generate;
                            set to float('inf') to enumerate all possible actions
        :param enumeration_type: enumeration algorithm to use for the flipset;
                                 must be a string in Flipset._valid_enumeration_types:
                                 - 'distinct_subsets'
                                 - 'mutually_exclusive'
        :param cost_type: cost function to use for flipset generation;
                          must be a string in Flipset._valid_cost_types, e.g. 'local'
        :param time_limit: max # of seconds to spend before stopping the solver at each iteration
        :param node_limit: max # of branch-and-bound nodes to process before stopping the solver at each iteration
        :param display_flag: True to display solver progress during enumeration
        :return: self
        """
        assert enumeration_type in self._valid_enumeration_types, \
            'enumeration_type must be one of %r' % self._valid_enumeration_types
        assert cost_type in self._valid_cost_types, \
            'cost_type must be one of %r' % self._valid_cost_types

        if self._builder is None:
            self._builder = RecourseBuilder(action_set=self.action_set,
                                            x=self.x,
                                            coefficients=self._coefs,
                                            intercept=self._intercept,
                                            mip_cost_type=cost_type,
                                            solver=self._solver)

        items = self._builder.populate(total_items=total_items,
                                       enumeration_type=enumeration_type,
                                       time_limit=time_limit,
                                       node_limit=node_limit,
                                       display_flag=display_flag)
        self._add(items)
        return self

    def sort(self, **kwargs):
        """
        Reorders the items in the Flipset dataframe.
        The arguments used to sort are saved and reused by subsequent calls.

        :param kwargs: list of fields to use in the sort, or arguments passed to pd.DataFrame.sort_values
        :return:
        """
        if len(kwargs) == 0:
            self._df.sort_values(**self._sort_args)
            return

        if 'by' in kwargs:
            sort_names = kwargs['by']
        else:
            sort_names = list(kwargs.keys())

        assert isinstance(sort_names, list)
        assert len(sort_names) > 0
        for s in sort_names:
            assert isinstance(s, str)
            assert s in self._df.columns

        sort_args = {
            'by': sort_names,
            'inplace': kwargs.get('inplace', True),
            'axis': kwargs.get('axis', 0),
        }
        self._df.sort_values(**sort_args)
        self._sort_args = sort_args

    def view(self):
        """
        Returns the Flipset as a pandas DataFrame.
        :return:
        """
        return self._df

    def to_flat_df(self):
        """Flatten out the actions in the flipset to produce either a LaTeX or HTML representation."""
        self.sort()
        tex_columns = ['features', 'x', 'x_new']
        tex_df = self._df[tex_columns]

        # split components for each item
        tex_df = tex_df.reset_index().rename(columns={'index': 'item_id'})
        df_list = []
        for n in tex_columns:
            tmp = tex_df.set_index(['item_id'])[n].apply(pd.Series).stack()
            tmp = tmp.reset_index().rename(columns={'level_1': 'var_id'})
            tmp_name = tmp.columns[-1]
            tmp = tmp.rename(columns={tmp_name: n})
            df_list.append(tmp)

        # combine into a flattened list
        flat_df = df_list[0]
        for k in range(1, len(df_list)):
            flat_df = flat_df.merge(df_list[k])

        # drop the merge index
        flat_df = flat_df.drop(columns=['var_id'])

        # index items by item_id
        flat_df = flat_df.sort_values(by='item_id')
        flat_df = flat_df.rename(columns={'item_id': 'item'})
        return flat_df.set_index('item')

    def to_latex(self, name_formatter='\\textit'):
        """
        Converts the current Flipset to a LaTeX table.

        :param name_formatter: LaTeX macro used to wrap feature names
        :return: LaTeX table as a string
        """
        flat_df = self.to_flat_df()

        # add another column for the latex arrow symbol
        idx = flat_df.columns.tolist().index('x_new')
        flat_df.insert(loc=idx, column='to', value=['longrightarrow'] * len(flat_df))

        # name headers (escape the backslash so '\t' is not parsed as a tab)
        flat_df = flat_df.rename(columns={
            'features': '\\textsc{Feature Subset}',
            'x': '\\textsc{Current Values}',
            'x_new': '\\textsc{Required Values}'
        })

        # get raw tex table
        table = flat_df.to_latex(multirow=True, index=True, escape=False, na_rep='-', column_format='rlccc')

        # manually wrap names with a formatter function
        if name_formatter is not None:
            for v in self._variable_names:
                table = table.replace(v, '%s{%s}' % (name_formatter, v))

        # add the backslash for the arrow
        table = table.replace('longrightarrow', '$\\longrightarrow$')

        # minor embellishments
        table = table.split('\n')
        table[2] = table[2].replace('to', '')
        table[2] = table[2].replace('{}', '')
        table.pop(3)
        table.pop(3)
        return '\n'.join(table)

    def to_html(self):

        def _color_white_or_gray(row):
            color = 'white' if row.name[0] % 2 == 0 else 'lightgray'
            res = 'background-color: %s' % color
            return [res] * len(row)

        flat_df = self.to_flat_df()

        # add another column for the arrow symbol
        idx = flat_df.columns.tolist().index('x_new')
        flat_df.insert(loc=idx, column='to', value=['→'] * len(flat_df))

        # build an (item, row) MultiIndex so rows can be striped per item
        idx = (pd.DataFrame(flat_df.index)
               .assign(row=lambda df: df.groupby('item').cumcount().pipe(lambda s: s + 1))
               .pipe(lambda df: list(zip(df['item'], df['row']))))
        idx = pd.MultiIndex.from_tuples(idx)
        flat_df.index = idx

        html = (flat_df.style
                .set_table_styles([{"selector": "tr", "props": [('background-color', 'white')]}])
                .apply(_color_white_or_gray, axis=1)
                .render())
        return html

    #### item management ####

    def _add(self, items):
        """
        :param items: new items (dict or list of dicts) to add to the flipset
        :return:
        """
        if isinstance(items, dict):
            items = [items]
        assert isinstance(items, list)
        items = list(map(lambda i: self._validate_item(i), items))
        self._items.extend(items)
        self._add_to_df(items)

    def _validate_item(self, item):
        """
        Checks an item to be added to the current Flipset.

        :param item: raw flipset item
        :return: item in correct format
        """
        assert isinstance(item, dict)
        required_fields = ['feasible', 'actions', 'cost']
        for k in required_fields:
            assert k in item, 'item missing field %s' % k
        item['actions'] = self._validate_action(item['actions'])
        assert item['cost'] > 0.0, 'total cost must be positive'
        assert item['feasible'], 'item must be feasible'
        return item

    def _validate_action(self, a):
        """
        Checks an action vector to be added to the current Flipset.

        :param a: action vector
        :return: a or AssertionError
        """
        a = np.array(a, dtype=np.float_).flatten()
        assert len(a) == self._n_variables, 'action vector must have %d elements' % self._n_variables
        assert np.isfinite(a).all(), 'actions must be finite'
        assert np.count_nonzero(a) >= 1, 'at least one action element must be non zero'
        # compare predictions (signs), not a raw score against a sign
        assert np.not_equal(self.predict(), self.predict(a)), \
            'actions do not flip the prediction from %d' % self.predict()
        return a

    def _add_to_df(self, items):
        if len(items) > 0:
            row_data = list(map(lambda item: self._item_to_df_row(item), items))
            self._df = self._df.append(row_data, ignore_index=True, sort=True)[self._df.columns.tolist()]
            self.sort()

    def _item_to_df_row(self, item):
        """
        Converts an item to a row in the data frame.

        :param item:
        :return:
        """
        x = self.x
        a = item['actions']
        h = self.predict(a)
        nnz_idx = np.flatnonzero(a)
        row = {
            'cost': float(item['cost']),
            'size': len(nnz_idx),
            'features': [self._variable_names[j] for j in nnz_idx],
            'feature_idx': nnz_idx,
            'x': x[nnz_idx],
            'x_new': x[nnz_idx] + a[nnz_idx],
            'score_new': self.score(a),
            'yhat_new': h,
            'feasible': item['feasible'],
            'flipped': np.not_equal(h, self.predict()),
        }
        return row

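# A minimal usage sketch for Flipset, assuming a denied point `x`, an aligned
# `action_set`, and a fitted linear classifier `clf` as elsewhere in this file
# (names illustrative, not from the original):
fs = Flipset(x=x, action_set=action_set, clf=clf)
fs.populate(total_items=5, enumeration_type='distinct_subsets', cost_type='local')
fs.sort(by=['cost'])
print(fs.df[['cost', 'size', 'features']])
# latex_table = fs.to_latex()  # requires at least one feasible item
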
def genExp(model_trained, factual_sample, norm_type, dataset_obj):
    start_time = time.time()

    # SIMPLE HACK!!
    # ActionSet() construction demands that all variables have a range to them. In
    # the case of one-hot ordinal variables (e.g., x2_ord_0, x2_ord_1, x2_ord_2),
    # the first sub-category (i.e., x2_ord_0) will always have range (1, 1), failing
    # the requirements of ActionSet(). Therefore, we add a DUMMY ROW to the data
    # frame, which is a copy of another row (so as not to throw off the range of
    # other attributes), but with a 0 value for all _ord_ variables (might as well
    # do this for all _cat_ variables as well).
    tmp_df = dataset_obj.data_frame_kurz
    sample_row = tmp_df.loc[0].to_dict()
    for attr_name_kurz in dataset_obj.getOneHotAttributesNames('kurz'):
        sample_row[attr_name_kurz] = 0
    tmp_df = tmp_df.append(pd.Series(sample_row), ignore_index=True)
    df = tmp_df
    X = df.loc[:, df.columns != 'y']

    # Enforce that binary and categorical (including ordinal) variables only take on 2 values
    custom_bounds = {
        attr_name_kurz: (0, 100, 'p')
        for attr_name_kurz in np.union1d(
            dataset_obj.getOneHotAttributesNames('kurz'),
            dataset_obj.getBinaryAttributeNames('kurz'))
    }
    action_set = ActionSet(X=X, custom_bounds=custom_bounds)
    # action_set['x1'].mutable = False  # x1 = 'Race'

    # The current implementation only supports any/none actionability
    for attr_name_kurz in dataset_obj.getInputAttributeNames('kurz'):
        attr_obj = dataset_obj.attributes_kurz[attr_name_kurz]
        if attr_obj.actionability == 'none':
            action_set[attr_name_kurz].mutable = False
        elif attr_obj.actionability == 'any':
            continue  # do nothing
        else:
            raise ValueError(
                f'Actionable Recourse does not support actionability type {attr_obj.actionability}')

    # Enforce search over an integer-based grid for integer-based variables
    for attr_name_kurz in np.union1d(
            dataset_obj.getIntegerBasedAttributeNames('kurz'),
            dataset_obj.getBinaryAttributeNames('kurz')):
        action_set[attr_name_kurz].step_type = "absolute"
        action_set[attr_name_kurz].step_size = 1

    coefficients = model_trained.coef_[0]
    intercept = model_trained.intercept_[0]

    if norm_type == 'one_norm':
        mip_cost_type = 'total'
    elif norm_type == 'infty_norm':
        mip_cost_type = 'max'
    else:
        raise ValueError(f'Actionable Recourse does not support norm_type {norm_type}')

    factual_sample_values = list(factual_sample.values())
    # p = .8
    rb = RecourseBuilder(
        solver="cplex",  # keyword is `solver`, matching RecourseBuilder's other call sites
        coefficients=coefficients,
        intercept=intercept,  # - (np.log(p / (1. - p))),
        action_set=action_set,
        x=factual_sample_values,
        mip_cost_type=mip_cost_type)
    output = rb.fit()
    counterfactual_sample_values = np.add(factual_sample_values, output['actions'])
    counterfactual_sample = dict(zip(factual_sample.keys(), counterfactual_sample_values))

    # factual_sample['y'] = False
    # counterfactual_sample['y'] = True
    counterfactual_sample['y'] = not factual_sample['y']
    counterfactual_plausible = True

    # IMPORTANT: no need to check for integer-based / binary-based plausibility,
    # because those were set above when we said step_type = 'absolute'! Just round!
    for attr_name_kurz in np.union1d(
            dataset_obj.getOneHotAttributesNames('kurz'),
            dataset_obj.getBinaryAttributeNames('kurz')):
        try:
            assert np.isclose(counterfactual_sample[attr_name_kurz],
                              np.round(counterfactual_sample[attr_name_kurz]))
            counterfactual_sample[attr_name_kurz] = np.round(counterfactual_sample[attr_name_kurz])
        except AssertionError:
            distance = -1
            counterfactual_plausible = False
            # return counterfactual_sample, distance

    # Perform plausibility-data-type check! Remember, all ordinal variables
    # have already been converted to categorical variables. It is important now
    # to check that 1 (and only 1) sub-category is activated in the resulting
    # counterfactual sample.
    already_considered = []
    for attr_name_kurz in dataset_obj.getOneHotAttributesNames('kurz'):
        if attr_name_kurz not in already_considered:
            siblings_kurz = dataset_obj.getSiblingsFor(attr_name_kurz)
            activations_for_category = [
                counterfactual_sample[attr_name_kurz]
                for attr_name_kurz in siblings_kurz
            ]
            sum_activations_for_category = np.sum(activations_for_category)
            if 'cat' in dataset_obj.attributes_kurz[attr_name_kurz].attr_type:
                if sum_activations_for_category == 1:
                    continue
                else:
                    # TODO: don't actually return early! Instead see the actual distance;
                    # fingers crossed that we can say that not only is their method giving
                    # counterfactuals at larger distances, but in a lot of cases they are
                    # not data-type plausible.
                    # INSTEAD, do below: convert to correct categorical/ordinal activations
                    # so we can compute the distance using the already-written function.
                    # Turns out we need to do nothing, because the distance between
                    # [0,1,0] and anything other than itself (e.g., [1,1,0] or [1,0,1])
                    # is always 1 :)
                    distance = -1
                    counterfactual_plausible = False
                    # return counterfactual_sample, distance
            elif 'ord' in dataset_obj.attributes_kurz[attr_name_kurz].attr_type:
                # TODO: assert activations are in order; if not, repeat as above...
                for idx in range(int(sum_activations_for_category)):
                    if activations_for_category[idx] != 1:
                        # Convert to correct categorical/ordinal activations so we can
                        # compute the distance using the already-written function:
                        # find the max index of 1 in the array, and set everything before it to 1.
                        # max_index_of_1 = np.where(np.array(activations_for_category) == 1)[0][-1]
                        # for idx2 in range(max_index_of_1 + 1):
                        #     counterfactual_sample[siblings_kurz[idx2]] = 1
                        # break
                        distance = -1
                        counterfactual_plausible = False
                        # return counterfactual_sample, distance
            else:
                raise Exception(f'{attr_name_kurz} must include either `cat` or `ord`.')
            already_considered.extend(siblings_kurz)

    # TODO: convert to correct categorical/ordinal activations so we can compute
    # distance = output['cost']  # TODO: this must change / be verified!???
    distance = normalizedDistance.getDistanceBetweenSamples(
        factual_sample, counterfactual_sample, norm_type, dataset_obj)

    # # TODO: is a post-feasibility step needed???? NO
    # # make plausible by rounding all non-numeric-real attributes to
    # # the nearest value in range
    # for idx, elem in enumerate(es_instance):
    #     attr_name_kurz = dataset_obj.getInputAttributeNames('kurz')[idx]
    #     attr_obj = dataset_obj.attributes_kurz[attr_name_kurz]
    #     if attr_obj.attr_type != 'numeric-real':
    #         # round() might give a value that is NOT plausible;
    #         # instead find the nearest plausible value
    #         es_instance[idx] = min(
    #             list(range(int(attr_obj.lower_bound), int(attr_obj.upper_bound) + 1)),
    #             key=lambda x: abs(x - es_instance[idx]))

    end_time = time.time()
    return {
        'factual_sample': factual_sample,
        'cfe_sample': counterfactual_sample,
        'cfe_found': True,  # TODO?
        'cfe_plausible': counterfactual_plausible,
        'cfe_distance': distance,
        'cfe_time': end_time - start_time,
    }

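# For the categorical case, the plausibility check above reduces to: within each
# one-hot group, exactly one indicator must be active. A standalone sketch of
# that invariant (the helper and sibling groups here are hypothetical, not
# MACE's actual data structures; the ordinal case additionally requires the
# active indicators to form a prefix):
def one_hot_plausible(sample, sibling_groups):
    # sample: dict of feature name -> value; sibling_groups: list of lists of sibling names
    return all(sum(sample[name] for name in group) == 1 for group in sibling_groups)

assert one_hot_plausible({'x2_ord_0': 1, 'x2_ord_1': 0}, [['x2_ord_0', 'x2_ord_1']])
assert not one_hot_plausible({'x2_ord_0': 1, 'x2_ord_1': 1}, [['x2_ord_0', 'x2_ord_1']])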