def _check_mlp_oce(N=1, dataset='h', ordering=True):
    """Smoke-check ordered action extraction on top of an MLP classifier.

    Fits an ``MLPClassifier`` on the training split provided by
    ``DatasetHelper``, selects the test rows the model predicts as class 1
    and prints the result of ``extract`` for the first ``N`` of them.

    Args:
        N: Number of class-1 test rows to process.
        dataset: Dataset key forwarded to ``DatasetHelper``.
        ordering: Forwarded unchanged to ``extract``.
    """
    from sklearn.neural_network import MLPClassifier
    from utils import DatasetHelper

    # Seed the global NumPy RNG before the split and the model fit.
    np.random.seed(1)
    D = DatasetHelper(dataset=dataset, feature_prefix_index=False)
    X_tr, X_ts, y_tr, _ = D.train_test_split()

    mdl = MLPClassifier(hidden_layer_sizes=(100, ),
                        max_iter=500,
                        activation='relu',
                        alpha=0.0001)
    mdl = mdl.fit(X_tr, y_tr)
    denied = X_ts[mdl.predict(X_ts) == 1]

    # Only the second matrix returned by interaction_matrix is consumed here.
    _, M = interaction_matrix(X_tr, interaction_type='causal')
    oce = MLPOrderedActionExtractor(
        mdl,
        X_tr,
        feature_names=D.feature_names,
        feature_types=D.feature_types,
        feature_categories=D.feature_categories,
        feature_constraints=D.feature_constraints,
        target_name=D.target_name,
        target_labels=D.target_labels,
        interaction_matrix=M)

    for n, x in enumerate(denied[:N]):
        print('# {}-th Denied Individual ---------------------------'.format(
            n + 1))
        oa = oce.extract(x,
                         K=6,
                         gamma=1.0,
                         ordering=ordering,
                         cost_type='uniform',
                         ordering_cost_type='uniform',
                         log_name='./lp/mlp')
        # -1 is treated as the "nothing to print" sentinel.
        if oa != -1:
            print(oa)
def _check_forest_oce(N=1, dataset='h', ordering=True):
    """Smoke-check ordered action extraction on top of a random forest.

    Fits a ``RandomForestClassifier`` on the training split provided by
    ``DatasetHelper``, selects the test rows the model predicts as class 1
    and prints the result of ``extract`` for the first ``N`` of them.

    Args:
        N: Number of class-1 test rows to process.
        dataset: Dataset key forwarded to ``DatasetHelper``.
        ordering: Forwarded unchanged to ``extract``.
    """
    from sklearn.ensemble import RandomForestClassifier
    from utils import DatasetHelper, interaction_matrix

    # Seed the global NumPy RNG before the split and the model fit.
    np.random.seed(1)
    D = DatasetHelper(dataset=dataset, feature_prefix_index=False)
    X_tr, X_ts, y_tr, _ = D.train_test_split()

    mdl = RandomForestClassifier(n_estimators=100, max_depth=8)
    mdl = mdl.fit(X_tr, y_tr)
    denied = X_ts[mdl.predict(X_ts) == 1]

    # Only the second matrix returned by interaction_matrix is consumed here.
    _, M = interaction_matrix(X_tr, interaction_type='causal')
    oce = ForestOrderedActionExtractor(
        mdl,
        X_tr,
        feature_names=D.feature_names,
        feature_types=D.feature_types,
        feature_categories=D.feature_categories,
        feature_constraints=D.feature_constraints,
        target_name=D.target_name,
        target_labels=D.target_labels,
        interaction_matrix=M)

    for n, x in enumerate(denied[:N]):
        print('# {}-th Denied Individual ---------------------------'.format(
            n + 1))
        oa = oce.extract(x,
                         K=6,
                         gamma=1.0,
                         ordering=ordering,
                         cost_type='uniform',
                         ordering_cost_type='uniform',
                         use_threshold=True,
                         log_name='./lp/forest')
        # -1 is treated as the "nothing to print" sentinel.
        if oa != -1:
            print(oa)
def _check_linear_oce(N=1, dataset='h', ordering=True):
    """Smoke-check ordered action extraction on top of logistic regression.

    Fits a ``LogisticRegression`` on the training split provided by
    ``DatasetHelper``, selects the test rows the model predicts as class 1
    and prints the result of ``extract`` for the first ``N`` of them.

    Args:
        N: Number of class-1 test rows to process.
        dataset: Dataset key forwarded to ``DatasetHelper``.
        ordering: Forwarded unchanged to ``extract`` (which is called with
            ``gamma=0.0`` here).
    """
    from sklearn.linear_model import LogisticRegression
    from utils import DatasetHelper

    # Seed the global NumPy RNG before the split and the model fit.
    np.random.seed(1)
    D = DatasetHelper(dataset=dataset, feature_prefix_index=False)
    X_tr, X_ts, y_tr, _ = D.train_test_split()

    mdl = LogisticRegression(penalty='l2', C=1.0, solver='liblinear')
    mdl = mdl.fit(X_tr, y_tr)
    denied = X_ts[mdl.predict(X_ts) == 1]

    # Only the second matrix returned by interaction_matrix is consumed here.
    _, M = interaction_matrix(X_tr, interaction_type='causal')
    oce = LinearOrderedActionExtractor(
        mdl,
        X_tr,
        feature_names=D.feature_names,
        feature_types=D.feature_types,
        feature_categories=D.feature_categories,
        feature_constraints=D.feature_constraints,
        target_name=D.target_name,
        target_labels=D.target_labels,
        interaction_matrix=M)

    for n, x in enumerate(denied[:N]):
        print('# {}-th Denied Individual ---------------------------'.format(
            n + 1))
        oa = oce.extract(x,
                         K=6,
                         gamma=0.0,
                         ordering=ordering,
                         intervention=False,
                         cost_type='DACE',
                         ordering_cost_type='standard',
                         log_name='./lp/linear')
        # -1 is treated as the "nothing to print" sentinel.
        if oa != -1:
            print(oa)
def demonstration(clf='lr',
                  n=1,
                  costs=['TLPS'],
                  ordering_cost='uniform',
                  verbose=True):
    """Run the five-feature synthetic loan example and print the actions.

    Generates 1000 synthetic rows from a small linear causal model, fits the
    requested classifier, and for the first ``n`` rows predicted as class 1
    prints the action found without ordering and with ordering (gamma 1.0)
    for every cost in ``costs``.

    Args:
        clf: Classifier key: ``'lr'`` (logistic regression), ``'mlp'`` or
            ``'rf'`` (random forest).
        n: Number of class-1 rows to explain.
        costs: Cost-type names passed to ``extract`` as ``cost_type``. The
            default list is only iterated, never mutated.
        ordering_cost: Passed to ``extract`` as ``ordering_cost_type``.
        verbose: When true, print the description of the synthetic setup.

    Raises:
        ValueError: If ``clf`` is not one of the supported keys.
    """
    # Fail before any work is done instead of hitting an unbound ``mdl``
    # after the data has been generated.
    if clf not in ('lr', 'mlp', 'rf'):
        raise ValueError(
            "unsupported clf {!r}; expected 'lr', 'mlp' or 'rf'".format(clf))

    N = 1000
    c_21 = 1
    c_32 = 6
    c_34 = 4
    c_54 = -0.5
    names = ['Education', 'JobSkill', 'Income', 'WorkPerDay', 'HealthStatus']

    # The order of the random draws is kept fixed so that a seeded run
    # reproduces the same data.
    x_1 = np.random.randint(1, 5, N)
    x_2 = c_21 * x_1 + np.random.randint(-1, 1, N)
    x_4 = np.random.randint(2, 6, N) * 2
    x_3 = c_32 * x_2 + c_34 * x_4 + np.random.randint(-2, 2, N)
    x_5 = c_54 * x_4 + np.random.randint(6, 13, N)
    X = np.array([x_1, x_2, x_3, x_4, x_5]).T

    _, C = interaction_matrix(X, interaction_type='causal')
    w_3, w_5 = 1.0 / x_3.mean(), 1.0 / x_5.mean()
    y = (w_3 * X[:, 2] + w_5 * X[:, 4] < 2.0).astype(int)

    if verbose:
        print('# Synthetic Example:')
        print('\t- x_1: Education : 1 ~ 5')
        print('\t- x_2: JobSkill : 1 ~ 10')
        print('\t- x_3: Income : 10 ~ 100')
        print('\t- x_4: WorkPerDay : 4 ~ 10')
        print('\t- x_5: HealthStatus : 1 ~ 10')
        print('# Causal Relationship:')
        print('\t- x_1 = e_1')
        print('\t- x_2 = {} * x_1 + e_2'.format(c_21))
        print('\t- x_3 = {} * x_2 + {} * x_4 + e_3'.format(c_32, c_34))
        print('\t- x_4 = e_4')
        print('\t- x_5 = {} * x_4 + e_5'.format(c_54))
        # NOTE(review): the hand-written matrix below replaces the estimated
        # one only on the verbose path; the original nesting could not be
        # recovered with certainty, so confirm that this is intended.
        C = np.array([[0.0] * 5,
                      [c_21] + [0.0] * 4,
                      [c_21 * c_32, c_32, 0.0, c_34, 0.0],
                      [0.0] * 5,
                      [0.0] * 3 + [c_54, 0.0]])
        print('# Interaction Matrix: \n', C)

    # Keyword arguments shared by all three extractor classes.
    extractor_kwargs = dict(feature_names=names,
                            feature_types=['I', 'I', 'I', 'I', 'I'],
                            feature_constraints=['INC'] * 2 + [''] * 3,
                            target_name='Loan',
                            target_labels=['Accept', 'Reject'],
                            interaction_matrix=C)

    if clf == 'lr':
        mdl = LogisticRegression(penalty='l2',
                                 C=1.0,
                                 fit_intercept=True,
                                 solver='liblinear',
                                 max_iter=10000)
        mdl = mdl.fit(X, y)
        oce = LinearOrderedActionExtractor(mdl, X, **extractor_kwargs)
        print('# Classifier: LogisticRegression')
    elif clf == 'mlp':
        mdl = MLPClassifier(hidden_layer_sizes=(30, ),
                            max_iter=500,
                            activation='relu',
                            alpha=0.0001)
        mdl = mdl.fit(X, y)
        oce = MLPOrderedActionExtractor(mdl, X, **extractor_kwargs)
        print('# Classifier: MultiLayerPerceptron')
    else:  # clf == 'rf', guaranteed by the validation above
        mdl = RandomForestClassifier(n_estimators=30, max_depth=6)
        mdl = mdl.fit(X, y)
        oce = ForestOrderedActionExtractor(mdl, X, **extractor_kwargs)
        print('# Classifier: RandomForest')
    print()

    denied_individual = X[mdl.predict(X) == 1]
    for i, x in enumerate(denied_individual[:n]):
        print('# {}-th Denied Individual: '.format(i + 1))
        print('\t- x_1: Education : {}'.format(x[0]))
        print('\t- x_2: JobSkill : {}'.format(x[1]))
        print('\t- x_3: Income : {}'.format(x[2]))
        print('\t- x_4: WorkPerDay : {}'.format(x[3]))
        print('\t- x_5: HealthStatus : {}'.format(x[4]))
        for cost in costs:
            print('## {} (non-order):'.format(cost))
            oa = oce.extract(x,
                             K=5,
                             ordering=False,
                             post_ordering=False,
                             cost_type=cost)
            # -1 is treated as the "nothing to print" sentinel.
            if oa != -1:
                print(oa)

            print('## {} + C_ord: '.format(cost))
            for g in [1.0]:
                oa = oce.extract(x,
                                 K=5,
                                 gamma=g,
                                 ordering=True,
                                 cost_type=cost,
                                 ordering_cost_type=ordering_cost)
                if oa != -1:
                    print(oa)
            print()
        print('---')
def extract(self,
            x,
            W=[],
            K=5,
            gamma=1.0,
            ordering=True,
            post_ordering=False,
            post_ordering_mode='greedy',
            intervention=False,
            cost_type='uniform',
            ordering_cost_type='uniform',
            use_threshold=True,
            solver='cplex',
            time_limit=300,
            log_stream=False,
            mdl_name='',
            log_name='',
            init_sols={},
            verbose=False):
    """Build and solve the mixed-integer program for one instance.

    The program selects one candidate action per feature (binary ``pi``),
    requires the resulting score ``coef_ . xi + intercept_`` to cross to the
    other side of zero, and, when ``ordering`` is on, additionally assigns the
    changed features to steps ``0..K_-1`` (binary ``sigma``) while charging
    ``gamma`` times the per-step ordering cost ``zeta``.

    Args:
        x: Instance to explain; reshaped to ``(1, -1)`` for ``predict``.
        W: Stored on ``self.W_`` when non-empty, otherwise all feature
            indices are stored. Not used otherwise in this method.
        K: Upper bound on the number of changed features (capped at the
            number of features).
        gamma: Weight of the ordering cost. ``0.0`` switches ordering off.
        ordering: Whether the order is optimised inside the program.
        post_ordering: Forwarded to ``OrderedAction``.
        post_ordering_mode: Forwarded to ``OrderedAction``.
        intervention: When true, ``cost_type`` is forced to ``'normalize'``
            and ``act`` is tied to the actions through the matrix returned by
            ``interaction_matrix`` plus the identity.
        cost_type: Cost name forwarded to ``generateActions``. ``'SCM'`` and
            ``'DACE'`` use the auxiliary ``dist`` variables; ``'NONE'``
            (with ordering) uses the ordering cost only.
        ordering_cost_type: Forwarded to ``getFeatureWeight``.
        use_threshold: Forwarded to ``generateActions``.
        solver: Accepted for interface compatibility; the solve call always
            uses ``pulp.CPLEX_PY``.
        time_limit: Passed to the solver as ``timeLimit``.
        log_stream: Passed to the solver as ``msg``.
        mdl_name: Name given to the ``LpProblem``.
        log_name: When non-empty, the model is written to
            ``log_name + '.lp'`` before solving.
        init_sols: Optional warm-start values keyed by ``'act'``, ``'pi'``,
            ``'xi'``, ``'phi'`` and ``'sigma'`` (the latter being a sequence
            of feature indices, one per step). Never mutated.
        verbose: Unused.

    Returns:
        An ``OrderedAction`` on success, or ``-1`` when the solver status is
        not ``1``.
    """
    self.x_ = x
    self.y_ = self.mdl_.predict(x.reshape(1, -1))[0]
    self.W_ = W if len(W) != 0 else list(range(self.D_))
    self.K_ = min(K, self.D_)

    # gamma == 0 and ordering == False are two spellings of the same mode.
    if gamma == 0.0:
        ordering = False
    if not ordering:
        gamma = 0.0
    self.gamma_ = gamma

    cost_type = 'normalize' if intervention else cost_type
    uses_dist = cost_type in ('SCM', 'DACE')
    self.A_, self.C_, self.I_ = self.AC_.generateActions(
        x, cost_type=cost_type, use_threshold=use_threshold)
    self.lb_ = [np.min(A_d) for A_d in self.A_]
    self.ub_ = [np.max(A_d) for A_d in self.A_]
    C = self.M_

    prob = pulp.LpProblem(mdl_name)

    # ---- variables --------------------------------------------------------
    act = [
        pulp.LpVariable('act_{}'.format(d),
                        cat='Continuous',
                        lowBound=self.lb_[d],
                        upBound=self.ub_[d]) for d in range(self.D_)
    ]
    pi = [[
        pulp.LpVariable('pi_{}_{}'.format(d, i), cat='Binary')
        for i in range(len(self.A_[d]))
    ] for d in range(self.D_)]
    xi = [
        pulp.LpVariable('xi_{}'.format(t),
                        cat='Continuous',
                        lowBound=0,
                        upBound=1) for t in range(self.T_)
    ]
    phi = [[
        pulp.LpVariable('phi_{}_{}'.format(t, l), cat='Binary')
        for l in range(self.L_[t])
    ] for t in range(self.T_)]

    if ordering:
        LB, UB = self.getOrderingBounds(K, C)
        sigma = [[
            pulp.LpVariable('sig_{}_{}'.format(k, d), cat='Binary')
            for d in range(self.D_)
        ] for k in range(self.K_)]
        pik = [[[
            pulp.LpVariable('pik_{}_{}_{}'.format(k, d, i), cat='Binary')
            for i in range(len(self.A_[d]))
        ] for d in range(self.D_)] for k in range(self.K_)]
        epsilon = [[
            pulp.LpVariable('ips_{}_{}'.format(k, d), cat='Continuous')
            for d in range(self.D_)
        ] for k in range(self.K_)]
        delta = [[
            pulp.LpVariable('dlt_{}_{}'.format(k, d), cat='Continuous')
            for d in range(self.D_)
        ] for k in range(self.K_)]
        zeta = [
            pulp.LpVariable('zta_{}'.format(k), cat='Continuous', lowBound=0)
            for k in range(self.K_)
        ]

    if uses_dist:
        dist = [
            pulp.LpVariable('dist_{}'.format(d), cat='Continuous', lowBound=0)
            for d in range(self.D_)
        ]

    # ---- warm start: {name: [val_1, val_2, ...], ...} ----------------------
    if len(init_sols) != 0:
        for val, sols in init_sols.items():
            if val == 'act':
                for d, v in enumerate(sols):
                    act[d].setInitialValue(v)
            elif val == 'pi':
                for d, vs in enumerate(sols):
                    for i, v in enumerate(vs):
                        pi[d][i].setInitialValue(v)
            elif val == 'xi':
                for t, v in enumerate(sols):
                    xi[t].setInitialValue(v)
            elif val == 'phi':
                for t, vs in enumerate(sols):
                    for l, v in enumerate(vs):
                        phi[t][l].setInitialValue(v)
            elif val == 'sigma' and ordering:
                # sols[k] is the feature changed at step k. Every entry of
                # row k is written (one-hot), and steps beyond the supplied
                # order are zeroed. sigma only exists when ordering is on,
                # so a saved order is ignored otherwise.
                order = list(sols)[:self.K_]
                for k in range(self.K_):
                    chosen = order[k] if k < len(order) else None
                    for d in range(self.D_):
                        sigma[k][d].setInitialValue(1 if d == chosen else 0)

    # ---- objective ---------------------------------------------------------
    def build_objective():
        # Built afresh on each call so that the objective and the
        # non-negativity constraint never share one expression object.
        if ordering and cost_type == 'NONE':
            return pulp.lpDot([self.gamma_] * self.K_, zeta)
        if uses_dist:
            base = pulp.lpSum(dist)
        else:
            base = pulp.lpDot(flatten(self.C_), flatten(pi))
        if ordering:
            return base + pulp.lpDot([self.gamma_] * self.K_, zeta)
        return base

    prob += build_objective()
    prob += build_objective() >= 0
    if uses_dist:
        # dist_d >= |C_d . pi|
        for d in range(self.D_):
            prob += dist[d] - pulp.lpDot(flatten(self.C_[d]),
                                         flatten(pi)) >= 0
            prob += dist[d] + pulp.lpDot(flatten(self.C_[d]),
                                         flatten(pi)) >= 0

    # ---- base constraints --------------------------------------------------
    # constraint: sum_{i} pi_{d,i} == 1
    for d in range(self.D_):
        prob += pulp.lpSum(pi[d]) == 1

    # constraint: sum_{d} pi_{d,i_d} >= D - K  (i_d: index of the zero action)
    prob += pulp.lpSum(
        [pi[d][list(self.A_[d]).index(0)]
         for d in range(self.D_)]) >= self.D_ - self.K_

    # constraint: sum_{d in G} a_{d,1} pi_{d,1} = 0
    for G in self.feature_categories_:
        prob += pulp.lpDot(
            [self.A_[d][0] for d in G if len(self.A_[d]) != 0],
            [pi[d][0] for d in G if len(self.A_[d]) != 0]) == 0

    # constraint: sum_{d} w_d xi_d + b >= 0 (or <= 0), pushed past zero by
    # 1e-8 so the score ends up strictly on the other side.
    if self.y_ == 0:
        prob += pulp.lpDot(self.coef_, xi) >= -self.intercept_ + 1e-8
    else:
        prob += pulp.lpDot(self.coef_, xi) <= -self.intercept_ - 1e-8

    # constraint: a_d = sum_{i} a_{d,i} pi_{d,i}
    if intervention:
        _, B_ = interaction_matrix(self.X_, interaction_type='causal')
        B = B_ + np.eye(self.D_)
        for d in range(self.D_):
            A_d = [[B[d][d_] * a for a in self.A_[d_]]
                   for d_ in range(self.D_)]
            prob += act[d] - pulp.lpDot(flatten(A_d), flatten(pi)) == 0
    else:
        for d in range(self.D_):
            prob += act[d] - pulp.lpDot(self.A_[d], pi[d]) == 0

    # constraints (Tree Ensemble):
    for t in range(self.T_):
        # constraint: sum_{l} phi_{t,l} = 1
        prob += pulp.lpSum(phi[t]) == 1
        # constraint: xi_t = sum_{l} h_{t,l} phi_{t,l}
        prob += xi[t] - pulp.lpDot(self.H_[t], phi[t]) == 0
        # constraint: D * phi_{t,l} <= sum_{d} sum_{i in I_{t,l,d}} pi_{d,i}
        for l in range(self.L_[t]):
            anc = self.AC_.ancestors_[t][l]
            prob += len(anc) * phi[t][l] - pulp.lpDot(
                flatten([self.I_[t][l][d] for d in anc]),
                flatten([pi[d] for d in anc])) <= 0

    # ---- ordering constraints ----------------------------------------------
    if ordering:
        I_act = [(abs(a) > self.tol_).astype(int) for a in self.A_]

        # constraint: sigma_{k,d} = sum_{i} pi^(k)_{d,i}
        for k in range(self.K_):
            for d in range(self.D_):
                if (d in self.feature_categories_flatten_
                        and self.x_[d] == 1):
                    prob += sigma[k][d] == 0
                else:
                    prob += pulp.lpDot(I_act[d],
                                       pik[k][d]) - sigma[k][d] == 0

        # constraint: pi_{d,i} = sum_{k} pi^(k)_{d,i}
        for d in range(self.D_):
            for i in range(len(self.A_[d])):
                prob += pulp.lpSum(
                    [pik[step][d][i]
                     for step in range(self.K_)]) - pi[d][i] == 0

        # constraint: sum_{d in G} a_{d,1} pi^(k)_{d,1} = 0
        # NOTE(review): ``k`` is the value left over from the sigma loop
        # above (the last step), so this is posted for one step only. The
        # formula suggests every k was intended; behaviour is kept as-is
        # until that is confirmed.
        for G in self.feature_categories_:
            prob += pulp.lpDot(
                [self.A_[d][0] for d in G if len(self.A_[d]) != 0],
                [pik[k][d][0] for d in G if len(self.A_[d]) != 0]) == 0

        # constraint: sum_{k} sigma_{k,d} <= 1
        for d in range(self.D_):
            prob += pulp.lpSum([sigma[k][d] for k in range(self.K_)]) <= 1

        # constraint: sum_{d} sigma_{k,d} <= 1
        for k in range(self.K_):
            prob += pulp.lpSum(sigma[k]) <= 1

        # constraint: sum_{d} sigma_{k,d} >= sum_{d} sigma_{k+1,d}
        for k in range(self.K_ - 1):
            prob += pulp.lpSum(sigma[k]) - pulp.lpSum(sigma[k + 1]) >= 0

        # constraint: delta_{0,d} = 0
        for d in range(self.D_):
            prob += delta[0][d] == 0

        # constraint: delta_{k,d}
        #   = sum_{d'} C_{d,d'} epsilon_{k-1,d'} + delta_{k-1,d}
        for k in range(1, self.K_):
            for d in range(self.D_):
                prob += delta[k][d] - delta[k - 1][d] - pulp.lpDot(
                    C[d], epsilon[k - 1]) == 0

        # constraint: epsilon_{k,d} >= sum_{i} a_{d,i} pi^(k)_{d,i}
        #                              - delta_{k,d} - U_d (1 - sigma_{k,d})
        # constraint: epsilon_{k,d} <= sum_{i} a_{d,i} pi^(k)_{d,i}
        #                              - delta_{k,d} - L_d (1 - sigma_{k,d})
        # constraint: epsilon_{k,d} >= L_d sigma_{k,d}
        # constraint: epsilon_{k,d} <= U_d sigma_{k,d}
        for k in range(self.K_):
            for d in range(self.D_):
                prob += epsilon[k][d] - pulp.lpDot(
                    self.A_[d], pik[k][d]
                ) + delta[k][d] - UB[k][d] * sigma[k][d] >= -UB[k][d]
                prob += epsilon[k][d] - pulp.lpDot(
                    self.A_[d], pik[k][d]
                ) + delta[k][d] - LB[k][d] * sigma[k][d] <= -LB[k][d]
                prob += epsilon[k][d] - LB[k][d] * sigma[k][d] >= 0
                prob += epsilon[k][d] - UB[k][d] * sigma[k][d] <= 0

        # constraint: zeta_k >= sum_{d} w_d * epsilon_{k,d}
        # constraint: zeta_k >= - sum_{d} w_d * epsilon_{k,d}
        weights = self.AC_.getFeatureWeight(cost_type=ordering_cost_type)
        for k in range(self.K_):
            prob += zeta[k] - pulp.lpDot(weights, epsilon[k]) >= 0
            prob += zeta[k] + pulp.lpDot(weights, epsilon[k]) >= 0

    # ---- solve -------------------------------------------------------------
    if len(log_name) != 0:
        prob.writeLP(log_name + '.lp')

    started = time.perf_counter()
    prob.solve(solver=pulp.CPLEX_PY(msg=log_stream,
                                    warm_start=(len(init_sols) != 0),
                                    timeLimit=time_limit,
                                    options=['set output clonelog -1']))
    t = time.perf_counter() - started

    # Any status other than 1 is reported to the caller as -1.
    if prob.status != 1:
        return -1

    # ---- read back the solution --------------------------------------------
    obj = prob.objective.value()
    if uses_dist:
        c_ordinal = np.sum([d.value() for d in dist])
    else:
        c_ordinal = np.sum([
            c * round(p.value())
            for c, p in zip(flatten(self.C_), flatten(pi))
        ])
    c_ordering = np.sum([z.value() for z in zeta]) if ordering else 0.0

    # Binary values are rounded to strip solver tolerance noise.
    a = np.array([
        np.sum([
            self.A_[d][i] * round(pi[d][i].value())
            for i in range(len(self.A_[d]))
        ]) for d in range(self.D_)
    ])
    sig = np.array([[round(s_kd.value()) for s_kd in sigma[k]]
                    for k in range(self.K_)]) if ordering else []

    ret = OrderedAction(
        x,
        a,
        sig,
        gamma=gamma,
        time=t,
        obj=obj,
        c_ordinal=c_ordinal,
        c_ordering=c_ordering,
        target_name=self.target_name_,
        target_labels=self.target_labels_,
        label_before=int(self.y_),
        label_after=int(1 - self.y_),
        feature_names=self.feature_names_,
        feature_types=self.feature_types_,
        feature_categories=self.feature_categories_,
        interaction_matrix=self.M_,
        post_ordering=post_ordering,
        weights=self.AC_.getFeatureWeight(cost_type=ordering_cost_type),
        post_ordering_mode=post_ordering_mode)

    # ---- remember the solution for a later warm start ----------------------
    self.init_sols_ = {}
    self.init_sols_['act'] = [a_d for a_d in a]
    self.init_sols_['pi'] = [[round(p.value()) for p in pi_d] for pi_d in pi]
    self.init_sols_['xi'] = [np.clip(xi_t.value(), 0, 1) for xi_t in xi]
    self.init_sols_['phi'] = [[round(p.value()) for p in phi_t]
                              for phi_t in phi]
    if ret.ordered_:
        self.init_sols_['sigma'] = ret.order_

    return ret
def exp_real_sens(dataset='h',
                  n=10,
                  verbose=False,
                  time_limit=300,
                  costs=['TLPS', 'MAD', 'DACE', 'SCM']):
    """Sweep gamma on a real dataset with a logistic-regression model.

    For each of the first ``n`` test rows predicted as class 1 and each cost
    in ``costs``, one unordered solve (with greedy post-ordering) and one
    ordered solve per gamma in ``10**-3 .. 10**2`` are run. The ``c_ordinal_``
    and ``c_ordering_`` of every solve are collected (``-1`` for a failed
    solve) and, unless ``verbose`` is set, written to two CSV files under
    ``./res/``.

    Args:
        dataset: Dataset key forwarded to ``DatasetHelper``.
        n: Number of class-1 test rows to process.
        verbose: Print the running result dictionaries instead of writing
            the CSV files.
        time_limit: Forwarded to ``extract``.
        costs: Cost-type names to evaluate. Only iterated, never mutated.
    """
    from utils import DatasetHelper

    D = DatasetHelper(dataset=dataset, feature_prefix_index=False)
    X_tr, X_ts, y_tr, y_ts = D.train_test_split()
    B, M = interaction_matrix(X_tr, interaction_type='causal')

    mdl = LogisticRegression(penalty='l2',
                             C=1.0,
                             fit_intercept=True,
                             solver='liblinear',
                             max_iter=10000)
    mdl = mdl.fit(X_tr, y_tr)
    oce = LinearOrderedActionExtractor(
        mdl,
        X_tr,
        feature_names=D.feature_names,
        feature_types=D.feature_types,
        feature_categories=D.feature_categories,
        feature_constraints=D.feature_constraints,
        target_name=D.target_name,
        target_labels=D.target_labels,
        interaction_matrix=M)
    denied_individual = X_ts[mdl.predict(X_ts) == 1]

    gammas = [10**i for i in range(-3, 3)]

    # Column order of the CSV files: plain costs first, then every
    # cost/gamma combination.
    res_dict = {}
    res_dict_ord = {}
    for name in costs:
        res_dict[name] = []
        res_dict_ord[name] = []
    for name in costs:
        for g in gammas:
            column = name + '_ORDER_{}'.format(g)
            res_dict[column] = []
            res_dict_ord[column] = []

    def record(column, outcome):
        # A failed solve is stored as -1 in both tables.
        if outcome != -1:
            print(outcome)
            res_dict[column].append(outcome.c_ordinal_)
            res_dict_ord[column].append(outcome.c_ordering_)
        else:
            res_dict[column].append(-1)
            res_dict_ord[column].append(-1)

    for idx, x in enumerate(denied_individual[:n]):
        print('# {}-th Denied Individual:'.format(idx + 1))
        for cost in costs:
            print('## {}: '.format(cost))
            unordered = oce.extract(x,
                                    K=4,
                                    ordering=False,
                                    post_ordering=True,
                                    post_ordering_mode='greedy',
                                    cost_type=cost,
                                    ordering_cost_type='standard',
                                    time_limit=time_limit)
            record(cost, unordered)

            print('## {} + C_order: '.format(cost))
            for gamma in gammas:
                ordered = oce.extract(x,
                                      K=4,
                                      gamma=gamma,
                                      ordering=True,
                                      cost_type=cost,
                                      ordering_cost_type='standard',
                                      time_limit=time_limit)
                record(cost + '_ORDER_{}'.format(gamma), ordered)

        # NOTE(review): placement of these prints relative to the loops was
        # reconstructed from a whitespace-collapsed source; confirm.
        if verbose:
            print('---')
            print('# Results')
            print('+ ', res_dict)
            print('+ ', res_dict_ord)
        print('---')

    if verbose == False:  # noqa: E712 - keeps the original comparison
        import pandas as pd
        dist_path = './res/{}_res_dist_sens.csv'.format(D.dataset_name)
        pd.DataFrame(res_dict).to_csv(dist_path, index=False)
        ord_path = './res/{}_res_ord_sens.csv'.format(D.dataset_name)
        pd.DataFrame(res_dict_ord).to_csv(ord_path, index=False)
def exp_synthetic(n=10, verbose=False):
    """Run the synthetic five-feature experiment with logistic regression.

    Generates 1000 synthetic rows, fits a logistic regression, and for the
    first ``n`` rows predicted as class 1 solves, per cost type, one
    unordered problem (with greedy post-ordering) and one ordered problem per
    gamma. Ordinal cost, ordering cost and solve time of every run are
    collected and, unless ``verbose`` is set, written to three CSV files
    under ``./res/``.

    A failed solve is recorded as ``-1`` in all three tables so that every
    column keeps one entry per individual; otherwise building the data
    frames from columns of different lengths would fail.

    Args:
        n: Number of class-1 rows to process.
        verbose: Use the single gamma ``1.0`` and skip the CSV output;
            otherwise sweep gamma over ``0.1 .. 2.0`` in steps of ``0.1``.
    """
    N = 1000
    c_21 = 1
    c_32 = 6
    c_34 = 4
    c_54 = -0.5
    names = [
        'Education', 'JobSkill', 'Income(K)', 'WorkPerDay', 'HealthStatus'
    ]

    # The order of the random draws is kept fixed so that a seeded run
    # reproduces the same data.
    x_1 = np.random.randint(1, 5, N)
    x_2 = c_21 * x_1 + np.random.randint(-1, 1, N)
    x_4 = np.random.randint(2, 6, N) * 2
    x_3 = c_32 * x_2 + c_34 * x_4 + np.random.randint(-2, 2, N)
    x_5 = c_54 * x_4 + np.random.randint(6, 13, N)
    X = np.array([x_1, x_2, x_3, x_4, x_5]).T

    _, C = interaction_matrix(X, interaction_type='causal')
    w_3, w_5 = 1.0 / x_3.mean(), 1.0 / x_5.mean()
    y = (w_3 * X[:, 2] + w_5 * X[:, 4] < 2.0).astype(int)

    mdl = LogisticRegression(penalty='l2',
                             C=1.0,
                             fit_intercept=True,
                             solver='liblinear',
                             max_iter=10000)
    mdl = mdl.fit(X, y)
    print('# Model Coef.: \n', mdl.coef_, (mdl.intercept_))

    oce = LinearOrderedActionExtractor(
        mdl,
        X,
        feature_names=names,
        feature_types=['I', 'I', 'I', 'I', 'I'],
        feature_constraints=['INC'] * 2 + [''] * 3,
        target_name='Loan',
        target_labels=['Accept', 'Reject'],
        interaction_matrix=C)
    denied_individual = X[mdl.predict(X) == 1]

    costs = ['TLPS', 'MAD', 'DACE', 'SCM']
    gammas = [1.0] if verbose else [0.1 + i * 0.1 for i in range(20)]

    # Column order of the CSV files: plain costs first, then every
    # cost/gamma combination.
    res_dict = {}
    res_dict_ord = {}
    res_dict_time = {}
    for name in costs:
        res_dict[name] = []
        res_dict_ord[name] = []
        res_dict_time[name] = []
    for name in costs:
        for g in gammas:
            column = name + '_ORDER_{}'.format(g)
            res_dict[column] = []
            res_dict_ord[column] = []
            res_dict_time[column] = []

    def record(column, outcome):
        # Keep all columns aligned: a failed solve still contributes a row.
        if outcome != -1:
            print(outcome)
            res_dict[column].append(outcome.c_ordinal_)
            res_dict_ord[column].append(outcome.c_ordering_)
            res_dict_time[column].append(outcome.time_)
        else:
            res_dict[column].append(-1)
            res_dict_ord[column].append(-1)
            res_dict_time[column].append(-1)

    for i, x in enumerate(denied_individual[:n]):
        print('# {}-th Denied Individual: '.format(i + 1), x)
        for cost in costs:
            print('## {}: '.format(cost))
            oa = oce.extract(x,
                             K=5,
                             ordering=False,
                             post_ordering=True,
                             post_ordering_mode='greedy',
                             cost_type=cost,
                             ordering_cost_type='standard')
            record(cost, oa)

            if verbose:
                print('## {} + C_order: '.format(cost))
            for gamma in gammas:
                oa = oce.extract(x,
                                 K=5,
                                 gamma=gamma,
                                 ordering=True,
                                 cost_type=cost,
                                 ordering_cost_type='standard')
                record(cost + '_ORDER_{}'.format(gamma), oa)
        print('---')

    if not verbose:
        import pandas as pd
        pd.DataFrame(res_dict).to_csv('./res/synthetic_res_dist_lr.csv',
                                      index=False)
        pd.DataFrame(res_dict_ord).to_csv('./res/synthetic_res_ord_lr.csv',
                                          index=False)
        pd.DataFrame(res_dict_time).to_csv('./res/synthetic_res_time_lr.csv',
                                           index=False)
def exp_real(clf='lr',
             dataset='h',
             n=10,
             verbose=False,
             costs=['TLPS', 'MAD', 'DACE', 'SCM'],
             suf='',
             tol=1e-6):
    """Run the real-data experiment for one classifier family.

    Fits the requested classifier on the training split, and for the first
    ``n`` test rows predicted as class 1 solves, per cost type, one unordered
    problem (with greedy post-ordering) and one ordered problem with gamma
    ``1.0``. Ordinal cost, ordering cost and solve time are collected
    (``-1`` for a failed solve) and, unless ``verbose`` is set, written to
    three CSV files under ``./res/``.

    Args:
        clf: Classifier key: ``'lr'``, ``'mlp'`` or ``'rf'``.
        dataset: Dataset key forwarded to ``DatasetHelper``. For ``'rf'`` the
            tree depth is 6 when this is ``'g'`` and 4 otherwise.
        n: Number of class-1 test rows to process.
        verbose: Print the running result dictionaries instead of writing
            the CSV files.
        costs: Cost-type names to evaluate. Only iterated, never mutated.
        suf: Suffix placed in the CSV file names.
        tol: Forwarded to ``MLPOrderedActionExtractor`` only.

    Raises:
        ValueError: If ``clf`` is not one of the supported keys.
    """
    # Reject unknown keys before loading data, so the failure is immediate
    # and explicit; without this check ``mdl`` would simply be unbound.
    if clf not in ('lr', 'mlp', 'rf'):
        raise ValueError(
            "unsupported clf {!r}; expected 'lr', 'mlp' or 'rf'".format(clf))

    from utils import DatasetHelper

    D = DatasetHelper(dataset=dataset, feature_prefix_index=False)
    X_tr, X_ts, y_tr, _ = D.train_test_split()
    _, M = interaction_matrix(X_tr, interaction_type='causal')

    # Keyword arguments shared by all three extractor classes.
    extractor_kwargs = dict(feature_names=D.feature_names,
                            feature_types=D.feature_types,
                            feature_categories=D.feature_categories,
                            feature_constraints=D.feature_constraints,
                            target_name=D.target_name,
                            target_labels=D.target_labels,
                            interaction_matrix=M)

    if clf == 'lr':
        mdl = LogisticRegression(penalty='l2',
                                 C=1.0,
                                 fit_intercept=True,
                                 solver='liblinear',
                                 max_iter=10000)
        mdl = mdl.fit(X_tr, y_tr)
        oce = LinearOrderedActionExtractor(mdl, X_tr, **extractor_kwargs)
    elif clf == 'mlp':
        mdl = MLPClassifier(hidden_layer_sizes=(200, ),
                            max_iter=500,
                            activation='relu',
                            alpha=0.0001)
        mdl = mdl.fit(X_tr, y_tr)
        oce = MLPOrderedActionExtractor(mdl,
                                        X_tr,
                                        tol=tol,
                                        **extractor_kwargs)
    else:  # clf == 'rf', guaranteed by the validation above
        h = 6 if dataset == 'g' else 4
        mdl = RandomForestClassifier(n_estimators=100, max_depth=h)
        mdl = mdl.fit(X_tr, y_tr)
        oce = ForestOrderedActionExtractor(mdl, X_tr, **extractor_kwargs)

    denied_individual = X_ts[mdl.predict(X_ts) == 1]
    gammas = [1.0]

    # Column order of the CSV files: plain costs first, then every
    # cost/gamma combination.
    res_dict = {}
    res_dict_ord = {}
    res_dict_time = {}
    for name in costs:
        res_dict[name] = []
        res_dict_ord[name] = []
        res_dict_time[name] = []
    for name in costs:
        for g in gammas:
            column = name + '_ORDER_{}'.format(g)
            res_dict[column] = []
            res_dict_ord[column] = []
            res_dict_time[column] = []

    def record(column, outcome):
        # A failed solve is stored as -1 in all three tables.
        if outcome != -1:
            print(outcome)
            res_dict[column].append(outcome.c_ordinal_)
            res_dict_ord[column].append(outcome.c_ordering_)
            res_dict_time[column].append(outcome.time_)
        else:
            res_dict[column].append(-1)
            res_dict_ord[column].append(-1)
            res_dict_time[column].append(-1)

    for i, x in enumerate(denied_individual[:n]):
        print('# {}-th Denied Individual:'.format(i + 1))
        for cost in costs:
            print('## {}: '.format(cost))
            oa = oce.extract(x,
                             K=4,
                             ordering=False,
                             post_ordering=True,
                             post_ordering_mode='greedy',
                             cost_type=cost,
                             ordering_cost_type='standard',
                             time_limit=300,
                             log_stream=False)
            record(cost, oa)

            print('## {} + C_order: '.format(cost))
            for gamma in gammas:
                oa = oce.extract(x,
                                 K=4,
                                 gamma=gamma,
                                 ordering=True,
                                 cost_type=cost,
                                 ordering_cost_type='standard',
                                 time_limit=300,
                                 log_stream=False)
                record(cost + '_ORDER_{}'.format(gamma), oa)

        # NOTE(review): placement of these prints relative to the loops was
        # reconstructed from a whitespace-collapsed source; confirm.
        if verbose:
            print('---')
            print('# Results')
            print('+ ', res_dict)
            print('+ ', res_dict_ord)
            print('+ ', res_dict_time)
        print('---')

    if not verbose:
        import pandas as pd
        pd.DataFrame(res_dict).to_csv('./res/{}_res_dist_{}_{}.csv'.format(
            D.dataset_name, clf, suf),
                                      index=False)
        pd.DataFrame(res_dict_ord).to_csv('./res/{}_res_ord_{}_{}.csv'.format(
            D.dataset_name, clf, suf),
                                          index=False)
        pd.DataFrame(res_dict_time).to_csv(
            './res/{}_res_time_{}_{}.csv'.format(D.dataset_name, clf, suf),
            index=False)