# Example #1
def _check_mlp_oce(N=1, dataset='h', ordering=True):
    """Smoke-test MLPOrderedActionExtractor on up to N denied individuals.

    Trains an MLP on the given dataset, picks test instances predicted as
    denied (label 1), and extracts an ordered action for each.

    Parameters
    ----------
    N : int
        Number of denied test individuals to process.
    dataset : str
        DatasetHelper dataset key (e.g. 'h').
    ordering : bool
        Whether to optimize the action order inside the extraction.
    """
    from sklearn.neural_network import MLPClassifier
    # Fix: interaction_matrix is used below but was not imported here;
    # the sibling checker _check_forest_oce imports it the same way.
    from utils import DatasetHelper, interaction_matrix
    np.random.seed(1)

    D = DatasetHelper(dataset=dataset, feature_prefix_index=False)
    X_tr, X_ts, y_tr, y_ts = D.train_test_split()
    mdl = MLPClassifier(hidden_layer_sizes=(100, ),
                        max_iter=500,
                        activation='relu',
                        alpha=0.0001)
    mdl = mdl.fit(X_tr, y_tr)
    # Individuals the classifier rejects (class 1) are recourse candidates.
    denied = X_ts[mdl.predict(X_ts) == 1]
    B, M = interaction_matrix(X_tr, interaction_type='causal')
    oce = MLPOrderedActionExtractor(mdl,
                                    X_tr,
                                    feature_names=D.feature_names,
                                    feature_types=D.feature_types,
                                    feature_categories=D.feature_categories,
                                    feature_constraints=D.feature_constraints,
                                    target_name=D.target_name,
                                    target_labels=D.target_labels,
                                    interaction_matrix=M)

    for n, x in enumerate(denied[:N]):
        print(
            '# {}-th Denied Individual ---------------------------'.format(n +
                                                                           1))
        oa = oce.extract(x,
                         K=6,
                         gamma=1.0,
                         ordering=ordering,
                         cost_type='uniform',
                         ordering_cost_type='uniform',
                         log_name='./lp/mlp')
        # extract returns -1 when the underlying MIP is not solved.
        if (oa != -1): print(oa)
# Example #2
def _check_forest_oce(N=1, dataset='h', ordering=True):
    """Smoke-test ForestOrderedActionExtractor on up to N denied individuals.

    Fits a random forest on the chosen dataset and extracts an ordered
    action for each test instance the model rejects.
    """
    import pandas as pd
    from sklearn.ensemble import RandomForestClassifier
    from utils import DatasetHelper, interaction_matrix

    np.random.seed(1)

    helper = DatasetHelper(dataset=dataset, feature_prefix_index=False)
    X_train, X_test, y_train, y_test = helper.train_test_split()
    forest = RandomForestClassifier(n_estimators=100, max_depth=8).fit(
        X_train, y_train)
    # Instances predicted as class 1 are the recourse candidates.
    rejected = X_test[forest.predict(X_test) == 1]
    _, causal = interaction_matrix(X_train, interaction_type='causal')
    oce = ForestOrderedActionExtractor(
        forest,
        X_train,
        feature_names=helper.feature_names,
        feature_types=helper.feature_types,
        feature_categories=helper.feature_categories,
        feature_constraints=helper.feature_constraints,
        target_name=helper.target_name,
        target_labels=helper.target_labels,
        interaction_matrix=causal)

    for idx, instance in enumerate(rejected[:N]):
        print(
            '# {}-th Denied Individual ---------------------------'.format(idx +
                                                                           1))
        action = oce.extract(instance,
                             K=6,
                             gamma=1.0,
                             ordering=ordering,
                             cost_type='uniform',
                             ordering_cost_type='uniform',
                             use_threshold=True,
                             log_name='./lp/forest')
        # A return of -1 signals that no action could be extracted.
        if (action != -1):
            print(action)
# Example #3
def _check_linear_oce(N=1, dataset='h', ordering=True):
    """Smoke-test LinearOrderedActionExtractor on up to N denied individuals.

    Trains a logistic regression on the given dataset and extracts a
    (DACE-cost) action for each test instance the model rejects.

    Parameters
    ----------
    N : int
        Number of denied test individuals to process.
    dataset : str
        DatasetHelper dataset key (e.g. 'h').
    ordering : bool
        Whether to optimize the action order inside the extraction.
    """
    from sklearn.linear_model import LogisticRegression
    # Fix: interaction_matrix is used below but was not imported here;
    # the sibling checker _check_forest_oce imports it from utils.
    from utils import DatasetHelper, interaction_matrix
    np.random.seed(1)

    D = DatasetHelper(dataset=dataset, feature_prefix_index=False)
    X_tr, X_ts, y_tr, y_ts = D.train_test_split()
    mdl = LogisticRegression(penalty='l2', C=1.0, solver='liblinear')
    mdl = mdl.fit(X_tr, y_tr)
    # Individuals the classifier rejects (class 1) are recourse candidates.
    denied = X_ts[mdl.predict(X_ts) == 1]
    B, M = interaction_matrix(X_tr, interaction_type='causal')
    oce = LinearOrderedActionExtractor(
        mdl,
        X_tr,
        feature_names=D.feature_names,
        feature_types=D.feature_types,
        feature_categories=D.feature_categories,
        feature_constraints=D.feature_constraints,
        target_name=D.target_name,
        target_labels=D.target_labels,
        interaction_matrix=M)

    for n, x in enumerate(denied[:N]):
        print(
            '# {}-th Denied Individual ---------------------------'.format(n +
                                                                           1))
        oa = oce.extract(x,
                         K=6,
                         gamma=0.0,
                         ordering=ordering,
                         intervention=False,
                         cost_type='DACE',
                         ordering_cost_type='standard',
                         log_name='./lp/linear')
        # extract returns -1 when the underlying MIP is not solved.
        if (oa != -1): print(oa)
# Example #4
def demonstration(clf='lr',
                  n=1,
                  costs=['TLPS'],
                  ordering_cost='uniform',
                  verbose=True):
    """Demonstrate ordered recourse extraction on a synthetic loan example.

    Generates a 5-feature synthetic dataset with known causal structure,
    fits the requested classifier, and prints actions for the first `n`
    denied individuals, both without and with ordering cost.

    Parameters
    ----------
    clf : str
        One of 'lr' (logistic regression), 'mlp', or 'rf'.
    n : int
        Number of denied individuals to demonstrate on.
    costs : list of str
        Ordinal cost types to try (mutable default is never mutated here).
    ordering_cost : str
        Ordering cost type passed to the extractor.
    verbose : bool
        Print the synthetic-data description and use the analytical
        interaction matrix.

    Raises
    ------
    ValueError
        If `clf` is not one of the supported classifier keys.
    """
    N = 1000
    # Structural-equation coefficients of the synthetic causal model.
    c_21 = 1
    c_32 = 6
    c_34 = 4
    c_54 = -0.5
    names = ['Education', 'JobSkill', 'Income', 'WorkPerDay', 'HealthStatus']
    # NOTE: draw order (x_1, x_2, x_4, x_3, x_5) matters for reproducibility.
    x_1 = np.random.randint(1, 5, N)
    x_2 = c_21 * x_1 + np.random.randint(-1, 1, N)
    x_4 = np.random.randint(2, 6, N) * 2
    x_3 = c_32 * x_2 + c_34 * x_4 + np.random.randint(-2, 2, N)
    x_5 = c_54 * x_4 + np.random.randint(6, 13, N)
    X = np.array([x_1, x_2, x_3, x_4, x_5]).T
    _, C = interaction_matrix(X, interaction_type='causal')
    w_3, w_5 = 1.0 / x_3.mean(), 1.0 / x_5.mean()
    # Label: reject (1) when the weighted Income + HealthStatus is low.
    y = (w_3 * X[:, 2] + w_5 * X[:, 4] < 2.0).astype(int)

    if (verbose):
        print('# Synthetic Example:')
        print('\t- x_1: Education       : 1 ~ 5')
        print('\t- x_2: JobSkill        : 1 ~ 10')
        print('\t- x_3: Income          : 10 ~ 100')
        print('\t- x_4: WorkPerDay       : 4 ~ 10')
        print('\t- x_5: HealthStatus : 1 ~ 10')
        print('# Causal Relationship:')
        print('\t- x_1 = e_1')
        print('\t- x_2 = {} * x_1 + e_2'.format(c_21))
        print('\t- x_3 = {} * x_2 + {} * x_4 + e_3'.format(c_32, c_34))
        print('\t- x_4 = e_4')
        print('\t- x_5 = {} * x_4 + e_5'.format(c_54))
        # NOTE(review): when verbose, the estimated interaction matrix C is
        # overwritten with the analytical one, so the extractor behaves
        # differently depending on `verbose` — confirm this is intended.
        C = np.array([[0.0] * 5, [c_21] + [0.0] * 4,
                      [c_21 * c_32, c_32, 0.0, c_34, 0.0], [0.0] * 5,
                      [0.0] * 3 + [c_54, 0.0]])
        print('# Interaction Matrix: \n', C)
        # B is the direct-effect adjacency, kept for the (disabled) DAG plot.
        B = np.array([[0.0] * 5, [c_21] + [0.0] * 4,
                      [0.0, c_32, 0.0, c_34,
                       0.0], [0.0] * 5, [0.0] * 3 + [c_54, 0.0]])
        # dot = make_dot(B, labels=names)
        # dot.attr('graph', fontname='arial')
        # dot.render('demonstration_causal_dag')

    if (clf == 'lr'):
        mdl = LogisticRegression(penalty='l2',
                                 C=1.0,
                                 fit_intercept=True,
                                 solver='liblinear',
                                 max_iter=10000)
        mdl = mdl.fit(X, y)
        # print('# Model Coef.: \n', mdl.coef_, (mdl.intercept_))
        oce = LinearOrderedActionExtractor(
            mdl,
            X,
            feature_names=names,
            feature_types=['I', 'I', 'I', 'I', 'I'],
            feature_constraints=['INC'] * 2 + [''] * 3,
            target_name='Loan',
            target_labels=['Accept', 'Reject'],
            interaction_matrix=C)
        print('# Classifier: LogisticRegression')
    elif (clf == 'mlp'):
        mdl = MLPClassifier(hidden_layer_sizes=(30, ),
                            max_iter=500,
                            activation='relu',
                            alpha=0.0001)
        mdl = mdl.fit(X, y)
        oce = MLPOrderedActionExtractor(
            mdl,
            X,
            feature_names=names,
            feature_types=['I', 'I', 'I', 'I', 'I'],
            feature_constraints=['INC'] * 2 + [''] * 3,
            target_name='Loan',
            target_labels=['Accept', 'Reject'],
            interaction_matrix=C)
        print('# Classifier: MultiLayerPerceptron')
    elif (clf == 'rf'):
        mdl = RandomForestClassifier(n_estimators=30, max_depth=6)
        mdl = mdl.fit(X, y)
        oce = ForestOrderedActionExtractor(
            mdl,
            X,
            feature_names=names,
            feature_types=['I', 'I', 'I', 'I', 'I'],
            feature_constraints=['INC'] * 2 + [''] * 3,
            target_name='Loan',
            target_labels=['Accept', 'Reject'],
            interaction_matrix=C)
        print('# Classifier: RandomForest')
    else:
        # Fix: an unknown `clf` previously fell through and raised a
        # confusing NameError at mdl.predict below; fail fast instead.
        raise ValueError(
            "unknown classifier '{}': expected 'lr', 'mlp', or 'rf'".format(
                clf))
    print()

    denied_individual = X[mdl.predict(X) == 1]
    for i, x in enumerate(denied_individual[:n]):
        print('# {}-th Denied Individual: '.format(i + 1))
        print('\t- x_1: Education       : {}'.format(x[0]))
        print('\t- x_2: JobSkill        : {}'.format(x[1]))
        print('\t- x_3: Income          : {}'.format(x[2]))
        print('\t- x_4: WorkPerDay      : {}'.format(x[3]))
        print('\t- x_5: HealthStatus    : {}'.format(x[4]))
        for cost in costs:
            print('## {} (non-order):'.format(cost))
            oa = oce.extract(x,
                             K=5,
                             ordering=False,
                             post_ordering=False,
                             cost_type=cost)
            if (oa != -1): print(oa)
            print('## {} + C_ord: '.format(cost))
            for g in [1.0]:
                oa = oce.extract(x,
                                 K=5,
                                 gamma=g,
                                 ordering=True,
                                 cost_type=cost,
                                 ordering_cost_type=ordering_cost)
                if (oa != -1): print(oa)
        print()
    print('---')
# Example #5
    def extract(self,
                x,
                W=[],
                K=5,
                gamma=1.0,
                ordering=True,
                post_ordering=False,
                post_ordering_mode='greedy',
                intervention=False,
                cost_type='uniform',
                ordering_cost_type='uniform',
                use_threshold=True,
                solver='cplex',
                time_limit=300,
                log_stream=False,
                mdl_name='',
                log_name='',
                init_sols={},
                verbose=False):
        """Extract an ordered recourse action for instance ``x``.

        Builds a mixed-integer program with PuLP and solves it with CPLEX.
        The MIP chooses at most K feature changes (minimizing an ordinal
        cost) and, when ``ordering`` is True, also chooses the order in
        which to apply them, penalized by ``gamma`` times an ordering cost.

        Parameters
        ----------
        x : array-like
            Feature vector of the individual to generate recourse for.
        W : list of int
            Candidate feature indices; an empty list means all features.
        K : int
            Maximum number of features to act on (capped at ``self.D_``).
        gamma : float
            Weight of the ordering cost; 0 disables ordering.
        ordering : bool
            Optimize the action order inside the MIP.
        post_ordering : bool
            Order the extracted action afterwards instead (handled by
            ``OrderedAction``).
        post_ordering_mode : str
            Post-ordering strategy, e.g. 'greedy'.
        intervention : bool
            Propagate actions through the causal interaction matrix; also
            forces ``cost_type`` to 'normalize'.
        cost_type : str
            Ordinal cost model ('uniform', 'TLPS', 'MAD', 'DACE', 'SCM',
            'NONE', ...).
        ordering_cost_type : str
            Feature-weight model used for the ordering cost.
        use_threshold : bool
            Forwarded to ``generateActions``.
        solver : str
            Unused here; CPLEX is always invoked below.
        time_limit : int
            CPLEX time limit in seconds.
        log_stream : bool
            Show the CPLEX log.
        mdl_name : str
            Name given to the PuLP problem.
        log_name : str
            If non-empty, the model is dumped to ``log_name + '.lp'``.
        init_sols : dict
            Warm-start values keyed by variable-family name
            ('act', 'pi', 'xi', 'phi', 'sigma').
        verbose : bool
            Unused in this method.

        Returns
        -------
        OrderedAction or int
            The extracted action, or -1 when the solver does not finish
            with status 1 (optimal) within the time limit.
        """
        # NOTE(review): W and init_sols are mutable default arguments; they
        # are only read here, but callers should not rely on identity.
        self.x_ = x
        # Current (pre-action) prediction; the recourse must flip it.
        self.y_ = self.mdl_.predict(x.reshape(1, -1))[0]

        self.W_ = W if len(W) != 0 else list(range(self.D_))
        self.K_ = min(K, self.D_)
        # gamma and ordering imply each other: zero weight disables
        # ordering, and disabled ordering zeroes the weight.
        if (gamma == 0.0): ordering = False
        if (ordering == False): gamma = 0.0
        self.gamma_ = gamma

        # Candidate action grid A_, ordinal costs C_, and per-leaf
        # indicator structure I_ (used by the tree-ensemble constraints).
        cost_type = 'normalize' if intervention else cost_type
        self.A_, self.C_, self.I_ = self.AC_.generateActions(
            x, cost_type=cost_type, use_threshold=use_threshold)
        self.lb_ = [np.min(A_d) for A_d in self.A_]
        self.ub_ = [np.max(A_d) for A_d in self.A_]
        C = self.M_  # feature-interaction matrix used by the ordering part
        prob = pulp.LpProblem(mdl_name)

        # variables
        # act[d]: realized action value on feature d.
        act = [
            pulp.LpVariable('act_{}'.format(d),
                            cat='Continuous',
                            lowBound=self.lb_[d],
                            upBound=self.ub_[d]) for d in range(self.D_)
        ]
        # pi[d][i]: selects the i-th candidate action for feature d.
        pi = [[
            pulp.LpVariable('pi_{}_{}'.format(d, i), cat='Binary')
            for i in range(len(self.A_[d]))
        ] for d in range(self.D_)]
        # xi[t]: output of tree t (relaxed to [0, 1]).
        xi = [
            pulp.LpVariable('xi_{}'.format(t),
                            cat='Continuous',
                            lowBound=0,
                            upBound=1) for t in range(self.T_)
        ]
        # phi[t][l]: leaf-selection indicator for leaf l of tree t.
        phi = [[
            pulp.LpVariable('phi_{}_{}'.format(t, l), cat='Binary')
            for l in range(self.L_[t])
        ] for t in range(self.T_)]
        if (ordering):
            # NOTE(review): raw K is passed here while the rest of the
            # model uses self.K_ = min(K, self.D_); confirm this is
            # intentional when K > D.
            LB, UB = self.getOrderingBounds(K, C)
            # sigma[k][d]: feature d is acted on at step k.
            sigma = [[
                pulp.LpVariable('sig_{}_{}'.format(k, d), cat='Binary')
                for d in range(self.D_)
            ] for k in range(self.K_)]
            # pik[k][d][i]: step-k copy of pi[d][i].
            pik = [[[
                pulp.LpVariable('pik_{}_{}_{}'.format(k, d, i), cat='Binary')
                for i in range(len(self.A_[d]))
            ] for d in range(self.D_)] for k in range(self.K_)]
            # epsilon[k][d]: residual action at step k after accounting
            # for interaction effects accumulated in delta.
            epsilon = [[
                pulp.LpVariable('ips_{}_{}'.format(k, d), cat='Continuous')
                for d in range(self.D_)
            ] for k in range(self.K_)]
            # delta[k][d]: interaction effect on feature d accumulated
            # from steps before k.
            delta = [[
                pulp.LpVariable('dlt_{}_{}'.format(k, d), cat='Continuous')
                for d in range(self.D_)
            ] for k in range(self.K_)]
            # zeta[k]: absolute-value linearization of the step-k
            # ordering cost.
            zeta = [
                pulp.LpVariable('zta_{}'.format(k),
                                cat='Continuous',
                                lowBound=0) for k in range(self.K_)
            ]
        if (cost_type == 'SCM' or cost_type == 'DACE'):
            # dist[d]: absolute-value linearization of the per-feature
            # cost under SCM/DACE cost models.
            dist = [
                pulp.LpVariable('dist_{}'.format(d),
                                cat='Continuous',
                                lowBound=0) for d in range(self.D_)
            ]

        # set initial values {val: [val_1, val_2, ...], ...}
        if (len(init_sols) != 0):
            for val, sols in init_sols.items():
                if (val == 'act'):
                    for d, v in enumerate(sols):
                        act[d].setInitialValue(v)
                elif (val == 'pi'):
                    for d, vs in enumerate(sols):
                        for i, v in enumerate(vs):
                            pi[d][i].setInitialValue(v)
                elif (val == 'xi'):
                    for d, v in enumerate(sols):
                        xi[d].setInitialValue(v)
                elif (val == 'phi'):
                    for t, vs in enumerate(sols):
                        for l, v in enumerate(vs):
                            phi[t][l].setInitialValue(v)
                elif (val == 'sigma'):
                    # sols is the ordered list of feature indices acted on.
                    # NOTE(review): the inner loops index sigma with the
                    # outer `d`, not `d_` — this looks like it should be
                    # sigma[l][d_] / sigma[k][d_]; confirm the intended
                    # warm-start indexing before relying on it.
                    for l, d in enumerate(sols):
                        for d_ in range(self.D_):
                            v = 1 if d_ == d else 0
                            sigma[l][d].setInitialValue(v)
                    for k in range(l + 1, self.K_):
                        for d_ in range(self.D_):
                            sigma[k][d].setInitialValue(0)

        # objective function
        # In PuLP the first `prob += <expr>` sets the objective; the
        # subsequent `>= 0` additions are constraints bounding it below.
        if (ordering):
            if (cost_type == 'NONE'):
                prob += pulp.lpDot([self.gamma_] * self.K_, zeta)
                prob += pulp.lpDot([self.gamma_] * self.K_, zeta) >= 0
            elif (cost_type == 'SCM' or cost_type == 'DACE'):
                prob += pulp.lpSum(dist) + pulp.lpDot([self.gamma_] * self.K_,
                                                      zeta)
                prob += pulp.lpSum(dist) + pulp.lpDot([self.gamma_] * self.K_,
                                                      zeta) >= 0
                # dist[d] >= |sum_i C_[d][i] * pi[i]| (absolute value).
                for d in range(self.D_):
                    prob += dist[d] - pulp.lpDot(flatten(self.C_[d]),
                                                 flatten(pi)) >= 0
                    prob += dist[d] + pulp.lpDot(flatten(self.C_[d]),
                                                 flatten(pi)) >= 0
            else:
                prob += pulp.lpDot(flatten(self.C_), flatten(pi)) + pulp.lpDot(
                    [self.gamma_] * self.K_, zeta)
                prob += pulp.lpDot(flatten(self.C_), flatten(pi)) + pulp.lpDot(
                    [self.gamma_] * self.K_, zeta) >= 0
        else:
            if (cost_type == 'SCM' or cost_type == 'DACE'):
                prob += pulp.lpSum(dist)
                prob += pulp.lpSum(dist) >= 0
                for d in range(self.D_):
                    prob += dist[d] - pulp.lpDot(flatten(self.C_[d]),
                                                 flatten(pi)) >= 0
                    prob += dist[d] + pulp.lpDot(flatten(self.C_[d]),
                                                 flatten(pi)) >= 0
            else:
                prob += pulp.lpDot(flatten(self.C_), flatten(pi))
                prob += pulp.lpDot(flatten(self.C_), flatten(pi)) >= 0

        # constraint: sum_{i} pi_{d,i} == 1
        for d in range(self.D_):
            prob += pulp.lpSum(pi[d]) == 1

        # constraint: sum_{d} pi_{d,i_d} >= D - K
        # (at least D - K features keep their zero action, i.e. at most K
        # features change).
        prob += pulp.lpSum(
            [pi[d][list(self.A_[d]).index(0)]
             for d in range(self.D_)]) >= self.D_ - self.K_

        # constraint: sum_{d in G} a_{d,1} pi_{d,1} = 0
        # (one-hot categorical groups must stay consistent).
        for G in self.feature_categories_:
            prob += pulp.lpDot(
                [self.A_[d][0] for d in G if len(self.A_[d]) != 0],
                [pi[d][0] for d in G if len(self.A_[d]) != 0]) == 0

        # constraint: sum_{d} w_d xi_d + b >= 0
        # (the post-action decision score must flip the current label;
        # 1e-8 enforces a strict inequality numerically).
        if (self.y_ == 0):
            prob += pulp.lpDot(self.coef_, xi) >= -self.intercept_ + 1e-8
        else:
            prob += pulp.lpDot(self.coef_, xi) <= -self.intercept_ - 1e-8

        # constraint: a_d = sum_{i} a_{d,i} pi_{d,i}
        if (intervention):
            # Under intervention, the realized action includes downstream
            # causal effects: act = (B_ + I) applied to the chosen actions.
            _, B_ = interaction_matrix(self.X_, interaction_type='causal')
            # B_, _ = interaction_matrix(self.X_, interaction_type='causal')
            B = B_ + np.eye(self.D_)
            for d in range(self.D_):
                A_d = [[B[d][d_] * a for a in self.A_[d_]]
                       for d_ in range(self.D_)]
                prob += act[d] - pulp.lpDot(flatten(A_d), flatten(pi)) == 0
        else:
            for d in range(self.D_):
                prob += act[d] - pulp.lpDot(self.A_[d], pi[d]) == 0

        # constraints (Tree Ensemble):
        for t in range(self.T_):
            # constraint: sum_{l} phi_{t,l} = 1
            prob += pulp.lpSum(phi[t]) == 1
            # constraint: xi_t = sum_{l} h_{t,l} phi_{t,l}
            prob += xi[t] - pulp.lpDot(self.H_[t], phi[t]) == 0
            # constraint: D * phi_{t,l} <= sum_{d} sum_{i in I_{t,l,d}} pi_{d,i}
            for l in range(self.L_[t]):
                anc = self.AC_.ancestors_[t][l]
                prob += len(anc) * phi[t][l] - pulp.lpDot(
                    flatten([self.I_[t][l][d]
                             for d in anc]), flatten([pi[d]
                                                      for d in anc])) <= 0

        if (ordering):
            # I_act[d][i] == 1 iff candidate action i on feature d is a
            # real (non-zero, beyond tolerance) change.
            I_act = [(abs(a) > self.tol_).astype(int) for a in self.A_]

            # constraint: sigma_{k,d} = sum_{i} pi^(k)_{d,i}
            for k in range(self.K_):
                for d in range(self.D_):
                    # Categorical features already at 1 cannot be "turned
                    # on" again, so they can never occupy an ordering slot.
                    if (d in self.feature_categories_flatten_
                            and self.x_[d] == 1):
                        prob += sigma[k][d] == 0
                    else:
                        prob += pulp.lpDot(I_act[d],
                                           pik[k][d]) - sigma[k][d] == 0

            # constraint: pis_{d,i} = sum_{k} pi^(k)_{d,i}
            for d in range(self.D_):
                for i in range(len(self.A_[d])):
                    prob += pulp.lpSum([pik[k][d][i] for k in range(self.K_)
                                        ]) - pi[d][i] == 0

            # constraint: sum_{d in G} a_{d,1} pi^(k)_{d,1} = 0
            # NOTE(review): `k` here is the leftover loop variable from the
            # loop above (k == self.K_ - 1), so this only constrains the
            # last step — confirm whether all k were intended.
            for G in self.feature_categories_:
                prob += pulp.lpDot(
                    [self.A_[d][0] for d in G if len(self.A_[d]) != 0],
                    [pik[k][d][0] for d in G if len(self.A_[d]) != 0]) == 0

            # constraint: sum_{k} sigma_{k,d}  <= 1
            for d in range(self.D_):
                prob += pulp.lpSum([sigma[k][d] for k in range(self.K_)]) <= 1

            # constraint: sum_{d} sigma_{k,d}  <= 1
            for k in range(self.K_):
                prob += pulp.lpSum(sigma[k]) <= 1

            # constraint: sum_{d} sigma_{k,d}  >= sum_{d} sigma_{k+1,d}
            # (used steps are packed at the front).
            for k in range(self.K_ - 1):
                prob += pulp.lpSum(sigma[k]) - pulp.lpSum(sigma[k + 1]) >= 0

            # constraint: delta_{0,d} = 0
            for d in range(self.D_):
                prob += delta[0][d] == 0

            # constraint: delta_{k,d} = sum_{l}^{k-1} sum_{d'} C_{d,d'} epsilon_{l,d'} = sum_{d'} C_{d,d'} epsilon_{k-1,d'} + delta_{k-1,d}
            for k in range(1, self.K_):
                for d in range(self.D_):
                    # prob += delta[k][d] - pulp.lpDot(list(C[d])*(k), flatten(epsilon[:k+1])) == 0
                    prob += delta[k][d] - delta[k - 1][d] - pulp.lpDot(
                        C[d], epsilon[k - 1]) == 0

            # constraint: epsilon_{k,d} >= sum_{i} a_{d,i} pi^(k)_{d,i} - delta_{k,d} - U_d (1 - sigma_{k,d})
            # constraint: epsilon_{k,d} <= sum_{i} a_{d,i} pi^(k)_{d,i} - delta_{k,d} - L_d (1 - sigma_{k,d})
            # constraint: epsilon_{k,d} >= L_d sigma_{k,d}
            # constraint: epsilon_{k,d} <= U_d sigma_{k,d}
            # (big-M linking: epsilon equals the residual action when
            # sigma_{k,d} = 1 and is forced to 0 otherwise).
            for k in range(self.K_):
                for d in range(self.D_):
                    prob += epsilon[k][d] - pulp.lpDot(self.A_[d], pik[k][
                        d]) + delta[k][d] - UB[k][d] * sigma[k][d] >= -UB[k][d]
                    prob += epsilon[k][d] - pulp.lpDot(self.A_[d], pik[k][
                        d]) + delta[k][d] - LB[k][d] * sigma[k][d] <= -LB[k][d]
                    prob += epsilon[k][d] - LB[k][d] * sigma[k][d] >= 0
                    prob += epsilon[k][d] - UB[k][d] * sigma[k][d] <= 0

            # constraint: zeta_k >= sum_{d} w_d * epsilon_{k,d}
            # constraint: zeta_k >= - sum_{d} w_d * epsilon_{k,d}
            weights = self.AC_.getFeatureWeight(cost_type=ordering_cost_type)
            for k in range(self.K_):
                prob += zeta[k] - pulp.lpDot(weights, epsilon[k]) >= 0
                prob += zeta[k] + pulp.lpDot(weights, epsilon[k]) >= 0

        if (len(log_name) != 0): prob.writeLP(log_name + '.lp')
        s = time.perf_counter()
        # 'set output clonelog -1' suppresses CPLEX clone-log files.
        prob.solve(solver=pulp.CPLEX_PY(msg=log_stream,
                                        warm_start=(len(init_sols) != 0),
                                        timeLimit=time_limit,
                                        options=['set output clonelog -1']))
        t = time.perf_counter() - s
        if (prob.status != 1):
            # prob.solve(solver=pulp.CPLEX_PY(msg=True))
            return -1
        obj = prob.objective.value()
        # Recover the ordinal cost from the solved variables.
        if (cost_type == 'SCM' or cost_type == 'DACE'):
            c_ordinal = np.sum([d.value() for d in dist])
        else:
            c_ordinal = np.sum([
                c * round(p.value())
                for c, p in zip(flatten(self.C_), flatten(pi))
            ])
        c_ordering = np.sum([z.value() for z in zeta]) if ordering else 0.0

        # Realized per-feature action values from the selected pi.
        a = np.array([
            np.sum([
                self.A_[d][i] * round(pi[d][i].value())
                for i in range(len(self.A_[d]))
            ]) for d in range(self.D_)
        ])
        # a_actual (interventional realized action) is computed but unused.
        if (intervention): a_actual = np.array([a_.value() for a_ in act])
        sig = np.array([[round(s.value()) for s in sigma[k]]
                        for k in range(self.K_)]) if ordering else []
        ret = OrderedAction(
            x,
            a,
            sig,
            gamma=gamma,
            time=t,
            obj=obj,
            c_ordinal=c_ordinal,
            c_ordering=c_ordering,
            target_name=self.target_name_,
            target_labels=self.target_labels_,
            label_before=int(self.y_),
            label_after=int(1 - self.y_),
            feature_names=self.feature_names_,
            feature_types=self.feature_types_,
            feature_categories=self.feature_categories_,
            interaction_matrix=self.M_,
            post_ordering=post_ordering,
            weights=self.AC_.getFeatureWeight(cost_type=ordering_cost_type),
            post_ordering_mode=post_ordering_mode)

        # save initial values (warm start for a subsequent extract call)
        self.init_sols_ = {}
        self.init_sols_['act'] = [a_ for a_ in a]
        self.init_sols_['pi'] = []
        for pi_d in pi:
            self.init_sols_['pi'].append([round(p.value()) for p in pi_d])
        # `x` inside the comprehension is local to it (Python 3) and does
        # not clobber the method argument.
        self.init_sols_['xi'] = [np.clip(x.value(), 0, 1) for x in xi]
        self.init_sols_['phi'] = []
        for phi_t in phi:
            self.init_sols_['phi'].append([round(p.value()) for p in phi_t])
        if (ret.ordered_):
            self.init_sols_['sigma'] = ret.order_

        return ret
# Example #6
def exp_real_sens(dataset='h',
                  n=10,
                  verbose=False,
                  time_limit=300,
                  costs=['TLPS', 'MAD', 'DACE', 'SCM']):
    """Gamma-sensitivity experiment for ordered recourse on real data.

    Fits a logistic regression, then for each of the first `n` denied
    individuals extracts actions under every cost type, both without
    ordering and with ordering at each gamma in 10^-3 .. 10^2, recording
    the ordinal and ordering costs (-1 on solver failure).
    """
    from utils import DatasetHelper

    helper = DatasetHelper(dataset=dataset, feature_prefix_index=False)
    X_tr, X_ts, y_tr, y_ts = helper.train_test_split()
    B, M = interaction_matrix(X_tr, interaction_type='causal')

    model = LogisticRegression(penalty='l2',
                               C=1.0,
                               fit_intercept=True,
                               solver='liblinear',
                               max_iter=10000).fit(X_tr, y_tr)
    oce = LinearOrderedActionExtractor(model,
                                       X_tr,
                                       feature_names=helper.feature_names,
                                       feature_types=helper.feature_types,
                                       feature_categories=helper.feature_categories,
                                       feature_constraints=helper.feature_constraints,
                                       target_name=helper.target_name,
                                       target_labels=helper.target_labels,
                                       interaction_matrix=M)

    rejected = X_ts[model.predict(X_ts) == 1]
    gammas = [10**e for e in range(-3, 3)]
    # One result list per plain cost key plus one per (cost, gamma) pair.
    res_dict = {c: [] for c in costs}
    res_dict_ord = {c: [] for c in costs}
    for c in costs:
        for g in gammas:
            combined = '{}_ORDER_{}'.format(c, g)
            res_dict[combined] = []
            res_dict_ord[combined] = []

    for idx, instance in enumerate(rejected[:n]):
        print('# {}-th Denied Individual:'.format(idx + 1))

        for cost in costs:
            print('## {}: '.format(cost))
            result = oce.extract(instance,
                                 K=4,
                                 ordering=False,
                                 post_ordering=True,
                                 post_ordering_mode='greedy',
                                 cost_type=cost,
                                 ordering_cost_type='standard',
                                 time_limit=time_limit)
            if (result != -1):
                print(result)
                res_dict[cost].append(result.c_ordinal_)
                res_dict_ord[cost].append(result.c_ordering_)
            else:
                # Record failures explicitly so columns stay aligned.
                res_dict[cost].append(-1)
                res_dict_ord[cost].append(-1)

            print('## {} + C_order: '.format(cost))
            for gamma in gammas:
                key = '{}_ORDER_{}'.format(cost, gamma)
                result = oce.extract(instance,
                                     K=4,
                                     gamma=gamma,
                                     ordering=True,
                                     cost_type=cost,
                                     ordering_cost_type='standard',
                                     time_limit=time_limit)
                if (result != -1):
                    print(result)
                    res_dict[key].append(result.c_ordinal_)
                    res_dict_ord[key].append(result.c_ordering_)
                else:
                    res_dict[key].append(-1)
                    res_dict_ord[key].append(-1)

        if (verbose): print('---')

    print('# Results')
    print('+ ', res_dict)
    print('+ ', res_dict_ord)
    print('---')

    if (verbose == False):
        import pandas as pd
        pd.DataFrame(res_dict).to_csv(
            './res/{}_res_dist_sens.csv'.format(helper.dataset_name),
            index=False)
        pd.DataFrame(res_dict_ord).to_csv(
            './res/{}_res_ord_sens.csv'.format(helper.dataset_name),
            index=False)
# Example #7
def exp_synthetic(n=10, verbose=False):
    """Experiment on synthetic loan data with a logistic-regression model.

    Generates a 5-feature dataset from a known causal model, fits a
    logistic regression, and for the first ``n`` denied individuals
    extracts actions under each cost type, without ordering and with
    ordering over a gamma grid, recording costs and solve times.

    Parameters
    ----------
    n : int
        Number of denied individuals to process.
    verbose : bool
        If True, use a single gamma (1.0) and skip writing CSV results.
    """
    N = 1000
    # Structural-equation coefficients of the synthetic causal model.
    c_21 = 1
    c_32 = 6
    c_34 = 4
    c_54 = -0.5
    names = [
        'Education', 'JobSkill', 'Income(K)', 'WorkPerDay', 'HealthStatus'
    ]
    # NOTE: the draw order (x_1, x_2, x_4, x_3, x_5) fixes the RNG stream;
    # no seed is set in this function, so results vary across runs unless
    # the caller seeds numpy — TODO confirm that is intended.
    x_1 = np.random.randint(1, 5, N)
    x_2 = c_21 * x_1 + np.random.randint(-1, 1, N)
    x_4 = np.random.randint(2, 6, N) * 2
    x_3 = c_32 * x_2 + c_34 * x_4 + np.random.randint(-2, 2, N)
    x_5 = c_54 * x_4 + np.random.randint(6, 13, N)
    X = np.array([x_1, x_2, x_3, x_4, x_5]).T
    _, C = interaction_matrix(X, interaction_type='causal')
    w_3, w_5 = 1.0 / x_3.mean(), 1.0 / x_5.mean()
    # Label: reject (1) when weighted Income + HealthStatus is low.
    y = (w_3 * X[:, 2] + w_5 * X[:, 4] < 2.0).astype(int)

    mdl = LogisticRegression(penalty='l2',
                             C=1.0,
                             fit_intercept=True,
                             solver='liblinear',
                             max_iter=10000)
    mdl = mdl.fit(X, y)
    print('# Model Coef.: \n', mdl.coef_, (mdl.intercept_))
    oce = LinearOrderedActionExtractor(mdl,
                                       X,
                                       feature_names=names,
                                       feature_types=['I', 'I', 'I', 'I', 'I'],
                                       feature_constraints=['INC'] * 2 +
                                       [''] * 3,
                                       target_name='Loan',
                                       target_labels=['Accept', 'Reject'],
                                       interaction_matrix=C)

    denied_individual = X[mdl.predict(X) == 1]
    costs = ['TLPS', 'MAD', 'DACE', 'SCM']
    # verbose mode checks a single gamma; otherwise sweep 0.1 .. 2.0.
    gammas = [1.0] if verbose else [0.1 + i * 0.1 for i in range(20)]
    # One result list per cost key and per (cost, gamma) key.
    res_dict = {}
    res_dict_ord = {}
    res_dict_time = {}
    for key in costs:
        res_dict[key] = []
        res_dict_ord[key] = []
        res_dict_time[key] = []
    for c in costs:
        for g in gammas:
            key = c + '_ORDER_{}'.format(g)
            res_dict[key] = []
            res_dict_ord[key] = []
            res_dict_time[key] = []

    for i, x in enumerate(denied_individual[:n]):
        print('# {}-th Denied Individual: '.format(i + 1), x)

        for cost in costs:
            print('## {}: '.format(cost))
            oa = oce.extract(x,
                             K=5,
                             ordering=False,
                             post_ordering=True,
                             post_ordering_mode='greedy',
                             cost_type=cost,
                             ordering_cost_type='standard')
            # NOTE(review): on failure (oa == -1) nothing is appended,
            # unlike exp_real_sens which appends -1; ragged columns will
            # make the DataFrame construction below fail — confirm that
            # failures are expected to be impossible here.
            if (oa != -1):
                print(oa)
                res_dict[cost].append(oa.c_ordinal_)
                res_dict_ord[cost].append(oa.c_ordering_)
                res_dict_time[cost].append(oa.time_)

            if (verbose): print('## {} + C_order: '.format(cost))
            for gamma in gammas:
                oa = oce.extract(x,
                                 K=5,
                                 gamma=gamma,
                                 ordering=True,
                                 cost_type=cost,
                                 ordering_cost_type='standard')
                if (oa != -1):
                    print(oa)
                    res_dict[cost + '_ORDER_{}'.format(gamma)].append(
                        oa.c_ordinal_)
                    res_dict_ord[cost + '_ORDER_{}'.format(gamma)].append(
                        oa.c_ordering_)
                    res_dict_time[cost + '_ORDER_{}'.format(gamma)].append(
                        oa.time_)

        print('---')

    # Persist results only in the (non-verbose) full-sweep mode.
    if (verbose == False):
        import pandas as pd
        res_dist = pd.DataFrame(res_dict)
        res_dist.to_csv('./res/synthetic_res_dist_lr.csv', index=False)
        res_ord = pd.DataFrame(res_dict_ord)
        res_ord.to_csv('./res/synthetic_res_ord_lr.csv', index=False)
        res_time = pd.DataFrame(res_dict_time)
        res_time.to_csv('./res/synthetic_res_time_lr.csv', index=False)
# Example #8
def exp_real(clf='lr',
             dataset='h',
             n=10,
             verbose=False,
             costs=('TLPS', 'MAD', 'DACE', 'SCM'),
             suf='',
             tol=1e-6):
    """Run the ordered-action extraction experiment on a real dataset.

    Trains the classifier named by ``clf``, wraps it in the matching
    ordered-action extractor, and for the first ``n`` denied test
    individuals extracts actions for every cost type twice: once without
    ordering (greedy post-ordering) and once with ordering (one run per
    gamma). Costs, orderings, and solve times are printed and, unless
    ``verbose`` is True, written as CSV files under ``./res/``.

    Parameters
    ----------
    clf : {'lr', 'mlp', 'rf'}
        Classifier to train and explain.
    dataset : str
        Dataset key understood by ``DatasetHelper``.
    n : int
        Number of denied individuals to process.
    verbose : bool
        If True, print per-individual separators and skip the CSV output.
    costs : sequence of str
        Cost-type names forwarded to ``oce.extract``.
    suf : str
        Suffix embedded in the output CSV file names.
    tol : float
        Tolerance forwarded to the MLP extractor only.

    Raises
    ------
    ValueError
        If ``clf`` is not one of the supported classifier names.
    """
    from utils import DatasetHelper

    D = DatasetHelper(dataset=dataset, feature_prefix_index=False)
    X_tr, X_ts, y_tr, y_ts = D.train_test_split()
    B, M = interaction_matrix(X_tr, interaction_type='causal')

    # Dataset metadata shared by all three extractor constructors.
    oce_kwargs = dict(feature_names=D.feature_names,
                      feature_types=D.feature_types,
                      feature_categories=D.feature_categories,
                      feature_constraints=D.feature_constraints,
                      target_name=D.target_name,
                      target_labels=D.target_labels,
                      interaction_matrix=M)

    if (clf == 'lr'):
        mdl = LogisticRegression(penalty='l2',
                                 C=1.0,
                                 fit_intercept=True,
                                 solver='liblinear',
                                 max_iter=10000)
        mdl = mdl.fit(X_tr, y_tr)
        # print('# Model Coef.: \n', mdl.coef_, (mdl.intercept_))
        oce = LinearOrderedActionExtractor(mdl, X_tr, **oce_kwargs)
    elif (clf == 'mlp'):
        mdl = MLPClassifier(hidden_layer_sizes=(200, ),
                            max_iter=500,
                            activation='relu',
                            alpha=0.0001)
        mdl = mdl.fit(X_tr, y_tr)
        oce = MLPOrderedActionExtractor(mdl, X_tr, tol=tol, **oce_kwargs)
    elif (clf == 'rf'):
        # Tree depth depends on the dataset key — presumably 'g' needs the
        # deeper trees; TODO confirm against the dataset definitions.
        h = 6 if dataset == 'g' else 4
        mdl = RandomForestClassifier(n_estimators=100, max_depth=h)
        mdl = mdl.fit(X_tr, y_tr)
        oce = ForestOrderedActionExtractor(mdl, X_tr, **oce_kwargs)
    else:
        # Previously an unknown clf fell through and crashed later with a
        # NameError on `mdl`; fail fast with a clear message instead.
        raise ValueError(
            "unknown clf {!r}: expected 'lr', 'mlp', or 'rf'".format(clf))

    # Individuals predicted as class 1 are the ones needing recourse.
    denied_individual = X_ts[mdl.predict(X_ts) == 1]
    gammas = [1.0]

    # One result column per plain cost, plus one per (cost, gamma) pair,
    # in the same order the experiment loop visits them.
    keys = list(costs) + [
        '{}_ORDER_{}'.format(c, g) for c in costs for g in gammas
    ]
    res_dict = {key: [] for key in keys}
    res_dict_ord = {key: [] for key in keys}
    res_dict_time = {key: [] for key in keys}

    def _record(oa, key):
        # `extract` returns -1 on failure; append -1 markers in that case so
        # all columns keep one entry per individual.
        if (oa != -1):
            print(oa)
            res_dict[key].append(oa.c_ordinal_)
            res_dict_ord[key].append(oa.c_ordering_)
            res_dict_time[key].append(oa.time_)
        else:
            res_dict[key].append(-1)
            res_dict_ord[key].append(-1)
            res_dict_time[key].append(-1)

    for i, x in enumerate(denied_individual[:n]):
        print('# {}-th Denied Individual:'.format(i + 1))

        for cost in costs:
            # Baseline: no ordering constraint, greedy post-hoc ordering.
            print('## {}: '.format(cost))
            oa = oce.extract(x,
                             K=4,
                             ordering=False,
                             post_ordering=True,
                             post_ordering_mode='greedy',
                             cost_type=cost,
                             ordering_cost_type='standard',
                             time_limit=300,
                             log_stream=False)
            _record(oa, cost)

            # Proposed: ordering optimized jointly, weighted by gamma.
            print('## {} + C_order: '.format(cost))
            for gamma in gammas:
                oa = oce.extract(x,
                                 K=4,
                                 gamma=gamma,
                                 ordering=True,
                                 cost_type=cost,
                                 ordering_cost_type='standard',
                                 time_limit=300,
                                 log_stream=False)
                _record(oa, '{}_ORDER_{}'.format(cost, gamma))

        if (verbose): print('---')

    print('# Results')
    print('+ ', res_dict)
    print('+ ', res_dict_ord)
    print('+ ', res_dict_time)
    print('---')

    if not verbose:
        import pandas as pd
        res_dist = pd.DataFrame(res_dict)
        res_dist.to_csv('./res/{}_res_dist_{}_{}.csv'.format(
            D.dataset_name, clf, suf),
                        index=False)
        res_ord = pd.DataFrame(res_dict_ord)
        res_ord.to_csv('./res/{}_res_ord_{}_{}.csv'.format(
            D.dataset_name, clf, suf),
                       index=False)
        res_time = pd.DataFrame(res_dict_time)
        res_time.to_csv('./res/{}_res_time_{}_{}.csv'.format(
            D.dataset_name, clf, suf),
                        index=False)