Example #1
def main():
    random.seed(0)

    # X, y = make_moons(n_samples=1000, noise=0.3, random_state=0)
    # X = StandardScaler().fit_transform(X)
    #
    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
    #
    # class_name = 'class'
    # columns = ['class', 'X0', 'X1']
    # df = pd.DataFrame(np.concatenate((y_train.reshape(-1, 1), X_train), axis=1), columns=columns)
    #
    # features_type = {'X0': 'double', 'X1': 'double', 'class': 'string'}
    # discrete = ['class']
    # continuous = ['X0', 'X1']
    # discrete_no_class = list(discrete)
    # discrete_no_class.remove(class_name)
    # possible_outcomes = list(df[class_name].unique())
    # if features_type[class_name] == 'string':
    #     possible_outcomes = [str(po) for po in possible_outcomes]
    # _, label_encoder = label_encode(df, discrete)
    #
    # columns_tmp = list(columns)
    # columns_tmp.remove(class_name)
    # idx_features = {i: col for i, col in enumerate(columns_tmp)}
    #
    # dataset = {
    #     'class_name': class_name,
    #     'columns': columns,
    #     'features_type': features_type,
    #     'discrete': discrete,
    #     'continuous': continuous,
    #     'label_encoder': label_encoder,
    #     'possible_outcomes': possible_outcomes,
    #     'idx_features': idx_features,
    # }

    dataset_name = 'german_credit.csv'
    path_data = './datasets/'
    dataset = prepare_german_dataset(dataset_name, path_data)

    X, y = dataset['X'], dataset['y']
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=0)

    class_name = dataset['class_name']
    columns = dataset['columns']
    discrete = dataset['discrete']
    continuous = dataset['continuous']
    features_type = dataset['features_type']
    possible_outcomes = dataset['possible_outcomes']
    label_encoder = dataset['label_encoder']

    yX = np.concatenate((y_train.reshape(-1, 1), X_train), axis=1)
    data = list()
    for i, col in enumerate(columns):
        data_col = yX[:, i]
        data_col = data_col.astype(int) if col in discrete else data_col
        data_col = data_col.astype(int) if features_type[col] == 'integer' else data_col
        data.append(data_col)
    # Transpose the per-column arrays into per-row records.
    data = list(map(list, zip(*data)))
    dfZ = pd.DataFrame(data=data, columns=columns)
    dfZ = label_decode(dfZ, discrete, label_encoder)

    dt, dt_dot = pyyadt.fit(dfZ,
                            class_name,
                            columns,
                            features_type,
                            discrete,
                            continuous,
                            filename='pyyadt_test',
                            path='./',
                            sep=';',
                            log=False)

    dt_dot.write_png('pyyadt_test.png')
    # img = Image.open('pyyadt_test.png')
    # img.show()

    # y_pred_cc, leaf_nodes = pyyadt.predict(dt, dfZ.to_dict('records'), class_name, features_type,
    #                                        discrete, continuous)

    idx_record2explain = 5  # 4
    x = dfZ.to_dict('records')[idx_record2explain]
    print(x)
    cc_outcome, rule, tree_path = pyyadt.predict_rule(dt, x, class_name,
                                                      features_type, discrete,
                                                      continuous)

    print(cc_outcome)
    for k, v in rule[1].items():
        print(k, v)
    print(tree_path)
    print('-------------------')

    diff_outcome = get_diff_outcome(cc_outcome, possible_outcomes)
    counterfactuals = pyyadt.get_counterfactuals(dt, tree_path, rule,
                                                 diff_outcome, class_name,
                                                 continuous, features_type)

    print(counterfactuals)

    for delta in counterfactuals:
        xcf = pyyadt.apply_counterfactual(x, delta, continuous, discrete,
                                          features_type)
        ycf, _, _ = pyyadt.predict_rule(dt, xcf, class_name, features_type,
                                        discrete, continuous)
        print(delta, ycf, xcf)
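
Example #1 is listed without its import preamble or entry point. A plausible preamble, assuming the usual LORE project layout (pyyadt.py, prepare_dataset.py and util.py at the project root, so the module paths below are guesses), would be:

import random

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

import pyyadt
from prepare_dataset import prepare_german_dataset  # assumed module path
from util import label_decode, get_diff_outcome     # assumed module path

if __name__ == '__main__':
    main()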
Example #2
File: lore.py  Project: msetzu/LORE
def explain(idx_record2explain,
            X2E,
            dataset,
            blackbox,
            ng_function=genetic_neighborhood,
            discrete_use_probabilities=False,
            continuous_function_estimation=False,
            returns_infos=False):
    random.seed(0)
    class_name = dataset['class_name']
    columns = dataset['columns']
    discrete = dataset['discrete']
    continuous = dataset['continuous']
    features_type = dataset['features_type']
    label_encoder = dataset['label_encoder']
    possible_outcomes = dataset['possible_outcomes']

    # Dataset Preprocessing
    dataset['feature_values'] = calculate_feature_values(
        X2E,
        columns,
        class_name,
        discrete,
        continuous,
        discrete_use_probabilities=discrete_use_probabilities,
        continuous_function_estimation=continuous_function_estimation)

    dfZ, x = dataframe2explain(X2E, dataset, idx_record2explain, blackbox)

    # Generate Neighborhood
    dfZ, Z = ng_function(dfZ, x, blackbox, dataset)

    # Build Decision Tree
    dt, dt_dot = pyyadt.fit(dfZ,
                            class_name,
                            columns,
                            features_type,
                            discrete,
                            continuous,
                            filename=dataset['name'],
                            path='./',
                            sep=';',
                            log=False)

    # Apply Black Box and Decision Tree on instance to explain
    bb_outcome = blackbox.predict(x.reshape(1, -1))[0]

    dfx = build_df2explain(blackbox, x.reshape(1, -1),
                           dataset).to_dict('records')[0]
    cc_outcome, rule, tree_path = pyyadt.predict_rule(dt, dfx, class_name,
                                                      features_type, discrete,
                                                      continuous)

    # Apply Black Box and Decision Tree on neighborhood
    y_pred_bb = blackbox.predict(Z)
    y_pred_cc, leaf_nodes = pyyadt.predict(dt, dfZ.to_dict('records'),
                                           class_name, features_type, discrete,
                                           continuous)

    # Update labels if necessary
    if class_name in label_encoder:
        cc_outcome = label_encoder[class_name].transform(np.array([cc_outcome]))[0]
        y_pred_cc = label_encoder[class_name].transform(y_pred_cc)

    # Extract Counterfactuals
    diff_outcome = get_diff_outcome(bb_outcome, possible_outcomes)
    counterfactuals = pyyadt.get_counterfactuals(dt, tree_path, rule,
                                                 diff_outcome, class_name,
                                                 continuous, features_type)

    explanation = (rule, counterfactuals)

    infos = {
        'bb_outcome': bb_outcome,
        'cc_outcome': cc_outcome,
        'y_pred_bb': y_pred_bb,
        'y_pred_cc': y_pred_cc,
        'dfZ': dfZ,
        'Z': Z,
        'dt': dt,
        'tree_path': tree_path,
        'leaf_nodes': leaf_nodes,
        'diff_outcome': diff_outcome
    }

    if returns_infos:
        return explanation, infos

    return explanation
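
For context, here is a hedged usage sketch of explain(), not part of the original file: it assumes a dataset dict prepared as in Example #1, a fitted scikit-learn classifier as the black box, and X2E as the encoded instances available for explanation; the RandomForestClassifier and the train/test split are illustrative choices.

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

dataset = prepare_german_dataset('german_credit.csv', './datasets/')
X, y = dataset['X'], dataset['y']

# Hold out part of the data as the pool of instances to explain (X2E).
X_train, X2E, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=0)

# Any classifier exposing fit/predict can play the role of the black box.
blackbox = RandomForestClassifier(n_estimators=100, random_state=0)
blackbox.fit(X_train, y_train)

explanation, infos = explain(idx_record2explain=0,
                             X2E=X2E,
                             dataset=dataset,
                             blackbox=blackbox,
                             returns_infos=True)
rule, counterfactuals = explanation
print(rule)
print(counterfactuals)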
Example #3
def main():
    random.seed(0)

    X, y = make_moons(n_samples=1000, noise=0.3, random_state=0)
    X = StandardScaler().fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=0)

    class_name = 'class'
    columns = ['class', 'X0', 'X1']
    df = pd.DataFrame(np.concatenate((y_train.reshape(-1, 1), X_train),
                                     axis=1),
                      columns=columns)

    features_type = {'X0': 'double', 'X1': 'double', 'class': 'string'}
    discrete = ['class']
    continuous = ['X0', 'X1']
    discrete_no_class = list(discrete)
    discrete_no_class.remove(class_name)
    possible_outcomes = list(df[class_name].unique())
    _, label_encoder = label_encode(df, discrete)

    columns_tmp = list(columns)
    columns_tmp.remove(class_name)
    idx_features = {i: col for i, col in enumerate(columns_tmp)}

    dataset = {
        'class_name': class_name,
        'columns': columns,
        'features_type': features_type,
        'discrete': discrete,
        'continuous': continuous,
        'label_encoder': label_encoder,
        'possible_outcomes': possible_outcomes,
        'idx_features': idx_features,
    }

    # dataset_name = 'german_credit.csv'
    # path_data = './datasets/'
    # dataset = prepare_german_dataset(dataset_name, path_data)
    #
    # X, y = dataset['X'], dataset['y']
    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
    #
    # class_name = dataset['class_name']
    # columns = dataset['columns']
    # discrete = dataset['discrete']
    # continuous = dataset['continuous']
    # features_type = dataset['features_type']
    # label_encoder = dataset['label_encoder']

    yX = np.concatenate((y_train.reshape(-1, 1), X_train), axis=1)
    data = list()
    for i, col in enumerate(columns):
        data_col = yX[:, i]
        data_col = data_col.astype(int) if col in discrete else data_col
        data_col = data_col.astype(int) if features_type[col] == 'integer' else data_col
        data.append(data_col)
    # Transpose the per-column arrays into per-row records.
    data = list(map(list, zip(*data)))
    dfZ = pd.DataFrame(data=data, columns=columns)
    dfZ = label_decode(dfZ, discrete, label_encoder)

    dt, dt_dot = pyyadt.fit(dfZ,
                            class_name,
                            columns,
                            features_type,
                            discrete,
                            continuous,
                            filename='pyyadt_test',
                            path='./',
                            sep=';',
                            log=False)

    dt_dot.write_png('pyyadt_test.png')
    # img = Image.open('pyyadt_test.png')
    # img.show()

    y_pred_cc, leaf_nodes = pyyadt.predict(dt, dfZ.to_dict('records'),
                                           class_name, features_type, discrete,
                                           continuous)

    idx_record2explain = 11
    print(dfZ.to_dict('records')[idx_record2explain])
    cc_outcome, rule, tree_path = pyyadt.predict_rule(
        dt,
        dfZ.to_dict('records')[idx_record2explain], class_name, features_type,
        discrete, continuous)

    print(cc_outcome)
    print(rule)
    print(tree_path)

    print(pyyadt.get_covered_record_index(tree_path, leaf_nodes)[:10])
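
A small follow-up, assuming the values returned by pyyadt.get_covered_record_index are positional row indices into the records passed to pyyadt.predict (i.e., the rows of dfZ): the covered records themselves can then be inspected directly.

covered_idx = pyyadt.get_covered_record_index(tree_path, leaf_nodes)
print(dfZ.iloc[covered_idx[:10]])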