import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from treeinterpreter import treeinterpreter as ti

import utils
# The exact module of these project helpers is an assumption, based on the
# qualified call in `sample_around_instance_from_dataset` below.
from utils import (construct_test_data_around_instance, export_tree,
                   get_primary_features)


def compare_lime(self):
    """Measure LIME surrogate fidelity around the decision boundary and the instance."""
    lime = self.create_lime_surrogate(self.explainer.last_instance,
                                      self.explainer.dataset,
                                      self.explainer.clf)

    # Fidelity on data sampled around the first decision-boundary touchpoint.
    data = construct_test_data_around_instance(
        self.explainer.dataset, self.explainer.touchpoints[0],
        max_distance=self.max_distance)
    clf_pred = self.explainer.clf.predict(data)
    srg_pred = lime.predict(data)
    self.lime_score_db = accuracy_score(clf_pred, srg_pred)

    # Fidelity on data sampled around the explained instance itself.
    data = construct_test_data_around_instance(
        self.explainer.dataset, self.explainer.last_instance,
        max_distance=self.max_distance)
    clf_pred = self.explainer.clf.predict(data)
    srg_pred = lime.predict(data)
    self.lime_score_instance = accuracy_score(clf_pred, srg_pred)

    print('LIME surrogate around DB', self.lime_score_db)
    print('LIME surrogate around instance', self.lime_score_instance)
    print('----------------------------- \n')
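
# `create_lime_surrogate` is defined elsewhere in the project. As a hedged
# sketch of the underlying idea only: a LIME-style surrogate perturbs the
# instance, labels the perturbations with the black-box classifier, and fits
# a proximity-weighted linear model. All names and constants below are
# illustrative, not the project's implementation.
from sklearn.linear_model import LogisticRegression


def lime_style_surrogate(instance, dataset, clf,
                         num_samples=1000, kernel_width=0.75):
    rng = np.random.default_rng(0)
    scale = dataset.std(axis=0)
    # Gaussian perturbations around the instance, scaled per feature.
    samples = instance + rng.normal(size=(num_samples, len(instance))) * scale
    labels = clf.predict(samples)
    # Exponential kernel: samples closer to the instance get higher weight.
    dists = np.linalg.norm((samples - instance) / (scale + 1e-12), axis=1)
    weights = np.exp(-dists ** 2 / kernel_width ** 2)
    surrogate = LogisticRegression(max_iter=1000)
    # Requires at least two classes among `labels`; widen `scale` if not.
    surrogate.fit(samples, labels, sample_weight=weights)
    return surrogate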
def export_decision_tree(self):
    """Fit a shallow decision-tree surrogate near the decision boundary and export it."""
    data = construct_test_data_around_instance(
        self.explainer.dataset, self.explainer.touchpoints[0],
        max_distance=self.max_distance)
    clf_pred = self.explainer.clf.predict(data)
    X_train, X_test, Y_train, Y_test = train_test_split(
        data, clf_pred, test_size=0.2, random_state=1000)

    tree = DecisionTreeClassifier(max_depth=3)
    tree.fit(X_train, Y_train)

    # Surrogate features ordered by descending importance; keep the top 10.
    order = np.flip(np.argsort(np.abs(tree.feature_importances_)))
    self.surrogate_features = np.array(self.feature_names)[order][0:10]

    # Fidelity on the held-out split around the decision boundary.
    tree_pred = tree.predict(X_test)
    clf_pred = self.explainer.clf.predict(X_test)
    export_tree(tree, 'exports/db_tree.pdf', self.feature_names)
    self.tree_surrogate = tree
    self.tree_score_db = accuracy_score(clf_pred, tree_pred)
    print('accuracy tree around DB', self.tree_score_db)
    print('LOCAL tree feature importance ',
          list(zip(np.array(self.feature_names)[order][0:10],
                   tree.feature_importances_[order][0:10])))

    # Fidelity on fresh data sampled around the explained instance.
    data = construct_test_data_around_instance(
        self.explainer.dataset, self.explainer.last_instance,
        max_distance=self.max_distance)
    tree_pred = tree.predict(data)
    clf_pred = self.explainer.clf.predict(data)
    self.tree_score_instance = accuracy_score(clf_pred, tree_pred)
    print('accuracy tree around instance', self.tree_score_instance)
    print('----------------------------- \n')
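
# `export_tree` is a project helper. A hedged sketch of an equivalent using
# scikit-learn's export_graphviz; assumes the `graphviz` Python package and
# the Graphviz system binaries are installed.
import graphviz
from sklearn.tree import export_graphviz


def export_tree_sketch(tree, path, feature_names):
    dot = export_graphviz(tree, out_file=None, feature_names=feature_names,
                          filled=True, rounded=True)
    # graphviz appends the format suffix itself, so strip it from `path`.
    graphviz.Source(dot).render(path.rsplit('.', 1)[0], format='pdf',
                                cleanup=True)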
def lars_features_local(self):
    """Rank features with LARS on data sampled locally around the instance."""
    data_subset = construct_test_data_around_instance(
        self.explainer.dataset, self.explainer.last_instance,
        max_distance=self.max_distance)
    labels = self.explainer.clf.predict(data_subset)
    features = get_primary_features(
        data_subset, labels, num_features=self.explainer.num_features)
    print('FEATURE IMPORTANCE LARS locally around instance')
    print(np.array(self.feature_names)[features])
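
# `get_primary_features` is project-local. A hedged sketch of a LARS-based
# ranking in the same spirit: LARS activates coefficients one at a time in
# order of relevance, so stopping after `num_features` steps yields a small
# ranked feature set. Names below are illustrative.
from sklearn.linear_model import Lars


def lars_feature_ranking(data, labels, num_features=5):
    lars = Lars(n_nonzero_coefs=num_features)
    lars.fit(data, labels)
    active = np.flatnonzero(lars.coef_)
    # Largest-magnitude coefficients first.
    return active[np.argsort(-np.abs(lars.coef_[active]))]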
def compare_surrogate(self):
    """Measure the linear surrogate's fidelity around the decision boundary and the instance."""
    # data = sample_normal(self.explainer.touchpoints, 500, 2)

    # Compare around the decision boundary.
    data = construct_test_data_around_instance(
        self.explainer.dataset, self.explainer.touchpoints[0],
        max_distance=self.max_distance)
    clf_pred = self.explainer.clf.predict(data)
    srg_pred = self.explainer.sg.surrogate.predict(data)
    sg = self.explainer.sg.surrogate
    self.linear_surrogate = sg
    self.linear_score_db = accuracy_score(clf_pred, srg_pred)

    # Top-10 coefficients of the linear surrogate, descending by magnitude.
    order = np.flip(np.argsort(np.abs(sg.coef_[0])))
    print('LOCAL LINEAR feature importance ',
          list(zip(np.array(self.feature_names)[order][0:10],
                   sg.coef_[0][order][0:10])))

    # Compare around the explained instance.
    data = construct_test_data_around_instance(
        self.explainer.dataset, self.explainer.last_instance,
        max_distance=self.max_distance)
    clf_pred = self.explainer.clf.predict(data)
    srg_pred = sg.predict(data)
    self.linear_score_instance = accuracy_score(clf_pred, srg_pred)

    print('accuracy surrogate around DB', self.linear_score_db)
    print('accuracy surrogate around instance', self.linear_score_instance)
    print('----------------------------- \n')
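
# The compare_* methods above repeat one pattern: fidelity is the agreement
# between surrogate and black-box predictions on data sampled near a
# reference point. A hedged sketch of that pattern as a single helper
# (illustrative, not part of the project):
def local_fidelity(surrogate, clf, dataset, reference, max_distance):
    data = construct_test_data_around_instance(dataset, reference,
                                               max_distance=max_distance)
    # The black-box predictions serve as ground truth for the surrogate.
    return accuracy_score(clf.predict(data), surrogate.predict(data))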
def sample_around_instance_from_dataset(self, border_touchpoints,
                                        num_samples, max_distance=0.5):
    """Sample dataset points around each border touchpoint.

    :param border_touchpoints: points on the decision boundary to sample around
    :param num_samples: total sample count, split evenly across touchpoints
    :param max_distance: maximum distance from each touchpoint
    :return: the touchpoints plus the sampled points
    """
    result = np.array(border_touchpoints)
    num_per_point = int(num_samples / len(border_touchpoints))
    for point in border_touchpoints:
        samples = utils.construct_test_data_around_instance(
            self.dataset, point, max_distance=max_distance,
            size=num_per_point)
        result = np.append(result, samples, axis=0)
    return result
def support_with_random_sampling(self, instance, counterfactual,
                                 num_support=10):
    """Collect supporting points by sampling in an expanding radius around the instance."""
    max_distance = 0.3
    while True:
        sample = construct_test_data_around_instance(
            self.data, instance, max_distance=max_distance)
        if len(sample) == 0:
            max_distance += 0.3
            continue
        pred = self.clf.predict(sample)
        sample = sample[pred == 1]  # TODO: Change to dynamic
        # Stop as soon as enough supporting points are found; `>=` avoids
        # expanding forever when exactly num_support matches exist.
        if len(sample) >= num_support:
            return sample[0:num_support]
        max_distance += 0.3
def random(self, instance, target_value=1):
    """Find a counterfactual by random sampling in an expanding radius."""
    counterfact = None
    max_distance = 0.3
    # `is None` rather than truthiness: numpy arrays are ambiguous in a
    # boolean context.
    while counterfact is None:
        print(max_distance)  # trace the growing search radius
        sample = construct_test_data_around_instance(
            self.data, instance, max_distance=max_distance)
        if len(sample) == 0:
            max_distance += 0.3
            continue
        pred = self.clf.predict(sample)
        sample = sample[pred == target_value]
        if len(sample) > 0:
            counterfact = sample[0]
        else:
            max_distance += 0.3
    return counterfact
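
# `support_with_random_sampling` and `random` share one expanding-radius
# search. A hedged, self-contained sketch of that pattern; `ball_sample` is
# an illustrative stand-in for `construct_test_data_around_instance`.
def ball_sample(data, instance, max_distance):
    # Keep dataset rows within `max_distance` of the instance in every feature.
    mask = np.all(np.abs(data - instance) <= max_distance, axis=1)
    return data[mask]


def expanding_radius_search(data, predict_fn, instance,
                            target_value=1, step=0.3):
    # Note: loops forever if `data` contains no target-class point at all.
    max_distance = step
    while True:
        sample = ball_sample(data, instance, max_distance)
        if len(sample) > 0:
            hits = sample[predict_fn(sample) == target_value]
            if len(hits) > 0:
                return hits[0]
        max_distance += step  # nothing found yet: widen the search ball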
def feature_importance(self):
    """Train a local surrogate random forest and print its per-feature contributions.

    Uses treeinterpreter to decompose the forest's prediction for the
    explained instance into per-feature contributions.
    """
    data_subset = construct_test_data_around_instance(
        self.explainer.dataset, self.explainer.touchpoints[0],
        max_distance=0.6)
    pred = self.explainer.clf.predict(data_subset)
    rf = RandomForestClassifier(n_estimators=100)
    rf.fit(data_subset, pred)

    # Decompose the prediction: prediction = bias + sum of contributions.
    p, b, c = ti.predict(rf, self.explainer.last_instance.reshape(1, -1))
    contributions = c[0]

    print('FEATURE IMPORTANCES RF around DB: \n')
    # Top 10 features by absolute contribution to class 0.
    for contribution, feature in sorted(
            zip(contributions[:, 0], self.feature_names),
            key=lambda x: -abs(x[0]))[0:10]:
        print(feature, contribution)
    print('------------------------- \n')
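
# A hedged, self-contained sketch of the treeinterpreter decomposition used
# above: for each sample, prediction = bias + sum of per-feature
# contributions. Data and feature names are synthetic placeholders.
def treeinterpreter_demo():
    rng = np.random.RandomState(0)
    X = rng.rand(200, 4)
    y = (X[:, 0] + X[:, 1] > 1).astype(int)
    rf = RandomForestClassifier(n_estimators=100, random_state=0).fit(X, y)

    pred, bias, contribs = ti.predict(rf, X[:1])
    # contribs has shape (n_samples, n_features, n_classes).
    assert np.allclose(pred, bias + contribs.sum(axis=1))
    for name, contribution in zip(['f0', 'f1', 'f2', 'f3'],
                                  contribs[0][:, 0]):
        print(name, contribution)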