file, generation=20, scale=20, conjunction=False, maxsat_on=True, tailor=False, fitness_func='Pro') param = m.pso() phi = param[0] theta = param[1] psi = param[2] k = param[3] ex = Extractor(clf, phi, theta, psi) ex.extract_forest_paths() ex.rule_filter() print('max_rule', ex.max_rule, 'max_node', ex.max_node) print("original path number: ", ex.n_original_leaves_num) print('original scale: ', ex.scale) print("original path number after rule filter: ", len(ex._forest_values)) sat = Z3Process(ex, k) sat.leaves_partition() sat.maxsat() sat.run_filter() print("original path number after maxsat: ", sat.n_rules_after_max, " after filter: ", sat.n_rules_after_filter, '\n') print('classes:', clf.classes_) f = FormulaeEstimator(sat, conjunction=True, classes=clf.classes_)
def explain(self, param, label='', auc_plot=False):
    """Extract an explanation from the forest and report its performance.

    Runs three timed stages: path extraction and rule filtering
    (``Extractor``), partitioning plus optional MAX-SAT and filtering
    (``Z3Process``), then formula construction and classification of the
    held-out samples (``FormulaeEstimator``). Every statistic is printed
    to stdout and written to ``self._file``.

    Args:
        param: Indexable of at least four values. Items 0-2 are passed to
            ``Extractor`` as ``phi``, ``theta`` and ``psi``; item 3 is
            passed to ``Z3Process`` as ``k``.
        label: Class label treated as positive for the ROC/AUC figures.
            The empty string (default) selects ``self._clf.classes_[0]``.
        auc_plot: When ``True``, show the two ROC curves with matplotlib.

    Returns:
        None. Results are reported through stdout and ``self._file``.
    """
    print('------------ Explanation -------------')
    self._file.write('------------ Explanation -------------\n')

    phi = param[0]
    theta = param[1]
    psi = param[2]
    k = param[3]

    # Stage 1: extract decision paths from the forest and filter the rules.
    start1 = time()
    ex = Extractor(self._clf, phi, theta, psi)
    ex.extract_forest_paths()
    ex.rule_filter()
    print('max_rule', ex.max_rule, 'max_node', ex.max_node)
    print('min_rule', ex.min_rule, 'min_node', ex.min_node)
    end1 = time()
    print("EX Running time: %s seconds" % (end1 - start1))

    print("original path number: ", ex.n_original_leaves_num)
    print("original scale: ", ex.scale)
    print("path number after rule filter: ", len(ex._forest_values))
    self._file.write('original path number: {}\n'.format(
        ex.n_original_leaves_num))
    self._file.write('original scale: {}\n'.format(ex.scale))
    self._file.write('path number after rule filter: {}\n'.format(
        len(ex._forest_values)))

    # Stage 2: partition the leaves, optionally run MAX-SAT, then filter.
    start2 = time()
    sat = Z3Process(ex, k)
    sat.leaves_partition()
    if self._maxsat_on is True:
        sat.maxsat()
        # NOTE(review): n_rules_after_filter is reported before
        # run_filter() is called below - confirm that maxsat() already
        # populates it, otherwise this figure is stale.
        print("path number after maxsat: ", sat.n_rules_after_max,
              " after filter: ", sat.n_rules_after_filter, '\n')
        self._file.write(
            'path number after maxsat: {}\tafter filter: {}\n\nclasses:\t{}\n\n'
            .format(sat.n_rules_after_max, sat.n_rules_after_filter,
                    self._clf.classes_))
    else:
        print('no maxsat')
        self._file.write('/no MAX-SAT\n')
    sat.run_filter()
    end2 = time()
    print("SAT Running time: %s seconds" % (end2 - start2))

    print('classes:', self._clf.classes_)

    # Stage 3: build the formulae and classify the test samples with both
    # the forest and the extracted explanation.
    start3 = time()
    f = FormulaeEstimator(sat, conjunction=self._conjunction,
                          classes=self._clf.classes_)
    f.get_formulae_text(self._file)

    print('\n------------ Performance -------------')
    self._file.write('\n------------ Performance -------------\n')
    c_ans = self._clf.predict(self._X_test)
    ans = f.classify_samples(self._X_test)
    end3 = time()
    print("ET Running time: %s seconds" % (end3 - start3))

    RF_accuracy = accuracy_score(self._y_test, c_ans)
    EX_accuracy = accuracy_score(self._y_test, ans)
    # Agreement between the explanation and the forest it explains.
    performance = accuracy_score(c_ans, ans)

    # A sample whose group has several entries counts as an overlap; a
    # sample whose group is empty received no answer.
    no_ans = 0
    overlap = 0
    for group in f.sat_group:
        group_size = len(group)
        if group_size > 1:
            overlap += 1
        elif group_size == 0:
            no_ans += 1

    # Compute AUC.
    if label == '':
        label = self._clf.classes_[0]

    # The score handed to roc_curve must be the score of the positive
    # class. predict_proba columns follow classes_ order, so select the
    # column that belongs to `label` instead of always taking column 1
    # (which, with the default label classes_[0], scored the wrong class).
    # An unknown label keeps the legacy column 1.
    known_classes = list(self._clf.classes_)
    pos_col = known_classes.index(label) if label in known_classes else 1

    rf_scores = self._clf.predict_proba(self._X_test)[:, pos_col]
    fpr, tpr, _ = roc_curve(self._y_test, rf_scores, pos_label=label)
    ori_auc = auc(fpr, tpr)

    # Assumes classify_samples_values returns one score column per class
    # in classes_ order (the estimator is built with classes=classes_ and
    # the original code indexed both score matrices identically) - TODO
    # confirm.
    ex_test = f.classify_samples_values(self._X_test)
    efpr, etpr, _ = roc_curve(self._y_test, ex_test[:, pos_col],
                              pos_label=label)
    ex_auc = auc(efpr, etpr)

    n_samples = len(self._y_test)
    coverage = (n_samples - no_ans) / n_samples
    overlap_ratio = overlap / n_samples

    print('sample size:\t', n_samples)
    self._file.write('sample size:\t{}\n'.format(n_samples))
    print('RF accuracy:\t', RF_accuracy)
    self._file.write('RF accuracy:\t{}\n'.format(RF_accuracy))
    print('RF AUC:\t\t\t', ori_auc)
    self._file.write('RF AUC:\t\t\t{:.2f}\n'.format(ori_auc))
    print('EX accuracy:\t', EX_accuracy)
    self._file.write('EX accuracy:\t{}\n'.format(EX_accuracy))
    print('EX AUC:\t\t\t', ex_auc)
    self._file.write('EX AUC:\t\t\t{:.2f}\n'.format(ex_auc))
    print('Coverage:\t\t', coverage)
    self._file.write('Coverage:\t\t{}\n'.format(coverage))
    print('Overlap:\t\t', overlap_ratio)
    self._file.write('Overlap:\t\t{}\n'.format(overlap_ratio))
    print('*Performance:\t', performance)
    self._file.write('*Performance:\t{}\n'.format(performance))

    if auc_plot is True:
        plt.plot(fpr, tpr, linewidth=2,
                 label="RF ROC curve (area = {:.2f})".format(ori_auc))
        plt.plot(efpr, etpr, linewidth=2,
                 label="Explain ROC curve (area = {:.2f})".format(ex_auc))
        plt.xlabel("false positive rate")
        plt.ylabel("true positive rate")
        plt.ylim(0, 1.05)
        plt.xlim(0, 1.05)
        plt.legend(loc=4)  # legend position
        plt.show()