Exemplo n.º 1
0
                    file,
                    generation=20,
                    scale=20,
                    conjunction=False,
                    maxsat_on=True,
                    tailor=False,
                    fitness_func='Pro')
    param = m.pso()
    phi = param[0]
    theta = param[1]
    psi = param[2]
    k = param[3]

    ex = Extractor(clf, phi, theta, psi)
    ex.extract_forest_paths()
    ex.rule_filter()
    print('max_rule', ex.max_rule, 'max_node', ex.max_node)
    print("original path number: ", ex.n_original_leaves_num)
    print('original scale: ', ex.scale)
    print("original path number after rule filter: ", len(ex._forest_values))

    sat = Z3Process(ex, k)
    sat.leaves_partition()
    sat.maxsat()
    sat.run_filter()

    print("original path number after maxsat: ", sat.n_rules_after_max,
          " after filter: ", sat.n_rules_after_filter, '\n')
    print('classes:', clf.classes_)

    f = FormulaeEstimator(sat, conjunction=True, classes=clf.classes_)
Exemplo n.º 2
0
    def explain(self, param, label='', auc_plot=False):
        """Extract an explanation of the classifier and report its quality.

        Runs ``Extractor`` -> ``Z3Process`` -> ``FormulaeEstimator`` with the
        given parameters, then compares the extracted formulae against
        ``self._clf`` on ``self._X_test`` / ``self._y_test``. Every statistic
        is printed to stdout and also written to ``self._file``.

        Args:
            param: Indexable holding ``(phi, theta, psi, k)`` at positions
                0-3. The first three are passed to ``Extractor``, the fourth
                to ``Z3Process``.
            label: Class treated as positive for the ROC/AUC computation.
                The empty string (default) selects ``self._clf.classes_[0]``.
            auc_plot: When ``True``, draw both ROC curves with matplotlib and
                call ``plt.show()``.

        Returns:
            None.
        """
        print('------------ Explanation -------------')
        self._file.write('------------ Explanation -------------\n')
        phi, theta, psi, k = param[0], param[1], param[2], param[3]

        # --- Stage 1: path extraction and rule filtering -------------------
        start1 = time()
        ex = Extractor(self._clf, phi, theta, psi)
        ex.extract_forest_paths()
        ex.rule_filter()

        print('max_rule', ex.max_rule, 'max_node', ex.max_node)
        print('min_rule', ex.min_rule, 'min_node', ex.min_node)
        end1 = time()
        print("EX Running time: %s seconds" % (end1 - start1))

        n_filtered_paths = len(ex._forest_values)
        print("original path number: ", ex.n_original_leaves_num)
        print("original scale: ", ex.scale)
        print("path number after rule filter: ", n_filtered_paths)
        self._file.write('original path number: {}\n'.format(
            ex.n_original_leaves_num))
        self._file.write('original scale: {}\n'.format(ex.scale))
        self._file.write('path number after rule filter: {}\n'.format(
            n_filtered_paths))

        # --- Stage 2: (optional) MAX-SAT followed by the filter ------------
        maxsat_on = self._maxsat_on is True
        start2 = time()
        sat = Z3Process(ex, k)
        sat.leaves_partition()
        if maxsat_on:
            sat.maxsat()
        # The filter runs before anything is reported so the "after filter"
        # count is read once the filter step has completed. The order of the
        # calls on `sat` is unchanged.
        # NOTE(review): assumes run_filter() is what populates
        # n_rules_after_filter -- confirm against Z3Process.
        sat.run_filter()
        end2 = time()

        if maxsat_on:
            print("path number after maxsat: ", sat.n_rules_after_max,
                  " after filter: ", sat.n_rules_after_filter, '\n')
            self._file.write(
                'path number after maxsat: {}\tafter filter: {}\n\nclasses:\t{}\n\n'
                .format(sat.n_rules_after_max, sat.n_rules_after_filter,
                        self._clf.classes_))
        else:
            print('no maxsat')
            self._file.write('/no MAX-SAT\n')

        print("SAT Running time: %s seconds" % (end2 - start2))

        print('classes:', self._clf.classes_)

        # --- Stage 3: build the formulae and classify the test set ---------
        start3 = time()
        f = FormulaeEstimator(sat,
                              conjunction=self._conjunction,
                              classes=self._clf.classes_)
        f.get_formulae_text(self._file)
        print('\n------------ Performance -------------')
        self._file.write('\n------------ Performance -------------\n')
        c_ans = self._clf.predict(self._X_test)
        ans = f.classify_samples(self._X_test)
        end3 = time()
        print("ET Running time: %s seconds" % (end3 - start3))

        rf_accuracy = accuracy_score(self._y_test, c_ans)
        ex_accuracy = accuracy_score(self._y_test, ans)
        # Agreement between the explanation and the classifier it explains.
        performance = accuracy_score(c_ans, ans)

        # Samples matched by more than one group overlap; samples matched by
        # none have no answer.
        group_sizes = [len(each) for each in f.sat_group]
        overlap = sum(1 for size in group_sizes if size > 1)
        no_ans = sum(1 for size in group_sizes if size == 0)

        # --- AUC -----------------------------------------------------------
        if label == '':
            label = self._clf.classes_[0]

        # The score column must belong to the positive class. A hard-coded
        # column 1 paired with pos_label=classes_[0] scores the wrong class.
        try:
            pos_col = list(self._clf.classes_).index(label)
        except ValueError:
            # Label unknown to the classifier: keep the historical column.
            pos_col = 1

        rf_scores = self._clf.predict_proba(self._X_test)[:, pos_col]
        fpr, tpr, _ = roc_curve(self._y_test, rf_scores, pos_label=label)
        ori_auc = auc(fpr, tpr)

        # NOTE(review): assumes the columns of classify_samples_values()
        # follow the `classes` order given to FormulaeEstimator -- confirm.
        ex_test = f.classify_samples_values(self._X_test)
        efpr, etpr, _ = roc_curve(self._y_test,
                                  ex_test[:, pos_col],
                                  pos_label=label)
        ex_auc = auc(efpr, etpr)

        # --- Report --------------------------------------------------------
        n_samples = len(self._y_test)
        coverage = (n_samples - no_ans) / n_samples
        overlap_ratio = overlap / n_samples

        print('sample size:\t', n_samples)
        self._file.write('sample size:\t{}\n'.format(n_samples))

        print('RF accuracy:\t', rf_accuracy)
        self._file.write('RF accuracy:\t{}\n'.format(rf_accuracy))

        print('RF AUC:\t\t\t', ori_auc)
        self._file.write('RF AUC:\t\t\t{:.2f}\n'.format(ori_auc))

        print('EX accuracy:\t', ex_accuracy)
        self._file.write('EX accuracy:\t{}\n'.format(ex_accuracy))

        print('EX AUC:\t\t\t', ex_auc)
        self._file.write('EX AUC:\t\t\t{:.2f}\n'.format(ex_auc))

        print('Coverage:\t\t', coverage)
        self._file.write('Coverage:\t\t{}\n'.format(coverage))

        print('Overlap:\t\t', overlap_ratio)
        self._file.write('Overlap:\t\t{}\n'.format(overlap_ratio))

        print('*Performance:\t', performance)
        self._file.write('*Performance:\t{}\n'.format(performance))

        if auc_plot is True:
            plt.plot(fpr,
                     tpr,
                     linewidth=2,
                     label="RF ROC curve (area = {:.2f})".format(ori_auc))
            plt.plot(efpr,
                     etpr,
                     linewidth=2,
                     label="Explain ROC curve (area = {:.2f})".format(ex_auc))

            plt.xlabel("false positive rate")
            plt.ylabel("true positive rate")
            plt.ylim(0, 1.05)
            plt.xlim(0, 1.05)
            plt.legend(loc=4)  # legend position: lower right
            plt.show()