Exemplo n.º 1
0
    def pso(self):
        """Search for the best extraction parameters with particle swarm optimisation.

        The swarm is stored as a ``(4, sizepop)`` array: one column per
        particle, one row per tuned parameter. Callers in this file unpack
        the returned vector as ``(phi, theta, psi, k)``. When
        ``self._tailor`` is ``False`` the fourth row is pinned to ``-1``
        for every particle.

        Side effects: sets NumPy's print precision to 3, reseeds NumPy's
        global random generator with 10, and logs progress both to stdout
        and to ``self._file``.

        Returns:
            A 1-D array of length 4 holding the best position found.
        """
        start_pso = time()
        np.set_printoptions(precision=3)
        print('------------ P S O -------------')
        self._file.write('------------ P S O -------------\n')
        ex = Extractor(self._clf)
        ex.extract_forest_paths()

        RF_res = self._clf.predict(self._X_test)
        # The reported offset is divided by the sample count only for 'Opt'.
        sample_num = len(self._y_test) if self.fitness_func == 'Opt' else 1
        w_max = 0.9  # inertia weight at the first generation
        w_min = 0.4  # inertia weight approached at the last generation
        c1, c2 = 1.6, 1.6  # learning factors (personal / global attraction)
        max_gen = self._generation  # number of generations
        sizepop = self._scale  # swarm size

        # Per-row limits as column vectors so they broadcast over particles.
        v_min = np.array([[-0.1], [-0.1], [-0.1], [-5]])  # velocity limits
        v_max = np.array([[0.1], [0.1], [0.1], [5]])
        pop_min = np.array([[0], [0.2], [0.1], [1]])  # position limits
        pop_max = np.array([[1], [1], [1], [50]])  # fourth row (k) capped at 50

        def evaluate(positions):
            """Return (offset, r_num, fitness) for every particle."""
            offset, r_num = self.pso_function_parallel(
                ex, positions, RF_res)  # parallel or not
            if self.fitness_func == 'Pro':
                fitness = self.pro_fitness(offset, r_num)
            else:
                fitness = self.opt_fitness(offset, r_num)
            return offset, r_num, fitness

        # The order of the random draws below is part of the reproducible
        # behaviour under the fixed seed; do not reorder them.
        np.random.seed(10)
        pop = np.zeros([4, sizepop])  # initial positions
        pop[:2] = np.random.uniform(0.2, 1, (2, sizepop))
        pop[2] = np.random.uniform(0, 1, (1, sizepop))
        pop[3] = np.random.uniform(1, 50, (1, sizepop))
        if self._tailor is False:
            pop[3] = -1
        # Initial velocities; the fourth row is scaled to the range [-5, 5].
        v = np.random.uniform(-0.1, 0.1,
                              (4, sizepop)) * [[1], [1], [1], [50]]

        offset, r_num, fitness = evaluate(pop)

        best = np.argmax(fitness)  # index of the best initial particle

        # Copies, not aliases: the personal-best store is updated in place
        # below, and the global best must never change as a side effect of
        # that update.
        g_best = pop.copy()  # personal best positions
        z_best = pop[:, best].copy()  # global best position
        fitness_gbest = fitness.copy()  # personal best fitness values
        fitness_zbest = fitness[best]  # global best fitness value
        print('initial best: ', best, '\t', pop[:, best], '\t',
              offset[best] / sample_num, r_num[best], 'fitness:',
              fitness[best])
        self._file.write('0:\t{}\t{}\t{:.2f}\t{} fitness: {:.2f}\n'.format(
            best, pop[:, best], (offset[best] / sample_num), r_num[best],
            fitness[best]))

        for t in range(max_gen):
            # Inertia weight decreases linearly from w_max towards w_min.
            w = w_max - (w_max - w_min) / max_gen * t

            # Velocity update. One scalar random factor per term is shared by
            # the whole swarm; r1 is drawn before r2.
            r1 = np.random.random()
            r2 = np.random.random()
            v = w * v + c1 * r1 * (g_best - pop) + c2 * r2 * \
                (z_best.reshape(4, 1) - pop)
            v = np.clip(v, v_min, v_max)  # velocity limits

            # Position update, clamped to the allowed range.
            pop = np.clip(pop + v, pop_min, pop_max)

            if self._tailor is False:
                pop[3] = -1

            offset, r_num, fitness = evaluate(pop)

            # Personal best update.
            improved = fitness > fitness_gbest
            fitness_gbest[improved] = fitness[improved]
            g_best[:, improved] = pop[:, improved]

            # Global best update.
            j = np.argmax(fitness)

            print(t + 1, j, '\t', pop[:, j], '\t', offset[j] / sample_num,
                  r_num[j], 'fitness:', fitness[j])
            self._file.write('{}:\t{}\t{}\t{:.2f}\t{} fitness: {:.2f}'.format(
                t + 1, j, pop[:, j], offset[j] / sample_num, r_num[j],
                fitness[j]))
            if fitness[j] > fitness_zbest:
                z_best = pop[:, j].copy()
                fitness_zbest = fitness[j]
                print('new record: ', fitness[j])
                self._file.write('\t*new record*')
            self._file.write('\n')

        print('optimal parameters', z_best)
        self._file.write('optimal parameters: {}\n'.format(z_best))

        end_pso = time()
        print('pso time:', end_pso - start_pso)
        self._file.write('pso time: {}\n\n'.format(end_pso - start_pso))

        return z_best
Exemplo n.º 2
0
                    y_test,
                    file,
                    generation=20,
                    scale=20,
                    conjunction=False,
                    maxsat_on=True,
                    tailor=False,
                    fitness_func='Pro')
    param = m.pso()
    phi = param[0]
    theta = param[1]
    psi = param[2]
    k = param[3]

    ex = Extractor(clf, phi, theta, psi)
    ex.extract_forest_paths()
    ex.rule_filter()
    print('max_rule', ex.max_rule, 'max_node', ex.max_node)
    print("original path number: ", ex.n_original_leaves_num)
    print('original scale: ', ex.scale)
    print("original path number after rule filter: ", len(ex._forest_values))

    sat = Z3Process(ex, k)
    sat.leaves_partition()
    sat.maxsat()
    sat.run_filter()

    print("original path number after maxsat: ", sat.n_rules_after_max,
          " after filter: ", sat.n_rules_after_filter, '\n')
    print('classes:', clf.classes_)
Exemplo n.º 3
0
    def explain(self, param, label='', auc_plot=False):
        """Extract rules for the given parameters and report their quality.

        The method builds an ``Extractor`` from the classifier, filters its
        rules, runs them through ``Z3Process`` (with MAX-SAT when
        ``self._maxsat_on`` is ``True``), turns the result into formulae and
        compares the formulae's predictions with the classifier's on the
        test set. Every figure is printed and also written to ``self._file``.

        Args:
            param: Indexable with at least four items, read as
                ``(phi, theta, psi, k)``.
            label: Positive class for the ROC/AUC computation. The empty
                string (default) selects ``self._clf.classes_[0]``.
            auc_plot: When ``True``, plot both ROC curves with matplotlib
                and call ``plt.show()``.
        """
        print('------------ Explanation -------------')
        self._file.write('------------ Explanation -------------\n')
        phi = param[0]
        theta = param[1]
        psi = param[2]
        k = param[3]

        # --- Stage 1: path extraction and rule filtering ---
        start1 = time()
        ex = Extractor(self._clf, phi, theta, psi)
        ex.extract_forest_paths()

        ex.rule_filter()

        print('max_rule', ex.max_rule, 'max_node', ex.max_node)
        print('min_rule', ex.min_rule, 'min_node', ex.min_node)
        end1 = time()
        print("EX Running time: %s seconds" % (end1 - start1))

        print("original path number: ", ex.n_original_leaves_num)
        print("original scale: ", ex.scale)
        print("path number after rule filter: ", len(ex._forest_values))
        self._file.write('original path number: {}\n'.format(
            ex.n_original_leaves_num))
        self._file.write('original scale: {}\n'.format(ex.scale))
        self._file.write('path number after rule filter: {}\n'.format(
            len(ex._forest_values)))

        # --- Stage 2: SAT processing ---
        start2 = time()
        sat = Z3Process(ex, k)
        sat.leaves_partition()
        if self._maxsat_on is True:
            sat.maxsat()
        # Run the filter before reporting, because the report below reads
        # sat.n_rules_after_filter (presumably populated by run_filter();
        # the standalone driver code in this file uses this same order).
        sat.run_filter()
        if self._maxsat_on is True:
            print("path number after maxsat: ", sat.n_rules_after_max,
                  " after filter: ", sat.n_rules_after_filter, '\n')
            self._file.write(
                'path number after maxsat: {}\tafter filter: {}\n\nclasses:\t{}\n\n'
                .format(sat.n_rules_after_max, sat.n_rules_after_filter,
                        self._clf.classes_))
        else:
            print('no maxsat')
            self._file.write('/no MAX-SAT\n')
        end2 = time()

        print("SAT Running time: %s seconds" % (end2 - start2))

        print('classes:', self._clf.classes_)

        # --- Stage 3: formula generation and classification ---
        start3 = time()
        f = FormulaeEstimator(sat,
                              conjunction=self._conjunction,
                              classes=self._clf.classes_)
        f.get_formulae_text(self._file)
        print('\n------------ Performance -------------')
        self._file.write('\n------------ Performance -------------\n')
        c_ans = self._clf.predict(self._X_test)
        ans = f.classify_samples(self._X_test)
        end3 = time()
        print("ET Running time: %s seconds" % (end3 - start3))

        RF_accuracy = accuracy_score(self._y_test, c_ans)
        EX_accuracy = accuracy_score(self._y_test, ans)
        # Agreement between the extracted rules and the classifier itself.
        performance = accuracy_score(c_ans, ans)

        # Entries of f.sat_group with no member count as "no answer";
        # entries with more than one member count as overlapping.
        no_ans = sum(1 for each in f.sat_group if len(each) == 0)
        overlap = sum(1 for each in f.sat_group if len(each) > 1)

        if label == '':  # default positive class for the AUC computation
            label = self._clf.classes_[0]

        # The score column must belong to the positive label: predict_proba
        # columns follow classes_ order, so a fixed column 1 combined with
        # the default label classes_[0] would invert the ROC curve.
        try:
            score_col = list(self._clf.classes_).index(label)
        except ValueError:
            # Label is not a known class: keep the historical column.
            score_col = 1

        fpr, tpr, thresholds = roc_curve(self._y_test,
                                         self._clf.predict_proba(
                                             self._X_test)[:, score_col],
                                         pos_label=label)
        ori_auc = auc(fpr, tpr)

        # NOTE(review): assumes classify_samples_values returns one column
        # per class in classes_ order, like predict_proba -- confirm.
        ex_test = f.classify_samples_values(self._X_test)
        efpr, etpr, ethresholds = roc_curve(self._y_test,
                                            ex_test[:, score_col],
                                            pos_label=label)
        ex_auc = auc(efpr, etpr)

        n_samples = len(self._y_test)
        coverage = (n_samples - no_ans) / n_samples
        overlap_ratio = overlap / n_samples

        print('sample size:\t', n_samples)
        self._file.write('sample size:\t{}\n'.format(n_samples))

        print('RF accuracy:\t', RF_accuracy)
        self._file.write('RF accuracy:\t{}\n'.format(RF_accuracy))

        print('RF AUC:\t\t\t', ori_auc)
        self._file.write('RF AUC:\t\t\t{:.2f}\n'.format(ori_auc))

        print('EX accuracy:\t', EX_accuracy)
        self._file.write('EX accuracy:\t{}\n'.format(EX_accuracy))

        print('EX AUC:\t\t\t', ex_auc)
        self._file.write('EX AUC:\t\t\t{:.2f}\n'.format(ex_auc))

        print('Coverage:\t\t', coverage)
        self._file.write('Coverage:\t\t{}\n'.format(coverage))

        print('Overlap:\t\t', overlap_ratio)
        self._file.write('Overlap:\t\t{}\n'.format(overlap_ratio))

        print('*Performance:\t', performance)
        self._file.write('*Performance:\t{}\n'.format(performance))

        if auc_plot is True:
            plt.plot(fpr,
                     tpr,
                     linewidth=2,
                     label="RF ROC curve (area = {:.2f})".format(ori_auc))

            plt.plot(efpr,
                     etpr,
                     linewidth=2,
                     label="Explain ROC curve (area = {:.2f})".format(ex_auc))

            plt.xlabel("false positive rate")
            plt.ylabel("true positive rate")
            plt.ylim(0, 1.05)
            plt.xlim(0, 1.05)
            plt.legend(loc=4)  # legend position: lower right
            plt.show()