Exemplo n.º 1
0
def one_step():
    """
    Run every comparison method in one go on the data read from ecoli.dat.

    KNN and DT are evaluated once per sampler ("RUS", "SMOTE"); every other
    classifier is evaluated once with an empty sampler name. For each run,
    element 1 of the ``kFoldTest`` result is printed below a table header.
    """
    features, labels = read_data.get_data([6], -1, "ecoli.dat", show_info=True)

    n_folds = 5  # number of cross-validation folds
    # NOTE: a disabled step used to duplicate the data with
    # read_data.upsampling_copy until len(labels) / n_folds reached 100.

    row_format = "|%-20s|%-20s|%-20s|%-20s|%-20s"
    print(row_format % ("", "f1score", "auc", "gmean", "bACC"))
    print(row_format % ("----", "----", "----", "----", "----"))

    classifiers = ("KNN", "DT", "RandomForest", "AdaBoost", "EasyEnsemble", "BalancedBagging")
    samplers = ("RUS", "SMOTE")
    sampled_classifiers = ("KNN", "DT")
    for clf_name in classifiers:
        # Classifiers outside sampled_classifiers run once with sampler="".
        active_samplers = samplers if clf_name in sampled_classifiers else ("",)
        for sampler_name in active_samplers:
            outcome = kFoldTest(features.copy(), labels.copy(),
                                sampler=sampler_name, classifier=clf_name, k=n_folds)
            print(outcome[1])
Exemplo n.º 2
0
def main():
    """
    Load the data, train on 80% of it and save the resulting generator.

    ``configure_gpu()`` is called first when ``params.GPU`` is truthy. The
    angles returned by ``get_data`` are registered on ``params`` before the
    split. The held-out 20% and the discriminator returned by ``train`` are
    not used here.
    """
    params = Params()
    if params.GPU:
        configure_gpu()

    images, angles = get_data(params.parametrized)
    params.set_angles(angles)

    # random_state is fixed at 42 (presumably so the split is reproducible).
    split = train_test_split(images, angles, test_size=0.2, random_state=42)
    train_images, _, train_angles, _ = split

    generator, _ = train(params, train_images, train_angles)
    save_generator(params, generator)
def one_step():
    """
    Run the KNN, DT and SVC comparison methods on the data read from zoo.dat.

    The data is first duplicated with ``read_data.upsampling_copy`` until the
    number of label-0 samples divided by the fold count reaches 50. Each
    classifier is then evaluated with an empty sampler name, and element 0 of
    the ``compare.kFoldTest`` result is printed below a table header.
    """
    features, labels = read_data.get_data([2], -1, "zoo.dat", show_info=True)

    n_folds = 5
    # Keep duplicating until there are at least 50 label-0 samples per fold.
    while len(labels[labels == 0]) / n_folds < 50:
        features, labels = read_data.upsampling_copy(features, labels, 1)
        print("复制一份后:%d/%d" % (len(labels[labels == 1]), len(labels[labels == 0])))

    columns = ("", "val_acc", "val_precision", "val_recall", "val_f1", "auc_value", "val_gmean")
    row_format = "|%-20s|%-20s|%-20s|%-20s|%-20s|%-20s|%-20s"
    print(row_format % columns)
    print(row_format % (("----",) * len(columns)))

    sampler_names = ("",)
    classifier_names = ("KNN", "DT", "SVC")
    for clf_name in classifier_names:
        for sampler_name in sampler_names:
            outcome = compare.kFoldTest(features.copy(), labels.copy(),
                                        sampler=sampler_name, classifier=clf_name, k=n_folds)
            print(outcome[0])
        # 测试
        y_proba = clf.predict_proba(x_val)
        y_pred = np.argmax(y_proba, axis=1)

        # 进化前的表现
        save_metric(val_history, y_val, y_pred, y_proba)
        print("进化前:")
        show_last_data(val_history)

        # 进化
        s = equation.get_S_matrix(x_val)
        y_proba = PSOEvolutor(s).evolve(y_proba, max_steps=200)
        y_pred = np.argmax(y_proba, axis=1)

        # 进化后的表现
        save_metric(evo_history, y_val, y_pred, y_proba)
        print("进化后:")
        show_last_data(evo_history)

    # 统计,求平均值和标准差
    print("进化前平均:")
    show_mean_data(val_history)
    print("进化后平均:")
    show_mean_data(evo_history)


if __name__ == '__main__':
    # Script entry point: read the data from yeast.dat and pass it to
    # kFoldEvolution with that function's default options.
    x, y = read_data.get_data([0, 6], -1, "yeast.dat", show_info=True)

    kFoldEvolution(x, y)
Exemplo n.º 5
0
        :param x: 集合
        :return: 欧式距离,矩阵
        """

        dis = np.zeros((len(x), len(x)))
        for i, xi in enumerate(x):
            for j, xj in enumerate(x):
                dis[i][j] = np.sqrt(np.sum(np.square(xi - xj)))

        return dis

    def get_neighbor(self, d):
        """
        Return the indices of the ``self.k`` smallest entries of ``d``.

        :param d: distances to all candidate neighbours, one entry per candidate
        :return: list of at most ``self.k`` indices ordered from nearest to
                 farthest; equal distances keep their original relative order
        """
        # sorted() is stable, so ties are resolved by position in d.
        ranked = sorted(enumerate(d), key=lambda pair: pair[1])
        return [index for index, _ in ranked[:self.k]]


if __name__ == '__main__':
    # Demo: read banana.dat, keep the entries of x whose label equals 0,
    # resample them with SmoteSampler and print the count before and after.
    x, y = read_data.get_data([1], -1, "banana.dat", show_info=True)

    x_neg = x[y == 0]
    print("采样前:%d" % len(x_neg))
    # NOTE(review): presumably 5 is the neighbour count and 100 the resampling
    # amount -- confirm against SmoteSampler's definition.
    x_neg = SmoteSampler(5).resample(x_neg, 100)
    print("采样后:%d" % len(x_neg))
Exemplo n.º 6
0
                y = -1
            else:
                y = 1
            if y_pred[i] == 0:
                h = -1
            else:
                h = 1

            Z += C[Y[i]] * D[i] * np.exp(-alpha * y * h)

        return Z


if __name__ == '__main__':
    # Load the raw data
    x, y = get_data([0, 6], -1, "1到5/yeast.dat")

    # Shuffle, then convert to arrays (labels stored as int8)
    x, y = shuffle_data(x, y)
    x = np.array(x)
    y = np.array(y, dtype=np.int8)
    # Disabled: remap label 0 to -1
    # y[y == 0] = -1

    # Cost terms, keyed by class label
    C = {1: 0.1, 0: 1}

    # Everything except the last 100 samples is used for training
    x_train = x[:-100]
    y_train = y[:-100]

    # The last 100 samples are held out for validation
    x_val = x[-100:]
    y_val = y[-100:]
        print("进化前平均:")
        experiment_helper.show_mean_data(val_history)
        print("进化后平均:")
        experiment_helper.show_mean_data(evo_history)
        print("结合后平均:")
        experiment_helper.show_mean_data(mean_history)
        auc_record["%d:%d" % (n_under, n_up)] = get_auc(mean_history)
        print(auc_record)
    else:
        experiment_helper.show_mean_data(val_history)


def get_auc(history):
    """
    Return the mean of the values stored under the "auc" key.

    :param history: mapping that holds the recorded AUC values under "auc"
    :return: ``np.mean`` of those values
    """
    auc_values = history["auc"]
    return np.mean(auc_values)


# Module-level record of mean AUC values; code elsewhere in this file stores
# entries under "n_under:n_up" string keys.
auc_record = {}
if __name__ == '__main__':
    # Grid over every (n_under, n_up) combination; the data is re-read from
    # yeast.dat for each combination before calling kFoldEvolution.
    N_under = (5, 10, 15, 20)
    N_up = (5, 10, 15, 20)
    for i in N_under:
        for j in N_up:
            x, y = read_data.get_data([6], -1, "yeast.dat", show_info=False)

            # Aim for at least 100 samples per cross-validation fold
            # (the upsampling below is currently disabled)
            # for i in range(1):
            #     x, y = read_data.upsampling_copy(x, y, 1)
            #     print("复制一份后:%d/%d" % (len(y[y == 1]), len(y[y == 0])))

            kFoldEvolution(x, y, evolution=True, n_under=i, n_up=j)