def Test_dbn():
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images,\
        mnist.test.labels

    opts = DLOption(10, 1., 100, 0.0, 0., 0.)
    dbn = DBN([400, 100], opts, trX)
    errs = dbn.train()
    print(errs)

    # layer sizes must match the DBN above so its weights can be loaded
    nn = NN([400, 100], opts, trX, trY)
    nn.load_from_dbn(dbn)
    nn.train()
    print(np.mean(np.argmax(teY, axis=1) == nn.predict(teX)))
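These MNIST examples all follow the same pattern: greedily pretrain a DBN, then seed a feed-forward net with its weights via NN.load_from_dbn before supervised fine-tuning. dbn_tf and nn_tf are not shown on this page, so the following is a rough sketch of the idea only, assuming the DBN exposes parallel lists of weight matrices and hidden biases (the attribute names below are hypothetical):

import numpy as np

class SketchNN:
    # minimal feed-forward net that can be seeded from pretrained RBM layers
    def __init__(self, sizes, n_in):
        self.weights, self.biases = [], []
        prev = n_in
        for h in sizes:
            self.weights.append(np.random.randn(prev, h) * 0.01)
            self.biases.append(np.zeros(h))
            prev = h

    def load_from_dbn(self, dbn_weights, dbn_biases):
        # copy each pretrained RBM into the matching hidden layer; only the
        # output layer (added during supervised training) stays untouched
        for i, (W, b) in enumerate(zip(dbn_weights, dbn_biases)):
            assert self.weights[i].shape == W.shape, "layer sizes must match the DBN"
            self.weights[i] = W.copy()
            self.biases[i] = b.copy()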
Example No. 2
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Copyright © 2016 Peng Liu <*****@*****.**>
#
# Distributed under terms of the GNU GPL3 license.
"""
Test some function.
"""

import input_data
from opts import DLOption
from dbn_tf import DBN
from nn_tf import NN
import numpy as np

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images,\
    mnist.test.labels

opts = DLOption(10, 1., 100, 0.0, 0., 0.)
dbn = DBN([400, 100], opts, trX)
dbn.train()
# layer sizes must match the DBN above so its weights can be loaded
nn = NN([400, 100], opts, trX, trY)
nn.load_from_dbn(dbn)
nn.train()
print(np.mean(np.argmax(teY, axis=1) == nn.predict(teX)))
Example No. 3
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Copyright © 2016 Peng Liu <*****@*****.**>
#
# Distributed under terms of the GNU GPL3 license.

"""
Test some function.
"""

import input_data
from opts import DLOption
from dbn_tf import DBN
from nn_tf import NN
import numpy as np


mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images,\
    mnist.test.labels

opts = DLOption(10, 1., 100, 0.0, 0., 0.)
dbn = DBN([400, 100], opts, trX)
dbn.train()
# layer sizes must match the DBN above so its weights can be loaded
nn = NN([400, 100], opts, trX, trY)
nn.load_from_dbn(dbn)
nn.train()
print(np.mean(np.argmax(teY, axis=1) == nn.predict(teX)))
Example No. 4
def train(do_k_fold, out_dir, log_f):
    if do_k_fold:
        utils.print_out("# do k_fold k=%d" % k_fold, log_f)
        k_fold_val = 0
        k_fold_tra = 0
        [k_TP, k_TN, k_FP, k_FN, k_SE, k_SP, k_MCC, k_ACC] = [0, 0, 0, 0, 0, 0, 0, 0]
        for i in range(k_fold):
            trX_ = []
            trY_ = []
            for j in range(k_fold):
                if j == i: continue
                trX_.append(k_fold_X[j])
                trY_.append(k_fold_Y[j])
            trX_ = np.concatenate(trX_)
            trY_ = np.concatenate(trY_)
            utils.print_out("#k_fold %d" % i, log_f)
            utils.print_out("#do DBN ...", log_f)
            dbn = DBN()
            dbn.train(trX_)
            utils.print_out("#end DBN", log_f)
            utils.print_out("#do caps ...", log_f)
            capsNet = CapsNet(is_training=True, dbn=dbn)

            i_k_fold_val, i_k_fold_tra = capsNet.train(trX_, trY_, k_fold_X[i], k_fold_Y[i], None, log_f)
            TP, TN, FP, FN, SE, SP, MCC, ACC = eva(capsNet, k_fold_X[i], k_fold_Y[i])
            print(i,", TP:", TP)
            print(i,", TN:", TN)
            print(i,", FP:", FP)
            print(i,", FN:", FN)
            print(i,", SE:", SE)
            print(i,", SP:", SP)
            print(i,", MCC:", MCC)
            print(i,", ACC: ", ACC)
            k_TP += TP
            k_TN += TN
            k_FP += FP
            k_FN += FN
            k_SE += SE
            k_SP += SP
            k_MCC += MCC
            k_ACC += ACC

        print("TP :", k_TP / 5)
        print("TN :", k_TN / 5)
        print("FP :", k_FP / 5)
        print("FN :", k_FN / 5)
        print("SE :", k_SE / 5)
        print("SP :", k_SP / 5)
        print("MCC: ", k_MCC / 5)
        print("ACC: ", k_ACC / 5)
    else:
        utils.print_out("#do DBN ...", log_f)
        dbn = DBN()
        dbn.train(trX)
        utils.print_out("#end DBN", log_f)
        utils.print_out("#do caps ...", log_f)
        utils.print_out("#test instead val set for test ...", log_f)
        capsNet = CapsNet(is_training=isTraining, dbn=dbn)
        if isTraining:
            i_k_fold_val, i_k_fold_tra = capsNet.train(trX, trY, teX, teY, "./board", log_f)
            utils.print_out("#end caps", log_f)

            tr_TP, tr_TN, tr_FP, tr_FN, tr_SE, tr_SP, tr_MCC, tr_ACC = eva(capsNet, trX, trY)
            val_TP, val_TN, val_FP, val_FN, val_SE, val_SP, val_MCC, val_ACC = eva(capsNet, vaX, vaY)
            te_TP, te_TN, te_FP, te_FN, te_SE, te_SP, te_MCC, te_ACC = eva(capsNet, teX, teY)
            utils.print_out('train: TP:%.3f;  TN:%.3f;  FP:%.3f;  FN:%.3f;  SE:%.3f  SP:%.3f  MCC:%.3f  ACC:%.3f' \
                            % (tr_TP, tr_TN, tr_FP, tr_FN, tr_SE, tr_SP, tr_MCC, tr_ACC), log_f)
            utils.print_out('val:   TP:%.3f;  TN:%.3f;  FP:%.3f;  FN:%.3f;  SE:%.3f  SP:%.3f  MCC:%.3f  ACC:%.3f' \
                            % (val_TP, val_TN, val_FP, val_FN, val_SE, val_SP, val_MCC, val_ACC), log_f)
            utils.print_out('test:  TP:%.3f;  TN:%.3f;  FP:%.3f;  FN:%.3f;  SE:%.3f  SP:%.3f  MCC:%.3f  ACC:%.3f' \
                            % (te_TP, te_TN, te_FP, te_FN, te_SE, te_SP, te_MCC, te_ACC), log_f)

        else:
            import csv
            csvFile = open("./"+train_datadir+"/"+setFileNames[1], "r")
            reader = csv.reader(csvFile)  # returns an iterator
            data = []
            for item in reader:
                data.append(item[0])
            csvFile.close()
            data = data[1:]

            utils.print_out("#end caps", log_f)
            pre_Y = pre(capsNet, vaX).tolist()[0]
            import pandas as pd

            dataFrame = pd.DataFrame({"0_name": data, "1_class": pre_Y})
            dataFrame.to_csv('./data_set/test_dir/180831-result.csv', index=False, sep=",")
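eva is not defined in this snippet. A minimal sketch of what it plausibly computes, assuming one-hot labels and a predict method that returns class indices (both assumptions, not confirmed by the source):

import numpy as np

def eva(model, X, Y):
    # binary confusion counts plus the derived metrics printed above
    pred = np.asarray(model.predict(X)).ravel()
    true = np.argmax(Y, axis=1)
    TP = int(np.sum((pred == 1) & (true == 1)))
    TN = int(np.sum((pred == 0) & (true == 0)))
    FP = int(np.sum((pred == 1) & (true == 0)))
    FN = int(np.sum((pred == 0) & (true == 1)))
    SE = TP / (TP + FN)  # sensitivity (recall)
    SP = TN / (TN + FP)  # specificity
    denom = np.sqrt(float((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)))
    MCC = (TP * TN - FP * FN) / denom if denom else 0.0
    ACC = (TP + TN) / (TP + TN + FP + FN)
    return TP, TN, FP, FN, SE, SP, MCC, ACC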
Example No. 5
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Copyright © 2016 Peng Liu <*****@*****.**>
#
# Distributed under terms of the GNU GPL3 license.
"""
Test some function.
"""

import input_data
from opts import DLOption
from dbn_tf import DBN
from nn_tf import NN
import numpy as np

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images,\
    mnist.test.labels

opts = DLOption(10, 1., 100, 0.0, 0., 0.)
dbn = DBN([400, 100], opts, trX)
# dbn.train()
# same layer sizes as the DBN above, but trained from scratch (pretraining is commented out)
nn = NN([400, 100], opts, trX, trY)
# nn.load_from_dbn(dbn)
nn.train()
print(np.mean(np.argmax(teY, axis=1) == nn.predict(teX)))
Example No. 6
    y_train[key] = np.zeros(n_sample)

    for i in range(n_sample):
        X_train[key][i] = np.concatenate(
            (dm[key][int(train_index[i, 0])], gm[key][int(train_index[i, 1])]))
        y_train[key][i] = int(train_index[i, 2])

    X_train[key] = normalization(X_train[key])

    for b in range(n_batches):
        X_test[key][b * batch_size:(b + 1) * batch_size] = normalization(
            X_test[key][b * batch_size:(b + 1) * batch_size])

    dbn[key] = DBN(hidden_layers_structure=[hidb, hidb, hidb],
                   weight_cost=0.001,
                   batch_size=4,
                   n_epoches=30,
                   learning_rate_rbm=[0.0005, 1e-2, 1e-2],
                   rbm_gauss_visible=True)

    dbn[key].fit(X_train[key])
    X_train[key] = dbn[key].transform(X_train[key])

    for b in range(n_batches):
        X_test[key][b * batch_size:(b + 1) * batch_size] = dbn[key].transform(
            X_test[key][b * batch_size:(b + 1) * batch_size])

# save the data
np.save('X_train_go', X_train[key])
np.save('y_train', y_train[key])
np.save('X_test_go', X_test[key])
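normalization is also undefined here; per-feature min-max scaling is one common choice for this kind of preprocessing, though the original helper may differ. A sketch under that assumption:

import numpy as np

def normalization(X):
    # scale each feature to [0, 1], guarding against constant columns
    X = np.asarray(X, dtype=np.float64)
    lo, hi = X.min(axis=0), X.max(axis=0)
    span = np.where(hi > lo, hi - lo, 1.0)
    return (X - lo) / span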
Example No. 7
def svmBagging(SamplingMethode):
    # read the data
    train = pd.read_csv("../data/data.csv", header=0)
    # convert every value to int
    train = train.astype(int)

    # min-max normalization
    min_max_scaler = preprocessing.MinMaxScaler()
    train = min_max_scaler.fit_transform(train)

    # split into train and test sets
    train, test = train_test_split(train, test_size=0.2)

    # Balance the data via upsampling or downsampling, i.e. make the number
    # of good clients equal to the number of bad clients.
    # The difference: upsampling raises the number of bad clients until it
    # equals the number of good clients.
    if SamplingMethode == 'upSampling':
        # upsample here
        X_train, y_train = upSampling(train)
        y_train = y_train.reshape(len(y_train), 1)
        train = np.append(y_train, X_train, axis=1)
        print("Apres UpSampling, la quantité des données équal: ", len(train))

    # downsampling reduces the number of good clients until it equals the bad clients
    elif SamplingMethode == 'lowSampling':
        train = pd.DataFrame(train)
        train = np.array(lowSampling(train))
        print("Apres LowSampling, la quantité des données équal: ", len(train))

    # carve out the EI (ensemble input) data set
    EI = np.array(RepetitionRandomSampling(train, len(train), 0.5))
    EI_train = EI[:, 1:]
    EI_test = EI[:, 0]

    clf_svm = [svm.SVC(kernel='rbf', gamma='scale', C=1.75) for _ in range(40)]
    #clf_svm = [fsvmClass.HYP_SVM(kernel='polynomial', C=1.5, P=1.5) for _ in range(40)]

    # clf_svm = [HYP_SVM(C=1.5) for _ in range(2)]
    bag = Bagging(40, clf_svm, 0.5)
    svms = bag.MutModel_clf(np.array(train), np.array(test))

    result = list()
    for each in svms:
        result.append(each.predict(EI_train))
    # each column is the prediction of one SVM

    result = np.array(result)
    trX = np.transpose(result)  # transpose so each row holds one sample's predictions
    trX = trX.astype(np.float32)
    trY = to_categorical(EI_test)  # one-hot encode the binary classes
    trY = trY.astype(np.float32)

    # DLOption stores the model hyper-parameters, in order:
    # epochs, learning_rate, batch_size, momentum, penaltyL2, dropoutProb
    opts = DLOption(300, 0.01, 64, 0.01, 0., 0.2)

    # DBN builds the deep belief network; the arguments are sizes, opts, X.
    # [400, 100] means two stacked RBMs with 400 and 100 hidden units.
    dbn = DBN([400, 100], opts, trX)
    # pretrain the DBN
    dbn.train()

    # Build the fully connected network: the hidden layers are seeded with the
    # pretrained RBM weights and the output layer is then fine-tuned.
    # The arguments are sizes, opts, X, Y; the hidden sizes must match the DBN.
    nn = NN([400, 100], opts, trX, trY)
    # load the pretrained DBN weights
    nn.load_from_dbn(dbn)
    # train the output layer to perform classification
    nn.train()

    testX = test[:, 1:]
    testY = test[:, 0]
    test_result = list()
    for each in svms:
        test_result.append(each.predict(testX))
    test_result = np.array(test_result)
    teX = np.transpose(test_result)
    teX = teX.astype(np.float32)
    teY = testY.astype(np.float32)

    # score = Voting(result)

    cm = confusion_matrix(teY, nn.predict(teX))
    sns.heatmap(cm, annot=True, fmt='')
    plt.title('DBN')
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()
    acc = len(teY[teY == nn.predict(teX)]) / len(teY)
    sp = cm[0, 0] / (cm[0, 0] + cm[0, 1])
    se = cm[1, 1] / (cm[1, 0] + cm[1, 1])

    print("accuracy for DBN: ", round(acc, 3))
    print("specifity for DBN: ", round(sp, 3))
    print("Sensitivity for DBN: ", round(se, 3))

    score = Voting(test_result)

    cm = confusion_matrix(teY, score)
    sns.heatmap(cm, annot=True, fmt='')
    plt.title('Voting')
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

    acc = len(teY[teY == score]) / len(teY)
    sp = cm[0, 0] / (cm[0, 0] + cm[0, 1])
    se = cm[1, 1] / (cm[1, 0] + cm[1, 1])

    print("accuracy for Voting: ", round(acc, 3))
    print("specifity for Voting: ", round(sp, 3))
    print("Sensitivity for Voting: ", round(se, 3))
Example No. 8
def fsvmBagging(SamplingMethode):
    # read the data
    train = pd.read_csv("../data/data.csv", header=0)
    # convert every value to int
    train = train.astype(int)

    features = train.columns[1:21]
    X = train[features]
    y = train['Creditability']
    min_max_scaler = preprocessing.MinMaxScaler()
    X = min_max_scaler.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=0)

    y_train = np.array(y_train)
    y_train = y_train.reshape(len(y_train), 1)
    train = np.append(y_train, np.array(X_train), axis=1)
    if SamplingMethode == 'upSampling':
        X_train, y_train = upSampling(train)
    elif SamplingMethode == 'lowSampling':
        train = pd.DataFrame(train)
        train = np.array(lowSampling(train))
        X_train = train[:, 1:]
        y_train = train[:, 0]

    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    # relabel class 0 as -1 for the SVMs
    y_train[y_train == 0] = -1
    y_test = np.array(y_test)
    y_test[y_test == 0] = -1
    y_train = y_train.reshape(len(y_train), 1)
    train = np.append(y_train, np.array(X_train), axis=1)

    y_test = np.array(y_test)
    y_test = y_test.reshape(len(y_test), 1)
    test = np.append(y_test, np.array(X_test), axis=1)

    # carve out the EI (ensemble input) data set;
    # the number of bootstrap rows is 0.5 * len(train)
    EI = np.array(RepetitionRandomSampling(train, len(train), 0.5))
    EI_train = EI[:, 1:]
    EI_test = EI[:, 0]

    clf_svm = [HYP_SVM(kernel='polynomial', C=1.75, P=0.1) for _ in range(20)]

    # 20 subsampled data sets, estimator is fsvm, rate is 0.5
    bag = Bagging(20, clf_svm, 0.5, 'fsvm')
    svms = bag.MutModel_clf(np.array(train), np.array(test))

    result = list()
    for each in svms:
        result.append(each.predict(EI_train))

    result = np.array(result)
    trX = np.transpose(result)
    trX = trX.astype(np.float32)

    # map the SVM labels back from -1 to 0 before one-hot encoding
    EI_test[EI_test == -1] = 0
    trY = to_categorical(EI_test)
    trY = trY.astype(np.float32)

    # DLOption stores the model hyper-parameters, in order:
    # epochs, learning_rate, batch_size, momentum, penaltyL2, dropoutProb
    opts = DLOption(10, 1, 64, 0.01, 0., 0.2)

    # DBN builds the deep belief network; the arguments are sizes, opts, X.
    # [100, 50, 10] means three stacked RBMs with 100, 50 and 10 hidden units.
    dbn = DBN([100, 50, 10], opts, trX)
    # pretrain the DBN
    dbn.train()

    # Build the fully connected network: the hidden layers are seeded with the
    # pretrained RBM weights and the output layer is then fine-tuned.
    # The arguments are sizes, opts, X, Y; the hidden sizes must match the DBN.
    nn = NN([100, 50, 10], opts, trX, trY)
    # load the pretrained DBN weights
    nn.load_from_dbn(dbn)
    # train the output layer to perform classification
    nn.train()

    testX = test[:, 1:]
    testY = test[:, 0]
    test_result = list()
    for each in svms:
        test_result.append(each.predict(testX))

    test_result = np.array(test_result)
    teX = np.transpose(test_result)
    teX = teX.astype(np.float32)
    # map the labels back from -1 to 0
    testY[testY == -1] = 0
    teY = testY.astype(np.float32)

    # print('Final acc: ', teY == nn.predict(teX))

    # score = Voting(result)

    cm = confusion_matrix(teY, nn.predict(teX))
    sns.heatmap(cm, annot=True, fmt='')
    plt.title('DBN')
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

    acc = len(teY[teY == nn.predict(teX)]) / len(teY)
    sp = cm[0, 0] / (cm[0, 0] + cm[0, 1])
    se = cm[1, 1] / (cm[1, 0] + cm[1, 1])

    print("accuracy for DBN: ", round(acc, 3))
    print("specifity for DBN: ", round(sp, 3))
    print("Sensitivity for DBN: ", round(se, 3))

    score = Voting(test_result)

    cm = confusion_matrix(teY, score)
    sns.heatmap(cm, annot=True, fmt='')
    plt.title('Voting')
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

    acc = len(teY[teY == score]) / len(teY)
    sp = cm[0, 0] / (cm[0, 0] + cm[0, 1])
    se = cm[1, 1] / (cm[1, 0] + cm[1, 1])

    print("accuracy for Voting: ", round(acc, 3))
    print("specifity for Voting: ", round(sp, 3))
    print("Sensitivity for Voting: ", round(se, 3))
Example No. 9
logreg = linear_model.LogisticRegression(C=600.0)

hidt = 512

X_train, X_test = {}, {}
"""--------------------"""

# load the previously saved data
X_train['ppi'] = np.load('X_train_ppi.npy')
X_train['go'] = np.load('X_train_go.npy')
y_train = np.load('y_train.npy')

top_dbn = DBN(hidden_layers_structure=[hidt, hidt, hidt],
              weight_cost=0.001,
              batch_size=4,
              n_epoches=30,
              learning_rate_rbm=[1e-2, 1e-2, 1e-2])

X_train_joint = np.concatenate((X_train['ppi'], X_train['go']), axis=1)
top_dbn.fit(X_train_joint)
X_train_joint = top_dbn.transform(X_train_joint)

logreg.fit(X_train_joint, y_train)

#predict
X_test_joint = np.load('X_test_joint.npy')
"""--------------------
This batch_size is also used because of the limited amount of RAM:
the program can only analyze 10000 samples at a time.
"""