Example #1
import numpy as np
import tensorflow as tf

from imblearn.tensorflow import balanced_batch_generator


def test_balanced_batch_generator(data, sampler):
    X, y = data
    batch_size = 10
    training_generator, steps_per_epoch = balanced_batch_generator(
        X,
        y,
        sample_weight=None,
        sampler=sampler,
        batch_size=batch_size,
        random_state=42)

    learning_rate = 0.01
    epochs = 10
    input_size = X.shape[1]
    output_size = 3

    # helper functions
    def init_weights(shape):
        return tf.Variable(tf.random_normal(shape, stddev=0.01))

    def accuracy(y_true, y_pred):
        return np.mean(np.argmax(y_pred, axis=1) == y_true)

    # input and output
    data = tf.placeholder("float32", shape=[None, input_size])
    targets = tf.placeholder("int32", shape=[None])

    # build the model and weights
    W = init_weights([input_size, output_size])
    b = init_weights([output_size])
    # raw logits; sparse_softmax_cross_entropy_with_logits applies the
    # softmax itself, so no activation is used here
    logits = tf.matmul(data, W) + b

    # build the loss, predict, and train operator
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=targets)
    loss = tf.reduce_sum(cross_entropy)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_op = optimizer.minimize(loss)
    predict = tf.nn.softmax(logits)

    # Initialization of all variables in the graph
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)

        for e in range(epochs):
            for i in range(steps_per_epoch):
                X_batch, y_batch = next(training_generator)
                sess.run([train_op, loss],
                         feed_dict={
                             data: X_batch,
                             targets: y_batch
                         })

            # At the end of each epoch, report accuracy on the training set
            predicts_train = sess.run(predict, feed_dict={data: X})
            print("epoch: {} train accuracy: {:.3f}".format(
                e, accuracy(y, predicts_train)))
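The data and sampler arguments above are pytest fixtures supplied elsewhere in the test suite. A minimal sketch of how they might be defined, assuming an iris-based imbalanced dataset and two interchangeable under-samplers (both choices are assumptions, mirroring Example #3):

import numpy as np
import pytest
from sklearn.datasets import load_iris

from imblearn.datasets import make_imbalance
from imblearn.under_sampling import NearMiss, RandomUnderSampler


@pytest.fixture
def data():
    # Small, deliberately imbalanced dataset derived from iris.
    X, y = load_iris(return_X_y=True)
    X, y = make_imbalance(X, y, sampling_strategy={0: 30, 1: 50, 2: 40})
    return X.astype(np.float32), y


@pytest.fixture(params=[RandomUnderSampler(random_state=42), NearMiss()])
def sampler(request):
    # The test runs once per parametrized sampler.
    return request.param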
Example #2
from imblearn.keras import balanced_batch_generator
from imblearn.over_sampling import RandomOverSampler


def __init__(self, x, y, datagen, batch_size):
    # Constructor of a Keras data-generator class: fit the augmenter,
    # remember the image shape, and build a balanced batch generator
    # over the flattened images.
    self.datagen = datagen
    self.batch_size = batch_size
    self._shape = x.shape
    datagen.fit(x)
    self.gen, self.steps_per_epochs = balanced_batch_generator(
        x.reshape(x.shape[0], -1),
        y,
        sampler=RandomOverSampler(),
        batch_size=self.batch_size,
        keep_sparse=True)
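The snippet above is only the constructor; a minimal sketch of the rest of such a class, assuming it subclasses keras.utils.Sequence (the class name BalancedDataGenerator and the base class are assumptions):

from keras.utils import Sequence


class BalancedDataGenerator(Sequence):
    """Wraps the balanced generator built in the __init__ shown above."""

    def __len__(self):
        # One epoch covers every balanced batch the generator yields.
        return self.steps_per_epochs

    def __getitem__(self, idx):
        x_batch, y_batch = next(self.gen)
        # Restore the original image shape before augmenting.
        x_batch = x_batch.reshape(-1, *self._shape[1:])
        return next(self.datagen.flow(x_batch, y_batch,
                                      batch_size=self.batch_size))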
Example #3
import numpy as np
import tensorflow as tf
from sklearn.datasets import load_iris

from imblearn.datasets import make_imbalance
from imblearn.tensorflow import balanced_batch_generator


def test_balanced_batch_generator(sampler):
    X, y = load_iris(return_X_y=True)
    X, y = make_imbalance(X, y, {0: 30, 1: 50, 2: 40})
    X = X.astype(np.float32)

    batch_size = 10
    training_generator, steps_per_epoch = balanced_batch_generator(
        X, y, sample_weight=None, sampler=sampler,
        batch_size=batch_size, random_state=42)

    learning_rate = 0.01
    epochs = 10
    input_size = X.shape[1]
    output_size = 3

    # helper functions
    def init_weights(shape):
        return tf.Variable(tf.random_normal(shape, stddev=0.01))

    def accuracy(y_true, y_pred):
        return np.mean(np.argmax(y_pred, axis=1) == y_true)

    # input and output
    data = tf.placeholder("float32", shape=[None, input_size])
    targets = tf.placeholder("int32", shape=[None])

    # build the model and weights
    W = init_weights([input_size, output_size])
    b = init_weights([output_size])
    # raw logits; sparse_softmax_cross_entropy_with_logits applies the
    # softmax itself, so no activation is used here
    logits = tf.matmul(data, W) + b

    # build the loss, predict, and train operator
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=targets)
    loss = tf.reduce_sum(cross_entropy)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_op = optimizer.minimize(loss)
    predict = tf.nn.softmax(logits)

    # Initialization of all variables in the graph
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)

        for e in range(epochs):
            for i in range(steps_per_epoch):
                X_batch, y_batch = next(training_generator)
                sess.run([train_op, loss],
                         feed_dict={data: X_batch, targets: y_batch})

            # At the end of each epoch, report accuracy on the training set
            predicts_train = sess.run(predict, feed_dict={data: X})
            print("epoch: {} train accuracy: {:.3f}"
                  .format(e, accuracy(y, predicts_train)))
Example #4
from scipy import sparse

from imblearn.tensorflow import balanced_batch_generator  # the imblearn.keras variant has the same signature


def test_balanced_batch_generator_function_sparse(data, keep_sparse):
    X, y = data

    training_generator, steps_per_epoch = balanced_batch_generator(
        sparse.csr_matrix(X), y, keep_sparse=keep_sparse, batch_size=10,
        random_state=42)
    for idx in range(steps_per_epoch):
        X_batch, y_batch = next(training_generator)
        if keep_sparse:
            assert sparse.issparse(X_batch)
        else:
            assert not sparse.issparse(X_batch)
Example #5
import numpy as np
from scipy import sparse
from sklearn.datasets import load_iris

from imblearn.datasets import make_imbalance
from imblearn.tensorflow import balanced_batch_generator


def test_balanced_batch_generator_function_sparse(keep_sparse):
    X, y = load_iris(return_X_y=True)
    X, y = make_imbalance(X, y, {0: 30, 1: 50, 2: 40})
    X = X.astype(np.float32)

    training_generator, steps_per_epoch = balanced_batch_generator(
        sparse.csr_matrix(X), y, keep_sparse=keep_sparse, batch_size=10,
        random_state=42)
    for idx in range(steps_per_epoch):
        X_batch, y_batch = next(training_generator)
        if keep_sparse:
            assert sparse.issparse(X_batch)
        else:
            assert not sparse.issparse(X_batch)
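For context, keep_sparse only controls the format of the yielded batches; a standalone sketch outside the test harness (the imblearn.tensorflow variant and the iris dataset are assumptions):

import numpy as np
from scipy import sparse
from sklearn.datasets import load_iris

from imblearn.tensorflow import balanced_batch_generator

X, y = load_iris(return_X_y=True)
gen, steps = balanced_batch_generator(
    sparse.csr_matrix(X.astype(np.float32)), y,
    keep_sparse=True, batch_size=10, random_state=42)
X_batch, y_batch = next(gen)
print(sparse.issparse(X_batch))  # True: batches stay in CSR format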
Example #6
import os

import numpy as np
import tensorflow as tf

from imblearn.tensorflow import balanced_batch_generator
from imblearn.under_sampling import RandomUnderSampler


def train():
    # pm, model, wordid, cat_to_id, process, sequence, getAUC and
    # performance come from the surrounding project and are not shown here.
    tensorboard_dir = 'E:/Easy_TextCnn_Rnn-master1/tensorboard/Text_Rnn'
    save_dir = 'E:/Easy_TextCnn_Rnn-master1/checkpoints/Text_Rnn'
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    save_path = os.path.join(save_dir, 'best_validation')

    tf.summary.scalar('loss', model.loss)
    tf.summary.scalar('accuracy', model.accuracy)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)
    saver = tf.train.Saver()
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    writer.add_graph(session.graph)

    x_train, y_train = process(pm.train_filename,
                               wordid,
                               cat_to_id,
                               max_length=250)  # x_train is the labels, y_train is the text data
    #x_test, y_test = process(pm.test_filename, wordid, cat_to_id, max_length=250)
    val_x, val_y = process(pm.val_filename, wordid, cat_to_id, max_length=250)
    '''
    x_train, y_train = make_classification()
    # Setting replacement=True in RandomUnderSampler enables bootstrap sampling
    # The sampling_strategy parameter of RandomUnderSampler sets the resampling proportions
    rus = RandomUnderSampler(random_state=0, replacement=True,
                             sampling_strategy={0: 4251, 1: 4251})  # random under-sampling; e.g. sampling_strategy=0.2
    #pipe = make_pipeline(
            #SMOTE(sampling_strategy = {0:4250}),
            #NearMiss(sampling_strategy = {1:4250})
                         #)
    #x_resample,y_resample=pipe.fit_resample(x_train,y_train)
    x_resample,y_resample=rus.fit_sample(x_train,y_train)
    '''
    '''
    data1 = x_train[x_train['label'] == '负']  # '负' = negative class
    data0 = x_train[x_train['label'] == '正']  # '正' = positive class
    index = np.random.randint(
        len(data1), size=1 * (len(x_train) - len(data1)))
    lower_data1 = data1.iloc[list(index)]  # down-sampling
    '''
    '''
    ratio = {0:4251,1:4251}
    x_imb,y_imb = make_imbalance(x_train, y_train,ratio=ratio)
    #x_imb = np.array(x_imb)
    #y_imb = np.array(y_imb)
    '''
    '''
    model_RandomUnderSampler = RandomUnderSampler()
    x_RandomUnderSample_resampled,y_RandomUnderSample_resampled = model_RandomUnderSampler.fit_sample(x_train, y_train)
    #RandomUnderSampler_resampled=pd.concat([x_RandomUnderSample_resampled,y_RandomUnderSample_resampled],axis=1)
    '''
    '''
    x_train, y_train = make_classification()
    ee = EasyEnsembleClassifier(random_state=0, sampling_strategy='majority')  # e.g. sampling_strategy=0.2
    #x_resampled, y_resampled = ee.fit_sample(x_train, y_train)
    ee.fit(x_train, y_train)
    '''

    #class_dict = dict()
    #class_dict[0] = 4251;class_dict[1] = 4251
    #x_train,y_train = make_imbalance(x_train,y_train,class_dict)
    #0 denotes positive samples, 1 denotes negative samples
    training_generator, steps_per_epoch = balanced_batch_generator(
        x_train,
        y_train,
        sampler=RandomUnderSampler(sampling_strategy={
            0: 4251,
            1: 4251
        }),
        batch_size=pm.batch_size,
        random_state=42)  # sample_weight is left at its default of None
    for epoch in range(pm.num_epochs):
        print('Epoch:', epoch + 1)
        #num_batchs = int((len(x_train) - 1) / pm.batch_size) + 1
        for i in range(steps_per_epoch):
            x_batch, y_batch = next(training_generator)
            #feed_dict = dict(y_batch,x_batch)
            seq_len = sequence(x_batch)
            feed_dict = model.feed_data(x_batch, y_batch, seq_len,
                                        pm.keep_prob)
            #feed_dict[pm.input_y]=y_batch;feed_dict[targets] = x_batch
            _, global_step, _summary, train_loss, train_accuracy = session.run(
                [
                    model.optimizer, model.global_step, merged_summary,
                    model.loss, model.accuracy
                ],
                feed_dict=feed_dict)
            if global_step % 100 == 0:
                #test_loss, test_accuracy = model.evaluate(session, x_test, y_test)
                val_loss, val_accuracy = model.evaluate(session, val_x, val_y)
                print(
                    'global_step:',
                    global_step,
                    'train_loss:',
                    train_loss,
                    'train_accuracy:',
                    train_accuracy,
                    #'test_loss:', test_loss, 'test_accuracy:', test_accuracy)
                    'val_loss:',
                    val_loss,
                    'val_accuracy:',
                    val_accuracy)
                #label, proba_label, pre_label = model.getprob(session, x_test, y_test)
                label, proba_label, pre_label = model.getprob(
                    session, val_x, val_y)
                label = np.argmax(label, 1).tolist()
                AUC = getAUC(proba_label, label)
                #                print(np.argmax(label, 1).tolist()[:10])
                #                print(proba_label[:10])
                #                print(pre_label[:10])

                ACC, SN, SP, Precision, F1, MCC, TP, FN, FP, TN = performance(
                    label, pre_label)
                #print('ACC:%.3f SN:%.3f SP:%.3f Precision:%.3f F1:%.3f MCC:%.3f AUC:%.3f' %(ACC, SN, SP, Precision, F1, MCC, AUC))##
                print(
                    'ACC:%.3f SN:%.3f SP:%.3f Precision:%.3f F1:%.3f MCC:%.3f AUC:%.3f TP:%d,FN:%d,FP:%d,TN:%d'
                    % (ACC, SN, SP, Precision, F1, MCC, AUC, TP, FN, FP, TN))
                #print('test_AUC:', AUC)
                print('val_AUC:', AUC)
                #print('Saving Model...')
                #saver.save(session, save_path, global_step=global_step)
            if global_step % steps_per_epoch == 0:
                #if global_step % num_batchs == 0:
                # proba_label holds the most recent validation probabilities
                pre_ = []
                for j in range(0, 400):
                    pre_.append(proba_label[j][1])
                #np.savetxt(r'E:\Easy_TextCnn_Rnn-master1\TextRnn'+"/scores-val-1.data",pre_,fmt="%f",delimiter="\t")
                print('Saving Model...')
                saver.save(session, save_path, global_step=global_step)

        pm.learning_rate *= pm.lr_decay
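The sampler wiring above can be exercised on its own; a minimal sketch with synthetic data (the dataset and class weights are illustrative assumptions):

from collections import Counter

from sklearn.datasets import make_classification

from imblearn.tensorflow import balanced_batch_generator
from imblearn.under_sampling import RandomUnderSampler

X, y = make_classification(n_samples=10000, weights=[0.9, 0.1], random_state=0)
gen, steps_per_epoch = balanced_batch_generator(
    X, y, sampler=RandomUnderSampler(), batch_size=64, random_state=42)
X_batch, y_batch = next(gen)
print(Counter(y_batch))  # classes are roughly balanced within each batch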