def test_balanced_batch_generator(data, sampler):
    """Smoke-test ``balanced_batch_generator`` by training a small TF-1.x
    softmax classifier on the balanced batches it yields.

    Parameters
    ----------
    data : tuple
        ``(X, y)`` fixture supplied by pytest parametrisation.
    sampler : object
        imbalanced-learn sampler instance forwarded to the generator.
    """
    X, y = data
    batch_size = 10
    training_generator, steps_per_epoch = balanced_batch_generator(
        X, y, sample_weight=None, sampler=sampler,
        batch_size=batch_size, random_state=42)
    learning_rate = 0.01
    epochs = 10
    input_size = X.shape[1]
    output_size = 3  # number of classes expected in the fixture data

    # helper functions
    def init_weights(shape):
        # small random-normal initial weights
        return tf.Variable(tf.random_normal(shape, stddev=0.01))

    def accuracy(y_true, y_pred):
        # fraction of samples whose argmax prediction matches the label
        return np.mean(np.argmax(y_pred, axis=1) == y_true)

    # input and output
    # NOTE: this rebinds the ``data`` parameter to a placeholder; the
    # original (X, y) tuple was already unpacked above.
    data = tf.placeholder("float32", shape=[None, input_size])
    targets = tf.placeholder("int32", shape=[None])

    # build the model and weights
    W = init_weights([input_size, output_size])
    b = init_weights([output_size])
    # NOTE(review): the logits are squashed through a sigmoid before being
    # handed to sparse_softmax_cross_entropy_with_logits, which normally
    # expects raw (unactivated) logits -- confirm this is intentional.
    out_act = tf.nn.sigmoid(tf.matmul(data, W) + b)

    # build the loss, predict, and train operator
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=out_act, labels=targets)
    loss = tf.reduce_sum(cross_entropy)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_op = optimizer.minimize(loss)
    predict = tf.nn.softmax(out_act)

    # Initialization of all variables in the graph
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        for e in range(epochs):
            # the generator is infinite: consume exactly steps_per_epoch
            # batches per epoch
            for i in range(steps_per_epoch):
                X_batch, y_batch = next(training_generator)
                sess.run([train_op, loss], feed_dict={
                    data: X_batch, targets: y_batch
                })

            # For each epoch, run accuracy on train and test
            predicts_train = sess.run(predict, feed_dict={data: X})
            print("epoch: {} train accuracy: {:.3f}".format(
                e, accuracy(y, predicts_train)))
def __init__(self, x, y, datagen, batch_size):
    """Wrap a fitted image data generator around balanced batches.

    Parameters
    ----------
    x : array-like
        Input samples; flattened to 2-D before being handed to the sampler.
    y : array-like
        Target labels.
    datagen : object
        Image data generator; its ``fit`` statistics are computed on ``x``.
    batch_size : int
        Number of samples per generated batch.
    """
    self.datagen = datagen
    self.batch_size = batch_size
    # remember the original sample shape (presumably to restore it when
    # batches are consumed -- not visible from this chunk)
    self._shape = x.shape
    # fit the data-generator statistics (e.g. normalisation) on the raw data
    datagen.fit(x)
    # Flatten each sample to a row so the sampler can operate on 2-D input.
    # NOTE(review): attribute is spelled ``steps_per_epochs`` (plural) --
    # callers must use this exact name.
    self.gen, self.steps_per_epochs = balanced_batch_generator(
        x.reshape(x.shape[0], -1), y, sampler=RandomOverSampler(),
        batch_size=self.batch_size, keep_sparse=True)
def test_balanced_batch_generator(sampler):
    """Smoke-test ``balanced_batch_generator`` on an imbalanced iris subset
    by training a small TF-1.x softmax classifier on its batches.

    Parameters
    ----------
    sampler : object
        imbalanced-learn sampler instance forwarded to the generator.
    """
    X, y = load_iris(return_X_y=True)
    # deliberately imbalance the three iris classes to 30/50/40 samples
    X, y = make_imbalance(X, y, {0: 30, 1: 50, 2: 40})
    X = X.astype(np.float32)
    batch_size = 10
    training_generator, steps_per_epoch = balanced_batch_generator(
        X, y, sample_weight=None, sampler=sampler,
        batch_size=batch_size, random_state=42)
    learning_rate = 0.01
    epochs = 10
    input_size = X.shape[1]
    output_size = 3  # iris has three classes

    # helper functions
    def init_weights(shape):
        # small random-normal initial weights
        return tf.Variable(tf.random_normal(shape, stddev=0.01))

    def accuracy(y_true, y_pred):
        # fraction of samples whose argmax prediction matches the label
        return np.mean(np.argmax(y_pred, axis=1) == y_true)

    # input and output
    data = tf.placeholder("float32", shape=[None, input_size])
    targets = tf.placeholder("int32", shape=[None])

    # build the model and weights
    W = init_weights([input_size, output_size])
    b = init_weights([output_size])
    # NOTE(review): sigmoid applied before softmax cross-entropy, which
    # normally expects raw logits -- confirm this is intentional.
    out_act = tf.nn.sigmoid(tf.matmul(data, W) + b)

    # build the loss, predict, and train operator
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=out_act, labels=targets)
    loss = tf.reduce_sum(cross_entropy)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_op = optimizer.minimize(loss)
    predict = tf.nn.softmax(out_act)

    # Initialization of all variables in the graph
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        for e in range(epochs):
            # generator is infinite: take exactly steps_per_epoch batches
            for i in range(steps_per_epoch):
                X_batch, y_batch = next(training_generator)
                sess.run([train_op, loss],
                         feed_dict={data: X_batch, targets: y_batch})

            # For each epoch, run accuracy on train and test
            predicts_train = sess.run(predict, feed_dict={data: X})
            print("epoch: {} train accuracy: {:.3f}"
                  .format(e, accuracy(y, predicts_train)))
def test_balanced_batch_generator_function_sparse(data, keep_sparse):
    """Check that ``keep_sparse`` decides whether batches built from a CSR
    matrix stay sparse or are densified by the generator."""
    X, y = data
    generator, n_steps = balanced_batch_generator(
        sparse.csr_matrix(X), y, keep_sparse=keep_sparse,
        batch_size=10, random_state=42)
    for _ in range(n_steps):
        batch_X, _batch_y = next(generator)
        batch_is_sparse = sparse.issparse(batch_X)
        if keep_sparse:
            assert batch_is_sparse
        else:
            assert not batch_is_sparse
def test_balanced_batch_generator_function_sparse(keep_sparse):
    """Check the ``keep_sparse`` flag on an imbalanced iris subset: sparse
    input batches stay sparse iff the flag is set."""
    X, y = load_iris(return_X_y=True)
    X, y = make_imbalance(X, y, {0: 30, 1: 50, 2: 40})
    X = X.astype(np.float32)
    generator, n_steps = balanced_batch_generator(
        sparse.csr_matrix(X), y, keep_sparse=keep_sparse,
        batch_size=10, random_state=42)
    for _ in range(n_steps):
        batch_X, _ = next(generator)
        if keep_sparse:
            assert sparse.issparse(batch_X)
        else:
            assert not sparse.issparse(batch_X)
def train():
    """Train the text-RNN model with balanced (under-sampled) batches,
    logging summaries to TensorBoard and periodically evaluating on the
    validation set and checkpointing the session.

    Relies on module-level objects not visible in this chunk: ``model``,
    ``pm`` (hyper-parameters), ``wordid``, ``cat_to_id``, ``process``,
    ``sequence``, ``getAUC`` and ``performance``.
    """
    tensorboard_dir = 'E:/Easy_TextCnn_Rnn-master1/tensorboard/Text_Rnn'
    save_dir = 'E:/Easy_TextCnn_Rnn-master1/checkpoints/Text_Rnn'
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    save_path = os.path.join(save_dir, 'best_validation')

    # TensorBoard summaries and checkpointing machinery
    tf.summary.scalar('loss', model.loss)
    tf.summary.scalar('accuracy', model.accuracy)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)
    saver = tf.train.Saver()
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    writer.add_graph(session.graph)

    # Load the padded/encoded datasets.
    # NOTE(review): the original (translated) comment claimed "x_train are
    # the labels, y_train is the text data", which contradicts how the pair
    # is used below (x as input, y as labels) -- verify against process().
    x_train, y_train = process(pm.train_filename, wordid, cat_to_id, max_length=250)
    #x_test, y_test = process(pm.test_filename, wordid, cat_to_id, max_length=250)
    val_x, val_y = process(pm.val_filename, wordid, cat_to_id, max_length=250)

    # The triple-quoted blocks below are dead code: earlier resampling
    # experiments kept for reference.
    '''
    x_train, y_train = make_classification()
    #通过设置RandomUnderSampler中的replacement=True参数，可以实现自助法(boostrap)抽样
    #通过设置RandomUnderSampler中的ratio参数，可以设置数据采样比例
    rus = RandomUnderSampler(random_state = 0,replacement = True,sampling_strategy = {0:4251,1:4251})#采用随机欠采样(下采样)random_state = 0,sampling_strategy = 0.2
    #pipe = make_pipeline(
    #SMOTE(sampling_strategy = {0:4250}),
    #NearMiss(sampling_strategy = {1:4250})
    #)
    #x_resample,y_resample=pipe.fit_resample(x_train,y_train)
    x_resample,y_resample=rus.fit_sample(x_train,y_train)
    '''
    '''
    data1=x_train[x_train['label']=='负']
    data0=x_train[x_train['label']=='正']
    index = np.random.randint( len(data1),size=1*(len(x_train)-len(data1)))
    lower_data1 = data1.iloc[list(index)]#下采样
    '''
    '''
    ratio = {0:4251,1:4251}
    x_imb,y_imb = make_imbalance(x_train, y_train,ratio=ratio)
    #x_imb = np.array(x_imb)
    #y_imb = np.array(y_imb)
    '''
    '''
    model_RandomUnderSampler = RandomUnderSampler()
    x_RandomUnderSample_resampled,y_RandomUnderSample_resampled = model_RandomUnderSampler.fit_sample(x_train, y_train)
    #RandomUnderSampler_resampled=pd.concat([x_RandomUnderSample_resampled,y_RandomUnderSample_resampled],axis=1)
    '''
    '''
    x_train, y_train = make_classification()
    ee=EasyEnsembleClassifier(random_state=0,sampling_strategy='majority')#sampling_strategy=0.2
    #x_resampled,y_resampled == ee.fit_sample(x_train, y_train)
    #ee.fit(x_train, y_train)
    x_train,y_train == ee.fit(x_train, y_train)
    '''
    #class_dict = dict()
    #class_dict[0] = 4251;class_dict[1] = 4251
    #x_train,y_train = make_imbalance(x_train,y_train,class_dict)
    # 0 denotes positive samples, 1 denotes negative samples
    # Random under-sampling to 4251 samples per class for balanced batches.
    training_generator, steps_per_epoch = balanced_batch_generator(
        x_train, y_train,
        sampler=RandomUnderSampler(sampling_strategy={
            0: 4251, 1: 4251
        }), batch_size=pm.batch_size, random_state=42)
    #sample_weight = None,
    for epoch in range(pm.num_epochs):
        print('Epoch:', epoch + 1)
        #num_batchs = int((len(x_train) - 1) / pm.batch_size) + 1
        for i in range(steps_per_epoch):
            # generator is infinite: take exactly steps_per_epoch batches
            x_batch, y_batch = next(training_generator)
            #feed_dict = dict(y_batch,x_batch)
            seq_len = sequence(x_batch)
            feed_dict = model.feed_data(x_batch, y_batch, seq_len, pm.keep_prob)
            #feed_dict[pm.input_y]=y_batch;feed_dict[targets] = x_batch
            _, global_step, _summary, train_loss, train_accuracy = session.run(
                [
                    model.optimizer, model.global_step, merged_summary,
                    model.loss, model.accuracy
                ],
                feed_dict=feed_dict)
            # every 100 global steps: evaluate on the validation set
            if global_step % 100 == 0:
                #test_loss, test_accuracy = model.evaluate(session, x_test, y_test)
                val_loss, val_accuracy = model.evaluate(session, val_x, val_y)
                print(
                    'global_step:', global_step, 'train_loss:', train_loss,
                    'train_accuracy:', train_accuracy,
                    #'test_loss:', test_loss, 'test_accuracy:', test_accuracy)
                    'val_loss:', val_loss, 'val_accuracy:', val_accuracy)
                #label, proba_label, pre_label = model.getprob(session, x_test, y_test)
                label, proba_label, pre_label = model.getprob(
                    session, val_x, val_y)
                label = np.argmax(label, 1).tolist()
                AUC = getAUC(proba_label, label)
                # print(np.argmax(label, 1).tolist()[:10])
                # print(proba_label[:10])
                # print(pre_label[:10])
                ACC, SN, SP, Precision, F1, MCC, TP, FN, FP, TN = performance(
                    label, pre_label)  ##
                #print('ACC:%.3f SN:%.3f SP:%.3f Precision:%.3f F1:%.3f MCC:%.3f AUC:%.3f' %(ACC, SN, SP, Precision, F1, MCC, AUC))##
                print(
                    'ACC:%.3f SN:%.3f SP:%.3f Precision:%.3f F1:%.3f MCC:%.3f AUC:%.3f TP:%d,FN:%d,FP:%d,TN:%d'
                    % (ACC, SN, SP, Precision, F1, MCC, AUC, TP, FN, FP, TN))
                #print('test_AUC:', AUC)
                print('val_AUC:', AUC)
                #print('Saving Model...')
                #saver.save(session, save_path, global_step=global_step)
            # once per epoch (in global-step terms): checkpoint the model.
            # NOTE(review): ``proba_label`` is only assigned in the
            # %100 branch above; if that branch has not run yet this loop
            # below raises NameError -- verify step alignment.
            if global_step % steps_per_epoch == 0:
                #if global_step % num_batchs == 0:
                pre_ = []
                for i in range(0, 400):
                    pre_.append(proba_label[i][1])
                #np.savetxt(r'E:\Easy_TextCnn_Rnn-master1\TextRnn'+"/scores-val-1.data",pre_,fmt="%f",delimiter="\t")
                print('Saving Model...')
                saver.save(session, save_path, global_step=global_step)
        # learning-rate decay applied once per epoch
        pm.learning_rate *= pm.lr_decay
    # NOTE(review): this opening triple-quote starts a commented-out block
    # that continues beyond this chunk of the file.
    '''