def __init__(self, _num_words, _num_class, _vocab_file, _language):
    self._graph = tf.Graph()
    with self._graph.as_default():
        self._model = TextCNN(sequenceLength=_num_words,
                              numClasses=_num_class,
                              vocabSize=10000,
                              embeddingSize=options.embedding_dim,
                              kernelSizes=list(map(int, options.kernel_sizes.split(","))),
                              numKernels=options.num_kernels,
                              l2RegLambda=options.l2_reg_lambda)
    self._sess = tf.Session(graph=self._graph)
    self._vocab_file = _vocab_file
    self._num_words = _num_words
    self._num_class = _num_class
    self._language = _language
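# A minimal sketch, not part of the original class: how a trained checkpoint could be
# restored into this wrapper's graph and session after construction. The `load` method
# name and the `checkpoint_dir` argument are assumptions; only standard TF1 Saver calls
# are used.
def load(self, checkpoint_dir):
    with self._graph.as_default():
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.latest_checkpoint(checkpoint_dir)  # None if no checkpoint is found
        if ckpt is None:
            raise ValueError("No checkpoint found in {}".format(checkpoint_dir))
        saver.restore(self._sess, ckpt)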
xEVAL, yEVAL = open_data.open_data_and_labels(options.dev_data, options.vocab_file,
                                               options.num_words, options.language)

# Shuffle the training data with a fixed seed so runs are reproducible
np.random.seed(10)
shuffle_indices = np.random.permutation(np.arange(len(train_label)))
xTrain = train_data[shuffle_indices]
yTrain = train_label[shuffle_indices]
del train_data, train_label

sess = tf.Session()
with sess.as_default():
    cnn = TextCNN(sequenceLength=options.num_words,
                  numClasses=options.num_class,
                  vocabSize=10000,
                  embeddingSize=options.embedding_dim,
                  kernelSizes=list(map(int, options.kernel_sizes.split(","))),
                  numKernels=options.num_kernels,
                  l2RegLambda=options.l2_reg_lambda)

    global_step = tf.Variable(0, name='globalStep', trainable=False)
    optimizer = tf.train.AdamOptimizer(1e-3)
    grads_and_vars = optimizer.compute_gradients(cnn.loss)
    train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

    sess.run(tf.global_variables_initializer())
    _saver = tf.train.Saver(tf.global_variables(), max_to_keep=1000)

    def trainStep(xBatch, yBatch):
        # The original excerpt is cut off inside this feed_dict; the dropout entry and the
        # sess.run call below are an assumed completion (cnn.dropoutKeepProb and
        # options.dropout_keep_prob are not shown in the source).
        feed_dict = {cnn.inputX: xBatch,
                     cnn.inputY: yBatch,
                     cnn.dropoutKeepProb: options.dropout_keep_prob}
        _, step, loss = sess.run([train_op, global_step, cnn.loss], feed_dict)
        return step, loss
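    # xEVAL / yEVAL are loaded above but never used in this excerpt. The sketch below is a
    # hedged counterpart to trainStep for evaluation; the cnn.dropoutKeepProb placeholder
    # and the use of cnn.loss alone (no accuracy tensor is shown for this TextCNN variant)
    # are assumptions.
    def evalStep(xBatch, yBatch):
        feed_dict = {cnn.inputX: xBatch,
                     cnn.inputY: yBatch,
                     cnn.dropoutKeepProb: 1.0}  # disable dropout at evaluation time
        step, loss = sess.run([global_step, cnn.loss], feed_dict)
        print("eval step {}, loss {:g}".format(step, loss))
        return loss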
# Split train/dev set
dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]

# Training
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)  # was FLAGS.allow_soft_placement, almost certainly a typo
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(sequence_length=x_train.shape[1],
                      num_classes=y_train.shape[1],
                      vocab_size=len(vocab_processor.vocabulary_),
                      embedding_size=FLAGS.embedding_dim,
                      filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                      num_filters=FLAGS.num_filters,
                      l2_reg_lambda=FLAGS.l2_reg_lambda)

        # Define Training procedure
        global_step = tf.Variable(0, name='global_step', trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # Keep track of gradient values and sparsity
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
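                # The loop body is cut off in the original. A typical completion (assumed,
                # not taken from this source) records a histogram of each gradient and its
                # sparsity so both can be inspected in TensorBoard.
                grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name),
                                                     tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        grad_summaries_merged = tf.summary.merge(grad_summaries)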
##### Training ###################################
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=allow_soft_placement,
        log_device_placement=log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(sequence_length=trainX.shape[1],
                      num_classes=trainY.shape[1],
                      vocab_size=vocab_size,
                      embedding_size=embedding_dim,
                      filter_sizes=filter_sizes,
                      num_filters=num_filters,
                      l2_reg_lambda=L2_lambda)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(0.001)  # learning rate = 0.001
        grads_and_vars = optimizer.compute_gradients(cnn.loss)  # returns a list of (gradient, variable) pairs: (dW, W), (db, b), etc.
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)  # applies the updates and increments global_step
        # train_op = optimizer.minimize(cnn.loss, global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
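        # Splitting compute_gradients / apply_gradients (instead of the single
        # optimizer.minimize call commented out above) leaves room to transform the
        # gradients before they are applied. The lines below are a hedged sketch of
        # gradient clipping, not part of the original procedure; the clip norm of 5.0 and
        # the name clipped_train_op are assumptions.
        grads, tvars = zip(*grads_and_vars)
        clipped_grads, _ = tf.clip_by_global_norm(grads, 5.0)  # None gradients are ignored
        clipped_train_op = optimizer.apply_gradients(zip(clipped_grads, tvars),
                                                     global_step=global_step)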
x_train, x_dev = x_data[:validdata_size], x_data[validdata_size:]
y_train, y_dev = y_data[:validdata_size], y_data[validdata_size:]
# print("train data size is {:d} , and train label is {:d}".format(len(x_train), len(y_train)))
# print("valid data size is {:d} , and valid label is {:d}".format(len(x_dev), len(y_dev)))

"""
# Training
#=================================================
"""
with tf.Graph().as_default():
    sess = tf.Session()
    with sess.as_default():
        cnn = TextCNN(sequence_length=FLAGS.sequence_length,
                      num_classes=y_train.shape[1],
                      embedding_size=FLAGS.embedding_dim,
                      filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                      num_filters=FLAGS.num_filters,
                      l2_reg_lambda=FLAGS.l2_reg_lambda)

        # define the Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_var = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_var, global_step=global_step)

        # output directory for models and summaries
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
        print("Writing to {} \n".format(out_dir))
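        # The excerpt stops after creating out_dir ("models and summaries") without writing
        # anything to it. A hedged sketch of a loss summary and train writer follows; the
        # "summaries/train" subdirectory and the summary name are assumptions.
        loss_summary = tf.summary.scalar("loss", cnn.loss)
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)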
def train(x, y, pretrained_embedding_filter):
    # Split data into development set and training set
    x_train, y_train, x_dev, y_dev = split_data(x, y, FLAGS.devset_percentage)

    # Training
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sentence_len=x_train.shape[1],
                          vocab_size=pretrained_embedding_filter.shape[0],
                          embedding_size=pretrained_embedding_filter.shape[1],
                          static_embedding_filter=pretrained_embedding_filter,
                          filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          num_classes=y_train.shape[1],
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define training procedure
            global_step = tf.Variable(tf.constant(0), name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
            train_op = optimizer.minimize(cnn.loss, global_step=global_step)

            # Output directory for model
            timestamp = str(time.time())
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                    cnn.inputs: x_batch,
                    cnn.labels: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, loss, accuracy = sess.run(
                    [train_op, global_step, cnn.loss, cnn.accuracy], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))

            def dev_step(x_batch, y_batch):
                """
                A single development step
                """
                feed_dict = {
                    cnn.inputs: x_batch,
                    cnn.labels: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, loss, accuracy = sess.run(
                    [global_step, cnn.loss, cnn.accuracy], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                return loss, accuracy

            # Mini-batch training loop with early stopping
            best_loss = 0.0
            final_accuracy = 0.0
            patience = 0
            should_stop = False
            num_batches = int((len(y_train) - 1) / FLAGS.batch_size + 1)
            start_training_time = datetime.datetime.now().isoformat()
            for epoch in range(FLAGS.num_epochs):
                if should_stop:
                    break
                shuffled_indices = np.random.permutation(np.arange(len(y_train)))
                x_shuffled = x_train[shuffled_indices]
                y_shuffled = y_train[shuffled_indices]
                for batch in range(num_batches):
                    start_index = batch * FLAGS.batch_size
                    end_index = min(start_index + FLAGS.batch_size, len(y_train))
                    train_step(x_shuffled[start_index:end_index],
                               y_shuffled[start_index:end_index])
                    current_step = tf.train.global_step(sess, global_step)
                    if current_step % FLAGS.evaluate_every == 0:
                        print("Evaluation...")
                        loss_value, accuracy_value = dev_step(x_dev, y_dev)
                        print("")
                        # Always save on the first evaluation, then only when the dev loss improves
                        if current_step == FLAGS.evaluate_every or loss_value < best_loss:
                            patience = 0
                            print(best_loss, loss_value)
                            best_loss = loss_value
                            final_accuracy = accuracy_value
                            path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                            print("Saved model checkpoint to {}\n".format(path))
                        else:
                            patience += 1
                            if patience > FLAGS.patience_threshold:
                                should_stop = True
                                print("Early stopping after {} steps".format(current_step))
                                break

            print("Accuracy: {}, Loss: {}".format(final_accuracy, best_loss))
            print("Training Completed!")
            end_training_time = datetime.datetime.now().isoformat()
            print("Started training: {}\nCompleted Training: {}".format(
                start_training_time, end_training_time))
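# A hypothetical entry point, not part of the original: load_dataset and load_embeddings
# are placeholder names for whatever preprocessing produces x, y and the pretrained
# embedding matrix that train() expects.
if __name__ == "__main__":
    x, y = load_dataset()          # hypothetical helper returning padded inputs and one-hot labels
    embedding = load_embeddings()  # hypothetical helper, shape [vocab_size, embedding_size]
    train(x, y, embedding)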