Example #1
def train():
  # Prepare data.
  print("Preparing Train & Eval data in %s" % FLAGS.data_dir)

  for d in FLAGS.data_dir, FLAGS.model_dir:
    if not os.path.exists(d):
      os.makedirs(d)

  data = Data(FLAGS.model_dir, FLAGS.data_dir, FLAGS.vocab_size, FLAGS.max_seq_length)
  epoc_steps = len(data.rawTrainPosCorpus) // FLAGS.batch_size

  print( "Training Data: %d total positive samples, each epoch need %d steps" % (len(data.rawTrainPosCorpus), epoc_steps ) )

  cfg = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
  with tf.Session(config=cfg) as sess:
    model = create_model( sess, data.rawnegSetLen, data.vocab_size, False )

    #setup tensorboard logging
    sw = tf.summary.FileWriter(logdir=FLAGS.model_dir, graph=sess.graph, flush_secs=120)
    summary_op = model.add_summaries()
    # This is the training loop.
    step_time, loss, train_acc = 0.0, 0.0, 0.0
    current_step = 0
    previous_accuracies = []
    for epoch in range( FLAGS.max_epoc ):
      epoc_start_Time = time.time()
      for batchId in range( epoc_steps ):
        start_time = time.time()
        source_inputs, tgt_inputs, labels = data.get_train_batch(FLAGS.batch_size)
        model.set_forward_only(False)
        d = model.get_train_feed_dict(source_inputs, tgt_inputs, labels)
        ops = [model.train, summary_op, model.loss, model.train_acc ]
        _, summary, step_loss, step_train_acc = sess.run(ops, feed_dict=d)
        step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
        loss += step_loss / FLAGS.steps_per_checkpoint
        train_acc += step_train_acc / FLAGS.steps_per_checkpoint
        current_step += 1

        # Once in a while, we save checkpoint, print statistics, and run evals.
        if current_step % FLAGS.steps_per_checkpoint == 0:
          print ("global epoc: %.3f, global step %d, learning rate %.4f step-time:%.2f loss:%.4f train_binary_acc:%.4f " %
                 ( float(model.global_step.eval())/ float(epoc_steps), model.global_step.eval(), model.learning_rate.eval(),
                             step_time, step_loss, train_acc ))
          checkpoint_path = os.path.join(FLAGS.model_dir, "SSE-LSTM.ckpt")
          acc_sum = tf.Summary(value=[tf.Summary.Value(tag="train_binary_acc", simple_value=train_acc)])
          sw.add_summary(acc_sum, current_step)

          # #########debugging##########
          # model.set_forward_only(True)
          # sse_index.createIndexFile(model, encoder, os.path.join(FLAGS.model_dir, FLAGS.rawfilename),
          #                           FLAGS.max_seq_length, os.path.join(FLAGS.model_dir, FLAGS.encodedIndexFile), sess,
          #                           batchsize=1000)
          # evaluator = sse_evaluator.Evaluator(model, eval_corpus, os.path.join(FLAGS.model_dir, FLAGS.encodedIndexFile),
          #                                     sess)
          # acc1, acc3, acc10 = evaluator.eval()
          # print("epoc# %.3f, task specific evaluation: top 1/3/10 accuracies: %f / %f / %f " % (float(model.global_step.eval())/ float(epoc_steps), acc1, acc3, acc10))
          # ###end of debugging########

          # Decrease learning rate if no improvement was seen over the last 2 checkpoints.
          if len(previous_accuracies) > 3 and train_acc < min(previous_accuracies[-2:]):
            sess.run(model.learning_rate_decay_op)
          previous_accuracies.append(train_acc)
          # save currently best-ever model
          if train_acc == max(previous_accuracies):
            print("Better Accuracy %.4f found. Saving current best model ..." % train_acc )
            model.save(sess, checkpoint_path + "-BestEver")
          else:
            print("Best Accuracy is: %.4f, while current round is: %.4f" % (max(previous_accuracies), train_acc) )
            print("skip saving model ...")
          # If training has run past 10 epochs and accuracy still shows no improvement,
          # stop training, report the best accuracy number, and save the final model.
          # The just-appended train_acc is excluded from the window, otherwise the condition can never hold.
          if epoch > 10 and len(previous_accuracies) > 5 and train_acc < min(previous_accuracies[-6:-1]):
            p = model.save(sess, checkpoint_path + "-final")
            print("After around %d Epocs no further improvement, Training finished, wrote checkpoint to %s." % (epoch, p) )
            break

          # reset current checkpoint step statistics
          step_time, loss, train_acc = 0.0, 0.0, 0.0


      epoc_train_time = time.time() - epoc_start_Time
      print('\n\n\nepoch# %d  took %f hours' % ( epoch , epoc_train_time / (60.0 * 60) ) )

      # run task specific evaluation after each epoch
      if (FLAGS.task_type not in ['ranking', 'crosslingual']) or ( (epoch+1) % 20 == 0 ):
        model.set_forward_only(True)
        sse_index.createIndexFile( model, data.encoder, os.path.join(FLAGS.model_dir, FLAGS.rawfilename), FLAGS.max_seq_length, os.path.join(FLAGS.model_dir, FLAGS.encodedIndexFile), sess, batchsize=1000 )
        evaluator = sse_evaluator.Evaluator(model, data.rawEvalCorpus, os.path.join(FLAGS.model_dir, FLAGS.encodedIndexFile) , sess)
        acc1, acc3, acc10 = evaluator.eval()
        print("epoc#%d, task specific evaluation: top 1/3/10 accuracies: %f / %f / %f \n\n\n" % (epoch, acc1, acc3, acc10) )
      # Save checkpoint at end of each epoch
      checkpoint_path = os.path.join(FLAGS.model_dir, "SSE-LSTM.ckpt")
      model.save(sess, checkpoint_path + '-epoch-%d'%epoch)
      if len(previous_accuracies) > 0:
        print('So far best ever model training binary accuracy is: %.4f ' % max(previous_accuracies) )
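
Note: the checkpoint block above combines three decisions: decay the learning rate when accuracy stops improving, save only the best-ever model, and stop once late epochs show no gains. Below is a minimal, framework-free sketch of that policy; decay_lr and save_model are hypothetical callbacks standing in for model.learning_rate_decay_op and model.save.

def checkpoint_policy(train_acc, history, epoch, decay_lr, save_model):
  """Sketch of the plateau / best-ever / early-stop logic from Example #1.

  history holds the accuracy recorded at each earlier checkpoint.
  Returns True when training should stop.
  """
  # Decay the learning rate when the new accuracy is below the last 2 checkpoints.
  if len(history) > 3 and train_acc < min(history[-2:]):
    decay_lr()
  history.append(train_acc)
  # Save only when this checkpoint is the best ever seen.
  if train_acc == max(history):
    save_model("SSE-LSTM.ckpt-BestEver")
  # Stop late in training if the 5 prior checkpoints were all better.
  if epoch > 10 and len(history) > 5 and train_acc < min(history[-6:-1]):
    save_model("SSE-LSTM.ckpt-final")
    return True
  return False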
Example #2
def train():
    # Prepare data.
    print("Preparing Train & Eval data in %s" % FLAGS.data_dir)

    for d in FLAGS.data_dir, FLAGS.model_dir:
        if not os.path.exists(d):
            os.makedirs(d)

    encoded_train_pair_path, encoded_eval_pair_path, encodedFullTargetSpace_path, _, _ = data_utils.prepare_raw_data(
        FLAGS.data_dir, FLAGS.model_dir, FLAGS.src_vocab_size,
        FLAGS.tgt_vocab_size)

    #load full set targetSeqID data
    tgtID_EncodingMap, tgtID_FullLableMap, fullLabel_tgtID_Map, target_inputs, target_lens = load_encodedTargetSpace(
        encodedFullTargetSpace_path)

    #load full set train data
    print("Reading development and training data ...")
    train_set, epoc_steps = read_train_data(encoded_train_pair_path,
                                            tgtID_EncodingMap)
    print("Training Data: %d total samples, each epoch need %d steps" %
          (len(train_set), epoc_steps))

    #load eval data
    eval_src_seqs, eval_src_lens, eval_tgtIDs = get_eval_set(
        encoded_eval_pair_path)

    cfg = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
    with tf.device('/' + FLAGS.device), tf.Session(config=cfg) as sess:
        # Create SSE model and build tensorflow training graph.
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.embedding_size))
        model = create_model(sess, len(tgtID_FullLableMap), False)

        #setup evaluation graph
        evaluator = sse_evaluator.Evaluator(model, eval_src_seqs,
                                            eval_src_lens, eval_tgtIDs,
                                            target_inputs, target_lens,
                                            tgtID_FullLableMap, sess)

        #setup tensorboard logging
        # tf.train.SummaryWriter is deprecated; use tf.summary.FileWriter
        sw = tf.summary.FileWriter(FLAGS.model_dir,
                                   sess.graph,
                                   flush_secs=120)
        summary_op = model.add_summaries()

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_accuracies = []
        for epoch in range(FLAGS.max_epoc):
            epoc_start_Time = time.time()
            random.shuffle(train_set)
            for batchId in range(epoc_steps -
                                 int(2.5 * FLAGS.steps_per_checkpoint)
                                 ):  # skip the tail batches of each shuffled epoch
                start_time = time.time()
                source_inputs, labels, src_lens = [], [], []
                for idx in range(FLAGS.batch_size):
                    source_input, src_len, tgtID = train_set[batchId *
                                                             FLAGS.batch_size +
                                                             idx]
                    source_inputs.append(source_input)
                    labels.append(tgtID_FullLableMap[tgtID])
                    src_lens.append(src_len)

                d = model.get_train_feed_dict(source_inputs, target_inputs,
                                              labels, src_lens, target_lens)
                ops = [model.train, summary_op, model.loss]
                _, summary, step_loss = sess.run(ops, feed_dict=d)
                step_time += (time.time() -
                              start_time) / FLAGS.steps_per_checkpoint
                loss += step_loss / FLAGS.steps_per_checkpoint
                current_step += 1

                # Once in a while, we save checkpoint, print statistics, and run evals.
                if current_step % FLAGS.steps_per_checkpoint == 0:
                    print(
                        "global epoch: %.3f, global step %d, learning rate %.4f step-time:%.2f loss:%.4f "
                        % (float(model.global_step.eval()) / float(epoc_steps),
                           model.global_step.eval(),
                           model.learning_rate.eval(), step_time, loss))
                    # Save checkpoint and zero timer and loss.
                    checkpoint_path = os.path.join(FLAGS.model_dir,
                                                   "SSE-LSTM.ckpt")
                    model.save(sess,
                               checkpoint_path,
                               global_step=model.global_step)
                    step_time, loss = 0.0, 0.0
                    # Run evals on development set and print their accuracy number.
                    t = time.time()
                    acc1, acc3, acc10 = evaluator.eval()
                    acc_sum = tf.Summary(value=[
                        tf.Summary.Value(tag="acc1", simple_value=acc1),
                        tf.Summary.Value(tag="acc3", simple_value=acc3),
                        tf.Summary.Value(tag="acc10", simple_value=acc10)
                    ])
                    sw.add_summary(acc_sum, current_step)
                    print(
                        "Step %d, top 1/3/10 accuracies: %f / %f / %f, (eval took %f seconds) "
                        % (current_step, acc1, acc3, acc10, time.time() - t))

                    sys.stdout.flush()
                    # Decrease learning rate if no improvement was seen over last 3 times.
                    if len(previous_accuracies) > 2 and acc1 < min(
                            previous_accuracies[-3:]):
                        sess.run(model.learning_rate_decay_op)
                    previous_accuracies.append(acc1)
                    # save currently best-ever model
                    if acc1 == max(previous_accuracies):
                        model.save(sess, checkpoint_path + "-BestEver")
                    # If training has run past 2 epochs and accuracy still shows no improvement,
                    # stop training, report the best accuracy number, and save the final model.
                    # The just-appended acc1 is excluded from the window, otherwise the condition can never hold.
                    if epoch > 2 and len(previous_accuracies) > 3 and acc1 < min(previous_accuracies[-4:-1]):
                        p = model.save(sess, checkpoint_path + "-final")
                        print(
                            "After around %d epochs no further improvement. Training finished, wrote checkpoint to %s."
                            % (epoch, p))
                        print(
                            "Best ever top1 accuracy: %.2f , Final top 1 / 3 / 10 accuracies: %.2f / %.2f / %.2f"
                            % (max(previous_accuracies), acc1, acc3, acc10))
                        break
            # report epoch statistics
            epoc_train_time = time.time() - epoc_start_Time
            print('epoch# %d  took %f hours' % (epoch, epoc_train_time /
                                                (60.0 * 60)))
            # Save checkpoint at end of each epoch
            checkpoint_path = os.path.join(FLAGS.model_dir, "SSE-LSTM.ckpt")
            model.save(sess, checkpoint_path + '-epoch-%d' % epoch)
            if len(previous_accuracies) > 0:
                print('So far best ever model top1 accuracy is: %.4f ' %
                      max(previous_accuracies))
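
Note: Example #2 logs evaluation accuracies to TensorBoard by building tf.Summary protos by hand instead of running a graph summary op. Here is a minimal sketch of that pattern under the TensorFlow 1.x API (tf.compat.v1 in TF 2); writer is any tf.summary.FileWriter.

def log_eval_accuracies(writer, step, acc1, acc3, acc10):
    """Write scalar eval metrics as a hand-built Summary proto."""
    acc_sum = tf.Summary(value=[
        tf.Summary.Value(tag="acc1", simple_value=acc1),
        tf.Summary.Value(tag="acc3", simple_value=acc3),
        tf.Summary.Value(tag="acc10", simple_value=acc10),
    ])
    writer.add_summary(acc_sum, global_step=step)
    writer.flush()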
Example #3
def train():
    # Prepare data.
    print("Preparing Train & Eval data in %s" % FLAGS.data_dir)

    for d in FLAGS.data_dir, FLAGS.model_dir:
        if not os.path.exists(d):
            os.makedirs(d)

    encoder, train_corpus, dev_corpus, encodedTgtSpace, tgtIdNameMap = data_utils.prepare_raw_data(
        FLAGS.data_dir, FLAGS.model_dir, FLAGS.vocab_size, FLAGS.task_type,
        FLAGS.max_seq_length)

    epoc_steps = int(math.floor(len(train_corpus) / FLAGS.batch_size))

    print(
        "Training Data: %d total samples (pos + neg), each epoch needs %d steps"
        % (len(train_corpus), epoc_steps))

    cfg = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)

    with tf.Session(config=cfg) as sess:
        # Create SSE model and build tensorflow training graph.
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.embedding_size))
        model = create_model(sess, len(encodedTgtSpace), encoder.vocab_size,
                             False)

        #setup evaluation graph
        evaluator = sse_evaluator.Evaluator(model, dev_corpus, encodedTgtSpace,
                                            sess)

        #setup tensorboard logging
        sw = tf.summary.FileWriter(logdir=FLAGS.model_dir,
                                   graph=sess.graph,
                                   flush_secs=120)
        summary_op = model.add_summaries()

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_accuracies = []
        fullSetTargetIds = set(encodedTgtSpace.keys())
        for epoch in range(FLAGS.max_epoc):
            epoc_start_Time = time.time()
            random.shuffle(train_corpus)
            for batchId in range(epoc_steps -
                                 int(2.5 * FLAGS.steps_per_checkpoint)
                                 ):  # skip the tail batches of each shuffled epoch
                start_time = time.time()
                source_inputs, src_lens, tgt_inputs, tgt_lens, labels  = [], [], [], [], []
                for idx in range(FLAGS.batch_size):
                    source_input, tgtId = train_corpus[batchId *
                                                       FLAGS.batch_size + idx]
                    # add positive pair (length = index of first PAD_ID + 1; assumes padded sequences)
                    source_inputs.append(source_input)
                    src_lens.append(
                        source_input.index(text_encoder.PAD_ID) + 1)
                    tgt_inputs.append(encodedTgtSpace[tgtId])
                    tgt_lens.append(
                        encodedTgtSpace[tgtId].index(text_encoder.PAD_ID) + 1)
                    labels.append(1.0)
                    #add negative pair
                    # random.sample on a set is removed in Python 3.11; draw from a tuple instead
                    negTgt = random.choice(
                        tuple(fullSetTargetIds - {tgtId}))
                    source_inputs.append(source_input)
                    src_lens.append(
                        source_input.index(text_encoder.PAD_ID) + 1)
                    tgt_inputs.append(encodedTgtSpace[negTgt])
                    tgt_lens.append(
                        encodedTgtSpace[negTgt].index(text_encoder.PAD_ID) + 1)
                    labels.append(0.0)

                d = model.get_train_feed_dict(source_inputs, tgt_inputs,
                                              labels, src_lens, tgt_lens)
                ops = [model.train, summary_op, model.loss]
                _, summary, step_loss = sess.run(ops, feed_dict=d)
                step_time += (time.time() -
                              start_time) / FLAGS.steps_per_checkpoint
                loss += step_loss / FLAGS.steps_per_checkpoint
                current_step += 1

                # Once in a while, we save checkpoint, print statistics, and run evals.
                if current_step % FLAGS.steps_per_checkpoint == 0:
                    print(
                        "global epoch: %.3f, global step %d, learning rate %.4f step-time:%.2f loss:%.4f "
                        % (float(model.global_step.eval()) / float(epoc_steps),
                           model.global_step.eval(),
                           model.learning_rate.eval(), step_time, loss))
                    # Save checkpoint and zero timer and loss.
                    checkpoint_path = os.path.join(FLAGS.model_dir,
                                                   "SSE-LSTM.ckpt")
                    # model.save(sess, checkpoint_path, global_step=model.global_step)  #only save better models
                    step_time, loss = 0.0, 0.0
                    # Run evals on development set and print their accuracy number.
                    t = time.time()
                    acc1, acc3, acc10 = evaluator.eval()
                    acc_sum = tf.Summary(value=[
                        tf.Summary.Value(tag="acc1", simple_value=acc1),
                        tf.Summary.Value(tag="acc3", simple_value=acc3),
                        tf.Summary.Value(tag="acc10", simple_value=acc10)
                    ])
                    sw.add_summary(acc_sum, current_step)
                    print(
                        "Step %d, top 1/3/10 accuracies: %f / %f / %f, (eval took %f seconds) "
                        % (current_step, acc1, acc3, acc10, time.time() - t))
                    sys.stdout.flush()
                    # Decrease learning rate if no improvement was seen over last 3 times.
                    if len(previous_accuracies) > 2 and acc1 < min(
                            previous_accuracies[-3:]):
                        sess.run(model.learning_rate_decay_op)
                    previous_accuracies.append(acc1)
                    # save currently best-ever model
                    if acc1 == max(previous_accuracies):
                        print(
                            "Better Accuracy %f found. Saving current best model ..."
                            % acc1)
                        model.save(sess, checkpoint_path + "-BestEver")
                    else:
                        print(
                            "Best Accuracy is: %f, while current round is: %f"
                            % (max(previous_accuracies), acc1))
                        print("skip saving model ...")
                    # If training has run past 2 epochs and accuracy still shows no improvement,
                    # stop training, report the best accuracy number, and save the final model.
                    # The just-appended acc1 is excluded from the window, otherwise the condition can never hold.
                    if epoch > 2 and len(previous_accuracies) > 3 and acc1 < min(previous_accuracies[-4:-1]):
                        p = model.save(sess, checkpoint_path + "-final")
                        print(
                            "After around %d epochs no further improvement. Training finished, wrote checkpoint to %s."
                            % (epoch, p))
                        print(
                            "Best ever top1 accuracy: %.2f , Final top 1 / 3 / 10 accuracies: %.2f / %.2f / %.2f"
                            % (max(previous_accuracies), acc1, acc3, acc10))
                        break
            # report epoch statistics
            epoc_train_time = time.time() - epoc_start_Time
            print('epoch# %d  took %f hours' % (epoch, epoc_train_time /
                                                (60.0 * 60)))
            # Save checkpoint at end of each epoch
            checkpoint_path = os.path.join(FLAGS.model_dir, "SSE-LSTM.ckpt")
            model.save(sess, checkpoint_path + '-epoch-%d' % epoch)
            if len(previous_accuracies) > 0:
                print('So far best ever model top1 accuracy is: %.4f ' %
                      max(previous_accuracies))
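
Note: Example #3 builds each batch by pairing every positive (source, target) with one uniformly sampled negative target. Below is a minimal sketch of that sampling scheme; PAD_ID, seq_len, and make_pairs are hypothetical stand-ins for text_encoder.PAD_ID and the inline loop body above.

import random

PAD_ID = 0  # hypothetical pad token id

def seq_len(seq):
    """Effective length as used above: position of the first pad, plus one.
    Assumes every sequence contains at least one PAD_ID."""
    return seq.index(PAD_ID) + 1

def make_pairs(source, tgt_id, tgt_space):
    """Return one positive and one random-negative training pair."""
    pos = (source, tgt_space[tgt_id], 1.0)
    neg_id = random.choice([t for t in tgt_space if t != tgt_id])
    neg = (source, tgt_space[neg_id], 0.0)
    return [pos, neg]

# Toy usage with a target space of padded id sequences:
tgt_space = {"t1": [5, 7, PAD_ID, PAD_ID], "t2": [9, PAD_ID, PAD_ID, PAD_ID]}
pairs = make_pairs([3, 4, PAD_ID], "t1", tgt_space)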