Example #1
def train():
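    # Load the PTB corpus; chop() re-splits the flat training id stream into
    # per-sentence lists at the <eos> id so sentence order can be shuffled.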
    print('data_path: %s' % FLAGS.data_path)
    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    train_data, valid_data, valid_nbest_data, vocab = raw_data
    train_data = chop(train_data, vocab['<eos>'])

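    # Start from the medium-sized preset and override any hyperparameter
    # that was supplied on the command line.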
    config = MediumConfig()
    if FLAGS.init_scale: config.init_scale = FLAGS.init_scale
    if FLAGS.learning_rate: config.learning_rate = FLAGS.learning_rate
    if FLAGS.max_grad_norm: config.max_grad_norm = FLAGS.max_grad_norm
    if FLAGS.num_layers: config.num_layers = FLAGS.num_layers
    if FLAGS.num_steps: config.num_steps = FLAGS.num_steps
    if FLAGS.hidden_size: config.hidden_size = FLAGS.hidden_size
    if FLAGS.max_epoch: config.max_epoch = FLAGS.max_epoch
    if FLAGS.max_max_epoch: config.max_max_epoch = FLAGS.max_max_epoch
    if FLAGS.keep_prob: config.keep_prob = FLAGS.keep_prob
    if FLAGS.lr_decay: config.lr_decay = FLAGS.lr_decay
    if FLAGS.batch_size: config.batch_size = FLAGS.batch_size
    if FLAGS.opt_method: config.opt_method = FLAGS.opt_method
    if FLAGS.log_dir: config.log_dir = FLAGS.log_dir
    config.h_max_log_smooth = FLAGS.h_max_log_smooth
    config.vocab_size = len(vocab)
    print('init_scale: %.2f' % config.init_scale)
    print('learning_rate: %.2f' % config.learning_rate)
    print('max_grad_norm: %.2f' % config.max_grad_norm)
    print('num_layers: %d' % config.num_layers)
    print('num_steps: %d' % config.num_steps)
    print('hidden_size: %d' % config.hidden_size)
    print('max_epoch: %d' % config.max_epoch)
    print('max_max_epoch: %d' % config.max_max_epoch)
    print('keep_prob: %.2f' % config.keep_prob)
    print('lr_decay: %.2f' % config.lr_decay)
    print('batch_size: %d' % config.batch_size)
    print('vocab_size: %d' % config.vocab_size)
    print('opt_method: %s' % config.opt_method)
    print('log_dir: %s' % config.log_dir)
    print('seed: %d' % FLAGS.seed)
    sys.stdout.flush()

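    # The evaluation model mirrors the training hyperparameters but runs with
    # a fixed batch size of 200.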
    eval_config = MediumConfig()
    eval_config.init_scale = config.init_scale
    eval_config.learning_rate = config.learning_rate
    eval_config.max_grad_norm = config.max_grad_norm
    eval_config.num_layers = config.num_layers
    eval_config.num_steps = config.num_steps
    eval_config.hidden_size = config.hidden_size
    eval_config.max_epoch = config.max_epoch
    eval_config.max_max_epoch = config.max_max_epoch
    eval_config.keep_prob = config.keep_prob
    eval_config.lr_decay = config.lr_decay
    eval_config.batch_size = 200
    # eval_config.batch_size = config.batch_size
    eval_config.vocab_size = len(vocab)
    eval_config.h_max_log_smooth = config.h_max_log_smooth

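    # Build the training and validation models in the same variable scope so
    # they share weights. (tf.initialize_all_variables is the older name of
    # tf.global_variables_initializer.)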
    prev = 0  # best validation F1 seen so far
    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = PTBModel(is_training=False, config=eval_config)

        tf.initialize_all_variables().run()
        if FLAGS.model_path:
            saver = tf.train.Saver()

        loss_list = []
        train_perp_list = []
        val_perp_list = []
        val_f1_list = []
        for i in range(config.max_max_epoch):
            shuffle(train_data)
            shuffled_data = list(itertools.chain(*train_data))

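            # Decay the learning rate exponentially once past max_epoch; the
            # YellowFin ("YF") optimizer takes the decay as a multiplicative
            # lr_factor instead of an absolute learning rate.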
            start_time = time.time()
            lr_decay = config.lr_decay**max(i - config.max_epoch, 0.0)
            if config.opt_method == "YF":
                session.run(tf.assign(m.optimizer.lr_factor, lr_decay))
            else:
                m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity, loss = run_epoch(session,
                                               m,
                                               shuffled_data,
                                               m.train_op,
                                               verbose=True,
                                               epoch_id=i)
            loss_list += loss
            print("Epoch: %d Train Perplexity: %.3f" %
                  (i + 1, train_perplexity))
            valid_perplexity, _ = run_epoch(session, mvalid, valid_data,
                                            tf.no_op())
            print("Epoch: %d Valid Perplexity: %.3f" %
                  (i + 1, valid_perplexity))
            valid_f1, num = run_epoch2(session, mvalid, valid_nbest_data,
                                       tf.no_op(), vocab['<eos>'])
            print("Epoch: %d Valid F1: %.2f (%d trees)" %
                  (i + 1, valid_f1, num))
            print('It took %.2f seconds' % (time.time() - start_time))

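            # Write validation perplexity and F1 to TensorBoard, using
            # i * len(loss) (batches processed so far) as the step.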
            #print("summary added step", i * len(loss) )
            summ = tf.Summary(value=[
                tf.Summary.Value(tag="eval_perp",
                                 simple_value=valid_perplexity),
            ])
            m.writer.add_summary(summ, i * len(loss))

            summ = tf.Summary(value=[
                tf.Summary.Value(tag="eval_F1", simple_value=valid_f1),
            ])
            m.writer.add_summary(summ, i * len(loss))
            train_perp_list.append([i * len(loss), train_perplexity])
            val_perp_list.append([i * len(loss), valid_perplexity])
            val_f1_list.append([i * len(loss), valid_f1])

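            # Checkpoint the model (and pickle its eval config) whenever
            # validation F1 improves.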
            if prev < valid_f1:
                prev = valid_f1
                if FLAGS.model_path:
                    print('Save a model to %s' % FLAGS.model_path)
                    saver.save(session, FLAGS.model_path)
                    pickle.dump(eval_config,
                                open(FLAGS.model_path + '.config', 'wb'))
            sys.stdout.flush()

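            # Dump the running loss / perplexity / F1 curves as plain-text
            # files for offline plotting; the files are rewritten every epoch.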
            with open(config.log_dir + "/loss.txt", "w") as f:
                np.savetxt(f, np.array(loss_list))
            with open(config.log_dir + "/train_perp.txt", "w") as f:
                np.savetxt(f, np.array(train_perp_list))
            with open(config.log_dir + "/val_perp.txt", "w") as f:
                np.savetxt(f, np.array(val_perp_list))
            with open(config.log_dir + "/val_f1.txt", "w") as f:
                np.savetxt(f, np.array(val_f1_list))
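
# Note: chop() is used above but not defined in this snippet. A minimal
# sketch, assuming it splits a flat id sequence into per-sentence lists at
# the <eos> id (the name and signature follow the calls above; the body is
# an illustrative guess, not the original implementation):
def chop(data, eos_id):
    """Split a flat list of word ids into sentences, each ending with eos_id."""
    sentences, current = [], []
    for word_id in data:
        current.append(word_id)
        if word_id == eos_id:
            sentences.append(current)
            current = []
    if current:  # keep a trailing fragment that has no <eos>
        sentences.append(current)
    return sentences
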
Example #2
def train():
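  # Load the PTB corpus plus the path to an additional "silver" corpus that
  # is streamed lazily for extra training passes each epoch; gold training
  # data is split into sentences at <eos>.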
  print('data_path: %s' % FLAGS.data_path)
  raw_data = reader.ptb_raw_data3(FLAGS.data_path)
  train_data, silver_path, valid_data, valid_nbest_data, vocab = raw_data
  train_data = chop(train_data, vocab['<eos>'])
  
  config = MediumConfig()
  if FLAGS.init_scale: config.init_scale = FLAGS.init_scale
  if FLAGS.learning_rate: config.learning_rate = FLAGS.learning_rate
  if FLAGS.max_grad_norm: config.max_grad_norm = FLAGS.max_grad_norm
  if FLAGS.num_layers: config.num_layers = FLAGS.num_layers
  if FLAGS.num_steps: config.num_steps = FLAGS.num_steps
  if FLAGS.hidden_size: config.hidden_size = FLAGS.hidden_size
  if FLAGS.max_epoch: config.max_epoch = FLAGS.max_epoch
  if FLAGS.max_max_epoch: config.max_max_epoch = FLAGS.max_max_epoch
  if FLAGS.keep_prob: config.keep_prob = FLAGS.keep_prob
  if FLAGS.lr_decay: config.lr_decay = FLAGS.lr_decay
  if FLAGS.batch_size: config.batch_size = FLAGS.batch_size
  config.vocab_size = len(vocab)
  if FLAGS.silver: config.silver = FLAGS.silver
  print('init_scale: %.2f' % config.init_scale)
  print('learning_rate: %.2f' % config.learning_rate)
  print('max_grad_norm: %.2f' % config.max_grad_norm)
  print('num_layers: %d' % config.num_layers)
  print('num_steps: %d' % config.num_steps)
  print('hidden_size: %d' % config.hidden_size)
  print('max_epoch: %d' % config.max_epoch)
  print('max_max_epoch: %d' % config.max_max_epoch)
  print('keep_prob: %.2f' % config.keep_prob)
  print('lr_decay: %.2f' % config.lr_decay)
  print('batch_size: %d' % config.batch_size)
  print('vocab_size: %d' % config.vocab_size)
  print('silver: %d' % config.silver)
  sys.stdout.flush()
  
  eval_config = MediumConfig()
  eval_config.init_scale = config.init_scale
  eval_config.learning_rate = config.learning_rate
  eval_config.max_grad_norm = config.max_grad_norm
  eval_config.num_layers = config.num_layers
  eval_config.num_steps = config.num_steps
  eval_config.hidden_size = config.hidden_size
  eval_config.max_epoch = config.max_epoch
  eval_config.max_max_epoch = config.max_max_epoch
  eval_config.keep_prob = config.keep_prob
  eval_config.lr_decay = config.lr_decay
  eval_config.batch_size = 200
  eval_config.vocab_size = len(vocab)

  prev = 0 # record F1 scores
  with tf.Graph().as_default(), tf.Session() as session:
    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)
    with tf.variable_scope("model", reuse=None, initializer=initializer):
      m = PTBModel(is_training=True, config=config)
    with tf.variable_scope("model", reuse=True, initializer=initializer):
      mvalid = PTBModel(is_training=False, config=eval_config)

    tf.initialize_all_variables().run()
    if FLAGS.model_path:
      saver = tf.train.Saver()

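    # The silver corpus is read lazily, one chunk at a time; j counts how
    # many silver chunks have been consumed across all epochs.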
    silver_generator = reader.file_to_word_ids3(silver_path)
    j = 0
    for i in range(config.max_max_epoch):
      shuffle(train_data)
      shuffled_data = list(itertools.chain(*train_data))
      
      start_time = time.time()
      lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
      m.assign_lr(session, config.learning_rate * lr_decay)
      print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
      train_perplexity = run_epoch(session, m, shuffled_data, m.train_op,
                                   verbose=True)
      print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
      valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op())
      print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
      valid_f1, num = run_epoch2(session, mvalid, valid_nbest_data,
                                 tf.no_op(), vocab['<eos>'])
      print("Epoch: %d Valid F1: %.2f (%d trees)" % (i + 1, valid_f1, num))
      if valid_f1 > prev:
        prev = valid_f1
        if FLAGS.model_path:
          print('Save a model to %s' % FLAGS.model_path)
          saver.save(session, FLAGS.model_path)
          pickle.dump(eval_config, open(FLAGS.model_path + '.config', 'wb'))
      print('It took %.2f seconds' % (time.time() - start_time))
      sys.stdout.flush()

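      # After each pass over the gold data, train on config.silver chunks of
      # the silver corpus, restarting the generator when it is exhausted.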
      start_time = time.time()
      for k in range(config.silver):
        try:
          silver_data = next(silver_generator)
        except StopIteration:
          # Restart from the beginning of the silver corpus once exhausted.
          silver_generator = reader.file_to_word_ids3(silver_path)
          silver_data = next(silver_generator)
        j += 1
        silver_data = chop(silver_data, vocab['<eos>'])
        shuffle(silver_data)
        silver_data = list(itertools.chain(*silver_data))
        silver_perplexity = run_epoch(session, m, silver_data, m.train_op,
                                      verbose=False)
        print("Epoch: %d Silver(%d) Perplexity: %.3f" %
              (i + 1, j, silver_perplexity))
        valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op())
        print("Epoch: %d Silver(V) Perplexity: %.3f" % (i+1, valid_perplexity))
        
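      # Re-score validation F1 after the silver passes and checkpoint again
      # if it improved.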
      valid_f1, num = run_epoch2(session, mvalid, valid_nbest_data,
                                 tf.no_op(), vocab['<eos>'])
      print("Epoch: %d Silver(V) F1: %.2f (%d trees)" % (i+1, valid_f1, num))
      if valid_f1 > prev:
        prev = valid_f1
        if FLAGS.model_path:
          print('Save a model to %s' % FLAGS.model_path)
          saver.save(session, FLAGS.model_path)
          pickle.dump(eval_config, open(FLAGS.model_path + '.config', 'wb'))
      print('It took %.2f seconds' % (time.time() - start_time))
      sys.stdout.flush()