import argparse
from pprint import pprint

import numpy as np
import torch
import yaml
from torchtext import data
from tqdm import tqdm

# PTBCharTextField, PTBChar, and PTBModel are project-local helpers defined
# elsewhere; sequence_cross_entropy is sketched below.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', default='data/ptb_char')
    parser.add_argument('--model', required=True)
    parser.add_argument('--config', required=True)
    parser.add_argument('--gpu', default=-1, type=int)
    args = parser.parse_args()

    # yaml.load without an explicit Loader is unsafe and deprecated
    with open(args.config, 'r') as f:
        config = yaml.safe_load(f)
    pprint(config)

    text_field = PTBCharTextField()
    train_dataset, test_dataset = PTBChar.splits(
        path=args.data, validation=None, text_field=text_field)
    text_field.build_vocab(train_dataset)
    test_loader = data.BPTTIterator(
        dataset=test_dataset, batch_size=1, bptt_len=2000,
        train=False, device=args.gpu)

    model = PTBModel(num_chars=len(text_field.vocab), **config['model'])
    model.load_state_dict(torch.load(args.model))
    print(model)
    num_params = sum(p.numel() for p in model.parameters())
    print(f'Total parameters: {num_params}')
    if args.gpu > -1:
        model.cuda(args.gpu)
    model.eval()

    state = hyper_state = None
    test_bpc_sum = test_bpc_denom = 0
    for test_batch in tqdm(test_loader):
        test_inputs = test_batch.text
        test_targets = test_batch.target
        test_logits, state, hyper_state = model(
            inputs=test_inputs, state=state, hyper_state=hyper_state)
        test_loss = sequence_cross_entropy(
            logits=test_logits, targets=test_targets)
        # Convert the loss from nats to bits and weight by sequence length
        test_bpc_sum += (test_loss.item() / np.log(2)) * test_inputs.size(0)
        test_bpc_denom += test_inputs.size(0)
    test_bpc = test_bpc_sum / test_bpc_denom
    print(f'Test BPC = {test_bpc:.6f}')
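# `sequence_cross_entropy` is not defined in this file. A minimal sketch of
# what it presumably computes -- the mean per-token cross-entropy in nats,
# which main() converts to bits -- assuming logits of shape
# [seq_len, batch, num_chars] and targets of shape [seq_len, batch]:
import torch.nn.functional as F

def sequence_cross_entropy(logits, targets):
    # Flatten the time and batch dimensions, then average over all tokens
    return F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1))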
def main():
    initializer = tf.random_uniform_initializer(-0.05, 0.05)
    # Train and eval models share variables through the common variable scope
    with tf.variable_scope("language_model", reuse=None, initializer=initializer):
        train_model = PTBModel(True, FLAGS.TRAIN_BATCH_SIZE, FLAGS.TRAIN_NUM_STEP)
    with tf.variable_scope("language_model", reuse=True, initializer=initializer):
        eval_model = PTBModel(False, FLAGS.EVAL_BATCH_SIZE, FLAGS.EVAL_NUM_STEP)

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        train_batches = make_batches(
            read_data(FLAGS.TRAIN_DATA), FLAGS.TRAIN_BATCH_SIZE, FLAGS.TRAIN_NUM_STEP)
        eval_batches = make_batches(
            read_data(FLAGS.EVAL_DATA), FLAGS.EVAL_BATCH_SIZE, FLAGS.EVAL_NUM_STEP)
        test_batches = make_batches(
            read_data(FLAGS.TEST_DATA), FLAGS.EVAL_BATCH_SIZE, FLAGS.EVAL_NUM_STEP)

        step = 0
        for i in range(FLAGS.NUM_EPOCH):
            print("In iteration: %d" % (i + 1))
            step, train_pplx = run_epoch(
                sess, train_model, train_batches, train_model.train_op, True, step)
            print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_pplx))
            # Do not clobber the global training step with the eval step counter
            _, eval_pplx = run_epoch(
                sess, eval_model, eval_batches, tf.no_op(), False, 0)
            print("Epoch: %d Eval Perplexity: %.3f" % (i + 1, eval_pplx))

        _, test_pplx = run_epoch(
            sess, eval_model, test_batches, tf.no_op(), False, 0)
        print("Test Perplexity: %.3f" % test_pplx)
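# `read_data` and `make_batches` are defined elsewhere. A sketch of
# `make_batches`, assuming read_data returns the corpus as one flat list of
# word ids: it slices the ids into num_step-long windows stacked batch_size
# high, with labels shifted one position to the left.
import numpy as np

def make_batches(id_list, batch_size, num_step):
    # Number of full (batch_size x num_step) blocks in the corpus
    num_batches = (len(id_list) - 1) // (batch_size * num_step)
    data = np.array(id_list[:num_batches * batch_size * num_step])
    data = np.reshape(data, [batch_size, num_batches * num_step])
    data_batches = np.split(data, num_batches, axis=1)
    # Labels are the inputs shifted by one token
    label = np.array(id_list[1:num_batches * batch_size * num_step + 1])
    label = np.reshape(label, [batch_size, num_batches * num_step])
    label_batches = np.split(label, num_batches, axis=1)
    return list(zip(data_batches, label_batches))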
def main(_):
    # Read PTB data and split it into train/valid/test id sequences
    raw_data = utils.ptb_raw_data(FLAGS.data_dir, FLAGS.data_name)
    train_data, valid_data, test_data, _ = raw_data

    config = get_config()
    eval_config = get_config()
    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)

    with tf.name_scope("Train"):
        train_input = PTBInput(config=config, data=train_data, name="TrainInput")
        with tf.variable_scope("Model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, config=config, input_=train_input)

    with tf.name_scope("Valid"):
        valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
        with tf.variable_scope("Model", reuse=True, initializer=initializer):
            mvalid = PTBModel(is_training=False, config=config, input_=valid_input)

    with tf.name_scope("Test"):
        # Use eval_config for both the input pipeline and the model
        test_input = PTBInput(config=eval_config, data=test_data, name="TestInput")
        with tf.variable_scope("Model", reuse=True, initializer=initializer):
            mtest = PTBModel(is_training=False, config=eval_config, input_=test_input)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # PTBInput reads through input queues, so queue runners must be started
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        for i in range(config.max_max_epoch):
            # Keep the base rate for max_epoch epochs, then decay exponentially
            lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
            m.assign_lr(sess, config.lr * lr_decay)
            print("Epoch: %d Learning rate: %.3f" % (i + 1, sess.run(m.lr)))

            train_perplexity = run_epoch(sess, m, eval_op=m.train_op)
            print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
            valid_perplexity = run_epoch(sess, mvalid)
            print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

        test_perplexity = run_epoch(sess, mtest)
        print("Test Perplexity: %.3f" % test_perplexity)

        coord.request_stop()
        coord.join(threads)
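# `run_epoch` is defined elsewhere. A minimal sketch in the style of the
# classic TensorFlow PTB tutorial, assuming the model exposes `initial_state`,
# `final_state`, `cost`, and an `input` object with `epoch_size` and
# `num_steps` (as the PTBInput/queue-runner setup above suggests):
import numpy as np

def run_epoch(session, model, eval_op=None):
    costs, iters = 0.0, 0
    state = session.run(model.initial_state)
    fetches = {"cost": model.cost, "final_state": model.final_state}
    if eval_op is not None:
        fetches["eval_op"] = eval_op
    for _ in range(model.input.epoch_size):
        # Thread the LSTM state from one BPTT window into the next
        feed_dict = {}
        for i, (c, h) in enumerate(model.initial_state):
            feed_dict[c] = state[i].c
            feed_dict[h] = state[i].h
        vals = session.run(fetches, feed_dict)
        state = vals["final_state"]
        costs += vals["cost"]
        iters += model.input.num_steps
    # Perplexity is the exponentiated average per-token cross-entropy
    return np.exp(costs / iters)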
def main(_):
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")

    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    train_data, valid_data, test_data, _ = raw_data

    config = get_config()
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(
            log_device_placement=True)) as session:
        tf.set_random_seed(1)
        initializer = tf.uniform_unit_scaling_initializer()
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = PTBModel(is_training=False, config=config)
            mtest = PTBModel(is_training=False, config=eval_config)

        tf.global_variables_initializer().run()

        def get_learning_rate(epoch, config):
            # Piecewise-constant schedule: drop the rate 10x per stage
            base_lr = config.learning_rate
            if epoch <= config.nr_epoch_first_stage:
                return base_lr
            elif epoch <= config.nr_epoch_second_stage:
                return base_lr * 0.1
            else:
                return base_lr * 0.01

        for i in range(config.max_epoch):
            m.assign_lr(session, get_learning_rate(i, config))
            print("Epoch: %d Learning rate: %f" % (i + 1, session.run(m.lr)))

            train_perplexity = run_epoch(session, m, train_data, m.train_op,
                                         verbose=True)
            print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
            valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op())
            print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

        test_perplexity = run_epoch(session, mtest, test_data, tf.no_op())
        print("Test Perplexity: %.3f" % test_perplexity)
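# `get_config` is defined elsewhere. Judging from the attributes used above,
# it returns an object along these lines; every value below is illustrative,
# not taken from the original code:
class Config(object):
    learning_rate = 1.0
    nr_epoch_first_stage = 20   # epochs trained at the base rate
    nr_epoch_second_stage = 40  # then base_lr * 0.1; base_lr * 0.01 afterwards
    max_epoch = 60              # total number of training epochs
    batch_size = 20
    num_steps = 35

def get_config():
    return Config()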
def main(_):
    if not FLAGS.data_path:
        raise ValueError('Must set --data_path to PTB data directory')
    gpus = [
        x.name for x in device_lib.list_local_devices()
        if x.device_type == 'GPU'
    ]
    if FLAGS.num_gpus > len(gpus):
        raise ValueError('Your machine has only %d gpus, '
                         'which is less than the requested --num_gpus=%d.' %
                         (len(gpus), FLAGS.num_gpus))

    # Generate the word-to-id dictionary and convert words to ids
    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    train_data, valid_data, test_data, _ = raw_data

    # Get hyperparameters; evaluation runs one token at a time
    config = get_config()
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)

        with tf.name_scope('Train'):
            train_input = PTBInput(config=config, data=train_data,
                                   name='TrainInput')
            with tf.variable_scope('Model', reuse=None, initializer=initializer):
                m = PTBModel(is_training=True, config=config, input_=train_input)
            tf.summary.scalar('Training Loss', m.cost)
            tf.summary.scalar('Learning Rate', m.lr)

        with tf.name_scope('Valid'):
            valid_input = PTBInput(config=config, data=valid_data,
                                   name='ValidInput')
            with tf.variable_scope('Model', reuse=True, initializer=initializer):
                mvalid = PTBModel(is_training=False, config=config,
                                  input_=valid_input)
            tf.summary.scalar('Validation Loss', mvalid.cost)

        with tf.name_scope('Test'):
            test_input = PTBInput(config=eval_config, data=test_data,
                                  name='TestInput')
            with tf.variable_scope('Model', reuse=True, initializer=initializer):
                mtest = PTBModel(is_training=False, config=eval_config,
                                 input_=test_input)

        # Publish each model's ops to collections (tf.add_to_collection);
        # the collections are managed by TensorFlow and travel with the metagraph
        models = {'Train': m, 'Valid': mvalid, 'Test': mtest}
        for name, model in models.items():
            model.export_ops(name)
        # Export the graph so it can be rewritten below (and stored on disk)
        metagraph = tf.train.export_meta_graph()
        if tf.__version__ < '1.1.0' and FLAGS.num_gpus > 1:
            raise ValueError('num_gpus > 1 is not supported for TensorFlow '
                             'versions below 1.1.0')
        # Parallel config
        soft_placement = False
        if FLAGS.num_gpus > 1:
            soft_placement = True
            util.auto_parallel(metagraph, m)

    with tf.Graph().as_default():
        # Import the (possibly rewritten) graph and re-bind the ops
        tf.train.import_meta_graph(metagraph)
        for model in models.values():
            model.import_ops()
        # Use a Supervisor to save and load checkpoints and pre-trained variables
        sv = tf.train.Supervisor(logdir=FLAGS.save_path)
        config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
        with sv.managed_session(config=config_proto) as session:
            # Loop over the corpus for max_max_epoch epochs
            for i in range(config.max_max_epoch):
                time1 = time.time()
                # Keep the base rate for max_epoch epochs, then decay exponentially
                lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
                m.assign_lr(session, config.learning_rate * lr_decay)

                print('Epoch: %d Learning rate: %.3f' %
                      (i + 1, session.run(m.lr)))
                train_perplexity = run_epoch(session, m, eval_op=m.train_op,
                                             verbose=True)
                print('Epoch: %d Train Perplexity: %.3f' %
                      (i + 1, train_perplexity))
                valid_perplexity = run_epoch(session, mvalid)
                print('Epoch: %d Valid Perplexity: %.3f' %
                      (i + 1, valid_perplexity))
                # Parenthesize the subtraction; '%' binds tighter than '-'
                print('One epoch took %d s' % (time.time() - time1))

            test_perplexity = run_epoch(session, mtest)
            print('Test Perplexity: %.3f' % test_perplexity)

            if FLAGS.save_path:
                print('Saving model to %s.' % FLAGS.save_path)
                sv.saver.save(session, FLAGS.save_path,
                              global_step=sv.global_step)
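# `export_ops` / `import_ops` are PTBModel methods not shown here. A
# simplified sketch of the pattern (not the tutorial's exact implementation):
# tensors are published to named collections before export_meta_graph and
# re-bound after import_meta_graph, because Python references to ops do not
# survive the round trip into a fresh graph.
def export_ops(self, name):
    self._name = name
    tf.add_to_collection(self._name + '/cost', self._cost)

def import_ops(self):
    self._cost = tf.get_collection_ref(self._name + '/cost')[0]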
test_path = os.path.join(data_path, "ptb.test.txt")
if not os.path.exists(train_path):
    raise Exception("no such file: %s" % train_path)

train_params = Params()
test_params = Params()
# Evaluate one token at a time so state carries across the whole test set
test_params.batch_size = 1
test_params.time_steps = 1

initializer = tf.random_uniform_initializer(-0.1, 0.1)
with tf.name_scope("Train"):
    train_input = PTBInputs(train_path, train_params, "TrainInputs")
    with tf.variable_scope("Model", reuse=None, initializer=initializer):
        train_model = PTBModel(train_params, train_input, is_training=True)

with tf.name_scope("Valid"):
    valid_input = PTBInputs(valid_path, train_params, "ValidInputs")
    with tf.variable_scope("Model", reuse=True, initializer=initializer):
        valid_model = PTBModel(train_params, valid_input)

with tf.name_scope("Test"):
    test_input = PTBInputs(test_path, test_params, "TestInputs")
    with tf.variable_scope("Model", reuse=True, initializer=initializer):
        test_model = PTBModel(test_params, test_input)

init = tf.global_variables_initializer()
sv = tf.train.Supervisor(logdir="test/new_lstm/logs/", init_op=init)
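# The excerpt stops after constructing the Supervisor. A minimal sketch of how
# training would typically proceed under it; the epoch count and the
# `train_op`/`cost` attributes of PTBModel are assumptions, not shown above:
with sv.managed_session() as sess:
    # managed_session runs init_op and starts the input queue runners
    for epoch in range(10):  # hypothetical number of epochs
        if sv.should_stop():
            break
        _, cost = sess.run([train_model.train_op, train_model.cost])
        print("epoch %d cost %.3f" % (epoch, cost))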