Example #1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', default='data/ptb_char')
    parser.add_argument('--model', required=True)
    parser.add_argument('--config', required=True)
    parser.add_argument('--gpu', default=-1, type=int)
    args = parser.parse_args()

    with open(args.config, 'r') as f:
        config = yaml.safe_load(f)
    pprint(config)

    text_field = PTBCharTextField()
    train_dataset, test_dataset = PTBChar.splits(
        path=args.data, validation=None, text_field=text_field)
    text_field.build_vocab(train_dataset)

    # batch_size=1 with a long bptt_len scores the test stream in large
    # contiguous chunks, carrying hidden state between them.
    test_loader = data.BPTTIterator(
        dataset=test_dataset, batch_size=1, bptt_len=2000, train=False,
        device=args.gpu)

    model = PTBModel(num_chars=len(text_field.vocab), **config['model'])
    model.load_state_dict(torch.load(args.model))
    print(model)
    num_params = sum(p.numel() for p in model.parameters())
    print(f'Total parameters: {num_params}')

    if args.gpu > -1:
        model.cuda(args.gpu)

    model.eval()

    # Carry the recurrent (and hypernetwork) state across BPTT chunks so the
    # test set is scored as one continuous stream.
    state = hyper_state = None
    test_bpc_sum = test_bpc_denom = 0
    for test_batch in tqdm(test_loader):
        test_inputs = test_batch.text
        test_targets = test_batch.target
        test_logits, state, hyper_state = model(
            inputs=test_inputs, state=state, hyper_state=hyper_state)
        test_loss = sequence_cross_entropy(
            logits=test_logits, targets=test_targets)
        # test_loss is in nats; divide by log(2) for bits per character and
        # weight by chunk length (.data[0] is the pre-0.4 PyTorch scalar
        # accessor; on newer versions this would be test_loss.item()).
        test_bpc_sum += (test_loss.data[0] / np.log(2)) * test_inputs.size(0)
        test_bpc_denom += test_inputs.size(0)
    test_bpc = test_bpc_sum / test_bpc_denom

    print(f'Test BPC = {test_bpc:.6f}')
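
sequence_cross_entropy is defined elsewhere in this project. A minimal
sketch of what it likely computes, assuming (seq_len, batch, vocab) logits
and (seq_len, batch) targets; the shapes and reduction are assumptions:

import torch.nn.functional as F

def sequence_cross_entropy(logits, targets):
    # Flatten the time and batch dimensions, then average the token-level
    # cross-entropy (in nats) over every position.
    vocab_size = logits.size(-1)
    return F.cross_entropy(logits.view(-1, vocab_size), targets.view(-1))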
Example #2
def main():
    initializer = tf.random_uniform_initializer(-0.05, 0.05)

    with tf.variable_scope("language_model",
                           reuse=None,
                           initializer=initializer):
        train_model = PTBModel(True, FLAGS.TRAIN_BATCH_SIZE,
                               FLAGS.TRAIN_NUM_STEP)

    with tf.variable_scope("language_model",
                           reuse=True,
                           initializer=initializer):
        eval_model = PTBModel(False, FLAGS.EVAL_BATCH_SIZE,
                              FLAGS.EVAL_NUM_STEP)

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        train_batches = make_batches(read_data(FLAGS.TRAIN_DATA),
                                     FLAGS.TRAIN_BATCH_SIZE,
                                     FLAGS.TRAIN_NUM_STEP)

        eval_batches = make_batches(read_data(FLAGS.EVAL_DATA),
                                    FLAGS.EVAL_BATCH_SIZE, FLAGS.EVAL_NUM_STEP)

        test_batches = make_batches(read_data(FLAGS.TEST_DATA),
                                    FLAGS.EVAL_BATCH_SIZE, FLAGS.EVAL_NUM_STEP)

        step = 0
        for i in range(FLAGS.NUM_EPOCH):
            print("in iteration :%d " % (i + 1))
            step, train_pplx = run_epoch(sess, train_model, train_batches,
                                         train_model.train_op, True, step)
            print("Epoch: %d Train perplexity:%.3f" % (i + 1, train_pplx))

            _, eval_pplx = run_epoch(sess, eval_model, eval_batches,
                                     tf.no_op(), False, 0)
            print("Epoch: %d Eval perplexity:%.3f" % (i + 1, eval_pplx))

        _, test_pplx = run_epoch(sess, eval_model, test_batches, tf.no_op(),
                                 False, 0)
        print("Test perplexity:%.3f" % test_pplx)
Example #3
def main(_):

    # read data
    raw_data = utils.ptb_raw_data(FLAGS.data_dir, FLAGS.data_name)
    train_data, valid_data, test_data, _ = raw_data

    config = get_config()
    eval_config = get_config()

    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)

    with tf.name_scope("Train"):
        train_input = PTBInput(config=config,
                               data=train_data,
                               name="TrainInput")
        with tf.variable_scope("Model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, config=config, input_=train_input)

    with tf.name_scope("Valid"):
        valid_input = PTBInput(config=config,
                               data=valid_data,
                               name="ValidInput")
        with tf.variable_scope("Model", reuse=True, initializer=initializer):
            mvalid = PTBModel(is_training=False,
                              config=config,
                              input_=valid_input)

    with tf.name_scope("Test"):
        test_input = PTBInput(config=eval_config, data=test_data,
                              name="TestInput")
        with tf.variable_scope("Model", reuse=True, initializer=initializer):
            mtest = PTBModel(is_training=False,
                             config=eval_config,
                             input_=test_input)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        for i in range(config.max_max_epoch):
            lr_decay = config.lr_decay**max(i + 1 - config.max_epoch, 0.0)
            m.assign_lr(sess, config.lr * lr_decay)
            print("Epoch: %d Learning rate: %.3f" % (i + 1, sess.run(m.lr)))
            train_perplexity = run_epoch(sess, m, eval_op=m.train_op)

            print("Epoch: %d Train Perplexity: %.3f" %
                  (i + 1, train_perplexity))

            valid_perplexity = run_epoch(sess, mvalid)
            print("Epoch: %d Valid Perplexity: %.3f" %
                  (i + 1, valid_perplexity))

        test_perplexity = run_epoch(sess, mtest)
        print("Test Perplexity: %.3f" % test_perplexity)
        coord.request_stop()
        coord.join(threads)
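
Examples 3 and 5 follow the structure of the official TensorFlow PTB
tutorial, where PTBInput wraps reader.ptb_producer roughly as below; this is
an assumed sketch, not part of the snippet:

class PTBInput(object):
    """Batches of (input, target) word ids for one epoch."""

    def __init__(self, config, data, name=None):
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        self.epoch_size = ((len(data) // batch_size) - 1) // num_steps
        self.input_data, self.targets = reader.ptb_producer(
            data, batch_size, num_steps, name=name)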
Example #4
def main(_):
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")

    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    train_data, valid_data, test_data, _ = raw_data

    config = get_config()
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(
            log_device_placement=True)) as session:

        tf.set_random_seed(1)

        initializer = tf.uniform_unit_scaling_initializer()
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = PTBModel(is_training=False, config=config)
            mtest = PTBModel(is_training=False, config=eval_config)

        tf.global_variables_initializer().run()

        def get_learning_rate(epoch, config):
            base_lr = config.learning_rate
            if epoch <= config.nr_epoch_first_stage:
                return base_lr
            elif epoch <= config.nr_epoch_second_stage:
                return base_lr * 0.1
            else:
                return base_lr * 0.01

        for i in range(config.max_epoch):
            m.assign_lr(session, get_learning_rate(i, config))

            print("Epoch: %d Learning rate: %f" % (i + 1, session.run(m.lr)))
            train_perplexity = run_epoch(session,
                                         m,
                                         train_data,
                                         m.train_op,
                                         verbose=True)
            print("Epoch: %d Train Perplexity: %.3f" %
                  (i + 1, train_perplexity))
            valid_perplexity = run_epoch(session, mvalid, valid_data,
                                         tf.no_op())
            print("Epoch: %d Valid Perplexity: %.3f" %
                  (i + 1, valid_perplexity))

        test_perplexity = run_epoch(session, mtest, test_data, tf.no_op())
        print("Test Perplexity: %.3f" % test_perplexity)
Example #5
def main(_):
    if not FLAGS.data_path:
        raise ValueError('Must set --data_path to PTB data directory')
    gpus = [
        x.name for x in device_lib.list_local_devices()
        if x.device_type == 'GPU'
    ]

    if FLAGS.num_gpus > len(gpus):
        raise ValueError('Your machine has only %d gpus '
                         'which is less than the requested --num_gpus=%d.' %
                         (len(gpus), FLAGS.num_gpus))

    # Generate the word-to-id dictionary and convert words to ids
    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    train_data, valid_data, test_data, _ = raw_data

    # Get hyperparameters
    config = get_config()
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)

        # Build the training model
        with tf.name_scope("Train"):
            train_input = PTBInput(config=config,
                                   data=train_data,
                                   name="TrainInput")
            with tf.variable_scope("Model",
                                   reuse=None,
                                   initializer=initializer):
                m = PTBModel(is_training=True,
                             config=config,
                             input_=train_input)
            tf.summary.scalar("Training Loss,", m.cost)
            tf.summary.scalar("Learning Rate", m.lr)

        with tf.name_scope('Valid'):
            valid_input = PTBInput(config=config,
                                   data=valid_data,
                                   name='ValidInput')
            with tf.variable_scope('Model',
                                   reuse=True,
                                   initializer=initializer):
                mvalid = PTBModel(is_training=False,
                                  config=config,
                                  input_=valid_input)
            tf.summary.scalar('Validation Loss', mvalid.cost)

        with tf.name_scope('Test'):
            test_input = PTBInput(config=eval_config,
                                  data=test_data,
                                  name='TestInput')
            with tf.variable_scope('Model',
                                   reuse=True,
                                   initializer=initializer):
                mtest = PTBModel(is_training=False,
                                 config=eval_config,
                                 input_=test_input)

        # Add ops to collections (tf.add_to_collection); collections are
        # managed by TensorFlow
        models = {'Train': m, 'Valid': mvalid, 'Test': mtest}
        for name, model in models.items():
            model.export_ops(name)
        # Export the metagraph so the graph can be rewritten and stored on disk
        metagraph = tf.train.export_meta_graph()
        if tf.__version__ < '1.1.0' and FLAGS.num_gpus > 1:
            raise ValueError(
                'num_gpus > 1 is not supported for TensorFlow versions '
                'below 1.1.0')
        # Multi-GPU config: soft placement is required, and util.auto_parallel
        # rewrites the exported metagraph to replicate ops across GPUs
        soft_placement = False
        if FLAGS.num_gpus > 1:
            soft_placement = True
            util.auto_parallel(metagraph, m)

    with tf.Graph().as_default():

        # Import ops and graph
        tf.train.import_meta_graph(metagraph)
        for model in models.values():
            model.import_ops()

        # Use a Supervisor to save and restore checkpoints and pre-trained
        # variables
        sv = tf.train.Supervisor(logdir=FLAGS.save_path)
        # Session configuration
        config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
        with sv.managed_session(config=config_proto) as session:
            # Number of passes over the corpus
            for i in range(config.max_max_epoch):
                time1 = time.time()
                # Calculate learning decay
                lr_decay = config.lr_decay**max(i + 1 - config.max_epoch, 0.0)
                m.assign_lr(session, config.learning_rate * lr_decay)

                print('Epoch: %d Learning rate: %.3f' %
                      (i + 1, session.run(m.lr)))

                train_perplexity = run_epoch(session,
                                             m,
                                             eval_op=m.train_op,
                                             verbose=True)
                print('Epoch: %d Train Perplexity: %.3f' %
                      (i + 1, train_perplexity))

                valid_perplexity = run_epoch(session, mvalid)
                print('Epoch: %d Valid Perplexity: %.3f' %
                      (i + 1, valid_perplexity))

                print('One loop used %d s' % (time.time() - time1))

            test_perplexity = run_epoch(session, mtest)
            print('Test Perplexity: %.3f' % test_perplexity)

            if FLAGS.save_path:
                print('Saving model to %s.' % FLAGS.save_path)
                sv.saver.save(session,
                              FLAGS.save_path,
                              global_step=sv.global_step)
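
export_ops and import_ops are methods of this project's PTBModel. The
pattern, borrowed from the official PTB tutorial, is to publish each model's
tensors under name-prefixed collections so they survive the
export_meta_graph / import_meta_graph round trip that util.auto_parallel
needs. A rough sketch with hypothetical collection keys:

def export_ops(self, name):
    # Stash the tensors this model needs under named collections before
    # the graph is exported.
    self._name = name
    for key, op in {"cost": self.cost, "lr": self.lr}.items():
        tf.add_to_collection("%s/%s" % (name, key), op)

def import_ops(self):
    # Recover the same tensors from the re-imported graph.
    self.cost = tf.get_collection_ref("%s/cost" % self._name)[0]
    self.lr = tf.get_collection_ref("%s/lr" % self._name)[0]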
Example #6
train_path = os.path.join(data_path, "ptb.train.txt")
valid_path = os.path.join(data_path, "ptb.valid.txt")
test_path = os.path.join(data_path, "ptb.test.txt")

if not os.path.exists(train_path):
    raise Exception("No such file: %s" % train_path)

train_params = Params()
test_params = Params()
test_params.batch_size = 1
test_params.time_steps = 1

initializer = tf.random_uniform_initializer(-0.1, 0.1)

with tf.name_scope("Train"):
    train_input = PTBInputs(train_path, train_params, "TrainInputs")
    with tf.variable_scope("Model", reuse=None, initializer=initializer):
        train_model = PTBModel(train_params, train_input, is_training=True)

with tf.name_scope("Valid"):
    valid_input = PTBInputs(valid_path, train_params, "ValidInputs")
    with tf.variable_scope("Model", reuse=True, initializer=initializer):
        valid_model = PTBModel(train_params, valid_input)

with tf.name_scope("Test"):
    test_input = PTBInputs(test_path, test_params, "TestInputs")
    with tf.variable_scope("Model", reuse=True, initializer=initializer):
        test_model = PTBModel(test_params, test_input)

init = tf.global_variables_initializer()

sv = tf.train.Supervisor(logdir="test/new_lstm/logs/", init_op=init)
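
The snippet ends after constructing the Supervisor. A minimal sketch of the
training loop that would typically follow; run_epoch and the max_epoch field
are assumptions, not part of the snippet:

with sv.managed_session() as sess:
    for i in range(train_params.max_epoch):
        train_pplx = run_epoch(sess, train_model,
                               eval_op=train_model.train_op)
        print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_pplx))
        valid_pplx = run_epoch(sess, valid_model)
        print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_pplx))
    test_pplx = run_epoch(sess, test_model)
    print("Test Perplexity: %.3f" % test_pplx)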