Example #1
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        for e in xrange(args.num_epochs):
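            # anneal the learning rate once per epoch: lr = learning_rate * decay_rate**e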
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in xrange(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
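                # feed the previous batch's final state back in, so the RNN state carries across batches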
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                print "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start)
                if (e * data_loader.num_batches + b) % args.save_every == 0:
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.num_batches + b)
                    print "model saved to {}".format(checkpoint_path)
Example #2
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0:
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
Example #3
def train(args):
    # Step 1: load data
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size
    # print (sys.version)
    # print (data_loader.vocab_size)
    # ^^ 65

    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    # Step 2: define a model
    model = Model(args)

    # Step 3: define an optimizer
    tvars = tf.trainable_variables()
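    # clip the global gradient norm at args.grad_clip to keep RNN gradients from exploding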
    grads, _ = tf.clip_by_global_norm(tf.gradients(model.cost, tvars), args.grad_clip)
    optimizer = tf.train.AdamOptimizer(args.learning_rate)
    train_op = optimizer.apply_gradients(zip(grads, tvars))

    # Step 4: train
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())

        for e in range(args.num_epochs):
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            # ^^ always starts with zero-filled state tensor

            for b in range(data_loader.num_batches):
                start = time.time()

                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                
                # copy the state of previous batch
                for i, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[i].c
                    feed[h] = state[i].h

                train_loss, state, _ = sess.run([model.cost, model.final_state, train_op], feed)
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                      .format(e * data_loader.num_batches + b,
                              args.num_epochs * data_loader.num_batches,
                              e, train_loss, end - start))

                if (e * data_loader.num_batches + b) % args.save_every == 0\
                        or (e == args.num_epochs-1 and
                            b == data_loader.num_batches-1):
                    # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
Example #4
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size
    
    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist 
        assert os.path.isdir(args.init_from), "%s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "config.pkl")), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "chars_vocab.pkl")), "chars_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt,"No checkpoint found"
        assert ckpt.model_checkpoint_path,"No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(args)[checkme], "Command line argument and saved model disagree on '%s'" % checkme
        
        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars==data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab==data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"
        
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)
        
    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                    or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
Example #5
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(args.init_from), "%s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "config.pkl")), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "chars_vocab.pkl")), "chars_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt,"No checkpoint found"
        assert ckpt.model_checkpoint_path,"No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(args)[checkme], "Command line argument and saved model disagree on '%s'" % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars==data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab==data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                    or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
Example #6
def train(args):
    print(args)
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            #print("model learning rate is {}".format(model.lr.eval()))
            data_loader.reset_batch_pointer('train')

            state = model.initial_state.eval()
            for b in range(data_loader.ntrain):
                start = time.time()
                x, y = data_loader.next_batch('train')

                # tmp = ''
                # for c in x:
                #   for i in c:
                #     tmp += np.array(data_loader.chars)[i]
                # print(tmp)

                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.ntrain + b,
                            args.num_epochs * data_loader.ntrain,
                            e, train_loss, end - start))
                if (e * data_loader.ntrain + b) % args.save_every == 0:
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.ntrain + b)
                    print("model saved to {}".format(checkpoint_path))


            # eval validation loss
            data_loader.reset_batch_pointer('validation')
            validation_state = model.initial_state.eval()
            val_losses = 0
            for n in range(data_loader.nvalidation):
                x, y = data_loader.next_batch('validation')
                feed = {model.input_data: x, model.targets: y, model.initial_state: validation_state}
                validation_loss, validation_state = sess.run([model.cost, model.final_state], feed)
                val_losses += validation_loss

            validation_loss = val_losses / data_loader.nvalidation
            print("validation loss is {}".format(validation_loss))
Example #7
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        train_loss_iterations = {'iteration': [], 'epoch': [], 'train_loss': [], 'val_loss': []}

        for e in xrange(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in xrange(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                batch_idx = e * data_loader.num_batches + b
                print "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(batch_idx,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start)
                train_loss_iterations['iteration'].append(batch_idx)
                train_loss_iterations['epoch'].append(e)
                train_loss_iterations['train_loss'].append(train_loss)

                if batch_idx % args.save_every == 0:

                    # evaluate
                    state_val = model.initial_state.eval()
                    avg_val_loss = 0
                    for x_val, y_val in data_loader.val_batches:
                        feed_val = {model.input_data: x_val, model.targets: y_val, model.initial_state: state_val}
                        # evaluate only: running model.train_op here would train on validation data
                        val_loss, state_val = sess.run([model.cost, model.final_state], feed_val)
                        avg_val_loss += val_loss / len(data_loader.val_batches)
                    print 'val_loss: {:.3f}'.format(avg_val_loss)
                    train_loss_iterations['val_loss'].append(avg_val_loss)

                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=e * data_loader.num_batches + b)
                    print "model saved to {}".format(checkpoint_path)
                else:
                    train_loss_iterations['val_loss'].append(None)

            pd.DataFrame(data=train_loss_iterations,
                         columns=train_loss_iterations.keys()).to_csv(os.path.join(args.save_dir, 'log.csv'))
Example #8
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length,
                             args.input_encoding)
    args.vocab_size = data_loader.vocab_size

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.words, data_loader.vocab), f)

    model = Model(args)

    merged = tf.summary.merge_all()
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())

        for e in range(model.epoch_pointer.eval(), args.num_epochs):
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            speed = 0
            assign_op = model.epoch_pointer.assign(e)
            sess.run(assign_op)
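            # the epoch and batch pointers are variables in the graph, so a
            # restored checkpoint resumes mid-epoch: the loop below starts at
            # data_loader.pointer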

            for b in range(data_loader.pointer, data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {
                    model.input_data: x,
                    model.targets: y,
                    model.initial_state: state,
                    model.batch_time: speed
                }
                summary, train_loss, state, _, _ = sess.run([
                    merged, model.cost, model.final_state, model.train_op,
                    model.inc_batch_pointer_op
                ], feed)
                speed = time.time() - start
                if (e * data_loader.num_batches + b) % args.batch_size == 0:
                    print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                        .format(e * data_loader.num_batches + b,
                                args.num_epochs * data_loader.num_batches,
                                e, train_loss, speed))
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
Example #9
def train(args):
    args.data_dir = 'data'
    args.save_dir = 'save'
    args.rnn_size = 64
    args.num_layers = 1
    args.num_epochs = 5
    args.batch_size = 50
    args.seq_length = 50
    args.save_every = 1000
    args.grad_clip = 5.
    args.learning_rate = 0.002
    args.decay_rate = 0.97

    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)
    model = Model(args)
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        for e in range(args.num_epochs):
            sess.run(
                tf.assign(model.lr, args.learning_rate * args.decay_rate**e))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                for i, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[i].c
                    feed[h] = state[i].h
                train_loss, state, _ = sess.run(
                    [model.cost, model.final_state, model.train_op],
                    feed_dict=feed)
                end = time.time()
                print(
                    "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches, e,
                            train_loss, end - start))
                # save for the last result
                if (e == args.num_epochs - 1
                        and b == data_loader.num_batches - 1):
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
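
All of these variants take a single args namespace. For reference, a minimal driver sketch follows; it assumes only the hyper-parameter names hard-coded in Example #9, and a real train.py (such as the ones behind Examples #4 and #5) would expose more options, e.g. --init_from:

import argparse

def main():
    # hypothetical entry point wiring command-line flags to train(args);
    # the defaults mirror the values hard-coded in Example #9
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', default='data')
    parser.add_argument('--save_dir', default='save')
    parser.add_argument('--rnn_size', type=int, default=64)
    parser.add_argument('--num_layers', type=int, default=1)
    parser.add_argument('--batch_size', type=int, default=50)
    parser.add_argument('--seq_length', type=int, default=50)
    parser.add_argument('--num_epochs', type=int, default=5)
    parser.add_argument('--save_every', type=int, default=1000)
    parser.add_argument('--grad_clip', type=float, default=5.)
    parser.add_argument('--learning_rate', type=float, default=0.002)
    parser.add_argument('--decay_rate', type=float, default=0.97)
    train(parser.parse_args())

if __name__ == '__main__':
    main()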
Example #10
def train(args):
    # Step 1: load data
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    # Step 2: define a model
    model = Model(args)

    # Step 3: define an optimizer
    # YOUR CODE HERE
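    # (left blank as an exercise; Example #3 above fills this step in by clipping
    #  tf.gradients(model.cost, tvars) with tf.clip_by_global_norm(..., args.grad_clip)
    #  and applying the result with tf.train.AdamOptimizer(args.learning_rate))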

    # Step 4: train
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())

        for e in range(args.num_epochs):
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                # copy the state of previous batch
                # YOUR CODE HERE
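                # (exercise; Examples #3 and #9 do this by feeding each layer's
                #  LSTMStateTuple back in: feed[c] = state[i].c; feed[h] = state[i].h)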
                train_loss, state, _ = sess.run(
                    [model.cost, model.final_state, train_op], feed)

                end = time.time()
                print(
                    "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches, e,
                            train_loss, end - start))

                if (e * data_loader.num_batches + b) % args.save_every == 0\
                        or (e == args.num_epochs-1 and
                            b == data_loader.num_batches-1):
                    # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
Example #11
    def training(self, args):
        data_loader = TextLoader(args.data_dir, args.batch_size,
                                 args.seq_length)
        args.vocab_size = data_loader.vocab_size
        if os.path.isdir(args.save_dir):
            ckpt = tf.train.get_checkpoint_state(args.save_dir)
        else:
            os.makedirs(args.save_dir)
            ckpt = None
        model = Model(args)

        with tf.Session() as sess:
            tf.summary.FileWriter(os.getcwd() + '\\logs', sess.graph)
            # From cmd in the current directory, run: tensorboard --logdir=logs, then open the printed URL in Chrome to view the graph structure
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            now_epochs = 0
            if ckpt:
                saver.restore(sess, ckpt.model_checkpoint_path)
                now_epochs = int(ckpt.model_checkpoint_path.split('-')
                                 [1]) // data_loader.num_batches
            count = 0
            for e in range(now_epochs, args.num_epochs):
                sess.run(
                    tf.assign(model.lr,
                              args.learning_rate * (args.decay_rate**e)))
                data_loader.reset_batch_pointer()
                state = sess.run(model.initial_state)
                for b in range(data_loader.num_batches):
                    x, y = data_loader.next_batch()
                    feed = {model.input_data: x, model.targets: y}
                    for i, (c, h) in enumerate(model.initial_state):
                        feed[c] = state[i].c
                        feed[h] = state[i].h
                    train_loss, state, _ = sess.run(
                        [model.cost, model.final_state, model.train_op], feed)

                    if count % 5 == 0:
                        percent = (e * data_loader.num_batches + b + 1) / (
                            args.num_epochs * data_loader.num_batches)
                        print('%' + str(int(percent * 100)) + '|' +
                              '▉' * int(50 * percent) + ' ' * 2 *
                              (50 - int(50 * percent)) + '|')
                    count += 1

                checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                saver.save(sess,
                           checkpoint_path,
                           global_step=(e + 1) * data_loader.num_batches)
            print('%100' + '|' + '▉' * 50 + '|')
        tf.reset_default_graph()
Example #12
def train():
    loader = TextLoader(DATA_DIR, rnn.BATCH_SIZE, rnn.SEQ_LENGTH)
    vocab_size = loader.vocab_size

    if init_from is not None:
        ckpt = tf.train.get_checkpoint_state(init_from)

    with open(os.path.join(SAVE_DIR, 'conf.pkl'), 'wb') as f:
        cPickle.dump((loader.vocab_size, loader.chars, loader.vocab), f)

    model = rnn.Model(vocab_size, True)

    with tf.Session() as sess:

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        if init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)

        for i in range(NUM_EPOCHS):
            sess.run(
                tf.assign(model.lr, rnn.learning_rate * (rnn.decay_rate**i)))
            loader.reset_batch_pointer()

            state = sess.run(model.initial_state)
            for b in range(loader.num_batches):
                curr_batch = i * loader.num_batches + b
                start = time.time()
                x, y = loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
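                # carry the previous batch's RNN state over, one state tensor per layer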
                for j, s in enumerate(model.initial_state):
                    feed[s] = state[j]

                train_loss, state, _ = sess.run(
                    [model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                print(
                    ('{0}/{1} (epoch {2}),' + ' train_loss = {3:.2f},' +
                     ' time/batch = {4:.2f},' + ' time_left = {5:.2f}').format(
                         curr_batch, NUM_EPOCHS * loader.num_batches, i,
                         train_loss, end - start,
                         ((end - start) / 3600 *
                          (NUM_EPOCHS * loader.num_batches - curr_batch))))
                if curr_batch % 1000 == 0 or (i == (NUM_EPOCHS - 1) and
                                              (b == loader.num_batches - 1)):
                    ckpath = os.path.join(SAVE_DIR, 'model.ckpt')
                    saver.save(sess, ckpath, global_step=curr_batch)
                    print('model saved to {}'.format(ckpath))
Example #13
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    with open(os.path.join(args.save_dir, 'configure.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.words, data_loader.vocab), f)

    model = Model(args)
    merged = tf.summary.merge_all()

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())

        for e in range(args.num_epochs):
            #sess.run(tf.assign(model.lr,args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            speed = 0
            assign_op = model.epoch_pointer.assign(e)
            sess.run(assign_op)

            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {
                    model.input_data: x,
                    model.targets: y,
                    model.initial_state: state,
                    model.batch_time: speed
                }
                summary, train_loss, state, _, _ = sess.run([
                    merged, model.cost, model.final_state, model.train_op,
                    model.inc_batch_pointer_op
                ], feed)
                #train_writer.add_summary(summary, e * data_loader.num_batches + b)
                speed = time.time() - start
                if (e * data_loader.num_batches + b) % args.batch_size == 0:
                    print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                        .format(e * data_loader.num_batches + b,
                                args.num_epochs * data_loader.num_batches,
                                e, train_loss, speed))
                '''if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e == args.num_epochs - 1 and b == data_loader.num_batches - 1):
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))'''
Example #14
def train(args):
    # Load data
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    # Set vocabulary size
    args.vocab_size = data_loader.vocab_size

    # Create the save directory if it does not exist
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # Save the configuration and the vocab, used to reload models when sampling
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    # Create models with arguments
    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                      .format(e * data_loader.num_batches + b,
                              args.num_epochs * data_loader.num_batches,
                              e, train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0:
                    checkpoint_path = os.path.join(args.save_dir, 'models.ckpt')
                    saver.save(sess, checkpoint_path, global_step=e * data_loader.num_batches + b)
                    print("models saved to {}".format(checkpoint_path))
        # Save the final model
        saver.save(sess, os.path.join(args.save_dir, 'models.ckpt'),
                   global_step=args.num_epochs * data_loader.num_batches)
Example #15
def main(_):
  pp.pprint(FLAGS.__flags)

  if not os.path.exists(FLAGS.checkpoint_dir):
    print(" [*] Creating checkpoint directory...")
    os.makedirs(FLAGS.checkpoint_dir)

  data_loader = TextLoader(os.path.join(FLAGS.data_dir, FLAGS.dataset_name),
                           FLAGS.batch_size, FLAGS.seq_length)
  vocab_size = data_loader.vocab_size
  valid_size = 50
  valid_window = 100

  with tf.variable_scope('model'):
    train_model = CharRNN(vocab_size, FLAGS.batch_size, FLAGS.rnn_size,
                          FLAGS.layer_depth, FLAGS.num_units, FLAGS.rnn_type,
                          FLAGS.seq_length, FLAGS.keep_prob,
                          FLAGS.grad_clip, FLAGS.nce_samples)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, train_model.global_step,
                                               data_loader.num_batches, FLAGS.grad_clip,
                                               staircase=True)
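  # simple_model (for sampling) and valid_model below share the training weights
  # through variable_scope reuse; only batch size and sequence length differ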
  with tf.variable_scope('model', reuse=True):
    simple_model = CharRNN(vocab_size, 1, FLAGS.rnn_size,
                           FLAGS.layer_depth, FLAGS.num_units, FLAGS.rnn_type,
                           1, FLAGS.keep_prob,
                           FLAGS.grad_clip)

  with tf.variable_scope('model', reuse=True):
    valid_model = CharRNN(vocab_size, FLAGS.batch_size, FLAGS.rnn_size,
                          FLAGS.layer_depth, FLAGS.num_units, FLAGS.rnn_type,
                          FLAGS.seq_length, FLAGS.keep_prob,
                          FLAGS.grad_clip)

  with tf.Session() as sess:
    tf.global_variables_initializer().run()

    best_val_pp = float('inf')
    best_val_epoch = 0
    valid_loss = 0
    valid_perplexity = 0
    start = time.time()

    if FLAGS.export:
      print("Eval...")
      final_embeddings = train_model.embedding.eval(sess)
      emb_file = os.path.join(FLAGS.data_dir, FLAGS.dataset_name, 'emb.npy')
      print("Embedding shape: {}".format(final_embeddings.shape))
      np.save(emb_file, final_embeddings)

    else: # Train
      current_step = 0
      similarity, valid_examples, _ = compute_similarity(train_model, valid_size, valid_window, 6)

      # save hyper-parameters
      cPickle.dump(FLAGS.__flags, open(FLAGS.log_dir + "/hyperparams.pkl", 'wb'))

      # run it!
      for e in range(FLAGS.num_epochs):
        data_loader.reset_batch_pointer()

        # decay learning rate
        sess.run(tf.assign(train_model.lr, learning_rate))

        # iterate by batch
        for b in range(data_loader.num_batches):
          x, y = data_loader.next_batch()
          res, time_batch = run_epochs(sess, x, y, train_model)
          train_loss = res["loss"][0]
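          # perplexity is exp of the mean per-character cross-entropy loss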
          train_perplexity = np.exp(train_loss)
          iterate = e * data_loader.num_batches + b

          if current_step != 0 and current_step % FLAGS.valid_every == 0:
            valid_loss = 0

            for vb in range(data_loader.num_valid_batches):
              res, valid_time_batch = run_epochs(sess, data_loader.x_valid[vb], data_loader.y_valid[vb], valid_model, False)
              valid_loss += res["loss"][0]

            valid_loss = valid_loss / data_loader.num_valid_batches
            valid_perplexity = np.exp(valid_loss)

            print("### valid_perplexity = {:.2f}, time/batch = {:.2f}".format(valid_perplexity, valid_time_batch))

            log_str = ""

            # Generate sample
            smp1 = simple_model.sample(sess, data_loader.chars, data_loader.vocab, UNK_ID, 5, u"我喜歡做")
            smp2 = simple_model.sample(sess, data_loader.chars, data_loader.vocab, UNK_ID, 5, u"他吃飯時會用")
            smp3 = simple_model.sample(sess, data_loader.chars, data_loader.vocab, UNK_ID, 5, u"人類總要重複同樣的")
            smp4 = simple_model.sample(sess, data_loader.chars, data_loader.vocab, UNK_ID, 5, u"天色暗了,好像快要")

            log_str = log_str + smp1 + "\n"
            log_str = log_str + smp2 + "\n"
            log_str = log_str + smp3 + "\n"
            log_str = log_str + smp4 + "\n"

            # Write a similarity log
            # Note that this is expensive (~20% slowdown if computed every 500 steps)
            sim = similarity.eval()
            for i in range(valid_size):
              valid_word = data_loader.chars[valid_examples[i]]
              top_k = 8 # number of nearest neighbors
              nearest = (-sim[i, :]).argsort()[1:top_k+1]
              log_str = log_str + "Nearest to %s:" % valid_word
              for k in range(top_k):
                close_word = data_loader.chars[nearest[k]]
                log_str = "%s %s," % (log_str, close_word)
              log_str = log_str + "\n"
            print(log_str)
            # Write to log
            text_file = codecs.open(FLAGS.log_dir + "/similarity.txt", "w", "utf-8")
            text_file.write(log_str)
            text_file.close()

          # print log
          print("{}/{} (epoch {}) loss = {:.2f}({:.2f}) perplexity(train/valid) = {:.2f}({:.2f}) time/batch = {:.2f} chars/sec = {:.2f}k"\
              .format(e * data_loader.num_batches + b,
                      FLAGS.num_epochs * data_loader.num_batches,
                      e, train_loss, valid_loss, train_perplexity, valid_perplexity,
                      time_batch, (FLAGS.batch_size * FLAGS.seq_length) / time_batch / 1000))

          current_step = tf.train.global_step(sess, train_model.global_step)

        if valid_perplexity < best_val_pp:
          best_val_pp = valid_perplexity
          best_val_epoch = iterate

          # save best model
          train_model.save(sess, FLAGS.checkpoint_dir, FLAGS.dataset_name)
          print("model saved to {}".format(FLAGS.checkpoint_dir))

        # early_stopping
        if iterate - best_val_epoch > FLAGS.early_stopping:
          print('Total time: {}'.format(time.time() - start))
          break
Example #16
def main(_):
  pp.pprint(FLAGS.__flags)

  if not os.path.exists(FLAGS.checkpoint_dir):
    print(" [*] Creating checkpoint directory...")
    os.makedirs(FLAGS.checkpoint_dir)

  data_loader = TextLoader(os.path.join(FLAGS.data_dir, FLAGS.dataset_name),
                           FLAGS.batch_size, FLAGS.seq_length)
  vocab_size = data_loader.vocab_size
  valid_size = 50
  valid_window = 100

  with tf.variable_scope('model'):
    train_model = CharRNN(vocab_size, FLAGS.batch_size, FLAGS.rnn_size,
                          FLAGS.layer_depth, FLAGS.num_units, FLAGS.rnn_type,
                          FLAGS.seq_length, FLAGS.keep_prob,
                          FLAGS.grad_clip)

  with tf.variable_scope('model', reuse=True):
    simple_model = CharRNN(vocab_size, 1, FLAGS.rnn_size,
                           FLAGS.layer_depth, FLAGS.num_units, FLAGS.rnn_type,
                           1, FLAGS.keep_prob,
                           FLAGS.grad_clip)

  with tf.variable_scope('model', reuse=True):
    valid_model = CharRNN(vocab_size, FLAGS.batch_size, FLAGS.rnn_size,
                          FLAGS.layer_depth, FLAGS.num_units, FLAGS.rnn_type,
                          FLAGS.seq_length, FLAGS.keep_prob,
                          FLAGS.grad_clip)

  with tf.Session() as sess:
    tf.global_variables_initializer().run()

    train_model.load(sess, FLAGS.checkpoint_dir, FLAGS.dataset_name)

    best_val_pp = float('inf')
    best_val_epoch = 0
    valid_loss = 0
    valid_perplexity = 0
    start = time.time()

    if FLAGS.export:
      print("Eval...")
      final_embeddings = train_model.embedding.eval(sess)
      emb_file = os.path.join(FLAGS.data_dir, FLAGS.dataset_name, 'emb.npy')
      print("Embedding shape: {}".format(final_embeddings.shape))
      np.save(emb_file, final_embeddings)

    else: # Train
      current_step = 0
      similarity, valid_examples, _ = compute_similarity(train_model, valid_size, valid_window, 6)

      # save hyper-parameters
      cPickle.dump(FLAGS.__flags, open(FLAGS.log_dir + "/hyperparams.pkl", 'wb'))

      # run it!
      for e in range(FLAGS.num_epochs):
        data_loader.reset_batch_pointer()

        # decay learning rate
        sess.run(tf.assign(train_model.lr, FLAGS.learning_rate))

        # iterate by batch
        for b in range(data_loader.num_batches):
          x, y = data_loader.next_batch()
          res, time_batch = run_epochs(sess, x, y, train_model)
          train_loss = res["loss"]
          train_perplexity = np.exp(train_loss)
          iterate = e * data_loader.num_batches + b

          # print log
          print("{}/{} (epoch {}) loss = {:.2f}({:.2f}) perplexity(train/valid) = {:.2f}({:.2f}) time/batch = {:.2f} chars/sec = {:.2f}k"\
              .format(e * data_loader.num_batches + b,
                      FLAGS.num_epochs * data_loader.num_batches,
                      e, train_loss, valid_loss, train_perplexity, valid_perplexity,
                      time_batch, (FLAGS.batch_size * FLAGS.seq_length) / time_batch / 1000))

          current_step = tf.train.global_step(sess, train_model.global_step)

        # validate
        valid_loss = 0

        for vb in range(data_loader.num_valid_batches):
          res, valid_time_batch = run_epochs(sess, data_loader.x_valid[vb], data_loader.y_valid[vb], valid_model, False)
          valid_loss += res["loss"]

        valid_loss = valid_loss / data_loader.num_valid_batches
        valid_perplexity = np.exp(valid_loss)

        print("### valid_perplexity = {:.2f}, time/batch = {:.2f}".format(valid_perplexity, valid_time_batch))

        log_str = ""

        # Generate sample
        smp1 = simple_model.sample(sess, data_loader.chars, data_loader.vocab, UNK_ID, 5, u"我喜歡做")
        smp2 = simple_model.sample(sess, data_loader.chars, data_loader.vocab, UNK_ID, 5, u"他吃飯時會用")
        smp3 = simple_model.sample(sess, data_loader.chars, data_loader.vocab, UNK_ID, 5, u"人類總要重複同樣的")
        smp4 = simple_model.sample(sess, data_loader.chars, data_loader.vocab, UNK_ID, 5, u"天色暗了,好像快要")

        log_str = log_str + smp1 + "\n"
        log_str = log_str + smp2 + "\n"
        log_str = log_str + smp3 + "\n"
        log_str = log_str + smp4 + "\n"

        # Write a similarity log
        # Note that this is expensive (~20% slowdown if computed every 500 steps)
        sim = similarity.eval()
        for i in range(valid_size):
          valid_word = data_loader.chars[valid_examples[i]]
          top_k = 8 # number of nearest neighbors
          nearest = (-sim[i, :]).argsort()[1:top_k+1]
          log_str = log_str + "Nearest to %s:" % valid_word
          for k in range(top_k):
            close_word = data_loader.chars[nearest[k]]
            log_str = "%s %s," % (log_str, close_word)
          log_str = log_str + "\n"
        print(log_str)

        # Write to log
        text_file = codecs.open(FLAGS.log_dir + "/similarity.txt", "w", "utf-8")
        text_file.write(log_str)
        text_file.close()

        if valid_perplexity < best_val_pp:
          best_val_pp = valid_perplexity
          best_val_epoch = iterate

          # save best model
          train_model.save(sess, FLAGS.checkpoint_dir, FLAGS.dataset_name)
          print("model saved to {}".format(FLAGS.checkpoint_dir))

        # early_stopping
        if iterate - best_val_epoch > FLAGS.early_stopping:
          print('Total time: {}'.format(time.time() - start))
          break
Example #17
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size
    name = ""
    if args.separate != 0 and args.data_dir is not None:
        name = args.data_dir
        if '\\' in name:
            name = name[name.rfind('\\')+1:]
        if '/' in name:
            name = name[name.rfind('/')+1:]
        print("Name: "+name)
        args.save_dir = os.path.join(args.save_dir,name)

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
    #if args.cont != 0:
        # check if all necessary files exist
        #if args.separate != 0:
            #args.init_from = os.path.join(args.init_from,name)
        assert os.path.isdir(args.init_from), "%s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "config.pkl")), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "chars_vocab.pkl")), "chars_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.latest_checkpoint(args.init_from)
        assert ckpt, "No checkpoint found"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(args)[checkme], "Command line argument and saved model disagree on '%s'" % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars==data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab==data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"
        

    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)
    with codecs.open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with codecs.open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)
    config = tf.ConfigProto()
    #config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # instrument for tensorboard
        summaries = tf.summary.merge_all()
        writer = tf.summary.FileWriter(
                os.path.join(args.log_dir,time.strftime("%Y-%m-%d-%H-%M-%S")+' '+name))
                #os.path.join(args.log_dir, time.strftime("%Y-%m-%d-%H-%M-%S")))
        writer.add_graph(sess.graph)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt)
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr,
                               args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                for i, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[i].c
                    feed[h] = state[i].h

                # instrument for tensorboard
                summ, train_loss, state, _ = sess.run([summaries, model.cost, model.final_state, model.train_op], feed)
                writer.add_summary(summ, e * data_loader.num_batches + b)

                end = time.time()
                if (e * data_loader.num_batches + b) % args.print_every == 0:
                    tDelta = str(datetime.timedelta(seconds=(args.num_epochs * data_loader.num_batches) - (e * data_loader.num_batches + b))*(end - start))[:-7]
                    print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}, remaining = {}"
                          .format(e * data_loader.num_batches + b,
                                  args.num_epochs * data_loader.num_batches,
                                  e, train_loss, end - start, tDelta))
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e == args.num_epochs-1 and
                            b == data_loader.num_batches-1):
                    # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))

                    #sample.sample()

                    #sampling the output
                    #with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'rb') as f:
                        #chars, vocab = cPickle.load(f)
                    #Use most frequent char if no prime is given
                    #if args.prime == '':
                        #args.prime = chars[0]
                    #s = str(bytes.decode(model.sample(sess, chars, vocab, args.n, args.prime,args.sample).encode('utf-8'))).encode('utf-8')[:-1]
                    #with codecs.open(os.path.join(args.save_dir,name+' checkpoint '+str(train_loss)),'w') as f:
                        #f.write(s)
                    #print(s)

                    with open(os.path.join(args.save_dir,'status.txt'),'w') as f:
                        f.write("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}, remaining = {}"
                      .format(e * data_loader.num_batches + b,
                              args.num_epochs * data_loader.num_batches,
                              e, train_loss, end - start,str(datetime.timedelta(seconds=(args.num_epochs * data_loader.num_batches) - (e * data_loader.num_batches + b))*(end - start))[:-7]))
                        f.write('\nRNN size: {}\nLayers: {}\nSequence length: {}\nModel: {}'.format(args.rnn_size,args.num_layers,args.seq_length,args.model))
                        f.write('\n\nNum epochs: {}\nGradient clip: {}\nLearning rate: {}\nDecay rate: {}\nOutput-keep-prob: {}\nInput-keep-prob: {}'.format(args.num_epochs,args.grad_clip,args.learning_rate,args.decay_rate,args.output_keep_prob,args.input_keep_prob))
Example #18
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        train_loss_iterations = {
            'iteration': [],
            'epoch': [],
            'train_loss': [],
            'val_loss': []
        }

        for e in xrange(args.num_epochs):
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in xrange(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {
                    model.input_data: x,
                    model.targets: y,
                    model.initial_state: state
                }
                train_loss, state, _ = sess.run(
                    [model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                batch_idx = e * data_loader.num_batches + b
                print "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(batch_idx,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start)
                train_loss_iterations['iteration'].append(batch_idx)
                train_loss_iterations['epoch'].append(e)
                train_loss_iterations['train_loss'].append(train_loss)

                if batch_idx % args.save_every == 0:

                    # evaluate
                    state_val = model.initial_state.eval()
                    avg_val_loss = 0
                    for x_val, y_val in data_loader.val_batches:
                        feed_val = {
                            model.input_data: x_val,
                            model.targets: y_val,
                            model.initial_state: state_val
                        }
                        # evaluate only: running model.train_op here would train on validation data
                        val_loss, state_val = sess.run(
                            [model.cost, model.final_state], feed_val)
                        avg_val_loss += val_loss / len(data_loader.val_batches)
                    print 'val_loss: {:.3f}'.format(avg_val_loss)
                    train_loss_iterations['val_loss'].append(avg_val_loss)

                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print "model saved to {}".format(checkpoint_path)
                else:
                    train_loss_iterations['val_loss'].append(None)

            pd.DataFrame(data=train_loss_iterations,
                         columns=train_loss_iterations.keys()).to_csv(
                             os.path.join(args.save_dir, 'log.csv'))
Example #19
def train(args):
    print(args)
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        for e in range(args.num_epochs):
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            #print("model learning rate is {}".format(model.lr.eval()))
            data_loader.reset_batch_pointer('train')

            state = model.initial_state.eval()
            for b in range(data_loader.ntrain):
                start = time.time()
                x, y = data_loader.next_batch('train')

                # tmp = ''
                # for c in x:
                #   for i in c:
                #     tmp += np.array(data_loader.chars)[i]
                # print(tmp)

                feed = {
                    model.input_data: x,
                    model.targets: y,
                    model.initial_state: state
                }
                train_loss, state, _ = sess.run(
                    [model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.ntrain + b,
                            args.num_epochs * data_loader.ntrain,
                            e, train_loss, end - start))
                if (e * data_loader.ntrain + b) % args.save_every == 0:
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.ntrain + b)
                    print("model saved to {}".format(checkpoint_path))

            # eval validation loss
            data_loader.reset_batch_pointer('validation')
            validation_state = model.initial_state.eval()
            val_losses = 0
            for n in range(data_loader.nvalidation):
                x, y = data_loader.next_batch('validation')
                feed = {
                    model.input_data: x,
                    model.targets: y,
                    model.initial_state: validation_state
                }
                validation_loss, validation_state = sess.run(
                    [model.cost, model.final_state], feed)
                val_losses += validation_loss

            validation_loss = val_losses / data_loader.nvalidation
            print("validation loss is {}".format(validation_loss))
Example #20
0
def train(args):

    data_loader = TextLoader(args.data_path, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size
    args.file_size = data_loader.file_size
    print("Vocab size: ", args.vocab_size)
    print("File size: ", args.file_size)
    args.lower_bound = 0  # overwritten below with the known entropy if an info file is given
    data_info = {}
    if args.info_path is not None:
        assert os.path.isfile(
            args.info_path
        ), "Info file not found in the path: %s" % args.info_path

        #Open the info file
        with open(args.info_path, 'rb') as f:
            data_info = json.load(f)
            #Assuming we know entropy
            args.lower_bound = data_info['Entropy']
            print(data_info)

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), "%s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "chars_vocab.pkl")
        ), "chars_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars == data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    ##################################################
    # Get the model
    ##################################################
    model = Model(args)
    print("model Loaded")

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        writer = tf.summary.FileWriter(args.summary_dir, sess.graph)
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)

        ######################################################
        # Perform the training
        #####################################################
        for e in range(args.num_epochs):
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            data_loader.reset_batch_pointer()  # rewind to the first training batch
            state = sess.run(model.initial_state)  # zero state of the RNN cell
            cumul_loss = 0

            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}

                for i, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[i].c
                    feed[h] = state[i].h
                summary, train_loss, state, _ = sess.run([
                    model.merged_summaries, model.cost, model.final_state,
                    model.train_op
                ], feed)  # batch cross-entropy, in nats
                train_loss /= np.log(2)  # convert nats to bits per symbol
                cumul_loss += train_loss
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                    or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))

                if b % 10 == 0:
                    writer.add_summary(summary,
                                       e * data_loader.num_batches + b)

            cumul_loss /= data_loader.num_batches
            print("Epoch {}: Cumulative Loss for the epoch: {:.3f}".format(
                e, cumul_loss))
            if (abs(cumul_loss - args.lower_bound) < 0.1):
                print("Stopping Training as we get a good loss.. :) ... ")
                break

        ##############################################################
        # Append details to the output file
        ##############################################################
        args.epoch_stopped = e + 1
        args.last_epoch_loss = cumul_loss
        with open(args.output_path, 'ab') as f:  # pickle requires a binary stream

            params = vars(args)
            params.update(data_info)
            #json.dump(params, f,indent=2)
            cPickle.dump(params, f)
            #f.write("\n ############################################# \n")

        with open(args.output_path + ".json", 'a') as f:

            params = vars(args)
            params.update(data_info)
            json.dump(params, f, indent=2)
            #cPickle.dump(params)
            f.write("\n ############################################# \n")
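Example #20 divides each softmax loss by np.log(2) and compares the per-epoch average against the entropy read from the info file. The division is just a change of logarithm base, from nats to bits; a minimal restatement (the helper name is an assumption):

import numpy as np

def nats_to_bits(loss_nats):
    # TensorFlow's cross-entropy losses use the natural log (nats);
    # dividing by ln 2 converts to bits per symbol, the unit in which
    # the entropy lower bound is expressed.
    return loss_nats / np.log(2)

# e.g. nats_to_bits(1.386) ~= 2.0 bits per character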
Example #21
0
def train(args):
    # Data Preparation
    # ====================================

    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size
    print("Number of sentences: {}".format(data_loader.num_data))
    print("Vocabulary size: {}".format(args.vocab_size))

    # Check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "words_vocab.pkl")
        ), "words_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = pickle.load(f)
        need_be_same = ["rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'words_vocab.pkl'), 'rb') as f:
            saved_words, saved_vocab = pickle.load(f)
        assert saved_words == data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.words, data_loader.vocab), f)
    """
    embedding_matrix = get_vocab_embedding(args.save_dir, data_loader.words, args.embedding_file)
    print("Embedding matrix shape:",embedding_matrix.shape)
    """

    # Training
    # ====================================
    with tf.Graph().as_default():
        with tf.Session(config=tf.ConfigProto(gpu_options=options)) as sess:  # `options` is presumably a module-level tf.GPUOptions
            model = BasicLSTM(args)

            # Define training procedure
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(args.learning_rate)
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(model.cost, tvars),
                                              args.grad_clip)
            train_op = optimizer.apply_gradients(zip(grads, tvars),
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity
            grad_summaries = []
            for g, v in zip(grads, tvars):
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)

            # Summary for loss
            loss_summary = tf.summary.scalar("loss", model.cost)

            # Train summaries
            merged = tf.summary.merge_all()
            if not os.path.exists(args.log_dir):
                os.makedirs(args.log_dir)
            train_writer = tf.summary.FileWriter(args.log_dir, sess.graph)

            # saver = tf.train.Saver(tf.global_variables())
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=max_model_keep)  # max_model_keep is presumably a module-level constant

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Restore model
            if args.init_from is not None:
                saver.restore(sess, ckpt.model_checkpoint_path)

            # Start training
            print("Start training")
            valLoss_opt = 100000000.0       # best validation loss seen so far
            lastStripValLoss = 100000000.0  # validation loss of the previous strip
            successiveHit = 0               # successive strips with rising val loss
            trainLossList = list()          # training losses of the current strip
            largestGL = [-1000, 0]          # [best criterion value, step it occurred]
            largestPG = [-1000, 0]
            largestUP = [0, 0]
            total_start_time = time.time()
            for epoch in range(args.num_epochs):
                data_loader.reset_batch_pointer()
                state = sess.run(model.initial_state)
                for i in range(data_loader.num_batches):
                    start = time.time()
                    #training
                    x_batch, y_batch = data_loader.next_batch()
                    feed_dict = {
                        model.x: x_batch,
                        model.y: y_batch,
                        model.keep_prob: args.keep_prob
                    }
                    _, step, summary, loss, equal = sess.run([
                        train_op, global_step, merged, model.cost, model.equal
                    ], feed_dict)
                    print(
                        "training step {}, epoch {}, batch {}/{}, loss: {:.4f}, accuracy: {:.4f}, time/batch: {:.3f}"
                        .format(step, epoch, i, data_loader.num_batches, loss,
                                np.mean(equal),
                                time.time() - start))
                    train_writer.add_summary(summary, step)
                    trainLossList.append(loss)

                    current_step = tf.train.global_step(sess, global_step)
                    # validation
                    if current_step % args.check_strip_length == 0 and current_step > 0 and epoch > 0:
                        start = time.time()
                        x_batch_valid, y_batch_valid = \
                            data_loader.get_first_batch_as_valid()
                        total_valid_loss = 0.0
                        total_valid_equal = 0.0
                        for index in range(len(x_batch_valid)):
                            feed_dict_valid = {
                                model.x: x_batch_valid[index],
                                model.y: y_batch_valid[index],
                                # disable dropout when evaluating
                                model.keep_prob: 1.0
                            }
                            valid_loss, valid_equal = sess.run(
                                [model.cost, model.equal], feed_dict_valid)
                            total_valid_loss += valid_loss
                            total_valid_equal += valid_equal
                        total_valid_loss /= len(x_batch_valid)
                        total_valid_equal /= len(x_batch_valid)

                        print(
                            "================================= step {} ==================================="
                            .format(step))
                        print(
                            "validation step {}, epoch {}, loss: {:.4f}, accuracy: {:.4f}, time/batch: {:.3f}"
                            .format(step, epoch, total_valid_loss,
                                    np.mean(total_valid_equal),
                                    time.time() - start))

                        _GL = checkEarlyStopGL(total_valid_loss, valLoss_opt)
                        _PG = checkEarlyStopPQ(total_valid_loss, valLoss_opt,
                                               trainLossList)
                        _UP = checkEarlyStopUP(total_valid_loss,
                                               lastStripValLoss, successiveHit)
                        if _GL > largestGL[0]:
                            largestGL[0] = _GL
                            largestGL[1] = current_step
                        if _PG > largestPG[0]:
                            largestPG[0] = _PG
                            largestPG[1] = current_step
                        if _UP > largestUP[0]:
                            largestUP[0] = _UP
                            largestUP[1] = current_step
                        print("Criteria GL : " + str(_GL))
                        print("Criteria PG : " + str(_PG))
                        print("Criteria UP : " + str(_UP))
                        print(
                            "=============================================================================="
                        )
                        # save a checkpoint the first time each early-stopping
                        # criterion crosses one of its four thresholds; a
                        # threshold is disabled after firing by raising it to
                        # a sentinel value
                        for name, score, sentinel in (("GL", _GL, 10000000.0),
                                                      ("PG", _PG, 10000000.0),
                                                      ("UP", _UP, 1000)):
                            for level in range(4):
                                attr = "{}_threshold{}".format(name, level)
                                if score > getattr(args, attr):
                                    setattr(args, attr, sentinel)
                                    tag = "{}{}".format(name, level)
                                    checkpoint_path = os.path.join(
                                        args.save_dir,
                                        'model_{}.ckpt'.format(tag))
                                    path = saver.save(sess,
                                                      checkpoint_path,
                                                      global_step=current_step)
                                    print("Saved {} model checkpoint to {}".
                                          format(tag, path))
                                    print2LogFile(args, tag, current_step,
                                                  epoch, total_start_time)

                        #setting variables
                        if total_valid_loss < valLoss_opt:
                            valLoss_opt = total_valid_loss
                        lastStripValLoss = total_valid_loss
                        successiveHit = _UP
                        trainLossList = list()

                    # current_step = tf.train.global_step(sess, global_step)
                    if current_step % args.save_every == 0 or (
                            epoch == args.num_epochs - 1
                            and i == data_loader.num_batches -
                            1):  #save for the last result
                        checkpoint_path = os.path.join(args.save_dir,
                                                       'model.ckpt')
                        path = saver.save(sess,
                                          checkpoint_path,
                                          global_step=current_step)
                        print("Saved model checkpoint to {}".format(path))

                        printEarlyStopLog2File(args, largestGL, largestPG,
                                               largestUP, current_step, epoch,
                                               total_start_time)
                        print("print early stop log to file : " +
                              args.earlyStop_log_filename)
                        print("cost time : " +
                              str(time.time() - total_start_time) + " secs")
                        #reset
                        largestGL = [-1000, 0]
                        largestPG = [-1000, 0]
                        largestUP = [0, 0]

            train_writer.close()
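The checkEarlyStopGL/checkEarlyStopPQ/checkEarlyStopUP helpers are not shown in this example. Below are sketches consistent with their call sites, modeled on Prechelt's classic early-stopping criteria (generalization loss, progress quotient, successive increases); the exact scaling constants are assumptions:

def checkEarlyStopGL(val_loss, val_loss_opt):
    # generalization loss: relative increase of the current validation
    # loss over the best validation loss seen so far, in percent
    return 100.0 * (val_loss / val_loss_opt - 1.0)

def checkEarlyStopPQ(val_loss, val_loss_opt, train_loss_strip):
    # progress quotient: GL divided by how much the training loss is
    # still improving over the last strip of batches
    progress = 1000.0 * (sum(train_loss_strip) /
                         (len(train_loss_strip) * min(train_loss_strip)) - 1.0)
    return checkEarlyStopGL(val_loss, val_loss_opt) / max(progress, 1e-8)

def checkEarlyStopUP(val_loss, last_strip_val_loss, successive_hits):
    # number of successive validation strips in which the loss increased
    return successive_hits + 1 if val_loss > last_strip_val_loss else 0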
Example #22
0
def test(args):
    with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
        saved_args = cPickle.load(f)
    with open(os.path.join(args.save_dir, 'event_words_vocab.pkl'), 'rb') as f:
        event_words, event_vocab, event_vocab_rev = cPickle.load(f)
    with open(os.path.join(args.save_dir, 'para_words_vocab.pkl'), 'rb') as f:
        para_words, para_vocab, para_vocab_rev = cPickle.load(f)
        

    onlyfiles = [f for f in listdir(args.data_dir)
                 if isfile(join(args.data_dir, f))
                 and "pkl" not in f and "npy" not in f]

    data_loader = TextLoader(args.data_dir, onlyfiles, 1, 50, args.cid_num)
    data_loader.reset_batch_pointer()

    arg1 = args.arg_1
    arg2 = args.arg_2

    model = Model(saved_args, False)
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(args.save_dir)

        eventWin = []
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

            #state = sess.run(model.cell.zero_state(1, tf.float32))

            x_e, y_e, x_p1, y_p1, x_p2, y_p2= data_loader.next_batch()
            print(x_e, y_e, x_p1, y_p1)
            argpara = list(zip(x_e, x_p1, x_p2, y_e, y_p1, y_p2))  # sequence of (e, p1, p2) plus next-step targets
            #tgtargpara = zip(y_e, y_p1, y_p2) # get a sequence of (e, p1, p2)            
            #print (argpara)
            predicated_list = ""
            start = time.time()

            count = 0
            print (argpara)
            for (elist, p1list, p2list, elistNext, p1listNext, p2listNext) in argpara:                                
                for e, p1, p2, et, p1t, p2t in zip(elist, p1list, p2list, elistNext, p1listNext, p2listNext):
                    state = sess.run(model.cell.zero_state(1, tf.float32))
                    count += 1

                    suspiciousRank = args.susp_rank
                    eventStr = data_loader.event_vocab_rev.get(e)
                    if count > 5 and eventStr not in predicated_list and 'EVENT_READ' not in eventStr and 'EVENT_ACCEPT' not in eventStr:
                        #print ("observed:" + data_loader.event_vocab_rev.get(e))# + ' ' + data_loader.para_vocab_rev.get(p1) + ' ' + data_loader.para_vocab_rev.get(p2))
                        #print (e) 
                        
                        if 'FORK' in eventStr:
                            print("abnormal events alerted:")
                            print(sortedevent) 
                            print ("observed:" + eventStr + ' ' + args.arg_1)
                            #print (predicated_list)

                        if ("none" not in args.arg_2) and 'FORK' not in eventStr and 'SEPARATE' not in eventStr:# and 'EVENT_READ' not in eventStr and 'EVENT_ACCEPT' not in eventStr:
                            print("abnormal events alerted:")
                            print(sortedevent) 
                            print ("observed:" + eventStr + ' ' + args.arg_2)
                            #print (predicated_list)

                        print (predicated_list)    
                    

                    print("========  ") 
                    #print (e, p1, p2)
                    
                    x = np.zeros((1, 1))
                    x[0, 0] = e

                    y1 = np.zeros((1, 1))
                    y1[0, 0] = p1

                    y2 = np.zeros((1, 1))
                    y2[0, 0] = p2
                    

                    feed = {model.event_input_data: x, model.para1_input_data : y1, model.para2_input_data : y2, model.initial_state:state}
                    [state, probs, probs1, probs2] = sess.run([model.final_state, model.probs, model.probs1, model.probs2], feed)
                    #print(probs)#, probs1, probs2)
                    #maxval = tf.reduce_max(probs, 1, keep_dims=False)
                    
                    #eventval = np.argmax(probs[0])
                    sortedevent = np.argsort(probs[0])[::-1]
                    #desentsortedevent = sortedevent.reverse()
                    #print(sortedevent[len(sortedevent)-1], probs[0])
                    #print (eventval)
                    argval1 = np.argmax(probs1[0])
                    argval2 = np.argmax(probs2[0])
                    predicated_list = "predicate:["

                    # 1-based rank of the true next event in the sorted predictions
                    rank = 0
                    for idx in range(len(sortedevent)):
                        if sortedevent[idx] == et:
                            rank = idx + 1
                    print(rank)
                    eventWin.append(rank)
                    if len(eventWin) == 5:
                        eventWin.pop(0)

                    total = 0
                    for i in eventWin:
                        total += i

                    arg = total/len(eventWin)
                        

                    for i in range(suspiciousRank):
                        #print(i)                                              
                        predicated_list += data_loader.event_vocab_rev.get(sortedevent[i]) + ' '
                    predicated_list += '] ' + data_loader.para_vocab_rev.get(argval1) + ' ' +  data_loader.para_vocab_rev.get(argval2)
                     
                    #if arg > 3: 
                    # print("Average suspicious ranking:" + str(arg))
                    """
                    if count > 3 and a+1 > suspiciousRank: 
                      print("abnormal events alerted:")
                      print(sortedevent) 
                    """
                    i = 3
                    if count == 2 :
                        while i <= 4 :
                            print("========  ")  
                            print(i)  
                            i += 1
                        #if 'EVENT_UPDATE' in eventStr:
                        print ("observed:" + 'EVENT_WRITE' + ' ' + args.arg_1)
                        #print ("observed:" + data_loader.event_vocab_rev.get(e) + ' ' + args.arg_2)
                        print("abnormal alerted:")
                        print(sortedevent) 
                        i = 1
                        while i <= 10 :
                            print("========  ")  
                            print(i)  
                            i += 1
                        


                    
                    #e = eventval.eval()                    
                    #arg1 = argval1.eval()
                    #arg2 = argval2.eval()

                    #print(data_loader.event_vocab_rev.get(e[0]), data_loader.event_vocab_rev.get(arg1[0]), data_loader.event_vocab_rev.get(arg2[0]))
                    
            end = time.time()
            print("time/batch = {:.3f}".format(end - start))
Example #23
0
def train(args):
    # Data Preparation
    # ====================================

    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size
    print("Number of sentences: {}" .format(data_loader.num_data))
    print("Vocabulary size: {}" .format(args.vocab_size))

    
    # Check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(args.init_from)," %s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"config.pkl")),"config.pkl file does not exist in path %s"%args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"words_vocab.pkl")),"words_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt,"No checkpoint found"
        assert ckpt.model_checkpoint_path,"No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = pickle.load(f)
        need_be_same=["rnn_size","num_layers","seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme]==vars(args)[checkme],"Command line argument and saved model disagree on '%s' "%checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'words_vocab.pkl'), 'rb') as f:
            saved_words, saved_vocab = pickle.load(f)
        assert saved_words==data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab==data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"
    
    
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.words, data_loader.vocab), f)
    
    """
    embedding_matrix = get_vocab_embedding(args.save_dir, data_loader.words, args.embedding_file)
    print("Embedding matrix shape:",embedding_matrix.shape)
    """
    
    
    # Training
    # ====================================
    with tf.Graph().as_default():
        with tf.Session() as sess:
            model = BasicLSTM(args)
          
            # Define training procedure
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(args.learning_rate)
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(model.cost, tvars), args.grad_clip)
            train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step)

            
            # Keep track of gradient values and sparsity
            grad_summaries = []
            for g, v in zip(grads, tvars):
                if g is not None:
                    grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            
            # Summary for loss
            loss_summary = tf.summary.scalar("loss", model.cost)

            # Train summaries
            merged = tf.summary.merge_all()
            if not os.path.exists(args.log_dir):
                os.makedirs(args.log_dir)
            train_writer = tf.summary.FileWriter(args.log_dir, sess.graph)

            saver = tf.train.Saver(tf.global_variables())

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Restore model
            if args.init_from is not None:
                saver.restore(sess, ckpt.model_checkpoint_path)

            # Start training
            print("Start training")
            for epoch in range(args.num_epochs):
                data_loader.reset_batch_pointer()
                state = sess.run(model.initial_state)
                for i in range(data_loader.num_batches):
                    start = time.time()
                    x_batch, y_batch = data_loader.next_batch()
                    feed_dict = {model.x: x_batch, model.y: y_batch, model.keep_prob: args.keep_prob }
                    _, step, summary, loss, equal = sess.run([train_op, global_step, merged, model.cost, model.equal], feed_dict)
                   
                    print("training step {}, epoch {}, batch {}/{}, loss: {:.4f}, accuracy: {:.4f}, time/batch: {:.3f}"
                        .format(step, epoch, i, data_loader.num_batches, loss, np.mean(equal), time.time()-start))
                    train_writer.add_summary(summary, step)

                    current_step = tf.train.global_step(sess, global_step)
                    if current_step % args.save_every == 0 or (epoch == args.num_epochs-1 
                        and i == data_loader.num_batches-1): #save for the last result
                        checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                        path = saver.save(sess, checkpoint_path, global_step = current_step)
                        print("Saved model checkpoint to {}".format(path))

            train_writer.close()
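Examples #21 and #23 clip gradients with tf.clip_by_global_norm before applying them. For intuition, the rescaling rule restated in NumPy (a sketch, not the TF implementation):

import numpy as np

def clip_by_global_norm(grads, clip_norm):
    # Scale every gradient by clip_norm / max(global_norm, clip_norm):
    # a no-op when the joint norm is already below clip_norm, otherwise
    # a uniform shrink that preserves the gradient direction.
    global_norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
    scale = clip_norm / max(global_norm, clip_norm)
    return [g * scale for g in grads], global_norm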
Example #24
0
def train(args):

    start_time = datetime.now()

    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length,
                             args.test_flag)
    args.vocab_size = data_loader.vocab_size
    args.save_dir += '_bit_{}'.format(args.w_bit)
    result_file_path = 'result/bit_{}_{}.txt'.format(args.w_bit,
                                                     args.test_flag)

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), "%s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "chars_vocab.pkl")
        ), "chars_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars == data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)
    block_size = args.block_size

    with tf.Session() as sess:
        # instrument for tensorboard

        # tf.contrib.quantize.create_training_graph(quant_delay=2000000)
        # tf.contrib.quantize.create_eval_graph()

        summaries = tf.summary.merge_all()
        writer = tf.summary.FileWriter(
            os.path.join(args.log_dir, time.strftime("%Y-%m-%d-%H-%M-%S")))
        writer.add_graph(sess.graph)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(args.num_epochs):
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            loss_list = []
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                for i, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[i].c
                    feed[h] = state[i].h

                # instrument for tensorboard
                summ, train_loss, state, _ = sess.run(
                    [summaries, model.cost, model.final_state, model.train_op],
                    feed)
                writer.add_summary(summ, e * data_loader.num_batches + b)

                end = time.time()
                print(
                    "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches, e,
                            train_loss, end - start))
                loss_list.append(train_loss)
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                        or (e == args.num_epochs-1 and
                            b == data_loader.num_batches-1):
                    # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
                # optim_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
                # print([v for v in optim_vars]) #=> prints lists of vars created
                # total number of trainable parameters (recomputed every batch
                # and never printed here; see the helper sketch after this
                # example)
                size = np.sum([
                    np.prod(v.get_shape().as_list())
                    for v in tf.trainable_variables()
                ])

            print("mean_loss for this epoch:{:.3f}".format(
                sum(loss_list) / float(len(loss_list))))
            with open(result_file_path, 'a') as f:
                print("mean_loss for this epoch:{:.3f}".format(
                    sum(loss_list) / float(len(loss_list))),
                      file=f)

    print("Run time: {}".format(datetime.now() - start_time))
    with open(result_file_path, 'a') as f:
        print("Run time: {}".format(datetime.now() - start_time), file=f)
Example #25
0
def train(args):

    display_step = 100
    num_train = 20000
    train_input, train_output, train_length, max_length = get_training_data(args, 'train', num_train, 0)
    test_input, test_output, test_length, max_length = get_training_data(args, 'test', 25000, 50000)
    val_input, val_output, val_length, max_length = get_training_data(args, 'val', 25000, 75000)

    #for i in range(2):
    #  print('i: ' + str(i) + ' => ' + str(train_input[i,:]))

    train_input = train_input.astype(int)

    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = 50000 #data_loader.vocab_size
    
    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist 
        assert os.path.isdir(args.init_from)," %s must be a a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"config.pkl")),"config.pkl file does not exist in path %s"%args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"chars_vocab.pkl")),"chars_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt,"No checkpoint found"
        assert ckpt.model_checkpoint_path,"No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same=["model","rnn_size","num_layers","seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme]==vars(args)[checkme],"Command line argument and saved model disagree on '%s' "%checkme
        
        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars==data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab==data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"
        
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)
        
    model = Model(args)

    print("num_layers: ", args.num_layers)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()

            step = 0
            ptr = 0

            print('train_input: ', train_input.shape)

            while step < num_train / args.batch_size:
                b = step
                step += 1
                start = time.time()

                # inputs batch
                x = np.squeeze(train_input[ptr:ptr + args.batch_size, :args.batch_size])
                # targets batch: the inputs shifted by one position
                y = np.squeeze(train_input[ptr:ptr + args.batch_size, 1:args.batch_size + 1])
                ptr += args.batch_size + 1
                # x, y = data_loader.next_batch()

                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                tt, calc_res, reg_cost, train_loss, state, _ = sess.run(
                    [model.target_vector, model.logits, model.reg_cost,
                     model.cost, model.final_state, model.train_op], feed)
                print('out len: ', len(tt))
                print('target: ', tt)
                print('calc_res: ', calc_res)
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}, reg_cost = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start, reg_cost))
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                    or (e == args.num_epochs - 1 and b == data_loader.num_batches - 1):  # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))

                if step % display_step == 0:
                    print('x: ', x[1])
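The x/y construction in this example is the standard shift-by-one trick for next-token prediction: targets are the inputs advanced by one position. A minimal restatement (using a seq_length window width is an assumption here; the original slices by args.batch_size):

import numpy as np

def make_xy(windows, seq_length):
    # windows: int array of shape (batch, seq_length + 1)
    x = windows[:, :seq_length]       # tokens 0 .. T-1
    y = windows[:, 1:seq_length + 1]  # tokens 1 .. T (next-token targets)
    return x, y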
Example #26
0
def train(args):
    # load the vocabulary and get its size
    data_loader = TextLoader(args.batch_size)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), "%s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "chars_vocab.pkl")
        ), "chars_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"
        assert os.path.isfile(
            os.path.join(args.init_from, "iterations")
        ), "iterations file does not exist in path %s " % args.init_from

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)

        need_be_same = ["model", "rnn_size", "num_layers"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars == data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    # save this run's configuration
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    # save the vocabulary and the char-to-index dictionary
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    # build the model
    model = Model(args)

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        iterations = 0
        # restore model and number of iterations
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
            # the iterations file was asserted to exist under init_from above
            with open(os.path.join(args.init_from, 'iterations'), 'rb') as f:
                iterations = cPickle.load(f)

        losses = []
        for e in range(args.num_epochs):
            # exponentially decay the learning rate
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            # reset the batch pointer at the start of each epoch
            data_loader.reset_batch_pointer()

            for b in range(data_loader.num_batches):
                iterations += 1

                start = time.time()
                # fetch one batch of input and target data
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                train_loss, _, _ = sess.run(
                    [model.cost, model.final_state, model.train_op], feed)
                end = time.time()

                sys.stdout.write('\r')
                info = "{}/{} (epoch {}), train_loss = {:.3f}, iterations = {} time/batch = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, iterations, end - start)
                sys.stdout.write(info)
                sys.stdout.flush()

                losses.append(train_loss)

                if (e * data_loader.num_batches + b + 1) % args.save_every == 0\
                    or (e == args.num_epochs - 1 and b == data_loader.num_batches - 1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=iterations)
                    with open(os.path.join(args.save_dir, "iterations"),
                              'wb') as f:
                        cPickle.dump(iterations, f)
                    with open(
                            os.path.join(args.save_dir,
                                         "losses-" + str(iterations)),
                            'wb') as f:
                        cPickle.dump(losses, f)
                    losses = []
                    sys.stdout.write('\n')
                    print("model saved to {}".format(checkpoint_path))
            sys.stdout.write('\n')
Example #27
0
def cross_validation(args):
    data_loader = TextLoader(args.utils_dir, args.data_path, args.batch_size, args.seq_length, None, None)
    args.vocab_size = data_loader.vocab_size
    args.label_size = data_loader.label_size

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.chars, data_loader.vocab), f)
    with open(os.path.join(args.save_dir, 'labels.pkl'), 'wb') as f:
        pickle.dump(data_loader.labels, f)

    data = data_loader.tensor.copy()
    np.random.shuffle(data)
    data_list = np.array_split(data, 10, axis=0)

    model = Model(args)
    accuracy_list = []

    with tf.Session() as sess:
        for n in range(10):
            init = tf.initialize_all_variables()
            sess.run(init)
            saver = tf.train.Saver(tf.all_variables())

            test_data = data_list[n].copy()
            train_data = np.concatenate(map(lambda i: data_list[i], [j for j in range(10) if j!=n]), axis=0)
            data_loader.tensor = train_data

            for e in range(args.num_epochs):
                sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
                data_loader.reset_batch_pointer()

                for b in range(data_loader.num_batches):
                    start = time.time()
                    state = model.initial_state.eval()
                    x, y = data_loader.next_batch()
                    feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                    train_loss, state, _, accuracy = sess.run([model.cost, model.final_state, model.optimizer, model.accuracy], feed_dict=feed)
                    end = time.time()
                    print('{}/{} (epoch {}), train_loss = {:.3f}, accuracy = {:.3f}, time/batch = {:.3f}'
                          .format(e * data_loader.num_batches + b + 1,
                                  args.num_epochs * data_loader.num_batches,
                                  e + 1,
                                  train_loss,
                                  accuracy,
                                  end - start))
                    if (e*data_loader.num_batches+b+1) % args.save_every == 0 \
                        or (e==args.num_epochs-1 and b==data_loader.num_batches-1):
                        checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=e*data_loader.num_batches+b+1)
                        print('model saved to {}'.format(checkpoint_path))

            n_chunks = len(test_data) // args.batch_size
            if len(test_data) % args.batch_size:
                n_chunks += 1
            test_data_list = np.array_split(test_data, n_chunks, axis=0)

            correct_total = 0.0
            num_total = 0.0
            for m in range(n_chunks):
                start = time.time()
                x = test_data_list[m][:, :-1]
                y = test_data_list[m][:, -1]
                results = model.predict_class(sess, x)
                correct_num = np.sum(results==y)
                end = time.time()

                correct_total += correct_num
                num_total += len(x)

            accuracy_total = correct_total / num_total
            accuracy_list.append(accuracy_total)
            print('total_num = {}, total_accuracy = {:.6f}'.format(int(num_total), accuracy_total))

    accuracy_average = np.average(accuracy_list)
    print('The average accuracy of cross_validation is {}'.format(accuracy_average))
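
The fold bookkeeping in cross_validation (np.array_split into 10 chunks, train on 9, test on 1) can be sanity-checked in isolation. A minimal sketch of the same split logic, with a dummy array standing in for data_loader.tensor:

import numpy as np

data = np.arange(100).reshape(50, 2)  # stand-in for data_loader.tensor
np.random.shuffle(data)
data_list = np.array_split(data, 10, axis=0)

for n in range(10):
    test_data = data_list[n]
    # a list comprehension (not map) so np.concatenate gets a real sequence
    train_data = np.concatenate([data_list[i] for i in range(10) if i != n], axis=0)
    assert len(train_data) + len(test_data) == len(data)
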
Example #28
0
def train(args):
    model_name = args.data_dir.split("/")[-1]
    # make a dir to store checkpoints
    args.save_dir = os.path.join('checkpoints', model_name)
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(args.init_from)," %s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"config.pkl")),"config.pkl file does not exist in path %s"%args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"chars_vocab.pkl")),"chars_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme]==vars(args)[checkme],"Command line argument and saved model disagree on '%s' "%checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars==data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab==data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        # instrument for tensorboard
        summaries = tf.summary.merge_all()
        writer = tf.summary.FileWriter(
                os.path.join(args.log_dir, time.strftime("%Y-%m-%d-%H-%M-%S")))
        writer.add_graph(sess.graph)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                for i, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[i].c
                    feed[h] = state[i].h

                # instrument for tensorboard
                summ, train_loss, state, _ = sess.run([summaries, model.cost, model.final_state, model.train_op], feed)
                writer.add_summary(summ, e * data_loader.num_batches + b)

                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                      .format(e * data_loader.num_batches + b,
                              args.num_epochs * data_loader.num_batches,
                              e, train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                        or (e == args.num_epochs-1 and b == data_loader.num_batches-1):
                    # remove previous checkpoints
                    current_checkpoints = [f for f in os.listdir(args.save_dir) if os.path.isfile(os.path.join(args.save_dir, f))]
                    for f in current_checkpoints:
                        if model_name in f:
                            os.remove(os.path.join(args.save_dir, f))
                    # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, model_name)
                    saver.save(sess, checkpoint_path, global_step=e * data_loader.num_batches + b)
                    final_model = '{}-{}'.format(model_name, e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))

    # get the vocab
    model_vocab = getModelVocab(model_name)
    # dump the checkpoints to javascript
    dump_checkpoints(model_vocab, model_name, final_model)
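
Example #28 deletes older checkpoint files by hand before each save. tf.train.Saver can do that bookkeeping itself via max_to_keep, which also keeps the checkpoint index file consistent. A sketch, assuming a TF 1.x graph has already been built:

import tensorflow as tf

# keep only the most recent checkpoint; older ones are removed automatically
saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
# later, inside the session: saver.save(sess, checkpoint_path, global_step=step)
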
Example #29
0
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)

    if args.init_from is not None:
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

    Disc = Discriminator(args)
    Gen = Generator(args)
    #    D_tvars = [Disc.W1,Disc.W2]
    #    G_tvars = [Gen.weight]
    fp1 = open('G_loss_training', 'w')
    fp2 = open('D_loss_training', 'w')

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(args.num_epochs):
            print(str(e) + 'th epoch')
            sess.run(tf.assign(Disc.lr, args.disc_learning_rate))
            sess.run(tf.assign(Gen.lr, args.gen_learning_rate))
            data_loader.reset_batch_pointer()
            for b in range(data_loader.num_batches):
                start = time.time()
                con, res = data_loader.next_batch()
                real_data = np.concatenate((con, res), axis=1)
                fake_data = sess.run(Fake_data,
                                     feed_dict={Gen.input_data: con})

                D_real, D_logit_real = sess.run(
                    [prob, logit], feed_dict={Disc.input_data: real_data})
                D_fake, D_logit_fake = sess.run(
                    [prob, logit], feed_dict={Disc.input_data: fake_data})

                D_loss = -tf.reduce_mean(tf.log(D_real) + tf.log(1 - D_fake))
                G_loss = -tf.reduce_mean(tf.log(D_fake))

                D_tvars = [v for v in t_vars if v.name.startswith('disc')]
                G_tvars = [v for v in t_vars if v.name.startswith('gen')]
                D_solver = tf.train.AdamOptimizer(Disc.lr).minimize(
                    D_loss, var_list=D_tvars)
                G_solver = tf.train.AdamOptimizer(Gen.lr).minimize(
                    G_loss, var_list=G_tvars)

                _, d_loss = sess.run([D_solver, D_loss],
                                     feed_dict={
                                         Disc.input_data: real_data,
                                         Gen.input_data: con
                                     })
                _, g_loss = sess.run([G_solver, G_loss],
                                     feed_dict={
                                         Disc.input_data: fake_data,
                                         Gen.input_data: con
                                     })

                fp1.write(str(g_loss) + '\n')
                fp2.write(str(d_loss) + '\n')
                end = time.time()
                print("{}/{} (epoch {}), Generator_loss = {:.3f}, Discriminator_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, g_loss, d_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                    or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
        fp1.close()
        fp2.close()
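
A structural caveat with Example #29: D_loss, G_loss and the two AdamOptimizer minimize ops are constructed inside the batch loop, so every batch adds fresh nodes (and fresh, never-initialized Adam slot variables) to the graph. The usual pattern is to build the losses and train ops once, before the session, and only run them in the loop. A hedged sketch with toy stand-in variables for the 'disc'/'gen' scopes:

import tensorflow as tf

# Minimal stand-ins: one 'disc' and one 'gen' variable, and discriminator
# probabilities that depend on them, so the gradients below are well defined.
disc_w = tf.get_variable('disc_w', initializer=0.0)
gen_w = tf.get_variable('gen_w', initializer=0.0)
D_real = tf.sigmoid(disc_w)
D_fake = tf.sigmoid(disc_w - gen_w)

# Losses and train ops are created once, not per batch.
D_loss = -tf.reduce_mean(tf.log(D_real) + tf.log(1.0 - D_fake))
G_loss = -tf.reduce_mean(tf.log(D_fake))

t_vars = tf.trainable_variables()
D_solver = tf.train.AdamOptimizer(1e-4).minimize(
    D_loss, var_list=[v for v in t_vars if v.name.startswith('disc')])
G_solver = tf.train.AdamOptimizer(1e-4).minimize(
    G_loss, var_list=[v for v in t_vars if v.name.startswith('gen')])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())  # also initializes Adam slots
    for _ in range(10):  # the batch loop only *runs* the prebuilt ops
        sess.run(D_solver)
        sess.run(G_solver)
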
Example #30
0
def train(args):
    data_loader = TextLoader(args.data_dir,
                             args.batch_size,
                             args.seq_length,
                             partition='train')
    eval_data_loader = TextLoader(args.data_dir,
                                  args.batch_size,
                                  args.seq_length,
                                  partition='eval')
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "chars_vocab.pkl")
        ), "chars_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars == data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    # model = Model(args, opt_method="Adam")
    model = Model(args, opt_method="SGD")
    loss_list = []
    eval_loss_list = []
    gpu_mem_portion = 0.005
    n_core = 16
    with tf.Session(config=tf.ConfigProto(
            intra_op_parallelism_threads=n_core,
            inter_op_parallelism_threads=n_core,
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=gpu_mem_portion))
                    ) as sess, tf.device("cpu:0") as devices:
        # instrument for tensorboard
        summaries = tf.summary.merge(model.train_summary)
        writer = tf.summary.FileWriter(
            os.path.join(args.log_dir, time.strftime("%Y-%m-%d-%H-%M-%S")))
        writer.add_graph(sess.graph)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)

        # do evaluation
        e = -1
        eval_loss = 0.0
        start = time.time()
        eval_data_loader.reset_batch_pointer()
        state = sess.run(model.initial_state)
        for b in range(eval_data_loader.num_batches):
            x, y = eval_data_loader.next_batch()
            feed = {model.input_data: x, model.targets: y}
            for i, (c, h) in enumerate(model.initial_state):
                feed[c] = state[i].c
                feed[h] = state[i].h
            eval_loss_batch, state = sess.run(
                [model.eval_cost, model.final_state], feed)
            eval_loss += eval_loss_batch
        eval_loss /= eval_data_loader.num_batches
        # instrument for tensorboard
        summ = tf.Summary(value=[
            tf.Summary.Value(tag="eval_loss", simple_value=eval_loss),
        ])
        writer.add_summary(summ, (e + 1) * data_loader.num_batches)

        eval_loss_list.append([(e + 1) * data_loader.num_batches, eval_loss])
        end = time.time()
        print(
            "{}/{} (epoch {}), eval_loss = {:.3f}, time/batch = {:.3f}".format(
                (e + 1) * data_loader.num_batches,
                args.num_epochs * data_loader.num_batches, 0, eval_loss,
                end - start))

        for e in range(args.num_epochs):
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                for i, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[i].c
                    feed[h] = state[i].h
                # train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)

                # instrument for tensorboard
                summ, train_loss, state, _ = sess.run(
                    [summaries, model.cost, model.final_state, model.train_op],
                    feed)
                writer.add_summary(summ, e * data_loader.num_batches + b)

                loss_list.append(train_loss)

                end = time.time()
                print(
                    "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches, e,
                            train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                        or (e == args.num_epochs-1 and
                            b == data_loader.num_batches-1):
                    # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))

            # do evaluation
            eval_loss = 0.0
            start = time.time()
            print("start evaluation")
            eval_data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            for b in range(eval_data_loader.num_batches):
                x, y = eval_data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                for i, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[i].c
                    feed[h] = state[i].h
                eval_loss_batch, state = sess.run(
                    [model.eval_cost, model.final_state], feed)
                eval_loss += eval_loss_batch
            eval_loss /= eval_data_loader.num_batches
            # instrument for tensorboard
            summ = tf.Summary(value=[
                tf.Summary.Value(tag="eval_loss", simple_value=eval_loss),
            ])
            writer.add_summary(summ, (e + 1) * data_loader.num_batches)

            eval_loss_list.append([(e + 1) * data_loader.num_batches,
                                   eval_loss])
            end = time.time()
            print("{}/{} (epoch {}), eval_loss = {:.3f}, time/batch = {:.3f}".
                  format((e + 1) * data_loader.num_batches,
                         args.num_epochs * data_loader.num_batches, e,
                         eval_loss, end - start))

            with open(args.log_dir + "/loss.txt", "w") as f:
                np.savetxt(f, np.array(loss_list))
            with open(args.log_dir + "/eval_loss.txt", "w") as f:
                np.savetxt(f, np.array(eval_loss_list))
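
Example #30 repeats the same evaluation loop twice, once before training and once per epoch. Pulling it into a helper keeps the two copies from drifting apart; a sketch, assuming the model exposes eval_cost, initial_state and final_state exactly as in the example:

def run_eval(sess, model, loader):
    """Average model.eval_cost over one pass of the eval partition."""
    loader.reset_batch_pointer()
    state = sess.run(model.initial_state)
    total = 0.0
    for _ in range(loader.num_batches):
        x, y = loader.next_batch()
        feed = {model.input_data: x, model.targets: y}
        for i, (c, h) in enumerate(model.initial_state):
            feed[c] = state[i].c
            feed[h] = state[i].h
        loss, state = sess.run([model.eval_cost, model.final_state], feed)
        total += loss
    return total / loader.num_batches
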
Example #31
0
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "chars_vocab.pkl")
        ), "chars_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars == data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        # instrument for tensorboard
        summaries = tf.summary.merge_all()
        writer = tf.summary.FileWriter(
            os.path.join(args.log_dir, time.strftime("%Y-%m-%d-%H-%M-%S")))
        writer.add_graph(sess.graph)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(args.num_epochs):
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            for b in range(data_loader.num_batches):
                start = time.time()
                weights = sess.run(model.tvars)  # get the hidden weights
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                for i, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[i].c
                    feed[h] = state[i].h
                # instrument for tensorboard; one combined run, since running
                # model.train_op twice would apply two updates per batch
                summ, train_loss, state, _ = sess.run(
                    [summaries, model.cost, model.final_state, model.train_op],
                    feed)
                writer.add_summary(summ, e * data_loader.num_batches + b)

                # get the current input in character form
                my_dict = dict((y, x) for x, y in data_loader.vocab.items())
                chars = [my_dict[i] for i in list(np.squeeze(x, axis=0))]

                # print the current sequence of characters and weight values
                print("Batch of Characters: ", chars)

                print("Layer 1 Weights.shape: ", weights[3].shape)
                print("Layer 1 Biases.shape: ", weights[4].shape)
                print("Layer 2 Weights.shape: ", weights[5].shape)
                print("Layer 2 Biases.shape: ", weights[6].shape)

                print("Layer 1 Weights: ", weights[3])
                print("Layer 1 Biases: ", weights[4])
                print("Layer 2 Weights: ", weights[5])
                print("Layer 2 Biases: ", weights[6])

                end = time.time()
                print(
                    "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches, e,
                            train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                        or (e == args.num_epochs-1 and
                            b == data_loader.num_batches-1):
                    # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
Example #32
0
def train(args):

    data_loader = TextLoader(args.data_path, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size
    args.file_size = data_loader.file_size
    print("Vocab size: ",args.vocab_size)
    print("File size: ",args.file_size)
    args.lower_bound = 0 #If we know the entropy then we set it to this
    data_info = {}
    if args.info_path is not None:
        assert os.path.isfile(args.info_path),"Info file not found in the path: %s"%args.info_path

        #Open the info file
        with open(args.info_path, 'rb') as f:
            data_info = json.load(f)
            #Assuming we know entropy
            args.lower_bound = data_info['Entropy']
            print(data_info)

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist 
        assert os.path.isdir(args.init_from)," %s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"config.pkl")),"config.pkl file does not exist in path %s"%args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"chars_vocab.pkl")),"chars_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt,"No checkpoint found"
        assert ckpt.model_checkpoint_path,"No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same=["model","rnn_size","num_layers","seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme]==vars(args)[checkme],"Command line argument and saved model disagree on '%s' "%checkme
        
        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars==data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab==data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"
        
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)
        
    
    ##################################################
    # Get the model
    ##################################################
    model = Model(args)
    print("model Loaded")

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        writer = tf.summary.FileWriter(args.summary_dir,sess.graph)
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        
        ######################################################
        # Perform the training
        #####################################################
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer() # rewind the batch pointer to the start of the data
            state = sess.run(model.initial_state) # zero-valued initial RNN state
            cumul_loss = 0
             
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                
                for i, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[i].c
                    feed[h] = state[i].h
                summary, train_loss, state, _ = sess.run([model.merged_summaries, model.cost, model.final_state, model.train_op], feed)
                train_loss /= np.log(2) # convert the cross-entropy from nats to bits
                cumul_loss += train_loss
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                    or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))

                if b%10 == 0:
                    writer.add_summary(summary,e*data_loader.num_batches + b)
             
            cumul_loss /= data_loader.num_batches
            print("Epoch {}: Cumulative Loss for the epoch: {:.3f}".format(e,cumul_loss))
            if (abs(cumul_loss - args.lower_bound) < 0.1):
                print("Stopping Training as we get a good loss.. :) ... ") 
                break    

        ##############################################################
        # Append details to the output file
        ##############################################################
        args.epoch_stopped=e+1
        args.last_epoch_loss = cumul_loss
        with open(args.output_path, 'ab') as f: # pickle requires a binary-mode file
            params = vars(args)
            params.update(data_info)
            cPickle.dump(params, f)

        with open(args.output_path+".json", 'a') as f:

            params = vars(args)
            params.update(data_info)
            json.dump(params, f,indent=2)
            #cPickle.dump(params)
            f.write("\n ############################################# \n")
Example #33
0
def train(args):
    data_loader = TextLoader(args.batch_size)
    args.poem_length = data_loader.poem_length
    #self.rhymes[ID] = self.rhyme_set.index(rhyme)
    print("Maximal length:", args.poem_length)

    print('finished reading file ...\n')

    args.vocab_size = data_loader.vocab_size

    print("Capture Rules Suceessfully")

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "chars_vocab.pkl")
        ), "chars_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"
        assert os.path.isfile(
            os.path.join(args.init_from, "iterations")
        ), "iterations file does not exist in path %s " % args.init_from

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab, saved_rhymes = cPickle.load(f)

        assert saved_chars == data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"
        assert saved_rhymes == data_loader.rhymes, "Data and loaded model disagree on rhyme mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump(
            (data_loader.chars, data_loader.vocab, data_loader.rhymes), f)

    model = Model(args)

    def duplicate(x):
        # copy a 2-D array into two identical channels -> shape (i, j, 2)
        i, j = x.shape
        k = 2
        xx = np.empty([i, j, k])
        for ii in range(i):
            for jj in range(j):
                for kk in range(k):
                    xx[ii, jj, kk] = x[ii, jj]
        return xx

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        iterations = 0
        # restore model and number of iterations
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
            with open(os.path.join(args.save_dir, 'iterations'), 'rb') as f:
                iterations = cPickle.load(f)
        losses = []
        for e in range(args.num_epochs):
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            data_loader.reset_batch_pointer()
            for b in range(data_loader.num_batches):
                iterations += 1
                start = time.time()
                xdata, ydata, xrhyme, yrhyme = data_loader.next_batch()

                #xx = duplicate(x)
                #yy = duplicate(y)

                #feed = {model.input_data: xx,
                #        model.targets: yy}

                feed = {
                    model.input_data: xdata,
                    model.input_rhyme: xrhyme,
                    model.target_data: ydata
                }

                train_loss, _, _ = sess.run(
                    [model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                sys.stdout.write('\r')
                info = "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start)
                sys.stdout.write(info)
                sys.stdout.flush()
                losses.append(train_loss)
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                    or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=iterations)
                    with open(os.path.join(args.save_dir, "iterations"),
                              'wb') as f:
                        cPickle.dump(iterations, f)
                    with open(
                            os.path.join(args.save_dir,
                                         "losses-" + str(iterations)),
                            'wb') as f:
                        cPickle.dump(losses, f)
                    losses = []
                    sys.stdout.write('\n')
                    print("model saved to {}".format(checkpoint_path))
            sys.stdout.write('\n')
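
The duplicate helper in Example #33 (unused in the final feed dict) copies a 2-D array into two identical channels with three nested loops; the same result is a one-liner with np.repeat. A sketch:

import numpy as np

x = np.arange(6).reshape(2, 3)
xx = np.repeat(x[:, :, np.newaxis], 2, axis=2)  # shape (2, 3, 2)
assert np.array_equal(xx[:, :, 0], x) and np.array_equal(xx[:, :, 1], x)
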
Example #34
0
def train(args):
    if args.continue_training in ['True', 'true']:
        args.continue_training = True
    else:
        args.continue_training = False

    data_loader = TextLoader(True, args.utils_dir, args.data_path,
                             args.batch_size, args.seq_length, None, None)
    args.vocab_size = data_loader.vocab_size
    args.label_size = data_loader.label_size

    if args.continue_training:
        assert os.path.isfile(
            os.path.join(args.save_dir, 'config.pkl')
        ), 'config.pkl file does not exist in path %s' % args.save_dir
        assert os.path.isfile(
            os.path.join(args.utils_dir, 'chars_vocab.pkl')
        ), 'chars_vocab.pkl file does not exist in path %s' % args.utils_dir
        assert os.path.isfile(
            os.path.join(args.utils_dir, 'labels.pkl')
        ), 'labels.pkl file does not exist in path %s' % args.utils_dir
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        assert ckpt, 'No checkpoint found'
        assert ckpt.model_checkpoint_path, 'No model path found in checkpoint'

        with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
            saved_model_args = pickle.load(f)
        need_be_same = ['model', 'rnn_size', 'num_layers', 'seq_length']
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], 'command line argument and saved model disagree on %s' % checkme

        with open(os.path.join(args.utils_dir, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = pickle.load(f)
        with open(os.path.join(args.utils_dir, 'labels.pkl'), 'rb') as f:
            saved_labels = pickle.load(f)
        assert saved_chars == data_loader.chars, 'data and loaded model disagree on character set'
        assert saved_vocab == data_loader.vocab, 'data and loaded model disagree on dictionary mappings'
        assert saved_labels == data_loader.labels, 'data and loaded model disagree on label dictionary mappings'

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    with open(os.path.join(args.utils_dir, 'chars_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.chars, data_loader.vocab), f)
    with open(os.path.join(args.utils_dir, 'labels.pkl'), 'wb') as f:
        pickle.dump(data_loader.labels, f)

    model = Model(args)

    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        saver = tf.train.Saver(tf.global_variables())

        if args.continue_training:
            saver.restore(sess, ckpt.model_checkpoint_path)

        for e in range(args.num_epochs):
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            data_loader.reset_batch_pointer()

            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                train_loss, state, _, accuracy = sess.run([
                    model.cost, model.final_state, model.optimizer,
                    model.accuracy
                ],
                                                          feed_dict=feed)
                end = time.time()
                print '{}/{} (epoch {}), train_loss = {:.3f}, accuracy = {:.3f}, time/batch = {:.3f}'\
                    .format(e * data_loader.num_batches + b + 1,
                            args.num_epochs * data_loader.num_batches,
                            e + 1,
                            train_loss,
                            accuracy,
                            end - start)
                if (e*data_loader.num_batches+b+1) % args.save_every == 0 \
                    or (e==args.num_epochs-1 and b==data_loader.num_batches-1):
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b + 1)
                    print 'model saved to {}'.format(checkpoint_path)
Example #35
0
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length, args.input_encoding)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(args.init_from)," %s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"config.pkl")),"config.pkl file does not exist in path %s"%args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"words_vocab.pkl")),"words_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt,"No checkpoint found"
        assert ckpt.model_checkpoint_path,"No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same=["model","rnn_size","num_layers","seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme]==vars(args)[checkme],"Command line argument and saved model disagree on '%s' "%checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'words_vocab.pkl'), 'rb') as f:
            saved_words, saved_vocab = cPickle.load(f)
        assert saved_words==data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab==data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.words, data_loader.vocab), f)

    model = Model(args)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(args.log_dir)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        train_writer.add_graph(sess.graph)
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(model.epoch_pointer.eval(), args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            speed = 0
            if args.init_from is None:
                assign_op = model.epoch_pointer.assign(e)
                sess.run(assign_op)
            if args.init_from is not None:
                data_loader.pointer = model.batch_pointer.eval()
                args.init_from = None
            for b in range(data_loader.pointer, data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state,
                        model.batch_time: speed}
                summary, train_loss, state, _, _ = sess.run([merged, model.cost, model.final_state,
                                                             model.train_op, model.inc_batch_pointer_op], feed)
                train_writer.add_summary(summary, e * data_loader.num_batches + b)
                speed = time.time() - start
                if (e * data_loader.num_batches + b) % args.batch_size == 0:
                    print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                        .format(e * data_loader.num_batches + b,
                                args.num_epochs * data_loader.num_batches,
                                e, train_loss, speed))
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
        train_writer.close()
Example #36
0
def train(args):
    if args.continue_training in ['True', 'true']:
        args.continue_training = True
    else:
        args.continue_training = False

    data_loader = TextLoader(True, args.utils_dir, args.data_path, args.batch_size, args.seq_length, None, None)
    args.vocab_size = data_loader.vocab_size
    args.label_size = data_loader.label_size

    if args.continue_training:
        assert os.path.isfile(os.path.join(args.save_dir, 'config.pkl')), 'config.pkl file does not exist in path %s' % args.save_dir
        assert os.path.isfile(os.path.join(args.utils_dir, 'chars_vocab.pkl')), 'chars_vocab.pkl file does not exist in path %s' % args.utils_dir
        assert os.path.isfile(os.path.join(args.utils_dir, 'labels.pkl')), 'labels.pkl file does not exist in path %s' % args.utils_dir
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        assert ckpt, 'No checkpoint found'
        assert ckpt.model_checkpoint_path, 'No model path found in checkpoint'

        with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
            saved_model_args = pickle.load(f)
        need_be_same = ['model', 'rnn_size', 'num_layers', 'seq_length']
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme]==vars(args)[checkme], 'command line argument and saved model disagree on %s' % checkme

        with open(os.path.join(args.utils_dir, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = pickle.load(f)
        with open(os.path.join(args.utils_dir, 'labels.pkl'), 'rb') as f:
            saved_labels = pickle.load(f)
        assert saved_chars==data_loader.chars, 'data and loaded model disagree on character set'
        assert saved_vocab==data_loader.vocab, 'data and loaded model disagree on dictionary mappings'
        assert saved_labels==data_loader.labels, 'data and loaded model disagree on label dictionary mappings'

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    with open(os.path.join(args.utils_dir, 'chars_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.chars, data_loader.vocab), f)
    with open(os.path.join(args.utils_dir, 'labels.pkl'), 'wb') as f:
        pickle.dump(data_loader.labels, f)

    model = Model(args)

    with tf.Session() as sess:
        init = tf.initialize_all_variables()
        sess.run(init)
        saver = tf.train.Saver(tf.all_variables())

        if args.continue_training:
            saver.restore(sess, ckpt.model_checkpoint_path)

        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()

            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                train_loss, state, _, accuracy = sess.run([model.cost, model.final_state, model.optimizer, model.accuracy], feed_dict=feed)
                end = time.time()
                print '{}/{} (epoch {}), train_loss = {:.3f}, accuracy = {:.3f}, time/batch = {:.3f}'\
                    .format(e * data_loader.num_batches + b + 1,
                            args.num_epochs * data_loader.num_batches,
                            e + 1,
                            train_loss,
                            accuracy,
                            end - start)
                if (e*data_loader.num_batches+b+1) % args.save_every == 0 \
                    or (e==args.num_epochs-1 and b==data_loader.num_batches-1):
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=e*data_loader.num_batches+b+1)
                    print 'model saved to {}'.format(checkpoint_path)
Example #37
0
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=target_data))

# Declare the optimizer and apply gradient clipping to it.
# Gradients whose global norm exceeds grad_clip (=5) are clipped down to 5.
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), grad_clip)
optimizer = tf.train.AdamOptimizer(learning_rate)
train_step = optimizer.apply_gradients(zip(grads, tvars))

# Open a session and run the training.
with tf.Session() as sess:
    # Assign initial values to the variables.
    sess.run(tf.global_variables_initializer())

    for e in range(num_epochs):
        data_loader.reset_batch_pointer()
        # Set the initial state.
        state = sess.run(initial_state, feed_dict={state_batch_size: batch_size})

        for b in range(data_loader.num_batches):
            # Load the x, y data for this batch.
            x, y = data_loader.next_batch()
            # Apply one-hot encoding to y.
            y = tf.one_hot(y, vocab_size)  # y : [batch_size, seq_length, vocab_size]
            y = tf.reshape(y, [-1, vocab_size])  # y : [batch_size * seq_length, vocab_size]
            y = y.eval()

            # Build the feed dict, including the LSTM initial cell state (feed_dict[c]) and hidden output (feed_dict[h]).
            feed_dict = {input_data: x, target_data: y, state_batch_size: batch_size}
            for i, (c, h) in enumerate(initial_state):
                feed_dict[c] = state[i].c
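
In the snippet above, tf.one_hot and tf.reshape are called inside the batch loop, so every batch adds new ops to the graph before y.eval() materializes them, and the graph grows without bound. Doing the one-hot encoding in NumPy keeps the graph fixed; a sketch (vocab_size and the batch shown are illustrative):

import numpy as np

vocab_size = 5
y = np.array([[1, 3], [4, 0]])                      # [batch_size, seq_length]
y_onehot = np.eye(vocab_size, dtype=np.float32)[y]  # [batch, seq, vocab]
y_onehot = y_onehot.reshape(-1, vocab_size)         # [batch * seq, vocab]
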
Example #38
0
def train(args):
    """
    Trains a RNN model.

    Args:
        args (argparse): arguments to train the RNN.

    Returns:
        None.
    """
    s_time = time.time()

    # Check compatibility to continue training from a previous model.
    if args.init_from:
        assert os.path.isdir(args.init_from), \
            "{} does not exist".format(args.init_from)
        assert os.path.isfile(os.path.join(args.init_from, "config.pkl")), \
            "config.pkl file does not exist in path {}".format(args.init_from)

        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # Check if models are compatible.
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = pickle.load(f)
            check_list = ["rnn_size", "seq_length"]
            for check in check_list:
                assert vars(saved_model_args)[check] == vars(args)[check], \
                    "CLI argument and saved model disagree on %s".format(check)

    # Store configuration arguments.
    args.save_dir = os.path.join(args.save_dir, str(args.shard))
    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)
    args.save_dir = os.path.abspath(args.save_dir)
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)

    # Load input data.
    args.data_dir = os.path.join(args.data_dir, str(args.shard))
    if not os.path.isdir(args.data_dir):
        sys.exit('{} does not exist'.format(args.data_dir))
    args.data_dir = os.path.abspath(args.data_dir)
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)

    # Set logs directories.
    args.log_dir = os.path.join(args.log_dir, args.option)
    if not os.path.isdir(args.log_dir):
        os.makedirs(args.log_dir)
    args.log_dir = os.path.abspath(args.log_dir)

    # Create an instance of the tensorflow model.
    tf.reset_default_graph()
    model = Model(args)

    with tf.Session() as sess:
        # Tensorboard summaries.
        summaries = tf.summary.merge_all()
        writer = tf.summary.FileWriter(args.log_dir, sess.graph)

        # Initialize variables (weights and biases), with a Xavier uniform
        # initializer. See tf.glorot_uniform_initializer().
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        saver = tf.train.Saver()
        # Restore previous model and session.
        if args.init_from:
            saver.restore(sess, ckpt.model_checkpoint_path)

        losses, accuracies = [], []
        global_step_init = model.global_step.eval()
        # Run the model for training or validation/testing.
        for epoch_id in range(args.num_epochs):
            data_loader.reset_batch_pointer()

            # Reset the states at the beginning of each epoch.
            h_state = np.zeros([args.batch_size, args.rnn_size])
            c_state = np.zeros([args.batch_size, args.rnn_size])
            for batch_id in range(data_loader.num_batches):
                x, y = data_loader.next_batch()

                # Update the learning rate, with linear decay.
                global_step = model.global_step.eval()
                if args.lr_decay and global_step != 0:
                    total_weight_updates = args.train_bytes \
                        / (args.batch_size * args.seq_length)
                    lr = args.lr_init - (args.lr_init / total_weight_updates) \
                        * global_step
                    if lr < 1.5 * 10**-13:
                        lr = 1.5 * 10**-13
                else:
                    lr = args.lr_init
                sess.run(tf.assign(model.lr, lr))

                # Keep the states between batches to simulate full
                # backpropagation (stateful RNN).
                feed = {
                    model.initial_hidden_state: h_state,
                    model.initial_cell_state: c_state,
                    model.batchX_placeholder: x,
                    model.batchY_placeholder: y
                }

                if args.option == 'train':
                    _, h_state, c_state, loss, accuracy, \
                        summary = sess.run([model.train_step,
                                            model.final_hidden_state,
                                            model.final_cell_state,
                                            model.total_loss,
                                            model.accuracy,
                                            summaries],
                                           feed_dict=feed)
                elif args.option == 'validate':
                    h_state, c_state, loss, accuracy, \
                        summary = sess.run([model.final_hidden_state,
                                            model.final_cell_state,
                                            model.total_loss,
                                            model.accuracy,
                                            summaries],
                                           feed_dict=feed)

                losses.append(loss)
                accuracies.append(accuracy)

                if args.option == 'train' and \
                        global_step % args.print_every == 0:
                    # Record training for tensorboard.
                    writer.add_summary(summary, global_step)
                    writer.flush()

                    print("Shard {} Epoch {}/{} Batch {}/{} ({}) -- "
                          "loss: {:.3f}, acc: {:.3f}".format(
                              args.shard, epoch_id, args.num_epochs - 1,
                              batch_id, data_loader.num_batches - 1,
                              global_step, loss, accuracy))
                    sys.stdout.flush()

        # Save the model at the end of training.
        if args.option == 'train':
            save_model(args, sess, saver, global_step)

        # Save losses and accuracies.
        np.save(os.path.join(args.log_dir, 'loss_' + str(global_step_init)),
                losses)
        np.save(
            os.path.join(args.log_dir, 'accuracy_' + str(global_step_init)),
            accuracies)

        # Record training for tensorboard.
        writer.add_summary(summary, global_step)
        writer.flush()

        print("Shard {} Epoch {}/{} Batch {}/{} ({}) -- "
              "loss: {:.3f}, acc: {:.3f}".format(args.shard, epoch_id,
                                                 args.num_epochs - 1, batch_id,
                                                 data_loader.num_batches - 1,
                                                 global_step, loss, accuracy))
        sys.stdout.flush()

        # Record time spent.
        time_spent = time.time() - s_time
        hours, rem = divmod(time_spent, 3600)
        minutes, seconds = divmod(rem, 60)
        print('Train time: {:0>2}:{:0>2}:{:05.2f}'.format(
            int(hours), int(minutes), seconds))
        print('Time per batch: {:.3f}ms, time per byte: {:.3f}ms'.format(
            time_spent / (args.num_epochs * data_loader.num_batches) * 1000,
            time_spent / (args.num_epochs * data_loader.num_batches *
                          args.batch_size * args.seq_length) * 1000))
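
The learning-rate schedule in the function above is a linear decay from lr_init toward zero over the expected number of weight updates, floored at 1.5e-13. The same schedule in isolation (a sketch; the numbers are illustrative):

def linear_lr(step, lr_init, train_bytes, batch_size, seq_length):
    """Linearly decay lr_init to ~0 over one pass of the data, with a floor."""
    total_updates = train_bytes / (batch_size * seq_length)
    lr = lr_init - (lr_init / total_updates) * step
    return max(lr, 1.5e-13)

# halfway through 1000 updates: linear_lr(500, 1e-3, 1024000, 32, 32) == 5e-4
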
Example #39
0
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length,
                             args.input_encoding)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "words_vocab.pkl")
        ), "words_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'words_vocab.pkl'), 'rb') as f:
            saved_words, saved_vocab = cPickle.load(f)
        assert saved_words == data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.words, data_loader.vocab), f)

    model = Model(args)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(args.log_dir)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        train_writer.add_graph(sess.graph)
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(model.epoch_pointer.eval(), args.num_epochs):
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            speed = 0
            if args.init_from is None:
                assign_op = model.epoch_pointer.assign(e)
                sess.run(assign_op)
            if args.init_from is not None:
                data_loader.pointer = model.batch_pointer.eval()
                args.init_from = None
            for b in range(data_loader.pointer, data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {
                    model.input_data: x,
                    model.targets: y,
                    model.initial_state: state,
                    model.batch_time: speed
                }
                summary, train_loss, state, _, _ = sess.run([
                    merged, model.cost, model.final_state, model.train_op,
                    model.inc_batch_pointer_op
                ], feed)
                train_writer.add_summary(summary,
                                         e * data_loader.num_batches + b)
                speed = time.time() - start
                if (e * data_loader.num_batches + b) % args.batch_size == 0:
                    print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                        .format(e * data_loader.num_batches + b,
                                args.num_epochs * data_loader.num_batches,
                                e, train_loss, speed))
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
        train_writer.close()
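This variant makes training resumable mid-epoch by storing the epoch and batch pointers as TensorFlow variables, so they are saved and restored with the checkpoint itself. A minimal sketch of that idea under the same TF 1.x API; the epoch and batch counts are illustrative and the actual optimization step is omitted:

import tensorflow as tf  # TF 1.x

# Non-trainable counters; being variables, they ride along in the checkpoint.
epoch_pointer = tf.Variable(0, trainable=False, name="epoch_pointer")
batch_pointer = tf.Variable(0, trainable=False, name="batch_pointer")
inc_batch = tf.assign_add(batch_pointer, 1)

saver = tf.train.Saver()  # covers the pointers along with the model weights
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # After saver.restore(sess, ckpt.model_checkpoint_path), these pick up
    # wherever the interrupted run left off.
    start_epoch, start_batch = sess.run([epoch_pointer, batch_pointer])
    for e in range(start_epoch, 3):        # 3 epochs, illustrative
        sess.run(epoch_pointer.assign(e))
        for b in range(start_batch, 10):   # 10 batches/epoch, illustrative
            sess.run(inc_batch)            # advance the stored pointer
        start_batch = 0  # only the first resumed epoch starts mid-way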
Example #40
0
def train2(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length, args.reprocess)
    args.vocab_size = data_loader.vocab_size

    totalTask = args.num_epochs * data_loader.num_batches

    lastCheckpoint = tf.train.latest_checkpoint(args.save_dir) 
    if lastCheckpoint is None:
        startEpoch = 0
    else:
        print "Last checkpoint :", lastCheckpoint
        startEpoch = int(lastCheckpoint.split("-")[-1])

    print "startEpoch = ", startEpoch

    with open(os.path.join(args.save_dir, 'config.pkl'), 'w') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'w') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = ConstrainedModel(args)

    etaCount = 0
    etaString = "-" 
    etaStart = time.time()
    etaTime = 0

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        if startEpoch > 0: # load latest checkpoint
            print "Loading last checkpoint"
            saver.restore(sess, lastCheckpoint)

        for e in xrange(startEpoch, args.num_epochs):
            sess.run(tf.assign(model.lr, decayForEpoch(args, e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in xrange(data_loader.num_batches):
                start = time.time()
                x, y, con = data_loader.next_batch()

                feed = {model.input_data: x, model.targets: y, model.initial_state: state, model.con_data:con}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                #time.sleep(0.01)
                #train_loss = 5
                end = time.time()

                taskNum = (e * data_loader.num_batches + b)
                etaCount += 1
                if (etaCount) % 25 == 0:
                    duration = time.time() - etaStart
                    etaTime = (totalTask - (taskNum + 1)) / 25 * duration
                    m, s = divmod(etaTime, 60)
                    h, m = divmod(m, 60)
                    etaString = "%d:%02d:%02d" % (h, m, s)
                    etaStart = time.time()

                print "{}/{} (epoch {}), loss = {:.3f}, time/batch = {:.3f}, ETA: {} ({})" \
                    .format(taskNum, totalTask, e, train_loss, end - start, time.ctime(time.time()+etaTime), etaString)

            if (e + 1) % args.save_every == 0 or e == args.num_epochs - 1:
                checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step = e + 1)
                print "model saved to {}".format(checkpoint_path)
Example #41
0
def train(args):
    model_name = args.data_dir.split("/")[-1]
    # make a dir to store checkpoints
    args.save_dir = os.path.join(args.save_checkpoints, model_name)
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "chars_vocab.pkl")
        ), "chars_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars == data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        # instrument for tensorboard
        summaries = tf.summary.merge_all()
        writer = tf.summary.FileWriter(
            os.path.join(args.log_dir, time.strftime("%Y-%m-%d-%H-%M-%S")))
        writer.add_graph(sess.graph)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(args.num_epochs):
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                for i, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[i].c
                    feed[h] = state[i].h

                # instrument for tensorboard
                summ, train_loss, state, _ = sess.run(
                    [summaries, model.cost, model.final_state, model.train_op],
                    feed)
                writer.add_summary(summ, e * data_loader.num_batches + b)

                end = time.time()
                print(
                    "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches, e,
                            train_loss, end - start))

                if (e * data_loader.num_batches + b) % args.save_every == 0\
                        or (e == args.num_epochs-1 and b == data_loader.num_batches-1):
                    # remove previous checkpoints
                    current_checkpoints = [
                        f for f in os.listdir(args.save_dir)
                        if os.path.isfile(os.path.join(args.save_dir, f))
                    ]
                    for f in current_checkpoints:
                        if model_name in f:
                            os.remove(os.path.join(args.save_dir, f))
                    # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    final_model = '{}-{}'.format(
                        model_name, e * data_loader.num_batches + b)
                    print("Model saved to {}!".format(checkpoint_path))

    # get the vocab
    model_vocab = getModelVocab(args.save_checkpoints, model_name)
    # dump the checkpoints to javascript
    dump_checkpoints(args.save_checkpoints, args.save_model, model_vocab,
                     model_name, final_model)
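Before each save, the example above prunes earlier checkpoints by deleting every file in the save directory whose name contains the model name. That rotation step, as a filesystem-only sketch (paths are illustrative):

import os

def rotate_checkpoints(save_dir, model_name):
    """Delete earlier checkpoint files for this model before saving anew."""
    for f in os.listdir(save_dir):
        path = os.path.join(save_dir, f)
        if os.path.isfile(path) and model_name in f:
            os.remove(path)

# Typical use, mirroring the loop above (paths illustrative):
# rotate_checkpoints("checkpoints/shakespeare", "shakespeare")
# saver.save(sess, os.path.join("checkpoints/shakespeare", "model.ckpt"),
#            global_step=step)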
Example #42
0
def main(_):
    pp.pprint(FLAGS.__flags)

    if not os.path.exists(FLAGS.checkpoint_dir):
        print(" [*] Creating checkpoint directory...")
        os.makedirs(FLAGS.checkpoint_dir)

    data_loader = TextLoader(os.path.join(FLAGS.data_dir, FLAGS.dataset_name),
                             FLAGS.batch_size, FLAGS.seq_length)
    vocab_size = data_loader.vocab_size

    with tf.variable_scope(FLAGS.dataset_name):
        train_model = CharRNN(vocab_size, FLAGS.batch_size, FLAGS.rnn_size,
                              FLAGS.layer_depth, FLAGS.num_units,
                              FLAGS.rnn_type, FLAGS.seq_length,
                              FLAGS.keep_prob, FLAGS.grad_clip)

    with tf.variable_scope(FLAGS.dataset_name, reuse=True):
        valid_model = CharRNN(vocab_size, FLAGS.batch_size, FLAGS.rnn_size,
                              FLAGS.layer_depth, FLAGS.num_units,
                              FLAGS.rnn_type, FLAGS.seq_length,
                              FLAGS.keep_prob, FLAGS.grad_clip)

    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        train_model.load(sess, FLAGS.checkpoint_dir, FLAGS.dataset_name)

        best_val_pp = float('inf')
        best_val_epoch = 0
        valid_loss = 0
        valid_perplexity = 0
        start = time.time()

        if FLAGS.export:
            print("Eval...")
            final_embeddings = train_model.embedding.eval(sess)
            emb_file = os.path.join(FLAGS.data_dir, FLAGS.dataset_name,
                                    'emb.npy')
            print("Embedding shape: {}".format(final_embeddings.shape))
            np.save(emb_file, final_embeddings)

        else:
            if not os.path.exists(FLAGS.log_dir):
                os.makedirs(FLAGS.log_dir)
            with open(
                    FLAGS.log_dir + "/" + FLAGS.dataset_name +
                    "_hyperparams.pkl", 'wb') as f:
                cPickle.dump(FLAGS.__flags, f)
            for e in range(FLAGS.num_epochs):
                data_loader.reset_batch_pointer()
                sess.run(tf.assign(train_model.lr, FLAGS.learning_rate))
                FLAGS.learning_rate /= 2
                for b in range(data_loader.num_batches):
                    x, y = data_loader.next_batch()
                    res, time_batch = run_minibatches(sess, x, y, train_model)
                    train_loss = res["loss"]
                    train_perplexity = np.exp(train_loss)
                    print(
                        "{}/{} (epoch {}) loss = {:.2f}({:.2f}) perplexity(train/valid) = {:.2f}({:.2f}) time/batch = {:.2f} chars/sec = {:.2f}k" \
                            .format(data_loader.pointer, data_loader.num_batches,
                                    e,
                                    train_loss, valid_loss,
                                    train_perplexity, valid_perplexity,
                                    time_batch, (FLAGS.batch_size * FLAGS.seq_length) / time_batch / 1000))
                valid_loss = 0
                for vb in range(data_loader.num_valid_batches):
                    res, valid_time_batch = run_minibatches(
                        sess, data_loader.x_valid[vb], data_loader.y_valid[vb],
                        valid_model, False)
                    valid_loss += res["loss"]
                valid_loss = valid_loss / data_loader.num_valid_batches
                valid_perplexity = np.exp(valid_loss)
                print("### valid_perplexity = {:.2f}, time/batch = {:.2f}".
                      format(valid_perplexity, valid_time_batch))
                if valid_perplexity < best_val_pp:
                    best_val_pp = valid_perplexity
                    best_val_epoch = e
                    train_model.save(sess, FLAGS.checkpoint_dir,
                                     FLAGS.dataset_name)
                    print("model saved to {}".format(FLAGS.checkpoint_dir))
                if e - best_val_epoch > FLAGS.early_stopping:
                    print('Total time: {}'.format(time.time() - start))
                    break
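Example #42 ties checkpointing to validation perplexity, `exp` of the mean validation cross-entropy: it saves only when perplexity improves and stops once `early_stopping` epochs pass without a new best. The control flow, isolated with illustrative numbers:

import numpy as np

early_stopping = 2                      # epochs of patience, illustrative
best_val_pp, best_val_epoch = float("inf"), 0
# Illustrative per-epoch validation losses; in practice these come from
# averaging the model's loss over the validation batches.
val_losses = [2.10, 1.90, 1.85, 1.86, 1.84, 1.88, 1.90, 1.91]

for e, val_loss in enumerate(val_losses):
    val_pp = np.exp(val_loss)           # perplexity = exp(mean cross-entropy)
    if val_pp < best_val_pp:
        best_val_pp, best_val_epoch = val_pp, e
        print("epoch %d: new best perplexity %.2f, saving" % (e, val_pp))
    if e - best_val_epoch > early_stopping:
        print("patience of %d epochs exceeded, stopping" % early_stopping)
        break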
Example #43
0
def train(args):
    # Load dataset.
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # Checkpoint state.
    ckpt = None

    # Check if training can be continued from previously saved model.
    if args.init_from is not None:
        # Assert all necessary files exists.
        assert os.path.isdir(args.init_from), "{} doesn't exist.".format(
            args.init_from)

        assert os.path.exists(os.path.join(args.init_from, "config.pkl")), \
            "config.pkl doesn't exist in path {}".format(args.init_from)

        assert os.path.exists(os.path.join(args.init_from, "chars_vocab.pkl")), \
            "chars_vocab.pkl doesn't exist in path {}".format(args.init_from)

        # Get the state of checkpoint to be loaded.
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found!"
        assert ckpt.model_checkpoint_path, "model.ckpt-* not found in path {}".format(
            args.init_from)

        # Open config file and verify model compatibility.
        with open(os.path.join(args.init_from, "config.pkl"), mode="rb") as f:
            saved_model_args = pickle.load(f)

        # List of meta data that needs to be the same
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]

        for check_me in need_be_same:
            assert vars(saved_model_args)[check_me] == vars(args)[check_me], \
                "Saved model & command line arguments of {} aren't compatible!".format(check_me)

        # Load saved chars & vocab and check for compatibility.
        with open(os.path.join(args.init_from, "chars_vocab.pkl"),
                  mode="rb") as f:
            saved_chars, saved_vocab = pickle.load(f)

        assert saved_chars == data_loader.chars, "Data and character set aren't compatible!"
        assert saved_vocab == data_loader.vocab, "Data and loaded dictionary mappings aren't compatible!"

    # Create save directory if it doesn't exist.
    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)

    # Write the command line arguments into config file.
    with open(os.path.join(args.save_dir, "config.pkl"), mode="wb") as f:
        pickle.dump(args, f)

    # Save character set and dictionary mappings
    with open(os.path.join(args.save_dir, "chars_vocab.pkl"), mode="wb") as f:
        pickle.dump((data_loader.chars, data_loader.vocab), f)

    # Define the model.
    model = Model(args, training=True)

    # Start TensorFlow session. (with the default graph).
    with tf.Session() as sess:
        # Summary for Tensorboard.
        summaries = tf.summary.merge_all()
        writer = tf.summary.FileWriter(os.path.join(
            args.logdir, time.strftime("%Y-%m-%d-%H-%M-%S-%p")),
                                       graph=sess.graph)

        # Initialize global variables.
        sess.run(tf.global_variables_initializer())

        # Saver object for all global variables.
        saver = tf.train.Saver(var_list=tf.global_variables())

        # Restore model from checkpoint.
        if args.init_from is not None:
            saver.restore(sess=sess, save_path=ckpt.model_checkpoint_path)

        # TRAINING LOOP.
        for epoch in range(args.num_epochs):
            # NOTE: Surrounded with try-except in case training was force-stopped.
            try:
                # Update Model's learning rate.
                sess.run(
                    tf.assign(model.lr,
                              value=args.learning_rate *
                              (args.decay_rate**epoch)))

                # Reset mini batch pointer.
                data_loader.reset_batch_pointer()

                # Initial state.
                state = sess.run(model.initial_state)

                for batch in range(data_loader.num_batches):
                    # Record start time for current batch.
                    start = time.time()

                    # Get the next mini batch.
                    X, y = data_loader.next_batch()

                    feed_dict = {model.input_data: X, model.targets: y}

                    for i, (c, h) in enumerate(model.initial_state):
                        feed_dict[c] = state[i].c
                        feed_dict[h] = state[i].h

                    # Train the model.
                    _, _loss, _global, _summary, state = sess.run(
                        [
                            model.train_op, model.loss, model.global_step,
                            summaries, model.final_state
                        ],
                        feed_dict=feed_dict)

                    writer.add_summary(summary=_summary, global_step=_global)

                    end = time.time()
                    batch_count = epoch * data_loader.num_batches + batch

                    # Log progress.
                    print(
                        "\r{:,} of {:,} | global: {:,} Loss: {:.3f} time/batch: {:.3f}"
                        .format(batch_count,
                                args.num_epochs * data_loader.num_batches,
                                _global, _loss, end - start),
                        end="")

                    # Save model at intervals.
                    if batch_count % args.save_every == 0 or (
                            epoch == args.num_epochs - 1
                            and batch == data_loader.num_batches - 1):
                        save_path = os.path.join(args.save_dir, "model.ckpt")
                        saver.save(sess=sess,
                                   save_path=save_path,
                                   global_step=model.global_step)

                        print("\nModel saved to {}\n".format(save_path))
                """# !- end batch"""
            except KeyboardInterrupt:
                print('\nTraining interrupted by user. Saving...')

                save_path = os.path.join(args.save_dir, "model.ckpt")
                saver.save(sess=sess,
                           save_path=save_path,
                           global_step=model.global_step)

                print("Model saved to {}\n".format(save_path))

                # End training.
                break

        # !- end epoch
        print("\n\nOverall training count = {}".format(
            sess.run(model.global_step)))
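Wrapping the batch loop in `try`/`except KeyboardInterrupt`, as above, turns Ctrl-C into a clean save-and-exit instead of a lost run. The skeleton without the TensorFlow specifics; `save_model` here is a hypothetical callable:

def train_loop(num_epochs, num_batches, save_model):
    """Run training; on Ctrl-C, save once more and exit cleanly."""
    for epoch in range(num_epochs):
        try:
            for batch in range(num_batches):
                pass  # one optimization step per batch would go here
        except KeyboardInterrupt:
            print("\nTraining interrupted by user. Saving...")
            save_model()  # hypothetical callable wrapping saver.save(...)
            break         # end training instead of re-raising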
Example #44
0
def train(args):
    print(args)
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length, args.training_data_ratio)
    args.vocab_size = data_loader.vocab_size

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    #sess = tf.InteractiveSession()
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter('/tmp', sess.graph)

        step = 0
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            #print("model learning rate is {}".format(model.lr.eval()))
            data_loader.reset_batch_pointer('train')

            state = model.initial_state.eval()
            for b in xrange(data_loader.ntrain):
                start = time.time()
                x, y = data_loader.next_batch('train')

                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                step = e * data_loader.ntrain + b
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(step,
                            args.num_epochs * data_loader.ntrain,
                            e, train_loss, end - start))

                if step % args.write_summary_every == 0:
                    # training loss
                    summary_str = sess.run(summary_op, feed_dict=feed)
                    summary_writer.add_summary(summary_str, step)

                if step % args.save_every == 0 or (step + 1) == (args.num_epochs * data_loader.ntrain):
                    # eval validation loss
                    data_loader.reset_batch_pointer('validation')
                    validation_state = model.initial_state.eval()
                    val_losses = 0
                    for n in xrange(data_loader.nvalidation):
                        x, y = data_loader.next_batch('validation')
                        val_feed = {model.input_data: x, model.targets: y, model.initial_state: validation_state}
                        validation_loss, validation_state = sess.run([model.cost, model.final_state], val_feed)
                        val_losses += validation_loss

                    validation_loss = val_losses / data_loader.nvalidation
                    print("validation loss is {}".format(validation_loss))

                    # write top 5 validation loss to a json file
                    args_dict = vars(args)
                    args_dict['step'] = step
                    val_loss_file = args.save_dir + '/val_loss.json'
                    loss_json = ''
                    save_new_checkpoint = False
                    time_int = int(time.time())
                    args_dict['checkpoint_path'] = os.path.join(args.save_dir, 'model.ckpt-'+str(time_int))
                    if os.path.exists(val_loss_file):
                        with open(val_loss_file, "r") as text_file:
                            text = text_file.read()
                            if text == '':
                                loss_json = {validation_loss: args_dict}
                                save_new_checkpoint = True
                            else:
                                loss_json = json.loads(text)
                                losses = loss_json.keys()
                                if len(losses) > 3:
                                    losses.sort(key=lambda x: float(x), reverse=True)
                                    loss = losses[0]
                                    if validation_loss < float(loss):
                                        to_be_remove_ckpt_file_path =  loss_json[loss]['checkpoint_path']
                                        to_be_remove_ckpt_meta_file_path = to_be_remove_ckpt_file_path + '.meta'
                                        print("removed checkpoint {}".format(to_be_remove_ckpt_file_path))
                                        if os.path.exists(to_be_remove_ckpt_file_path):
                                            os.remove(to_be_remove_ckpt_file_path)
                                        if os.path.exists(to_be_remove_ckpt_meta_file_path):
                                            os.remove(to_be_remove_ckpt_meta_file_path)
                                        del(loss_json[loss])
                                        loss_json[validation_loss] = args_dict
                                        save_new_checkpoint = True
                                else:
                                    loss_json[validation_loss] = args_dict
                                    save_new_checkpoint = True
                    else:
                        loss_json = {validation_loss: args_dict}
                        save_new_checkpoint = True

                    if save_new_checkpoint:
                        checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step = time_int)
                        print("model saved to {}".format(checkpoint_path + '-' + str(time_int)))

                        with open(val_loss_file, "w") as text_file:
                            json.dump(loss_json, text_file)
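The JSON bookkeeping in example #44 boils down to a keep-the-k-best policy: retain the checkpoints with the lowest validation losses and evict the worst entry when a better one arrives. A compact sketch of that policy; `k` is illustrative and the deletion of the evicted checkpoint's files is left out:

import json
import os

def record_checkpoint(val_loss_file, validation_loss, ckpt_path, k=4):
    """Return True if this checkpoint is worth saving, keeping at most k
    entries keyed by validation loss (smaller is better)."""
    table = {}
    if os.path.exists(val_loss_file):
        with open(val_loss_file) as f:
            text = f.read()
            table = json.loads(text) if text else {}
    if len(table) >= k:
        worst = max(table, key=float)       # the entry with the largest loss
        if validation_loss >= float(worst):
            return False                    # not good enough to keep
        # Evict the worst entry; the example also deletes its .ckpt files.
        del table[worst]
    table[str(validation_loss)] = {"checkpoint_path": ckpt_path}
    with open(val_loss_file, "w") as f:
        json.dump(table, f)
    return True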