Exemplo n.º 1
0
def main(_):
    """
    Run training or next-word prediction, selected by FLAGS.mode.

    Hyper-parameters are parsed from FLAGS.hpconfig and printed, and the
    vocabulary is read from "vocabulary.txt" inside FLAGS.datadir.

    * mode == "train": trains on "train.txt" from FLAGS.datadir and logs to
      the "train" sub-directory of FLAGS.logdir.
    * mode starting with "eval": builds a predict.Model and prints every row
      returned by predictnextkwords() for the prefix ["<brk>"], followed by
      the number of rows.
    * any other mode: nothing further happens.

    The single positional argument is ignored.
    """
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps._set("num_gpus", FLAGS.num_gpus)
    for banner_item in ('*****HYPER PARAMETERS*****', hps,
                        '**************************'):
        print(banner_item)

    vocab_path = os.path.join(FLAGS.datadir, "vocabulary.txt")
    vocab = Vocabulary.from_file(vocab_path)

    mode = FLAGS.mode
    if mode == "train":
        train_set = Dataset(vocab, os.path.join(FLAGS.datadir, "train.txt"))
        train_logdir = os.path.join(FLAGS.logdir, "train")
        run_train(train_set, hps, train_logdir, ps_device="/gpu:0")
        return

    if not mode.startswith("eval"):
        return

    # NOTE(review): this dataset is built but never read below; it is kept
    # because the Dataset constructor's side effects are not visible here.
    eval_path = os.path.join(FLAGS.datadir, "eval.txt")
    _eval_set = Dataset(vocab, eval_path, deterministic=True)

    seed_words = ["<brk>"]
    predictor = predict.Model(hps, FLAGS.logdir, FLAGS.datadir)
    print('start input')
    rows = predictor.predictnextkwords(seed_words, FLAGS.num_sen)
    for words in rows:
        # The explicit "\n" plus print's own newline leaves a blank line
        # after every row.
        print(' '.join(words) + "\n")
    print("len_out: " + str(len(rows)))
Exemplo n.º 2
0
def main(_):
    """Train or evaluate the language model, depending on FLAGS.mode.

    Hyper-parameters come from FLAGS.hpconfig; the vocabulary is read from
    "small_voca.txt" in the current working directory.

    * mode == "train": forces batch_size to 256 and trains on the
      "training-monolingual.tokenized.shuffled/*" files under FLAGS.datadir,
      logging to FLAGS.logdir + "/train".
    * mode starting with "eval_": evaluates on the training files when the
      mode starts with "eval_train", otherwise on the first held-out shard.
    * any other mode: nothing further happens.

    The single positional argument is ignored.
    """
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps.num_gpus = FLAGS.num_gpus

    vocab = Vocabulary.from_file("small_voca.txt")

    mode = FLAGS.mode
    if mode == "train":
        hps.batch_size = 256
        train_glob = FLAGS.datadir + "/training-monolingual.tokenized.shuffled/*"
        train_set = Dataset(vocab, train_glob)
        run_train(train_set, hps, FLAGS.logdir + "/train", ps_device="/gpu:0")
        return

    if not mode.startswith("eval_"):
        return

    # NOTE(review): this rebuilds the same hparams as above; it looks
    # redundant, so confirm parse() has no side effects before removing it.
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps.num_gpus = FLAGS.num_gpus

    relative_path = (
        "/training-monolingual.tokenized.shuffled/*"
        if mode.startswith("eval_train")
        else "/heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050"
    )
    eval_set = Dataset(vocab, FLAGS.datadir + relative_path, deterministic=True)
    run_eval(eval_set, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps)
Exemplo n.º 3
0
def main(_):
    """
    Alternate between training and evaluation for ten iterations.

    Hyper-parameters are parsed from FLAGS.hpconfig and printed, and the
    vocabulary is read from "vocabulary.txt" inside FLAGS.datadir.

    FLAGS.mode is overwritten: it starts as "train" and is flipped after
    every iteration ("train" -> "eval_full" -> "train" ...), so whatever
    mode was passed on the command line is ignored. Training reads
    "ptb.train.txt" and evaluation reads "ptb.test.txt", both from
    FLAGS.datadir.

    The single positional argument is ignored.
    """
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps._set("num_gpus", FLAGS.num_gpus)
    for banner_item in ('*****HYPER PARAMETERS*****', hps,
                        '**************************'):
        print(banner_item)

    print_debug('our training DataSetDir=%s  , LogDir=%s' %
                (FLAGS.datadir, FLAGS.logdir))

    vocab_path = os.path.join(FLAGS.datadir, "vocabulary.txt")
    vocab = Vocabulary.from_file(vocab_path)

    FLAGS.mode = "train"
    for iteration in range(10):
        print("Iteration ", iteration, " phase: ", FLAGS.mode)

        if FLAGS.mode == "train":
            train_path = os.path.join(FLAGS.datadir, "ptb.train.txt")
            train_set = Dataset(vocab, train_path)

            # Plain "/" concatenation is used here instead of os.path.join.
            trainlogdir = FLAGS.logdir + "/" + "train"
            print_debug('train log dir=%s' % trainlogdir)

            run_train(train_set, hps, trainlogdir, ps_device="/gpu:0")
            print_debug('Finished run_train !!!!!!!!!!!')
        elif FLAGS.mode.startswith("eval"):
            print_debug('eval mode')

            test_path = os.path.join(FLAGS.datadir, "ptb.test.txt")
            eval_set = Dataset(vocab, test_path, deterministic=True)
            run_eval(eval_set, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps)
            print_debug('Finished run_eval !!!!!!!!!!!')

        # Flip the phase for the next iteration.
        FLAGS.mode = "eval_full" if FLAGS.mode == "train" else "train"
Exemplo n.º 4
0
def main(_):
    """
    Dispatch to training, evaluation or inference according to FLAGS.mode.

    Hyper-parameters are parsed from FLAGS.hpconfig and printed, and the
    vocabulary is read from "1b_word_vocab.txt" inside FLAGS.datadir. The
    data locations below are hard-coded relative to FLAGS.datadir.

    * mode == "train": trains on the shuffled training files and logs to the
      "train" sub-directory of FLAGS.logdir.
    * mode starting with "eval_": evaluates on the training files for
      "eval_train*", otherwise on the first held-out shard.
    * mode starting with "infer": runs inference on the first held-out shard.
    * any other mode: nothing further happens.

    The single positional argument is ignored.
    """
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps._set("num_gpus", FLAGS.num_gpus)
    print('*****HYPER PARAMETERS*****')
    print(hps)
    print('**************************')

    datadir = FLAGS.datadir
    vocab = Vocabulary.from_file(os.path.join(datadir, "1b_word_vocab.txt"))

    train_glob = "training-monolingual.tokenized.shuffled/*"
    heldout_shard = (
        "heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050"
    )

    mode = FLAGS.mode
    if mode == "train":
        train_set = Dataset(vocab, os.path.join(datadir, train_glob))
        train_logdir = os.path.join(FLAGS.logdir, "train")
        run_train(train_set, hps, train_logdir, ps_device="/gpu:0")
    elif mode.startswith("eval_"):
        # NOTE(review): "eval_full" resolves to the same single shard as the
        # default eval mode; confirm whether it was meant to cover all
        # held-out files.
        relative_path = train_glob if mode.startswith("eval_train") else heldout_shard
        eval_set = Dataset(vocab,
                           os.path.join(datadir, relative_path),
                           deterministic=True)
        run_eval(eval_set, hps, FLAGS.logdir, mode, FLAGS.eval_steps)
    elif mode.startswith("infer"):
        infer_set = Dataset(vocab,
                            os.path.join(datadir, heldout_shard),
                            deterministic=True)
        run_infer(infer_set, hps, FLAGS.logdir, mode, vocab)
Exemplo n.º 5
0
def run():
    """Parse command-line options, load the model and start the web app.

    Command-line options:
        --model  checkpoint path handed to Model (defaults to the path that
                 used to be hard-coded here).
        --vocab  vocabulary file handed to Model (defaults to the path that
                 used to be hard-coded here).
        --port   port for app.run (default 8000).
        --host   host for app.run (default "localhost").
        --debug  run the app in debug mode.

    Side effects: binds the module-level ``model`` global and then calls
    app.run().
    """
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    # Fixed hyper-parameters for the served checkpoint.
    hps = LM.get_default_hparams().parse(
        'num_steps=20,num_shards=8,num_layers=2,'
        'emb_size=12,projected_size=12,state_size=80,'
        'num_sampled=0,batch_size=1,vocab_size=102')
    hps._set("num_gpus", 1)

    # The paths are optional flags rather than positionals, so running the
    # script with no extra arguments behaves exactly as before.
    arg('--model',
        default='/Users/ruiyangwang/Desktop/f-lm/logs/test/train/model.ckpt-0',
        help='path of the model checkpoint to load')
    arg('--vocab',
        default='/Users/ruiyangwang/Desktop/examples/word_language_model/data/penn/vocabulary.txt',
        help='path of the vocabulary file')
    arg('--port', type=int, default=8000)
    arg('--host', default='localhost')
    arg('--debug', action='store_true')
    args = parser.parse_args()

    # The model is published as a module-level global; presumably the app's
    # request handlers read it (they are not visible in this function).
    global model
    model = Model(args.model, args.vocab, hps)
    app.run(port=args.port, host=args.host, debug=args.debug)
Exemplo n.º 6
0
def run():
    """Parse command-line options, load the model and start the web app.

    Command-line options:
        --model  checkpoint path handed to Model (defaults to the path that
                 used to be hard-coded here).
        --vocab  vocabulary file handed to Model (defaults to the path that
                 used to be hard-coded here).
        --port   port for app.run (default 8000).
        --host   host for app.run (default "localhost").
        --debug  run the app in debug mode.

    Side effects: binds the module-level ``model`` global and then calls
    app.run().
    """
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    # Fixed hyper-parameters for the served checkpoint.
    hps = LM.get_default_hparams().parse(
        'num_steps=20,num_shards=6,num_layers=2,'
        'learning_rate=0.2,max_grad_norm=1,keep_prob=0.9,'
        'emb_size=1024,projected_size=1024,state_size=8192,'
        'num_sampled=8192,batch_size=512,vocab_size=11859,'
        'num_of_groups=4'
    )
    hps._set("num_gpus", 1)

    # The paths are optional flags rather than positionals, so running the
    # script with no extra arguments behaves exactly as before.
    arg('--model',
        default='/Users/ruiyangwang/Desktop/model/model.ckpt-44260',
        help='path of the model checkpoint to load')
    arg('--vocab',
        default='/Users/ruiyangwang/Desktop/vocabulary2.txt',
        help='path of the vocabulary file')
    arg('--port', type=int, default=8000)
    arg('--host', default='localhost')
    arg('--debug', action='store_true')
    args = parser.parse_args()

    # The model is published as a module-level global; presumably the app's
    # request handlers read it (they are not visible in this function).
    global model
    model = Model(args.model, args.vocab, hps)
    app.run(port=args.port, host=args.host, debug=args.debug)
Exemplo n.º 7
0
def main(_):
    """Train the language model in a Horovod worker process.

    Initialises Horovod, builds hyper-parameters from FLAGS.hpconfig, reads
    the vocabulary from FLAGS.vocab (its token count becomes
    hps.vocab_size), restricts CUDA_VISIBLE_DEVICES to this worker's local
    rank, and trains on FLAGS.datadir with batch_size forced to 256.

    When FLAGS.logdir is None a timestamped directory under /tmp is chosen
    and printed. Training logs go to FLAGS.logdir + '/train'.

    The single positional argument is ignored.
    """
    hvd.init()

    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps.num_gpus = FLAGS.num_gpus

    vocab = Vocabulary.from_file(FLAGS.vocab)
    hps.vocab_size = vocab.num_tokens

    local_rank = str(hvd.local_rank())

    # NOTE(review): this config is populated but never passed to run_train
    # within this function; confirm whether a session is meant to use it.
    config = tf.ConfigProto()
    config.gpu_options.visible_device_list = local_rank
    os.environ["CUDA_VISIBLE_DEVICES"] = local_rank

    if FLAGS.logdir is None:
        run_name = 'lm-run-{}'.format(int(time.time()))
        FLAGS.logdir = os.path.join('/tmp', run_name)
        print('logdir: {}'.format(FLAGS.logdir))

    hps.batch_size = 256
    train_set = Dataset(vocab, FLAGS.datadir)
    train_logdir = FLAGS.logdir + '/train'
    run_train(train_set, hps, train_logdir, ps_device='/gpu:' + local_rank)
def main(_):
    """Train, evaluate or run next-word prediction, depending on FLAGS.mode.

    Hyper-parameters are parsed from FLAGS.hpconfig; the vocabulary is read
    from FLAGS.datadir + "/lm_vocab.txt", with hps.vocab_size passed as the
    second argument to Vocabulary.from_file.

    * mode == "train": forces batch_size to 256 and trains on
      FLAGS.datadir + "/train/*", logging to FLAGS.logdir + "/train".
    * mode starting with "eval_train": evaluates on the training files.
    * mode starting with "eval_test": evaluates on FLAGS.datadir + "/heldout/*".
    * mode starting with "predict_next": calls predict_next on the fixed
      file "data/news.en.heldout-00001-of-00050".
    * any other mode: nothing further happens.

    The single positional argument is ignored.

    Raises:
        ValueError: if the mode starts with "eval_" but is neither an
            "eval_train*" nor an "eval_test*" mode. Previously this case
            crashed with UnboundLocalError because no data path was chosen.
    """
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps.num_gpus = FLAGS.num_gpus

    vocab = Vocabulary.from_file(FLAGS.datadir + "/lm_vocab.txt", hps.vocab_size)

    if FLAGS.mode == "train":
        hps.batch_size = 256  # reset batchsize
        dataset = Dataset(vocab, FLAGS.datadir + "/train/*")
        run_train(dataset, hps, FLAGS.logdir + "/train", ps_device="/gpu:0")
    elif FLAGS.mode.startswith("eval_"):
        if FLAGS.mode.startswith("eval_train"):
            data_dir = FLAGS.datadir + "/train/*"
        elif FLAGS.mode.startswith("eval_test"):
            data_dir = FLAGS.datadir + "/heldout/*"
        else:
            # Fail with a clear message instead of an unbound local below.
            raise ValueError(
                "Unsupported eval mode %r: expected a mode starting with "
                "'eval_train' or 'eval_test'" % (FLAGS.mode,))
        print("data_dir:", data_dir)
        dataset = Dataset(vocab, data_dir, deterministic=True)
        run_eval(dataset, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps)
    elif FLAGS.mode.startswith("predict_next"):
        # The prediction input is a fixed path relative to the working
        # directory, not FLAGS.datadir.
        data_dir = "data/news.en.heldout-00001-of-00050"
        dataset = Dataset(vocab, data_dir)
        predict_next(dataset, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps, vocab)
Exemplo n.º 9
0
import json
import numpy as np
import time
import tensorflow as tf
from data_utils import Vocabulary, Dataset
from language_model import LM
from common import CheckpointLoader

# Module-level constants; their consumers are not visible in this part of
# the file.
BATCH_SIZE = 1
NUM_TIMESTEPS = 1
MAX_WORD_LEN = 50

# NOTE(review): the first assignment is immediately overwritten, so
# UPLOAD_FOLDER is always './'.
UPLOAD_FOLDER = '/data/ngramTest/uploads'
UPLOAD_FOLDER = './'

# Build the model once at import time: default hyper-parameters, the
# vocabulary read from "1b_word_vocab.txt" in the working directory, and an
# LM created in "predict_next" mode on "/cpu:0" inside the "model" scope.
hps = LM.get_default_hparams()
vocab = Vocabulary.from_file("1b_word_vocab.txt")
with tf.variable_scope("model"):
    # Always use the full softmax at evaluation (no sampling).
    # NOTE(review): the original remark also mentioned running out of
    # memory; it is unclear which setting that refers to.
    hps.num_sampled = 0
    hps.keep_prob = 1.0
    hps.num_gpus = 1
    model = LM(hps, "predict_next", "/cpu:0")

# Choose which variables the saver handles: the model's avg_dict when
# hps.average_params is set, otherwise the Saver default.
if hps.average_params:
    print("Averaging parameters for evaluation.")
    saver = tf.train.Saver(model.avg_dict)
else:
    saver = tf.train.Saver()

# Use only 4 threads for the evaluation.
config = tf.ConfigProto(allow_soft_placement=True,
Exemplo n.º 10
0
    print("You're not in the cluster spec!  exiting!")
    exit(-1)
else:
    print("ROLE: %s" % role)
    print("INDEX: %s" % task_index)

cluster = tf.train.ClusterSpec(cluster_spec)
server = tf.train.Server(cluster, job_name=role, task_index=task_index)
if role == "ps":
    server.join()
else:
    ps_device = '/job:ps/task:0'
    """
    Start either train or eval. Note hardcoded parts of path for training and eval data
    """
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps._set("num_gpus", FLAGS.num_gpus)
    print('*****HYPER PARAMETERS*****')
    print(hps)
    print('**************************')

    vocab = Vocabulary.from_file(
        os.path.join(FLAGS.datadir, "1b_word_vocab.txt"))

    if FLAGS.mode == "train":
        #hps.batch_size = 256
        dataset = Dataset(
            vocab,
            os.path.join(FLAGS.datadir,
                         "training-monolingual.tokenized.shuffled/*"))
        run_train(dataset,