def main(_): """ Start either train or eval. Note hardcoded parts of path for training and eval data """ hps = LM.get_default_hparams().parse(FLAGS.hpconfig) hps._set("num_gpus", FLAGS.num_gpus) print('*****HYPER PARAMETERS*****') print(hps) print('**************************') vocab = Vocabulary.from_file(os.path.join(FLAGS.datadir, "vocabulary.txt")) if FLAGS.mode == "train": #hps.batch_size = 256 dataset = Dataset(vocab, os.path.join(FLAGS.datadir, "train.txt")) run_train(dataset, hps, os.path.join(FLAGS.logdir, "train"), ps_device="/gpu:0") elif FLAGS.mode.startswith("eval"): data_dir = os.path.join(FLAGS.datadir, "eval.txt") #predict_model = prediction.Model('/dir/ckpt',os.path.join(FLAGS.datadir, "vocabulary.txt"), hps) dataset = Dataset(vocab, data_dir, deterministic=True) prefix_words = "<brk>".split() predict_model = predict.Model(hps, FLAGS.logdir, FLAGS.datadir) print('start input') out = predict_model.predictnextkwords(prefix_words, FLAGS.num_sen) for row in out: print(' '.join(row) + "\n") print("len_out: " + str(len(out)))
def main(_): """ Start either train or eval. Note hardcoded parts of path for training and eval data """ hps = LM.get_default_hparams().parse(FLAGS.hpconfig) hps._set("num_gpus", FLAGS.num_gpus) print('*****HYPER PARAMETERS*****') print(hps) print('**************************') print_debug('our training DataSetDir=%s , LogDir=%s' % (FLAGS.datadir, FLAGS.logdir)) #vocab = Vocabulary.from_file(os.path.join(FLAGS.datadir, "1b_word_vocab.txt")) vocab = Vocabulary.from_file(os.path.join(FLAGS.datadir, "vocabulary.txt")) FLAGS.mode = "train" for i in range(10): print("Iteration ", i, " phase: ", FLAGS.mode) if FLAGS.mode == "train": #hps.batch_size = 256 # dataset = Dataset(vocab, os.path.join(FLAGS.datadir, # "training-monolingual.tokenized.shuffled/*")) dataset = Dataset(vocab, os.path.join(FLAGS.datadir, "ptb.train.txt")) trainlogdir = ( FLAGS.logdir + str("/") + "train" ) #(FLAGS.logdir+str("\\")+"train")#os.path.join(FLAGS.logdir, "train") print_debug('train log dir=%s' % (trainlogdir)) run_train(dataset, hps, trainlogdir, ps_device="/gpu:0") print_debug('Finished run_train !!!!!!!!!!!') elif FLAGS.mode.startswith("eval"): print_debug('eval mode') # if FLAGS.mode.startswith("eval_train"): # data_dir = os.path.join(FLAGS.datadir, "training-monolingual.tokenized.shuffled/*") # elif FLAGS.mode.startswith("eval_full"): # data_dir = os.path.join(FLAGS.datadir, "heldout-monolingual.tokenized.shuffled/*") # else: # data_dir = os.path.join(FLAGS.datadir, "heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050") dataset = Dataset(vocab, os.path.join(FLAGS.datadir, "ptb.test.txt"), deterministic=True) run_eval(dataset, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps) print_debug('Finished run_eval !!!!!!!!!!!') if FLAGS.mode == "train": FLAGS.mode = "eval_full" else: FLAGS.mode = "train"
def main(_):
    hps = Sentiment.get_default_hparams().parse(FLAGS.hpconfig)
    vocab = Vocabulary.from_file(
        os.path.join(FLAGS.data_dir, "sent.vocab.freq.dict"))
    if FLAGS.mode == "train":
        dataset = Dataset(os.path.join(FLAGS.data_dir, "train.sent_data.txt"),
                          vocab)
        run_train(dataset,                    # dataset
                  hps,                        # configuration
                  FLAGS.log_dir + "/train")   # logging dir
    elif FLAGS.mode.startswith("eval"):
        dataset = Dataset(os.path.join(FLAGS.data_dir, "test.sent_data.txt"),
                          vocab)
        run_eval(dataset,          # dataset
                 hps,              # configuration
                 FLAGS.log_dir)    # logging dir
def main(_):
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps.num_gpus = FLAGS.num_gpus

    vocab = Vocabulary.from_file("1b_word_vocab.txt")

    if FLAGS.mode == "train":
        hps.batch_size = 256
        dataset = Dataset(vocab, FLAGS.datadir
                          + "/training-monolingual.tokenized.shuffled/*")
        run_train(dataset, hps, FLAGS.logdir + "/train", ps_device="/gpu:0")
    elif FLAGS.mode.startswith("eval_"):
        if FLAGS.mode.startswith("eval_train"):
            data_dir = FLAGS.datadir + "/training-monolingual.tokenized.shuffled/*"
        else:
            data_dir = (FLAGS.datadir
                        + "/heldout-monolingual.tokenized.shuffled/"
                        + "news.en.heldout-00000-of-00050")
        dataset = Dataset(vocab, data_dir, deterministic=True)
        run_eval(dataset, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps)
def main(_): """ Start either train or eval. Note hardcoded parts of path for training and eval data """ hps = LM.get_default_hparams().parse(FLAGS.hpconfig) hps._set("num_gpus", FLAGS.num_gpus) print('*****HYPER PARAMETERS*****') print(hps) print('**************************') vocab = Vocabulary.from_file( os.path.join(FLAGS.datadir, "1b_word_vocab.txt")) if FLAGS.mode == "train": #hps.batch_size = 256 dataset = Dataset( vocab, os.path.join(FLAGS.datadir, "training-monolingual.tokenized.shuffled/*")) run_train(dataset, hps, os.path.join(FLAGS.logdir, "train"), ps_device="/gpu:0") elif FLAGS.mode.startswith("eval_"): if FLAGS.mode.startswith("eval_train"): data_dir = os.path.join( FLAGS.datadir, "training-monolingual.tokenized.shuffled/*") elif FLAGS.mode.startswith("eval_full"): data_dir = os.path.join( FLAGS.datadir, "heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050" ) else: data_dir = os.path.join( FLAGS.datadir, "heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050" ) dataset = Dataset(vocab, data_dir, deterministic=True) run_eval(dataset, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps) elif FLAGS.mode.startswith("infer"): data_dir = os.path.join( FLAGS.datadir, "heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050" ) dataset = Dataset(vocab, data_dir, deterministic=True) run_infer(dataset, hps, FLAGS.logdir, FLAGS.mode, vocab)
def main(_):
    # Initialize Horovod before querying ranks or touching GPU state.
    hvd.init()

    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps.num_gpus = FLAGS.num_gpus

    vocab = Vocabulary.from_file(FLAGS.vocab)
    hps.vocab_size = vocab.num_tokens

    # Pin each process to its own local GPU.
    config = tf.ConfigProto()
    config.gpu_options.visible_device_list = str(hvd.local_rank())
    os.environ["CUDA_VISIBLE_DEVICES"] = str(hvd.local_rank())

    if FLAGS.logdir is None:
        FLAGS.logdir = os.path.join('/tmp',
                                    'lm-run-{}'.format(int(time.time())))
    print('logdir: {}'.format(FLAGS.logdir))

    hps.batch_size = 256
    dataset = Dataset(vocab, FLAGS.datadir)
    run_train(dataset, hps, FLAGS.logdir + '/train',
              ps_device='/gpu:' + str(hvd.local_rank()))
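# The Horovod variant above runs once per GPU; a standard launch looks
# like the following (script name and flag values are assumptions, but
# horovodrun is the stock Horovod launcher):
#
#   horovodrun -np 4 python lm_train_hvd.py \
#       --datadir=/data/lm --vocab=/data/lm/vocabulary.txt
#
# hvd.init() must run before hvd.local_rank() is used to pin GPUs.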
def main(_):
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps.num_gpus = FLAGS.num_gpus

    vocab = Vocabulary.from_file(FLAGS.datadir + "/lm_vocab.txt",
                                 hps.vocab_size)

    if FLAGS.mode == "train":
        hps.batch_size = 256  # reset batch size for training
        dataset = Dataset(vocab, FLAGS.datadir + "/train/*")
        run_train(dataset, hps, FLAGS.logdir + "/train", ps_device="/gpu:0")
    elif FLAGS.mode.startswith("eval_"):
        if FLAGS.mode.startswith("eval_train"):
            data_dir = FLAGS.datadir + "/train/*"
        elif FLAGS.mode.startswith("eval_test"):
            data_dir = FLAGS.datadir + "/heldout/*"
        else:
            # Fail early instead of hitting an unbound data_dir below.
            raise ValueError("unknown eval mode: %s" % FLAGS.mode)
        print("data_dir:", data_dir)
        dataset = Dataset(vocab, data_dir, deterministic=True)
        run_eval(dataset, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps)
    elif FLAGS.mode.startswith("predict_next"):
        # Note the hardcoded heldout shard used for next-word prediction.
        data_dir = "data/news.en.heldout-00001-of-00050"
        dataset = Dataset(vocab, data_dir)
        predict_next(dataset, hps, FLAGS.logdir, FLAGS.mode,
                     FLAGS.eval_steps, vocab)
print("INDEX: %s" % task_index) cluster = tf.train.ClusterSpec(cluster_spec) server = tf.train.Server(cluster, job_name=role, task_index=task_index) if role == "ps": server.join() else: ps_device = '/job:ps/task:0' """ Start either train or eval. Note hardcoded parts of path for training and eval data """ hps = LM.get_default_hparams().parse(FLAGS.hpconfig) hps._set("num_gpus", FLAGS.num_gpus) print('*****HYPER PARAMETERS*****') print(hps) print('**************************') vocab = Vocabulary.from_file( os.path.join(FLAGS.datadir, "1b_word_vocab.txt")) if FLAGS.mode == "train": #hps.batch_size = 256 dataset = Dataset( vocab, os.path.join(FLAGS.datadir, "training-monolingual.tokenized.shuffled/*")) run_train(dataset, hps, os.path.join(FLAGS.logdir, "train"), ps_device=ps_device)