import json
import os
import shutil

import tensorflow as tf

import seq2seq_model


def create_model(sess, model_type, FLAGS, mode):
    """Create the model; only used in train mode."""
    if model_type == "seq2seq":
        model = seq2seq_model.Seq2Seq(FLAGS, mode)
        model.build()
    elif model_type == "s2vt":
        pass  # s2vt is not implemented here; `model` stays unbound for this type

    # Create the task directory for this run.
    model_path = os.path.join(FLAGS.logdir, FLAGS.task_name)
    if not os.path.exists(model_path):
        # Build a new model from scratch using the FLAGS configuration.
        os.makedirs(model_path)
        os.makedirs(os.path.join(model_path, "eval"))
        print("Save model to {}".format(model_path))
    elif FLAGS.reset:
        # Remove the existing model directory and restart.
        shutil.rmtree(model_path)
        os.makedirs(model_path)
        print("Remove existing model at {} and restart.".format(model_path))
    else:
        raise ValueError("Fail to create the new model: {} already exists "
                         "and --reset is not set.".format(model_path))

    # Save the current configuration next to the checkpoints.
    config = dict(FLAGS.__flags.items())  # TF1-style flag introspection
    with open(os.path.join(model_path, "config.json"), "w") as f:
        json.dump(config, f)

    # Initialize all variables.
    sess.run(tf.global_variables_initializer())
    return model
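# Minimal usage sketch for create_model above (assumes FLAGS is the parsed
# tf.app.flags.FLAGS object providing logdir, task_name, and reset; the
# training loop itself is elided):
def train(FLAGS):
    with tf.Session() as sess:
        model = create_model(sess, "seq2seq", FLAGS, mode="train")
        # ... run training steps with `model` and `sess` here ...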
import pickle

import torch

import seq2seq_model

# load_data, idx_PAD, and test_multi_choice are provided by the accompanying
# training utilities.


def main(args):
    # Load the vocabulary saved at training time.
    with open(args.vocab, 'rb') as f:
        vocab_dict = pickle.load(f)
    vocab2index = vocab_dict['vocab2index']
    index2vocab = vocab_dict['index2vocab']

    episodes = load_data(args.data, vocab2index, index2vocab)[0]

    embedding_dim = 100
    # The config keys (including the misspelled 'embdding_weight') must match
    # what seq2seq_model.Seq2Seq expects.
    model = seq2seq_model.Seq2Seq({
        'num_embeddings': len(index2vocab),
        'embedding_dim': embedding_dim,
        'embdding_weight': None,
        'rnn_class': torch.nn.GRU,
        'hidden_size': 128,
        'num_layers': 2,
        'dropout': 0.5,
        'bidirectional': True,
        'history_size': 256 * 2,
        'persona_size': embedding_dim
    }).cuda()
    model.load_state_dict(torch.load(args.model))

    # size_average=False sums the loss over tokens; in modern PyTorch this is
    # spelled reduction='sum'.
    criterion = torch.nn.CrossEntropyLoss(ignore_index=idx_PAD,
                                          size_average=False).cuda()

    # Evaluate on the last 100 episodes.
    loss, choice_accu = test_multi_choice(episodes[-100:], model, criterion)
    print(loss, choice_accu)
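# Hedged sketch of the CLI wiring implied by the args.* attributes in main
# above; the flag names are assumptions, not recovered from the original script.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--vocab', required=True, help='vocab.pickle saved at training time')
    parser.add_argument('--model', required=True, help='state_dict checkpoint to evaluate')
    parser.add_argument('--data', required=True, help='evaluation data file')
    main(parser.parse_args())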
import tensorflow as tf

# seq2seq_model, s2vt_model, dataset, hook, and load_vocab are project-local
# modules.


def experiment_fn(run_config, params):
    """Experiment API: build the estimator, input functions, and hooks."""
    if params.model == "seq2seq":
        model = seq2seq_model.Seq2Seq()
    elif params.model == "s2vt":
        model = s2vt_model.S2VT()
    else:
        raise ValueError("Unknown model type: {}".format(params.model))
    estimator = tf.estimator.Estimator(model_fn=model.model_fn,
                                       model_dir=params.logdir,
                                       params=params,
                                       config=run_config)

    vocab = load_vocab(params.vocab_path)

    # Read pre-extracted video features and caption ids for train/test.
    train_videos, train_captions = dataset.data_reader(
        params.train_feature_path, params.train_vid_path,
        params.train_capid_path)
    test_videos, test_captions = dataset.data_reader(
        params.test_feature_path, params.test_vid_path,
        params.test_capid_path)

    train_input_fn, train_input_hook = dataset.get_train_inputs(
        train_videos, train_captions)
    test_input_fn, test_input_hook = dataset.get_test_inputs(
        test_videos, test_captions)

    experiment = tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=test_input_fn,
        train_steps=params.train_steps,
        min_eval_frequency=params.min_eval_frequency,
        train_monitors=[
            train_input_hook,
            hook.print_variables(
                variables=["Train_Data/caption_0", "train/pred_0"],
                vocab=vocab,
                every_n_iter=params.check_hook_n_iter)
        ],
        eval_hooks=[
            test_input_hook,
            hook.print_variables(
                variables=["Test_Data/caption_0", "train/pred_0"],
                vocab=vocab,
                every_n_iter=params.check_hook_n_iter)
        ])
    return experiment
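# Sketch of launching experiment_fn with the TF 1.x learn_runner API (now
# deprecated); assumes `params` is a tf.contrib.training.HParams instance
# carrying the fields referenced above:
def run_experiment(params):
    run_config = tf.contrib.learn.RunConfig(model_dir=params.logdir)
    tf.contrib.learn.learn_runner.run(
        experiment_fn=experiment_fn,
        run_config=run_config,
        schedule="train_and_evaluate",
        hparams=params)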
def _make_estimator(params):
    print("Params are recovered: {}".format(params))
    # Force CPU-only execution by hiding GPUs from the session.
    run_config = tf.contrib.learn.RunConfig(
        model_dir=FLAGS.logdir,
        session_config=tf.ConfigProto(device_count={"GPU": 0}))
    if params.model == "seq2seq":
        model = seq2seq_model.Seq2Seq()
    elif params.model == "s2vt":
        model = s2vt_model.S2VT()
    else:
        raise ValueError("Unknown model type: {}".format(params.model))
    return tf.estimator.Estimator(model_fn=model.model_fn,
                                  model_dir=FLAGS.logdir,
                                  params=params,
                                  config=run_config)
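# Hypothetical inference helper built on _make_estimator, reusing
# dataset.get_test_inputs from the experiment code above; predict_captions is
# an assumed name, not part of the original project:
def predict_captions(params, videos, captions):
    estimator = _make_estimator(params)
    test_input_fn, test_input_hook = dataset.get_test_inputs(videos, captions)
    return list(estimator.predict(input_fn=test_input_fn,
                                  hooks=[test_input_hook]))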
def create_model(session):
    """Build the bucketed Seq2Seq model and restore a checkpoint if one exists."""
    model = seq2seq_model.Seq2Seq(
        FLAGS.enc_vocabulary_size,
        FLAGS.dec_vocabulary_size,
        _buckets,
        FLAGS.size,
        FLAGS.num_layers,
        FLAGS.max_gradient_norm,
        FLAGS.batch_size,
        FLAGS.learning_rate,
        FLAGS.learning_rate_decay_factor,
    )
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from {}".format(ckpt.model_checkpoint_path))
        model.saver.restore(session, ckpt.model_checkpoint_path)
    else:
        print("Creating model with fresh parameters in {}".format(FLAGS.train_dir))
        session.run(tf.global_variables_initializer())
    return model
import codecs
import datetime
import os
import pickle

import numpy as np
import torch
import torch.optim as optim

import seq2seq_model

# split_data, build_vocab, load_embedding, one_epoch, and idx_PAD are provided
# by the accompanying training utilities.


def main(args):
    episodes = split_data(args.data)
    episodes = episodes[:len(episodes) // 30]  # keep 1/30 of the data for debugging

    # Hold out a validation split.
    valid_rate = 0.15
    episodes = np.array(episodes, dtype=object)
    valid_num = int(valid_rate * len(episodes))
    valid_episodes = episodes[:valid_num]
    episodes = episodes[valid_num:]

    # NOTE: embedding_dim is consumed by build_vocab before load_embedding
    # returns the real value; 100 is an assumed default and must match the
    # pretrained embedding size.
    embedding_dim = 100
    vocab2index, index2vocab = build_vocab(episodes, args.embedding,
                                           embedding_dim)
    embedding_weight, embedding_dim = load_embedding(args.embedding,
                                                     vocab2index, index2vocab)

    batch_size = args.batch_size
    save_round = 1  # checkpoint every epoch

    date = datetime.datetime.now().strftime("%d-%H-%M")
    save_path = 'model/model_{}'.format(date)
    print('save_path = {}'.format(save_path))
    os.makedirs(save_path, exist_ok=True)
    with open(os.path.join(save_path, 'vocab.pickle'), 'wb') as f:
        pickle.dump({
            'vocab2index': vocab2index,
            'index2vocab': index2vocab
        }, f)
    log_file = codecs.open(os.path.join(save_path, 'log'), 'w')

    embedding_weight = torch.Tensor(embedding_weight)
    # The config keys (including the misspelled 'embdding_weight') must match
    # what seq2seq_model.Seq2Seq expects.
    model = seq2seq_model.Seq2Seq({
        'num_embeddings': len(index2vocab),
        'embedding_dim': embedding_dim,
        'embdding_weight': embedding_weight,
        'rnn_class': torch.nn.GRU,
        'hidden_size': 128,
        'num_layers': 2,
        'dropout': 0.5,
        'bidirectional': True,
        'history_size': 256 * 2,
        'persona_size': embedding_dim
    }).cuda()
    criterion = torch.nn.CrossEntropyLoss(ignore_index=idx_PAD).cuda()
    optimizer = optim.Adam(model.parameters())

    # Train in two parts per epoch to keep each pass small, then validate.
    part_num = 2
    part_size = len(episodes) // part_num + 1
    for e in range(100):
        for p in range(part_num):
            loss = one_epoch(e, episodes[p * part_size:(p + 1) * part_size],
                             model, criterion, optimizer, batch_size,
                             train=True)
            print('episodes = {}, training_loss = {}'.format(e, loss))
            print('episodes = {}, training_loss = {}'.format(e, loss),
                  file=log_file)
        loss = one_epoch(e, valid_episodes, model, criterion, optimizer,
                         batch_size, train=False)
        print('episodes = {}, valid_loss = {}'.format(e, loss))
        print('episodes = {}, valid_loss = {}'.format(e, loss), file=log_file)
        if e % save_round == save_round - 1:
            with open(os.path.join(save_path, 'model_{}'.format(e)), 'wb') as f:
                torch.save(model.state_dict(), f)
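# Hedged restore sketch matching the per-epoch checkpoints written above
# (epoch number and save_path are illustrative; this mirrors the evaluation
# script's load_state_dict usage):
def load_checkpoint(model, save_path, epoch):
    with open(os.path.join(save_path, 'model_{}'.format(epoch)), 'rb') as f:
        model.load_state_dict(torch.load(f))
    return model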
from datasets.cornell_corpus import data
import data_utils
import seq2seq_model

# load data from pickle and npy files
metadata, idx_q, idx_a = data.load_data(PATH='datasets/cornell_corpus/')
(trainX, trainY), (testX, testY), (validX, validY) = data_utils.split_dataset(
    idx_q, idx_a)

# parameters
xseq_len = trainX.shape[-1]
yseq_len = trainY.shape[-1]
batch_size = 32
xvocab_size = len(metadata['idx2w'])
yvocab_size = xvocab_size
emb_dim = 1024

model = seq2seq_model.Seq2Seq(xseq_len=xseq_len,
                              yseq_len=yseq_len,
                              xvocab_size=xvocab_size,
                              yvocab_size=yvocab_size,
                              ckpt_path='ckpt/',
                              emb_dim=emb_dim,
                              num_layers=3)

val_batch_gen = data_utils.rand_batch_gen(validX, validY, 32)
train_batch_gen = data_utils.rand_batch_gen(trainX, trainY, batch_size)
sess = model.train(train_batch_gen, val_batch_gen)
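# Hedged inference sketch: after training, an interface like this typically
# reloads the last checkpoint and decodes a batch. restore_last_session and
# predict are assumed method names here, not confirmed from seq2seq_model:
sess = model.restore_last_session()
input_batch, _ = next(val_batch_gen)
decoded = model.predict(sess, input_batch)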