def predict():
    print("Predicting")
    tf.reset_default_graph()

    # tf Graph input
    x = tf.placeholder(dtype=tf.float32,
                       shape=[None, config.input_window_size - 1, config.input_size],
                       name="input_sequence")
    dec_in = tf.placeholder(dtype=tf.float32,
                            shape=[None, config.test_output_window, config.input_size],
                            name="decoder_input")

    # Define model
    prediction = models.seq2seq(x, dec_in, config, False)

    sess_config = tf.ConfigProto()
    sess_config.gpu_options.allow_growth = True
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.6
    sess_config.allow_soft_placement = True
    sess_config.log_device_placement = False
    sess = tf.Session(config=sess_config)

    # Restore latest model
    with open(checkpoint_dir + 'checkpoint') as f:
        content = f.readlines()
    saved_epoch = int(re.search(r'\d+', content[0]).group())
    model_name = checkpoint_dir + "Epoch_" + str(saved_epoch)
    saver = tf.train.Saver()
    saver.restore(sess, model_name)
    print("Restored session from Epoch ", str(saved_epoch))

    start = timeit.default_timer()
    y_predict = {}
    for act in actions:
        pred = sess.run(prediction, feed_dict={x: x_test[act], dec_in: dec_in_test[act]})
        pred = np.array(pred)
        pred = np.transpose(pred, [1, 0, 2])
        y_predict[act] = pred
        # The following is for the zero-velocity baseline
        # y_predict[act] = np.reshape(np.tile(dec_in_test[act][:, 0], dec_in_test[act].shape[1]), dec_in_test[act].shape)
    stop = timeit.default_timer()
    print("Test Time: ", stop - start)
    return y_predict
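A minimal NumPy sketch (made-up shapes, not from the source) of what the commented-out zero-velocity baseline above computes: every "predicted" frame is simply the first decoder-input frame repeated across the output window.

import numpy as np

dec_in_example = np.random.randn(8, 25, 54)  # (batch, output window, joint dims) -- illustrative only
baseline = np.reshape(np.tile(dec_in_example[:, 0], dec_in_example.shape[1]),
                      dec_in_example.shape)
# Every time step of the "prediction" equals frame 0, i.e. zero velocity
assert np.allclose(baseline, np.repeat(dec_in_example[:, :1], dec_in_example.shape[1], axis=1))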
testset = datas['test']
src_vocab, tgt_vocab = datas['dicts']['src'], datas['dicts']['tgt']
config.src_vocab = src_vocab.size()
config.tgt_vocab = tgt_vocab.size()
testloader = dataloader.get_loader(testset, batch_size=config.batch_size,
                                   shuffle=False, num_workers=0)

if opt.pretrain:
    pretrain_embed = torch.load(config.emb_file)
else:
    pretrain_embed = None

# model
print('building model...\n')
if opt.model == 'seq2seq':
    model = models.seq2seq(config, src_vocab.size(), tgt_vocab.size(), use_cuda,
                           pretrain=pretrain_embed, score_fn=opt.score)
else:
    raise ValueError('Model not found!')

if opt.restore:
    model.load_state_dict(checkpoints['model'])
if use_cuda:
    model.cuda()
if len(opt.gpus) > 1:
    model = nn.DataParallel(model, device_ids=opt.gpus, dim=1)

# optimizer
if opt.restore:
    optim = checkpoints['optim']
else:
def load_model():
    model = models.seq2seq(opt)
    if opt.load_model_path:
        model.load_state_dict(torch.load(opt.load_model_path))
        print("Load Success!", opt.load_model_path)
    return model
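A hedged usage sketch (the call site below is an assumption, not shown in the source): a model loaded this way would typically be switched to eval mode before inference.

model = load_model()
model.eval()                # disable dropout/batch-norm updates for inference
with torch.no_grad():       # no gradients needed at test time
    output = model(batch)   # `batch` stands in for whatever input the model expects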
option.this_expsdir = os.path.join(option.exps_dir, option.tag)
if not os.path.exists(option.this_expsdir):
    os.makedirs(option.this_expsdir)
option.ckpt_dir = os.path.join(option.this_expsdir, "ckpt")
if not os.path.exists(option.ckpt_dir):
    os.makedirs(option.ckpt_dir)
option.model_path = os.path.join(option.ckpt_dir, "model")
option.save()
print("Option saved.")

data = Data(option)
print('Data prepared')

if option.model == 0:
    learner = seq2seq(option)
elif option.model == 1:
    learner = seq2seq_luong(option)
print('Model prepared')

experiment = Experiment(option, learner, data)
print('Experiment is ready')

if not option.no_cuda:
    learner = learner.cuda()
if option.load is not None:
    with open(option.load, 'rb') as f:
        learner.load_state_dict(torch.load(f))

if not option.eval:
    print("Start training...")
    experiment.train()
    # (excerpt starts inside one branch of an earlier if/else)
    opts = model_pt['opts']
    model = seq2seq_trans(opts)
    model.load_state_dict(model_pt['state_dict'])
    model = model.to(current_device)
    use_packed = False
else:
    if current_device == 'cuda':
        model_pt = torch.load('./Models/chat_model_best_22.pt')
    else:
        model_pt = torch.load('./Models/chat_model_best_22.pt',
                              map_location=torch.device('cpu'))
    opts = model_pt['opts']
    model = seq2seq(opts)
    model.load_state_dict(model_pt['state_dict'])
    model = model.to(current_device)
    use_packed = True

with open('./Data/chat_dict.pickle', 'rb') as handle:
    chat_dict = pickle.load(handle)

persona = ("your persona : i love cats and have two cats.\n"
           "your persona : my favorite season is winter.\n"
           "your persona : i won a gold medal in the __unk__ olympics.\n"
           "your persona : i ' ve a hat collection of over 1000 hats.")
current_history = persona
exchange = 1

if use_decoding == 'nucleus':
    print(f'\nNucleus Sample with threshold {threshold}\n')
if use_decoding == 'ngram_block':
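The repository's own decoding routines are not shown here; the following is a minimal, generic sketch of nucleus (top-p) sampling over a single logits vector, using only standard PyTorch calls, to illustrate what the 'nucleus' branch selects. The `threshold` parameter mirrors the variable printed above; the function itself is an assumption, not the project's implementation.

import torch
import torch.nn.functional as F

def nucleus_sample(logits, threshold=0.9):
    """Sample one token id from the smallest set of tokens whose
    cumulative probability exceeds `threshold` (top-p sampling)."""
    probs = F.softmax(logits, dim=-1)
    sorted_probs, sorted_idx = torch.sort(probs, descending=True)
    cumulative = torch.cumsum(sorted_probs, dim=-1)
    # Keep a token if the probability mass before it is still below the threshold
    keep = cumulative - sorted_probs < threshold
    keep[0] = True                        # always keep the most likely token
    filtered = sorted_probs * keep
    filtered = filtered / filtered.sum()  # renormalise over the nucleus
    choice = torch.multinomial(filtered, 1)
    return sorted_idx[choice].item()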
def train():
    print("Training")

    # tf Graph input
    x = tf.placeholder(dtype=tf.float32,
                       shape=[None, config.input_window_size - 1, config.input_size],
                       name="input_sequence")
    y = tf.placeholder(dtype=tf.float32,
                       shape=[None, config.output_window_size, config.input_size],
                       name="raw_labels")
    dec_in = tf.placeholder(dtype=tf.float32,
                            shape=[None, config.output_window_size, config.input_size],
                            name="decoder_input")

    labels = tf.transpose(y, [1, 0, 2])
    labels = tf.reshape(labels, [-1, config.input_size])
    labels = tf.split(labels, config.output_window_size, axis=0, name='labels')

    # Define model
    prediction = models.seq2seq(x, dec_in, config, True)

    sess_config = tf.ConfigProto()
    sess_config.gpu_options.allow_growth = True
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.6
    sess_config.allow_soft_placement = True
    sess_config.log_device_placement = False
    sess = tf.Session(config=sess_config)

    # Define cost function (dispatches on config.loss)
    loss = eval('loss_functions.' + config.loss + '_loss(prediction, labels, config)')

    # Add summaries for the loss
    train_loss = tf.summary.scalar('train_loss', loss)
    valid_loss = tf.summary.scalar('valid_loss', loss)

    # Define training parameters
    optimizer = tf.train.AdamOptimizer(config.learning_rate)
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Gradient clipping: apply the clipped gradients directly.
    # (The original built apply_gradients but then used minimize() as the
    # train op, which would have bypassed the clipping.)
    grads = tf.gradients(loss, tf.trainable_variables())
    grads, _ = tf.clip_by_global_norm(grads, config.max_grad_norm)
    train_op = optimizer.apply_gradients(zip(grads, tf.trainable_variables()),
                                         global_step=global_step)

    saver = tf.train.Saver(max_to_keep=10)
    train_writer = tf.summary.FileWriter("./log", sess.graph)
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())

    # Count total trainable parameters
    total_parameters = 0
    for variable in tf.trainable_variables():
        # shape is an array of tf.Dimension
        shape = variable.get_shape()
        variable_parameters = 1
        for dim in shape:
            variable_parameters *= dim.value
        total_parameters += variable_parameters
    print('Total training parameters: ' + str(total_parameters))

    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    saved_epoch = 0
    train_size = config.training_size
    valid_size = config.validation_size
    best_val_loss = float('inf')

    if config.restore and os.path.exists(checkpoint_dir + 'checkpoint'):
        with open(checkpoint_dir + 'checkpoint') as f:
            content = f.readlines()
        saved_epoch = int(re.search(r'\d+', content[0]).group())
        model_name = checkpoint_dir + "Epoch_" + str(saved_epoch)
        saver.restore(sess, model_name)
        # Re-evaluate on the validation set so early stopping resumes correctly
        v_loss_mean = 0.0
        for i in range(valid_size):
            batch_x, batch_dec_in, batch_y = data_utils.get_batch(config, test_set)
            v_loss, valid_summary = sess.run([loss, valid_loss],
                                             feed_dict={x: batch_x, y: batch_y, dec_in: batch_dec_in})
            v_loss_mean = v_loss_mean * i / (i + 1) + v_loss / (i + 1)
        best_val_loss = v_loss_mean
        print("Restored session from Epoch ", str(saved_epoch))
        print("Best Validation Loss: ", best_val_loss, "\n")

    print("________________________________________________________________")
    best_val_epoch = saved_epoch
    for j in range(saved_epoch, config.max_epoch):
        print("Epoch ", j + 1)
        prog = Progbar(target=train_size)
        prog_valid = Progbar(target=valid_size)

        # Training pass
        for i in range(train_size):
            batch_x, batch_dec_in, batch_y = data_utils.get_batch(config, train_set)
            current_cost, train_summary, _ = sess.run([loss, train_loss, train_op],
                                                      feed_dict={x: batch_x, y: batch_y, dec_in: batch_dec_in})
            train_writer.add_summary(train_summary, j * train_size + i)
            prog.update(i + 1, [("Training Loss", current_cost)])

        # Validation pass
        v_loss_mean = 0.0
        for i in range(valid_size):
            batch_x, batch_dec_in, batch_y = data_utils.get_batch(config, test_set)
            v_loss, valid_summary = sess.run([loss, valid_loss],
                                             feed_dict={x: batch_x, y: batch_y, dec_in: batch_dec_in})
            v_loss_mean = v_loss_mean * i / (i + 1) + v_loss / (i + 1)
            prog_valid.update(i + 1, [("Validation Loss", v_loss)])
            train_writer.add_summary(valid_summary, j * valid_size + i)

        # Keep the best checkpoint and stop early if validation stalls
        if v_loss_mean < best_val_loss:
            model_name = checkpoint_dir + "Epoch_" + str(j + 1)
            best_val_loss = v_loss_mean
            best_val_epoch = j + 1
            saver.save(sess, model_name)
        print("Current Best Epoch: ", best_val_epoch, ", Best Validation Loss: ", best_val_loss, "\n")
        if j + 1 - best_val_epoch > config.early_stop:
            break
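The v_loss_mean update above is an incremental (running) mean. A tiny standalone check, not part of the source, showing the update rule matches np.mean:

import numpy as np

losses = [0.7, 0.4, 0.9, 0.3]
mean = 0.0
for i, v in enumerate(losses):
    mean = mean * i / (i + 1) + v / (i + 1)   # same update as v_loss_mean above
assert np.isclose(mean, np.mean(losses))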
#coding:utf-8
from models import seq2seq
from datautils import tianya

data = tianya()
data.load('../data/data.logits', '../data/vocab.json')
data.pad(30)
data.build_trainSet()

model = seq2seq(emb_dim=100, vocab_size=data.vocab_size,
                encoder_size=data.pad_size, decoder_size=data.pad_size)
model.train(data, 128, 100, 50)
def __init__(self, config, module):
    super(s2sae, self).__init__()
    self.s2s = models.seq2seq(config)
    self.ae = getattr(models, module)(config, use_attention=False, encoder=self.s2s.encoder)
# (excerpt starts inside an earlier `with open(...) as f:` block that binds f)
question_padded = pickle.load(f, encoding='latin1')
with open('labels.pkl', 'rb') as f:
    answer_padded = pickle.load(f, encoding='latin1')

tic = time.time()
t = c = 0
batch_time = 0
epoch = config.epoch
step = config.step
batch_size = config.batch_size
dict_size = config.dictionary_length
loss_array = []
check_point_loss = 9999

model = models.seq2seq()
if os.path.isfile("model_weights.h5"):
    try:
        model.load_weights("model_weights.h5")
        print("Loaded previously trained weights!")
    except:
        pass

try:
    epoch = int(sys.argv[1])
except:
    pass

loss = []
print("Number of Batches for each epoch: {}".format(int(len(question_padded) / step)))