def sampling():
    batchloader = BatchLoader(with_label=True)

    # gpu memory
    sess_conf = tf.ConfigProto(gpu_options=tf.GPUOptions(
        # allow_growth = True
    ))

    with tf.Graph().as_default():
        with tf.Session(config=sess_conf) as sess:
            with tf.variable_scope("VAE"):
                vae_restored = VAE[FLAGS.VAE_NAME](batchloader,
                                                   is_training=False,
                                                   ru=False)

            saver = tf.train.Saver()
            saver.restore(sess, MODEL_DIR + "/model50.ckpt")

            # full batches plus one partial batch of size res
            itr = SAMPLE_NUM // FLAGS.BATCH_SIZE
            res = SAMPLE_NUM - itr * FLAGS.BATCH_SIZE

            # decode random draws from the prior N(0, I)
            generated_texts = []
            for i in range(itr + 1):
                z = np.random.normal(
                    loc=0.0, scale=1.0,
                    size=[FLAGS.BATCH_SIZE, FLAGS.LATENT_VARIABLE_SIZE])

                sample_logits = sess.run(
                    vae_restored.logits,
                    feed_dict={vae_restored.latent_variables: z})

                # keep only res texts from the final (partial) batch
                sample_num = res if i == itr else FLAGS.BATCH_SIZE
                sample_texts = batchloader.logits2str(logits=sample_logits,
                                                      sample_num=sample_num)
                generated_texts.extend(sample_texts)

            for i in range(SAMPLE_NUM):
                log_and_print(SAVE_FILE, generated_texts[i])
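# A minimal sketch of what batchloader.logits2str is assumed to do above:
# greedy argmax over the vocabulary at each timestep, then id-to-word lookup.
# logits2str_sketch and idx2word are hypothetical names, and the assumed
# logits layout is [batch, seq_len, vocab]; the real method may handle
# <eos> and padding differently.
def logits2str_sketch(logits, sample_num, idx2word):
    import numpy as np
    ids = np.argmax(np.asarray(logits), axis=-1)  # [batch, seq_len]
    return [" ".join(idx2word[int(t)] for t in row)
            for row in ids[:sample_num]]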
def test():
    batchloader = BatchLoader()

    # gpu memory
    sess_conf = tf.ConfigProto(gpu_options=tf.GPUOptions(
        # per_process_gpu_memory_fraction=0.4,
        # allow_growth = True
    ))

    with tf.Graph().as_default():
        with tf.Session(config=sess_conf) as sess:
            with tf.variable_scope("Model"):
                model_restored = Model(batchloader, is_training=False)

            saver = tf.train.Saver()
            saver.restore(sess, MODEL_DIR + "/model10.ckpt")

            with open(FLAGS.TEST_PATH, "rb") as f:
                test_data = pkl.load(f)
            sample_num = len(test_data)
            # only full batches are evaluated; the remainder is dropped
            log_and_print(SAVE_FILE, "sample_num: %d" %
                          (FLAGS.BATCH_SIZE * (sample_num // FLAGS.BATCH_SIZE)))

            with open(FLAGS.TEST_LABEL_PATH, "rb") as f:
                test_label = pkl.load(f)

            accuracy_save = []
            for i in range(sample_num // FLAGS.BATCH_SIZE):
                tmp_data = test_data[FLAGS.BATCH_SIZE * i:FLAGS.BATCH_SIZE * (i + 1)]
                tmp_label = test_label[FLAGS.BATCH_SIZE * i:FLAGS.BATCH_SIZE * (i + 1)]

                accuracy = sess.run(
                    model_restored.accuracy,
                    feed_dict={model_restored.input_text: tmp_data,
                               model_restored.label: tmp_label})
                accuracy_save.append(accuracy)

            log_and_print(SAVE_FILE, "accuracy: %f" % np.average(accuracy_save))
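# A plausible sketch of the log_and_print helper used throughout these
# scripts (an assumption, not the repository's actual implementation);
# br=False appends to the current line instead of ending it.
import sys

def log_and_print(path, text, br=True):
    suffix = "\n" if br else ""
    sys.stdout.write(text + suffix)
    with open(path, "a") as f:
        f.write(text + suffix)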
params['resume'] = resume_save
params['path'] = resume_path
params['name'] = resume_name

# transform to expected format
params['trainfolds'] = list(params['trainfolds'])
params['scenes_trainvalid'] = list(params['scenes_trainvalid'])
if params['validfold'] == -1:
    params['scenes_test'] = list(params['scenes_test'])
print('trainfolds: {}, validfold: {}'.format(params['trainfolds'],
                                             params['validfold']))

batchloader_training = BatchLoader(
    params=params,
    mode='train',
    fold_nbs=params['trainfolds'],
    scene_nbs=params['scenes_trainvalid'],
    batchsize=params['batchsize'],
    seed=params['seed'] if params['seed'] != -1 else random.randint(1, 1000))
# seed for training only

if params['validfold'] != -1:
    # validation set
    batchloader_validation = BatchLoader(
        params=params,
        mode='val',
        fold_nbs=[params['validfold']],
        scene_nbs=params['scenes_trainvalid'],
        batchsize=params['batchsize'])  # no seed for validation
else:
    # test set
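    # The original snippet is truncated at this point. A plausible
    # completion for the test-set branch, by symmetry with the validation
    # loader above; 'scenes_test' comes from the setup code, everything
    # else here is an assumption:
    #     batchloader_test = BatchLoader(
    #         params=params,
    #         mode='test',
    #         scene_nbs=params['scenes_test'],
    #         batchsize=params['batchsize'])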
def train(model_path=TRAIN_RESUME):
    word_to_num, num_to_word, paired_data = [], [], []
    if model_path != "":
        print("Loading dictionary")
        word_to_num, num_to_word, paired_data = data_parse_main(no_save=True)
        with open(os.path.join(MODEL_PATH, "dict_pickle"), "rb") as f:
            dict_pickle = pickle.load(f)
        word_to_num, num_to_word = dict_pickle["word_to_num"], dict_pickle["num_to_word"]
    else:
        print("Creating dictionary from scratch")
        word_to_num, num_to_word, paired_data = data_parse_main(no_save=False)

    vocab_size = len(word_to_num)
    train_loader = BatchLoader(word_to_num, num_to_word, paired_data)
    train_examples = len(train_loader)

    chatbot_model = EncoderDecoder(vocab_size).to(DEVICE)
    if model_path != "":
        print("Loading model from path {}".format(model_path))
        chatbot_model.load_state_dict(torch.load(model_path))
    else:
        print("Creating model from scratch")

    criterion = nn.CrossEntropyLoss(ignore_index=PAD_TOKEN)
    optimizer = torch.optim.AdamW(chatbot_model.parameters(), lr=LEARNING_RATE)

    best_loss = np.inf
    best_bleu = -np.inf
    for epoch in range(NUM_EPOCHS):
        epoch_bleu = []
        epoch_loss = []
        chatbot_model.train()
        for i in tqdm(range(train_examples)):
            optimizer.zero_grad()
            input_sentences, input_lengths, output_sentences, output_lengths = \
                train_loader.get_batch()
            input_sentences = input_sentences.to(DEVICE)
            input_lengths = input_lengths.to(DEVICE)
            output_sentences = output_sentences.to(DEVICE)
            output_lengths = output_lengths.to(DEVICE)

            batch_num = input_sentences.size(0)
            time_steps = input_sentences.size(1)

            preds = chatbot_model(input_sentences, input_lengths,
                                  target=output_sentences, is_train=True)

            # BLEU is computed against the target without the leading
            # start-of-sentence token
            ground_truth_sent = decode(num_to_word, output_sentences[:, 1:],
                                       do_argmax=False)
            predictions = decode(num_to_word, preds)
            bleu_sc = calculate_bleu_score(ground_truth_sent, predictions)

            output_sentences = output_sentences[:, 1:].contiguous().view(-1)
            loss = criterion(preds.view(-1, preds.shape[-1]), output_sentences)
            epoch_loss.append(loss.item())
            epoch_bleu.append(bleu_sc)

            loss.backward()
            torch.nn.utils.clip_grad_norm_(chatbot_model.parameters(), 50)
            optimizer.step()

        # qualitative check: decode a fixed set of validation strings
        chatbot_model.eval()
        input_tensor, strings_to_validate, lengths = encode(word_to_num)
        input_tensor = input_tensor.to(DEVICE)
        lengths = lengths.to(DEVICE)
        output_tensor = None
        with torch.no_grad():
            output_tensor = chatbot_model(input_tensor, lengths,
                                          target=None, is_train=False)
        all_outputs = decode(num_to_word, output_tensor)
        print_all(strings_to_validate, all_outputs)
        chatbot_model.train()

        total_epoch_loss = np.mean(epoch_loss)
        total_epoch_bleu = np.mean(epoch_bleu)
        print("BLEU score is: {}\nLoss is: {}".format(total_epoch_bleu,
                                                      total_epoch_loss))

        if total_epoch_loss < best_loss:
            best_loss = total_epoch_loss

        if total_epoch_bleu > best_bleu:
            best_bleu = total_epoch_bleu
            print("BLEU score increased; saving model")
            model_path = os.path.join(
                MODEL_PATH,
                "epoch_{}_loss_{:.2f}_bleu_score_{:.2f}.pt".format(
                    epoch, total_epoch_loss, total_epoch_bleu))
            torch.save(chatbot_model.state_dict(), model_path)
        else:
            print("BLEU score did not increase, so not saving model")
        print("\nBLEU score for the epoch is {} and the best BLEU score is {}"
              .format(total_epoch_bleu, best_bleu))
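# A minimal sketch of the calculate_bleu_score helper assumed above, using
# NLTK's corpus_bleu with smoothing; the real helper may tokenize or weight
# n-grams differently.
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction

def calculate_bleu_score(references, hypotheses):
    # one reference string per hypothesis string
    refs = [[ref.split()] for ref in references]
    hyps = [hyp.split() for hyp in hypotheses]
    return corpus_bleu(refs, hyps,
                       smoothing_function=SmoothingFunction().method1)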
# negative log-likelihood of the mixture: sum the weighted component
# likelihoods over the mixture axis, then take -log and sum over the batch
loss = tf.reduce_sum(loss, 1, keep_dims=True)
loss = tf.reduce_sum(-tf.log(loss))

# other branch of graph for predictions
max_indices = tf.argmax(pi_k, 1)

train_op = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)

# launch session
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# training
print('starting training...')
batchLoader = BatchLoader(data, batch_size)
for epoch in range(epochs):
    isLastBatch = False
    i = 0
    while not isLastBatch:
        i += 1
        inputs, targets, isLastBatch = batchLoader.nextRNNBatch()
        _, cost = sess.run([train_op, loss], {x: inputs, y: targets})
        print('  epoch = ' + str(epoch) + ', i = ' + str(i) +
              ', loss = ' + str(cost * N / batch_size))

print('sampling from the model....')
mu_i, maxima, pi_i, sigma_i = sess.run([mu_k, max_indices, pi_k, sigma_k],
                                       {x: data})
maxima = np.array(maxima)
mu_i = np.array(mu_i)
pi_i = np.array(pi_i)
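# A hedged sketch of drawing one sample per input from its most probable
# mixture component; assumes mu_i and sigma_i have shape [N, K] and maxima
# has shape [N] (component indices). Not part of the original script.
rows = np.arange(mu_i.shape[0])
chosen_mu = mu_i[rows, maxima]
chosen_sigma = sigma_i[rows, maxima]
samples = np.random.normal(loc=chosen_mu, scale=chosen_sigma)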
def main():
    os.mkdir(FLAGS.LOG_DIR)
    os.mkdir(FLAGS.LOG_DIR + "/model")
    log_file = FLAGS.LOG_DIR + "/log.txt"
    shutil.copyfile("config.py", FLAGS.LOG_DIR + "/config.py")

    # gpu memory
    sess_conf = tf.ConfigProto(gpu_options=tf.GPUOptions(
        # per_process_gpu_memory_fraction=0.4,
        # allow_growth = True
    ))

    with tf.Graph().as_default():
        with tf.Session(config=sess_conf) as sess:
            batchloader = BatchLoader()

            with tf.variable_scope("Model"):
                model_train = Model(batchloader, is_training=True)
            with tf.variable_scope("Model", reuse=True):
                model_val = Model(batchloader, is_training=False)

            saver = tf.train.Saver()
            summary_writer = tf.summary.FileWriter(FLAGS.LOG_DIR, sess.graph)
            sess.run(tf.global_variables_initializer())

            log_and_print(log_file, "start training")

            loss_log = []
            accuracy_log = []
            lr = FLAGS.LEARNING_RATE
            step = 0
            for epoch in range(FLAGS.EPOCH):
                log_and_print(log_file, "epoch %d" % (epoch + 1))

                if epoch >= FLAGS.LR_DECAY_START:
                    lr *= 0.95

                for batch in range(FLAGS.BATCHES_PER_EPOCH):
                    step += 1
                    input_text, label = batchloader.next_batch(
                        FLAGS.BATCH_SIZE, "train")
                    feed_dict = {model_train.input_text: input_text,
                                 model_train.label: label,
                                 model_train.lr: lr}

                    loss, accuracy, merged_summary, _ \
                        = sess.run([model_train.loss,
                                    model_train.accuracy,
                                    model_train.merged_summary,
                                    model_train.train_op],
                                   feed_dict=feed_dict)

                    loss_log.append(loss)
                    accuracy_log.append(accuracy)
                    summary_writer.add_summary(merged_summary, step)

                    # log
                    if batch % 100 == 99:
                        log_and_print(log_file, "epoch %d batch %d" %
                                      ((epoch + 1), (batch + 1)), br=False)

                        ave_loss = np.average(loss_log)
                        log_and_print(log_file, "\ttrain loss: %f" % ave_loss,
                                      br=False)
                        ave_acc = np.average(accuracy_log)
                        log_and_print(log_file, "\ttrain accuracy: %f" % ave_acc,
                                      br=False)
                        loss_log = []
                        accuracy_log = []

                        # valid output
                        input_text, label = batchloader.next_batch(
                            FLAGS.BATCH_SIZE, "valid")
                        feed_dict = {model_val.input_text: input_text,
                                     model_val.label: label}

                        loss, accuracy, merged_summary \
                            = sess.run([model_val.loss,
                                        model_val.accuracy,
                                        model_val.merged_summary],
                                       feed_dict=feed_dict)

                        log_and_print(log_file, "\tval loss: %f" % loss,
                                      br=False)
                        log_and_print(log_file, "\tval accuracy: %f" % accuracy)
                        summary_writer.add_summary(merged_summary, step)

                # save model
                save_path = saver.save(
                    sess, FLAGS.LOG_DIR + ("/model/model%d.ckpt" % (epoch + 1)))
                log_and_print(log_file, "Model saved in file %s" % save_path)
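# For reference, the multiplicative decay above implies this closed form for
# the learning rate at a given epoch (an illustration, not repository code):
def lr_at_epoch(epoch, base_lr, decay_start):
    decays = max(0, epoch - decay_start + 1)
    return base_lr * (0.95 ** decays)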
def main():
    os.mkdir(FLAGS.LOG_DIR)
    os.mkdir(FLAGS.LOG_DIR + "/model")
    log_file = FLAGS.LOG_DIR + "/log.txt"
    shutil.copyfile("config.py", FLAGS.LOG_DIR + "/config.py")
    shutil.copyfile("README.md", FLAGS.LOG_DIR + "/README.md")

    # gpu memory
    sess_conf = tf.ConfigProto(gpu_options=tf.GPUOptions(
        # allow_growth = True
    ))

    with tf.Graph().as_default():
        with tf.Session(config=sess_conf) as sess:
            batchloader = BatchLoader(with_label=False)

            with tf.variable_scope("VAE"):
                vae = VAE[FLAGS.VAE_NAME](batchloader, is_training=True,
                                          ru=False)
            with tf.variable_scope("VAE", reuse=True):
                vae_test = VAE[FLAGS.VAE_NAME](batchloader, is_training=False,
                                               ru=True)

            saver = tf.train.Saver()
            summary_writer = tf.summary.FileWriter(FLAGS.LOG_DIR, sess.graph)
            sess.run(tf.global_variables_initializer())

            log_and_print(log_file, "start training")

            loss_sum = []
            reconst_loss_sum = []
            kld_sum = []
            lr = FLAGS.LEARNING_RATE
            step = 0
            for epoch in range(FLAGS.EPOCH):
                log_and_print(log_file, "epoch %d" % (epoch + 1))

                if epoch >= FLAGS.LR_DECAY_START:
                    lr *= 0.95

                for batch in range(FLAGS.BATCHES_PER_EPOCH):
                    step += 1
                    # tanh-based KL annealing: the KL term's weight rises
                    # smoothly from ~0 to 1, crossing 0.5 at step 3500
                    kld_weight = (math.tanh((step - 3500) / 1000) + 1) / 2

                    encoder_input, decoder_input, target = \
                        batchloader.next_batch(FLAGS.BATCH_SIZE, "train")
                    feed_dict = {vae.encoder_input: encoder_input,
                                 vae.decoder_input: decoder_input,
                                 vae.target: target,
                                 vae.kld_weight: kld_weight,
                                 vae.step: step,
                                 vae.lr: lr}

                    logits, loss, reconst_loss, kld, merged_summary, _ \
                        = sess.run([vae.logits, vae.loss, vae.reconst_loss,
                                    vae.kld, vae.merged_summary, vae.train_op],
                                   feed_dict=feed_dict)

                    reconst_loss_sum.append(reconst_loss)
                    kld_sum.append(kld)
                    loss_sum.append(loss)
                    summary_writer.add_summary(merged_summary, step)

                    if batch % 100 == 99:
                        log_and_print(log_file, "epoch %d batch %d" %
                                      ((epoch + 1), (batch + 1)), br=False)

                        ave_loss = np.average(loss_sum)
                        log_and_print(log_file, "\tloss: %f" % ave_loss,
                                      br=False)
                        ave_rnnloss = np.average(reconst_loss_sum)
                        log_and_print(log_file, "\treconst_loss: %f" % ave_rnnloss,
                                      br=False)
                        ave_kld = np.average(kld_sum)
                        log_and_print(log_file, "\tkld: %f" % ave_kld, br=False)

                        loss_sum = []
                        reconst_loss_sum = []
                        kld_sum = []

                        # train input and its reconstruction
                        sample_train_input, sample_train_input_list \
                            = sess.run([vae.encoder_input, vae.encoder_input_list],
                                       feed_dict=feed_dict)
                        encoder_input_texts = batchloader.logits2str(
                            sample_train_input_list, 1, onehot=False, numpy=True)
                        log_and_print(log_file,
                                      "\ttrain input: %s" % encoder_input_texts[0])
                        sample_train_outputs = batchloader.logits2str(logits, 1)
                        log_and_print(log_file,
                                      "\ttrain output: %s" % sample_train_outputs[0])

                        # validation output
                        sample_input, _, sample_target = \
                            batchloader.next_batch(FLAGS.BATCH_SIZE, "test")
                        sample_input_list, sample_latent_variables = \
                            sess.run([vae_test.encoder_input_list,
                                      vae_test.encoder.latent_variables],
                                     feed_dict={vae_test.encoder_input: sample_input})
                        sample_logits, valid_loss, merged_summary = \
                            sess.run([vae_test.logits, vae_test.reconst_loss,
                                      vae_test.merged_summary],
                                     feed_dict={vae_test.target: sample_target,
                                                vae_test.latent_variables:
                                                    sample_latent_variables,
                                                vae_test.kld_weight: kld_weight})
                        log_and_print(log_file, "\tvalid loss: %f" % valid_loss)

                        sample_input_texts = batchloader.logits2str(
                            sample_input_list, 1, onehot=False, numpy=True)
                        sample_output_texts = batchloader.logits2str(
                            sample_logits, 1)
                        log_and_print(log_file,
                                      "\tsample input: %s" % sample_input_texts[0])
                        log_and_print(log_file,
                                      "\tsample output: %s" % sample_output_texts[0])
                        summary_writer.add_summary(merged_summary, step)

                # save model
                save_path = saver.save(
                    sess, FLAGS.LOG_DIR + ("/model/model%d.ckpt" % (epoch + 1)))
                log_and_print(log_file, "Model saved in file %s" % save_path)
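# For intuition, the tanh KL-annealing schedule used above evaluated at a
# few steps (a quick standalone check, not part of the training script):
import math
for s in (0, 2500, 3500, 4500, 7000):
    print(s, round((math.tanh((s - 3500) / 1000) + 1) / 2, 3))
# prints roughly: 0 -> 0.001, 2500 -> 0.119, 3500 -> 0.5,
#                 4500 -> 0.881, 7000 -> 0.999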