def collate_fn(self, batch): r""" Perform preprocessing and create a final data batch: 1. PAD sequences with the longest sequence in the batch 2. Convert Audio signal to Spectrograms. 3. PAD sequences that can be divided by r. 4. Convert Numpy to Torch tensors. """ # Puts each data field into a tensor with outer dimension batch size if isinstance(batch[0], collections.Mapping): keys = list() wav = [d['wav'] for d in batch] item_idxs = [d['item_idx'] for d in batch] text = [d['text'] for d in batch] text_lenghts = np.array([len(x) for x in text]) max_text_len = np.max(text_lenghts) linear = [self.ap.spectrogram(w).astype('float32') for w in wav] mel = [self.ap.melspectrogram(w).astype('float32') for w in wav] mel_lengths = [m.shape[1] + 1 for m in mel] # +1 for zero-frame # compute 'stop token' targets stop_targets = [ np.array([0.] * (mel_len - 1)) for mel_len in mel_lengths ] # PAD stop targets stop_targets = prepare_stop_target(stop_targets, self.outputs_per_step) # PAD sequences with largest length of the batch text = prepare_data(text).astype(np.int32) wav = prepare_data(wav) # PAD features with largest length + a zero frame linear = prepare_tensor(linear, self.outputs_per_step) mel = prepare_tensor(mel, self.outputs_per_step) assert mel.shape[2] == linear.shape[2] timesteps = mel.shape[2] # B x T x D linear = linear.transpose(0, 2, 1) mel = mel.transpose(0, 2, 1) # convert things to pytorch text_lenghts = torch.LongTensor(text_lenghts) text = torch.LongTensor(text) linear = torch.FloatTensor(linear) mel = torch.FloatTensor(mel) mel_lengths = torch.LongTensor(mel_lengths) stop_targets = torch.FloatTensor(stop_targets) return text, text_lenghts, linear, mel, mel_lengths, stop_targets, item_idxs[ 0] raise TypeError(("batch must contain tensors, numbers, dicts or lists;\ found {}".format(type(batch[0]))))
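# collate_fn above relies on prepare_data / prepare_tensor / prepare_stop_target,
# which are imported from the project's utilities. Below is a minimal sketch of
# what they are assumed to do (pad to the batch maximum and round the time axis
# up to a multiple of r); the project's actual helpers may differ in detail.
import numpy as np

def _pad_to(x, length):
    # pad a 1-D array with zeros up to `length`
    return np.pad(x, (0, length - x.shape[0]), mode='constant')

def prepare_data_sketch(inputs):
    # pad a list of 1-D arrays (text ids or raw audio) to the longest one
    max_len = max(x.shape[0] for x in inputs)
    return np.stack([_pad_to(np.asarray(x), max_len) for x in inputs])

def prepare_tensor_sketch(inputs, out_steps):
    # pad a list of [D, T] spectrograms to a common T divisible by `out_steps`
    max_len = max(x.shape[1] for x in inputs) + 1  # +1 for a zero frame
    remainder = max_len % out_steps
    pad_len = max_len + (out_steps - remainder) if remainder else max_len
    return np.stack([np.pad(x, ((0, 0), (0, pad_len - x.shape[1])), mode='constant')
                     for x in inputs])

def prepare_stop_target_sketch(inputs, out_steps):
    # pad stop-token vectors with ones (i.e. "stop") to a length divisible by out_steps
    max_len = max(x.shape[0] for x in inputs)
    remainder = max_len % out_steps
    pad_len = max_len + (out_steps - remainder) if remainder else max_len
    return np.stack([np.pad(x, (0, pad_len - x.shape[0]), mode='constant',
                            constant_values=1.0) for x in inputs])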
def create_speech(m, s, CONFIG, use_cuda, ap):
    text_cleaner = [CONFIG.text_cleaner]
    texts = [np.asarray(text_to_sequence(text, text_cleaner), dtype=np.int32)
             for text in s]
    texts = prepare_data(texts).astype(np.int32)
    texts = torch.LongTensor(texts)
    if use_cuda:
        texts = texts.cuda()
    mel_out, linear_outs, alignments, stop_tokens = m.forward(texts.long())
    linear_outs = [linear_out.data.cpu().numpy() for linear_out in linear_outs]
    alignments = [alignment_.cpu().data.numpy() for alignment_ in alignments]
    specs = [ap._denormalize(linear_out) for linear_out in linear_outs]
    wavs = [ap.inv_spectrogram(linear_out.T) for linear_out in linear_outs]
    # wav = wav[:ap.find_endpoint(wav)]
    out = io.BytesIO()
    # ap.save_wav(wav, out)
    return wavs, alignments, specs, stop_tokens
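# Hypothetical usage of create_speech(): the model `m`, CONFIG and AudioProcessor
# `ap` are assumed to be constructed elsewhere in the project; the sentence list
# and output file names below are illustrative only.
# sentences = ["Hello world.", "This is a synthesis test."]
# wavs, alignments, specs, stop_tokens = create_speech(m, sentences, CONFIG,
#                                                      use_cuda=True, ap=ap)
# for i, wav in enumerate(wavs):
#     ap.save_wav(wav, "sample_{}.wav".format(i))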
            length: [len(input_sent_vect)],
            keep_rate: 1.0}
    # for the first decoder step, the state is None
    if state is not None:
        feed.update({in_state: state})
    index, state = sess.run([sample, out_state], feed)
    sentence += [data_dict.idx2word[idx] for idx in index]
    print(' '.join([word for word in sentence
                    if word not in ['<EOS>', '<PAD>', '<BOS>']]))


if __name__ == "__main__":
    if params['input'] == 'GOT':
        # GOT corpus
        corpus_path = "/home/luoyy/datasets_small/got"
        data_raw = data_.got_read(corpus_path)
        data, labels_arr, _, data_dict = data_.prepare_data(data_raw, params_c)
        vocab_size = data_dict.vocab_size
        print("Most common words : {}".format(
            [data_dict.idx2word[i] for i in range(vocab_size - 1, vocab_size - 7, -1)]))
        del data_raw
    elif params['input'] == 'PTB':
        # data in form [data, labels]
        train_data_raw, valid_data_raw, test_data_raw = data_.ptb_read('./PTB_DATA/data')
        data, labels_arr, _, data_dict = data_.prepare_data(train_data_raw, params_c)

    with tf.Graph().as_default() as graph:
        inputs = tf.placeholder(shape=[None, None], dtype=tf.int32)
        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
import time

import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.utils import shuffle

from utils.data import prepare_data
from utils.gini import eval_gini, gini_xgb

data_path = './input'
train = pd.read_csv(data_path + '/train.csv')
test = pd.read_csv(data_path + '/test.csv')

prep = prepare_data(train, test)
train, targets, test = prep(True, False)

X, y = train.as_matrix()[:, 1:], targets.as_matrix()
X, y = shuffle(X, y)

cutoff = int(len(X) * 0.9)
train_X, train_y = X[:cutoff], y[:cutoff]
X_test, y_test = X[cutoff:], y[cutoff:]
X_sub = test.as_matrix()[:, 1:]
del X, y, train, targets, test

param = {
    'max_depth': 5,
    'objective': 'binary:logistic',
    'subsample': 0.8,
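    # The script is truncated above, mid-way through the `param` dict. A hedged
    # sketch of how training might continue (kept as a comment because the
    # remaining param entries, round counts and early-stopping values are
    # assumptions, not the original script's values):
    # dtrain = xgb.DMatrix(train_X, label=train_y)
    # dvalid = xgb.DMatrix(X_test, label=y_test)
    # watchlist = [(dtrain, 'train'), (dvalid, 'valid')]
    # booster = xgb.train(param, dtrain, num_boost_round=1000, evals=watchlist,
    #                     feval=gini_xgb, maximize=True, early_stopping_rounds=50)
    # preds = booster.predict(xgb.DMatrix(X_sub))
    # print('validation gini:', eval_gini(y_test, booster.predict(dvalid)))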
def main(params):
    if params.input_ == 'PTB':
        # data in form [data, labels]
        train_data_raw, train_label_raw = data_.ptb_read('./DATA/parallel_data_10k/')
        word_data, encoder_word_data, word_labels_arr, word_embed_arr, word_data_dict = data_.prepare_data(
            train_data_raw, train_label_raw, params, './DATA/parallel_data_10k/')
        train_label_raw, valid_label_raw, test_label_raw = label_data_.ptb_read(
            './DATA/parallel_data_10k/')
        label_data, label_labels_arr, label_embed_arr, label_data_dict = label_data_.prepare_data(
            train_label_raw, params)

    with tf.Graph().as_default() as graph:
        label_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None], name="lable_inputs")
        word_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None], name="word_inputs")
        d_word_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None], name="d_word_inputs")
        d_label_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None], name="d_label_inputs")
        d_word_labels = tf.placeholder(shape=[None, None], dtype=tf.int32, name="d_word_labels")
        d_label_labels = tf.placeholder(shape=[None, None], dtype=tf.int32, name="d_label_labels")

        with tf.device("/cpu:0"):
            if not params.pre_trained_embed:
                word_embedding = tf.get_variable(
                    "word_embedding", [data_dict.vocab_size, params.embed_size],
                    dtype=tf.float64)
                vect_inputs = tf.nn.embedding_lookup(word_embedding, word_inputs)
            else:
                # [data_dict.vocab_size, params.embed_size]
                word_embedding = tf.Variable(
                    word_embed_arr,
                    trainable=params.fine_tune_embed,
                    name="word_embedding",
                    dtype=tf.float64)  # creates a variable that can be used as a tensor
                vect_inputs = tf.nn.embedding_lookup(word_embedding, word_inputs,
                                                     name="word_lookup")
                label_embedding = tf.Variable(
                    label_embed_arr,
                    trainable=params.fine_tune_embed,
                    name="label_embedding",
                    dtype=tf.float64)  # creates a variable that can be used as a tensor
                label_inputs_1 = tf.nn.embedding_lookup(label_embedding, label_inputs,
                                                        name="label_lookup")

        # inputs = tf.unstack(inputs, num=num_steps, axis=1)
        sizes = word_data_dict.sizes
        word_vocab_size = max(sizes[1], sizes[2], sizes[0])
        label_vocab_size = label_data_dict.vocab_size
        seq_length = tf.placeholder_with_default([0.0], shape=[None])
        d_seq_length = tf.placeholder(shape=[None], dtype=tf.float64)
        # qz = q_net(word_inputs, seq_length, params.batch_size)

        # encoder / decoder graph
        Zsent_distribution, zsent_sample, Zglobal_distribition, zglobal_sample = encoder(
            vect_inputs, label_inputs_1, seq_length, params.batch_size)
        word_logits, label_logits, Zsent_dec_distribution, Zglobal_dec_distribution, _, _, _ = decoder(
            zglobal_sample, d_word_inputs, d_label_inputs, seq_length,
            params.batch_size, label_embedding, word_embedding,
            word_vocab_size, label_vocab_size)

        neg_kld_zsent = -1 * tf.reduce_mean(
            tf.reduce_sum(
                kld(Zsent_distribution[0], Zsent_distribution[1],
                    Zsent_dec_distribution[0], Zsent_dec_distribution[1]),
                axis=1))
        neg_kld_zglobal = -1 * tf.reduce_mean(
            tf.reduce_sum(
                kld(Zglobal_distribition[0], Zglobal_distribition[1],
                    Zglobal_dec_distribution[0], Zglobal_dec_distribution[1]),
                axis=1))

        # label reconstruction loss
        d_label_labels_flat = tf.reshape(d_label_labels, [-1])
        l_cross_entr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=label_logits, labels=d_label_labels_flat)
        l_mask_labels = tf.sign(tf.cast(d_label_labels_flat, dtype=tf.float64))
        l_masked_losses = l_mask_labels * l_cross_entr
        # reshape again
        l_masked_losses = tf.reshape(l_masked_losses, tf.shape(d_label_labels))
        l_mean_loss_by_example = tf.reduce_sum(
            l_masked_losses, reduction_indices=1) / d_seq_length
        label_rec_loss = tf.reduce_mean(l_mean_loss_by_example)
        label_perplexity = tf.exp(label_rec_loss)

        # word reconstruction loss
        d_word_labels_flat = tf.reshape(d_word_labels, [-1])
        print(d_word_labels_flat.shape)
        w_cross_entr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=word_logits, labels=d_word_labels_flat)
        w_mask_labels = tf.sign(tf.cast(d_word_labels_flat, dtype=tf.float64))
        w_masked_losses_1 = w_mask_labels * w_cross_entr
        w_masked_losses = tf.reshape(w_masked_losses_1, tf.shape(d_word_labels))
        w_mean_loss_by_example = tf.reduce_sum(
            w_masked_losses, reduction_indices=1) / d_seq_length
        word_rec_loss = tf.reduce_mean(w_mean_loss_by_example)
        word_perplexity = tf.exp(word_rec_loss)

        rec_loss = word_rec_loss + label_rec_loss

        # earlier single-weight variant with tanh annealing:
        # anneal = tf.placeholder(tf.float64)
        # annealing = (tf.tanh((tf.to_float(anneal) - 5000) / 1800) + 1) / 2
        # kld_loss = -1 * (neg_kld_zglobal + neg_kld_zsent)
        # kl_term_weight = tf.multiply(tf.cast(annealing, dtype=tf.float64),
        #                              tf.cast(kld_loss, dtype=tf.float64))

        # separate KL weights: alpha for the sentence latent, beta for the global latent
        alpha = tf.placeholder(tf.float64)
        alpha_val = tf.to_float(alpha)
        beta = tf.placeholder(tf.float64)
        beta_val = tf.to_float(beta)
        kl_term_weight = tf.multiply(tf.cast(alpha_val, dtype=tf.float64),
                                     tf.cast(neg_kld_zsent, dtype=tf.float64)) \
            + tf.multiply(tf.cast(beta_val, dtype=tf.float64),
                          tf.cast(neg_kld_zglobal, dtype=tf.float64))
        # overall loss: reconstruction loss minus the (negative) KL term
        total_lower_bound = rec_loss - kl_term_weight

        gradients = tf.gradients(total_lower_bound, tf.trainable_variables())
        opt = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
        clipped_grad, _ = tf.clip_by_global_norm(gradients, 5)
        optimize = opt.apply_gradients(zip(clipped_grad, tf.trainable_variables()))

        saver = tf.train.Saver(max_to_keep=10)

        with tf.Session() as sess:
            print("*********")
            sess.run([tf.global_variables_initializer(),
                      tf.local_variables_initializer()])
            # Uncomment to restore a saved checkpoint:
            # try:
            #     path = "./models_ckpts_" + params.name + "/vae_lstm_model-11900"
            #     saver.restore(sess, path)
            # except:
            #     print("-----exception occurred--------")
            #     exit()
            # print("Model Restored")

            # count trainable parameters
            total_parameters = 0
            for variable in tf.trainable_variables():
                # shape is an array of tf.Dimension
                shape = variable.get_shape()
                print(shape, variable.name)
                variable_parameters = 1
                for dim in shape:
                    print(dim)
                    variable_parameters *= dim.value
                print(variable_parameters, total_parameters)
                total_parameters += variable_parameters
            print(total_parameters)

            if params.debug:
                sess = tf_debug.LocalCLIDebugWrapperSession(sess)
            summary_writer = tf.summary.FileWriter(params.LOG_DIR, sess.graph)
            summary_writer.add_graph(sess.graph)

            # ptb_data = PTBInput(params.batch_size, train_data)
            num_iters = len(word_data) // params.batch_size
            cur_it = 0
            iters, tlb_arr, wppl_arr, klw_arr, kld_zg_arr, kld_zs_arr = [], [], [], [], [], []
            alpha_arr, beta_arr = [], []

            for e in range(params.num_epochs):
                epoch_start_time = datetime.datetime.now()
                print("Epoch: {} started at: {}".format(e, epoch_start_time))
                total_tlb = 0
                total_wppl = 0
                total_klw = 0
                total_kld_zg = 0
                total_kld_zs = 0
                for it in tqdm(range(num_iters)):
                    params.is_training = True
                    sent_batch = word_data[it * params.batch_size:(it + 1) * params.batch_size]
                    label_batch = label_data[it * params.batch_size:(it + 1) * params.batch_size]
                    sent_dec_l_batch = word_labels_arr[it * params.batch_size:(it + 1) * params.batch_size]
                    sent_l_batch = encoder_word_data[it * params.batch_size:(it + 1) * params.batch_size]
                    label_l_batch = label_labels_arr[it * params.batch_size:(it + 1) * params.batch_size]

                    # zero padding
                    pad = len(max(sent_batch, key=len))  # not optimal!!
                    length_ = np.array([len(sent) for sent in sent_batch]).reshape(params.batch_size)
                    # prepare encoder and decoder inputs to feed
                    sent_batch = np.array([sent + [0] * (pad - len(sent)) for sent in sent_batch])
                    label_batch = np.array([sent + [0] * (pad - len(sent)) for sent in label_batch])
                    sent_dec_l_batch = np.array([sent + [0] * (pad - len(sent)) for sent in sent_dec_l_batch])
                    sent_l_batch = np.array([sent + [0] * (pad - len(sent)) for sent in sent_l_batch])
                    label_l_batch = np.array([sent + [0] * (pad - len(sent)) for sent in label_l_batch])

                    # KL weight schedule: zero for the first 2000 epochs, then linear
                    alpha_v = beta_v = 0
                    if e > 2000:
                        alpha_v = beta_v = float(e) / 5000 - 0.4

                    feed = {
                        word_inputs: sent_l_batch,
                        label_inputs: label_l_batch,
                        d_word_inputs: sent_batch,
                        d_label_inputs: label_batch,
                        d_word_labels: sent_dec_l_batch,
                        d_label_labels: label_l_batch,
                        seq_length: length_,
                        d_seq_length: length_,
                        alpha: alpha_v,
                        beta: beta_v
                    }

                    z1a, z1b, z3a, z3b, kzg, kzs, tlb, wppl, lppl, klw, o, alpha_, beta_ = sess.run(
                        [
                            Zsent_distribution[0], Zsent_distribution[1],
                            Zsent_dec_distribution[0], Zsent_dec_distribution[1],
                            neg_kld_zglobal, neg_kld_zsent, total_lower_bound,
                            word_perplexity, label_perplexity, kl_term_weight,
                            optimize, alpha_val, beta_val
                        ],
                        feed_dict=feed)

                    # if cur_it % 100 == 0 and cur_it != 0:
                    #     print("TotalLB after {} ({}) iterations (epoch): {} Neg_KLD_Zglobal: "
                    #           "{} Neg_KLD_Zsent: {}".format(cur_it, e, tlb, kzg, kzs))
                    #     print("Word Perplexity: {}, Label Perplexity: {}".format(wppl, lppl))

                    cur_it += 1
                    total_tlb += tlb
                    total_wppl += wppl
                    total_klw += klw
                    total_kld_zg += -kzg
                    total_kld_zs += -kzs
                    if cur_it % 100 == 0 and cur_it != 0:
                        path_to_save = os.path.join(params.MODEL_DIR, "vae_lstm_model")
                        model_path_name = saver.save(sess, path_to_save, global_step=cur_it)

                # per-epoch averages
                avg_tlb = total_tlb / num_iters
                avg_wppl = total_wppl / num_iters
                avg_klw = total_klw / num_iters
                avg_kld_zg = total_kld_zg / num_iters
                avg_kld_zs = total_kld_zs / num_iters
                iters.append(e)
                tlb_arr.append(avg_tlb)
                wppl_arr.append(avg_wppl)
                klw_arr.append(avg_klw)
                kld_zg_arr.append(avg_kld_zg)
                kld_zs_arr.append(avg_kld_zs)
                alpha_arr.append(alpha_)
                beta_arr.append(beta_)
                print("Time Taken:", datetime.datetime.now() - epoch_start_time)

            # dump the logged values and plot them
            import matplotlib as mpl
            mpl.use('Agg')
            import matplotlib.pyplot as plt

            plot_filename = "./plot_values_" + str(params.num_epochs) + ".txt"
            with open(plot_filename, 'w') as wf:
                _ = [wf.write(str(s) + ' ') for s in iters]
                wf.write('\n')
                _ = [wf.write(str(s) + ' ') for s in tlb_arr]
                # wf.write('\n')
                # _ = [wf.write(str(s) + ' ') for s in wppl_arr]
                wf.write('\n')
                _ = [wf.write(str(s) + ' ') for s in klw_arr]
                wf.write('\n')
                _ = [wf.write(str(s) + ' ') for s in kld_zg_arr]
                wf.write('\n')
                _ = [wf.write(str(s) + ' ') for s in kld_zs_arr]
                wf.write('\n')
                _ = [wf.write(str(s) + ' ') for s in alpha_arr]
                wf.write('\n')
                _ = [wf.write(str(s) + ' ') for s in beta_arr]

            plt.plot(iters, tlb_arr, color='blue', label='Total lower bound')
            plt.title("Total Lower Bound vs Epochs")
            plt.xlabel('Epochs')
            plt.ylabel('Total Lower Bound')
            plt.savefig('./graph_elbo_epochs_' + str(params.num_epochs) + '.png')
            plt.clf()

            # plt.plot(iters, wppl_arr, color='red', label='Word Perplexity')
            # plt.title("Word Perplexity vs Epochs")
            # plt.xlabel('Epochs')
            # plt.ylabel('Word Perplexity')
            # plt.savefig('./graph_ppl_epochs_' + str(params.num_epochs) + '.png')
            # plt.legend(bbox_to_anchor=(1.05, 1), loc=1, borderaxespad=0.)
            # plt.clf()

            plt.plot(iters, klw_arr, color='green', label='KL term Value')
            plt.title("KL Term Value vs Epochs")
            plt.xlabel('Epochs')
            plt.ylabel('KL term Value')
            plt.savefig('./graph_klw_epochs_' + str(params.num_epochs) + '.png')
            plt.clf()

            plt.plot(iters, kld_zg_arr, color='yellow', label='kld_zg')
            plt.title("KLD zg")
            plt.xlabel('Epochs')
            plt.ylabel('KLD zg term Value')
            plt.savefig('./graph_kld_zg_epochs_' + str(params.num_epochs) + '.png')
            plt.clf()

            plt.plot(iters, kld_zs_arr, color='black', label='kld_zs')
            plt.title("KLD zs")
            plt.xlabel('Epochs')
            plt.ylabel('KLD zs term Value')
            plt.savefig('./graph_kld_zs_epochs_' + str(params.num_epochs) + '.png')
            plt.clf()

            plt.plot(iters, alpha_arr, color='blue', label='alpha')
            plt.title("Alpha")
            plt.xlabel('Epochs')
            plt.ylabel('Alpha Value')
            plt.savefig('./graph_alpha_epochs_' + str(params.num_epochs) + '.png')
            plt.clf()
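# The training graph above calls a kld() helper defined elsewhere in the project.
# A minimal NumPy sketch of the quantity it is assumed to compute: the per-dimension
# KL divergence between two diagonal Gaussians q = N(mu_q, var_q) and
# p = N(mu_p, var_p), which the graph then sums over dimensions and averages over
# the batch. Whether the project passes variances, standard deviations, or
# log-variances is an assumption here.
import numpy as np

def kld_sketch(mu_q, logvar_q, mu_p, logvar_p):
    # elementwise KL( N(mu_q, exp(logvar_q)) || N(mu_p, exp(logvar_p)) )
    var_q, var_p = np.exp(logvar_q), np.exp(logvar_p)
    return 0.5 * (logvar_p - logvar_q
                  + (var_q + (mu_q - mu_p) ** 2) / var_p
                  - 1.0)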
def main(params):
    if params.input == 'GOT':
        corpus_path = "/home/luoyy/datasets_small/got"
        data_raw = data_.got_read(corpus_path)
        data, labels_arr, embed_arr, data_dict = data_.prepare_data(data_raw, params)
    elif params.input == 'PTB':
        # data in form [data, labels]
        train_data_raw, valid_data_raw, test_data_raw = data_.ptb_read('./PTB_DATA/data')
        data, labels_arr, embed_arr, data_dict = data_.prepare_data(train_data_raw, params)

    with tf.Graph().as_default() as graph:
        inputs = tf.placeholder(shape=[None, None], dtype=tf.int32)
        d_inputs_ps = tf.placeholder(dtype=tf.int32, shape=[None, None])
        labels = tf.placeholder(shape=[None, None], dtype=tf.int32)
        with tf.device("/cpu:0"):
            if not params.pre_trained_embed:
                embedding = tf.get_variable(
                    "embedding", [data_dict.vocab_size, params.embed_size],
                    dtype=tf.float32)
                vect_inputs = tf.nn.embedding_lookup(embedding, inputs)
            else:
                # [data_dict.vocab_size, params.embed_size]
                embedding = tf.Variable(
                    embed_arr,
                    trainable=params.fine_tune_embed,
                    name="embedding",
                    dtype=tf.float32)
                vect_inputs = tf.nn.embedding_lookup(embedding, inputs)

        # inputs = tf.unstack(inputs, num=num_steps, axis=1)
        vocab_size = data_dict.vocab_size
        seq_length = tf.placeholder_with_default([0.0], shape=[None])
        d_seq_length = tf.placeholder(shape=[None], dtype=tf.float32)
        qz = q_net(vect_inputs, seq_length, params.batch_size)
        x_logits, _, _ = vae_lstm({'z': qz}, params.batch_size, d_seq_length,
                                  embedding, d_inputs_ps, vocab_size=vocab_size)

        # loss, masking <PAD>
        current_len = tf.placeholder_with_default(params.sent_max_size, shape=())
        # tf.sequence_mask, tf.contrib.seq2seq.sequence_loss
        labels_flat = tf.reshape(labels, [-1])
        cross_entr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=x_logits, labels=labels_flat)
        mask_labels = tf.sign(tf.to_float(labels_flat))
        masked_losses = mask_labels * cross_entr
        # reshape again
        masked_losses = tf.reshape(masked_losses, tf.shape(labels))
        mean_loss_by_example = tf.reduce_sum(masked_losses, reduction_indices=1) / d_seq_length
        rec_loss = tf.reduce_mean(mean_loss_by_example)
        perplexity = tf.exp(rec_loss)

        # kl divergence calculation
        kld = -0.5 * tf.reduce_mean(
            tf.reduce_sum(
                1 + tf.log(tf.square(qz.distribution.std) + 0.0001)
                - tf.square(qz.distribution.mean)
                - tf.square(qz.distribution.std), 1))
        tf.summary.scalar('kl_divergence', kld)

        # kld weight annealing
        anneal = tf.placeholder(tf.int32)
        annealing = (tf.tanh((tf.to_float(anneal) - 3500) / 1000) + 1) / 2
        # overall loss: reconstruction loss + annealed KL regularization (scaled by 1/10)
        lower_bound = rec_loss + tf.multiply(tf.to_float(annealing), tf.to_float(kld)) / 10
        # lower_bound = rec_loss
        sm2 = [tf.summary.scalar('lower_bound', lower_bound),
               tf.summary.scalar('kld_coeff', annealing)]

        gradients = tf.gradients(lower_bound, tf.trainable_variables())
        opt = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
        clipped_grad, _ = tf.clip_by_global_norm(gradients, 5)
        optimize = opt.apply_gradients(zip(clipped_grad, tf.trainable_variables()))

        # sampling graph
        logits, states, smpl = vae_lstm({}, 1, d_seq_length, embedding, d_inputs_ps,
                                        vocab_size=vocab_size, gen_mode=True)
        init_state = states[0]
        fin_output = states[1]
        # merge summaries
        merged = tf.summary.merge_all()

        with tf.Session() as sess:
            sess.run([tf.global_variables_initializer(),
                      tf.local_variables_initializer()])
            if params.debug:
                sess = tf_debug.LocalCLIDebugWrapperSession(sess)
            summary_writer = tf.summary.FileWriter(params.LOG_DIR, sess.graph)
            summary_writer.add_graph(sess.graph)
            # ptb_data = PTBInput(params.batch_size, train_data)
            num_iters = len(data) // params.batch_size
            cur_it = 0
            iters, kld_arr, coeff = [], [], []
            for e in range(params.num_epochs):
                for it in range(num_iters):
                    params.is_training = True
                    batch = data[it * params.batch_size:(it + 1) * params.batch_size]
                    l_batch = labels_arr[it * params.batch_size:(it + 1) * params.batch_size]
                    # zero padding
                    pad = len(max(batch, key=len))  # not optimal!!
                    length_ = np.array([len(sent) for sent in batch]).reshape(params.batch_size)
                    # prepare encoder and decoder inputs to feed
                    batch = np.array([sent + [0] * (pad - len(sent)) for sent in batch])
                    l_batch = np.array([sent + [0] * (pad - len(sent)) for sent in l_batch])
                    # encoder feed=[....<EOS>], decoder feed=[<BOS>....], labels=[.....<EOS>]
                    feed = {inputs: l_batch, d_inputs_ps: batch, labels: l_batch,
                            seq_length: length_, d_seq_length: length_,
                            anneal: cur_it, current_len: pad}
                    lb, _, kld_, ann_, r_loss, perplexity_ = sess.run(
                        [lower_bound, optimize, kld, annealing, rec_loss, perplexity],
                        feed_dict=feed)
                    cur_it += 1
                    iters.append(cur_it)
                    kld_arr.append(kld_)
                    coeff.append(ann_)
                    if cur_it % 100 == 0 and cur_it != 0:
                        print("VLB after {} ({}) iterations (epoch): {} KLD: "
                              "{} Annealing Coeff: {} CE: {}".format(
                                  cur_it, e, lb, kld_, ann_, r_loss))
                        print("Perplexity: {}".format(perplexity_))
                    if cur_it % 150 == 0:
                        if not params.beam_search:
                            params.is_training = False
                            online_inference(sess, data_dict, sample=smpl, seq=d_inputs_ps,
                                             in_state=init_state, out_state=fin_output,
                                             length=d_seq_length)
                        else:
                            gen_sentence = beam_search(sess, data_dict, states, smpl,
                                                       (d_inputs_ps, d_seq_length), params,
                                                       beam_size=params.beam_size)
                            print(gen_sentence)
                    if cur_it % 400 == 0 and cur_it != 0:
                        # saver = tf.train.Saver()
                        summary = sess.run(merged, feed_dict=feed)
                        summary_writer.add_summary(summary)
                        # saver.save(sess, os.path.join(params.LOG_DIR, "lstmlstm_model.ckpt"), cur_it)
                    if params.visualise:
                        if cur_it % 30000 == 0 and cur_it != 0:
                            import matplotlib.pyplot as plt
                            with open("./run_kld" + str(params.dec_keep_rate), 'w') as wf:
                                _ = [wf.write(str(s) + ' ') for s in iters]
                                wf.write('\n')
                                _ = [wf.write(str(s) + ' ') for s in kld_arr]
                                wf.write('\n')
                                _ = [wf.write(str(s) + ' ') for s in coeff]
                            plt.plot(iters, kld_arr, label='KLD')
                            plt.xlabel('Iterations')
                            plt.legend(bbox_to_anchor=(1.05, 1), loc=1, borderaxespad=0.)
                            plt.show()
                            plt.plot(iters, coeff, 'r--', label='annealing')
                            plt.legend(bbox_to_anchor=(1.05, 1), loc=1, borderaxespad=0.)
                            plt.show()
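# The KL weight above follows a tanh warm-up: it stays near 0 for roughly the
# first 2500 iterations and approaches 1 after about 4500. A small NumPy sketch
# of the same schedule, handy for inspecting the curve outside the graph.
import numpy as np

def kl_anneal_sketch(step, midpoint=3500, width=1000):
    # mirrors the in-graph formula: (tanh((step - midpoint) / width) + 1) / 2
    return (np.tanh((step - midpoint) / width) + 1.0) / 2.0

# e.g. kl_anneal_sketch(np.array([0, 2500, 3500, 4500, 6000]))
# -> approximately [0.00, 0.12, 0.50, 0.88, 0.99]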
def main(params):
    if params.input == 'PTB':
        # data in form [data, labels]
        train_data_raw, train_label_raw = data_.ptb_read('./DATA/train_untrans_6k/')
        word_data, encoder_word_data, word_labels_arr, word_embed_arr, word_data_dict = data_.prepare_data(
            train_data_raw, train_label_raw, params, './DATA/train_untrans_6k/')
        train_label_raw, valid_label_raw, test_label_raw = label_data_.ptb_read('./DATA/train_untrans_6k/')
        label_data, label_labels_arr, label_embed_arr, label_data_dict = label_data_.prepare_data(
            train_label_raw, params)

    with tf.Graph().as_default() as graph:
        label_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None], name="lable_inputs")
        word_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None], name="word_inputs")
        d_word_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None], name="d_word_inputs")
        d_label_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None], name="d_label_inputs")
        d_word_labels = tf.placeholder(shape=[None, None], dtype=tf.int32, name="d_word_labels")
        d_label_labels = tf.placeholder(shape=[None, None], dtype=tf.int32, name="d_label_labels")

        with tf.device("/cpu:0"):
            if not params.pre_trained_embed:
                word_embedding = tf.get_variable(
                    "word_embedding", [data_dict.vocab_size, params.embed_size],
                    dtype=tf.float64)
                vect_inputs = tf.nn.embedding_lookup(word_embedding, word_inputs)
            else:
                # [data_dict.vocab_size, params.embed_size]
                word_embedding = tf.Variable(
                    word_embed_arr, trainable=params.fine_tune_embed,
                    name="word_embedding",
                    dtype=tf.float64)  # creates a variable that can be used as a tensor
                vect_inputs = tf.nn.embedding_lookup(word_embedding, word_inputs,
                                                     name="word_lookup")
                label_embedding = tf.Variable(
                    label_embed_arr, trainable=params.fine_tune_embed,
                    name="label_embedding",
                    dtype=tf.float64)  # creates a variable that can be used as a tensor
                label_inputs_1 = tf.nn.embedding_lookup(label_embedding, label_inputs,
                                                        name="label_lookup")

        # inputs = tf.unstack(inputs, num=num_steps, axis=1)
        sizes = word_data_dict.sizes
        word_vocab_size = max(sizes[1], sizes[2], sizes[0])
        label_vocab_size = label_data_dict.vocab_size
        seq_length = tf.placeholder_with_default([0.0], shape=[None])
        d_seq_length = tf.placeholder(shape=[None], dtype=tf.float64)
        # qz = q_net(word_inputs, seq_length, params.batch_size)

        # latent samples and label logits are fed in from outside during generation
        zglobal_sample = tf.placeholder(dtype=tf.float64, shape=[None, params.latent_size])
        zsent_sample = tf.placeholder(dtype=tf.float64, shape=[None, params.latent_size])
        inp_logits = tf.placeholder(dtype=tf.float64, shape=[None, params.label_embed_size])

        word_logits, label_logits, _, _, l_smpl, w_smpl, zs = decoder(
            zglobal_sample, d_word_inputs, d_label_inputs, seq_length,
            params.batch_size, label_embedding, word_embedding,
            word_vocab_size, label_vocab_size, gen_mode=True,
            zsent=zsent_sample, inp_logits=inp_logits)
        # alternative call with separate generation flags:
        # word_logits, _, _, _, _, w_smpl, _ = decoder(
        #     zglobal_sample, d_word_inputs, d_label_inputs, seq_length,
        #     params.batch_size, label_embedding, word_embedding,
        #     word_vocab_size, label_vocab_size,
        #     gen_mode_1=False, gen_mode_2=True, zsent=zsent_sample)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run([tf.global_variables_initializer(),
                      tf.local_variables_initializer()])
            print("here")
            try:
                path = "./models_ckpts_" + params.name + "/vae_lstm_model-11900"
                # chkp.print_tensors_in_checkpoint_file(path, tensor_name='', all_tensors=True)
                saver.restore(sess, path)
                # saver.restore(sess, "./models_ckpts_1/vae_lstm_model-258600")
            except:
                print("-----exception occurred--------")
                exit()
                # traceback.print_exc()
            print("Model Restored")

            # count trainable parameters
            total_parameters = 0
            for variable in tf.trainable_variables():
                # shape is an array of tf.Dimension
                shape = variable.get_shape()
                print(shape, variable.name)
                variable_parameters = 1
                for dim in shape:
                    print(dim)
                    variable_parameters *= dim.value
                print(variable_parameters)
                total_parameters += variable_parameters
            print(total_parameters)

            batch_size = 1
            # number_of_samples = params.number_of_samples
            number_of_samples = 10000
            same_context_sentences = 1
            sentence_file = "./r_VACS_kl_10k.txt"
            labels_file = "./r_VACS_kl_10k_labels.txt"
            f1 = open(sentence_file, 'w+')
            f2 = open(labels_file, 'w+')
            print("----------------------SENTENCES------------------------\n\n")

            for num in range(number_of_samples):
                params.is_training = False
                sentence = ['<BOS>']
                label_seq = ['3']
                state = None
                input_sent_vect = [word_data_dict.word2idx[word] for word in sentence]
                # sample the global latent and initial inputs for the label decoder
                # z = tf.random_normal(tf.shape([1, params.latent_size]), name='z', dtype=tf.float64).eval()
                z = np.random.normal(0, 1, (1, params.latent_size))
                z_1 = np.random.normal(0, 1, (1, params.latent_size))
                l_1 = np.random.rand(1, params.label_embed_size)

                # generate labels until the <EOS> label ('4') appears
                for i in range(params.gen_length):
                    if "4" in label_seq:
                        break
                    input_label_vect = [label_data_dict.word2idx[word] for word in label_seq]
                    feed = {
                        d_label_inputs: np.array(input_label_vect).reshape([1, len(input_label_vect)]),
                        zsent_sample: z_1,
                        inp_logits: l_1,
                        seq_length: [len(input_label_vect)],
                        zglobal_sample: z,
                        d_word_inputs: np.array(input_sent_vect).reshape([1, len(input_sent_vect)])
                    }
                    a, index = sess.run([label_logits, l_smpl], feed)
                    if i == 0:
                        logit_arr = np.array(a)
                    else:
                        logit_arr = np.concatenate((logit_arr, a))
                    index = index[0]
                    label_seq += [label_data_dict.idx2word[int(index)]]

                label_seq = [word for word in label_seq if word not in ['3', '4']]
                label_out = ' '.join([w for w in label_seq])
                print(label_out, len(label_seq))
                print(logit_arr.shape)

                # generate word sequences conditioned on the sampled label sequence
                for num in range(same_context_sentences):
                    print(num)
                    i = 0
                    z_sent_sample = sess.run(zs, feed)
                    b1 = sizes[0]
                    b2 = sizes[0] + sizes[1]
                    b3 = sizes[0] + sizes[1] + sizes[2]
                    sentence = ['<BOS>']
                    input_sent_vect = [word_data_dict.word2idx[word] for word in sentence]
                    while i < len(label_seq):
                        # generate until <EOS> tag
                        input_sent_vect = [word_data_dict.word2idx[word] for word in sentence]
                        feed = {
                            d_label_inputs: np.array(input_label_vect).reshape([1, len(input_label_vect)]),
                            zsent_sample: z_sent_sample,
                            inp_logits: logit_arr[:i + 1],
                            seq_length: [len(input_sent_vect)],
                            zglobal_sample: z,
                            d_word_inputs: np.array(input_sent_vect).reshape([1, len(input_sent_vect)])
                        }
                        tmp = np.array(input_sent_vect).reshape([1, len(input_sent_vect)])
                        w_logits = sess.run(word_logits, feed)
                        # sample from the vocabulary slice corresponding to the current label
                        if label_seq[i] == '0':
                            w_logits = w_logits[0][:sizes[1]]
                            w_probs = softmax(w_logits)
                            index_arr = np.random.choice(len(w_probs), 5, p=w_probs)
                            index_arr = index_arr + b1
                        elif label_seq[i] == '1':
                            w_logits = w_logits[0][:sizes[2]]
                            w_probs = softmax(w_logits)
                            index_arr = np.random.choice(len(w_probs), 5, p=w_probs)
                            index_arr = index_arr + b2
                        elif label_seq[i] == '2':
                            w_logits = w_logits[0][:sizes[0]]
                            w_probs = softmax(w_logits)
                            index_arr = np.random.choice(len(w_probs), 5, p=w_probs)
                        for j in index_arr:
                            index = j
                            word = word_data_dict.idx2word[int(index)]
                            if word != "<EOS>" and word != "<BOS>":
                                i += 1
                                sentence += [word]
                                break
                        # (an earlier, commented-out variant picked the argmax inside
                        #  each label's vocabulary slice instead of sampling)

                    sentence = [word for word in sentence if word not in ['<BOS>', '<EOS>']]
                    sentence_cm = ' '.join([w for w in sentence])
                    print(sentence_cm, len(sentence))
                    print("\n")
                    f1.write(sentence_cm)
                    f1.write("\n")
                    f2.write(label_out)
                    f2.write("\n")
                print("-----------------------------------------\n")

            f1.close()
            f2.close()
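# The sampling loop above calls a softmax() helper defined elsewhere in the
# project; a minimal, numerically stable sketch for a 1-D logit vector, which is
# what np.random.choice needs (probabilities that sum to 1).
import numpy as np

def softmax_sketch(logits):
    # subtract the max before exponentiating for numerical stability
    shifted = np.asarray(logits, dtype=np.float64) - np.max(logits)
    exp = np.exp(shifted)
    return exp / exp.sum()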
    # model = CoronaVirusPredictor(n_features=1, n_hidden=512, seq_len=seq_len, n_layers=2)
    # model, train_hist, _ = train_model(model, X_train, y_train, num_epochs=120)
    # DAYS_TO_PREDICT = 12
    # predicted_cases, _ = predict_daily_cases(model, X_train, y_train, DAYS_TO_PREDICT, seq_len, scaler)
    # predicted_cases = pd.Series(
    #     data=predicted_cases,
    #     index=pd.date_range(start=diff_daily_cases.index[-1],
    #                         periods=DAYS_TO_PREDICT + 1,
    #                         closed='right'))
    # plot_data(predicted_cases, 'Predictions', label='Predicted Daily Cases')
    # plot_real_predicted(diff_daily_cases, predicted_cases)


if __name__ == '__main__':
    setup_params()
    diff_daily_cases = prepare_data('time_series_19-covid-Confirmed.csv')
    train_data, test_data = split_data(diff_daily_cases, 20)
    train_data, test_data, scaler = scale_data(diff_daily_cases, train_data, test_data)

    seq_len = 5
    X_train, y_train = create_sequences(train_data, seq_len)
    X_test, y_test = create_sequences(test_data, seq_len)

    model = CoronaVirusPredictor(n_features=1, n_hidden=512, seq_len=seq_len, n_layers=2)
    model, train_hist, test_hist = train_model(model, X_train, y_train, X_test, y_test)
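# create_sequences() is imported from the project's utilities; below is a minimal
# sketch of the sliding-window construction it is assumed to perform: each input
# is a window of `seq_len` consecutive values and the target is the value that
# immediately follows it.
import numpy as np
import torch

def create_sequences_sketch(data, seq_len):
    xs, ys = [], []
    for i in range(len(data) - seq_len):
        xs.append(data[i:i + seq_len])   # window of seq_len values
        ys.append(data[i + seq_len])     # next value is the target
    return (torch.from_numpy(np.array(xs)).float(),
            torch.from_numpy(np.array(ys)).float())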
def collate_fn(self, batch): r""" Perform preprocessing and create a final data batch: 1. Sort batch instances by text-length 2. Convert Audio signal to Spectrograms. 3. PAD sequences wrt r. 4. Load to Torch. """ # Puts each data field into a tensor with outer dimension batch size if isinstance(batch[0], collections.Mapping): text_lenghts = np.array([len(d["text"]) for d in batch]) # sort items with text input length for RNN efficiency text_lenghts, ids_sorted_decreasing = torch.sort( torch.LongTensor(text_lenghts), dim=0, descending=True) wav = [batch[idx]['wav'] for idx in ids_sorted_decreasing] item_idxs = [ batch[idx]['item_idx'] for idx in ids_sorted_decreasing ] text = [batch[idx]['text'] for idx in ids_sorted_decreasing] speaker_name = [ batch[idx]['speaker_name'] for idx in ids_sorted_decreasing ] # compute features mel = [self.ap.melspectrogram(w).astype('float32') for w in wav] linear = [self.ap.spectrogram(w).astype('float32') for w in wav] mel_lengths = [m.shape[1] for m in mel] # compute 'stop token' targets stop_targets = [ np.array([0.] * (mel_len - 1) + [1.]) for mel_len in mel_lengths ] # PAD stop targets stop_targets = prepare_stop_target(stop_targets, self.outputs_per_step) # PAD sequences with longest instance in the batch text = prepare_data(text).astype(np.int32) wav = prepare_data(wav) # PAD features with longest instance linear = prepare_tensor(linear, self.outputs_per_step) mel = prepare_tensor(mel, self.outputs_per_step) assert mel.shape[2] == linear.shape[2] # B x D x T --> B x T x D linear = linear.transpose(0, 2, 1) mel = mel.transpose(0, 2, 1) # convert things to pytorch text_lenghts = torch.LongTensor(text_lenghts) text = torch.LongTensor(text) linear = torch.FloatTensor(linear).contiguous() mel = torch.FloatTensor(mel).contiguous() mel_lengths = torch.LongTensor(mel_lengths) stop_targets = torch.FloatTensor(stop_targets) return text, text_lenghts, speaker_name, linear, mel, mel_lengths, \ stop_targets, item_idxs raise TypeError(("batch must contain tensors, numbers, dicts or lists;\ found {}".format(type(batch[0]))))
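# Hypothetical wiring of the collate_fn above into a PyTorch DataLoader; the
# dataset class name and its constructor arguments are placeholders, not the
# project's actual ones.
# from torch.utils.data import DataLoader
# dataset = MyTTSDataset(...)  # yields dicts with 'text', 'wav', 'item_idx', 'speaker_name'
# loader = DataLoader(dataset, batch_size=32, shuffle=True,
#                     collate_fn=dataset.collate_fn, drop_last=True,
#                     num_workers=4, pin_memory=True)
# for text, text_lenghts, speaker_name, linear, mel, mel_lengths, \
#         stop_targets, item_idxs in loader:
#     pass  # forward pass / loss computation goes here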
def main(params):
    if params.input == 'PTB':
        # data in form [data, labels]
        train_data_raw, train_label_raw = data_.ptb_read('./DATA/train_untrans_6k/')
        word_data, encoder_word_data, word_labels_arr, word_embed_arr, word_data_dict = data_.prepare_data(
            train_data_raw, train_label_raw, params, './DATA/train_untrans_6k/')
        train_label_raw, valid_label_raw, test_label_raw = label_data_.ptb_read('./DATA/train_untrans_6k/')
        label_data, label_labels_arr, label_embed_arr, label_data_dict = label_data_.prepare_data(
            train_label_raw, params)

    with tf.Graph().as_default() as graph:
        label_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None], name="lable_inputs")
        word_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None], name="word_inputs")
        d_word_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None], name="d_word_inputs")
        d_label_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None], name="d_label_inputs")
        d_word_labels = tf.placeholder(shape=[None, None], dtype=tf.int32, name="d_word_labels")
        d_label_labels = tf.placeholder(shape=[None, None], dtype=tf.int32, name="d_label_labels")

        with tf.device("/cpu:0"):
            if not params.pre_trained_embed:
                word_embedding = tf.get_variable(
                    "word_embedding", [data_dict.vocab_size, params.embed_size],
                    dtype=tf.float64)
                vect_inputs = tf.nn.embedding_lookup(word_embedding, word_inputs)
            else:
                # [data_dict.vocab_size, params.embed_size]
                word_embedding = tf.Variable(
                    word_embed_arr, trainable=params.fine_tune_embed,
                    name="word_embedding",
                    dtype=tf.float64)  # creates a variable that can be used as a tensor
                vect_inputs = tf.nn.embedding_lookup(word_embedding, word_inputs,
                                                     name="word_lookup")
                label_embedding = tf.Variable(
                    label_embed_arr, trainable=params.fine_tune_embed,
                    name="label_embedding",
                    dtype=tf.float64)  # creates a variable that can be used as a tensor
                label_inputs_1 = tf.nn.embedding_lookup(label_embedding, label_inputs,
                                                        name="label_lookup")

        # inputs = tf.unstack(inputs, num=num_steps, axis=1)
        sizes = word_data_dict.sizes
        word_vocab_size = max(sizes[1], sizes[2], sizes[0])
        label_vocab_size = label_data_dict.vocab_size
        seq_length = tf.placeholder_with_default([0.0], shape=[None])
        d_seq_length = tf.placeholder(shape=[None], dtype=tf.float64)
        # qz = q_net(word_inputs, seq_length, params.batch_size)

        # Encoder and Decoder model
        Zsent_distribution, zsent_sample, Zglobal_distribition, zglobal_sample = encoder(
            vect_inputs, label_inputs_1, seq_length, params.batch_size)
        word_logits, label_logits, Zsent_dec_distribution, Zglobal_dec_distribution, _, _, _ = decoder(
            zglobal_sample, d_word_inputs, d_label_inputs, seq_length,
            params.batch_size, label_embedding, word_embedding,
            word_vocab_size, label_vocab_size)

        # original per-dimension KLD:
        # neg_kld_zsent = -1 * tf.reduce_mean(tf.reduce_sum(
        #     kld(Zsent_distribution[0], Zsent_distribution[1],
        #         Zsent_dec_distribution[0], Zsent_dec_distribution[1]), axis=1))
        # neg_kld_zglobal = -1 * tf.reduce_mean(tf.reduce_sum(
        #     kld(Zglobal_distribition[0], Zglobal_distribition[1],
        #         Zglobal_dec_distribution[0], Zglobal_dec_distribution[1]), axis=1))

        # KLD modified (kld_mod already reduces over batch and dimensions)
        neg_kld_zsent = -1 * kld_mod(Zsent_distribution[0], Zsent_distribution[1],
                                     Zsent_dec_distribution[0], Zsent_dec_distribution[1])
        neg_kld_zglobal = -1 * kld_mod(Zglobal_distribition[0], Zglobal_distribition[1],
                                       Zglobal_dec_distribution[0], Zglobal_dec_distribution[1])

        # (a commented-out maximum-likelihood formulation gathered per-token softmax
        #  probabilities with tf.gather and averaged -log p over each sequence, for
        #  both words and labels; the masked cross-entropy version below is used instead)

        # label reconstruction loss
        d_label_labels_flat = tf.reshape(d_label_labels, [-1])
        l_cross_entr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=label_logits, labels=d_label_labels_flat)
        l_mask_labels = tf.sign(tf.cast(d_label_labels_flat, dtype=tf.float64))
        l_masked_losses = l_mask_labels * l_cross_entr
        # reshape again
        l_masked_losses = tf.reshape(l_masked_losses, tf.shape(d_label_labels))
        l_mean_loss_by_example = tf.reduce_sum(l_masked_losses, reduction_indices=1) / d_seq_length
        label_rec_loss = tf.reduce_mean(l_mean_loss_by_example)
        label_perplexity = tf.exp(label_rec_loss)

        # word reconstruction loss
        d_word_labels_flat = tf.reshape(d_word_labels, [-1])
        print(d_word_labels_flat.shape)
        w_cross_entr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=word_logits, labels=d_word_labels_flat)
        w_mask_labels = tf.sign(tf.cast(d_word_labels_flat, dtype=tf.float64))
        w_masked_losses_1 = w_mask_labels * w_cross_entr
        w_masked_losses = tf.reshape(w_masked_losses_1, tf.shape(d_word_labels))
        w_mean_loss_by_example = tf.reduce_sum(w_masked_losses, reduction_indices=1) / d_seq_length
        word_rec_loss = tf.reduce_mean(w_mean_loss_by_example)
        word_perplexity = tf.exp(word_rec_loss)

        # using maximum likelihood:
        # total_lower_bound = -1 * (w_cost + l_cost + neg_kld_zglobal + neg_kld_zsent)
        # using reconstruction loss:
        # total_lower_bound = word_rec_loss + label_rec_loss - neg_kld_zglobal - neg_kld_zsent
        rec_loss = word_rec_loss + label_rec_loss
        kld_loss = -1 * (neg_kld_zglobal + neg_kld_zsent)
        anneal = tf.placeholder(tf.float64)
        annealing = tf.to_float(anneal)
        # annealing = (tf.tanh((tf.to_float(anneal) - 3500) / 1000) + 1) / 2
        # overall loss: reconstruction loss + weighted KL regularization
        kl_term_weight = tf.multiply(tf.cast(annealing, dtype=tf.float64),
                                     tf.cast(kld_loss, dtype=tf.float64))
        total_lower_bound = rec_loss + kl_term_weight

        gradients = tf.gradients(total_lower_bound, tf.trainable_variables())
        opt = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
        clipped_grad, _ = tf.clip_by_global_norm(gradients, 5)
        optimize = opt.apply_gradients(zip(clipped_grad, tf.trainable_variables()))

        saver = tf.train.Saver(max_to_keep=10)

        with tf.Session() as sess:
            print("*********")
            sess.run([tf.global_variables_initializer(),
                      tf.local_variables_initializer()])
            # Uncomment and specify the path to a checkpoint to restore it:
            # try:
            #     path = "./models_ckpts_" + params.name + "/vae_lstm_model-11900"
            #     saver.restore(sess, path)
            # except:
            #     print("-----exception occurred--------")
            #     exit()
            # print("Model Restored")

            # total number of parameters
            total_parameters = 0
            for variable in tf.trainable_variables():
                # shape is an array of tf.Dimension
                shape = variable.get_shape()
                print(shape, variable.name)
                variable_parameters = 1
                for dim in shape:
                    print(dim)
                    variable_parameters *= dim.value
                print(variable_parameters, total_parameters)
                total_parameters += variable_parameters
            print(total_parameters)

            if params.debug:
                sess = tf_debug.LocalCLIDebugWrapperSession(sess)
            summary_writer = tf.summary.FileWriter(params.LOG_DIR, sess.graph)
            summary_writer.add_graph(sess.graph)

            # ptb_data = PTBInput(params.batch_size, train_data)
            num_iters = len(word_data) // params.batch_size
            cur_it = 0
            iters, tlb_arr, wppl_arr, klw_arr = [], [], [], []
            print("Number of iterations: " + str(num_iters))

            # training the model
            for e in range(params.num_epochs):
                total_tlb = 0
                total_wppl = 0
                total_klw = 0
                for it in range(num_iters):
                    params.is_training = True
                    sent_batch = word_data[it * params.batch_size:(it + 1) * params.batch_size]
                    label_batch = label_data[it * params.batch_size:(it + 1) * params.batch_size]
                    sent_dec_l_batch = word_labels_arr[it * params.batch_size:(it + 1) * params.batch_size]
                    sent_l_batch = encoder_word_data[it * params.batch_size:(it + 1) * params.batch_size]
                    label_l_batch = label_labels_arr[it * params.batch_size:(it + 1) * params.batch_size]

                    # zero padding
                    pad = len(max(sent_batch, key=len))  # not optimal!!
                    length_ = np.array([len(sent) for sent in sent_batch]).reshape(params.batch_size)
                    # prepare encoder and decoder inputs to feed
                    sent_batch = np.array([sent + [0] * (pad - len(sent)) for sent in sent_batch])
                    label_batch = np.array([sent + [0] * (pad - len(sent)) for sent in label_batch])
                    sent_dec_l_batch = np.array([sent + [0] * (pad - len(sent)) for sent in sent_dec_l_batch])
                    sent_l_batch = np.array([sent + [0] * (pad - len(sent)) for sent in sent_l_batch])
                    label_l_batch = np.array([sent + [0] * (pad - len(sent)) for sent in label_l_batch])

                    feed = {
                        word_inputs: sent_l_batch,
                        label_inputs: label_l_batch,
                        d_word_inputs: sent_batch,
                        d_label_inputs: label_batch,
                        d_word_labels: sent_dec_l_batch,
                        d_label_labels: label_l_batch,
                        seq_length: length_,
                        d_seq_length: length_,
                        anneal: params.anneal_value
                    }

                    z1a, z1b, z3a, z3b, kzg, kzs, tlb, wppl, lppl, klw, o = sess.run(
                        [Zsent_distribution[0], Zsent_distribution[1],
                         Zsent_dec_distribution[0], Zsent_dec_distribution[1],
                         neg_kld_zglobal, neg_kld_zsent, total_lower_bound,
                         word_perplexity, label_perplexity, kl_term_weight, optimize],
                        feed_dict=feed)

                    if cur_it % 100 == 0 and cur_it != 0:
                        print("TotalLB after {} ({}) iterations (epoch): {} Neg_KLD_Zglobal: "
                              "{} Neg_KLD_Zsent: {}".format(cur_it, e, tlb, kzg, kzs))
                        print("Word Perplexity: {}, Label Perplexity: {}".format(wppl, lppl))
                    cur_it += 1
                    total_tlb += tlb
                    total_wppl += wppl
                    total_klw += klw
                    if cur_it % 100 == 0 and cur_it != 0:
                        path_to_save = os.path.join(params.MODEL_DIR, "vae_lstm_model")
                        model_path_name = saver.save(sess, path_to_save, global_step=cur_it)

                avg_tlb = total_tlb / num_iters
                avg_wppl = total_wppl / num_iters
                avg_klw = total_klw / num_iters
                print("----------> After epoch {}: TLB: {}, Word PPL: {}, KLD: {}\n".format(
                    e, avg_tlb, avg_wppl, avg_klw))
                print("Word Perplexity: {}, Label Perplexity: {}".format(wppl, lppl))
                iters.append(e)
                tlb_arr.append(avg_tlb)
                wppl_arr.append(avg_wppl)
                klw_arr.append(avg_klw)

            # save the values and plot the graphs
            import matplotlib.pyplot as plt
            plot_filename = "./plot_values_" + str(params.anneal_value) + ".txt"
            with open(plot_filename, 'w') as wf:
                _ = [wf.write(str(s) + ' ') for s in iters]
                wf.write('\n')
                _ = [wf.write(str(s) + ' ') for s in tlb_arr]
                wf.write('\n')
                _ = [wf.write(str(s) + ' ') for s in wppl_arr]
                wf.write('\n')
                _ = [wf.write(str(s) + ' ') for s in klw_arr]

            plt.subplot(3, 1, 1, title="Total Lower Bound vs Epochs")
            plt.plot(iters, tlb_arr, color='blue', label='Total lower bound')
            plt.xlabel('Epochs')
            plt.ylabel('Total Lower Bound')

            plt.subplot(3, 1, 2, title="Word Perplexity vs Epochs")
            plt.plot(iters, wppl_arr, color='red', label='Word Perplexity')
            plt.xlabel('Epochs')
            plt.ylabel('Word Perplexity')
            # plt.legend(bbox_to_anchor=(1.05, 1), loc=1, borderaxespad=0.)

            plt.subplot(3, 1, 3, title="KL Term Value vs Epochs")
            plt.plot(iters, klw_arr, color='green', label='KL term Value')
            plt.xlabel('Epochs')
            plt.ylabel('KL term Value')
            figure_name = './graph_' + str(params.anneal_value) + '.png'
            plt.savefig(figure_name)
            # plt.plot(iters, coeff, 'r--', label='annealing')