def predict(self, checkpoint, x_test, y_test, true_test):
    """Restore a checkpoint, decode the test set and print the BLEU scores.

    Args:
        checkpoint: Checkpoint path handed to ``tf.train.Saver.restore``.
        x_test: Encoder inputs, batched through ``data_utils.get_batches``.
        y_test: Decoder targets; only used to build the batches.
        true_test: Reference sentences, indexed by position in the test set.

    Returns:
        A list holding the per-example output of ``self.inference_logits``
        for every batch that was decoded.
    """
    # The 'qgen' experiment appends a '?' token to every hypothesis.
    suffix = ['?'] if self.config['experiment'] == 'qgen' else []
    # Ids that are dropped before the prediction is turned back into words.
    ignored_ids = (self.pad, -1, self.eos)

    all_predictions = []
    hypotheses = []
    references = []

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        tf.train.Saver().restore(sess, checkpoint)

        batches = data_utils.get_batches(x_test, y_test, self.batch_size)
        for batch_no, batch in enumerate(batches):
            inputs, _, source_lengths, _ = batch
            feed = {
                self.input_data: inputs,
                self.source_sentence_length: source_lengths,
                self.keep_prob: 1.0,
            }
            decoded = sess.run(self.inference_logits, feed_dict=feed)
            all_predictions.extend(decoded)

            # Position of this batch's first example inside true_test.
            offset = batch_no * self.batch_size
            for row, token_ids in enumerate(decoded):
                words = [self.decoder_idx_word[t]
                         for t in token_ids if t not in ignored_ids]
                hypotheses.append(word_tokenize(" ".join(words)) + suffix)
                references.append([word_tokenize(true_test[offset + row])])

        bleu_scores = eval_utils.calculate_bleu_scores(references, hypotheses)

    print('BLEU 1 to 4 : {}'.format(
        ' | '.join(str(score) for score in bleu_scores)))

    return all_predictions
def validate(self, sess, x_val, y_val, true_val):
    """Decode the validation set and record its four BLEU scores.

    Args:
        sess: Open TensorFlow session that holds the model variables.
        x_val: Encoder inputs, batched through ``data_utils.get_batches``.
        y_val: Decoder targets; only used to build the batches.
        true_val: Reference sentences, indexed by position in the set.

    Returns:
        None. The first four values returned by
        ``eval_utils.calculate_bleu_scores`` are appended to
        ``self.epoch_bleu_score_val`` under the keys '1' to '4'.
    """
    # The 'qgen' experiment appends a '?' token to every hypothesis.
    suffix = ['?'] if self.config['experiment'] == 'qgen' else []
    # Ids that are dropped before the prediction is turned back into words.
    ignored_ids = (self.pad, -1, self.eos)

    hypotheses = []
    references = []

    batches = data_utils.get_batches(x_val, y_val, self.batch_size)
    for batch_no, batch in enumerate(batches):
        inputs, _, source_lengths, _ = batch
        feed = {
            self.input_data: inputs,
            self.source_sentence_length: source_lengths,
            self.keep_prob: 1.0,
        }
        decoded = sess.run(self.inference_logits, feed_dict=feed)

        # Position of this batch's first example inside true_val.
        offset = batch_no * self.batch_size
        for row, token_ids in enumerate(decoded):
            words = [self.decoder_idx_word[t]
                     for t in token_ids if t not in ignored_ids]
            hypotheses.append(word_tokenize(" ".join(words)) + suffix)
            references.append([word_tokenize(true_val[offset + row])])

    bleu_scores = eval_utils.calculate_bleu_scores(references, hypotheses)

    # Score n (0-based) is stored under the string key n + 1.
    for n in range(4):
        self.epoch_bleu_score_val[str(n + 1)].append(bleu_scores[n])
def get_diversity_metrics(self, checkpoint, x_test, y_test, num_samples=10, num_iterations = 3):
    """Print entropy and n-gram diversity of repeated decodings.

    Every test example is repeated ``num_samples`` times and decoded; the
    predictions of each run of ``num_samples`` consecutive rows are pooled
    into one token list that is scored with
    ``eval_utils.calculate_entropy`` and
    ``eval_utils.calculate_ngram_diversity``. The whole procedure is run
    ``num_iterations`` times and the averages are printed.

    Args:
        checkpoint: Checkpoint path handed to ``tf.train.Saver.restore``.
        x_test: Encoder inputs.
        y_test: Decoder targets; only used to build the batches.
        num_samples: Number of consecutive repetitions of each example.
        num_iterations: Number of full passes over the repeated data.

    Returns:
        None. The three averaged metrics are printed.
    """
    repeated_inputs = np.repeat(x_test, num_samples, axis=0)
    repeated_targets = np.repeat(y_test, num_samples, axis=0)

    entropies = []
    distinct_1 = []
    distinct_2 = []

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        tf.train.Saver().restore(sess, checkpoint)

        for _ in tqdm(range(num_iterations)):
            entropy_sum = 0
            unigram_sum = 0
            bigram_sum = 0

            decoded = []
            batches = data_utils.get_batches(
                repeated_inputs, repeated_targets, self.batch_size)
            for inputs, _, source_lengths, _ in batches:
                feed = {
                    self.input_data: inputs,
                    self.source_sentence_length: source_lengths,
                    self.keep_prob: 1.0,
                    self.word_dropout_keep_prob: 1.0,
                    self.z_temperature: self.z_temp,
                    self.attention_temperature: self.attention_temp,
                }
                decoded.extend(sess.run(self.inference_logits, feed_dict=feed))

            group = []
            # zip() stops at the shorter of the targets and the predictions;
            # the target itself is not needed.
            for position, (_, token_ids) in enumerate(
                    zip(repeated_targets, decoded)):
                words = [self.decoder_idx_word[t]
                         for t in token_ids if t != self.pad]
                # The last non-pad token is dropped from every sentence.
                group.append(" ".join(words[:-1]))

                if (position + 1) % num_samples != 0:
                    continue

                # A full group of num_samples sentences is pooled and scored.
                corpus = [token
                          for sentence in group
                          for token in word_tokenize(sentence)]
                entropy_sum += eval_utils.calculate_entropy(corpus)
                diversity = eval_utils.calculate_ngram_diversity(corpus)
                unigram_sum += diversity[0]
                bigram_sum += diversity[1]
                group = []

            example_count = len(x_test)
            entropies.append(entropy_sum / example_count)
            distinct_1.append(unigram_sum / example_count)
            distinct_2.append(bigram_sum / example_count)

    print('Entropy = {:>.3f} | Distinct-1 = {:>.3f} | Distinct-2 = {:>.3f}'.format(
        np.mean(entropies), np.mean(distinct_1), np.mean(distinct_2)))
def train(self, x_train, y_train, x_val, y_val, true_val):
    """Train the model, validating and checkpointing after every epoch.

    For each epoch the method runs ``self.train_op`` on every batch from
    ``data_utils.get_batches``, calls ``self.validate``, decays the
    learning rate (never below ``self.min_learning_rate``), saves a
    checkpoint to ``self.model_checkpoint_dir + str(epoch) + ".ckpt"``,
    pickles ``self.epoch_bleu_score_val`` to ``self.bleu_path + '.pkl'``
    and rewrites ``logs.txt`` with the accumulated epoch log lines.

    Args:
        x_train: Training encoder inputs.
        y_train: Training decoder targets.
        x_val: Validation encoder inputs, forwarded to ``validate``.
        y_val: Validation decoder targets, forwarded to ``validate``.
        true_val: Validation references, forwarded to ``validate``.

    Returns:
        None.
    """
    print('[INFO] Training process started')

    learning_rate = self.initial_learning_rate
    iter_i = 0

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        writer = tf.summary.FileWriter(self.logs_dir, sess.graph)
        try:
            for epoch_i in range(1, self.epochs + 1):
                start_time = time.time()

                batches = data_utils.get_batches(
                    x_train, y_train, self.batch_size)
                for (input_batch, output_batch,
                     source_sent_lengths, tar_sent_lengths) in batches:
                    iter_i += 1
                    try:
                        _, _summary = sess.run(
                            [self.train_op, self.summary_op],
                            feed_dict={
                                self.input_data: input_batch,
                                self.target_data: output_batch,
                                self.lr: learning_rate,
                                self.source_sentence_length: source_sent_lengths,
                                self.target_sentence_length: tar_sent_lengths,
                                self.keep_prob: self.dropout_keep_prob,
                            })
                        writer.add_summary(_summary, iter_i)
                    except Exception as e:
                        # Training stays best-effort (a failing step is
                        # skipped), but the failure is reported instead of
                        # being silently discarded.
                        print('[WARN] Training step {} failed: {}'.format(
                            iter_i, e))

                self.validate(sess, x_val, y_val, true_val)
                # Read the scores validate() has just appended; indexing by
                # epoch would return stale values if the lists were not
                # empty when training started.
                val_bleu_str = ' | '.join(
                    str(self.epoch_bleu_score_val[order][-1])
                    for order in ('1', '2', '3', '4'))

                # Reduce learning rate, but not below its minimum value
                learning_rate = np.max([
                    self.min_learning_rate,
                    learning_rate * self.learning_rate_decay,
                ])

                # A fresh Saver per epoch is kept from the original code.
                saver = tf.train.Saver()
                saver.save(sess,
                           self.model_checkpoint_dir + str(epoch_i) + ".ckpt")
                end_time = time.time()

                # Save the validation BLEU scores so far
                with open(self.bleu_path + '.pkl', 'wb') as f:
                    pickle.dump(self.epoch_bleu_score_val, f)

                self.log_str.append(
                    'Epoch {:>3}/{} - Time {:>6.1f} BLEU: {}'.format(
                        epoch_i, self.epochs, end_time - start_time,
                        val_bleu_str))
                # The whole log is rewritten every epoch.
                with open('logs.txt', 'w') as f:
                    f.write('\n'.join(self.log_str))
                print(self.log_str[-1])
        finally:
            # Flush and release the event file even if training aborts.
            writer.close()
def predict(self, checkpoint, x_test, y_test, true_val):
    """Restore a checkpoint, score the test set and report the results.

    The raw output of ``self.inference_logits`` is collected for every
    batch and flattened together with the batch targets. The method then
    writes predictions, targets and ``true_val`` to
    ``../data/hypotheses_val.csv``, prints the min/max of both series, the
    mean squared error, the Pearson correlation and the accuracy at a 0.5
    threshold, and finally shows a plot of the cumulative sum of
    ``direction * target`` where ``direction`` is -1 for predictions below
    0.5 and +1 for predictions above 0.5 (predictions equal to 0.5 are
    left out).

    Args:
        checkpoint: Checkpoint path handed to ``tf.train.Saver.restore``.
        x_test: Model inputs.
        y_test: Targets; each batch is reshaped to a single column.
        true_val: Extra per-example values passed to
            ``data_utils.get_batches`` and stored in the CSV column
            'real' (truncated to the number of predictions).

    Returns:
        1-D numpy array with the flattened model outputs.

    Raises:
        ValueError: If ``data_utils.get_batches`` yields no batch.
    """
    # Imported first so a missing optional dependency fails before the
    # (potentially long) inference pass instead of after it.
    from sklearn.metrics import accuracy_score, mean_squared_error
    import matplotlib.pyplot as plt

    hypotheses_val = []
    references_val = []

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        # NOTE(review): the seed is set after the Saver (and hence the
        # graph) already exists - confirm it has the intended effect.
        tf.set_random_seed(1)
        saver.restore(sess, checkpoint)

        batches = data_utils.get_batches(
            x_test, y_test, self.batch_size, true_val)
        for input_batch, output_batch, _, _, _ in batches:
            output_batch = np.reshape(np.array(output_batch), (-1, 1))
            answer_logits = sess.run(
                self.inference_logits,
                feed_dict={
                    self.input_data: input_batch,
                    self.keep_prob: 1.0,
                    self.word_dropout_keep_prob: 1.0,
                    self.z_temperature: self.z_temp,
                    self.attention_temperature: self.attention_temp,
                })
            hypotheses_val.append(answer_logits)
            references_val.append(output_batch)

    if not hypotheses_val:
        raise ValueError('predict: data_utils.get_batches yielded no batches')

    # Flatten batch by batch: same element order as stacking equally
    # shaped batches, but also works if the batches differ in size.
    references_val = np.concatenate([np.ravel(b) for b in references_val])
    hypotheses_val = np.concatenate([np.ravel(b) for b in hypotheses_val])
    true_val = np.array(true_val).reshape(1, -1)[0]
    true_val = true_val[:len(hypotheses_val)]

    print("references_val", max(references_val), min(references_val))
    print("hypotheses_val", max(hypotheses_val), min(hypotheses_val))
    pd.DataFrame({
        'hyp': hypotheses_val,
        'ref': references_val,
        'real': true_val,
    }).to_csv('../data/hypotheses_val.csv')

    print('mean_squared_error',
          mean_squared_error(hypotheses_val, references_val))
    print('corr', np.corrcoef(references_val, hypotheses_val)[0, 1])
    print(accuracy_score(np.where(references_val > 0.5, 1, 0),
                         np.where(hypotheses_val > 0.5, 1, 0)))

    # -1 below the threshold, +1 above it, 0 (dropped) exactly on it.
    preds = np.where(hypotheses_val < 0.5, -1,
                     np.where(hypotheses_val > 0.5, 1, 0))
    decided = preds != 0
    plt.plot(np.cumsum(preds[decided] * references_val[decided]))
    plt.show()

    return hypotheses_val
def validate(self, sess, x_val, y_val, true_val):
    """Score the validation set and record the prediction/target correlation.

    The raw output of ``self.inference_logits`` is collected for every
    batch, flattened and written together with the targets and
    ``true_val`` to ``../data/hypotheses_val.csv``. The predictions are
    then replaced by their empirical CDF values before the mean squared
    error and the Pearson correlation against the targets are printed.

    Args:
        sess: Open TensorFlow session that holds the model variables.
        x_val: Model inputs.
        y_val: Targets; each batch is reshaped to a single column.
        true_val: Extra per-example values passed to
            ``data_utils.get_batches`` and stored in the CSV column
            'real' (truncated to the number of predictions).

    Returns:
        None. The correlation is appended to
        ``self.epoch_bleu_score_val['1']``.

    Raises:
        ValueError: If ``data_utils.get_batches`` yields no batch.
    """
    # Imported first so a missing optional dependency fails before the
    # inference pass instead of after it.
    from statsmodels.distributions.empirical_distribution import ECDF
    from sklearn.metrics import mean_squared_error

    hypotheses_val = []
    references_val = []

    batches = data_utils.get_batches(x_val, y_val, self.batch_size, true_val)
    for input_batch, output_batch, _, _, _ in batches:
        output_batch = np.reshape(np.array(output_batch), (-1, 1))
        answer_logits = sess.run(
            self.inference_logits,
            feed_dict={
                self.input_data: input_batch,
                self.keep_prob: 1.0,
                self.word_dropout_keep_prob: 1.0,
                self.z_temperature: self.z_temp,
                self.attention_temperature: self.attention_temp,
            })
        hypotheses_val.append(answer_logits)
        references_val.append(output_batch)

    if not hypotheses_val:
        raise ValueError('validate: data_utils.get_batches yielded no batches')

    # Flatten batch by batch: same element order as stacking equally
    # shaped batches, but also works if the batches differ in size.
    references_val = np.concatenate([np.ravel(b) for b in references_val])
    hypotheses_val = np.concatenate([np.ravel(b) for b in hypotheses_val])
    true_val = np.array(true_val).reshape(1, -1)[0]
    true_val = true_val[:len(hypotheses_val)]

    print("references_val", max(references_val), min(references_val))
    print("hypotheses_val", max(hypotheses_val), min(hypotheses_val))
    # The CSV holds the raw model outputs (written before the ECDF step).
    pd.DataFrame({
        'hyp': hypotheses_val,
        'ref': references_val,
        'real': true_val,
    }).to_csv('../data/hypotheses_val.csv')

    # Replace every prediction by its empirical-CDF value.
    ecdf = ECDF(hypotheses_val)
    hypotheses_val = ecdf(hypotheses_val)

    print('mean_squared_error',
          mean_squared_error(hypotheses_val, references_val))
    # Computed once; the same value is printed and stored.
    corr = np.corrcoef(references_val, hypotheses_val)[0, 1]
    print('corr', corr)

    # Despite the attribute name, key '1' stores the correlation here.
    self.epoch_bleu_score_val['1'].append(corr)
def train(self, x_train, y_train, x_val, y_val, true_val):
    """Train the model, validating and checkpointing after every epoch.

    For each epoch the method runs ``self.train_op`` on every batch from
    ``data_utils.get_batches`` (targets reshaped to a single column),
    calls ``self.validate``, decays the learning rate (never below
    ``self.min_learning_rate``), saves a checkpoint to
    ``self.model_checkpoint_dir + str(epoch) + ".ckpt"`` and rewrites
    ``logs.txt`` with the accumulated epoch log lines.

    The value fed as ``self.lambda_coeff`` starts at 0.0 and is updated
    with a tanh schedule after every successful step up to iteration
    3000; after that it keeps its last value.

    Args:
        x_train: Training inputs.
        y_train: Training targets.
        x_val: Validation inputs, forwarded to ``validate``.
        y_val: Validation targets, forwarded to ``validate``.
        true_val: Extra validation values, forwarded to ``validate``.

    Returns:
        None.
    """
    print('[INFO] Training process started')

    learning_rate = self.initial_learning_rate
    iter_i = 0
    lambda_val = 0.0

    with tf.Session() as sess:
        sess.run(tf.group(tf.global_variables_initializer(),
                          tf.local_variables_initializer()))

        writer = tf.summary.FileWriter(self.logs_dir, sess.graph)
        try:
            for epoch_i in range(1, self.epochs + 1):
                start_time = time.time()

                batches = data_utils.get_batches(
                    x_train, y_train, self.batch_size)
                for input_batch, output_batch, _, _ in batches:
                    output_batch = np.reshape(np.array(output_batch), (-1, 1))
                    iter_i += 1
                    try:
                        _, _summary = sess.run(
                            [self.train_op, self.summary_op],
                            feed_dict={
                                self.input_data: input_batch,
                                self.target_data: output_batch,
                                self.lr: learning_rate,
                                # Every target is fed with length 1.
                                self.target_sentence_length:
                                    [1] * self.batch_size,
                                self.keep_prob: self.dropout_keep_prob,
                                self.lambda_coeff: lambda_val,
                                self.z_temperature: self.z_temp,
                                self.word_dropout_keep_prob:
                                    self.word_dropout_keep_probability,
                                self.attention_temperature:
                                    self.attention_temp,
                                self.gamma_coeff: self.gamma_val,
                            })
                        writer.add_summary(_summary, iter_i)
                    except Exception as e:
                        # Training stays best-effort (a failing step is
                        # skipped); the message now says which step failed.
                        print('[WARN] Training step {} failed: {}'.format(
                            iter_i, e))
                    else:
                        # KL Annealing till some iteration
                        if iter_i <= 3000:
                            lambda_val = np.round(
                                (np.tanh((iter_i - 4500) / 1000) + 1) / 2,
                                decimals=6)

                self.validate(sess, x_val, y_val, true_val)
                # Key '1' holds the value validate() has just appended (a
                # correlation, despite the BLEU naming). Reading the last
                # entry avoids stale values if the list was not empty when
                # training started.
                val_bleu_str = str(self.epoch_bleu_score_val['1'][-1])
                print('val_bleu_str', val_bleu_str)

                # Reduce learning rate, but not below its minimum value
                learning_rate = np.max([
                    self.min_learning_rate,
                    learning_rate * self.learning_rate_decay,
                ])

                # A fresh Saver per epoch is kept from the original code.
                saver = tf.train.Saver()
                saver.save(sess,
                           self.model_checkpoint_dir + str(epoch_i) + ".ckpt")
                end_time = time.time()

                self.log_str.append(
                    'Epoch {:>3}/{} - Time {:>6.1f} BLEU: {}'.format(
                        epoch_i, self.epochs, end_time - start_time,
                        val_bleu_str))
                # The whole log is rewritten every epoch.
                with open('logs.txt', 'w') as f:
                    f.write('\n'.join(self.log_str))
                print(self.log_str[-1])
        finally:
            # Flush and release the event file even if training aborts.
            writer.close()