def train(self):
    """Train the model and checkpoint it whenever validation accuracy improves.

    Vectorizes ``self.trainData`` and ``self.valData`` with
    ``vectorize_data``, then runs ``self.epochs`` passes of mini-batch
    fitting through ``self.model.batch_fit``.  Every
    ``self.evaluation_interval`` epochs the train/validation accuracies are
    printed, written to ``self.summary_writer`` and, if the validation
    accuracy beats the best seen so far, the session is saved under
    ``self.model_dir``.
    """
    trainS, trainQ, trainA = vectorize_data(
        self.trainData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    valS, valQ, valA = vectorize_data(
        self.valData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    n_train = len(trainS)
    n_val = len(valS)
    print("Training Size", n_train)
    print("Validation Size", n_val)
    tf.set_random_seed(self.random_state)

    # (start, end) slice bounds of consecutive batches of exactly
    # ``batch_size`` examples.  zip() stops at the shorter range, so the
    # examples after the last generated pair are never used for fitting.
    batches = list(zip(
        range(0, n_train - self.batch_size, self.batch_size),
        range(self.batch_size, n_train, self.batch_size)))

    tag_prefix = 'task_' + str(self.task_id) + '/'
    best_validation_accuracy = 0
    for t in range(1, self.epochs + 1):
        np.random.shuffle(batches)
        total_cost = 0.0
        for start, end in batches:
            total_cost += self.model.batch_fit(
                trainS[start:end], trainQ[start:end], trainA[start:end])

        if t % self.evaluation_interval != 0:
            continue

        train_preds = self.batch_predict(trainS, trainQ, n_train)
        val_preds = self.batch_predict(valS, valQ, n_val)
        train_acc = metrics.accuracy_score(np.array(train_preds), trainA)
        val_acc = metrics.accuracy_score(val_preds, valA)
        print('-----------------------')
        print('Epoch', t)
        print('Total Cost:', total_cost)
        print('Training Accuracy:', train_acc)
        print('Validation Accuracy:', val_acc)
        print('-----------------------')

        # write summary
        # Build the Summary protobuf directly instead of creating new
        # tf.summary.scalar ops on every evaluation: new ops grow the graph
        # each time and get uniquified names, so the tag would change from
        # one evaluation to the next.
        summary = tf.Summary(value=[
            tf.Summary.Value(tag=tag_prefix + 'train_acc',
                             simple_value=float(train_acc)),
            tf.Summary.Value(tag=tag_prefix + 'val_acc',
                             simple_value=float(val_acc)),
        ])
        self.summary_writer.add_summary(summary, t)
        self.summary_writer.flush()

        if val_acc > best_validation_accuracy:
            best_validation_accuracy = val_acc
            self.saver.save(self.sess, self.model_dir + 'model.ckpt',
                            global_step=t)
def train(self):
    """Fit the model on the training split, evaluating at fixed intervals.

    The training and validation dialogs are vectorized once, then each of
    the ``self.epochs`` epochs shuffles the batch order and feeds every
    batch to ``self.model.batch_fit``.  When the epoch number is a multiple
    of ``self.evaluation_interval`` the accuracies are printed and logged to
    ``self.summary_writer``; a checkpoint is saved each time the validation
    accuracy exceeds the previous best.
    """
    trainS, trainQ, trainA = vectorize_data(
        self.trainData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    valS, valQ, valA = vectorize_data(
        self.valData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    n_train = len(trainS)
    n_val = len(valS)
    print("Training Size", n_train)
    print("Validation Size", n_val)
    tf.set_random_seed(self.random_state)

    # Slice bounds for batches of exactly ``batch_size`` examples; whatever
    # follows the last generated pair is not used for fitting.
    starts = range(0, n_train - self.batch_size, self.batch_size)
    ends = range(self.batch_size, n_train, self.batch_size)
    batches = list(zip(starts, ends))

    train_tag = 'task_' + str(self.task_id) + '/' + 'train_acc'
    val_tag = 'task_' + str(self.task_id) + '/' + 'val_acc'
    best_validation_accuracy = 0
    for t in range(1, self.epochs + 1):
        np.random.shuffle(batches)
        total_cost = 0.0
        for start, end in batches:
            s = trainS[start:end]
            q = trainQ[start:end]
            a = trainA[start:end]
            total_cost += self.model.batch_fit(s, q, a)

        if t % self.evaluation_interval == 0:
            train_preds = self.batch_predict(trainS, trainQ, n_train)
            val_preds = self.batch_predict(valS, valQ, n_val)
            train_acc = metrics.accuracy_score(
                np.array(train_preds), trainA)
            val_acc = metrics.accuracy_score(val_preds, valA)
            print('-----------------------')
            print('Epoch', t)
            print('Total Cost:', total_cost)
            print('Training Accuracy:', train_acc)
            print('Validation Accuracy:', val_acc)
            print('-----------------------')

            # write summary
            # A hand-built Summary protobuf keeps the tags stable and adds
            # nothing to the graph; creating tf.summary.scalar ops here on
            # every evaluation would grow the graph and rename the tags
            # (train_acc, train_acc_1, ...).
            summary = tf.Summary(value=[
                tf.Summary.Value(tag=train_tag,
                                 simple_value=float(train_acc)),
                tf.Summary.Value(tag=val_tag,
                                 simple_value=float(val_acc)),
            ])
            self.summary_writer.add_summary(summary, t)
            self.summary_writer.flush()

            if val_acc > best_validation_accuracy:
                best_validation_accuracy = val_acc
                self.saver.save(self.sess, self.model_dir + 'model.ckpt',
                                global_step=t)
def load_data(self):
    """Load one task, compute size statistics and vectorize all splits.

    Builds the vocabulary from the train and test stories, stores the
    derived sizes (``memory_size``, ``sentence_size``, ``vocab_size``, ...)
    on ``self``, vectorizes the data, and holds out 10% of the training
    examples as the validation split.
    """
    # single babi task
    # TODO: refactor all this running elsewhere
    # task data
    # NOTE(review): data_dir and task_id are read from the enclosing scope,
    # not from self -- confirm that this is intended.
    train, test = load_task(data_dir, task_id)
    data = train + test

    vocab = sorted(
        reduce(lambda x, y: x | y,
               (set(list(chain.from_iterable(s)) + q + a)
                for s, q, a in data)))
    word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

    # Materialise the lengths once: np.mean() needs a real sequence, and a
    # bare map() object is an iterator on Python 3.
    story_lengths = [len(s) for s, _, _ in data]

    self.memory_size = 50
    self.max_story_size = max(story_lengths)
    self.mean_story_size = int(np.mean(story_lengths))
    self.sentence_size = max(
        map(len, chain.from_iterable(s for s, _, _ in data)))
    self.query_size = max(map(len, (q for _, q, _ in data)))
    self.memory_size = min(self.memory_size, self.max_story_size)
    self.vocab_size = len(word_idx) + 1  # +1 for nil word
    self.sentence_size = max(self.query_size,
                             self.sentence_size)  # for the position

    print("Longest sentence length", self.sentence_size)
    print("Longest story length", self.max_story_size)
    print("Average story length", self.mean_story_size)

    # train/validation/test sets
    self.S, self.Q, self.A = vectorize_data(
        train, word_idx, self.sentence_size, self.memory_size)
    (self.trainS, self.valS,
     self.trainQ, self.valQ,
     self.trainA, self.valA) = cross_validation.train_test_split(
        self.S, self.Q, self.A, test_size=.1)  # TODO: randomstate
    self.testS, self.testQ, self.testA = vectorize_data(
        test, word_idx, self.sentence_size, self.memory_size)

    print(self.testS[0])
    print("Training set shape", self.trainS.shape)

    # params
    self.n_train = self.trainS.shape[0]
    self.n_test = self.testS.shape[0]
    self.n_val = self.valS.shape[0]

    print("Training Size", self.n_train)
    print("Validation Size", self.n_val)
    print("Testing Size", self.n_test)
def __init__(self, dataset_dir, task_id=1, memory_size=50, train=True):
    """Load a task and expose one of its splits as ``LongTensor`` fields.

    Args:
        dataset_dir: Directory handed to ``load_task``.
        task_id: Task number handed to ``load_task``.
        memory_size: Upper bound on the memory size; clipped to the
            longest story found in the data.
        train: Vectorize the training split when true, else the test split.
    """
    self.train = train
    self.task_id = task_id
    self.dataset_dir = dataset_dir

    train_data, test_data = load_task(self.dataset_dir, task_id)
    data = train_data + test_data

    # Vocabulary: every token seen in any story, query or answer.
    words = set()
    for story, query, answer in data:
        words.update(list(chain.from_iterable(story)) + query + answer)
    self.vocab = sorted(words)
    word_idx = {word: pos + 1 for pos, word in enumerate(self.vocab)}

    self.max_story_size = max([len(story) for story, _, _ in data])
    self.query_size = max([len(query) for _, query, _ in data])
    all_sentences = chain.from_iterable([story for story, _, _ in data])
    self.sentence_size = max([len(row) for row in all_sentences])
    self.memory_size = min(memory_size, self.max_story_size)

    # Add time words/indexes
    # Each time word maps to its own string (not to an integer index).
    for slot in range(1, self.memory_size + 1):
        time_word = "time{}".format(slot)
        word_idx[time_word] = time_word

    self.num_vocab = len(word_idx) + 1  # +1 for nil word
    # for the position, then +1 for time words
    self.sentence_size = max(self.query_size, self.sentence_size) + 1
    self.word_idx = word_idx

    self.mean_story_size = int(np.mean([len(s) for s, _, _ in data]))

    split = train_data if train else test_data
    story, query, answer = vectorize_data(
        split, self.word_idx, self.sentence_size, self.memory_size)

    self.data_story = torch.LongTensor(story)
    self.data_query = torch.LongTensor(query)
    # argmax over axis 1 of the vectorized answers.
    self.data_answer = torch.LongTensor(np.argmax(answer, axis=1))
def converse(self):
    """Run an endless spoken question/answer loop with the trained model.

    The dialog history is seeded with the line ``1 male young``.  On each
    turn the history is parsed; if parsing yields something, the model's
    predicted candidate is printed and spoken and appended (tab-separated)
    to the next user line.  The loop has no exit condition.
    """
    speak.Speak("I am at your service, Enter your query !!!")
    history = []
    turn = 1
    pending = str(turn) + ' ' + 'male young'
    while True:
        history.append(pending)
        parsed = parse_dialogs_per_response(history, self.candid2indx)

        reply_suffix = ''
        if parsed:
            story, ques, candid_id = parsed[-1]
            vec_story, vec_ques, vec_resp = vectorize_data(
                [(story, ques, candid_id)], self.word_idx,
                self.sentence_size, self.batch_size, self.n_cand,
                self.memory_size)
            prediction_id = self.model.predict(vec_story, vec_ques)
            print('bot : {}'.format(self.indx2candid[prediction_id[0]]))
            bot_response = self.indx2candid[prediction_id[0]]
            speak.Speak(bot_response)
            reply_suffix = '\t' + bot_response

        print(" Enter your data ")
        user = input()
        turn += 1
        pending = str(turn) + ' ' + user + reply_suffix
def test(self):
    """Evaluate the model on ``self.testData`` and print the accuracy.

    Restores the checkpoint recorded in ``self.model_dir`` when there is
    one; otherwise prints a notice and evaluates the current weights.
    """
    checkpoint = tf.train.get_checkpoint_state(self.model_dir)
    if not (checkpoint and checkpoint.model_checkpoint_path):
        print("...no checkpoint found...")
    else:
        self.saver.restore(self.sess, checkpoint.model_checkpoint_path)

    testS, testQ, testA = vectorize_data(
        self.testData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    n_test = len(testS)
    print("Testing Size", n_test)
    print(type(self.testData))

    predictions = self.batch_predict(testS, testQ, n_test)
    accuracy = metrics.accuracy_score(predictions, testA)
    print("Testing Accuracy:", accuracy)
def test(self):
    """Restore the saved model, then chat interactively or score the test set.

    With ``self.isInteractive`` set, control is handed to
    ``self.interactive()``; otherwise ``self.testData`` is vectorized with
    the word2vec-based settings and the accuracy is printed.
    """
    checkpoint = tf.train.get_checkpoint_state(self.model_dir)
    if not (checkpoint and checkpoint.model_checkpoint_path):
        print("...no checkpoint found...")
    else:
        self.saver.restore(self.sess, checkpoint.model_checkpoint_path)
        # Basically recreate the indices of new words in the same way as
        # train function. If index position different in test compared to
        # train, the look up table embedding features are different for the
        # word, reducing accuracy

    if self.isInteractive:
        self.interactive()
        return

    testS, testQ, testA = vectorize_data(
        self.testData, self.word2vec, self.max_sentence_size,
        self.batch_size, self.n_cand, self.memory_size, self.vocab,
        self.ivocab, self.embedding_size, uncertain_word=True,
        uncertain=self.uncertain_word_index)
    n_test = len(testS)
    print("Testing Size", n_test)
    predictions = self.batch_predict(testS, testQ, n_test)
    accuracy = metrics.accuracy_score(predictions, testA)
    print("Testing Accuracy:", accuracy)
def interactive(self):
    """Chat with the model on the console until the user types ``exit``.

    ``restart`` resets the dialog memory.  Every other line is tokenized,
    vectorized together with the running context, answered with the
    predicted candidate, and then both turns are appended to the context
    with their speaker (``$u``/``$r``) and turn (``#n``) markers.
    """
    # NOTE(review): the initial profile line is 'male' but 'restart'
    # re-seeds the context with 'female' -- confirm which one is intended.
    context = [['male', 'young', '$r', '#0']]
    # context = []
    nid = 1
    while True:
        line = input('--> ').strip().lower()
        if line == 'exit':
            break
        if line == 'restart':
            context = [['female', 'young', '$r', '#0']]
            # context = []
            nid = 1
            print("clear memory")
            continue

        user_tokens = tokenize(line)
        s, q, a = vectorize_data(
            [(context, user_tokens, -1)], self.word_idx,
            self.sentence_size, self.batch_size, self.n_cand,
            self.memory_size)
        preds = self.model.predict(s, q)
        reply = self.indx2candid[preds[0]]
        print(reply)

        reply_tokens = tokenize(reply)
        turn_tag = '#' + str(nid)
        user_tokens.extend(['$u', turn_tag])
        reply_tokens.extend(['$r', turn_tag])
        context.append(user_tokens)
        context.append(reply_tokens)
        nid += 1
def interactive(self):
    """Console chat loop; ``exit`` quits and ``restart`` clears the memory.

    Uses ``raw_input`` (the Python 2 built-in) to read each user line.  The
    line is tokenized, vectorized together with the accumulated context and
    answered with the model's predicted candidate; both turns are then
    stored in the context with speaker and turn markers.
    """
    context = []
    nid = 1
    while True:
        line = raw_input('--> ').strip().lower()
        if line == 'exit':
            break
        if line == 'restart':
            context = []
            nid = 1
            print("clear memory")
            continue

        user_tokens = tokenize(line)
        s, q, a = vectorize_data(
            [(context, user_tokens, -1)], self.word_idx,
            self.sentence_size, self.batch_size, self.n_cand,
            self.memory_size)
        preds = self.model.predict(s, q)
        reply = self.indx2candid[preds[0]]
        print(reply)

        reply_tokens = tokenize(reply)
        turn_tag = '#' + str(nid)
        user_tokens.extend(['$u', turn_tag])
        reply_tokens.extend(['$r', turn_tag])
        context.append(user_tokens)
        context.append(reply_tokens)
        nid += 1
def interactive(self):
    """Console chat loop for the PyTorch model.

    ``exit`` leaves the loop and ``restart`` empties the context.  Any other
    line is tokenized, vectorized with the running context, wrapped in
    ``Variable`` tensors and passed to ``self.model.predict``; the first
    predicted index selects the printed reply.
    """
    context = []
    nid = 1
    while True:
        line = input('--> ').strip().lower()
        if line == 'exit':
            break
        if line == 'restart':
            context = []
            nid = 1
            print("clear memory")
            continue

        user_tokens = tokenize(line)
        s, q, a = vectorize_data(
            [(context, user_tokens, -1)], self.word_idx,
            self.sentence_size, self.batch_size, self.n_cand,
            self.memory_size)
        # Stack each vectorized field into one array and wrap it as a tensor.
        s = Variable(torch.from_numpy(np.stack(s)))
        q = Variable(torch.from_numpy(np.stack(q)))
        a = Variable(torch.from_numpy(np.stack(a)))

        scores = self.model.predict(s, q)
        preds = list(scores.data.numpy().tolist())
        reply = self.indx2candid[preds[0]]
        print(reply)

        reply_tokens = tokenize(reply)
        turn_tag = '#' + str(nid)
        user_tokens.extend(['$u', turn_tag])
        reply_tokens.extend(['$r', turn_tag])
        context.append(user_tokens)
        context.append(reply_tokens)
        nid += 1
def getVecor(index):
    """Vectorize the single example at 1-based position ``index``.

    ``index`` is converted with ``int()``; the example is taken from the
    outer-scope ``data`` sequence.  Returns the first two values produced by
    ``vectorize_data`` (the third is discarded).
    """
    end = int(index)
    start = end - 1
    vectorized = vectorize_data(
        data[start:end], word_idx, sentence_size, memory_size)
    sV, Q, _ = vectorized
    return sV, Q
def train(self):
    """Fit the model and save a checkpoint on each new best validation score.

    Runs ``self.epochs`` epochs over shuffled batches.  On epochs that are
    a multiple of ``self.evaluation_interval`` the train and validation
    accuracies are computed and printed, and the session is saved when the
    validation accuracy improves on the best value so far.
    """
    trainS, trainQ, trainA = vectorize_data(
        self.trainData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    valS, valQ, valA = vectorize_data(
        self.valData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    n_train = len(trainS)
    n_val = len(valS)
    print("Training Size", n_train)
    print("Validation Size", n_val)
    tf.set_random_seed(self.random_state)

    # Slice bounds for batches of exactly ``batch_size`` examples.
    batch_starts = range(0, n_train - self.batch_size, self.batch_size)
    batch_ends = range(self.batch_size, n_train, self.batch_size)
    batches = list(zip(batch_starts, batch_ends))

    best_validation_accuracy = 0
    for t in range(1, self.epochs + 1):
        np.random.shuffle(batches)
        total_cost = 0.0
        for lo, hi in batches:
            total_cost += self.model.batch_fit(
                trainS[lo:hi], trainQ[lo:hi], trainA[lo:hi])

        if t % self.evaluation_interval != 0:
            continue

        train_preds = self.batch_predict(trainS, trainQ, n_train)
        val_preds = self.batch_predict(valS, valQ, n_val)
        train_acc = metrics.accuracy_score(np.array(train_preds), trainA)
        val_acc = metrics.accuracy_score(val_preds, valA)

        print('-----------------------')
        print('Epoch', t)
        print('Total Cost:', total_cost)
        print('Training Accuracy:', train_acc)
        print('Validation Accuracy:', val_acc)
        print('-----------------------')

        if val_acc > best_validation_accuracy:
            best_validation_accuracy = val_acc
            self.saver.save(self.sess, self.model_dir + 'model.ckpt',
                            global_step=t)
def test(self, introspect=False):
    """Restore the saved model and either chat or score the test set.

    The training data is vectorized first and kept on ``self.train_data``.
    ``introspect`` is forwarded unchanged to ``self.batch_predict``.
    """
    print('begin test...')
    trainS, trainQ, trainA, trainTag = vectorize_data(
        self.trainData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    self.train_data = [trainS, trainQ, trainA, trainTag]

    checkpoint = tf.train.get_checkpoint_state(self.model_dir)
    if not (checkpoint and checkpoint.model_checkpoint_path):
        print("...no checkpoint found...")
    else:
        self.saver.restore(self.sess, checkpoint.model_checkpoint_path)

    if self.isInteractive:
        self.interactive()
        return

    testS, testQ, testA, testTag = vectorize_data(
        self.testData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    n_test = len(testS)
    print("Testing Size", n_test)
    predictions = self.batch_predict(
        testS, testQ, n_test, testTag, introspect)
    accuracy = metrics.accuracy_score(predictions, testA)
    print("Testing Accuracy:", accuracy)
def test_ds(self, dataset_dir):
    """Print the test accuracy for the dialog task stored in ``dataset_dir``.

    Only the second element returned by ``load_dialog_task`` (the test
    data) is used.  The result is printed as ``<dataset_dir>: <percent>``.
    """
    loaded = load_dialog_task(
        dataset_dir, self.task_id, self.candid2indx, self.OOV)
    _, testData, _ = loaded
    testS, testQ, testA = vectorize_data(
        testData, self.word_idx, self.sentence_size, self.batch_size,
        self.n_cand, self.memory_size)
    predictions = self.batch_predict(testS, testQ, len(testS))
    accuracy = metrics.accuracy_score(predictions, testA)
    print('{}: {:.2%}'.format(dataset_dir, accuracy))
def test(self):
    """Restore the saved model, then chat interactively or score the test set.

    A missing checkpoint is reported but does not stop the run.
    """
    checkpoint = tf.train.get_checkpoint_state(self.model_dir)
    if not (checkpoint and checkpoint.model_checkpoint_path):
        print("...no checkpoint found...")
    else:
        self.saver.restore(self.sess, checkpoint.model_checkpoint_path)

    if self.isInteractive:
        self.interactive()
        return

    testS, testQ, testA = vectorize_data(
        self.testData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    n_test = len(testS)
    print("Testing Size", n_test)
    predictions = self.batch_predict(testS, testQ, n_test)
    accuracy = metrics.accuracy_score(predictions, testA)
    print("Testing Accuracy:", accuracy)
def test(self):
    """Load the ``best_model`` checkpoint, then chat or score the test set.

    The checkpoint path is ``self.model_dir + 'best_model'``; it is loaded
    into ``self.model`` and its optimizer by ``load_checkpoit``.
    """
    checkpoint_path = self.model_dir + 'best_model'
    load_checkpoit(self.model, self.model.optimizer, checkpoint_path)

    if self.isInteractive:
        self.interactive()
        return

    testS, testQ, testA = vectorize_data(
        self.testData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    n_test = len(testS)
    print("Testing Size", n_test)
    predictions = self.batch_predict(testS, testQ, n_test)
    accuracy = metrics.accuracy_score(predictions, testA)
    print("Testing Accuracy:", accuracy)
def test_accuracy(self, test_data_dir):
    """Return the accuracy score on the test data found in ``test_data_dir``.

    Unlike ``Chatbot.test`` this works on any dataset directory, not only
    the one given at initialisation.

    :param test_data_dir: Directory's path where to find the testing dataset
    :return: The accuracy score for the testing file
    """
    loaded = load_dialog_task(
        test_data_dir, self.task_id, self.candid2indx, self.OOV)
    _, testData, _ = loaded

    testP, testS, testQ, testA = vectorize_data(
        testData, self.word_idx, self.sentence_size, self.batch_size,
        self.n_cand, self.memory_size, self._profiles_mapping)

    predictions = self.model.batch_predict(testP, testS, testQ)
    return metrics.accuracy_score(predictions, testA)
def test2(self, dir):
    """Predict on the dialog data in ``dir`` and return the predicted replies.

    The loaded data replaces ``self.testData``.  The accuracy is printed;
    the return value is the list of candidate strings looked up in
    ``self.indx2candid`` for each predicted index.
    """
    # candid_dic, isOOV, data_dir
    self.testData = myload_dialog_task(self.candid2indx, self.OOV, dir)

    testP, testS, testQ, testA = vectorize_data(
        self.testData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    n_test = len(testS)

    test_preds = self.batch_predict(testP, testS, testQ, n_test)
    test_acc = metrics.accuracy_score(test_preds, testA)

    # Translate each predicted index back to its candidate text.
    current = [self.indx2candid[test_preds[pos]]
               for pos in range(len(test_preds))]
    print("Testing Accuracy:", test_acc)
    return current
def interactive(self):
    """Console chat loop for the word2vec-based model.

    ``exit`` quits and ``restart`` clears the context.  Every other line is
    tokenized and vectorized with the same extra arguments (vocabulary,
    embedding size, uncertain-word settings) that the test path uses.
    """
    context = []
    nid = 1
    while True:
        line = input('--> ').strip().lower()
        if line == 'exit':
            break
        if line == 'restart':
            context = []
            nid = 1
            print("clear memory")
            continue

        user_tokens = tokenize(line)
        # Need to take care of the candidate sentence size > sentence size.
        # In both main function and here. Whichever of candidate_size or
        # candidate_sentence_size is higher, that should be allowed
        s, q, a = vectorize_data(
            [(context, user_tokens, -1)], self.word2vec,
            self.max_sentence_size, self.batch_size, self.n_cand,
            self.memory_size, self.vocab, self.ivocab,
            self.embedding_size, uncertain_word=True,
            uncertain=self.uncertain_word_index)
        preds = self.model.predict(s, q)
        reply = self.indx2candid[preds[0]]
        print(reply)

        reply_tokens = tokenize(reply)
        turn_tag = '#' + str(nid)
        user_tokens.extend(['$u', turn_tag])
        reply_tokens.extend(['$r', turn_tag])
        context.append(user_tokens)
        context.append(reply_tokens)
        nid += 1
def test(self):
    """Restore a previously trained model and print the test accuracy.

    In interactive mode the method hands over to ``self.interactive()``
    instead of scoring the test set.
    """
    checkpoint = tf.train.get_checkpoint_state(self.model_dir)
    if not (checkpoint and checkpoint.model_checkpoint_path):
        print("...no checkpoint found...")
    else:
        self.saver.restore(self.sess, checkpoint.model_checkpoint_path)

    if self.isInteractive:
        self.interactive()
        return

    testP, testS, testQ, testA = vectorize_data(
        self.testData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size,
        self._profiles_mapping)
    n_test = len(testS)
    print("Testing Size", n_test)
    predictions = self.model.batch_predict(testP, testS, testQ)
    accuracy = metrics.accuracy_score(predictions, testA)
    print("Testing Accuracy:", accuracy)
def test(self):
    """Score the test set, dumping intermediate data and pausing on the way.

    This is a debugging variant: it prints the raw and vectorized test
    data, blocks on ``input()`` twice so the output can be inspected, and
    prints every prediction with its candidate text before the accuracy.
    """
    checkpoint = tf.train.get_checkpoint_state(self.model_dir)
    if not (checkpoint and checkpoint.model_checkpoint_path):
        print("...no checkpoint found...")
    else:
        self.saver.restore(self.sess, checkpoint.model_checkpoint_path)

    testS, testQ, testA = vectorize_data(
        self.testData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    n_test = len(testS)
    print("Testing Size", n_test)
    print(type(self.testData))
    print(" what does the test data look like ")
    print(self.testData)
    # Pause until the user presses Enter; the typed text is not used.
    input(" Read the test data carefully !! ")

    print(" what does testS and testQ and testA look like ")
    for vectorized in (testS, testQ, testA):
        print(vectorized)

    print(" Hey there !!! before batch predict ")
    predictions = self.batch_predict(testS, testQ, n_test)
    print(" Hey There !!! After batch predict")
    for pred in predictions:
        print(pred, self.indx2candid[pred])
    # Second pause, again only waiting for Enter.
    input("Drill me Baby !!! ")

    accuracy = metrics.accuracy_score(predictions, testA)
    print("Testing Accuracy:", accuracy)
def interactive(model, indx2candid, cands_tensor, word_idx, sentence_size,
                memory_size, cuda=False):
    """Chat with ``model`` on the console until the user types ``exit``.

    Args:
        model: Callable as ``model(utter, memory, cands_tensor)``; also
            provides ``predict``.
        indx2candid: Maps a predicted index to its candidate reply text.
        cands_tensor: Candidate tensor passed through to the model.
        word_idx, sentence_size, memory_size: Forwarded to
            ``vectorize_data``.
        cuda: When true, the input tensors go through ``transfer_to_gpu``.
    """
    context = []
    nid = 1
    while True:
        line = input('--> ').strip().lower()
        if line == 'exit':
            break
        if line == 'restart':
            context = []
            nid = 1
            print("clear memory")
            continue

        user_tokens = tokenize(line)
        s, q, a, entity_dict = vectorize_data(
            [(context, user_tokens, -1)], word_idx, sentence_size,
            memory_size)
        memory = V(torch.from_numpy(np.stack(s)))
        utter = V(torch.from_numpy(np.stack(q)))
        if cuda:
            memory = transfer_to_gpu(memory)
            utter = transfer_to_gpu(utter)

        context_, cand_ = model(utter, memory, cands_tensor)
        preds = model.predict(context_, cand_)
        reply = indx2candid[preds.data[0]]
        print(reply)

        reply_tokens = tokenize(reply)
        turn_tag = '#' + str(nid)
        user_tokens.extend(['$u', turn_tag])
        reply_tokens.extend(['$r', turn_tag])
        context.append(user_tokens)
        context.append(reply_tokens)
        nid += 1
def train(self):
    """Runs the training algorithm over training set data.

    Performs validation at given evaluation intervals.  Each example has
    four vectorized parts (named P, S, Q, A here); all but the answers are
    fed to prediction.  Accuracies are printed and logged to
    ``self.summary_writer``, and the session is checkpointed whenever the
    validation accuracy improves.
    """
    trainP, trainS, trainQ, trainA = vectorize_data(
        self.trainData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    valP, valS, valQ, valA = vectorize_data(
        self.valData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    n_train = len(trainS)
    n_val = len(valS)
    print("Training Size", n_train)
    print("Validation Size", n_val)
    tf.set_random_seed(self.random_state)

    # Slice bounds for batches of exactly ``batch_size`` examples; whatever
    # follows the last generated pair is not used for fitting.
    batches = list(zip(
        range(0, n_train - self.batch_size, self.batch_size),
        range(self.batch_size, n_train, self.batch_size)))

    tag_prefix = 'task_' + str(self.task_id) + '/'
    best_validation_accuracy = 0

    # Training loop
    for t in range(1, self.epochs + 1):
        print('Epoch', t)
        np.random.shuffle(batches)
        total_cost = 0.0
        for start, end in batches:
            total_cost += self.model.batch_fit(
                trainP[start:end], trainS[start:end],
                trainQ[start:end], trainA[start:end])

        if t % self.evaluation_interval != 0:
            continue

        # Perform validation
        train_preds = self.batch_predict(trainP, trainS, trainQ, n_train)
        print("___________________")
        val_preds = self.batch_predict(valP, valS, valQ, n_val)
        train_acc = metrics.accuracy_score(np.array(train_preds), trainA)
        val_acc = metrics.accuracy_score(val_preds, valA)

        print('-----------------------')
        print('Epoch', t)
        print('Total Cost:', total_cost)
        print('Training Accuracy:', train_acc)
        print('Validation Accuracy:', val_acc)
        print('-----------------------')

        # Write summary
        # Build the Summary protobuf directly: creating tf.summary.scalar
        # ops on every evaluation grows the graph and gives each new op a
        # uniquified name, so the tag would differ between evaluations.
        summary = tf.Summary(value=[
            tf.Summary.Value(tag=tag_prefix + 'train_acc',
                             simple_value=float(train_acc)),
            tf.Summary.Value(tag=tag_prefix + 'val_acc',
                             simple_value=float(val_acc)),
        ])
        self.summary_writer.add_summary(summary, t)
        self.summary_writer.flush()

        if val_acc > best_validation_accuracy:
            best_validation_accuracy = val_acc
            self.saver.save(self.sess, self.model_dir + 'model.ckpt',
                            global_step=t)
vocab_size = len(word_idx) + 1  # +1 for nil word
sentence_size = max(query_size, sentence_size)  # for the position

print("Longest sentence length", sentence_size)
print("Longest story length", max_story_size)
print("Average story length", mean_story_size)

# train/validation/test sets
# Vectorize every task separately, hold out 10% of each for validation,
# then stack the per-task arrays into one array per field.
trainS, trainQ, trainA = [], [], []
valS, valQ, valA = [], [], []
for task in train:
    S, Q, A = vectorize_data(task, word_idx, sentence_size, memory_size)
    ts, vs, tq, vq, ta, va = cross_validation.train_test_split(
        S, Q, A, test_size=0.1, random_state=FLAGS.random_state)
    for bucket, part in ((trainS, ts), (trainQ, tq), (trainA, ta),
                         (valS, vs), (valQ, vq), (valA, va)):
        bucket.append(part)

trainS = reduce(lambda a, b: np.vstack((a, b)), trainS)
trainQ = reduce(lambda a, b: np.vstack((a, b)), trainQ)
trainA = reduce(lambda a, b: np.vstack((a, b)), trainA)
valS = reduce(lambda a, b: np.vstack((a, b)), valS)
valQ = reduce(lambda a, b: np.vstack((a, b)), valQ)
valA = reduce(lambda a, b: np.vstack((a, b)), valA)
def train(self):
    """Train the model, tracking losses and accuracies for every epoch.

    Per epoch: fits on the shuffled training batches, runs the validation
    batches through ``batch_fit`` to accumulate a validation cost,
    computes train/validation accuracy, logs both to
    ``self.summary_writer`` and checkpoints on a new best validation
    accuracy.  Depending on ``FLAGS.acc_each_epoch`` /
    ``FLAGS.acc_ten_epoch`` the test set is also evaluated and the results
    are printed; after the last epoch the accuracy and loss curves are
    plotted.
    """
    (trainStory, trainQuery, trainSystem, trainAnswer, trainWholeU,
     trainWholeS, trainResults, _) = vectorize_data(
        self.trainData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    (valStory, valQuery, valSystem, valAnswer, valWholeU, valWholeS,
     valResults, _) = vectorize_data(
        self.valData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    n_train = len(trainStory)
    n_val = len(valStory)
    print("Training Size", n_train)
    print("Validation Size", n_val)
    tf.set_random_seed(self.random_state)

    # Slice bounds for batches of exactly ``batch_size`` examples.
    batches_train = list(zip(
        range(0, n_train - self.batch_size, self.batch_size),
        range(self.batch_size, n_train, self.batch_size)))
    batches_val = list(zip(
        range(0, n_val - self.batch_size, self.batch_size),
        range(self.batch_size, n_val, self.batch_size)))

    tag_prefix = 'task_' + str(self.task_id) + '/'
    best_validation_accuracy = 0
    cost_array = []
    cost_val_array = []
    for t in range(1, self.epochs + 1):
        print('Epoch', t)
        np.random.shuffle(batches_train)
        np.random.shuffle(batches_val)
        total_cost = 0.0
        total_cost_val = 0.0

        # Get the right batches to feed the network to calculate the
        # training loss
        for start_t, end_t in batches_train:
            cost_t = self.model.batch_fit(
                trainStory[start_t:end_t], trainWholeU[start_t:end_t],
                trainWholeS[start_t:end_t], trainQuery[start_t:end_t],
                trainAnswer[start_t:end_t], trainResults[start_t:end_t],
                FLAGS.source, FLAGS.resFlag)
            total_cost += cost_t

        # Get the right batches to feed the network to calculate the
        # validation loss
        # NOTE(review): the validation batches go through batch_fit too; if
        # batch_fit applies a gradient step, the model is being trained on
        # the validation data -- confirm against the model implementation.
        for start_v, end_v in batches_val:
            cost_t_val = self.model.batch_fit(
                valStory[start_v:end_v], valWholeU[start_v:end_v],
                valWholeS[start_v:end_v], valQuery[start_v:end_v],
                valAnswer[start_v:end_v], valResults[start_v:end_v],
                FLAGS.source, FLAGS.resFlag)
            total_cost_val += cost_t_val

        cost_val_array.append(total_cost_val)
        cost_array.append(total_cost)

        train_preds = self.batch_predict(
            trainStory, trainWholeU, trainWholeS, trainQuery,
            trainResults, n_train)
        val_preds = self.batch_predict(
            valStory, valWholeU, valWholeS, valQuery, valResults, n_val)
        train_acc = metrics.accuracy_score(
            np.array(train_preds), trainAnswer)
        val_acc = metrics.accuracy_score(val_preds, valAnswer)

        # Build the Summary protobuf directly: creating tf.summary.scalar
        # ops every epoch grows the graph and gives each new op a
        # uniquified name, so the tag would differ from epoch to epoch.
        summary = tf.Summary(value=[
            tf.Summary.Value(tag=tag_prefix + 'train_acc',
                             simple_value=float(train_acc)),
            tf.Summary.Value(tag=tag_prefix + 'val_acc',
                             simple_value=float(val_acc)),
        ])
        self.summary_writer.add_summary(summary, t)
        self.summary_writer.flush()

        if val_acc > best_validation_accuracy:
            best_validation_accuracy = val_acc
            self.saver.save(self.sess, self.model_dir + 'model.ckpt',
                            global_step=t)

        if FLAGS.acc_each_epoch == True:
            # Test every epoch, but only print on evaluation epochs.
            (test_acc, follow_up_wrong, one_api_mistake, two_api_mistakes,
             three_or_more_api__call_mistakes, one_answer_mistake,
             two_answer_mistakes, three_or_more_answer_mistakes,
             conv_right, conv_wrong) = self.test()
            if t % self.evaluation_interval == 0:
                self.print_epoch_info(total_cost, total_cost_val,
                                      train_acc, val_acc, test_acc)
        elif FLAGS.acc_ten_epoch:
            # Test and print on evaluation epochs only.
            if t % self.evaluation_interval == 0:
                (test_acc, follow_up_wrong, one_api_mistake,
                 two_api_mistakes, three_or_more_api__call_mistakes,
                 one_answer_mistake, two_answer_mistakes,
                 three_or_more_answer_mistakes, conv_right,
                 conv_wrong) = self.test()
                self.print_epoch_info(total_cost, total_cost_val,
                                      train_acc, val_acc, test_acc)

    if FLAGS.acc_each_epoch == True or FLAGS.acc_ten_epoch == True:
        self.plot_acc(test_acc)
    self.plot_loss(cost_array, cost_val_array, train_acc)
def test(self):
    """Evaluate on the test set and return accuracy history plus error stats.

    Restores the checkpoint recorded in ``self.model_dir`` when one
    exists, predicts on ``self.testData``, appends the accuracy to
    ``self.test_acc_list`` and prints it.

    Returns:
        A 10-tuple: ``self.test_acc_list`` followed by the nine values
        produced by ``self.error_inspect``.  When ``FLAGS.error`` is not
        set, error inspection is skipped and those nine entries are
        ``None``; the tuple shape stays the same so callers that unpack ten
        values keep working.
    """
    ckpt = tf.train.get_checkpoint_state(self.model_dir)
    if ckpt and ckpt.model_checkpoint_path:
        self.saver.restore(self.sess, ckpt.model_checkpoint_path)
    else:
        print("...no checkpoint found...")

    (testStory, testQuery, testSystem, testAnswer, testWholeU, testWholeS,
     testResults, story_words) = vectorize_data(
        self.testData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    n_test = len(testStory)
    test_preds = self.batch_predict(
        testStory, testWholeU, testWholeS, testQuery, testResults, n_test)

    # Placeholders so the return value has the same shape whether or not
    # error inspection runs.
    follow_up_wrong = None
    one_api_mistake = two_api_mistakes = None
    three_or_more_api__call_mistakes = None
    one_answer_mistake = two_answer_mistakes = None
    three_or_more_answer_mistakes = None
    conversation_number_right = conversation_number_wrong = None
    if FLAGS.error == True:
        (follow_up_wrong, one_api_mistake, two_api_mistakes,
         three_or_more_api__call_mistakes, one_answer_mistake,
         two_answer_mistakes, three_or_more_answer_mistakes,
         conversation_number_right,
         conversation_number_wrong) = self.error_inspect(
            test_preds, testAnswer, story_words)

    test_acc = metrics.accuracy_score(test_preds, testAnswer)
    self.test_acc_list.append(test_acc)
    print("Testing Accuracy:", test_acc)
    print('----------------------\n')

    return (self.test_acc_list, follow_up_wrong, one_api_mistake,
            two_api_mistakes, three_or_more_api__call_mistakes,
            one_answer_mistake, two_answer_mistakes,
            three_or_more_answer_mistakes, conversation_number_right,
            conversation_number_wrong)
vocab_size = len(word_idx) + 1  # +1 for nil word
sentence_size = max(query_size, sentence_size)  # for the position

print("Longest sentence length", sentence_size)
print("Longest story length", max_story_size)
print("Average story length", mean_story_size)

# train/validation/test sets
# One vectorize + 90/10 split per task; the per-task pieces are collected
# in lists and stacked into single arrays afterwards.
trainS, valS = [], []
trainQ, valQ = [], []
trainA, valA = [], []
for task in train:
    S, Q, A = vectorize_data(task, word_idx, sentence_size, memory_size)
    split = cross_validation.train_test_split(
        S, Q, A, test_size=0.1, random_state=FLAGS.random_state)
    ts, vs, tq, vq, ta, va = split
    trainS.append(ts)
    valS.append(vs)
    trainQ.append(tq)
    valQ.append(vq)
    trainA.append(ta)
    valA.append(va)

trainS = reduce(lambda a, b: np.vstack((a, b)), trainS)
trainQ = reduce(lambda a, b: np.vstack((a, b)), trainQ)
trainA = reduce(lambda a, b: np.vstack((a, b)), trainA)
valS = reduce(lambda a, b: np.vstack((a, b)), valS)
valQ = reduce(lambda a, b: np.vstack((a, b)), valQ)
valA = reduce(lambda a, b: np.vstack((a, b)), valA)
# Longest query and longest answer over all examples.
query_size = max(len(q) for _, q, _ in data)
answer_size = max(len(a) for _, _, a in data)
del data

# for the position, then:
# +1 for time words +1 for go +1 for eos
sentence_size = max(query_size, sentence_size, answer_size) + 1
memory_size = min(FLAGS.memory_size,
                  max_story_size)  # + FLAGS.additional_info_memory_size

vocab = Vocab()
vocab.add_vocab(words)
# for i in range(memory_size):
#     vocab.word_to_index('time{}'.format(i + 1))

S, Q, A, A_fact, A_weight = vectorize_data(
    train, vocab, sentence_size, memory_size, fact=FLAGS.model_type)

# Add time words/indexes
additional_vocab_size = 50  # for additional infor from knowledge base
vocab_size = vocab.vocab_size  # + additional_vocab_size  # +1 for nil word
# sentence_size= max(sentence_size,20)  # set the same certain length for decoder

print('Vocabulary size:', vocab_size)
print("Longest sentence length", sentence_size)
print("Longest story length", max_story_size)
print("Average story length", mean_story_size)
print('Memory size', memory_size)

# train/validation/test sets
# pdb.set_trace()
# Choose the embedding source with one if/elif/else chain.  With two
# separate `if` statements, the `else` of the second one replaced the
# word2vec embeddings by random ones whenever only FLAGS.word2vec was set.
# Paragram is tested first so that it still wins when both flags are set.
if FLAGS.paragram:
    loaded_embeddings = utils.loadEmbedding_rand(
        '/Users/yangyang/Dialog project/yy-dstc6/scripts/paragram999.txt',
        word_idx, False)
elif FLAGS.word2vec:
    loaded_embeddings = utils.loadEmbedding_rand(
        '/Users/yangyang/Dialog project/yy-dstc6/scripts/GoogleNews-vectors-negative300.bin',
        word_idx, True)
else:
    # No pretrained file requested: no path is passed to the loader.
    loaded_embeddings = utils.loadEmbedding_rand(None, word_idx, True)

# vectorize data
trainS, trainA, train_label = utils.vectorize_data(
    train, word_idx, sentence_size, FLAGS.batch_size, memory_size,
    cand_idx, FLAGS)
valS, valA, val_label = utils.vectorize_data(
    val, word_idx, sentence_size, FLAGS.batch_size, memory_size,
    cand_idx, FLAGS)
testS, testA, test_label = utils.vectorize_data(
    test, word_idx, sentence_size, FLAGS.batch_size, memory_size,
    cand_idx, FLAGS)
testasrS, testasrA, testasr_label = utils.vectorize_data(
    testasr, word_idx, sentence_size, FLAGS.batch_size, memory_size,
    cand_idx, FLAGS)

# Opened for binary writing; the handle is not closed within this span.
f_param = open(FLAGS.model_path + 'param', 'wb')
# Log some statistics about the dataset
logger.info("Vocabulary size {}".format(vocab_size))
logger.info("Longest sentence length {}".format(sentence_size))
logger.info("Longest query length {}".format(query_size))
logger.info("Biggest answer candidates set size {}".format(max_candidates_size))
logger.info("Average answer candidates set size {}".format(mean_candidates_size))
logger.info("Longest story length {}".format(max_story_size))
logger.info("Average story length {0:.1f}".format(mean_story_size))

# Highest sentence/query length
sentence_size = max(query_size, sentence_size)

# Size of the answer candidates
candidates_size = max_candidates_size

# Sub-directory component shared by the train and test path prefixes; empty
# unless rare words are being discarded.
if FLAGS.discard_rare_words:
    filtered_subdir = 'filtered_{}/'.format(FLAGS.term_freq_thr)
else:
    filtered_subdir = ''

fs_train, fq_train, fc_train, fa_train = vectorize_data(
    train, word2idx, sentence_size, memory_size, candidates_size,
    "{}{}train_".format(path, filtered_subdir))
fs_test, fq_test, fc_test, fa_test = vectorize_data(
    test, word2idx, sentence_size, memory_size, candidates_size,
    "{}{}test_".format(path, filtered_subdir))

# Each value returned above is handed to tables.open_file and the name is
# rebound to the resulting read-only handle.
fs_train = tables.open_file(fs_train, mode='r')
fq_train = tables.open_file(fq_train, mode='r')
fc_train = tables.open_file(fc_train, mode='r')
fa_train = tables.open_file(fa_train, mode='r')

fs_test = tables.open_file(fs_test, mode='r')
fq_test = tables.open_file(fq_test, mode='r')
fc_test = tables.open_file(fc_test, mode='r')
fa_test = tables.open_file(fa_test, mode='r')

# Size of the training set
n_train = len(train)
def load_data(data_dir, task_ids, memory_size, num_caches, random_seed):
    """Load the selected tasks, build the vocabulary and vectorize every split.

    Each id in ``task_ids`` is loaded with ``load_task``; the test data is
    loaded once with ``load_test=True`` and is expected to be a mapping.
    The vocabulary is built over training and test examples together.  Each
    training task is vectorized and split 90/10 into train/validation parts,
    and the per-task parts are stacked with ``np.vstack``.

    Returns:
        An 8-tuple ``(train_data, val_data, test_data, word_idx,
        reverse_word_idx, vocab_size, sentence_size, memory_size)`` where
        ``train_data`` and ``val_data`` are 5-tuples ``(S, O, Q, A, L)`` and
        ``test_data`` maps each key of the loaded test mapping to the result
        of ``vectorize_data`` for that entry.
    """
    # Load all train and test data
    train = [load_task(data_dir, task_id) for task_id in task_ids]
    te = load_task(data_dir, None, load_test=True)
    test = list(te.values())
    data = list(chain.from_iterable(train + test))

    # Vocabulary: every story/query/answer token plus '.', sorted.
    token_sets = (set(list(chain.from_iterable(s)) + q + a + ['.'])
                  for s, _, q, a, _ in data)
    vocab = sorted(reduce(lambda x, y: x | y, token_sets))
    # Indices start at 1; slot 0 is the NIL word.
    word_idx = {word: position
                for position, word in enumerate(vocab, start=1)}
    reverse_word_idx = ['NIL'] + sorted(word_idx, key=word_idx.get)

    story_lengths = [len(s) for s, _, _, _, _ in data]
    max_story_size = max(story_lengths)
    mean_story_size = int(np.mean(story_lengths))
    sentence_size = max(
        len(sentence) for s, _, _, _, _ in data for sentence in s)
    query_size = max(len(q) for _, _, q, _, _ in data)
    memory_size = min(memory_size, max_story_size)
    vocab_size = len(word_idx) + 1  # +1 for the NIL word
    sentence_size = max(query_size, sentence_size)  # for the position

    logging.info("Longest sentence length: %d", sentence_size)
    logging.info("Longest story length: %d", max_story_size)
    logging.info("Average story length: %d", mean_story_size)

    # Train/validation/test splits; one bucket per field, order S, O, Q, A, L.
    train_parts = ([], [], [], [], [])
    val_parts = ([], [], [], [], [])
    for task in train:
        S, O, Q, A, L = vectorize_data(task, word_idx, sentence_size,
                                       memory_size, num_caches)
        ts, vs, to, vo, tq, vq, ta, va, tl, vl = \
            cross_validation.train_test_split(
                S, O, Q, A, L, test_size=0.1, random_state=random_seed)
        for bucket, piece in zip(train_parts, (ts, to, tq, ta, tl)):
            bucket.append(piece)
        for bucket, piece in zip(val_parts, (vs, vo, vq, va, vl)):
            bucket.append(piece)

    def stack_pairwise(pieces):
        # Pairwise fold: a single piece is returned as-is, without vstack.
        return reduce(lambda a, b: np.vstack((a, b)), pieces)

    trainS, trainO, trainQ, trainA, trainL = [
        stack_pairwise(pieces) for pieces in train_parts]
    valS, valO, valQ, valA, valL = [
        stack_pairwise(pieces) for pieces in val_parts]

    test_data = {
        f: vectorize_data(te[f], word_idx, sentence_size, memory_size,
                          num_caches)
        for f in te
    }

    logging.info("Training set shape: %s", str(trainS.shape))

    train_data = (trainS, trainO, trainQ, trainA, trainL)
    val_data = (valS, valO, valQ, valA, valL)
    return (train_data, val_data, test_data, word_idx, reverse_word_idx,
            vocab_size, sentence_size, memory_size)
# load glove word_idx, word2vec = data_utils.load_glove(num_tokens, embedding_size) vocab_size = len(word_idx) + 1 # stat info on data set sent_size_list = map(len, [essay for essay in essay_list]) max_sent_size = max(sent_size_list) mean_sent_size = int(np.mean(map(len, [essay for essay in essay_list]))) print 'max sentence size: {} \nmean sentence size: {}\n'.format(max_sent_size, mean_sent_size) with open(out_dir+'/params', 'a') as f: f.write('max sentence size: {} \nmean sentence size: {}\n'.format(max_sent_size, mean_sent_size)) print 'The length of score range is {}'.format(len(score_range)) E = data_utils.vectorize_data(essay_list, word_idx, max_sent_size) labeled_data = zip(E, resolved_scores, sent_size_list) # split the data on the fly trainE, testE, train_scores, test_scores, train_essay_id, test_essay_id = cross_validation.train_test_split( E, resolved_scores, essay_id, test_size=.2, random_state=random_state) memory = [] memory_score = [] memory_sent_size = [] memory_essay_ids = [] # pick sampled essay for each score for i in score_range: for j in range(num_samples): if i in train_scores:
# Word -> index lookup; indices start at 1 so that 0 stays free for the nil
# word (see vocab_size below).
word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

max_story_size = max(map(len, (s for s, _, _ in data)))
# np.mean needs a concrete sequence: on Python 3 `map` returns an iterator,
# which np.mean cannot average, so the lengths are materialised first.
mean_story_size = int(np.mean(list(map(len, (s for s, _, _ in data)))))
sentence_size = max(map(len, chain.from_iterable(s for s, _, _ in data)))
query_size = max(map(len, (q for _, q, _ in data)))
memory_size = min(FLAGS.memory_size, max_story_size)
vocab_size = len(word_idx) + 1  # +1 for nil word
sentence_size = max(query_size, sentence_size)  # for the position

print("Longest sentence length", sentence_size)
print("Longest story length", max_story_size)
print("Average story length", mean_story_size)

# train/validation/test sets
S, Q, A = vectorize_data(train, word_idx, sentence_size, memory_size)
# Hold out 10% of the vectorized training data for validation; the split is
# seeded from FLAGS.random_state.
trainS, valS, trainQ, valQ, trainA, valA = cross_validation.train_test_split(
    S, Q, A, test_size=.1, random_state=FLAGS.random_state)
testS, testQ, testA = vectorize_data(test, word_idx, sentence_size,
                                     memory_size)

print(testS[0])

print("Training set shape", trainS.shape)

# params
n_train = trainS.shape[0]
n_test = testS.shape[0]
n_val = valS.shape[0]

print("Training Size", n_train)
print("Validation Size", n_val)
print("Testing Size", n_test)
# Vocabulary lookup: words are numbered from 1, leaving 0 for the nil word.
word_idx = {word: position for position, word in enumerate(vocab, start=1)}

# Corpus statistics over all (story, query, answer) triples in `data`.
max_story_size = max(len(story) for story, _, _ in data)
mean_story_size = int(np.mean([len(story) for story, _, _ in data]))
sentence_size = max(
    len(sentence) for story, _, _ in data for sentence in story)
query_size = max(len(query) for _, query, _ in data)
memory_size = min(FLAGS.memory_size, max_story_size)
vocab_size = len(word_idx) + 1  # +1 for nil word
sentence_size = max(query_size, sentence_size)  # for the position

print("Longest sentence length", sentence_size)
print("Longest story length", max_story_size)
print("Average story length", mean_story_size)

# train/validation/test sets
S, Q, A = vectorize_data(train, word_idx, sentence_size, memory_size)
# A tenth of the vectorized training data is held out for validation.
(trainS, valS,
 trainQ, valQ,
 trainA, valA) = model_selection.train_test_split(S, Q, A, test_size=.1)
testS, testQ, testA = vectorize_data(
    test, word_idx, sentence_size, memory_size)

print("Training set shape", trainS.shape)

# params
n_train, n_test, n_val = trainS.shape[0], testS.shape[0], valS.shape[0]

print("Training Size", n_train)
print("Validation Size", n_val)
print("Testing Size", n_test)
def train(self):
    """Fit the model for ``self.epochs`` epochs with periodic evaluation.

    The training and validation data are vectorized once up front.  Every
    epoch shuffles the batch windows, fits the model on each window and
    records the wall-clock time of the epoch.  Every
    ``self.evaluation_interval`` epochs the accuracy on the training and
    validation sets is printed, and whenever the validation accuracy
    exceeds the best value seen so far a checkpoint is written to
    ``self.model_dir + 'best_model'``.
    """
    vectorize_args = (self.word_idx, self.sentence_size, self.batch_size,
                      self.n_cand, self.memory_size)
    trainS, trainQ, trainA = vectorize_data(self.trainData, *vectorize_args)
    valS, valQ, valA = vectorize_data(self.valData, *vectorize_args)

    n_train = len(trainS)
    n_val = len(valS)
    print("Training Size", n_train)
    print("Validation Size", n_val)
    # tf.set_random_seed(self.random_state)

    # (start, end) windows of batch_size examples.
    # NOTE(review): the end offsets stop before n_train, so no window ever
    # ends at n_train and the trailing examples are never used for fitting
    # -- confirm this is intended.
    step = self.batch_size
    batches = list(zip(range(0, n_train - step, step),
                       range(step, n_train, step)))

    best_validation_accuracy = 0
    times = []
    for t in range(1, self.epochs + 1):
        np.random.shuffle(batches)
        total_cost = 0.0

        epoch_started = timeit.default_timer()
        for lo, hi in batches:
            # Slice the window out of each array and wrap it for the model.
            s, q, a = (
                Variable(torch.from_numpy(np.stack(examples[lo:hi])))
                for examples in (trainS, trainQ, trainA))
            total_cost += self.model.batch_fit(s, q, a).data[0]
        times.append(timeit.default_timer() - epoch_started)

        # Only evaluate on every evaluation_interval-th epoch.
        if t % self.evaluation_interval != 0:
            continue

        train_preds = self.batch_predict(trainS, trainQ, n_train)
        val_preds = self.batch_predict(valS, valQ, n_val)
        train_acc = metrics.accuracy_score(np.array(train_preds), trainA)
        val_acc = metrics.accuracy_score(val_preds, valA)

        print('-----------------------')
        print('Epoch', t)
        print('Total Cost:', total_cost)
        print('Training Accuracy:', train_acc)
        print('Validation Accuracy:', val_acc)
        print('Average time per epoch: ', np.sum(times) / len(times))
        print('-----------------------')

        if val_acc > best_validation_accuracy:
            best_validation_accuracy = val_acc
            snapshot = {
                'epoch': t + 1,
                'state_dict': self.model.state_dict(),
                'optimizer': self.model.optimizer.state_dict(),
            }
            save_checkpoint(snapshot, True,
                            filename=self.model_dir + 'best_model')