def create_batch(self, qid_list):
    """Assemble the dense arrays for one batch of questions.

    For every question id the question string, answer object and image id are
    fetched through ``self``. The question is vectorised with
    ``self.qlist_to_vec``, the image feature file is loaded and divided by
    its L2 norm, and the answer is encoded with ``self.answer_to_vec``.

    Args:
        qid_list: question ids whose part before ``QID_KEY_SEPARATOR`` names
            the data split. Must not hold more than ``self.batchsize``
            entries, otherwise the row index runs past the arrays.

    Returns:
        Tuple ``(qvec, cvec, ivec, avec)`` of zero-initialised float arrays
        with shapes ``(batchsize, max_length)``, ``(batchsize, max_length)``,
        ``(batchsize, 2048, 14, 14)`` and ``(batchsize,)``. Rows beyond
        ``len(qid_list)`` stay zero.
    """
    qvec = np.zeros((self.batchsize, self.max_length))
    cvec = np.zeros((self.batchsize, self.max_length))
    ivec = np.zeros((self.batchsize, 2048, 14, 14))
    avec = np.zeros(self.batchsize)
    eval_modes = ('val', 'test-dev', 'test')

    for i, qid in enumerate(qid_list):
        # Load the raw question information.
        q_str = self.getQuesStr(qid)
        q_ans = self.getAnsObj(qid)
        q_iid = self.getImgId(qid)

        # Convert the question to its vector form.
        q_list = VQADataProvider.seq_to_list(q_str)
        t_qvec, t_cvec = self.qlist_to_vec(self.max_length, q_list)

        try:
            data_split = qid.split(QID_KEY_SEPARATOR)[0]
            if data_split == 'genome':
                feature_path = (config.DATA_PATHS['genome']['features_prefix']
                                + str(q_iid) + '.jpg.npz')
            else:
                # Non-genome image ids are zero-padded to 12 digits.
                feature_path = (config.DATA_PATHS[data_split]['features_prefix']
                                + str(q_iid).zfill(12) + '.jpg.npz')
            t_ivec = np.load(feature_path)['x']
            t_ivec = t_ivec / np.sqrt((t_ivec ** 2).sum())
        except Exception:
            # Best effort: a missing or unreadable feature file yields an
            # all-zero image row. ``Exception`` (not a bare ``except``) keeps
            # KeyboardInterrupt / SystemExit working.
            t_ivec = 0.
            write_log(
                'data not found for qid : ' + str(q_iid) + ' ' + self.mode,
                'log.txt')

        # Convert the answer to its vector form; evaluation modes use
        # extract_answer, every other mode uses extract_answer_prob.
        if self.mode in eval_modes:
            q_ans_str = self.extract_answer(q_ans)
        else:
            q_ans_str = self.extract_answer_prob(q_ans)
        t_avec = self.answer_to_vec(q_ans_str)

        qvec[i, ...] = t_qvec
        cvec[i, ...] = t_cvec
        ivec[i, ...] = t_ivec
        avec[i, ...] = t_avec

    return qvec, cvec, ivec, avec
def make_vocab_files():
    """Build the question vocabulary and the answer vocabulary.

    The question dictionary is loaded from ``config.QUESTION_VOCAB_SPACE`` and
    the answer dictionary from ``config.ANSWER_VOCAB_SPACE``; each is turned
    into a vocabulary by the corresponding ``make_*_vocab`` helper.

    Returns:
        Tuple ``(question_vocab, answer_vocab)``.
    """
    log_file = 'log.txt'

    # Question side: only the first element returned by load_data is used.
    write_log('making question vocab... ' + config.QUESTION_VOCAB_SPACE,
              log_file)
    question_dict, _ = VQADataProvider.load_data(config.QUESTION_VOCAB_SPACE)
    q_vocab = make_question_vocab(question_dict)

    # Answer side: only the second element is used, and the vocabulary is
    # built with config.NUM_OUTPUT_UNITS as its second argument.
    write_log('making answer vocab... ' + config.ANSWER_VOCAB_SPACE, log_file)
    _, answer_dict = VQADataProvider.load_data(config.ANSWER_VOCAB_SPACE)
    a_vocab = make_answer_vocab(answer_dict, config.NUM_OUTPUT_UNITS)

    return q_vocab, a_vocab
def get_batch_vec(self):
    """Return the next batch of ``self.batchsize`` questions.

    On the first call (``self.batch_len is None``) the question ids are
    fetched, shuffled and the epoch bookkeeping is initialised. Questions are
    then consumed in order. In the modes ``'val'``, ``'test-dev'`` and
    ``'test'`` every question is accepted; in any other mode a question is
    only accepted when at least one of its answers is a key of ``self.adict``,
    otherwise it is counted in ``self.n_skipped``. When the end of the list is
    reached the ids are reshuffled, ``self.epoch_counter`` is incremented and
    the number of skipped questions is logged.

    Returns:
        The tuple returned by ``self.create_batch`` extended with
        ``(qid_list, iid_list, epoch_counter)`` for this batch.
    """
    if self.batch_len is None:
        self.n_skipped = 0
        qid_list = self.getQuesIds()
        random.shuffle(qid_list)
        self.qid_list = qid_list
        self.batch_len = len(qid_list)
        self.batch_index = 0
        self.epoch_counter = 0

    def has_at_least_one_valid_answer(t_qid):
        # ``in`` replaces dict.has_key(), which was removed in Python 3.
        answer_list = [ans['answer'] for ans in self.getAnsObj(t_qid)]
        return any(ans in self.adict for ans in answer_list)

    eval_modes = ('val', 'test-dev', 'test')
    t_qid_list = []
    t_iid_list = []
    while len(t_qid_list) < self.batchsize:
        t_qid = self.qid_list[self.batch_index]
        t_iid = self.getImgId(t_qid)
        if self.mode in eval_modes or has_at_least_one_valid_answer(t_qid):
            t_qid_list.append(t_qid)
            t_iid_list.append(t_iid)
        else:
            self.n_skipped += 1

        if self.batch_index < self.batch_len - 1:
            self.batch_index += 1
        else:
            # End of the list: start a new epoch with a fresh shuffle.
            self.epoch_counter += 1
            qid_list = self.getQuesIds()
            random.shuffle(qid_list)
            self.qid_list = qid_list
            self.batch_index = 0
            write_log(
                "%d questions were skipped in a single epoch" % self.n_skipped,
                'log.txt')
            self.n_skipped = 0

    t_batch = self.create_batch(t_qid_list)
    return t_batch + (t_qid_list, t_iid_list, self.epoch_counter)
def load_genome_json():
    """Read the genome json file into question and answer dictionaries.

    Every record gets the key ``'genome' + QID_KEY_SEPARATOR + str(id)``.

    Returns:
        Tuple ``(qdic, adic)``: ``qdic[key]`` is
        ``{'qstr': question, 'iid': image}`` and ``adic[key]`` is a
        one-element list ``[{'answer': answer}]``.
    """
    question_dict = {}
    answer_dict = {}

    with open(config.DATA_PATHS['genome']['genome_file'], 'r') as handle:
        records = json.load(handle)

    for record in records:
        qid = 'genome' + QID_KEY_SEPARATOR + str(record['id'])
        question_dict[qid] = {
            'qstr': record['question'],
            'iid': record['image'],
        }
        answer_dict[qid] = [{'answer': record['answer']}]

    write_log('parsed ' + str(len(question_dict)) + ' questions for genome',
              'log.txt')
    return question_dict, answer_dict
def draw_qt_acc(target_key_list, figname):
    """Plot one accuracy curve per key and save the figure to ``figname``.

    ``qt_dic_list`` (the per-evaluation dictionaries) and ``it`` (the x-axis
    values) are not parameters; they are taken from the surrounding scope.

    Args:
        target_key_list: keys to look up in every entry of ``qt_dic_list``.
        figname: path handed to ``plt.savefig``.
    """
    # Start a fresh figure so earlier plots do not leak into this one.
    plt.figure()

    for key in target_key_list:
        write_log(str(key) + str(type(key)), 'visualize_log.txt')
        curve = np.array([entry[key] for entry in qt_dic_list])
        plt.plot(it, curve, label=str(key))

    plt.legend(fontsize='small')
    plt.ylim(0, 100.)
    plt.xlabel('Iterations')
    plt.ylabel('Accuracy on Val [%]')
    plt.savefig(figname, dpi=200)

    # Release the figure once it has been written.
    plt.clf()
    plt.close("all")
def load_vqa_json(data_split):
    """Read the question (and, if present, answer) json of one data split.

    Keys are ``data_split + QID_KEY_SEPARATOR + str(question_id)``.
    Annotations are only read when the split name does not contain
    ``'test'``; otherwise the answer dictionary stays empty.

    Args:
        data_split: key into ``config.DATA_PATHS``.

    Returns:
        Tuple ``(qdic, adic)``: ``qdic[key]`` is
        ``{'qstr': question, 'iid': image_id}`` and ``adic[key]`` is the
        ``'answers'`` entry of the matching annotation.
    """
    question_dict = {}
    answer_dict = {}

    with open(config.DATA_PATHS[data_split]['ques_file'], 'r') as handle:
        questions = json.load(handle)['questions']
    for entry in questions:
        qid = data_split + QID_KEY_SEPARATOR + str(entry['question_id'])
        question_dict[qid] = {
            'qstr': entry['question'],
            'iid': entry['image_id'],
        }

    has_annotations = 'test' not in data_split
    if has_annotations:
        with open(config.DATA_PATHS[data_split]['ans_file'], 'r') as handle:
            annotations = json.load(handle)['annotations']
        for entry in annotations:
            qid = data_split + QID_KEY_SEPARATOR + str(entry['question_id'])
            answer_dict[qid] = entry['answers']

    write_log(
        'parsed ' + str(len(question_dict)) + ' questions for ' + data_split,
        'log.txt')
    return question_dict, answer_dict
def save_qtype(qtype_list, save_filename, mode):
    """Save annotated example images for each requested question type.

    For every entry of ``qtype_list`` the questions in ``stat_list`` (taken
    from the surrounding scope) are scanned; a question matches when the
    first two entries of its ``'q_list'`` equal the first two entries of the
    question type. For each match the image is opened, the question's
    ``'ans_list'`` is drawn onto it, and it is saved under a file name made
    of a correct/failure prefix, the question words, the answer and the
    prediction. At most ``40 / len(qtype_list)`` images are written per
    question type.

    Args:
        qtype_list: question types; each is indexed at positions 0 and 1.
        save_filename: sub-directory name below the mode's output root. An
            existing directory of that name is deleted first.
        mode: ``'val'``, ``'test-dev'`` or ``'test'``.

    Raises:
        Exception: if ``mode`` is none of the supported values.
    """
    if mode == 'val':
        savepath = os.path.join('./eval', save_filename)
        # TODO: hard-coded image location.
        img_pre = '/tempspace/zwang6/VQA/Images/mscoco/val2014'
        img_name_prefix = 'COCO_val2014_'
    elif mode == 'test-dev':
        savepath = os.path.join('./test-dev', save_filename)
        # TODO: hard-coded image location.
        img_pre = '/tempspace/zwang6/VQA/Images/mscoco/test2015'
        img_name_prefix = 'COCO_test2015_'
    elif mode == 'test':
        savepath = os.path.join('./test', save_filename)
        # TODO: hard-coded image location.
        img_pre = '/tempspace/zwang6/VQA/Images/mscoco/test2015'
        img_name_prefix = 'COCO_test2015_'
    else:
        raise Exception('Unsupported mode')

    # Always start from an empty output directory.
    if os.path.exists(savepath):
        shutil.rmtree(savepath)
    if not os.path.exists(savepath):
        os.makedirs(savepath)

    for qt in qtype_list:
        # Per-type quota; computed here because qtype_list is known to be
        # non-empty inside this loop.
        limit = 40 / len(qtype_list)
        count = 0
        for t_question in stat_list:
            if not count < limit:
                # Quota reached: nothing further can be saved for this type.
                break

            t_question_list = t_question['q_list']
            if not (t_question_list[0] == qt[0]
                    and t_question_list[1] == qt[1]):
                continue

            t_iid = t_question['iid']
            # The file-name prefix was selected together with the mode above.
            # The previous ``elif mode == 'test-dev' or 'test'`` test was
            # always true.
            t_img = Image.open(os.path.join(
                img_pre, img_name_prefix + str(t_iid).zfill(12) + '.jpg'))

            ans_list = t_question['ans_list']
            draw = ImageDraw.Draw(t_img)
            for i in range(len(ans_list)):
                try:
                    draw.text((10, 10 * i), str(ans_list[i]))
                except Exception:
                    # Best effort: an answer that cannot be rendered is
                    # skipped, the image is still saved.
                    continue

            ans = t_question['answer']
            pred = t_question['pred']
            if ans == -1:
                pre = ''
            elif ans == pred:
                pre = 'correct '
            else:
                pre = 'failure '

            # '/' would be read as a path separator in the file name.
            ans = re.sub('/', ' ', str(ans))
            pred = re.sub('/', ' ', str(pred))
            img_title = pre + str(' '.join(t_question_list)) + '. a_' + \
                str(ans) + ' p_' + str(pred) + '.png'
            count += 1
            write_log(os.path.join(savepath, img_title), 'visualize_log.txt')
            t_img.save(os.path.join(savepath, img_title))
def main():
    """Prepare vocabularies and network definitions, then run training.

    Steps, in order:
      1. Make sure ``./result`` exists.
      2. Restore the three vocabularies from ``./result/*.json`` when all of
         them exist; otherwise build them with ``make_vocab_files`` and
         write them out.
      3. Write the train and test prototxt files produced by ``qlstm``.
      4. Step the Caffe solver ``config.MAX_ITERATIONS`` times, logging the
         mean training loss every ``config.PRINT_INTERVAL`` iterations and
         running validation every ``config.VALIDATE_INTERVAL`` iterations.
    """
    log_file = 'log.txt'
    char_vocab_path = './result/cdict.json'
    word_vocab_path = './result/vdict.json'
    answer_vocab_path = './result/adict.json'

    if not os.path.exists('./result'):
        os.makedirs('./result')

    question_vocab, answer_vocab = {}, {}
    have_cached_vocab = (os.path.exists(char_vocab_path)
                         and os.path.exists(answer_vocab_path)
                         and os.path.exists(word_vocab_path))
    if have_cached_vocab:
        write_log('restoring vocab', log_file)
        with open(char_vocab_path, 'r') as f:
            question_char_vocab = json.load(f)
        with open(word_vocab_path, 'r') as f:
            question_vocab = json.load(f)
        with open(answer_vocab_path, 'r') as f:
            answer_vocab = json.load(f)
    else:
        question_vocab, question_char_vocab, answer_vocab = make_vocab_files()
        to_store = (
            (char_vocab_path, question_char_vocab),
            (word_vocab_path, question_vocab),
            (answer_vocab_path, answer_vocab),
        )
        for path, vocab in to_store:
            with open(path, 'w') as f:
                json.dump(vocab, f)

    write_log(
        'question character vocab size: ' + str(len(question_char_vocab)),
        log_file)
    write_log('question vocab size: ' + str(len(question_vocab)), log_file)
    write_log('answer vocab size: ' + str(len(answer_vocab)), log_file)

    # Network definitions for training and for validation.
    with open('./result/proto_train.prototxt', 'w') as f:
        train_net = qlstm(config.TRAIN_DATA_SPLITS, config.BATCH_SIZE,
                          config.MAX_WORDS_IN_QUESTION,
                          config.LENGTH_OF_LONGEST_WORD,
                          len(question_char_vocab), len(question_vocab))
        f.write(str(train_net))
    with open('./result/proto_test.prototxt', 'w') as f:
        test_net = qlstm('val', config.VAL_BATCH_SIZE,
                         config.MAX_WORDS_IN_QUESTION,
                         config.LENGTH_OF_LONGEST_WORD,
                         len(question_char_vocab), len(question_vocab))
        f.write(str(test_net))

    caffe.set_device(config.GPU_ID)
    caffe.set_mode_gpu()
    solver = caffe.get_solver('./qlstm_solver.prototxt')

    train_loss = np.zeros(config.MAX_ITERATIONS)

    for it in range(config.MAX_ITERATIONS):
        solver.step(1)

        # Record the training loss of this iteration.
        train_loss[it] = solver.net.blobs['loss'].data

        # Neither reporting nor validation happens on the very first step.
        if it == 0:
            continue

        if it % config.PRINT_INTERVAL == 0:
            write_log('------------------------------------', log_file)
            write_log('Iteration: ' + str(it), log_file)
            window = train_loss[it - config.PRINT_INTERVAL:it]
            write_log('Train loss: ' + str(window.mean()), log_file)

        if it % config.VALIDATE_INTERVAL != 0:
            continue

        # Snapshot the current weights, then run validation.
        solver.test_nets[0].save('./result/tmp.caffemodel')
        write_log('Validating...', log_file)
        test_loss, acc_overall, acc_per_ques, acc_per_ans = exec_validation(
            config.GPU_ID, 'val', it=it)
        write_log('Iteration: ' + str(it), log_file)
        write_log('Test loss: ' + str(test_loss), log_file)
        write_log('Overall Accuracy: ' + str(acc_overall), log_file)
        write_log('Per Question Type Accuracy is the following:', log_file)
        for ques_type in acc_per_ques:
            write_log("%s : %.02f" % (ques_type, acc_per_ques[ques_type]),
                      log_file)
        write_log('Per Answer Type Accuracy is the following:', log_file)
        for ans_type in acc_per_ans:
            write_log("%s : %.02f" % (ans_type, acc_per_ans[ans_type]),
                      log_file)
def train(self):
    """Run the full training loop with per-epoch validation.

    After ``self.setup()`` and variable initialisation, an optional
    pre-trained model is loaded. Each of the ``num_epochs + 1`` epochs then:

      * iterates over the training data in steps of ``conf.batch_size``,
        using ``get_batch_bc`` when ``conf.is_between_class_train`` is set
        and ``get_batch`` otherwise;
      * iterates over the validation data in steps of
        ``conf.valid_batch_size``, averages the predictions of the
        ``conf.num_of_valid_crop`` consecutive rows belonging to each sample
        and counts arg-max mismatches against the labels;
      * prints and logs a summary line, and saves the model every
        ``conf.save_interval`` epochs (never at epoch 0).
    """
    self.setup()
    self.sess.run(tf.global_variables_initializer())

    # Load the pre-trained model if one is configured. Compare by value:
    # ``is not ''`` tests object identity, which is not a reliable way to
    # compare strings.
    if self.conf.pretrain_file != '':
        self.load(self.loader, self.conf.pretrain_file)

    curr_valid_fold = self.conf.fold
    n_crops = self.conf.num_of_valid_crop

    for epoch in range(self.conf.num_epochs + 1):
        start_time = time.time()

        # ---- training ----
        train_offset = 0
        while train_offset < self.reader.get_train_length():
            if self.conf.is_between_class_train:
                (batch_input, batch_labels) = self.reader.get_batch_bc(
                    curr_valid_fold, train_offset, self.conf.batch_size)
            else:
                (batch_input, batch_labels) = self.reader.get_batch(
                    curr_valid_fold, train_offset, self.conf.batch_size)

            # (rows, cols) -> (rows, 1, cols, 1) for the network input.
            batch_input = batch_input.reshape(
                (batch_input.shape[0], 1, batch_input.shape[1], 1))
            feed_dict = {
                self.net_input: batch_input,
                self.label_batch: batch_labels,
                self.keep_prob: 0.5,
                self.curr_step: epoch,
                self.isTrain: True
            }
            # loss_value and lr keep the values of the last batch; those are
            # the ones reported in the epoch summary below.
            loss_value, _, pred, lr = self.sess.run([
                self.reduced_loss, self.train_optimizer,
                self.test_prediction, self.learning_rate
            ], feed_dict=feed_dict)
            train_offset = train_offset + self.conf.batch_size

        # ---- validation ----
        valid_offset = 0
        error_sum = 0
        while valid_offset < self.reader.get_valid_length():
            (valid_input, valid_labels) = \
                self.reader.get_validation_batch_10_crops(
                    curr_valid_fold, valid_offset,
                    self.conf.valid_batch_size)
            valid_input = valid_input.reshape(
                (valid_input.shape[0], 1, valid_input.shape[1], 1))
            feed_dict = {
                self.net_input: valid_input,
                self.label_batch: valid_labels,
                self.keep_prob: 1.0,
                self.isTrain: False
            }
            valid_pred = self.sess.run([self.test_prediction],
                                       feed_dict=feed_dict)
            valid_pred = np.squeeze(np.asarray(valid_pred))

            # Average the predictions of all crops of a sample to get the
            # final prediction for that sample; the label is read from the
            # sample's first crop row.
            valid_batch_pred_mat = np.zeros(
                (self.conf.valid_batch_size, self.conf.num_classes))
            valid_batch_labels = np.zeros(
                (self.conf.valid_batch_size, self.conf.num_classes))
            for mm in range(self.conf.valid_batch_size):
                crop = valid_pred[mm * n_crops:(mm + 1) * n_crops, :]
                valid_batch_pred_mat[mm, :] = np.average(crop, axis=0)
                valid_batch_labels[mm, :] = valid_labels[mm * n_crops, :]

            error_sum = error_sum + np.sum(
                np.argmax(valid_batch_pred_mat, 1) != np.argmax(
                    valid_batch_labels, 1))
            valid_offset = valid_offset + self.conf.valid_batch_size

        valid_error = 100 * (error_sum / self.reader.get_valid_length())
        duration = time.time() - start_time

        epoch_str = 'epoch {:d} \t loss = {:.3f}, valid_err = {:.3f}, fold = {:d}, is_bc = {}, duration = {:.3f}, lr = {:.5f}'.format(
            epoch, loss_value, valid_error, curr_valid_fold,
            self.conf.is_between_class_train, duration, lr)
        print(epoch_str)
        write_log(
            epoch_str, 'fold' + str(curr_valid_fold) + '_bc_' +
            str(self.conf.is_between_class_train) + '_' + self.conf.logfile)

        # Save the model when needed.
        if epoch > 0 and epoch % self.conf.save_interval == 0:
            self.save(self.saver, epoch)