def pre_process_dataset(image_dir, qjson, ajson, img_prefix):
    print('Preprocessing dataset.\n')
    vqa = VQA(ajson, qjson)

    img_names = [f for f in os.listdir(image_dir) if '.jpg' in f]
    img_names = img_names[:30000]
    print("length: ", len(img_names))

    img_ids = []
    for fname in img_names:
        img_id = fname.split('.')[0].rpartition(img_prefix)[-1]
        img_ids.append(int(img_id))
    print("Done collecting image ids")

    ques_ids = vqa.getQuesIds(img_ids)

    # Question vocabulary: the defaultdict assigns a fresh index to each unseen word.
    q2i = defaultdict(lambda: len(q2i))
    pad = q2i["<pad>"]
    start = q2i["<sos>"]
    end = q2i["<eos>"]
    UNK = q2i["<unk>"]

    a2i_count = {}
    for ques_id in ques_ids:
        qa = vqa.loadQA(ques_id)[0]
        qqa = vqa.loadQQA(ques_id)[0]

        # Drop the trailing '?' and add every question word to the vocabulary.
        ques = qqa['question'][:-1]
        [q2i[x] for x in ques.lower().strip().split(" ")]

        # Count only answers the annotators were confident about.
        answers = qa['answers']
        for ans in answers:
            if not ans['answer_confidence'] == 'yes':
                continue
            ans = ans['answer'].lower()
            if ans not in a2i_count:
                a2i_count[ans] = 1
            else:
                a2i_count[ans] = a2i_count[ans] + 1
    print("Done collecting Q/A")

    # Keep the 1000 most frequent answers as the answer vocabulary.
    a_sort = sorted(a2i_count.items(), key=operator.itemgetter(1), reverse=True)

    i2a = {}
    count = 0
    a2i = defaultdict(lambda: len(a2i))
    for word, _ in a_sort:
        a2i[word]
        i2a[a2i[word]] = word
        count = count + 1
        if count == 1000:
            break
    print("Done collecting words")

    return q2i, a2i, i2a, a2i_count
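
# A minimal usage sketch for pre_process_dataset (not part of the original
# file). The paths and prefix mirror the train2014 values used elsewhere in
# this section and are assumptions about the local data layout.
if __name__ == "__main__":
    q2i, a2i, i2a, a2i_count = pre_process_dataset(
        image_dir="./data/train2014",
        qjson="./data/v2_OpenEnded_mscoco_train2014_questions.json",
        ajson="./data/v2_mscoco_train2014_annotations.json",
        img_prefix="COCO_train2014_")
    print("question vocab:", len(q2i), "answer vocab:", len(a2i))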
print('finished processing features')

################################
#
# process the train question
#
################################
taskType = 'OpenEnded'
dataType = 'mscoco'
dataSubType = 'train2014'
annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType, dataSubType)
quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType, dataType, dataSubType)
imgDir = '%s/Images/%s/' % (dataDir, dataSubType)

vqa = VQA(annFile, quesFile)
train_question_ids = vqa.getQuesIds()
train_image_ids = vqa.getImgIds()

train_questions = []
train_answers = []
question_dict_count = dict()
answer_dict_count = dict()
for idx, q_id in enumerate(train_question_ids):
    question = vqa.qqa[q_id]['question']
    question = process_sentence(question)
    question = question.split()
    for word in question:
        question_dict_count[word] = question_dict_count.get(word, 0) + 1
    answer = vqa.loadQA(q_id)[0]['answers'][0]['answer']
    answer_new = [process_answer(ans) for ans in answer]
    for word in answer_new:
import multiprocessing
import sys
from functools import reduce  # needed on Python 3, where reduce is no longer a builtin

dataDir = '../../VQA'
sys.path.insert(0, '%s/PythonHelperTools/vqaTools' % (dataDir))
from vqa import VQA
from vqaEvaluation.vqaEval import VQAEval
import os
import time

annFile = sys.argv[1]
quesFile = sys.argv[2]
resFile = sys.argv[3]

vqa = VQA(annFile, quesFile)
vqaRes = vqa.loadRes(resFile, quesFile)
vqaEval = VQAEval(vqa, vqaRes, n=2)
all_qids = vqa.getQuesIds()


def vqaeval(i):
    # Evaluate a single question id and return {qid: overall accuracy}.
    qid = all_qids[i]
    qid_list = []
    qid_list.append(qid)
    vqaEval.evaluate(qid_list)
    qid_acc_dict = {qid: vqaEval.accuracy['overall']}
    return qid_acc_dict


def reduce_acc(results_list):
    # Merge the per-question {qid: accuracy} dicts into a single dict.
    result_dict = reduce(lambda r, d: r.update(d) or r, results_list, {})
    # Get question ids corresponding to 3 answer types - yes/no; Number; Others
    binary_qids = vqa.getQuesIds(ansTypes='yes/no')
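
# Sketch (not in the original file): one plausible way to drive vqaeval in
# parallel with the multiprocessing import above and fold the per-question
# results with reduce_acc. The worker count and chunksize are assumptions.
if __name__ == '__main__':
    with multiprocessing.Pool(processes=4) as pool:
        results_list = pool.map(vqaeval, range(len(all_qids)), chunksize=64)
    reduce_acc(results_list)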
image_dir = "./data/train2014"
img_prefix = "COCO_train2014_"
qjson = "./data/v2_OpenEnded_mscoco_train2014_questions.json"
ajson = "./data/v2_mscoco_train2014_annotations.json"

vqa = VQA(ajson, qjson)

img_names = [f for f in os.listdir(image_dir) if '.jpg' in f]
img_names = img_names[:30000]
print("len ", len(img_names))

img_ids = []
for fname in img_names:
    img_id = fname.split('.')[0].rpartition(img_prefix)[-1]
    img_ids.append(int(img_id))

ques_ids = vqa.getQuesIds(img_ids)

q2i = defaultdict(lambda: len(q2i))
pad = q2i["<pad>"]
start = q2i["<sos>"]
end = q2i["<eos>"]
UNK = q2i["<unk>"]

a2i_count = {}
for ques_id in ques_ids:
    qa = vqa.loadQA(ques_id)[0]
    qqa = vqa.loadQQA(ques_id)[0]
    ques = qqa['question'][:-1]
    [q2i[x] for x in ques.lower().strip().split(" ")]
            print('Steps {} Loss {:.4f}'.format(s, train_loss.result()))
            self.train_step(self.train_iter.next())
        print('Steps {} Loss {:.4f}'.format(steps, train_loss.result()))
        self.model.save()
        print('model saved')
        print('training finished')


if __name__ == "__main__":
    #train_data = VQA(r'D:\documents\coding\Data\coco\v2_mscoco_train2014_annotations.json',
    #                 r'D:\documents\coding\Data\coco\v2_OpenEnded_mscoco_train2014_questions.json',
    #                 r'D:\documents\coding\Data\coco\train2014\COCO_train2014_{0}.jpg',
    #                 r'D:\documents\coding\Data\coco\v2_mscoco_train2014_complementary_pairs.json')
    train_data = VQA(
        r'D:\lgy\Document\Python\Data\coco\v2_mscoco_train2014_annotations.json',
        r'D:\lgy\Document\Python\Data\coco\v2_OpenEnded_mscoco_train2014_questions.json',
        r'D:\lgy\Document\Python\Data\coco\train2014\COCO_train2014_{0}.jpg')
    train_iter = VQAIter(train_data,
                         train_data.getQuesIds(ansTypes=['other', 'yes/no']),
                         hp.batch_size, hp.num_chunks)

    max_qst_len = hp.max_qst_len
    max_ans_len = hp.max_ans_len

    model = Transformer(hp.num_layers, hp.d_model, hp.num_heads, hp.dff,
                        max_qst_len + 3, hp.dropout_rate)
    trainer = Trainer(train_iter, model, 16, max_qst_len, max_ans_len)
    trainer.train(hp.steps, hp.steps_per_save, hp.steps_per_chunk,
                  hp.steps_per_report)
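
# The main block above reads every hyperparameter from an `hp` module that is
# not shown in this excerpt. A minimal stand-in could expose the attributes
# below; the names come from the code above, the values are assumptions.
class hp:
    batch_size = 64
    num_chunks = 10
    max_qst_len = 30
    max_ans_len = 10
    num_layers = 4
    d_model = 512
    num_heads = 8
    dff = 2048
    dropout_rate = 0.1
    steps = 100000
    steps_per_save = 5000
    steps_per_chunk = 1000
    steps_per_report = 100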
dataType = 'mscoco'
dataSubType = 'train2014'
annFile = '%s/Annotations/%s%s_%s_annotations.json' % (dataDir, versionType, dataType, dataSubType)
quesFile = '%s/Questions/%s%s_%s_%s_questions.json' % (dataDir, versionType, taskType, dataType, dataSubType)
imgDir = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType)

# initialize VQA api for QA annotations
vqa = VQA(annFile, quesFile)

# load and display QA annotations for given question types
"""
All possible quesTypes for abstract and mscoco has been provided in respective text files in ../QuestionTypes/ folder.
"""
annIds = vqa.getQuesIds(quesTypes='how many')
anns = vqa.loadQA(annIds)
randomAnn = random.choice(anns)
vqa.showQA([randomAnn])
imgId = randomAnn['image_id']
imgFilename = 'COCO_' + dataSubType + '_' + str(imgId).zfill(12) + '.jpg'
if os.path.isfile(imgDir + imgFilename):
    I = io.imread(imgDir + imgFilename)
    plt.imshow(I)
    plt.axis('off')
    plt.show()

# load and display QA annotations for given answer types
"""
ansTypes can be one of the following
yes/no
image_feat[len(train_image_ids):] = val_image_feature
print('finished processing features')

################################
#
# process the train question
#
################################
taskType = 'OpenEnded'
dataType = 'mscoco'
dataSubType = 'train2014'
annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType, dataSubType)
quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType, dataType, dataSubType)
imgDir = '%s/Images/%s/' % (dataDir, dataSubType)

vqa = VQA(annFile, quesFile)
train_question_ids = vqa.getQuesIds()
train_image_ids = vqa.getImgIds()

train_questions = []
train_answers = []
question_dict_count = dict()
answer_dict_count = dict()
for idx, q_id in enumerate(train_question_ids):
    question = vqa.loadQuestion(q_id)[0]
    question = process_sentence(question)
    question = question.split()
    for word in question:
        question_dict_count[word] = question_dict_count.get(word, 0) + 1
    answer = vqa.loadAnswer(q_id)[0]
    answer_new = [process_answer(ans) for ans in answer]
    for word in answer_new:
class VqaDataset(Dataset):
    def __init__(self, image_dir, question_json_file_path, annotation_json_file_path,
                 image_filename_pattern, collate=False, q2i=None, a2i=None, i2a=None,
                 a2i_count=None, img_names=None, img_ids=None, ques_ids=None,
                 method='simple', dataset_type='train', enc_dir=''):
        print(method)
        self.image_dir = image_dir
        self.qjson = question_json_file_path
        self.ajson = annotation_json_file_path
        img_prefix = image_filename_pattern.split('{}')[0]
        self.collate = collate
        self.q2i = q2i
        self.a2i = a2i
        self.i2a = i2a
        self.a2i_count = a2i_count
        self.img_ids = img_ids
        self.ques_ids = ques_ids
        self.img_names = img_names
        self.method = method
        self.vqa = VQA(self.ajson, self.qjson)

        if self.method == 'simple':
            self.transform = transforms.Compose(
                [transforms.Resize((224, 224)), transforms.ToTensor()])
        else:
            self.transform = transforms.Compose(
                [transforms.Resize((448, 448)), transforms.ToTensor()])

        # When the vocabularies are not passed in, build them from the image directory.
        if not collate:
            self.img_names = [f for f in os.listdir(self.image_dir) if '.jpg' in f]
            #print(self.img_names)
            self.img_ids = []
            for fname in self.img_names:
                img_id = fname.split('.')[0].rpartition(img_prefix)[-1]
                self.img_ids.append(int(img_id))

            self.ques_ids = self.vqa.getQuesIds(self.img_ids)
            self.q2i, self.a2i, self.i2a, self.a2i_count = pre_process_dataset(
                image_dir, self.qjson, self.ajson, img_prefix)
            #print("Image names ",self.img_names)

        self.q2i_len = len(self.q2i)
        self.a2i_len = len(self.a2i.keys())
        self.q2i_keys = self.q2i.keys()
        self.enc_dir = enc_dir

    def __len__(self):
        return len(self.ques_ids)

    def __getitem__(self, idx):
        ques_id = self.ques_ids[idx]
        img_id = self.vqa.getImgIds([ques_id])[0]
        qa = self.vqa.loadQA(ques_id)[0]
        qqa = self.vqa.loadQQA(ques_id)[0]
        img_name = self.img_names[self.img_ids.index(img_id)]

        if self.method == 'simple':
            img = default_loader(self.image_dir + '/' + img_name)
            #imgT = self.transform(img).permute(1, 2, 0)
            imgT = self.transform(img).float()
        else:
            img = default_loader(self.image_dir + '/' + img_name)
            imgT = self.transform(img).float()

        # Encode the question (without the trailing '?') as <sos> word-ids <eos>.
        ques = qqa['question'][:-1]
        quesI = [self.q2i["<sos>"]] + [self.q2i[x.lower()] for x in ques.split(" ")
                                       if x.lower() in self.q2i_keys] + [self.q2i["<eos>"]]
        if not self.collate:
            quesI = quesI + [self.q2i["<pad>"]] * (8 - len(quesI))

        if self.method == 'simple':
            # Bag-of-words encoding over the question vocabulary.
            quesT = torch.zeros(self.q2i_len).float()
            for w_idx in quesI:
                quesT[w_idx] = 1
        else:
            quesT = torch.from_numpy(np.array(quesI)).long()

        # Ground truth: the most frequent annotated answer that is in the answer vocabulary.
        answers = qa['answers']
        max_count = 0
        answer = ""
        for ans in answers:
            #if not ans['answer_confidence'] == 'yes':
            #    continue
            ans = ans['answer'].lower()
            if ans in self.a2i.keys() and self.a2i_count[ans] > max_count:
                max_count = self.a2i_count[ans]
                answer = ans

        if answer == "":  # only for validation
            gT = torch.from_numpy(np.array([self.a2i_len])).long()
        else:
            gT = torch.from_numpy(np.array([self.a2i[answer]])).long()

        if not self.collate:
            return {'img': imgT, 'ques': quesT, 'gt': gT}

        return imgT, quesT, gT, img_name, ques, answer
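
# A minimal usage sketch (not part of the original class): wiring VqaDataset
# into a torch DataLoader. The paths and filename pattern mirror the train2014
# values used earlier in this section; the batch size is an assumption.
from torch.utils.data import DataLoader

train_dataset = VqaDataset(
    image_dir="./data/train2014",
    question_json_file_path="./data/v2_OpenEnded_mscoco_train2014_questions.json",
    annotation_json_file_path="./data/v2_mscoco_train2014_annotations.json",
    image_filename_pattern="COCO_train2014_{}.jpg",
    method='simple')
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)  # assumed batch size

for batch in train_loader:
    imgs, questions, labels = batch['img'], batch['ques'], batch['gt']
    break  # one batch is enough for a smoke test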