import collections

class preprocessing:
    def __init__(self, annotation_file=annFile, question_file=quesFile):
        self.vqar = VQA(annotation_file, question_file)
        self.annIds = self.vqar.getQuesIds()
        self.anns = self.vqar.loadQA(self.annIds)  # every question with its annotation dictionary loaded
        self.l = [a['multiple_choice_answer'] for a in self.anns]
        self.c = collections.Counter(self.l)
        self.Selected_key = []
        self.Selected_keys = {}
        self.i = 0
        for a in self.c.most_common(1000):
            self.Selected_key.append(a[0])
            self.Selected_keys[a[0]] = self.i
            self.i += 1
        self.Question_element = []
        for ele in self.anns:
            if ele['multiple_choice_answer'] in self.Selected_keys:
                self.Question_element.append(ele)
        self.qqa = {ann['question_id']: [] for ann in self.Question_element}
        print('assigning questions')
        y = 0
        for ques in self.vqar.questions['questions']:
            print('done', y)
            y += 1
            if ques['question_id'] in self.qqa:
                self.qqa[ques['question_id']] = ques
        print('assigning questions finished')
        ques_words = []
        for ann in self.Question_element:
            quesId = ann['question_id']
            # split into words; iterating the raw string would count characters
            for word in self.qqa[quesId]['question'].split():
                ques_words.append(word)
        s = collections.Counter(ques_words)
        self.Selected_ques = {}
        j = 0
        for a in s.most_common(5000):
            self.Selected_ques[a[0]] = j
            j += 1
        print('elements list completed')

    def load_class_dict(self):
        return self.Selected_keys

    def load_Q_final(self):
        return self.Selected_ques
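# Usage sketch (illustrative, not from the original source): build the
# answer-class and question-word dictionaries once and reuse them downstream.
# Assumes annFile and quesFile are defined as in the surrounding code.
pre = preprocessing(annFile, quesFile)
answer_to_idx = pre.load_class_dict()  # up to 1000 most common answers -> class index
word_to_idx = pre.load_Q_final()       # up to 5000 most common question words -> index
print(len(answer_to_idx), len(word_to_idx))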
print("\n") print("Overall Accuracy is: %.02f\n" %(vqaEval.accuracy['overall'])) print("Per Question Type Accuracy is the following:") for quesType in vqaEval.accuracy['perQuestionType']: print("%s : %.02f" %(quesType, vqaEval.accuracy['perQuestionType'][quesType])) print("\n") print("Per Answer Type Accuracy is the following:") for ansType in vqaEval.accuracy['perAnswerType']: print("%s : %.02f" %(ansType, vqaEval.accuracy['perAnswerType'][ansType])) print("\n") # demo how to use evalQA to retrieve low score result evals = [quesId for quesId in vqaEval.evalQA if vqaEval.evalQA[quesId]<35] #35 is per question percentage accuracy if len(evals) > 0: print('ground truth answers') randomEval = random.choice(evals) randomAnn = vqa.loadQA(randomEval) vqa.showQA(randomAnn) print('\n') print('generated answer (accuracy %.02f)'%(vqaEval.evalQA[randomEval])) ann = vqaRes.loadQA(randomEval)[0] print("Answer: %s\n" %(ann['answer'])) imgId = randomAnn[0]['image_id'] imgFilename = 'COCO_' + dataSubType + '_'+ str(imgId).zfill(12) + '.jpg' if os.path.isfile(imgDir + imgFilename): I = io.imread(imgDir + imgFilename) plt.imshow(I) plt.axis('off') plt.show()
print('Enter the quesTypes (\'what color\', \'is this\', ..., \'all\')')
# quesTypes = input()
quesTypes = 'what is'
if quesTypes == 'all':
    annIdsA = vqa.getQuesIds()
    tannIdsA = tvqa.getQuesIds()
    imgIdsA = vqa.getImgIds()
    timgIdsA = tvqa.getImgIds()
else:
    annIdsA = vqa.getQuesIds(quesTypes=quesTypes)
    tannIdsA = tvqa.getQuesIds(quesTypes=quesTypes)
    imgIdsA = vqa.getImgIds(quesTypes=quesTypes)
    timgIdsA = tvqa.getImgIds(quesTypes=quesTypes)

annsA = vqa.loadQA(annIdsA)
tannsA = tvqa.loadQA(tannIdsA)

if len(annsA) > TR_LIMIT_SIZE:
    annsA[TR_LIMIT_SIZE:] = []
    imgIdsA[TR_LIMIT_SIZE:] = []
if len(tannsA) > VAL_LIMIT_SIZE:
    tannsA[VAL_LIMIT_SIZE:] = []
    timgIdsA[VAL_LIMIT_SIZE:] = []

train = get_inputList(vqa, annsA)
test = get_inputList(tvqa, tannsA)
vocab = sorted(list(set(train + test)))
# Reserve 0 for masking via pad_sequences
vocab_size = len(vocab) + 1
json_read = modelReader.read()
model = model_from_json(json_read)
model.load_weights('./model_weights_100iter.h5py')

vqaVal = VQA(annFile2, quesFile2)
newdataSubType = 'analysis1'
outputQuestionFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType, dataType, newdataSubType)
outputAnnotationFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType, newdataSubType)
# vqaAnalysis = vqaVal
newQuestion = 'yes'
questionIndex = 0
ids = vqaVal.getQuesIds()
anns = vqaVal.loadQA(ids)

if not os.path.exists(outputAnnotationFile) or os.stat(outputAnnotationFile).st_size == 0:
    outputQuestionWriter = open(outputQuestionFile, 'w')
    outputAnnotationWriter = open(outputAnnotationFile, 'w')
    outputQuestions = {}
    outputAnnotations = {}
    outputAnnotations['info'] = {}
    outputAnnotations['info']['description'] = 'This is the dataset created for further analysis of the VQA task.'
    outputAnnotations['info']['url'] = ' '
    outputAnnotations['info']['version'] = '1.0'
    outputAnnotations['info']['year'] = 2015
dataSubType = 'train2014'
annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType, dataSubType)
quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType, dataType, dataSubType)
imgDir = '%s/%s/' % (dataDir, dataSubType)

# initialize VQA api for QA annotations
vqa = VQA(annFile, quesFile)

# load and display QA annotations for given question types
"""
All possible quesTypes for abstract and mscoco have been provided in
respective text files in the ../QuestionTypes/ folder.
"""
annIds = vqa.getQuesIds(quesTypes='how many')
anns = vqa.loadQA(annIds)
randomAnn = random.choice(anns)
vqa.showQA([randomAnn])
imgId = randomAnn['image_id']
imgFilename = 'COCO_' + dataSubType + '_' + str(imgId).zfill(12) + '.jpg'
if os.path.isfile(imgDir + imgFilename):
    I = io.imread(imgDir + imgFilename)
    plt.imshow(I)
    plt.axis('off')
    plt.show()

# load and display QA annotations for given answer types
"""
ansTypes can be one of the following
yes/no
number
            break
    return pad_sequences(rX, maxlen=ques_maxlen), pad_sequences(rY, maxlen=ans_maxlen)

print("Enter the quesTypes ('what color', 'is this', ..., 'all')")
# quesTypes = input()
quesTypes = "is this"
if quesTypes == "all":
    annIdsA = vqa.getQuesIds()
    tannIdsA = tvqa.getQuesIds()
else:
    annIdsA = vqa.getQuesIds(quesTypes=quesTypes)
    tannIdsA = tvqa.getQuesIds(quesTypes=quesTypes)

annsA = vqa.loadQA(annIdsA)
tannsA = tvqa.loadQA(tannIdsA)

train = get_inputList(vqa, annsA)
test = get_inputList(tvqa, tannsA)
vocab = sorted(list(set(train + test)))
# Reserve 0 for masking via pad_sequences
vocab_size = len(vocab) + 1
word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
train = get_inputVec(vqa, annsA)
test = get_inputVec(tvqa, tannsA)
ques_maxlen = max(map(len, (x for x, _ in train + test)))
ans_maxlen = max(map(len, (x for _, x in train + test)))
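# Illustrative sketch (not in the original source): how word_idx and
# pad_sequences cooperate. Index 0 is never assigned to a word, so Keras can
# use it as the mask/padding value. The toy vocabulary here is hypothetical.
from keras.preprocessing.sequence import pad_sequences

toy_vocab = ['color', 'is', 'this', 'what']
toy_idx = dict((c, i + 1) for i, c in enumerate(toy_vocab))  # {'color': 1, 'is': 2, ...}
seqs = [[toy_idx[w] for w in ['what', 'is', 'this']],
        [toy_idx[w] for w in ['what', 'color']]]
print(pad_sequences(seqs, maxlen=4))
# [[0 4 2 3]
#  [0 0 4 1]]  -- zero-padded on the left to a common length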
dataSubType = 'train2014'
qtype = ['what color', 'what is on the', 'what sport is']

# path to images
data_dir = '/srv/share/data/mscoco/coco/images/train2014/'
model = '/home/ashwin/caffe/models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
prototxt = '/home/ashwin/caffe/models/bvlc_reference_caffenet/deploy.prototxt'

# load QAs
vqa = VQA(annFile, quesFile)

# gather annotations and image ids for each question type
annIds = []
anns = []
ids = []
for qitem in qtype:
    annIds = vqa.getQuesIds(quesTypes=qitem)  # query per type; using the whole qtype list here duplicated results
    anns.extend(vqa.loadQA(annIds))
    ids.extend(vqa.getImgIds(quesTypes=qitem))
UIDs = list(np.unique(np.array(ids)))

# extract fc7 features
caffe.set_mode_gpu()
caffe.set_device(1)
net = caffe.Net(prototxt, model, caffe.TEST)
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2, 0, 1))
transformer.set_mean('data', np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy').mean(1).mean(1))  # mean pixel
transformer.set_raw_scale('data', 255)  # the reference model operates on images in [0, 255] range instead of [0, 1]
transformer.set_channel_swap('data', (2, 1, 0))  # the reference model has channels in BGR order instead of RGB
net.blobs['data'].reshape(1, 3, 227, 227)
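# Sketch of the per-image fc7 extraction loop (an assumption: the original
# snippet stops after the net setup, so the loop below follows the standard
# Caffe I/O pattern; the file naming mirrors the COCO convention used above).
fc7_feats = {}
for uid in UIDs:
    img_path = data_dir + 'COCO_' + dataSubType + '_' + str(uid).zfill(12) + '.jpg'
    net.blobs['data'].data[...] = transformer.preprocess('data', caffe.io.load_image(img_path))
    net.forward()
    fc7_feats[uid] = net.blobs['fc7'].data[0].copy()  # 4096-d feature vector per image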
def main():
    glove_word_vec_file = './../glove/glove.6B.300d.txt'
    word_vec_dict = readGloveData(glove_word_vec_file)
    vqaTrain = VQA(annFile, quesFile)
    annotations = vqaTrain.dataset['annotations']
    questions = vqaTrain.questions['questions']
    answerFeatures = createAnswerFeatures(annotations)

    # dump the answer feature list
    answer_features_list = open('answer_feature_list.json', 'w')
    answer_features_list.write(json.dumps(answerFeatures))

    # for getting image vectors
    imageDict = pramod.generateDictionary(tfile)
    feats = sio.loadmat('./../features/coco/vgg_feats.mat')['feats']

    X_train = []
    Y_train = []
    FILE_INDEX = 0
    for question in questions:
        questionVector = getBOWVector(
            question['question'].strip().replace('?', ' ?').split(), word_vec_dict)
        imgID = question['image_id']
        imageVector = np.asarray(feats[:, imageDict[imgID]])
        annotations = vqaTrain.loadQA(ids=[question['question_id']])
        for annotation in annotations:
            ansString = annotation['multiple_choice_answer']
            answerVector = getAnswerVector(ansString, answerFeatures)
            X_train.append(np.append(imageVector, questionVector))
            Y_train.append(answerVector)
            if len(X_train) >= FILE_LIMIT:
                # flush a full batch of examples to disk (binary mode for np.save)
                train_x_file = open(FILE_PATH + X_TRAIN_FILE_NAME + str(FILE_INDEX) + '.npy', 'wb')
                train_y_file = open(FILE_PATH + Y_TRAIN_FILE_NAME + str(FILE_INDEX) + '.npy', 'wb')
                np.save(train_x_file, X_train)
                np.save(train_y_file, Y_train)
                X_train = []
                Y_train = []
                FILE_INDEX = FILE_INDEX + 1

    # write out any remaining examples
    if len(X_train) > 0:
        train_x_file = open(FILE_PATH + X_TRAIN_FILE_NAME + str(FILE_INDEX) + '.npy', 'wb')
        train_y_file = open(FILE_PATH + Y_TRAIN_FILE_NAME + str(FILE_INDEX) + '.npy', 'wb')
        np.save(train_x_file, X_train)
        np.save(train_y_file, Y_train)
        X_train = []
        Y_train = []
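# Illustrative reader for the batches written above (an assumption, not part
# of the original file): each X/Y .npy pair holds up to FILE_LIMIT training
# examples and can be streamed back with np.load.
def load_batch(index):
    X = np.load(FILE_PATH + X_TRAIN_FILE_NAME + str(index) + '.npy')
    Y = np.load(FILE_PATH + Y_TRAIN_FILE_NAME + str(index) + '.npy')
    return X, Y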
taskType = "OpenEnded" dataType = "mscoco" # 'mscoco' for real and 'abstract_v002' for abstract dataSubType = "train2014" annFile = "%s/Annotations/%s_%s_annotations.json" % (dataDir, dataType, dataSubType) quesFile = "%s/Questions/%s_%s_%s_questions.json" % (dataDir, taskType, dataType, dataSubType) imgDir = "%s/Images/%s/%s/" % (dataDir, dataType, dataSubType) # initialize VQA api for QA annotations vqa = VQA(annFile, quesFile) # load and display QA annotations for given question types """ All possible quesTypes for abstract and mscoco has been provided in respective text files in ../QuestionTypes/ folder. """ annIds = vqa.getQuesIds(quesTypes="how many") anns = vqa.loadQA(annIds) randomAnn = random.choice(anns) vqa.showQA([randomAnn]) imgId = randomAnn["image_id"] imgFilename = "COCO_" + dataSubType + "_" + str(imgId).zfill(12) + ".jpg" if os.path.isfile(imgDir + imgFilename): I = io.imread(imgDir + imgFilename) plt.imshow(I) plt.axis("off") plt.show() # load and display QA annotations for given answer types """ ansTypes can be one of the following yes/no number
class COCODataset(Dataset):
    def __init__(self, vocab, answers, rootDir='../../data2', dataSubType='train2014'):
        annFile = '{}/v2_mscoco_{}_annotations.json'.format(rootDir, dataSubType)
        quesFile = '{}/v2_OpenEnded_mscoco_{}_questions.json'.format(rootDir, dataSubType)
        self.vqa = VQA(annFile, quesFile)
        self.imgDir = '{}/{}'.format(rootDir, dataSubType)
        self.vocab = vocab
        self.answers = answers
        self.quesIds = self.vqa.getQuesIds()
        self.dataSubType = dataSubType
        self.transform = transforms.Compose([
            transforms.Resize(299),
            transforms.CenterCrop(299),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])

    def __len__(self):
        return len(self.quesIds)

    def __getitem__(self, index):
        """
        returns:
            question: tensor of word indices
            transformed image: tensor of shape (3, 299, 299)
            answers: tensor of indices mapped to the 3000 most frequently occurring
                     answers; answers not found among the 3000 most frequently
                     occurring answers are eliminated
        """
        quesId = self.quesIds[index]
        img_id = self.vqa.qqa[quesId]['image_id']
        img_id = str(img_id).zfill(12)
        path = 'COCO_{}_{}.jpg'.format(self.dataSubType, img_id)
        image = Image.open(os.path.join(self.imgDir, path)).convert('RGB')
        image = self.transform(image)

        # Convert question to word ids
        vocab = self.vocab
        question = self.vqa.qqa[quesId]['question']
        tokens = nltk.tokenize.word_tokenize(question.lower())
        question_list = []
        question_list.append(vocab('<start>'))
        question_list.extend([vocab(token) for token in tokens])
        question_list.append(vocab('<end>'))
        question_tensor = torch.Tensor(question_list)

        qa = self.vqa.loadQA(quesId)
        ans_list = [a['answer'] for a in qa[0]['answers']]
        ans_index_list = [self.answers(ans) for ans in ans_list]  # if ans in self.answers.ans2idx.keys()
        answer_tensor = torch.Tensor(ans_index_list)

        return question_tensor, image, answer_tensor

    def subset(self, fraction=0.5, count=None, shuffle=True):
        """
        Return a subset containing the given fraction/count of question ids
        (count takes priority over fraction).
        """
        if not count:
            count = int(len(self.quesIds) * fraction)
        print('Getting subset of length', count, 'out of', len(self))
        subset = copy.deepcopy(self)
        if shuffle:
            random.shuffle(subset.quesIds)
        subset.quesIds = subset.quesIds[:count]
        return subset
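# Sketch (an assumption, not in the original class): because the question and
# answer tensors vary in length per example, the default DataLoader collation
# fails; a custom collate_fn that zero-pads questions to the batch maximum is
# one workable approach.
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

def vqa_collate(batch):
    questions, images, answers = zip(*batch)
    questions = pad_sequence(questions, batch_first=True)  # pad with zeros
    images = torch.stack(images)
    return questions, images, answers  # answers kept as a tuple of tensors

# loader = DataLoader(dataset, batch_size=32, shuffle=True, collate_fn=vqa_collate)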
class DataReader:
    def __init__(self, data_type=TRAIN_DATA_TYPE, shape=(224, 224)):
        """
        shape: shape of the output image data
        data_type: which split of the dataset to load
        """
        self.data_type = data_type
        annFile = '{}\\annotations\\{}{}_{}_annotations.json'.format(DATA_PATH, VERSION_TYPE, DATA_TYPE, self.data_type)
        quesFile = '{}\\Questions\\{}{}_{}_{}_questions.json'.format(DATA_PATH, VERSION_TYPE, TASK_TYPE, DATA_TYPE, self.data_type)
        self.vqa = VQA(annFile, quesFile)
        self.img_ids = list(self.vqa.imgToQA.keys())
        self.pos = 0
        self.shape = shape
        questions = self.vqa.getQuestionsFile()
        questions = questions['questions']
        # qf maps a question_id to its question string
        self.qf = dict()
        for q in questions:
            self.qf[q["question_id"]] = q["question"]

    def get_pic_data(self, pic_id):
        """
        Load the image data for pic_id.
        Returns a 3-D numpy array.
        """
        imgFilename = 'COCO_' + self.data_type + '_' + str(pic_id).zfill(12) + '.jpg'
        path = '{}\\images\\{}\\'.format(DATA_PATH, self.data_type)
        img = Image.open(path + imgFilename)
        img = img.resize(self.shape)
        return numpy.array(img)

    def get_pic_qa(self, pic_id):
        """
        Get the questions and answers for an image.
        Returns [{'question': str, 'answers': [{'answer': str,
                 'answer_confidence': 'yes'|'maybe'|'no', 'answer_id': num}, ...]}, ...]
        """
        q = self.vqa.getQuesIds(imgIds=[pic_id])
        qas = self.vqa.loadQA(q)
        result = []
        for qa in qas:
            q = self.get_question(qa['question_id'])
            result.append({"question": q, "answers": qa["answers"]})
        return result

    def set_pos(self, pos=0):
        """Set the current read cursor."""
        self.pos = pos if pos < len(self.img_ids) else len(self.img_ids) + 1

    def get_pos(self):
        """Get the current cursor position."""
        return self.pos

    def get_next_pic_id(self):
        """
        Get the id of the next image (the image at the current cursor)
        and advance the cursor by one, wrapping around at the end.
        """
        img_id = self.img_ids[self.pos]
        self.pos = self.pos + 1 if not self.pos + 1 == len(self.img_ids) else 0
        return img_id

    def get_question(self, question_id):
        return self.qf[question_id]
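# Usage sketch (illustrative; constants like TRAIN_DATA_TYPE and DATA_PATH
# come from the surrounding module): walk the images in order and fetch each
# one's QA pairs.
reader = DataReader(data_type=TRAIN_DATA_TYPE, shape=(224, 224))
for _ in range(3):  # first three images
    pic_id = reader.get_next_pic_id()
    img = reader.get_pic_data(pic_id)     # numpy array, typically (224, 224, 3)
    qa_pairs = reader.get_pic_qa(pic_id)  # [{'question': ..., 'answers': [...]}, ...]
    print(pic_id, img.shape, len(qa_pairs))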
modelReader = open("./model_definition_100iter.json")
json_read = modelReader.read()
model = model_from_json(json_read)
model.load_weights("./model_weights_100iter.h5py")

vqaVal = VQA(annFile2, quesFile2)
newdataSubType = "analysis1"
outputQuestionFile = "%s/Questions/%s_%s_%s_questions.json" % (dataDir, taskType, dataType, newdataSubType)
outputAnnotationFile = "%s/Annotations/%s_%s_annotations.json" % (dataDir, dataType, newdataSubType)
# vqaAnalysis = vqaVal
newQuestion = "yes"
questionIndex = 0
ids = vqaVal.getQuesIds()
anns = vqaVal.loadQA(ids)

if not os.path.exists(outputAnnotationFile) or os.stat(outputAnnotationFile).st_size == 0:
    outputQuestionWriter = open(outputQuestionFile, "w")
    outputAnnotationWriter = open(outputAnnotationFile, "w")
    outputQuestions = {}
    outputAnnotations = {}
    outputAnnotations["info"] = {}
    outputAnnotations["info"]["description"] = "This is the dataset created for further analysis of the VQA task."
    outputAnnotations["info"]["url"] = " "
    outputAnnotations["info"]["version"] = "1.0"
    outputAnnotations["info"]["year"] = 2015
    outputAnnotations["info"]["contributor"] = "vishaal"