Example #1
def build_vocab(annfile, quesfile, threshold):
    """Build a simple vocabulary wrapper."""
    vqa = VQA(annfile, quesfile)
    counter = Counter()
    ids = vqa.getQuesIds()
    for i, qid in enumerate(ids):
        question = vqa.qqa[qid]['question']
        tokens = nltk.tokenize.word_tokenize(question.lower())
        counter.update(tokens)

        if i + 1 == len(ids):
            print("[{}/{}] Tokenized the questions.".format(i + 1, len(ids)))

    # If the word frequency is less than 'threshold', then the word is discarded.
    words = [word for word, cnt in counter.items() if cnt >= threshold]

    # Create a vocab wrapper and add some special tokens.
    vocab = Vocabulary()
    vocab.add_word('<pad>')
    vocab.add_word('<start>')
    vocab.add_word('<end>')
    vocab.add_word('<unk>')

    # Add the words to the vocabulary.
    for word in words:
        vocab.add_word(word)
    return vocab
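A hedged usage sketch for build_vocab; the file paths are hypothetical, and pickling the wrapper for later reuse is an assumed (but common) follow-up step:

import pickle

# Hypothetical paths; point these at your local VQA annotation/question files.
vocab = build_vocab(annfile='data/v2_mscoco_train2014_annotations.json',
                    quesfile='data/v2_OpenEnded_mscoco_train2014_questions.json',
                    threshold=4)
with open('vocab.pkl', 'wb') as f:
    pickle.dump(vocab, f)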
Example #2
File: test.py Project: cjds/WhatIsTheMan
def main(params):
	dataDir=params['dataDir']
	vqaDir=params['vqaDir']
	taskType='OpenEnded'
	dataType='mscoco' # 'mscoco' for real and 'abstract_v002' for abstract
	dataSubType='train2014'
	annFile='%s/Annotations/%s_%s_annotations.json'%(dataDir, dataType, dataSubType)
	quesFile='%s/Questions/%s_%s_%s_questions.json'%(dataDir, taskType, dataType, dataSubType)
	imgDir = '%s/Images/%s/%s/' %(dataDir, dataType, dataSubType)
	dataDir=params['dataDir']+"/"
	vqaDir=params['vqaDir']+"/"
	 
	vqa=VQA(annFile, quesFile)
	data=[]
	annIds = list(set(vqa.getImgIds()))[:params['num']]
	for img_id in annIds:
		imgFilename = 'mscoco/train2014/COCO_' + dataSubType + '_' + str(img_id).zfill(12) + '.jpg'
		copyfile(dataDir+imgFilename, vqaDir+imgFilename)
		caption=[]
		caption2=[]
		for i in vqa.imgToQA[img_id]:
			caption.append(vqa.qqa[i['question_id']]['question'])
			# 'multiple_choices' exists only in MultipleChoice-task question files
			caption2.append(vqa.qqa[i['question_id']].get('multiple_choices', []))
		data.append({"file_path":imgFilename,"captions":caption})


	with open('data.json','w') as outfile:
	 	json.dump(data,outfile)
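main expects a params dict with 'dataDir', 'vqaDir' and 'num' keys; a minimal invocation sketch with hypothetical paths:

params = {'dataDir': '../../VQA',    # hypothetical root containing Annotations/, Questions/, Images/
          'vqaDir': './vqa_subset',  # destination directory for the copied images
          'num': 100}                # how many image ids to export
main(params)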
Example #3
def evaluate_and_dump_predictions(pred, qids, qfile, afile, ix_ans_dict, filename):
    """
    dumps predictions to some default file
    :param pred: list of predictions, like [1, 2, 3, 2, ...]. one number for each example
    :param qids: question ids in the same order of predictions, they need to align and match
    :param qfile: path to the VQA question file
    :param afile: path to the VQA annotation file
    :param ix_ans_dict: dict mapping a 1-based index string to an answer string
    :param filename: path the predictions json is written to
    :return: overall VQA accuracy
    """
    assert len(pred) == len(qids), "Number of predictions need to match number of question IDs"
    answers = []
    for i, val in enumerate(pred):
        qa_pair = {}
        qa_pair['question_id'] = int(qids[i])
        qa_pair['answer'] = ix_ans_dict[str(val + 1)]  # note indexing diff between python and torch
        answers.append(qa_pair)
    vqa = VQA(afile, qfile)
    with open(filename, 'w') as fod:  # text mode: json.dump writes str, not bytes
        json.dump(answers, fod)
    # VQA evaluation
    vqaRes = vqa.loadRes(filename, qfile)
    vqaEval = VQAEval(vqa, vqaRes, n=2)
    vqaEval.evaluate()
    acc = vqaEval.accuracy['overall']
    print("Overall Accuracy is: %.02f\n" % acc)
    return acc
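A hedged call sketch; pred, qids and ix_ans_dict are hypothetical stand-ins for a model's argmaxed outputs, the aligned question ids, and the index-to-answer mapping:

acc = evaluate_and_dump_predictions(
    pred, qids,
    'v2_OpenEnded_mscoco_val2014_questions.json',  # hypothetical qfile
    'v2_mscoco_val2014_annotations.json',          # hypothetical afile
    ix_ans_dict,
    'predictions.json')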
Example #4
def main(params):
    dataDir = params['dataDir']
    vqaDir = params['vqaDir']
    taskType = 'OpenEnded'
    dataType = 'mscoco'  # 'mscoco' for real and 'abstract_v002' for abstract
    dataSubType = 'train2014'
    annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType,
                                                         dataSubType)
    quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType,
                                                         dataType, dataSubType)
    imgDir = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType)
    dataDir = params['dataDir'] + "/"
    vqaDir = params['vqaDir'] + "/"

    vqa = VQA(annFile, quesFile)
    data = []
    annIds = list(set(vqa.getImgIds()))[:params['num']]
    for img_id in annIds:
        imgFilename = 'mscoco/train2014/COCO_' + dataSubType + '_' + str(
            img_id).zfill(12) + '.jpg'
        copyfile(dataDir + imgFilename, vqaDir + imgFilename)
        caption = []
        caption2 = []
        for i in vqa.imgToQA[img_id]:
            caption.append(vqa.qqa[i['question_id']]['question'])
            # 'multiple_choices' exists only in MultipleChoice-task question files
            caption2.append(vqa.qqa[i['question_id']].get('multiple_choices', []))
        data.append({"file_path": imgFilename, "captions": caption})

    with open('data.json', 'w') as outfile:
        json.dump(data, outfile)
Example #5
    def __init__(self, split):
        dataDir = 'data'
        versionType = 'v2_'  # this should be '' when using the VQA v1.0 dataset
        taskType = 'OpenEnded'  # 'OpenEnded' only for v2.0. 'OpenEnded' or 'MultipleChoice' for v1.0
        dataType = 'mscoco'  # 'mscoco' only for v2.0. 'mscoco' for real and 'abstract_v002' for abstract for v1.0.
        if 'train' in split:
            dataSubType = 'train2014'
        elif 'val' in split:
            dataSubType = 'val2014'
        else:
            raise ValueError("split must contain 'train' or 'val'")
        annFile = '%s/Annotations/%s%s_%s_annotations.json' % (
            dataDir, versionType, dataType, dataSubType)
        quesFile = '%s/Questions/%s%s_%s_%s_questions.json' % (
            dataDir, versionType, taskType, dataType, dataSubType)
        imgDir = '%s/Images/%s/' % (dataDir, dataSubType)

        self.dataSubType = dataSubType
        self.split = split
        self.imgDir = imgDir

        print(annFile, quesFile)

        # Initialize VQA API
        vqa = VQA(annFile, quesFile)
        self.vqa = vqa

        #img_ids = vqa.getImgIds() # get all
        #self.img_ids = img_ids
        self.question_ids = vqa.getQuesIds()  # get all

        imagenet_mean = [0.485, 0.456, 0.406]
        imagenet_std = [0.229, 0.224, 0.225]
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),  # ImageNet standard
            transforms.ToTensor(),
            transforms.Normalize(mean=imagenet_mean, std=imagenet_std)
        ])

        # Create vocabulary mapping letters to numbers
        self.all_letters = string.ascii_letters

        # Get top 3000 answers
        with open(annFile, 'r') as f:
            train_ann = json.load(f)
        all_answers = []
        for ann in train_ann['annotations']:
            for answer in ann['answers']:
                all_answers.append(answer['answer'])
        if self.split == 'train':
            print('Computing top K answers')
            top_answers = self.get_top_k_answers(all_answers, 3000)
            self.top_answers = top_answers
            self.inverse_top_answers = {v: k for k, v in top_answers.items()}
            with open('train.pickle', 'rb') as feature_file:
                self.img_features = pickle.load(feature_file)

        if self.split == 'val':
            with open('val.pickle', 'rb') as feature_file:
                self.img_features = pickle.load(feature_file)
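The constructor calls self.get_top_k_answers, which is not shown here; a minimal sketch consistent with how top_answers is used above (answer string mapped to index, inverted afterwards), assuming collections.Counter is imported:

    def get_top_k_answers(self, answers, k):
        # Assumed helper: map each of the k most frequent answers to an index.
        counts = Counter(answers)
        return {ans: i for i, (ans, _) in enumerate(counts.most_common(k))}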
Example #6
def vqaEval(config=Config(), epoch_list=range(10)):
    accuracy_dic = {}
    best_accuracy, best_epoch = 0.0, -1

    # set up file names and paths
    annFile = config.selected_val_annotations_path
    quesFile = config.selected_val_questions_path

    for epoch in epoch_list:

        resFile = config.result_path % (epoch)

        vqa = VQA(annFile, quesFile)
        vqaRes = vqa.loadRes(resFile, quesFile)
        vqaEval = VQAEval(
            vqa, vqaRes, n=2
        )  #n is precision of accuracy (number of places after decimal), default is 2

        # evaluate results
        """
        If you have a list of question ids on which you would like to evaluate your results, pass it as a list to below function
        By default it uses all the question ids in annotation file
        """
        vqaEval.evaluate()

        # print accuracies
        accuracy = vqaEval.accuracy['overall']
        print "Overall Accuracy is: %.02f\n" % (accuracy)
        """
        print "Per Question Type Accuracy is the following:"
        for quesType in vqaEval.accuracy['perQuestionType']:
    	    print "%s : %.02f" %(quesType, vqaEval.accuracy['perQuestionType'][quesType])
        print "\n"
        """
        accuracy_dic[epoch] = {'overall': accuracy}
        print "Per Answer Type Accuracy is the following:"
        for ansType in vqaEval.accuracy['perAnswerType']:
            accuracy_dic[epoch][ansType] = vqaEval.accuracy['perAnswerType'][
                ansType]

#print "%s : %.02f" %(ansType, vqaEval.accuracy['perAnswerType'][ansType])

        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_epoch = epoch

    #print "** Done for every epoch! **"
    #print "Accuracy Dictionry"
    #print accuracy_dic
    print "Best Epoch is %d with Accuracy %.02f" % (best_epoch, best_accuracy)
    return accuracy_dic
Example #7
class preprocessing:
	def __init__(self, annotation_file=annFile, question_file=quesFile):
		self.vqar=VQA(annotation_file, question_file)
		self.annIds = self.vqar.getQuesIds()
		self.anns = self.vqar.loadQA(self.annIds)  #every questions with the dictionary loaded

		self.l=[a['multiple_choice_answer'] for a in self.anns]
		self.c=collections.Counter(self.l)
		self.Selected_key=[]
		self.Selected_keys={}
		self.i=0
		for a in self.c.most_common(1000):
			self.Selected_key.extend([a[0]])
			self.Selected_keys[a[0]] = self.i
			self.i+=1

		self.Question_element=[]
		for ele in self.anns:
			if ele['multiple_choice_answer'] in self.Selected_keys:
				self.Question_element.extend([ele])
		self.qqa = {ann['question_id']: [] for ann in self.Question_element}
		print 'assigning questions '
		y=0
		for ques in self.vqar.questions['questions']:
			print 'done',y
			y+=1
			if ques['question_id'] in self.qqa:
				self.qqa[ques['question_id']] = ques
		print 'assigning questions finished'
		ques_words=[]
		for ann in self.Question_element:
			quesId = ann['question_id']
			# split into words; iterating the string directly yields characters
			for words in self.qqa[quesId]['question'].split():
				ques_words.extend([words])
		s=collections.Counter(ques_words)
		self.Selected_ques={}
		j=0
		for a in s.most_common(5000):
			self.Selected_ques[a[0]] = j
			j+=1

		print 'elements list completed'

	def load_class_dict(self):
		return self.Selected_keys

	def load_Q_final(self):
		return self.Selected_ques
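A short usage sketch, assuming annFile and quesFile are defined at module level as the default arguments imply:

prep = preprocessing(annFile, quesFile)
class_dict = prep.load_class_dict()  # top-1000 answers -> class index
ques_vocab = prep.load_Q_final()     # top-5000 question words -> index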
Example #8
def vqaEval(config = Config(), epoch_list = range(10)):
    accuracy_dic = {}
    best_accuracy, best_epoch = 0.0, -1

    # set up file names and paths
    annFile = config.selected_val_annotations_path
    quesFile = config.selected_val_questions_path

    for epoch in epoch_list:

        resFile = config.result_path%(epoch)

        vqa = VQA(annFile, quesFile)
        vqaRes = vqa.loadRes(resFile, quesFile)
        vqaEval = VQAEval(vqa, vqaRes, n=2)   #n is precision of accuracy (number of places after decimal), default is 2

        # evaluate results
        """
        If you have a list of question ids on which you would like to evaluate your results, pass it as a list to below function
        By default it uses all the question ids in annotation file
        """
        vqaEval.evaluate()

        # print accuracies
        accuracy = vqaEval.accuracy['overall']
        print "Overall Accuracy is: %.02f\n" %(accuracy)
        """
        print "Per Question Type Accuracy is the following:"
        for quesType in vqaEval.accuracy['perQuestionType']:
    	    print "%s : %.02f" %(quesType, vqaEval.accuracy['perQuestionType'][quesType])
        print "\n"
        """
        accuracy_dic[epoch] = {'overall' : accuracy}
        print "Per Answer Type Accuracy is the following:"
        for ansType in vqaEval.accuracy['perAnswerType']:
            accuracy_dic[epoch][ansType] = vqaEval.accuracy['perAnswerType'][ansType]
	    #print "%s : %.02f" %(ansType, vqaEval.accuracy['perAnswerType'][ansType])

        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_epoch = epoch


    #print "** Done for every epoch! **"
    #print "Accuracy Dictionry"
    #print accuracy_dic
    print "Best Epoch is %d with Accuracy %.02f"%(best_epoch, best_accuracy)
    return accuracy_dic
Example #9
 def __init__(self,
              root,
              annotation_file,
              question_file,
              vocab,
              answers,
              transform=None):
     """Set the path for images, captions and vocabulary wrapper.
     
     Args:
         root: image directory.
         annotation_file: path to vqa annotation file
         question_file: path to vqa question file
         vocab: vocabulary wrapper.
         answers: answer wrapper.
         transform: image transformer.
     """
     self.root = root
     self.vqa = VQA(annotation_file, question_file)
     self.image_format = "COCO_{}_{{:012}}.jpg".format(
         self.vqa.dataset['data_subtype'])
     self.qids = [
         ann["question_id"] for ann in self.vqa.dataset["annotations"]
     ]
     self.vocab = vocab
     self.answers = answers
     self.transform = transform
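A hedged construction sketch, assuming this __init__ belongs to a torch-style dataset class named, say, VQADataset, and that vocab/answers come from wrappers like the build_vocab/build_answers helpers in the other examples:

from torchvision import transforms

transform = transforms.Compose([transforms.Resize((224, 224)),
                                transforms.ToTensor()])
dataset = VQADataset(root='data/Images/train2014',  # hypothetical paths
                     annotation_file='data/v2_mscoco_train2014_annotations.json',
                     question_file='data/v2_OpenEnded_mscoco_train2014_questions.json',
                     vocab=vocab, answers=answers, transform=transform)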
Example #10
def build_answers(annfile, quesfile):
    """Build an answers wrapper."""
    vqa = VQA(annfile, quesfile)
    counter = Counter()

    print('len of annotations dict:', len(vqa.dataset['annotations']))

    for ann_id, ann in enumerate(vqa.dataset['annotations']):
        ans_list = ann['answers']

        for dic in ans_list:
            counter[dic['answer']] += 1

        if ann_id + 1 == len(vqa.dataset['annotations']):
            print("[{}/{}] Answers tally completed.".format(
                ann_id + 1, len(vqa.dataset['annotations'])))

    # print('counter', counter)
    # print('most common', counter.most_common(2))
    answers = [ans[0] for ans in counter.most_common(3000)]

    # Create an answer wrapper
    answer = Answer()
    answer.add_ans('<unk>')

    # Add the answers to the wrapper.
    for ans in answers:
        answer.add_ans(ans)

    return answer
Example #11
def build_ans(annotation_file, question_file, number):
    """Build a simple answer wrapper."""
    vqa = VQA(annotation_file, question_file)
    counter = Counter()
    start = time.time()
    
    # Iterate through all questions and count frequency of words.
    all_annotations = vqa.dataset["annotations"]
    for i, annotation in enumerate(all_annotations, 1):
        answers = annotation["answers"]
        answers = [a["answer"].lower() for a in answers]
        counter.update(answers)
        
        if i % 1000 == 0:
            print("[{}/{}] Counting the answers. ({:.3f}s)".format(
                      i, len(all_annotations), time.time()-start),
                  end="\r"
                 )
    
    # Final progress counter line.
    print("[{}/{}] Tokenized the captions. ({:.3f}s)".format(
                      i, len(all_annotations), time.time()-start),
         )
    
    # Keep only the 'number' most frequent answers; the rest are discarded.
    top_ans = [ans for ans,freq in counter.most_common(number)]
    
    # Create an answer wrapper and add a default "don't know" answer.
    answers = mappings.Answer()
    answers.add_ans("<don't know>")
    
    # Add the answers to the wrapper.
    for a in top_ans:
        answers.add_ans(a)
    return answers
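A hedged call sketch with hypothetical paths, keeping the 3,000 most frequent answers; anything outside the kept set then maps to the <don't know> entry added above:

answers = build_ans('data/v2_mscoco_train2014_annotations.json',
                    'data/v2_OpenEnded_mscoco_train2014_questions.json',
                    number=3000)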
Example #12
 def __init__(self, vocab, answers, rootDir='../../data2', dataSubType='train2014'):
     
     annFile ='{}/v2_mscoco_{}_annotations.json'.format(rootDir, dataSubType)
     quesFile ='{}/v2_OpenEnded_mscoco_{}_questions.json'.format(rootDir, dataSubType)
     self.vqa = VQA(annFile, quesFile)
     self.imgDir = '{}/{}'.format(rootDir, dataSubType)
     self.vocab = vocab
     self.answers = answers
     self.quesIds = self.vqa.getQuesIds()
     self.dataSubType = dataSubType
     self.transform = transforms.Compose([
                      transforms.Resize(299),
                      transforms.CenterCrop(299),
                      transforms.ToTensor(),
                      transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                           std=[0.229, 0.224, 0.225]),
                      ])
Example #13
 def __init__(self, data_type = TRAIN_DATA_TYPE, shape = (224,224)):
     """
     shape为输出图像数据的shape
     data_type为需导入的数据集的类型
     """
     self.data_type = data_type
     annFile='{}\\annotations\\{}{}_{}_annotations.json'.format(DATA_PATH,VERSION_TYPE,DATA_TYPE,self.data_type)
     quesFile ='{}\\Questions\\{}{}_{}_{}_questions.json'.format(DATA_PATH,VERSION_TYPE,TASK_TYPE,DATA_TYPE,self.data_type)
     self.vqa = VQA(annFile, quesFile)
     self.img_ids = list(self.vqa.imgToQA.keys())
     self.pos = 0
     self.shape = shape
     questions = self.vqa.getQuestionsFile()
     questions = questions['questions']
     # qf maps a question_id to its question string
     self.qf = dict()
     for q in questions:
         self.qf[q["question_id"]] = q["question"]
def main(params):
	dataDir=params['dataDir']
	vqaDir=params['vqaDir']
	copyfileBool = params['copyfile'] == 'True'
	taskType='MultipleChoice'
	dataType='mscoco' # 'mscoco' for real and 'abstract_v002' for abstract
	dataSubType='train2014' #folder in which it is contained
	annFile='%s/Annotations/%s_%s_annotations.json'%(dataDir, dataType, dataSubType)
	quesFile='%s/Questions/%s_%s_%s_questions.json'%(dataDir, taskType, dataType, dataSubType)
	imgDir = '%s/Images/%s/%s/' %(dataDir, dataType, dataSubType)
	dataDir=params['dataDir']+"/"
	vqaDir=params['vqaDir']+"/"
	 
	vqa=VQA(annFile, quesFile)
	data=[]
	data2=[]
	annIds = list(set(vqa.getImgIds()))[:params['num']]
	for id in annIds:
		imgFilename = 'mscoco/train2014/COCO_' + dataSubType + '_'+ str(id).zfill(12) + '.jpg'
		if copyfileBool:
			copyfile(dataDir+imgFilename, vqaDir+imgFilename)
		caption=[]
		options=[]

		for i in vqa.imgToQA[id]:
			caption.append(vqa.qqa[i['question_id']]['question'])
			choices=  ', '.join(vqa.qqa[i['question_id']]['multiple_choices'])
			options.append(choices)
		data.append({"file_path":imgFilename,"captions":caption})
		data2.append({"file_path":imgFilename,"captions":options})


	with open('data.json','w') as outfile:
	 	json.dump(data,outfile)
	with open('data2.json','w') as outfile:
	 	json.dump(data2,outfile)
Example #15
def build_vocab(annotation_file, question_file, threshold):
    """Build a simple vocabulary wrapper."""
    vqa = VQA(annotation_file, question_file)
    counter = Counter()
    start = time.time()

    # Iterate through all questions and count frequency of words.
    all_questions = vqa.questions["questions"]
    for i, question in enumerate(all_questions, 1):
        question = question["question"]
        tokens = nltk.tokenize.word_tokenize(question.lower())
        counter.update(tokens)

        if i % 1000 == 0:
            print("[{}/{}] Tokenized the captions. ({:.3f}s)".format(
                i, len(all_questions),
                time.time() - start),
                  end="\r")

    # Final progress counter line.
    print(
        "[{}/{}] Tokenized the questions. ({:.3f}s)".format(
            i, len(all_questions),
            time.time() - start), )
    print("Using threshold: {}".format(threshold))
    # If the word frequency is less than 'threshold', then the word is discarded.
    words = [word for word, cnt in counter.items() if cnt >= threshold]

    # Create a vocab wrapper and add some special tokens.
    vocab = mappings.Vocabulary()
    vocab.add_word('<pad>')
    vocab.add_word('<start>')
    vocab.add_word('<end>')
    vocab.add_word('<unk>')

    # Add the words to the vocabulary.
    for word in words:
        vocab.add_word(word)
    return vocab
Example #16
import numpy as np
from tqdm import tqdm

dataDir = '../../Data'
versionType = 'v2_'  # this should be '' when using the VQA v1.0 dataset
taskType = 'OpenEnded'  # 'OpenEnded' only for v2.0. 'OpenEnded' or 'MultipleChoice' for v1.0
dataType = 'mscoco'  # 'mscoco' only for v2.0. 'mscoco' for real and 'abstract_v002' for abstract for v1.0.
dataSubType = 'train2014'
annFile = '%s/Annotations/%s%s_%s_annotations.json' % (dataDir, versionType,
                                                       dataType, dataSubType)
quesFile = '%s/Questions/%s%s_%s_%s_questions.json' % (
    dataDir, versionType, taskType, dataType, dataSubType)
imgDir = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType)
num_words = 1000
# initialize VQA api for QA annotations
vqa = VQA(annFile, quesFile)

# load and display QA annotations for given question types
"""
All possible quesTypes for abstract and mscoco has been provided in respective text files in ../QuestionTypes/ folder.
"""

## load question types

Quest_Dir = "../QuestionTypes/mscoco_question_types.txt"
f = open(Quest_Dir, "r")
contents = f.read().split("\n")

if contents[-1] == "":
    contents = contents[:-1]
Example #17
File: test.py Project: yappi62/keras
END_MARK = 1

dataDir = "../../VQA"
# print('Enter the taskType (\'OpenEnded\', \'MultipleChoice\')')
# taskType=input()
taskType = "OpenEnded"
dataType = "mscoco"
annFile = "%s/Annotations/%s_%s_annotations.json" % (dataDir, dataType, "train2014")
quesFile = "%s/Questions/%s_%s_%s_questions.json" % (dataDir, taskType, dataType, "train2014")
imgDir = "%s/Images/%s/" % (dataDir, "train2014")
tannFile = "%s/Annotations/%s_%s_annotations.json" % (dataDir, dataType, "val2014")
tquesFile = "%s/Questions/%s_%s_%s_questions.json" % (dataDir, taskType, dataType, "val2014")
timgDir = "%s/Images/%s/" % (dataDir, "val2014")

# initialize VQA api for QA annotations
vqa = VQA(annFile, quesFile)
tvqa = VQA(tannFile, tquesFile)

# load and display QA annotations for given question types
"""
quesTypes can be one of the following
what color 
what kind 
what are 
what type 
is the 
is this
how many 
are 
does 
where 
Example #18
File: Solver.py Project: xiangmingLi/PIL
    def exec_validation(self, sess, mode, folder, it=0, visualize=False):

        dp = VQADataLoader(mode=mode,
                           batchsize=config.VAL_BATCH_SIZE,
                           folder=folder)
        total_questions = len(dp.getQuesIds())
        epoch = 0
        pred_list = []
        testloss_list = []
        stat_list = []
        while epoch == 0:
            q_strs, q_word_vec_list, q_len_list, ans_vectors, img_features, a_word_vec, ans_score, ans_space_score, t_qid_list, img_ids, epoch = dp.next_batch(
                config.BATCH_SIZE)
            feed_dict = {
                self.model.q_input: q_word_vec_list,
                self.model.ans1: ans_vectors,
                self.model.seqlen: q_len_list,
                self.model.img_vec: img_features,
                self.lr: config.VQA_LR,
                self.model.keep_prob: 1.0,
                self.model.is_training: False
            }

            t_predict_list, predict_loss = sess.run(
                [self.model.predict1, self.model.softmax_cross_entrophy1],
                feed_dict=feed_dict)
            t_pred_str = [
                dp.vec_to_answer(pred_symbol) for pred_symbol in t_predict_list
            ]
            testloss_list.append(predict_loss)
            ans_vectors = np.asarray(ans_vectors).argmax(1)
            for qid, iid, ans, pred in zip(t_qid_list, img_ids, ans_vectors,
                                           t_pred_str):
                # pred_list.append({u'answer':pred, u'question_id': int(dp.getStrippedQuesId(qid))})
                pred_list.append((pred, int(dp.getStrippedQuesId(qid))))
                if visualize:
                    q_list = dp.seq_to_list(dp.getQuesStr(qid))
                    if mode in ('test-dev', 'test'):  # "or 'test'" was always truthy before
                        ans_str = ''
                        ans_list = [''] * 10
                    else:
                        ans_str = dp.vec_to_answer(ans)
                        ans_list = [
                            dp.getAnsObj(qid)[i]['answer'] for i in xrange(10)
                        ]
                    stat_list.append({ \
                        'qid': qid,
                        'q_list': q_list,
                        'iid': iid,
                        'answer': ans_str,
                        'ans_list': ans_list,
                        'pred': pred})
            percent = 100 * float(len(pred_list)) / total_questions
            sys.stdout.write('\r' + ('%.2f' % percent) + '%')
            sys.stdout.flush()

        print 'Deduping arr of len', len(pred_list)
        deduped = []
        seen = set()
        for ans, qid in pred_list:
            if qid not in seen:
                seen.add(qid)
                deduped.append((ans, qid))
        print 'New len', len(deduped)
        final_list = []
        for ans, qid in deduped:
            final_list.append({u'answer': ans, u'question_id': qid})

        mean_testloss = np.array(testloss_list).mean()

        if mode == 'val':
            valFile = './%s/val2015_resfile_%d' % (folder, it)
            with open(valFile, 'w') as f:
                json.dump(final_list, f)
            if visualize:
                visualize_failures(stat_list, mode)
            annFile = config.DATA_PATHS['val']['ans_file']
            quesFile = config.DATA_PATHS['val']['ques_file']
            vqa = VQA(annFile, quesFile)
            vqaRes = vqa.loadRes(valFile, quesFile)
            vqaEval = VQAEval(vqa, vqaRes, n=2)
            vqaEval.evaluate()
            acc_overall = vqaEval.accuracy['overall']
            acc_perQuestionType = vqaEval.accuracy['perQuestionType']
            acc_perAnswerType = vqaEval.accuracy['perAnswerType']
            return mean_testloss, acc_overall, acc_perQuestionType, acc_perAnswerType
        elif mode == 'test-dev':
            filename = './%s/vqa_OpenEnded_mscoco_test-dev2015_%s-%d-' % (
                folder, folder, it) + str(it).zfill(8) + '_results'
            with open(filename + '.json', 'w') as f:
                json.dump(final_list, f)
            if visualize:
                visualize_failures(stat_list, mode)
        elif mode == 'test':
            filename = './%s/vqa_OpenEnded_mscoco_test2015_%s-%d-' % (
                folder, folder, it) + str(it).zfill(8) + '_results'
            with open(filename + '.json', 'w') as f:
                json.dump(final_list, f)
            if visualize:
                visualize_failures(stat_list, mode)
Example #19
# taskType=input()
taskType = 'OpenEnded'
dataType = 'mscoco'
annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType,
                                                     'train2014')
quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType,
                                                     dataType, 'train2014')
imgDir = '%s/Images/%s/' % (dataDir, 'train2014')
tannFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType,
                                                      'val2014')
tquesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType,
                                                      dataType, 'val2014')
timgDir = '%s/Images/%s/' % (dataDir, 'val2014')

# initialize VQA api for QA annotations
vqa = VQA(annFile, quesFile)
tvqa = VQA(tannFile, tquesFile)

# load and display QA annotations for given question types
"""
quesTypes can be one of the following
what color 
what kind 
what are 
what type 
is the 
is this
how many 
are 
does 
where 
Example #20
def evalResults():
    dataDir = './../VQA'
    taskType = 'MultipleChoice'
    dataType = 'mscoco'  # 'mscoco' for real and 'abstract_v002' for abstract
    dataSubType = 'train2014'
    annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType,
                                                         dataSubType)
    quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType,
                                                         dataType, dataSubType)
    imgDir = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType)
    vqaTrain = VQA(annFile, quesFile)
    dummyano = vqaTrain.dataset['annotations']
    answerFeatures = ld.createAnswerFeatures(dummyano)

    dataDir2 = './../VQA'
    taskType2 = 'MultipleChoice'
    dataType2 = 'mscoco'  # 'mscoco' for real and 'abstract_v002' for abstract
    dataSubType2 = 'analysis1'
    annFile2 = '%s/Annotations/%s_%s_annotations.json' % (dataDir2, dataType2,
                                                          dataSubType2)
    quesFile2 = '%s/Questions/%s_%s_%s_questions.json' % (
        dataDir2, taskType2, dataType2, dataSubType2)
    imgDir2 = '%s/Images/%s/%s/' % (dataDir2, dataType2, dataSubType2)

    modelReader = open('./model_definition_100iter.json')
    json_read = modelReader.read()
    model = model_from_json(json_read)
    model.load_weights('./model_weights_100iter.h5py')

    vqaVal = VQA(annFile2, quesFile2)
    FILE_INDEX = 0

    total = 0.0
    correct = 0.0

    resultsDicts = []
    x_test = []
    y_test = []
    glove_word_vec_file = './../glove/glove.6B.300d.txt'
    word_vec_dict = ld.readGloveData(glove_word_vec_file)
    imageDict = pramod.generateDictionary(tfile)
    feats = sio.loadmat('./../features/coco/vgg_feats.mat')['feats']
    for quesID, annotation in vqaVal.qa.iteritems():
        # print quesID
        # if quesID not in vqaVal.qqa.keys():
        # 	continue
        question = vqaVal.qqa[quesID]
        # print question
        questionVector = ld.getBOWVector(
            question['question'].strip().replace('?', ' ?').split(),
            word_vec_dict)
        imgID = annotation['image_id']
        imageVector = np.asarray(feats[:, imageDict[imgID]])
        temp_dict = {}
        ansString = annotation['multiple_choice_answer']
        temp_dict['question_id'] = quesID
        # answerVector = ld.getAnswerVector(ansString, answerFeatures)
        temp_x_test = np.append(imageVector, questionVector)
        # temp_y_test = answerVector
        x_test = np.asarray([temp_x_test])
        # y_test = np.asarray([temp_y_test])
        predictions = model.predict_classes(x_test, verbose=False)
        temp_dict['answer'] = answerFeatures[predictions[0]]
        resultsDicts.append(temp_dict)
    with open('./../Results/MultipleChoice_mscoco_analysis1_results.json',
              'w') as writer:
        writer.write(json.dumps(resultsDicts))
Example #21
# quesFile    ='%s/Questions/%s_%s_%s_questions.json'%(dataDir, taskType, dataType, dataSubType)
# imgDir      ='%s/Images/%s/%s/' %(dataDir, dataType, dataSubType)
# resultType  ='second'
# fileTypes   = ['results', 'accuracy', 'evalQA', 'evalQuesType', 'evalAnsType']
# vqaVal = VQA(annFile, quesFile)

dataDir = './../VQA'
taskType = 'MultipleChoice'
dataType = 'mscoco'  # 'mscoco' for real and 'abstract_v002' for abstract
dataSubType = 'train2014'
annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType,
                                                     dataSubType)
quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType,
                                                     dataType, dataSubType)
imgDir = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType)
vqaTrain = VQA(annFile, quesFile)
dummyano = vqaTrain.dataset['annotations']
answerFeatures = ld.createAnswerFeatures(dummyano)

sys.path.insert(0, '%s/PythonHelperTools/vqaTools' % (dataDir))
sys.path.insert(0, '%s/PythonEvaluationTools' % (dataDir))

dataDir = './../VQA'
taskType2 = 'MultipleChoice'
dataType2 = 'mscoco'  # 'mscoco' for real and 'abstract_v002' for abstract
dataSubType2 = 'val2014'
annFile2 = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType,
                                                      dataSubType2)
quesFile2 = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType,
                                                      dataType, dataSubType2)
imgDir2 = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType2)
Example #22
class data_vqa:
    """ Data class of VQA dataset. """
    def __init__(
        self,
        resize_size=RESIZE_SIZE,
        batch_size=BATCH_SIZE,
        num_threads=NUM_THREADS,
        fixed_num=FIXED_NUM,
    ):
        """ Initlization """

        print '[__init__]'

        self.fixed_num = fixed_num

        # Initialize the official json processing api
        if os.path.isfile(pkl_file):
            print '[info] init with saved pkl file.'
            load = open(pkl_file, 'rb')
            self.imgid_dict = pickle.load(load)
            self.question_processed = pickle.load(load)
            self.confidence = pickle.load(load)
            self.answers = pickle.load(load)
            self.answer_dict = pickle.load(load)
            self.max_len_question = pickle.load(load)
            load.close()
        else:
            print '[info] init without saved pkl file.'
            self.data = VQA(annFile, quesFile)
            self.data_ids = self.data.getQuesIds()
            self.data_len = len(self.data_ids)
            print(self.data_len)
            self.copy_data()
            del self.data
            del self.data_ids
            self.question_processed = self.process_question(\
                    self.questions,
                    self.max_len_question)
            del self.questions
            self.build_dict_question()
            self.build_dict_answer()
            save = open(pkl_file, 'wb')
            pickle.dump(self.imgid_dict, save, -1)
            pickle.dump(self.question_processed, save, -1)
            pickle.dump(self.confidence, save, -1)
            pickle.dump(self.answers, save, -1)
            pickle.dump(self.answer_dict, save, -1)
            pickle.dump(self.max_len_question, save, -1)
            save.close()
            print '[info] saved pkl file.'

        # Build the reader of the tfrecord file
        # The tfrecord file is generated by tr.write.py

        feature = {
            'image': tf.FixedLenFeature([], tf.string),
            'imgid': tf.FixedLenFeature([], tf.int64)
        }
        filename_queue = tf.train.string_input_producer([trDir])
        reader = tf.TFRecordReader()
        (_, serialized_example) = reader.read(filename_queue)
        features = tf.parse_single_example(serialized_example,
                                           features=feature)
        image = tf.decode_raw(features['image'], tf.uint8)
        image = tf.cast(image, tf.float32)
        image = image / 255.
        imgid = tf.cast(features['imgid'], tf.int32)
        image = tf.reshape(image, [resize_size, resize_size, 3])
        (self.op_images, self.op_imgids) = \
            tf.train.shuffle_batch([image, imgid],
                                   batch_size=batch_size,
                                   capacity=20480,
                                   num_threads=num_threads,
                                   min_after_dequeue=10240)

    def copy_data(self):
        """ Copy the data from the official json api """

        print '    [copy_data]'
        self.answers = [[
            self.data.qa[data_id]['answers'][i]['answer'].encode(
                'ascii', 'ignore').lower() for i in range(10)
        ] for data_id in self.data_ids]
        self.confidence = [[(lambda x: (1. if x == 'yes' else 0.5))(
            self.data.qa[data_id]['answers'][i]['answer_confidence'].encode(
                'ascii', 'ignore')) for i in range(10)]
                           for data_id in self.data_ids]
        self.imgids = [
            self.data.qa[data_id]['image_id'] for data_id in self.data_ids
        ]
        self.questions = \
            [self.preprocessing(self.data.qqa[ques_id]['question'])
             for ques_id in self.data_ids]
        self.max_len_question = max(
            [len(question.split()) for question in self.questions])
        print self.max_len_question

    def build_dict_question(self):
        """ Build the mapping from image's imgid to index of 
            image's questions index """

        print '    [build_dict_question]'
        self.imgid_dict = {}
        imgid_set = list(set(self.imgids))
        for imgid in imgid_set:
            self.imgid_dict[imgid] = []
        for i in range(self.data_len):
            imgid = self.imgids[i]
            self.imgid_dict[imgid].append(i)

    def test_question(self):
        print '    [test_question]'
        chars = set()
        for question in self.questions:
            chars.update(question)
        char_list = list(chars)
        print len(char_list)

    def build_dict_answer(self):
        """ Build the mapping from answer's char set to id """

        print '    [build_dict_answer]'
        answer_list = []
        for answers in self.answers:
            for answer in answers:
                answer_list.append(answer)
        counts = Counter(answer_list)
        top_n = counts.most_common(self.fixed_num)
        fixed_list = [elem[0] for elem in top_n]

        # print(fixed_list)

        total = 0
        for elem in top_n:
            total += elem[1]
        print top_n[self.fixed_num - 1][1]
        print total
        print len(answer_list)

        self.answer_dict = dict((c, i) for (i, c) in enumerate(fixed_list))

    def preprocessing(self, text):
        """ Replace the unusual character in the text """

        to_replace = [
            '!',
            '#',
            '%',
            '$',
            "'",
            '&',
            ')',
            '(',
            '+',
            '*',
            '-',
            ',',
            '/',
            '.',
            '1',
            '0',
            '3',
            '2',
            '5',
            '4',
            '7',
            '6',
            '9',
            '8',
            ';',
            ':',
            '?',
            '_',
            '^',
        ]
        lowered = text.encode('ascii', 'ignore').lower()
        replacing = lowered
        for char_to_replace in to_replace:
            replacing = replacing.replace(char_to_replace,
                                          ' ' + char_to_replace + ' ')
        splited = replacing.split()
        # return replacing
        return ' '.join([stem(item) for item in splited])

    def tokenization(self, sentence, preprocess=True):
        """ Split the sentence into words """

        if preprocess:
            sentence = self.preprocessing(sentence)
        return sentence.split()

    def process_question(self, sentences, max_len_question):
        """ Preprocessing the question data """

        print '    [process_question]'
        question_list = []
        for sentence in sentences:
            splited = sentence.split()
            for word in splited:
                question_list.append(word)
        counts = Counter(question_list)
        top_n = counts.most_common(self.fixed_num)
        fixed_list = [elem[0] for elem in top_n]

        # print(fixed_list)

        total = 0
        for elem in top_n:
            total += elem[1]
        print top_n[self.fixed_num - 1][1]
        print total
        print len(question_list)

        self.question_dict = dict((c, i) for (i, c) in enumerate(fixed_list))

        processed_question = []
        for sentence in sentences:
            splited = sentence.split()
            processed_sentence = []
            for word in splited:
                processed_sentence.append(
                    self.question_dict.get(word, self.fixed_num))
            processed_sentence = processed_sentence + [self.fixed_num] \
                * (max_len_question - len(splited))
            processed_question.append(processed_sentence)

        return processed_question

    def get_batch(self, imgids):
        """ Get the next batch of data """

        questions = []
        answers = []
        confidences = []
        # (images, imgids) = sess.run([self.op_images, self.op_imgids])
        for imgid in imgids:
            index = random.choice(self.imgid_dict[imgid])
            questions.append(self.question_processed[index])
            answer_to_choice = random.choice(range(10))
            confidences.append(self.confidence[index][answer_to_choice])
            answer = self.answers[index][answer_to_choice]
            answers.append(self.answer_dict.get(answer, self.fixed_num))
        return (np.array(questions), np.array(answers), np.array(confidences))
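A hedged sketch of driving this loader under the TF1 queue-runner idiom its constructor sets up (annFile, quesFile, trDir and the size constants are assumed to be defined as in the snippet):

data = data_vqa()
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for _ in range(10):
        # images come from the tfrecord queue; QA pairs are looked up by imgid
        images, imgids = sess.run([data.op_images, data.op_imgids])
        questions, answers, confidences = data.get_batch(imgids)
    coord.request_stop()
    coord.join(threads)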
Example #23
    def __init__(
        self,
        resize_size=RESIZE_SIZE,
        batch_size=BATCH_SIZE,
        num_threads=NUM_THREADS,
        fixed_num=FIXED_NUM,
    ):
        """ Initlization """

        print '[__init__]'

        self.fixed_num = fixed_num

        # Initialize the official json processing api
        if os.path.isfile(pkl_file):
            print '[info] init with saved pkl file.'
            load = open(pkl_file, 'rb')
            self.imgid_dict = pickle.load(load)
            self.question_processed = pickle.load(load)
            self.confidence = pickle.load(load)
            self.answers = pickle.load(load)
            self.answer_dict = pickle.load(load)
            self.max_len_question = pickle.load(load)
            load.close()
        else:
            print '[info] init without saved pkl file.'
            self.data = VQA(annFile, quesFile)
            self.data_ids = self.data.getQuesIds()
            self.data_len = len(self.data_ids)
            print(self.data_len)
            self.copy_data()
            del self.data
            del self.data_ids
            self.question_processed = self.process_question(\
                    self.questions,
                    self.max_len_question)
            del self.questions
            self.build_dict_question()
            self.build_dict_answer()
            save = open(pkl_file, 'wb')
            pickle.dump(self.imgid_dict, save, -1)
            pickle.dump(self.question_processed, save, -1)
            pickle.dump(self.confidence, save, -1)
            pickle.dump(self.answers, save, -1)
            pickle.dump(self.answer_dict, save, -1)
            pickle.dump(self.max_len_question, save, -1)
            save.close()
            print '[info] saved pkl file.'

        # Build the reader of the tfrecord file
        # The tfrecord file is generated by tr.write.py

        feature = {
            'image': tf.FixedLenFeature([], tf.string),
            'imgid': tf.FixedLenFeature([], tf.int64)
        }
        filename_queue = tf.train.string_input_producer([trDir])
        reader = tf.TFRecordReader()
        (_, serialized_example) = reader.read(filename_queue)
        features = tf.parse_single_example(serialized_example,
                                           features=feature)
        image = tf.decode_raw(features['image'], tf.uint8)
        image = tf.cast(image, tf.float32)
        image = image / 255.
        imgid = tf.cast(features['imgid'], tf.int32)
        image = tf.reshape(image, [resize_size, resize_size, 3])
        (self.op_images, self.op_imgids) = \
            tf.train.shuffle_batch([image, imgid],
                                   batch_size=batch_size,
                                   capacity=20480,
                                   num_threads=num_threads,
                                   min_after_dequeue=10240)
Example #24
# In[3]:

import sys
sys.path.insert(0, './../VQA/PythonHelperTools')
from vqaTools.vqa import VQA

dataDir = './../VQA'
taskType = 'MultipleChoice'
dataType = 'mscoco'  # 'mscoco' for real and 'abstract_v002' for abstract
dataSubType = 'train2014'
annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType,
                                                     dataSubType)
quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType,
                                                     dataType, dataSubType)
imgDir = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType)
vqaTrain = VQA(annFile, quesFile)
dummyano = vqaTrain.dataset['annotations']
answerFeatures = utilities.createAnswerFeatures(dummyano)

vqaVal = VQA(annFile, quesFile)

# In[4]:

dataset = []

for quesID, annotation in vqaVal.qa.iteritems():
    question = vqaVal.qqa[quesID]
    question_text = question['question'].strip().replace('?', ' ?').split()
    imgID = annotation['image_id']
    ansString = annotation['multiple_choice_answer']
Example #25
taskType    ='OpenEnded' # 'OpenEnded' only for v2.0. 'OpenEnded' or 'MultipleChoice' for v1.0
dataType    ='mscoco'  # 'mscoco' only for v2.0. 'mscoco' for real and 'abstract_v002' for abstract for v1.0.
dataSubType ='train2014'
annFile     ='%s/Annotations/%s%s_%s_annotations.json'%(dataDir, versionType, dataType, dataSubType)
quesFile    ='%s/Questions/%s%s_%s_%s_questions.json'%(dataDir, versionType, taskType, dataType, dataSubType)
imgDir      ='%s/Images/%s/%s/' %(dataDir, dataType, dataSubType)
resultType  ='fake'
fileTypes   = ['results', 'accuracy', 'evalQA', 'evalQuesType', 'evalAnsType']

# An example result json file has been provided in './Results' folder.

[resFile, accuracyFile, evalQAFile, evalQuesTypeFile, evalAnsTypeFile] = \
    ['%s/Results/%s%s_%s_%s_%s_%s.json' % (dataDir, versionType, taskType, dataType,
                                           dataSubType, resultType, fileType)
     for fileType in fileTypes]

# create vqa object and vqaRes object
vqa = VQA(annFile, quesFile)
vqaRes = vqa.loadRes(resFile, quesFile)

# create vqaEval object by taking vqa and vqaRes
vqaEval = VQAEval(vqa, vqaRes, n=2)   #n is precision of accuracy (number of places after decimal), default is 2

# evaluate results
"""
If you have a list of question ids on which you would like to evaluate your results, pass it as a list to below function
By default it uses all the question ids in annotation file
"""
vqaEval.evaluate()

# print accuracies
print("\n")
print("Overall Accuracy is: %.02f\n" %(vqaEval.accuracy['overall']))
Example #26
taskType = 'OpenEnded'
dataType = 'mscoco'
annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType,
                                                     'train2014')
quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType,
                                                     dataType, 'train2014')
imgDir = '%s/Images/%s/' % (dataDir, 'train2014')
tannFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType,
                                                      'val2014')
tquesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType,
                                                      dataType, 'val2014')
timgDir = '%s/Images/%s/' % (dataDir, 'val2014')

##### initialize VQA api for QA annotations

vqa = VQA(annFile, quesFile)  # training
tvqa = VQA(tannFile, tquesFile)  # validation

# QA annotations for given question types
"""
quesTypes can be one of the following
..
what color 	what kind 	what are 	what type  	is the
is this		how many 	are 		does  		where
is there 	why 		which		do 		what does 
what time 	who 		what sport 	what animal 	what brand
"""

##### Load VQA dataset

print('Enter the quesTypes (\'what color\', \'is this\', ..., \'all\')')
Example #27
File: vqaDemo.py Project: caomw/VQA
from vqaTools.vqa import VQA
import random
import skimage.io as io
import matplotlib.pyplot as plt
import os

dataDir = "../../VQA"
taskType = "OpenEnded"
dataType = "mscoco"  # 'mscoco' for real and 'abstract_v002' for abstract
dataSubType = "train2014"
annFile = "%s/Annotations/%s_%s_annotations.json" % (dataDir, dataType, dataSubType)
quesFile = "%s/Questions/%s_%s_%s_questions.json" % (dataDir, taskType, dataType, dataSubType)
imgDir = "%s/Images/%s/%s/" % (dataDir, dataType, dataSubType)

# initialize VQA api for QA annotations
vqa = VQA(annFile, quesFile)

# load and display QA annotations for given question types
"""
All possible quesTypes for abstract and mscoco has been provided in respective text files in ../QuestionTypes/ folder.
"""
annIds = vqa.getQuesIds(quesTypes="how many")
anns = vqa.loadQA(annIds)
randomAnn = random.choice(anns)
vqa.showQA([randomAnn])
imgId = randomAnn["image_id"]
imgFilename = "COCO_" + dataSubType + "_" + str(imgId).zfill(12) + ".jpg"
if os.path.isfile(imgDir + imgFilename):
    I = io.imread(imgDir + imgFilename)
    plt.imshow(I)
    plt.axis("off")
Example #28
def exec_validation(model, opt, mode, folder, it, visualize=False):
    model.eval()
    criterion = nn.NLLLoss()
    dp = VQADataProvider(opt,
                         batchsize=opt.VAL_BATCH_SIZE,
                         mode='val',
                         folder=folder)
    epoch = 0
    pred_list = []
    testloss_list = []
    stat_list = []
    total_questions = len(dp.getQuesIds())

    print('Validating...')
    while epoch == 0:
        t_word, word_length, t_img_feature, t_answer, t_qid_list, t_iid_list, epoch = dp.get_batch_vec(
        )
        word_length = np.sum(word_length, axis=1)

        data = Variable(torch.from_numpy(t_word)).cuda()
        word_length = torch.from_numpy(word_length).cuda()
        img_feature = Variable(torch.from_numpy(t_img_feature)).cuda()
        label = Variable(torch.from_numpy(t_answer)).cuda()
        pred = model(data, word_length, img_feature, 'val')
        pred = (pred.data).cpu().numpy()
        if mode in ('test-dev', 'test'):  # "or 'test'" was always truthy before
            pass
        else:
            loss = criterion(pred, label.long())
            loss = (loss.data).cpu().numpy()
            testloss_list.append(loss)
        t_pred_list = np.argmax(pred, axis=1)
        t_pred_str = [
            dp.vec_to_answer(pred_symbol) for pred_symbol in t_pred_list
        ]

        for qid, iid, ans, pred in zip(t_qid_list, t_iid_list,
                                       t_answer.tolist(), t_pred_str):
            pred_list.append((pred, int(dp.getStrippedQuesId(qid))))
            if visualize:
                q_list = dp.seq_to_list(dp.getQuesStr(qid))
                if mode in ('test-dev', 'test'):
                    ans_str = ''
                    ans_list = [''] * 10
                else:
                    ans_str = dp.vec_to_answer(ans)
                    ans_list = [
                        dp.getAnsObj(qid)[i]['answer'] for i in range(10)
                    ]
                stat_list.append({\
                                    'qid'   : qid,
                                    'q_list' : q_list,
                                    'iid'   : iid,
                                    'answer': ans_str,
                                    'ans_list': ans_list,
                                    'pred'  : pred })
        percent = 100 * float(len(pred_list)) / total_questions
        sys.stdout.write('\r' + ('%.2f' % percent) + '%')
        sys.stdout.flush()

    print('Deduping arr of len', len(pred_list))
    deduped = []
    seen = set()
    for ans, qid in pred_list:
        if qid not in seen:
            seen.add(qid)
            deduped.append((ans, qid))
    print('New len', len(deduped))
    final_list = []
    for ans, qid in deduped:
        final_list.append({u'answer': ans, u'question_id': qid})

    if mode == 'val':
        mean_testloss = np.array(testloss_list).mean()
        valFile = './%s/val2015_resfile' % folder
        with open(valFile, 'w') as f:
            json.dump(final_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
        annFile = config.DATA_PATHS['val']['ans_file']
        quesFile = config.DATA_PATHS['val']['ques_file']
        vqa = VQA(annFile, quesFile)
        vqaRes = vqa.loadRes(valFile, quesFile)
        vqaEval = VQAEval(vqa, vqaRes, n=2)
        vqaEval.evaluate()
        acc_overall = vqaEval.accuracy['overall']
        acc_perQuestionType = vqaEval.accuracy['perQuestionType']
        acc_perAnswerType = vqaEval.accuracy['perAnswerType']
        return mean_testloss, acc_overall, acc_perQuestionType, acc_perAnswerType
    elif mode == 'test-dev':
        filename = './%s/vqa_OpenEnded_mscoco_test-dev2015_%s-' % (
            folder, folder) + str(it).zfill(8) + '_results'
        with open(filename + '.json', 'w') as f:
            json.dump(final_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
    elif mode == 'test':
        filename = './%s/vqa_OpenEnded_mscoco_test2015_%s-' % (
            folder, folder) + str(it).zfill(8) + '_results'
        with open(filename + '.json', 'w') as f:
            json.dump(final_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
Example #29
# import VQA library
from vqaTools.vqa import VQA

# DEFS 
# path to annotations
annFile = '/srv/share/vqa/release_data/mscoco/vqa/mscoco_train2014_annotations.json' # INSERT appropriate path
# path to questions
quesFile = '/srv/share/vqa/release_data/mscoco/vqa/OpenEnded_mscoco_train2014_questions.json' # insert appropriate path
dataSubType = 'train2014'
qtype = ['what color','what is on the','what sport is']
# path to images 
data_dir = '/srv/share/data/mscoco/coco/images/train2014/'
model = '/home/ashwin/caffe/models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
prototxt = '/home/ashwin/caffe/models/bvlc_reference_caffenet/deploy.prototxt'
# load QAs
vqa = VQA(annFile, quesFile) 
# add question type

annIds = []
anns = []
ids = []
for qitem in qtype:
  # query one question type per iteration; the original passed the whole qtype list each time
  annIds = vqa.getQuesIds(quesTypes=qitem)
  anns.extend(vqa.loadQA(annIds))
  ids.extend(vqa.getImgIds(quesTypes=qitem))

UIDs = list(np.unique(np.array(ids)))

# extract fc7 features
caffe.set_mode_gpu()
caffe.set_device(1)
Example #30
import random  # needed for random.choice below
import skimage.io as io
import matplotlib.pyplot as plt
import os

dataDir = '/users/Datasets/VQA/data'
taskType = 'OpenEnded'
dataType = 'mscoco'  # 'mscoco' for real and 'abstract_v002' for abstract
dataSubType = 'train2014'
annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType,
                                                     dataSubType)
quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType,
                                                     dataType, dataSubType)
imgDir = '%s/%s/' % (dataDir, dataSubType)

# initialize VQA api for QA annotations
vqa = VQA(annFile, quesFile)

# load and display QA annotations for given question types
"""
All possible quesTypes for abstract and mscoco has been provided in respective text files in ../QuestionTypes/ folder.
"""
annIds = vqa.getQuesIds(quesTypes='how many')
anns = vqa.loadQA(annIds)
randomAnn = random.choice(anns)
vqa.showQA([randomAnn])
imgId = randomAnn['image_id']
imgFilename = 'COCO_' + dataSubType + '_' + str(imgId).zfill(12) + '.jpg'
if os.path.isfile(imgDir + imgFilename):
    I = io.imread(imgDir + imgFilename)
    plt.imshow(I)
    plt.axis('off')
Example #31
def evalResults():
	dataDir = './../VQA'
	taskType = 'MultipleChoice'
	dataType = 'mscoco' # 'mscoco' for real and 'abstract_v002' for abstract
	dataSubType = 'train2014'
	annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType, dataSubType)
	quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType, dataType, dataSubType)
	imgDir = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType)
	vqaTrain = VQA(annFile, quesFile)
	dummyano = vqaTrain.dataset['annotations']
	answerFeatures = ld.createAnswerFeatures(dummyano)

	dataDir2 = './../VQA'
	taskType2 = 'MultipleChoice'
	dataType2 = 'mscoco' # 'mscoco' for real and 'abstract_v002' for abstract
	dataSubType2 = 'val2014'
	# number = '100'
	annFile2 = '%s/Annotations/%s_%s_annotations.json' % (dataDir2, dataType2, dataSubType2)
	quesFile2 = '%s/Questions/%s_%s_%s_questions.json' % (dataDir2, taskType2, dataType2, dataSubType2)
	resultFile = './../Results/MultipleChoice_mscoco_analysis1_second_results.json' 
	imgDir2 = '%s/Images/%s/%s/' % (dataDir2, dataType2, dataSubType2)

	modelReader = open('./model_definition_100iter.json')
	json_read = modelReader.read()
	model = model_from_json(json_read)
	model.load_weights('./model_weights_100iter.h5py')
	
	vqaVal = VQA(annFile2, quesFile2)
	FILE_INDEX = 0
    
	total = 0.0
	correct = 0.0

	resultsDicts = []
	x_test = []
	y_test = []
	glove_word_vec_file = './../glove/glove.6B.300d.txt'
	word_vec_dict = ld.readGloveData(glove_word_vec_file)
	imageDict = pramod.generateDictionary(tfile)
	feats = sio.loadmat('./../features/coco/vgg_feats.mat')['feats']
	for quesID, annotation in vqaVal.qa.iteritems():
		# print quesID
		# if quesID not in vqaVal.qqa.keys():
		# 	continue
		question = vqaVal.qqa[quesID]
		choicesList = vqaVal.qqa[quesID]['multiple_choices']
		# print choicesList
		setChoices = set(choicesList)
		setAnswers = set(answerFeatures)
		choiceAndAnswer = list(setChoices.intersection(setAnswers))
		choiceIndex = []
		for choice in choiceAndAnswer:
			choiceIndex.append(answerFeatures.index(choice))
		#print choiceIndex
		questionVector = ld.getBOWVector(question['question'].strip().replace('?', ' ?').split(), word_vec_dict) 
		imgID = annotation['image_id']
		imageVector = np.asarray(feats[:, imageDict[imgID]])
		temp_dict = {}
		ansString = annotation['multiple_choice_answer']
		temp_dict['question_id'] = quesID
		# answerVector = ld.getAnswerVector(ansString, answerFeatures)
		temp_x_test = np.append(imageVector, questionVector)
		# temp_y_test = answerVector
		x_test = np.asarray([temp_x_test])
		# y_test = np.asarray([temp_y_test])
		predictions = model.predict_classes(x_test, verbose = False)
		predict_probaResult = model.predict_proba(x_test,verbose = False)
		# print "###############Sanity Check############"
		# print predict_probaResult.size
		# print predict_probaResult
		# print predict_probaResult[7]
		# print predict_probaResult
		maxPred = 0.0
		maxIndex = 0  # default in case choiceIndex is empty
		# print "#######################################"
		print choiceIndex
		for item in choiceIndex:
			print len(choiceIndex), item,answerFeatures[item]
		for item in choiceIndex:
			print item,answerFeatures[item],predict_probaResult[0][item]
			if(maxPred < predict_probaResult[0][item]):
				maxPred = predict_probaResult[0][item]
				maxIndex = item
		print maxPred, maxIndex, answerFeatures[maxIndex]
		# temp_dict['answer'] = answerFeatures[predictions[0]]
		temp_dict['answer'] = answerFeatures[maxIndex]
		resultsDicts.append(temp_dict)
	writer = open(resultFile, 'w')
	json_dump = json.dumps(resultsDicts)
	writer.write(json_dump)
	writer.close()
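
# A hedged companion sketch (not in the original file): scoring the results
# file that evalResults() writes, via the VQAEval API used in other examples
# in this collection. The import path and file locations are assumptions
# reconstructed from the paths above.
def scoreResults():
	from vqaEvaluation.vqaEval import VQAEval
	annFile2 = './../VQA/Annotations/mscoco_val2014_annotations.json'
	quesFile2 = './../VQA/Questions/MultipleChoice_mscoco_val2014_questions.json'
	resultFile = './../Results/MultipleChoice_mscoco_analysis1_second_results.json'
	vqaVal = VQA(annFile2, quesFile2)
	vqaRes = vqaVal.loadRes(resultFile, quesFile2)
	vqaEval = VQAEval(vqaVal, vqaRes, n=2)
	vqaEval.evaluate()
	print 'Overall accuracy: %.02f' % vqaEval.accuracy['overall']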
def main():
    glove_word_vec_file = './../glove/glove.6B.300d.txt'
    word_vec_dict = readGloveData(glove_word_vec_file)
    vqaTrain = VQA(annFile, quesFile)
    annotations = vqaTrain.dataset['annotations']
    questions = vqaTrain.questions['questions']
    answerFeatures = createAnswerFeatures(annotations)

    # Dumping answer features
    with open('answer_feature_list.json', 'w') as answer_features_list:
        answer_features_list.write(json.dumps(answerFeatures))

    # For getting image vectors
    imageDict = pramod.generateDictionary(tfile)
    feats = sio.loadmat('./../features/coco/vgg_feats.mat')['feats']

    data = []
    X_train = []
    Y_train = []
    X_test = []
    Y_test = []
    FILE_INDEX = 0
    for question in questions:
        # quesItem = {}
        # print question
        questionVector = getBOWVector(
            question['question'].strip().replace('?', ' ?').split(),
            word_vec_dict)
        imgID = question['image_id']
        imageVector = np.asarray(feats[:, imageDict[imgID]])
        # quesItem['image_id'] = imgID
        # quesItem['question'] = question['question'].replace('?', ' ?').split(' ')
        annotations = vqaTrain.loadQA(ids=[question['question_id']])
        for annotation in annotations:
            ansString = annotation['multiple_choice_answer']
            answerVector = getAnswerVector(ansString, answerFeatures)
            temp_X_train = np.append(imageVector, questionVector)
            temp_Y_train = answerVector
            X_train.append(temp_X_train)
            Y_train.append(temp_Y_train)
            if len(X_train) >= FILE_LIMIT:
                # pass the path directly so np.save opens the file in
                # binary mode (a text-mode 'w' handle corrupts .npy data)
                np.save(
                    FILE_PATH + X_TRAIN_FILE_NAME + str(FILE_INDEX) + '.npy',
                    X_train)
                np.save(
                    FILE_PATH + Y_TRAIN_FILE_NAME + str(FILE_INDEX) + '.npy',
                    Y_train)
                X_train = []
                Y_train = []
                FILE_INDEX = FILE_INDEX + 1
            # print len(X_train)
        # if len(annotations) != 1:
        # print imgID, " has annotations ", len(annotations)

        # for ann in annotations:
        # quesItemCopy = dict(quesItem)
        # ansString = ann['multiple_choice_answer']
        # quesItemCopy['answer'] = ansString
        # data.append(quesItemCopy)
    if len(X_train) > 0:
        # pass the path directly so np.save opens the file in binary mode
        np.save(FILE_PATH + X_TRAIN_FILE_NAME + str(FILE_INDEX) + '.npy',
                X_train)
        np.save(FILE_PATH + Y_TRAIN_FILE_NAME + str(FILE_INDEX) + '.npy',
                Y_train)
        X_train = []
        Y_train = []
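
# A hedged companion sketch (not in the original): loading the chunked .npy
# files that main() writes back into memory. Relies on the same assumed
# module-level constants (FILE_PATH, X_TRAIN_FILE_NAME, Y_TRAIN_FILE_NAME)
# and on os/np being imported in the full file.
def load_training_chunks():
    X_parts, Y_parts = [], []
    index = 0
    while os.path.exists(FILE_PATH + X_TRAIN_FILE_NAME + str(index) + '.npy'):
        X_parts.append(np.load(FILE_PATH + X_TRAIN_FILE_NAME + str(index) + '.npy'))
        Y_parts.append(np.load(FILE_PATH + Y_TRAIN_FILE_NAME + str(index) + '.npy'))
        index += 1
    return np.concatenate(X_parts), np.concatenate(Y_parts)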
Example #33
# coding: utf-8

from vqaTools.vqa import VQA
import random
import skimage.io as io
import matplotlib.pyplot as plt
import os

dataDir='../'
split = 'train'
annFile='%s/Annotations/%s.json'%(dataDir, split)
imgDir = '%s/Images/' %dataDir

# initialize VQA api for QA annotations
vqa=VQA(annFile)

# load and display QA annotations for given answer types
"""
ansTypes can be one of the following
yes/no
number
other
unanswerable
"""
anns = vqa.getAnns(ansTypes='yes/no')
randomAnn = random.choice(anns)
vqa.showQA([randomAnn])
imgFilename = randomAnn['image']
if os.path.isfile(imgDir + imgFilename):
	I = io.imread(imgDir + imgFilename)
	plt.imshow(I)
	plt.axis('off')
	plt.show()
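
# A hedged extra (not in the original demo): tallying the distribution over
# the four answer types listed above. Assumes getAnns() with no filter
# returns every annotation and that each carries an 'answer_type' field.
from collections import Counter
print(Counter(ann['answer_type'] for ann in vqa.getAnns()))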
Example #34
def exec_validation(device_id, mode, it='', visualize=False):

    caffe.set_device(device_id)
    caffe.set_mode_gpu()
    net = caffe.Net('./result/proto_test.prototxt',\
              './result/tmp.caffemodel',\
              caffe.TEST)

    dp = VQADataProvider(mode=mode, batchsize=64)
    total_questions = len(dp.getQuesIds())
    epoch = 0

    pred_list = []
    testloss_list = []
    stat_list = []

    while epoch == 0:
        t_word, t_cont, t_img_feature, t_answer, t_qid_list, t_iid_list, epoch = dp.get_batch_vec(
        )
        net.blobs['data'].data[...] = np.transpose(t_word, (1, 0))
        net.blobs['cont'].data[...] = np.transpose(t_cont, (1, 0))
        net.blobs['img_feature'].data[...] = t_img_feature
        net.blobs['label'].data[...] = t_answer
        net.forward()
        t_pred_list = net.blobs['prediction'].data.argmax(axis=1)
        t_pred_str = [
            dp.vec_to_answer(pred_symbol) for pred_symbol in t_pred_list
        ]
        testloss_list.append(net.blobs['loss'].data)
        for qid, iid, ans, pred in zip(t_qid_list, t_iid_list,
                                       t_answer.tolist(), t_pred_str):
            pred_list.append({
                'answer': pred,
                'question_id': int(dp.getStrippedQuesId(qid))
            })
            if visualize:
                q_list = dp.seq_to_list(dp.getQuesStr(qid))
                if mode == 'test-dev' or mode == 'test':
                    ans_str = ''
                    ans_list = [''] * 10
                else:
                    ans_str = dp.vec_to_answer(ans)
                    ans_list = [
                        dp.getAnsObj(qid)[i]['answer'] for i in range(10)
                    ]
                stat_list.append({\
                                    'qid'   : qid,
                                    'q_list' : q_list,
                                    'iid'   : iid,
                                    'answer': ans_str,
                                    'ans_list': ans_list,
                                    'pred'  : pred })
        percent = 100 * float(len(pred_list)) / total_questions
        sys.stdout.write('\r' + ('%.2f' % percent) + '%')
        sys.stdout.flush()

    mean_testloss = np.array(testloss_list).mean()

    if mode == 'val':
        valFile = './result/val2015_resfile'
        with open(valFile, 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
        annFile = config.DATA_PATHS['val']['ans_file']
        quesFile = config.DATA_PATHS['val']['ques_file']
        vqa = VQA(annFile, quesFile)
        vqaRes = vqa.loadRes(valFile, quesFile)
        vqaEval = VQAEval(vqa, vqaRes, n=2)
        vqaEval.evaluate()
        acc_overall = vqaEval.accuracy['overall']
        acc_perQuestionType = vqaEval.accuracy['perQuestionType']
        acc_perAnswerType = vqaEval.accuracy['perAnswerType']
        return mean_testloss, acc_overall, acc_perQuestionType, acc_perAnswerType
    elif mode == 'test-dev':
        filename = './result/vqa_OpenEnded_mscoco_test-dev2015_v3t' + str(
            it).zfill(8) + '_results'
        with open(filename + '.json', 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
    elif mode == 'test':
        filename = './result/vqa_OpenEnded_mscoco_test2015_v3c' + str(
            it).zfill(8) + '_results'
        with open(filename + '.json', 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
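
# A hedged usage sketch (not part of the original): running the validation
# pass above on GPU 0; the tuple unpacking matches the 'val' return path.
if __name__ == '__main__':
    loss, acc, acc_per_qtype, acc_per_atype = exec_validation(0, 'val')
    print('val loss: %f, overall accuracy: %.2f' % (loss, acc))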
Example #35
taskType = 'OpenEnded'
dataType = 'mscoco'
dataSubType = 'train2014'

# dataSubType ='val2014'

annFile = '%s/Annotations/%s%s_%s_annotations.json' % (dataDir, versionType,
                                                       dataType, dataSubType)
quesFile = '%s/Questions/%s%s_%s_%s_questions.json' % (
    dataDir, versionType, taskType, dataType, dataSubType)
imgDir = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType)
gtDir = '%s/QuestionTypes/abstract_v002_question_types.txt' % dataDir

resize_size = 256

vqa = VQA(annFile, quesFile)
imgids = vqa.getImgIds()
print len(imgids)  # one id per annotation, duplicates included
imgids = list(set(imgids))
print len(imgids)  # unique image ids
writer = tf.python_io.TFRecordWriter('%s/TR/%s_im.tfrecord' %
                                     (dataDir, dataSubType))
idx = 0
for imgid in imgids:
    imgFilename = 'COCO_' + dataSubType + '_' + str(imgid).zfill(12) \
        + '.jpg'
    if os.path.isfile(imgDir + imgFilename):
        image = misc.imread(imgDir + imgFilename)
        if len(image.shape) < 3:
            # grayscale image: stack into an (H, W, 3) array
            image = np.stack([image] * 3, axis=-1)
        image = misc.imresize(image, [resize_size, resize_size])
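        # A hedged completion (the original snippet is truncated above):
        # serialize the resized image into the TFRecord writer opened
        # earlier, using the TF1 tf.train.Example API. Feature names here
        # are assumptions.
        example = tf.train.Example(features=tf.train.Features(feature={
            'image': tf.train.Feature(
                bytes_list=tf.train.BytesList(value=[image.tostring()])),
            'image_id': tf.train.Feature(
                int64_list=tf.train.Int64List(value=[imgid])),
        }))
        writer.write(example.SerializeToString())
        idx += 1
writer.close()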
Example #36
def exec_validation(device_id, mode, it='', visualize=False):

    caffe.set_device(device_id)
    caffe.set_mode_gpu()
    net = caffe.Net('./result/proto_test.prototxt',\
              './result/tmp.caffemodel',\
              caffe.TEST)

    dp = VQADataProvider(mode=mode,batchsize=64)
    total_questions = len(dp.getQuesIds())
    epoch = 0

    pred_list = []
    testloss_list = []
    stat_list = []

    while epoch == 0:
        t_word, t_cont, t_img_feature, t_answer, t_glove_matrix, t_qid_list, t_iid_list, epoch = dp.get_batch_vec()
        net.blobs['data'].data[...] = np.transpose(t_word,(1,0))
        net.blobs['cont'].data[...] = np.transpose(t_cont,(1,0))
        net.blobs['img_feature'].data[...] = t_img_feature
        net.blobs['label'].data[...] = t_answer
        net.blobs['glove'].data[...] = np.transpose(t_glove_matrix, (1,0,2))
        net.forward()
        t_pred_list = net.blobs['prediction'].data.argmax(axis=1)
        t_pred_str = [dp.vec_to_answer(pred_symbol) for pred_symbol in t_pred_list]
        testloss_list.append(net.blobs['loss'].data)
        for qid, iid, ans, pred in zip(t_qid_list, t_iid_list, t_answer.tolist(), t_pred_str):
            pred_list.append({u'answer':pred, u'question_id': int(dp.getStrippedQuesId(qid))})
            if visualize:
                q_list = dp.seq_to_list(dp.getQuesStr(qid))
                if mode == 'test-dev' or mode == 'test':
                    ans_str = ''
                    ans_list = ['']*10
                else:
                    ans_str = dp.vec_to_answer(ans)
                    ans_list = [ dp.getAnsObj(qid)[i]['answer'] for i in xrange(10)]
                stat_list.append({\
                                    'qid'   : qid,
                                    'q_list' : q_list,
                                    'iid'   : iid,
                                    'answer': ans_str,
                                    'ans_list': ans_list,
                                    'pred'  : pred })
        percent = 100 * float(len(pred_list)) / total_questions
        sys.stdout.write('\r' + ('%.2f' % percent) + '%')
        sys.stdout.flush()

    mean_testloss = np.array(testloss_list).mean()

    if mode == 'val':
        valFile = './result/val2015_resfile'
        with open(valFile, 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list,mode)
        annFile = config.DATA_PATHS['val']['ans_file']
        quesFile = config.DATA_PATHS['val']['ques_file']
        vqa = VQA(annFile, quesFile)
        vqaRes = vqa.loadRes(valFile, quesFile)
        vqaEval = VQAEval(vqa, vqaRes, n=2)
        vqaEval.evaluate()
        acc_overall = vqaEval.accuracy['overall']
        acc_perQuestionType = vqaEval.accuracy['perQuestionType']
        acc_perAnswerType = vqaEval.accuracy['perAnswerType']
        return mean_testloss, acc_overall, acc_perQuestionType, acc_perAnswerType
    elif mode == 'test-dev':
        filename = './result/vqa_OpenEnded_mscoco_test-dev2015_v3t'+str(it).zfill(8)+'_results'
        with open(filename+'.json', 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list,mode)
    elif mode == 'test':
        filename = './result/vqa_OpenEnded_mscoco_test2015_v3c'+str(it).zfill(8)+'_results'
        with open(filename+'.json', 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list,mode)
dataDir = "./../VQA"
sys.path.insert(0, "%s/PythonEvaluationTools" % (dataDir))

taskType2 = "MultipleChoice"
dataType2 = "mscoco"  # 'mscoco' for real and 'abstract_v002' for abstract
dataSubType2 = "val2014"
annFile2 = "%s/Annotations/%s_%s_annotations.json" % (dataDir, dataType2, dataSubType2)
quesFile2 = "%s/Questions/%s_%s_%s_questions.json" % (dataDir, taskType2, dataType2, dataSubType2)
imgDir2 = "%s/Images/%s/%s/" % (dataDir, dataType2, dataSubType2)

modelReader = open("./model_definition_100iter.json")
json_read = modelReader.read()
model = model_from_json(json_read)
model.load_weights("./model_weights_100iter.h5py")

vqaVal = VQA(annFile2, quesFile2)


newdataSubType = "analysis1"
outputQuestionFile = "%s/Questions/%s_%s_%s_questions.json" % (dataDir, taskType2, dataType2, newdataSubType)
outputAnnotationFile = "%s/Annotations/%s_%s_annotations.json" % (dataDir, dataType2, newdataSubType)
# vqaAnalysis = vqaVal
newQuestion = "yes"
questionIndex = 0
ids = vqaVal.getQuesIds()
anns = vqaVal.loadQA(ids)


if not os.path.exists(outputAnnotationFile) or os.stat(outputAnnotationFile).st_size == 0:
    outputQuestionWriter = open(outputQuestionFile, "w")
    outputAnnotationWriter = open(outputAnnotationFile, "w")
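    # A hedged sketch of what would follow (the snippet is truncated here):
    # the VQA API reads files through dataset['annotations'] and
    # questions['questions'], as in the examples above, so the new analysis
    # files need that same top-level skeleton. Dumping all ids/anns loaded
    # above is purely illustrative, not from the source.
    json.dump({"questions": [vqaVal.qqa[qid] for qid in ids]}, outputQuestionWriter)
    json.dump({"annotations": anns}, outputAnnotationWriter)
    outputQuestionWriter.close()
    outputAnnotationWriter.close()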