Example #1
from collections import Counter

import nltk

# VQA (the official VQA API wrapper) and Vocabulary are assumed to be
# provided by the surrounding project.


def build_vocab(annfile, quesfile, threshold):
    """Build a simple vocabulary wrapper."""
    vqa = VQA(annfile, quesfile)
    counter = Counter()
    ids = vqa.getQuesIds()
    for i, qid in enumerate(ids):
        question = vqa.qqa[qid]['question']
        tokens = nltk.tokenize.word_tokenize(question.lower())
        counter.update(tokens)

        if (i + 1) % 1000 == 0:
            print("[{}/{}] Tokenized the questions.".format(i + 1, len(ids)))

    # If the word frequency is less than 'threshold', then the word is discarded.
    words = [word for word, cnt in counter.items() if cnt >= threshold]

    # Create a vocab wrapper and add some special tokens.
    vocab = Vocabulary()
    vocab.add_word('<pad>')
    vocab.add_word('<start>')
    vocab.add_word('<end>')
    vocab.add_word('<unk>')

    # Add the words to the vocabulary.
    for word in words:
        vocab.add_word(word)
    return vocab
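
# Usage sketch for build_vocab (the paths, threshold, and 'vocab.pkl'
# output name below are hypothetical):
import pickle

vocab = build_vocab('data/Annotations/v2_mscoco_train2014_annotations.json',
                    'data/Questions/v2_OpenEnded_mscoco_train2014_questions.json',
                    threshold=4)
with open('vocab.pkl', 'wb') as f:
    pickle.dump(vocab, f)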
Example #2
    def __init__(self, split):
        dataDir = 'data'
        versionType = 'v2_'  # this should be '' when using VQA v1.0 dataset
        taskType = 'OpenEnded'  # 'OpenEnded' only for v2.0. 'OpenEnded' or 'MultipleChoice' for v1.0
        dataType = 'mscoco'  # 'mscoco' only for v2.0. 'mscoco' for real and 'abstract_v002' for abstract for v1.0.
        if 'train' in split:
            dataSubType = 'train2014'
        elif 'val' in split:
            dataSubType = 'val2014'
        else:
            raise ValueError("split must contain 'train' or 'val'")
        annFile = '%s/Annotations/%s%s_%s_annotations.json' % (
            dataDir, versionType, dataType, dataSubType)
        quesFile = '%s/Questions/%s%s_%s_%s_questions.json' % (
            dataDir, versionType, taskType, dataType, dataSubType)
        imgDir = '%s/Images/%s/' % (dataDir, dataSubType)

        self.dataSubType = dataSubType
        self.split = split
        self.imgDir = imgDir

        print(annFile, quesFile)

        # Initialize VQA API
        vqa = VQA(annFile, quesFile)
        self.vqa = vqa

        #img_ids = vqa.getImgIds() # get all
        #self.img_ids = img_ids
        self.question_ids = vqa.getQuesIds()  # get all

        imagenet_mean = [0.485, 0.456, 0.406]
        imagenet_std = [0.229, 0.224, 0.225]
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),  # ImageNet standard
            transforms.ToTensor(),
            transforms.Normalize(mean=imagenet_mean, std=imagenet_std)
        ])

        # Character set used to map letters to numbers
        self.all_letters = string.ascii_letters

        # Get top 3000 answers
        with open(annFile, 'r') as f:
            train_ann = json.load(f)
        all_answers = []
        for ann in train_ann['annotations']:
            for answer in ann['answers']:
                all_answers.append(answer['answer'])
        if self.split == 'train':
            print('Computing top K answers')
            top_answers = self.get_top_k_answers(all_answers, 3000)
            self.top_answers = top_answers
            self.inverse_top_answers = {v: k for k, v in top_answers.items()}
            with open('train.pickle', 'rb') as feature_file:
                self.img_features = pickle.load(feature_file)

        if self.split == 'val':
            with open('val.pickle', 'rb') as feature_file:
                self.img_features = pickle.load(feature_file)
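
    # The constructor above calls self.get_top_k_answers, which this excerpt
    # does not show. A plausible sketch (an assumption, not the project's
    # code; assumes collections.Counter is imported):
    def get_top_k_answers(self, answers, k):
        counts = Counter(answers)
        # answer string -> class index, ordered by frequency
        return {ans: i for i, (ans, _) in enumerate(counts.most_common(k))}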
Example #3
class preprocessing:
	def __init__(self, annotation_file=annFile, question_file=quesFile):
		self.vqar = VQA(annotation_file, question_file)
		self.annIds = self.vqar.getQuesIds()
		self.anns = self.vqar.loadQA(self.annIds)  # annotation dict for every question

		self.l = [a['multiple_choice_answer'] for a in self.anns]
		self.c = collections.Counter(self.l)
		self.Selected_key = []
		self.Selected_keys = {}
		self.i = 0
		for a in self.c.most_common(1000):
			self.Selected_key.append(a[0])
			self.Selected_keys[a[0]] = self.i
			self.i += 1

		self.Question_element = []
		for ele in self.anns:
			if ele['multiple_choice_answer'] in self.Selected_keys:
				self.Question_element.append(ele)
		self.qqa = {ann['question_id']: [] for ann in self.Question_element}
		print('assigning questions')
		y = 0
		for ques in self.vqar.questions['questions']:
			y += 1
			if y % 10000 == 0:
				print('done', y)
			if ques['question_id'] in self.qqa:
				self.qqa[ques['question_id']] = ques
		print('assigning questions finished')
		ques_words = []
		for ann in self.Question_element:
			quesId = ann['question_id']
			for word in self.qqa[quesId]['question'].split():
				ques_words.append(word)
		s = collections.Counter(ques_words)
		self.Selected_ques = {}
		j = 0
		for a in s.most_common(5000):
			self.Selected_ques[a[0]] = j
			j += 1

		print('elements list completed')

	def load_class_dict(self):
		return self.Selected_keys

	def load_Q_final(self):
		return self.Selected_ques
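
# Usage sketch (assumes annFile and quesFile name a VQA annotations/questions
# pair, as in the class defaults above):
prep = preprocessing(annFile, quesFile)
answer_classes = prep.load_class_dict()  # top-1000 answers -> class index
question_vocab = prep.load_Q_final()     # top-5000 question words -> index
print(len(answer_classes), len(question_vocab))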
Example #4
quesTypes can be one of the following
..
what color 	what kind 	what are 	what type  	is the
is this		how many 	are 		does  		where
is there 	why 		which		do 		what does 
what time 	who 		what sport 	what animal 	what brand
"""

##### Load VQA dataset

print("Enter the quesTypes ('what color', 'is this', ..., 'all')")
# quesTypes = input()
quesTypes = 'what is'

if quesTypes == 'all':
    annIdsA = vqa.getQuesIds()
    tannIdsA = tvqa.getQuesIds()
    imgIdsA = vqa.getImgIds()
    timgIdsA = tvqa.getImgIds()
else:
    annIdsA = vqa.getQuesIds(quesTypes=quesTypes)
    tannIdsA = tvqa.getQuesIds(quesTypes=quesTypes)
    imgIdsA = vqa.getImgIds(quesTypes=quesTypes)
    timgIdsA = tvqa.getImgIds(quesTypes=quesTypes)

annsA = vqa.loadQA(annIdsA)
tannsA = tvqa.loadQA(tannIdsA)

if len(annsA) > TR_LIMIT_SIZE:
    del annsA[TR_LIMIT_SIZE:]
    del imgIdsA[TR_LIMIT_SIZE:]
with open('./model_definition_100iter.json') as modelReader:
    json_read = modelReader.read()
model = model_from_json(json_read)
model.load_weights('./model_weights_100iter.h5py')

vqaVal = VQA(annFile2, quesFile2)

newdataSubType = 'analysis1'
outputQuestionFile = '%s/Questions/%s_%s_%s_questions.json' % (
    dataDir, taskType, dataType, newdataSubType)
outputAnnotationFile = '%s/Annotations/%s_%s_annotations.json' % (
    dataDir, dataType, newdataSubType)
# vqaAnalysis = vqaVal
newQuestion = 'yes'
questionIndex = 0
ids = vqaVal.getQuesIds()
anns = vqaVal.loadQA(ids)

if not os.path.exists(outputAnnotationFile) or os.stat(
        outputAnnotationFile).st_size == 0:
    outputQuestionWriter = open(outputQuestionFile, 'w')
    outputAnnotationWriter = open(outputAnnotationFile, 'w')

    outputQuestions = {}
    outputAnnotations = {}

    outputAnnotations['info'] = {}
    outputAnnotations['info'][
        'description'] = 'This is the dataset created for further analysis of the VQA task.'
    outputAnnotations['info']['url'] = ' '
    outputAnnotations['info']['version'] = '1.0'
Example #6
dataType = 'mscoco'  # 'mscoco' for real and 'abstract_v002' for abstract
dataSubType = 'train2014'
annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType,
                                                     dataSubType)
quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType,
                                                     dataType, dataSubType)
imgDir = '%s/%s/' % (dataDir, dataSubType)

# initialize VQA api for QA annotations
vqa = VQA(annFile, quesFile)

# load and display QA annotations for given question types
"""
All possible quesTypes for abstract and mscoco have been provided in the respective text files in the ../QuestionTypes/ folder.
"""
annIds = vqa.getQuesIds(quesTypes='how many')
anns = vqa.loadQA(annIds)
randomAnn = random.choice(anns)
vqa.showQA([randomAnn])
imgId = randomAnn['image_id']
imgFilename = 'COCO_' + dataSubType + '_' + str(imgId).zfill(12) + '.jpg'
if os.path.isfile(imgDir + imgFilename):
    I = io.imread(imgDir + imgFilename)
    plt.imshow(I)
    plt.axis('off')
    plt.show()

# load and display QA annotations for given answer types
"""
ansTypes can be one of the following
yes/no
Example #7
File: test.py  Project: yappi62/keras
        x = [word_idx[w] for w in ques]
        y = [word_idx[w] for w in ans]
        rX.append(x)
        rY.append(y + [END_MARK])
        limit += 1
        if limit == LIMIT_SIZE:
            break
    return pad_sequences(rX, maxlen=ques_maxlen), pad_sequences(rY, maxlen=ans_maxlen)


print("Enter the quesTypes ('what color', 'is this', ..., 'all')")
# quesTypes = input()
quesTypes = "is this"

if quesTypes == "all":
    annIdsA = vqa.getQuesIds()
    tannIdsA = tvqa.getQuesIds()
else:
    annIdsA = vqa.getQuesIds(quesTypes=quesTypes)
    tannIdsA = tvqa.getQuesIds(quesTypes=quesTypes)

annsA = vqa.loadQA(annIdsA)
tannsA = tvqa.loadQA(tannIdsA)

train = get_inputList(vqa, annsA)
test = get_inputList(tvqa, tannsA)
vocab = sorted(list(set(train + test)))
# Reserve 0 for masking via pad_sequences
vocab_size = len(vocab) + 1
word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
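
# Encoding sketch: how a tokenized question maps through word_idx and
# pad_sequences (the sample tokens are hypothetical; ques_maxlen is assumed
# to be available as in get_inputList above; unseen words fall back to the
# masked index 0):
sample = ['is', 'this', 'a', 'cat']
encoded = [word_idx.get(w, 0) for w in sample]
padded = pad_sequences([encoded], maxlen=ques_maxlen)
print(padded)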
Example #8
quesFile = '/srv/share/vqa/release_data/mscoco/vqa/OpenEnded_mscoco_train2014_questions.json' # insert appropriate path
dataSubType = 'train2014'
qtype = ['what color','what is on the','what sport is']
# path to images 
data_dir = '/srv/share/data/mscoco/coco/images/train2014/'
model = '/home/ashwin/caffe/models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
prototxt = '/home/ashwin/caffe/models/bvlc_reference_caffenet/deploy.prototxt'
# load QAs
vqa = VQA(annFile, quesFile) 
# add question type

annIds = []
anns = []
ids = []
for qitem in qtype:
    annIds = vqa.getQuesIds(quesTypes=qitem)
    anns.extend(vqa.loadQA(annIds))
    ids.extend(vqa.getImgIds(quesTypes=qitem))

UIDs = list(np.unique(np.array(ids)))

# extract fc7 features
caffe.set_mode_gpu()
caffe.set_device(1)
net = caffe.Net(prototxt,model,caffe.TEST)
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2,0,1))
transformer.set_mean('data', np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy').mean(1).mean(1)) # mean pixel
transformer.set_raw_scale('data', 255)  # the reference model operates on images in [0,255] range instead of [0,1]
transformer.set_channel_swap('data', (2,1,0))  # the reference model has channels in BGR order instead of RGB
net.blobs['data'].reshape(1,3,227,227)
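
# Sketch of the fc7 extraction loop this setup leads into (an assumption
# about the elided continuation; the COCO filename pattern follows the
# other examples on this page):
fc7_feats = {}
for img_id in UIDs:
    img_path = data_dir + 'COCO_' + dataSubType + '_' + str(img_id).zfill(12) + '.jpg'
    net.blobs['data'].data[...] = transformer.preprocess('data', caffe.io.load_image(img_path))
    net.forward()
    fc7_feats[img_id] = net.blobs['fc7'].data[0].copy()  # copy: the blob buffer is reused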
Example #9
File: vqaDemo.py  Project: caomw/VQA
dataDir = "../../VQA"
taskType = "OpenEnded"
dataType = "mscoco"  # 'mscoco' for real and 'abstract_v002' for abstract
dataSubType = "train2014"
annFile = "%s/Annotations/%s_%s_annotations.json" % (dataDir, dataType, dataSubType)
quesFile = "%s/Questions/%s_%s_%s_questions.json" % (dataDir, taskType, dataType, dataSubType)
imgDir = "%s/Images/%s/%s/" % (dataDir, dataType, dataSubType)

# initialize VQA api for QA annotations
vqa = VQA(annFile, quesFile)

# load and display QA annotations for given question types
"""
All possible quesTypes for abstract and mscoco have been provided in the respective text files in the ../QuestionTypes/ folder.
"""
annIds = vqa.getQuesIds(quesTypes="how many")
anns = vqa.loadQA(annIds)
randomAnn = random.choice(anns)
vqa.showQA([randomAnn])
imgId = randomAnn["image_id"]
imgFilename = "COCO_" + dataSubType + "_" + str(imgId).zfill(12) + ".jpg"
if os.path.isfile(imgDir + imgFilename):
    I = io.imread(imgDir + imgFilename)
    plt.imshow(I)
    plt.axis("off")
    plt.show()

# load and display QA annotations for given answer types
"""
ansTypes can be one of the following
yes/no
Example #10
class data_vqa:
    """ Data class of VQA dataset. """
    def __init__(
        self,
        resize_size=RESIZE_SIZE,
        batch_size=BATCH_SIZE,
        num_threads=NUM_THREADS,
        fixed_num=FIXED_NUM,
    ):
        """ Initlization """

        print '[__init__]'

        self.fixed_num = fixed_num

        # Initialize the official json processing api
        if os.path.isfile(pkl_file):
            print '[info] init with saved pkl file.'
            load = open(pkl_file, 'rb')
            self.imgid_dict = pickle.load(load)
            self.question_processed = pickle.load(load)
            self.confidence = pickle.load(load)
            self.answers = pickle.load(load)
            self.answer_dict = pickle.load(load)
            self.max_len_question = pickle.load(load)
            load.close()
        else:
            print '[info] init without saved pkl file.'
            self.data = VQA(annFile, quesFile)
            self.data_ids = self.data.getQuesIds()
            self.data_len = len(self.data_ids)
            print(self.data_len)
            self.copy_data()
            del self.data
            del self.data_ids
            self.question_processed = self.process_question(\
                    self.questions,
                    self.max_len_question)
            del self.questions
            self.build_dict_question()
            self.build_dict_answer()
            save = open(pkl_file, 'wb')
            pickle.dump(self.imgid_dict, save, -1)
            pickle.dump(self.question_processed, save, -1)
            pickle.dump(self.confidence, save, -1)
            pickle.dump(self.answers, save, -1)
            pickle.dump(self.answer_dict, save, -1)
            pickle.dump(self.max_len_question, save, -1)
            save.close()
            print '[info] saved pkl file.'

        # Build the reader of the tfrecord file
        # The tfrecord file is generated by tr.write.py

        feature = {
            'image': tf.FixedLenFeature([], tf.string),
            'imgid': tf.FixedLenFeature([], tf.int64)
        }
        filename_queue = tf.train.string_input_producer([trDir])
        reader = tf.TFRecordReader()
        (_, serialized_example) = reader.read(filename_queue)
        features = tf.parse_single_example(serialized_example,
                                           features=feature)
        image = tf.decode_raw(features['image'], tf.uint8)
        image = tf.cast(image, tf.float32)
        image = image / 255.
        imgid = tf.cast(features['imgid'], tf.int32)
        image = tf.reshape(image, [resize_size, resize_size, 3])
        (self.op_images, self.op_imgids) = \
            tf.train.shuffle_batch([image, imgid],
                                   batch_size=batch_size,
                                   capacity=20480,
                                   num_threads=num_threads,
                                   min_after_dequeue=10240)

    def copy_data(self):
        """ Copy the data from the official json api """

        print '    [copy_data]'
        self.answers = [[
            self.data.qa[data_id]['answers'][i]['answer'].encode(
                'ascii', 'ignore').lower() for i in range(10)
        ] for data_id in self.data_ids]
        self.confidence = [[(lambda x: (1. if x == 'yes' else 0.5))(
            self.data.qa[data_id]['answers'][i]['answer_confidence'].encode(
                'ascii', 'ignore')) for i in range(10)]
                           for data_id in self.data_ids]
        self.imgids = [
            self.data.qa[data_id]['image_id'] for data_id in self.data_ids
        ]
        self.questions = \
            [self.preprocessing(self.data.qqa[ques_id]['question'])
             for ques_id in self.data_ids]
        self.max_len_question = max(
            [len(question.split()) for question in self.questions])
        print self.max_len_question

    def build_dict_question(self):
        """ Build the mapping from image's imgid to index of 
            image's questions index """

        print '    [build_dict_question]'
        self.imgid_dict = {}
        imgid_set = list(set(self.imgids))
        for imgid in imgid_set:
            self.imgid_dict[imgid] = []
        for i in range(self.data_len):
            imgid = self.imgids[i]
            self.imgid_dict[imgid].append(i)

    def test_question(self):
        print '    [test_question]'
        chars = set()
        for question in self.questions:
            chars.update(question)
        char_list = list(chars)
        print len(char_list)

    def build_dict_answer(self):
        """ Build the mapping from answer's char set to id """

        print '    [build_dict_answer]'
        answer_list = []
        for answers in self.answers:
            for answer in answers:
                answer_list.append(answer)
        counts = Counter(answer_list)
        top_n = counts.most_common(self.fixed_num)
        fixed_list = [elem[0] for elem in top_n]

        # print(fixed_list)

        total = 0
        for elem in top_n:
            total += elem[1]
        print top_n[self.fixed_num - 1][1]
        print total
        print len(answer_list)

        self.answer_dict = dict((c, i) for (i, c) in enumerate(fixed_list))

    def preprocessing(self, text):
        """ Replace the unusual character in the text """

        to_replace = [
            '!',
            '#',
            '%',
            '$',
            "'",
            '&',
            ')',
            '(',
            '+',
            '*',
            '-',
            ',',
            '/',
            '.',
            '1',
            '0',
            '3',
            '2',
            '5',
            '4',
            '7',
            '6',
            '9',
            '8',
            ';',
            ':',
            '?',
            '_',
            '^',
        ]
        lowered = text.encode('ascii', 'ignore').lower()
        replacing = lowered
        for char_to_replace in to_replace:
            replacing = replacing.replace(char_to_replace,
                                          ' ' + char_to_replace + ' ')
        # stem each whitespace-separated token and rejoin with single spaces
        return ' '.join([stem(item) for item in replacing.split()])

    def tokenization(self, sentence, preprocess=True):
        """ Split the sentence into words """

        if preprocess:
            sentence = self.preprocessing(sentence)
        return sentence.split()

    def process_question(self, sentences, max_len_question):
        """ Preprocessing the question data """

        print '    [process_question]'
        question_list = []
        for sentence in sentences:
            splited = sentence.split()
            for word in splited:
                question_list.append(word)
        counts = Counter(question_list)
        top_n = counts.most_common(self.fixed_num)
        fixed_list = [elem[0] for elem in top_n]

        # print(fixed_list)

        total = 0
        for elem in top_n:
            total += elem[1]
        print top_n[self.fixed_num - 1][1]
        print total
        print len(question_list)

        self.question_dict = dict((c, i) for (i, c) in enumerate(fixed_list))

        processed_question = []
        for sentence in sentences:
            splited = sentence.split()
            processed_sentence = []
            for word in splited:
                processed_sentence.append(
                    self.question_dict.get(word, self.fixed_num))
            processed_sentence = processed_sentence + [self.fixed_num] \
                * (max_len_question - len(splited))
            processed_question.append(processed_sentence)

        return processed_question

    def get_batch(self, imgids):
        """ Get the next batch of data """

        questions = []
        answers = []
        confidences = []
        # (images, imgids) = sess.run([self.op_images, self.op_imgids])
        for imgid in imgids:
            index = random.choice(self.imgid_dict[imgid])
            questions.append(self.question_processed[index])
            answer_to_choice = random.choice(range(10))
            confidences.append(self.confidence[index][answer_to_choice])
            answer = self.answers[index][answer_to_choice]
            answers.append(self.answer_dict.get(answer, self.fixed_num))
        return (np.array(questions), np.array(answers), np.array(confidences))
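
# Usage sketch (an assumption about how the queue ops and get_batch are
# combined; annFile, quesFile, pkl_file and trDir must already be configured;
# standard TF1 queue-runner boilerplate):
data = data_vqa()
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    images, imgids = sess.run([data.op_images, data.op_imgids])
    questions, answers, confidences = data.get_batch(imgids)
    coord.request_stop()
    coord.join(threads)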
Example #11
File: vqaDemo.py  Project: tylin/VQA
how many 
are 
does 
where 
is there 
why 
which 
do 
what does 
what time 
who 
what sport 
what animal 
what brand
"""
annIds = vqa.getQuesIds(quesTypes='how many')
anns = vqa.loadQA(annIds)
randomAnn = random.choice(anns)
vqa.showQA([randomAnn])
imgId = randomAnn['image_id']
imgFilename = 'COCO_' + dataSubType + '_'+ str(imgId).zfill(12) + '.jpg'
if os.path.isfile(imgDir + imgFilename):
	I = io.imread(imgDir + imgFilename)
	plt.imshow(I)
	plt.axis('off')
	plt.show()

# load and display QA annotations for given answer types
"""
ansTypes can be one of the following
yes/no
with open("ans_gt_dict.json", "w") as h:
    h.write(jsave0)

#gt_mat = np.zeros((4437570,num_words), dtype = int)

img_id_dict = {}
ques_id_dict = {}
ques_img_dict = {}
ques_vec_dict = {}

First = True
i = 0
for cont in tqdm(contents):
    print(cont)
    annIds = vqa.getQuesIds(quesTypes=cont)
    anns = vqa.loadQA(annIds)

    for ann in anns:
        q_id = ann['question_id']
        img_id = ann['image_id']
        img_id_dict[i] = img_id
        ques_id_dict[i] = q_id
        ques_img_dict[q_id] = img_id
        anss = ann['answers']
        gt_dict = {}
        #gt_vec = np.zeros(num_words, dtype = int)
        for ans in anss:
            an = ans['answer']
            if an in ans_gt_dict:
                vec_idx = ans_gt_dict[an]
Example #13
class COCODataset(Dataset):
    
    def __init__(self, vocab, answers, rootDir='../../data2', dataSubType='train2014'):
        
        annFile ='{}/v2_mscoco_{}_annotations.json'.format(rootDir, dataSubType)
        quesFile ='{}/v2_OpenEnded_mscoco_{}_questions.json'.format(rootDir, dataSubType)
        self.vqa = VQA(annFile, quesFile)
        self.imgDir = '{}/{}'.format(rootDir, dataSubType)
        self.vocab = vocab
        self.answers = answers
        self.quesIds = self.vqa.getQuesIds()
        self.dataSubType = dataSubType
        self.transform = transforms.Compose([
                         transforms.Resize(299),
                         transforms.CenterCrop(299),
                         transforms.ToTensor(),
                         transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                              std=[0.229, 0.224, 0.225]),
                         ])
        
    def __len__(self):
        return len(self.quesIds)
        
    def __getitem__(self, index):
        
        """
        returns:
            question: tensor of word indices
            transformed image: tensor of shape (3, 299, 299)
            answers: tensor of indices into the 3000 most frequently occurring
                answers (the filter dropping answers outside the top 3000 is
                commented out below)
        """
        
        quesId = self.quesIds[index]
        
        img_id = self.vqa.qqa[quesId]['image_id'] 
        img_id = str(img_id).zfill(12)
        path = 'COCO_{}_{}.jpg'.format(self.dataSubType, img_id)
#         print(os.path.join(self.imgDir, path))
        image = Image.open(os.path.join(self.imgDir, path)).convert('RGB')

        image = self.transform(image)
            
            
        # Convert question to word ids
        vocab = self.vocab
        question = self.vqa.qqa[quesId]['question']
#         print(question)
        
        tokens = nltk.tokenize.word_tokenize(question.lower())
        question_list = []
        question_list.append(vocab('<start>'))
        question_list.extend([vocab(token) for token in tokens])
        question_list.append(vocab('<end>'))
        question_tensor = torch.Tensor(question_list)
        
        qa = self.vqa.loadQA(quesId)
        
        ans_list = [a['answer'] for a in qa[0]['answers']]
#         print(ans_list)
        
        ans_index_list = [self.answers(ans) for ans in ans_list] #if ans in self.answers.ans2idx.keys()]
        answer_tensor = torch.Tensor(ans_index_list)
        
        return question_tensor, image, answer_tensor     

    def subset(self, fraction=0.5, count=None, shuffle=True):
        '''
        Return a subset with the given fraction (or count) of the questions;
        count takes priority over fraction.
        '''
        if not count:
            count = int(len(self.quesIds) * fraction)
        print('Getting subset of length', count, 'out of', len(self))
        subset = copy.deepcopy(self)
        if shuffle: random.shuffle(subset.quesIds)
        subset.quesIds = subset.quesIds[:count]
        return subset
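
# Usage sketch (vocab and answers are assumed to be the callables the
# dataset expects; the collate_fn is an assumption needed because the
# question and answer tensors vary in length, so the default collate
# would fail):
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

def vqa_collate(batch):
    questions, images, answers = zip(*batch)
    questions = pad_sequence(questions, batch_first=True)  # pads with 0 = <pad>
    return questions, torch.stack(images), answers

loader = DataLoader(COCODataset(vocab, answers), batch_size=32,
                    shuffle=True, collate_fn=vqa_collate)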
Example #14
class DataReader:
    
    def __init__(self, data_type=TRAIN_DATA_TYPE, shape=(224, 224)):
        """
        shape: shape of the output image data
        data_type: which dataset split to load
        """
        self.data_type = data_type
        annFile = '{}\\annotations\\{}{}_{}_annotations.json'.format(DATA_PATH, VERSION_TYPE, DATA_TYPE, self.data_type)
        quesFile = '{}\\Questions\\{}{}_{}_{}_questions.json'.format(DATA_PATH, VERSION_TYPE, TASK_TYPE, DATA_TYPE, self.data_type)
        self.vqa = VQA(annFile, quesFile)
        self.img_ids = list(self.vqa.imgToQA.keys())
        self.pos = 0
        self.shape = shape
        questions = self.vqa.getQuestionsFile()
        questions = questions['questions']
        # qf maps question_id -> question text
        self.qf = dict()
        for q in questions:
            self.qf[q["question_id"]] = q["question"]

    def get_pic_data(self, pic_id):
        """
        Fetch the image data.
        pic_id: id of the image
        Returns a 3-D numpy array.
        """
        imgFilename = 'COCO_' + self.data_type + '_'+ str(pic_id).zfill(12) + '.jpg'
        path = '{}\\images\\{}\\'.format(DATA_PATH,self.data_type)
        img = Image.open(path+imgFilename)
        img = img.resize(self.shape)
        return numpy.array(img)

    def get_pic_qa(self, pic_id):
        """
        Fetch the questions and answers for an image.
        Returns [{'question': str, 'answers': [{'answer': str, 'answer_confidence': 'yes'|'maybe'|'no', 'answer_id': num}, ...]}, ...]
        """
        q = self.vqa.getQuesIds(imgIds=[pic_id])
        qas = self.vqa.loadQA(q)
        result = []
        for qa in qas:
            q = self.get_question(qa['question_id'])
            result.append({"question":q,"answers":qa["answers"]})
        return result

    def set_pos(self, pos=0):
        """
        Set the current read cursor; out-of-range positions wrap to the start.
        """
        self.pos = pos if pos < len(self.img_ids) else 0

    def get_pos(self):
        """
        获取当前pos
        """
        return self.pos

    def get_next_pic_id(self):
        """
        获取下一张图片的id(即当前游标所在图像的id)
        并且使索引+1
        """
        img_id = self.img_ids[self.pos]
        self.pos = self.pos + 1 if not self.pos + 1 == len(self.img_ids) else 0
        return img_id

    def get_question(self,question_id):
        return self.qf[question_id]
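
# Usage sketch (DATA_PATH and the other module-level constants are assumed
# to be configured for the class above):
reader = DataReader()
pic_id = reader.get_next_pic_id()
img = reader.get_pic_data(pic_id)   # (224, 224, 3) array
qas = reader.get_pic_qa(pic_id)     # questions with their answers
print(img.shape, len(qas))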
modelReader = open("./model_definition_100iter.json")
json_read = modelReader.read()
model = model_from_json(json_read)
model.load_weights("./model_weights_100iter.h5py")

vqaVal = VQA(annFile2, quesFile2)


newdataSubType = "analysis1"
outputQuestionFile = "%s/Questions/%s_%s_%s_questions.json" % (dataDir, taskType, dataType, newdataSubType)
outputAnnotationFile = "%s/Annotations/%s_%s_annotations.json" % (dataDir, dataType, newdataSubType)
# vqaAnalysis = vqaVal
newQuestion = "yes"
questionIndex = 0
ids = vqaVal.getQuesIds()
anns = vqaVal.loadQA(ids)


if not os.path.exists(outputAnnotationFile) or os.stat(outputAnnotationFile).st_size == 0:
    outputQuestionWriter = open(outputQuestionFile, "w")
    outputAnnotationWriter = open(outputAnnotationFile, "w")

    outputQuestions = {}
    outputAnnotations = {}

    outputAnnotations["info"] = {}
    outputAnnotations["info"]["description"] = "This is the dataset created for further analysis of the VQA task."
    outputAnnotations["info"]["url"] = " "
    outputAnnotations["info"]["version"] = "1.0"
    outputAnnotations["info"]["year"] = 2015