示例#1
0
	def get_item(self, idx):
		"""Build the ``Sample`` for entry ``idx`` of ``self.vqamb_data``.

		The returned sample carries, in this order: ``img_id``, ``point``,
		``gt_bbox``, ``text``, ``answers``, ``targets``, ``image_feature_0``,
		``pt_bbox``, ``img_bbox`` and, when ``self.config.use_context`` is
		set, ``context_feature_0``.

		Files read with ``torch.load`` (always mapped to the CPU):

		* detectron features: ``self.detectron_folder + "<id>"``, followed by
		  ``",<x>,<y>"`` when the folder name contains ``'pt'``, then ``".pt"``
		* point bounding boxes: ``"<id>,<x>,<y>.pt"`` (no directory prefix)
		* image bounding boxes: ``"<id>.pt"`` (no directory prefix)
		* context features: ``self.context_folder + "<id>,<x>,<y>.pt"``

		Args:
			idx: index into ``self.vqamb_data``.

		Returns:
			The populated ``Sample``.
		"""
		# Every feature / box tensor is truncated or zero-padded to this many rows.
		max_rows = 100
		cpu = torch.device('cpu')

		data = self.vqamb_data[idx]
		current_sample = Sample()

		# store image id
		current_sample.img_id = data['id']

		# store the point and the ground-truth box; the box is converted from
		# (x, y, w, h) to (x_min, y_min, x_max, y_max)
		current_sample.point = data['point']
		bbox = data['bbox']
		current_sample.gt_bbox = torch.Tensor(
			[bbox['x'], bbox['y'], bbox['x'] + bbox['w'], bbox['y'] + bbox['h']])

		# process question
		tokens = tokenize(data["pt_question"], remove=["?"], keep=["'s"])
		processed = self.text_processor({"tokens": tokens})
		current_sample.text = processed["text"]

		# process answers
		processed = self.answer_processor({"answers": [data['ans']]})
		current_sample.answers = processed["answers"]
		current_sample.targets = processed["answers_scores"][1:] # remove unknown index

		# Per-point files are keyed by "<id>,<x>,<y>"; build the suffix once.
		image_id = str(data['id'])
		point = data['point']
		point_suffix = ',' + str(point['x']) + ',' + str(point['y'])

		# Detectron features ----------------
		# TODO: read in detectron image instead if detectron is to be built
		detectron_path = self.detectron_folder + image_id
		if 'pt' in self.detectron_folder:
			detectron_path += point_suffix
		detectron_path += '.pt'

		detectron_feat = torch.load(detectron_path, map_location=cpu)
		if self.config.pad_detectron:
			detectron_feat = self._fit_rows(detectron_feat, max_rows)
		current_sample.image_feature_0 = detectron_feat
		# ---------------------------------------------

		# NOTE(review): the bounding-box directory is hard-coded to '' so both
		# box files below are resolved against the current working directory.
		bbox_dir = ''

		# point bounding boxes (always fitted to max_rows)
		bboxes = torch.load(bbox_dir + image_id + point_suffix + '.pt', map_location=cpu)
		current_sample.pt_bbox = self._fit_rows(bboxes, max_rows)

		# image bounding boxes (always fitted to max_rows)
		bboxes = torch.load(bbox_dir + image_id + '.pt', map_location=cpu)
		current_sample.img_bbox = self._fit_rows(bboxes, max_rows)

		# Context features --------------------
		if self.config.use_context:
			context_path = self.context_folder + image_id + point_suffix + '.pt'
			# NOTE(review): squeeze() drops every size-1 dimension, so a file
			# holding a single row would become 1-D and the padding below
			# (which reads shape[1]) would not apply cleanly -- confirm the
			# stored shape.
			context_feat = torch.load(context_path, map_location=cpu).squeeze()
			if self.config.pad_context:
				context_feat = self._fit_rows(context_feat, max_rows)
			current_sample.context_feature_0 = context_feat
		# ---------------------------------------------

		return current_sample

	@staticmethod
	def _fit_rows(feat, num_rows):
		"""Return ``feat`` truncated or zero-padded along dim 0 to ``num_rows`` rows.

		Padding rows are built with ``torch.zeros(missing, feat.shape[1])``, so
		padding requires ``feat`` to have a second dimension. A tensor that
		already has ``num_rows`` rows is returned unchanged.
		"""
		rows = feat.shape[0]
		if rows > num_rows:
			return feat[:num_rows]
		if rows < num_rows:
			pad = torch.zeros(num_rows - rows, feat.shape[1])
			return torch.cat([feat, pad], dim=0)
		return feat
示例#2
0
    def get_item(self, idx):
        """Build the ``Sample`` for entry ``idx`` of ``self.vqamb_data``.

        The returned sample carries, in this order: ``img_id``, ``qa_id``,
        ``points``, ``gt_bbox``, ``text``, ``answers``, ``targets`` and
        ``image_feature_0``.

        Detectron features are read with ``torch.load`` (mapped to the CPU)
        from ``self.detectron_folder + "<id>"``, followed by ``",<x>,<y>"`` of
        the first point when the folder name contains ``'pt'``, then ``".pt"``.

        Bounding-box features are not loaded: the load and the
        ``current_sample.bbox`` assignment were already disabled, so the
        leftover bbox padding (which referenced the never-assigned
        ``bbox_feat`` and raised ``NameError`` whenever padding was needed)
        has been removed together with the unused bbox path.

        Args:
            idx: index into ``self.vqamb_data``.

        Returns:
            The populated ``Sample``.
        """
        data = self.vqamb_data[idx]

        current_sample = Sample()

        # store question and image id
        current_sample.img_id = data['id']
        current_sample.qa_id = data['qa_index']

        # store points
        current_sample.points = data['points']

        # Ground-truth box of the first object, converted from (x, y, w, h)
        # to (x_min, y_min, x_max, y_max).
        obj = data['all_objs'][0]
        current_sample.gt_bbox = torch.Tensor(
            [obj['x'], obj['y'], obj['x'] + obj['w'], obj['y'] + obj['h']])

        # process question
        tokens = tokenize(data["question"], remove=["?"])
        processed = self.text_processor({"tokens": tokens})
        current_sample.text = processed["text"]

        # process answers
        processed = self.answer_processor({"answers": data['all_ans']})
        current_sample.answers = processed["answers"]
        # remove unknown index
        current_sample.targets = processed["answers_scores"][1:]

        # Detectron features ----------------
        # TODO: read in detectron image instead if detectron is to be built
        detectron_path = self.detectron_folder + str(data['id'])
        if 'pt' in self.detectron_folder:
            point = data['points'][0]
            detectron_path += ',' + str(point['x']) + ',' + str(point['y'])
        detectron_path += '.pt'

        # NOTE(review): squeeze() drops every size-1 dimension, so a file
        # holding a single row becomes 1-D and the padding below (which reads
        # shape[1]) would fail -- confirm the stored shape.
        detectron_feat = torch.load(
            detectron_path, map_location=torch.device('cpu')).squeeze()

        # Fixed number of rows the features are truncated / zero-padded to.
        max_feat = 608 if self.config.grid else 100

        if self.config.pad_detectron:
            num_rows = detectron_feat.shape[0]
            if num_rows > max_feat:
                detectron_feat = detectron_feat[:max_feat]
            elif num_rows < max_feat:
                pad = torch.zeros(max_feat - num_rows,
                                  detectron_feat.shape[1])
                detectron_feat = torch.cat([detectron_feat, pad], dim=0)

        current_sample.image_feature_0 = detectron_feat
        # ---------------------------------------------

        return current_sample