def get_item(self, idx):
    """Assemble the ``Sample`` for entry ``idx`` of ``self.vqamb_data``.

    The sample receives the image and question/answer ids, the processed
    question text, the processed answers with their score targets, and the
    precomputed detectron features read from a ``.pt`` file.

    Args:
        idx: Index into ``self.vqamb_data``.

    Returns:
        A ``Sample`` with ``img_id``, ``qa_id``, ``text``, ``answers``,
        ``targets`` and ``image_feature_0`` populated.
    """
    entry = self.vqamb_data[idx]
    sample = Sample()

    # Keep the image id and question/answer id on the sample.
    sample.img_id = entry['id']
    sample.qa_id = entry['qa_id']

    # Question: tokenize (dropping "?" and keeping "'s"), then run the
    # text processor.
    question_tokens = tokenize(entry["question"], remove=["?"], keep=["'s"])
    sample.text = self.text_processor({"tokens": question_tokens})["text"]

    # Answer: the single ground-truth answer is wrapped in a list for the
    # answer processor.
    answer_out = self.answer_processor({"answers": [entry['answer']]})
    sample.answers = answer_out["answers"]
    # Skip the first score entry (the unknown-answer index).
    sample.targets = answer_out["answers_scores"][1:]

    # Detectron features ----------------
    # TODO: read in detectron image instead if detectron is to be built
    # File name is "<folder><id>.pt", or "<folder><id>,<x>,<y>.pt" when the
    # spatial option is enabled.
    path_pieces = [self.detectron_folder, str(entry['id'])]
    if self.config.spatial:
        point = entry['point']
        path_pieces += [',', str(point['x']), ',', str(point['y'])]
    path_pieces.append('.pt')
    features = torch.load(''.join(path_pieces),
                          map_location=torch.device('cpu'))

    # Optionally force exactly 100 rows: zero-pad short inputs, cut long ones.
    if self.config.pad_detectron:
        num_rows = features.shape[0]
        if num_rows < 100:
            filler = torch.zeros(100 - num_rows, features.shape[1])
            features = torch.cat([features, filler], dim=0)
        elif num_rows > 100:
            features = features[:100]

    sample.image_feature_0 = features
    # ---------------------------------------------
    return sample
def test_forward(self):
    """Run one random sample through ``CNNLSTM`` and compare with recorded values.

    Builds the model from ``self.config.model_attributes.cnn_lstm``, feeds it
    a single-sample ``SampleList`` tagged ``train`` / ``clevr``, and checks
    the reported loss, accuracy, score shape and score values.

    NOTE(review): the hard-coded expectations can only hold if the RNG state
    is fixed before this test runs (presumably in the test case set-up, which
    is not visible here) -- confirm.
    """
    cnn_lstm = CNNLSTM(self.config.model_attributes.cnn_lstm)
    cnn_lstm.build()
    cnn_lstm.init_losses_and_metrics()

    self.assertIsInstance(cnn_lstm, torch.nn.Module)

    # The order of the random draws below is kept as-is: changing it would
    # change the generated inputs and therefore the recorded outputs.
    sample = Sample()
    sample.text = torch.randint(1, 79, (10, ), dtype=torch.long)
    sample.image = torch.randn(3, 320, 480)
    sample.targets = torch.randn(32)

    batch = SampleList([sample])
    batch.dataset_type = "train"
    batch.dataset_name = "clevr"

    output = cnn_lstm(batch)

    scores = output["scores"]
    loss = output["losses"]["train/clevr/logit_bce"]
    accuracy = output["metrics"]["train/clevr/accuracy"]

    np.testing.assert_almost_equal(loss.item(), 19.2635, decimal=4)
    np.testing.assert_almost_equal(accuracy.item(), 0)
    self.assertEqual(scores.size(), torch.Size((1, 32)))

    # Recorded scores for the single sample in the batch (32 values).
    expected_scores = [
        -0.7598285675048828,
        -0.07029829174280167,
        -0.20382611453533173,
        -0.06990239024162292,
        0.7965695858001709,
        0.4730074405670166,
        -0.30569902062416077,
        0.4244227707386017,
        0.6511023044586182,
        0.2480515092611313,
        -0.5087617635726929,
        -0.7675772905349731,
        0.4361543357372284,
        0.0018743239343166351,
        0.6774630546569824,
        0.30618518590927124,
        -0.398895800113678,
        -0.13120117783546448,
        -0.4433199465274811,
        -0.25969570875167847,
        0.6798790097236633,
        -0.34090861678123474,
        0.0384102463722229,
        0.2484571784734726,
        0.0456063412129879,
        -0.428459107875824,
        -0.026385333389043808,
        -0.1570669412612915,
        -0.2377825379371643,
        0.3231588304042816,
        0.21098048985004425,
        -0.712349534034729,
    ]
    np.testing.assert_almost_equal(
        scores[0].tolist(), expected_scores, decimal=5)
def test_forward(self):
    """Run one random sample through ``CNNLSTM`` and compare with recorded values.

    Builds the model from ``self.config.model_attributes.cnn_lstm``, feeds it
    a single-sample ``SampleList`` tagged ``train`` / ``clevr``, and checks
    the loss and accuracy reported under the ``train/...`` keys, the score
    shape and the score values.

    NOTE(review): the hard-coded expectations can only hold if the RNG state
    is fixed before this test runs (presumably in the test case set-up, which
    is not visible here) -- confirm.
    """
    cnn_lstm = CNNLSTM(self.config.model_attributes.cnn_lstm)
    cnn_lstm.build()
    cnn_lstm.init_losses_and_metrics()

    self.assertIsInstance(cnn_lstm, torch.nn.Module)

    # The order of the random draws below is kept as-is: changing it would
    # change the generated inputs and therefore the recorded outputs.
    sample = Sample()
    sample.text = torch.randint(1, 79, (10, ), dtype=torch.long)
    sample.image = torch.randn(3, 320, 480)
    sample.targets = torch.randn(32)

    batch = SampleList([sample])
    batch.dataset_type = "train"
    batch.dataset_name = "clevr"

    output = cnn_lstm(batch)

    scores = output["scores"]
    loss = output["losses"]["train/logit_bce"]
    accuracy = output["metrics"]["train/accuracy"]

    np.testing.assert_almost_equal(loss.item(), 23.4751, decimal=4)
    np.testing.assert_almost_equal(accuracy.item(), 0)
    self.assertEqual(scores.size(), torch.Size((1, 32)))

    # Recorded scores for the single sample in the batch (32 values).
    expected_scores = [
        2.2298e-02, -2.4975e-01, -1.1960e-01, -5.0868e-01,
        -9.3013e-02, 1.3202e-02, -1.7536e-01, -3.1180e-01,
        1.5369e-01, 1.4900e-01, 1.9006e-01, -1.9457e-01,
        1.4924e-02, -1.1032e-01, 1.3777e-01, -3.6255e-01,
        -2.9327e-01, 5.6247e-04, -4.8732e-01, 4.0949e-01,
        -1.1069e-01, 2.9696e-01, 4.1903e-02, 6.7062e-02,
        7.0094e-01, -1.9898e-01, -2.9502e-03, -3.9040e-01,
        1.2218e-01, 3.7895e-02, 2.4472e-02, 1.7213e-01,
    ]
    np.testing.assert_almost_equal(
        scores[0].tolist(), expected_scores, decimal=5)
def get_item(self, idx):
    """Build the ``Sample`` for entry ``idx`` of ``self.questions``.

    Each call returns one ``Sample``; per the original author's note, the
    batch collator later merges these into an attribute-based ``SampleList``.

    Args:
        idx: Index into ``self.questions``.

    Returns:
        A ``Sample`` with ``text``, ``answers``, ``targets`` and ``image``
        set. ``image`` is a float32 tensor in channel-first layout with
        values scaled by 1/255.

    Raises:
        Whatever ``PIL.Image.open`` raises when the image file is missing or
        unreadable.
    """
    data = self.questions[idx]
    current_sample = Sample()

    # Question: keep ";" and "," as tokens, strip "?" and ".".
    tokens = tokenize(data["question"], keep=[";", ","], remove=["?", "."])
    processed = self.text_processor({"tokens": tokens})
    current_sample.text = processed["text"]

    # Answer: the single ground-truth answer is wrapped in a list for the
    # answer processor.
    processed = self.answer_processor({"answers": [data["answer"]]})
    current_sample.answers = processed["answers"]
    current_sample.targets = processed["answers_scores"]

    image_path = os.path.join(self.image_path, data["image_filename"])
    # Open through a context manager so the underlying file handle is
    # released deterministically instead of whenever the image object happens
    # to be garbage collected (this runs once per sample in the data loader).
    # ``convert`` loads the pixel data, so the array is complete before the
    # file is closed.
    with Image.open(image_path) as raw_image:
        pixels = np.true_divide(raw_image.convert("RGB"), 255)
    pixels = pixels.astype(np.float32)

    # Height x width x channel -> channel x height x width.
    current_sample.image = torch.from_numpy(pixels.transpose(2, 0, 1))

    return current_sample
def get_item(self, idx):
    """Build the ``Sample`` for entry ``idx`` of ``self.vqamb_data``.

    Besides the processed question and answer, the sample carries the query
    point, the ground-truth box, detectron features, two sets of bounding
    boxes (point-specific and whole-image) and, optionally, context features.
    All tensors are loaded from ``.pt`` files onto the CPU.

    Args:
        idx: Index into ``self.vqamb_data``.

    Returns:
        A ``Sample`` with ``img_id``, ``point``, ``gt_bbox``, ``text``,
        ``answers``, ``targets``, ``image_feature_0``, ``pt_bbox``,
        ``img_bbox`` and, when ``self.config.use_context`` is set,
        ``context_feature_0``.
    """

    def _load_cpu(path):
        """Load a serialized tensor from ``path`` onto the CPU."""
        return torch.load(path, map_location=torch.device('cpu'))

    def _fit_rows(feat, num_rows=100):
        """Truncate or zero-pad ``feat`` along dim 0 to exactly ``num_rows`` rows."""
        current = feat.shape[0]
        if current > num_rows:
            return feat[:num_rows]
        if current < num_rows:
            pad = torch.zeros(num_rows - current, feat.shape[1])
            return torch.cat([feat, pad], dim=0)
        return feat

    data = self.vqamb_data[idx]
    current_sample = Sample()

    # Image id and query point.
    current_sample.img_id = data['id']
    current_sample.point = data['point']

    # Ground-truth box converted from (x, y, w, h) to (x1, y1, x2, y2).
    bbox = data['bbox']
    current_sample.gt_bbox = torch.Tensor([bbox['x'], bbox['y'],
                                           bbox['x'] + bbox['w'],
                                           bbox['y'] + bbox['h']])

    # Question: drop "?" and keep "'s" as its own token.
    tokens = tokenize(data["pt_question"], remove=["?"], keep=["'s"])
    processed = self.text_processor({"tokens": tokens})
    current_sample.text = processed["text"]

    # Answer: the single answer is wrapped in a list for the answer processor.
    processed = self.answer_processor({"answers": [data['ans']]})
    current_sample.answers = processed["answers"]
    # Skip the first score entry (the unknown-answer index).
    current_sample.targets = processed["answers_scores"][1:]

    # Files that depend on the query point are named "<id>,<x>,<y>.pt".
    point = data['point']
    image_id = str(data['id'])
    point_suffix = ',' + str(point['x']) + ',' + str(point['y'])

    # Detectron features ----------------
    # TODO: read in detectron image instead if detectron is to be built
    detectron_path = self.detectron_folder + image_id
    # Point-specific feature files are selected by a substring check on the
    # folder name.
    if 'pt' in self.detectron_folder:
        detectron_path += point_suffix
    detectron_path += '.pt'
    detectron_feat = _load_cpu(detectron_path)
    if self.config.pad_detectron:
        detectron_feat = _fit_rows(detectron_feat)
    current_sample.image_feature_0 = detectron_feat

    # Bounding boxes ----------------
    # The directory prefix is hardcoded to the empty string for now, so both
    # paths resolve relative to the current working directory. Unlike the
    # features above, boxes are always fitted to 100 rows.
    bbox_dir = ''
    current_sample.pt_bbox = _fit_rows(
        _load_cpu(bbox_dir + image_id + point_suffix + '.pt'))
    current_sample.img_bbox = _fit_rows(
        _load_cpu(bbox_dir + image_id + '.pt'))

    # Context features --------------------
    if self.config.use_context:
        context_path = self.context_folder + image_id + point_suffix + '.pt'
        context_feat = _load_cpu(context_path).squeeze()
        if self.config.pad_context:
            context_feat = _fit_rows(context_feat)
        current_sample.context_feature_0 = context_feat

    return current_sample
def get_item(self, idx):
    """Build the ``Sample`` for entry ``idx`` of ``self.vqamb_data``.

    The sample carries the image and question ids, all query points, the
    ground-truth box of the first object, the processed question, the
    processed answers with their score targets, and the detectron features
    loaded from a ``.pt`` file.

    Bounding-box features are currently disabled: they are neither loaded nor
    attached to the sample. The previous version still tried to zero-pad
    ``bbox_feat`` in the "too few rows" branch although its load had been
    commented out, which raised ``UnboundLocalError`` whenever padding was
    needed. That dead padding and the unused bbox path are removed here.

    Args:
        idx: Index into ``self.vqamb_data``.

    Returns:
        A ``Sample`` with ``img_id``, ``qa_id``, ``points``, ``gt_bbox``,
        ``text``, ``answers``, ``targets`` and ``image_feature_0`` set.
    """
    data = self.vqamb_data[idx]
    current_sample = Sample()

    # Image id, question id and query points.
    current_sample.img_id = data['id']
    current_sample.qa_id = data['qa_index']
    current_sample.points = data['points']

    # Ground-truth box of the first object, converted from (x, y, w, h) to
    # (xmin, ymin, xmax, ymax).
    obj = data['all_objs'][0]
    xmin, ymin = obj['x'], obj['y']
    xmax, ymax = obj['x'] + obj['w'], obj['y'] + obj['h']
    current_sample.gt_bbox = torch.Tensor([xmin, ymin, xmax, ymax])

    # Question: drop "?" before running the text processor.
    tokens = tokenize(data["question"], remove=["?"])
    processed = self.text_processor({"tokens": tokens})
    current_sample.text = processed["text"]

    # Answers: ``all_ans`` is passed to the answer processor as-is.
    processed = self.answer_processor({"answers": data['all_ans']})
    current_sample.answers = processed["answers"]
    # Skip the first score entry (the unknown-answer index).
    current_sample.targets = processed["answers_scores"][1:]

    # Detectron features ----------------
    # TODO: read in detectron image instead if detectron is to be built
    detectron_path = self.detectron_folder + str(data['id'])
    # Point-specific feature files ("<id>,<x>,<y>.pt", using the first point)
    # are selected by a substring check on the folder name.
    if 'pt' in self.detectron_folder:
        point = data['points'][0]
        detectron_path += ',' + str(point['x']) + ',' + str(point['y'])
    detectron_path += '.pt'
    detectron_feat = torch.load(
        detectron_path, map_location=torch.device('cpu')).squeeze()
    # NOTE(review): ``squeeze()`` removes every size-1 dimension, so a file
    # holding a single feature row would become 1-D and the row-based padding
    # below would no longer operate on rows -- confirm that cannot happen.

    # Fixed number of rows: 608 for grid features, 100 otherwise.
    max_feat = 608 if self.config.grid else 100
    if self.config.pad_detectron:
        num_rows = detectron_feat.shape[0]
        if num_rows > max_feat:
            detectron_feat = detectron_feat[:max_feat]
        elif num_rows < max_feat:
            pad = torch.zeros(max_feat - num_rows, detectron_feat.shape[1])
            detectron_feat = torch.cat([detectron_feat, pad], dim=0)

    current_sample.image_feature_0 = detectron_feat

    return current_sample
def get_item(self, idx):
    """Assemble the ``Sample`` for entry ``idx`` of ``self.objpart_data``.

    The sample receives the image id, a binary ``part`` flag, the query
    point, the processed question text, the processed answer with its score
    targets, and the detectron features read from a ``.pt`` file.

    Args:
        idx: Index into ``self.objpart_data``.

    Returns:
        A ``Sample`` with ``img_id``, ``part``, ``point``, ``text``,
        ``answers``, ``targets`` and ``image_feature_0`` populated.
    """
    entry = self.objpart_data[idx]
    sample = Sample()

    # Image id, plus a flag that is 1 when the answer is exactly 'part'.
    sample.img_id = entry['id']
    sample.part = 1 if entry['ans'] == 'part' else 0

    # Query point.
    sample.point = entry['point']

    # Question: tokenize without "?" and run the text processor.
    question_tokens = tokenize(entry["question"], remove=["?"])
    sample.text = self.text_processor({"tokens": question_tokens})["text"]

    # Answer: the single answer is wrapped in a list for the answer processor.
    answer_out = self.answer_processor({"answers": [entry['ans']]})
    sample.answers = answer_out["answers"]
    # Skip the first score entry (the unknown-answer index).
    sample.targets = answer_out["answers_scores"][1:]

    # Detectron features ----------------
    # TODO: read in detectron image instead if detectron is to be built
    path_pieces = [self.detectron_folder, str(entry['id'])]
    # Hacky way of assessing point supervision: a substring check on the
    # folder name decides whether the file name carries ",<x>,<y>".
    if 'pt' in self.detectron_folder:
        point = entry['point']
        path_pieces.extend([',', str(point['x']), ',', str(point['y'])])
    path_pieces.append('.pt')
    features = torch.load(
        ''.join(path_pieces), map_location=torch.device('cpu')).squeeze()

    # Optionally force a fixed number of rows: zero-pad short inputs, cut
    # long ones.
    MAX_FEAT = 100
    if self.config.pad_detectron:
        num_rows = features.shape[0]
        if num_rows < MAX_FEAT:
            filler = torch.zeros(MAX_FEAT - num_rows, features.shape[1])
            features = torch.cat([features, filler], dim=0)
        elif num_rows > MAX_FEAT:
            features = features[:MAX_FEAT]

    sample.image_feature_0 = features
    # ---------------------------------------------
    return sample