def __getitem__(self, index):
    raw_inputs, inst_info, inst_info1 = self.get_raw_inputs(index)

    full_size = raw_inputs['label'].size
    params = get_transform_params(full_size, inst_info, self.class_of_interest,
                                  self.config, bbox=inst_info1["object"],
                                  target_box=inst_info1["target"],
                                  random_crop=self.opt.random_crop)
    outputs = self.preprocess_inputs(raw_inputs, params)

    if inst_info1["target"].get('inst_ids') is None:
        # Single target instance: binary mask of pixels matching its id.
        mask_target = torch.where(
            outputs['inst'] == inst_info1["target"]['inst_id'],
            torch.full_like(outputs['inst'], 1),
            torch.full_like(outputs['inst'], 0))
    else:
        # Two target instances: build one mask per id, then take their union.
        mask_target1 = torch.where(
            outputs['inst'] == inst_info1["target"]['inst_ids'][0],
            torch.full_like(outputs['inst'], 1),
            torch.full_like(outputs['inst'], 0))
        mask_target2 = torch.where(
            outputs['inst'] == inst_info1["target"]['inst_ids'][1],
            torch.full_like(outputs['inst'], 1),
            torch.full_like(outputs['inst'], 0))
        # Cast to bool before OR-ing: bitwise `|` is undefined on float tensors.
        mask_target = mask_target1.bool() | mask_target2.bool()
    outputs['mask_target'] = mask_target.float()

    if self.config['preprocess_option'] == 'select_region':
        outputs = self.preprocess_cropping(raw_inputs, outputs, params)
    return outputs
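# Minimal, self-contained sketch of the target-mask construction above on a
# toy instance map; the instance ids (7, 9) are made up for illustration.
import torch

inst = torch.tensor([[7., 7., 0.],
                     [9., 0., 0.],
                     [9., 9., 7.]])

def binary_mask(inst_map, inst_id):
    # Same pattern as in __getitem__: 1 where the pixel belongs to inst_id.
    return torch.where(inst_map == inst_id,
                       torch.full_like(inst_map, 1),
                       torch.full_like(inst_map, 0))

single = binary_mask(inst, 7)  # one target instance
# Two instances merged: cast to bool so the bitwise OR is well-defined even
# when the instance map (and hence the masks) are float tensors.
union = (binary_mask(inst, 7).bool() | binary_mask(inst, 9).bool()).float()
print(single)
print(union)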
def __getitem__(self, index):
    raw_inputs, inst_info = self.get_raw_inputs(index)

    full_size = raw_inputs['label'].size
    params = get_transform_params(full_size, inst_info, self.class_of_interest,
                                  self.config, random_crop=self.opt.random_crop)
    outputs = self.preprocess_inputs(raw_inputs, params)

    if self.config['preprocess_option'] == 'select_region':
        outputs = self.preprocess_cropping(raw_inputs, outputs, params)
    return outputs
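# Sketch of how either __getitem__ variant is consumed downstream. The dicts
# they return collate key-wise under torch's default collate_fn, so they plug
# straight into a DataLoader. `ToyDictDataset` is a made-up stand-in, not the
# repo's real dataset class.
import torch
from torch.utils.data import DataLoader, Dataset

class ToyDictDataset(Dataset):
    def __len__(self):
        return 8

    def __getitem__(self, index):
        # Returns a dict of tensors, mirroring the shape of `outputs` above.
        return {'label': torch.zeros(1, 4, 4),
                'inst': torch.full((1, 4, 4), float(index))}

loader = DataLoader(ToyDictDataset(), batch_size=4)
batch = next(iter(loader))
print(batch['label'].shape)  # torch.Size([4, 1, 4, 4]): dicts collate key-wise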
def crop_canvas(bbox_sampled, label_original, opt, img_original=None,
                patch_to_obj_ratio=1.2, min_ctx_ratio=1.2, max_ctx_ratio=1.5,
                resize=True, transform_img=False):
    h, w = label_original.size()[2:4]
    config = {}
    config['prob_flip'] = 0.0
    config['fineSize'] = opt.fineSize if resize else None
    config['img_to_obj_ratio'] = opt.contextMargin
    config['patch_to_obj_ratio'] = patch_to_obj_ratio
    config['min_ctx_ratio'] = min_ctx_ratio
    config['max_ctx_ratio'] = max_ctx_ratio

    params = get_transform_params((w, h), config=config, bbox=bbox_sampled,
                                  random_crop=False)
    transform_label = get_transform_fn(opt, params, method=Image.NEAREST,
                                       normalize=False, resize=resize)
    transform_image = get_transform_fn(opt, params, resize=resize)

    output_dict = {}
    output_dict['label'] = transform_label(tensor2pil(label_original[0])) * 255.0
    if transform_img:
        output_dict['image'] = transform_image(tensor2pil(img_original[0],
                                                          is_img=True))

    input_bbox = np.array(params['bbox_in_context'])
    crop_pos = np.array(params['crop_pos']).astype(int)
    bbox_cls = params['bbox_cls']

    ### generate output bbox
    img_size = output_dict['label'].size(1)  # spatial size of the (square) label
    context_ratio = np.random.uniform(low=config['min_ctx_ratio'],
                                      high=config['max_ctx_ratio'])
    output_bbox = np.array(get_soft_bbox(input_bbox, img_size, img_size,
                                         context_ratio))
    mask_in, mask_object_in, mask_context_in = get_masked_image(
        output_dict['label'], input_bbox, bbox_cls)
    mask_out, mask_object_out, _ = get_masked_image(
        output_dict['label'], output_bbox)

    output_dict['mask_ctx_in'] = mask_context_in.unsqueeze(0)  # (1xCxHxW)
    output_dict['mask_in'] = mask_in.unsqueeze(0)              # (1x1xHxW)
    output_dict['mask_out'] = mask_out.unsqueeze(0)            # (1x1xHxW)
    output_dict['crop_pos'] = torch.from_numpy(crop_pos)       # (4,)
    output_dict['label'] = output_dict['label'].unsqueeze(0)   # (1x1xHxW)
    output_dict['cls'] = torch.LongTensor([bbox_cls])
    if transform_img:
        # Zero out the input-box region of the image.
        output_dict['image'] = output_dict['image'].unsqueeze(0) * \
            (1 - output_dict['mask_in']).repeat(1, 3, 1, 1)

    # Crop window in original-image coordinates.
    x1, y1, x2, y2 = crop_pos
    x1 = max(0, x1); y1 = max(0, y1)  # clamp to stay in range
    width = x2 - x1 + 1; height = y2 - y1 + 1
    label_crop = label_original[:, :, y1:y2 + 1, x1:x2 + 1]

    # Map the boxes from the resized (fineSize) canvas back to the crop window.
    input_bbox_orig = input_bbox.astype(float)
    input_bbox_orig = np.array([input_bbox_orig[0] / opt.fineSize * width,
                                input_bbox_orig[1] / opt.fineSize * height,
                                input_bbox_orig[2] / opt.fineSize * width,
                                input_bbox_orig[3] / opt.fineSize * height])
    output_bbox_orig = output_bbox.astype(float)
    output_bbox_orig = np.array([output_bbox_orig[0] / opt.fineSize * width,
                                 output_bbox_orig[1] / opt.fineSize * height,
                                 output_bbox_orig[2] / opt.fineSize * width,
                                 output_bbox_orig[3] / opt.fineSize * height])

    _, _, mask_ctx_in = get_masked_image(label_crop[0], input_bbox_orig, bbox_cls)
    mask_out, _, _ = get_masked_image(label_crop[0], output_bbox_orig)
    output_dict['label_orig'] = label_crop
    output_dict['mask_ctx_in_orig'] = mask_ctx_in.unsqueeze(0)
    output_dict['mask_out_orig'] = mask_out.unsqueeze(0)

    # Output box offset into global (full-image) coordinates.
    output_bbox_global = np.array([x1 + output_bbox_orig[0],
                                   y1 + output_bbox_orig[1],
                                   x1 + output_bbox_orig[2],
                                   y1 + output_bbox_orig[3]])
    output_dict['output_bbox'] = torch.from_numpy(output_bbox)
    output_dict['output_bbox_global'] = torch.from_numpy(output_bbox_global)
    return output_dict
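# Self-contained check of the coordinate mapping at the end of crop_canvas:
# a box in the resized (fineSize) canvas is scaled back to the crop window,
# then offset into full-image coordinates. All numbers here are made up.
import numpy as np

fineSize = 256
crop_pos = np.array([100, 50, 399, 249])            # x1, y1, x2, y2 in full image
x1, y1, x2, y2 = crop_pos
width, height = x2 - x1 + 1, y2 - y1 + 1            # 300 x 200 crop window

output_bbox = np.array([64.0, 64.0, 192.0, 192.0])  # box in fineSize space
scale = np.array([width, height, width, height]) / fineSize
output_bbox_orig = output_bbox * scale              # crop-window coordinates
output_bbox_global = output_bbox_orig + np.array([x1, y1, x1, y1])

print(output_bbox_orig)    # [ 75.  50. 225. 150.]
print(output_bbox_global)  # [175. 100. 325. 200.]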