def visualize_training(self, batched_inputs, results):
    """
    Render the network predictions for one image and save them as a PNG.

    Only the first image of the batch is visualized. Up to ``max_boxes``
    (100) predicted boxes, masks and points are overlaid on the input
    image. Ground-truth boxes are NOT drawn. The file name and image id of
    the visualized sample are printed to stdout, and the rendered image is
    written to ``output/result_show/<image_id left-padded with zeros to 12
    characters>.png`` (the directory is created if it does not exist).

    Args:
        batched_inputs (list): a list that contains input to the model.
            The keys "image", "file_name" and "image_id" of the visualized
            element are read here; "image" is permuted from (C, H, W) to
            (H, W, C) before being converted from BGR.
        results (List[Instances]): a list of #images elements.

    Raises:
        AssertionError: if ``batched_inputs`` and ``results`` differ in
            length.
    """
    import os

    from pointscollection.utils import exVisualizer as Visualizer
    from detectron2.data.detection_utils import convert_image_to_rgb

    assert len(batched_inputs) == len(
        results), "Cannot visualize inputs and results of different sizes"

    max_boxes = 100
    image_index = 0  # only visualize a single image
    sample = batched_inputs[image_index]

    img = convert_image_to_rgb(sample["image"].permute(1, 2, 0), "BGR")
    print(sample['file_name'], sample['image_id'])

    # Rescale the raw predictions to the size of the image being drawn on.
    processed_results = _postprocess(results[image_index], img.shape[0],
                                     img.shape[1])
    predicted_boxes = processed_results.pred_boxes.tensor.detach().cpu(
    ).numpy()
    predicted_mask = processed_results.pred_masks.detach().cpu().numpy()
    predicted_points = processed_results.pred_points.detach().cpu().numpy()

    v_pred = Visualizer(img, None)
    v_pred = v_pred.overlay_instances(
        boxes=predicted_boxes[0:max_boxes],
        masks=predicted_mask[0:max_boxes],
        points=predicted_points[0:max_boxes])
    vis_img = v_pred.get_image()

    out_path = 'output/result_show/{:0>12}.png'.format(sample['image_id'])
    # imsave does not create missing parent directories; make sure the
    # target folder exists so the first run does not fail.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    plt.imsave(out_path, vis_img)
def forward(self, batched_inputs):
    """
    Run the model on a batch: losses when training, detections otherwise.

    Args:
        batched_inputs: a list, batched outputs of
            :class:`DetectionTransform`. Each item in the list contains
            the inputs for one image. For now, each item in the list is a
            dict that contains:

            image: Tensor, image in (C, H, W) format.
            instances: Instances (the legacy key "targets" is still
                accepted, with a logged warning).

            Other information that's included in the original dicts,
            such as "height", "width" (int): the output resolution of the
            model, used in inference. When absent, the preprocessed image
            size is used instead.

    Returns:
        In training mode, the value returned by :meth:`losses`
        (documented upstream as dict[str: Tensor], mapping a named loss
        to a tensor storing the loss).
        In inference mode, a list with one ``{"instances": ...}`` dict per
        image, holding the post-processed result for that image.
    """
    images = self.preprocess_image(batched_inputs)

    # Pick whichever key carries the ground truth, if any.
    first_input = batched_inputs[0]
    if "instances" in first_input:
        gt_key = "instances"
    elif "targets" in first_input:
        log_first_n(
            logging.WARN,
            "'targets' in the model inputs is now renamed to 'instances'!",
            n=10,
        )
        gt_key = "targets"
    else:
        gt_key = None

    if gt_key is None:
        gt_instances = None
    else:
        gt_instances = [
            item[gt_key].to(self.device) for item in batched_inputs
        ]

    features = self.backbone(images.tensor)

    # Each backbone entry is indexed: [0] feeds the classification and
    # instance branches, [1] feeds the points branch.
    classify_features = [features[name][0] for name in self.cin_features]
    points_features = [features[name][1] for name in self.pin_features]
    ins_features = [features[name][0] for name in self.ins_features]

    # Apply the heads. The points head starts from an all-zero 2-channel
    # map with the spatial size of the last points feature.
    last_points_feature = points_features[-1]
    batch, _, feat_h, feat_w = last_points_feature.size()
    target_points = last_points_feature.new_zeros(
        batch, 2, feat_h, feat_w, requires_grad=False
    )
    pred_digits = self.cls_head(classify_features)
    pred_points = self.pc_head(target_points, points_features)

    if not self.training:
        # Inference: decode the predictions, then post-process each image
        # to its requested output resolution.
        detections = self.inference(
            pred_digits, pred_points, ins_features[0], images
        )
        return [
            {
                "instances": _postprocess(
                    per_image,
                    meta.get("height", size[0]),
                    meta.get("width", size[1]),
                )
            }
            for per_image, meta, size in zip(
                detections, batched_inputs, images.image_sizes
            )
        ]

    # Training: build the targets at the resolution of the last
    # classification feature and compute the losses.
    output_size = classify_features[-1].size()
    gt_clses, gt_belongs, gt_masks, gt_ins = self.get_ground_truth(
        gt_instances, output_size
    )
    return self.losses(
        gt_clses,
        gt_belongs,
        gt_masks,
        gt_ins,
        pred_digits,
        pred_points,
        ins_features[0],
    )