def im_detect_bbox(model, images, target_scale, target_max_size, device):
    """
    Performs bbox detection on the original (un-flipped) images.

    Every image is resized with ``T.Resize(target_scale, target_max_size)``,
    converted to a tensor and normalized with the pixel statistics from
    ``cfg.INPUT``. The tensors are batched with ``to_image_list`` (padded
    per ``cfg.DATALOADER.SIZE_DIVISIBILITY``) and passed to ``model`` on
    ``device``.

    Returns:
        whatever ``model`` returns for the batch.
    """
    steps = [
        T.Resize(target_scale, target_max_size),
        TT.ToTensor(),
        T.Normalize(
            mean=cfg.INPUT.PIXEL_MEAN,
            std=cfg.INPUT.PIXEL_STD,
            to_bgr255=cfg.INPUT.TO_BGR255,
        ),
    ]
    preprocess = TT.Compose(steps)

    tensors = []
    for image in images:
        tensors.append(preprocess(image))

    batch = to_image_list(tensors, cfg.DATALOADER.SIZE_DIVISIBILITY)
    return model(batch.to(device))
def compute_prediction(self, original_image):
    """
    Run the model on a single image and print pre/post-processing timings.

    Arguments:
        original_image (np.ndarray): an image as returned by OpenCV

    Returns:
        prediction (BoxList): the detected objects, resized to the original
            image size. Additional information of the detection properties
            can be found in the fields of the BoxList via
            `prediction.fields()`
    """
    # Pre-processing: transforms, padding to a multiple of
    # cfg.DATALOADER.SIZE_DIVISIBILITY, and the move to the target device
    # are timed together.
    pre_start = time.time()
    tensor = self.transforms(original_image)
    batch = to_image_list(tensor, self.cfg.DATALOADER.SIZE_DIVISIBILITY)
    batch = batch.to(self.device)
    print("Pre-Processing time = {}\n".format(time.time() - pre_start))

    # Inference without gradient tracking.
    with torch.no_grad():
        outputs = self.model(batch)
    outputs = [item.to(self.cpu_device) for item in outputs]

    # Only one image is passed at a time, so only the first entry is used.
    prediction = outputs[0]

    # Post-processing: map the BoxList back to the original image size.
    post_start = time.time()
    height, width = original_image.shape[:-1]
    prediction = prediction.resize((width, height))
    print("Post-Pocessing time = {}\n".format(time.time() - post_start))

    if prediction.has_field("mask"):
        # Paste the masks at the positions given by the bounding boxes;
        # the masker works on lists, hence the single-element wrapping.
        pasted = self.masker([prediction.get_field("mask")], [prediction])[0]
        prediction.add_field("mask", pasted)

    # Disabled FLOP counting, kept for reference:
    # dev = 'cpu'
    # model = self.model.to(dev)
    # inp = to_image_list(image, self.cfg.DATALOADER.SIZE_DIVISIBILITY).to(dev)
    # count_ops(model, inp)
    return prediction
def forward(self, images, targets=None):
    """
    Run backbone -> ``self.dsc`` -> RPN -> (optional) ROI heads.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): the output from the model.
            In training mode, a dict[Tensor] holding the detector and
            proposal losses. Otherwise the ROI-head result, or the RPN
            proposals when the model has no ROI heads.

    Raises:
        ValueError: in training mode when ``targets`` is None.
    """
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")

    image_list = to_image_list(images)
    features = self.dsc(self.backbone(image_list.tensors))

    # Added later (currently disabled): fuse the DSC features with an
    # "enhance" branch.
    # features=self.enhance(features)
    #
    # featureList=[]
    # for x,y in zip(features,featureAdd):
    #     temp=torch.cat((x,y),1)
    #     temp=self.catDscEnhance(temp)
    #     temp=self.Backrelu(temp)
    #     featureList.append(temp)
    #
    # features=tuple(featureList)

    proposals, proposal_losses = self.rpn(image_list, features, targets)

    if not self.roi_heads:
        # RPN-only models don't have roi_heads
        result = proposals
        detector_losses = {}
    else:
        _, result, detector_losses = self.roi_heads(
            features, proposals, targets)

    if not self.training:
        return result

    losses = {}
    losses.update(detector_losses)
    losses.update(proposal_losses)
    return losses
def forward(self, images, targets=None):
    """
    Run the detector and print wall-clock timings for each stage.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): the output from the model.
            In training mode, a dict[Tensor] holding the detector and
            proposal losses. Otherwise the ROI-head result, or the RPN
            proposals when the model has no ROI heads.

    Raises:
        ValueError: in training mode when ``targets`` is None.
    """
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")

    image_list = to_image_list(images)

    stage_start = time.time()
    features = self.backbone(image_list.tensors)
    print("Backbone time = {}\n".format(time.time() - stage_start))

    stage_start = time.time()
    proposals, proposal_losses = self.rpn(image_list, features, targets)
    print("RPN time = {}\n".format(time.time() - stage_start))

    if not self.roi_heads:
        # RPN-only models don't have roi_heads; the "HEAD" timing below
        # therefore only covers these assignments.
        stage_start = time.time()
        result = proposals
        detector_losses = {}
        print("HEAD time = {}\n".format(time.time() - stage_start))
    else:
        print("roi_heads\n")
        _, result, detector_losses = self.roi_heads(
            features, proposals, targets)

    if not self.training:
        return result

    losses = {}
    losses.update(detector_losses)
    losses.update(proposal_losses)
    return losses
def forward(self, images, targets=None, weights_normal=None, timer=None):
    """
    Run the detector, optionally forwarding ``weights_normal`` to one head.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)
        weights_normal: optional extra argument. When truthy it is passed
            to the ROI heads if the model has them, otherwise to the RPN.
        timer: accepted but not used by this method.

    Returns:
        result (list[BoxList] or dict[Tensor]): the output from the model.
            In training mode, a dict[Tensor] holding the detector and
            proposal losses. Otherwise the ROI-head result, or the RPN
            proposals when the model has no ROI heads.

    Raises:
        ValueError: in training mode when ``targets`` is None.
    """
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")

    image_list = to_image_list(images)
    features = self.backbone(image_list.tensors)

    # The RPN only receives weights_normal when there are no roi_heads
    # (if roi_heads exist, the rpn is not searched).
    if weights_normal and not self.roi_heads:
        proposals, proposal_losses = self.rpn(
            image_list, features, targets, weights_normal)
    else:
        proposals, proposal_losses = self.rpn(image_list, features, targets)

    if self.roi_heads:
        if weights_normal:
            _, result, detector_losses = self.roi_heads(
                features, proposals, targets, weights_normal)
        else:
            _, result, detector_losses = self.roi_heads(
                features, proposals, targets)
    else:
        # RPN-only models don't have roi_heads
        result = proposals
        detector_losses = {}

    if not self.training:
        return result

    losses = {}
    losses.update(detector_losses)
    losses.update(proposal_losses)
    return losses
def forward(self, images, targets=None, benchmark=False, timers=None):
    """
    Run the detector, optionally timing the backbone stage.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)
        benchmark (bool): when True and ``timers`` is given, the backbone is
            timed with ``timers[0]``. ``benchmark`` and ``timers`` are always
            forwarded to the RPN and the ROI heads.
        timers: indexable collection whose first element exposes
            ``tic()`` / ``toc()`` (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): the output from the model.
            In training mode, a dict[Tensor] holding the detector and
            proposal losses. Otherwise the ROI-head result, or the RPN
            proposals when the model has no ROI heads.

    Raises:
        ValueError: in training mode when ``targets`` is None.
    """
    timing = benchmark and timers is not None
    if timing:
        timers[0].tic()

    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")

    image_list = to_image_list(images)
    features = self.backbone(image_list.tensors)

    if timing:
        # Synchronize so the timer is stopped only after pending CUDA work
        # has finished. Skipped when CUDA is unavailable, because
        # torch.cuda.synchronize() raises on CPU-only setups.
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        timers[0].toc()

    proposals, proposal_losses = self.rpn(
        image_list, features, targets, benchmark=benchmark, timers=timers)

    if self.roi_heads:
        _, result, detector_losses = self.roi_heads(
            features, proposals, targets, benchmark=benchmark, timers=timers)
    else:
        # RPN-only models don't have roi_heads
        result = proposals
        detector_losses = {}

    if self.training:
        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)
        return losses

    return result
def forward(self, images, targets=None):
    """
    Run the detector, with an optional second ("event") RPN.

    When ``self.cfg.MODEL.YOOO_ON`` is set, ``self.rpn_event`` is run next
    to the regular RPN. For RPN-only models the result is then the pair
    ``(proposals, proposals_event)``, and in training mode the event
    proposal losses are added to the returned losses.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)

    Returns:
        result (list[BoxList], tuple or dict[Tensor]): the output from the
            model. In training mode, a dict[Tensor] with the losses.
            Otherwise the ROI-head result, or the RPN proposals (a pair
            when YOOO_ON) for models without ROI heads.

    Raises:
        ValueError: in training mode when ``targets`` is None.
    """
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")

    image_list = to_image_list(images)
    features = self.backbone(image_list.tensors)
    proposals, proposal_losses = self.rpn(image_list, features, targets)

    # chwangteng: additional RPN head, enabled through the config
    if self.cfg.MODEL.YOOO_ON:
        proposals_event, proposal_losses_event = self.rpn_event(
            image_list, features, targets)

    if self.roi_heads:
        _, result, detector_losses = self.roi_heads(
            features, proposals, targets)
    else:
        # RPN-only models don't have roi_heads
        detector_losses = {}
        if self.cfg.MODEL.YOOO_ON:
            result = (proposals, proposals_event)
        else:
            result = proposals

    if not self.training:
        return result

    losses = {}
    losses.update(detector_losses)
    losses.update(proposal_losses)
    if self.cfg.MODEL.YOOO_ON:
        losses.update(proposal_losses_event)
    return losses
def compute_prediction(self, original_image):
    """
    Run the two-part model (``"backbone"`` + ``"fcos"``) on a single image.

    Arguments:
        original_image (np.ndarray): an image as returned by OpenCV

    Returns:
        prediction (BoxList): the detected objects, resized to the original
            image size. Additional information of the detection properties
            can be found in the fields of the BoxList via
            `prediction.fields()`
    """
    # Pre-process, pad to a multiple of cfg.DATALOADER.SIZE_DIVISIBILITY
    # and move the batch to the inference device.
    tensor = self.transforms(original_image)
    batch = to_image_list(tensor, self.cfg.DATALOADER.SIZE_DIVISIBILITY)
    batch = batch.to(self.device)

    # self.model is indexed by component name here.
    backbone = self.model["backbone"]
    fcos = self.model["fcos"]

    with torch.no_grad():
        features = backbone(batch.tensors)
        outputs, _, _ = fcos(
            batch, features, targets=None, return_maps=False)
        outputs = [item.to(self.cpu_device) for item in outputs]

    # Only one image is passed at a time, so only the first entry is used.
    prediction = outputs[0]

    # Map the BoxList back to the original image size.
    height, width = original_image.shape[:-1]
    prediction = prediction.resize((width, height))

    if prediction.has_field("mask"):
        # Paste the masks at the positions given by the bounding boxes;
        # the masker works on lists, hence the single-element wrapping.
        pasted = self.masker([prediction.get_field("mask")], [prediction])[0]
        prediction.add_field("mask", pasted)

    return prediction
def forward(self, images, targets=None):
    """
    Run the detector, skipping the ROI heads when the RPN yields nothing.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): the output from the model.
            In training mode, a dict[Tensor] holding the detector and
            proposal losses. Otherwise the ROI-head result, the RPN
            proposals for models without ROI heads, or one empty BoxList
            per image size when ``len(proposals) == 0``.

    Raises:
        ValueError: in training mode when ``targets`` is None.
    """
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")

    image_list = to_image_list(images)
    features = self.backbone(image_list.tensors)
    proposals, proposal_losses = self.rpn(image_list, features, targets)

    if not self.roi_heads:
        # RPN-only models don't have roi_heads
        result = proposals
        detector_losses = {}
    elif len(proposals) == 0:
        # Nothing to feed to the ROI heads: log it and return empty results.
        import logging
        logging.info('# proposal is 0')
        from maskrcnn_benchmark.structures.bounding_box import BoxList
        result = [
            BoxList.create_empty_list(size) for size in image_list.image_sizes
        ]
        detector_losses = {}
    else:
        _, result, detector_losses = self.roi_heads(
            features, proposals, targets)

    if not self.training:
        return result

    losses = {}
    losses.update(detector_losses)
    losses.update(proposal_losses)
    return losses
def im_detect_bbox_hflip(model, images, target_scale, target_max_size, device):
    """
    Performs bbox detection on the horizontally flipped images.

    Function signature is the same as for im_detect_bbox. The pipeline
    matches it too, except that every image is flipped
    (``TT.RandomHorizontalFlip(1.0)``) after resizing, and each returned
    boxlist is transformed back with ``boxlist.transpose(0)``.

    Returns:
        list with one inverted boxlist per boxlist returned by ``model``.
    """
    steps = [
        T.Resize(target_scale, target_max_size),
        TT.RandomHorizontalFlip(1.0),
        TT.ToTensor(),
        T.Normalize(
            mean=cfg.INPUT.PIXEL_MEAN,
            std=cfg.INPUT.PIXEL_STD,
            to_bgr255=cfg.INPUT.TO_BGR255,
        ),
    ]
    preprocess = TT.Compose(steps)

    tensors = []
    for image in images:
        tensors.append(preprocess(image))

    batch = to_image_list(tensors, cfg.DATALOADER.SIZE_DIVISIBILITY)
    flipped_boxlists = model(batch.to(device))

    # Invert the detections computed on the flipped image
    return [boxlist.transpose(0) for boxlist in flipped_boxlists]
def foward_detector(model, images, targets=None, return_maps=False):
    """
    Run the backbone and the FCOS head of a two-part model.

    Arguments:
        model: mapping with the entries ``"backbone"`` and ``"fcos"``
        images (list[Tensor] or ImageList): images to be processed
        targets: ground truth forwarded to the FCOS head (optional). With
            the head in training mode and no targets, the head must return
            exactly one loss, ``"zero"``, equal to 0.
        return_maps (bool): forwarded to the FCOS head

    Returns:
        With the FCOS head in training mode, a tuple ``(losses, f, m)``:
        the proposal losses, the backbone features keyed by layer name
        ("P3".."P7"), and the score maps keyed by layer name and then by
        map type. Otherwise the proposals returned by the FCOS head.
    """
    layer_index = {"P3": 0, "P4": 1, "P5": 2, "P6": 3, "P7": 4}

    backbone = model["backbone"]
    fcos = model["fcos"]

    image_list = to_image_list(images)
    features = backbone(image_list.tensors)
    features_by_layer = {
        name: features[position] for name, position in layer_index.items()
    }

    # Training without targets means training G on the target domain.
    # Otherwise this is training G on the source domain, or inference.
    # The head is called the same way in both cases, since ``targets`` is
    # None in the first.
    on_target_domain = fcos.training and targets is None
    proposals, proposal_losses, score_maps = fcos(
        image_list, features, targets=targets, return_maps=return_maps)
    if on_target_domain:
        # loss_dict should carry nothing but the zero placeholder
        assert len(proposal_losses) == 1 and proposal_losses["zero"] == 0

    if not fcos.training:
        # inference
        return proposals

    # training
    maps_by_layer = {
        name: {
            map_type: score_maps[map_type][position]
            for map_type in score_maps
        }
        for name, position in layer_index.items()
    }
    losses = {}
    losses.update(proposal_losses)
    return losses, features_by_layer, maps_by_layer
def detect(self, im, min_confidence=None):
    """
    Detect objects in one image and return them as Python bounding boxes.

    :param im (np.ndarray): an image as returned by OpenCV
    :param min_confidence: threshold handed to ``select_top_predictions``;
        when None, the ``"best_min_confidence"`` entry registered for
        ``self.model_name`` in ``_MODEL_NAMES_TO_INFO_`` is used
    :return: the result of ``self._bbox_list_to_py_bbox_list`` applied to
        the selected predictions
    """
    # Pre-process, pad to a multiple of cfg.DATALOADER.SIZE_DIVISIBILITY
    # and move the batch to the inference device.
    tensor = self.transforms(im)
    batch = to_image_list(tensor, self.cfg.DATALOADER.SIZE_DIVISIBILITY)
    batch = batch.to(self.device)

    # Inference without gradient tracking.
    with torch.no_grad():
        outputs = self.model(batch)
    outputs = [item.to(self.cpu_device) for item in outputs]

    # Exactly one image went in, so exactly one result is expected.
    assert len(outputs) == 1
    detections = outputs[0]

    if min_confidence is None:
        model_info = _MODEL_NAMES_TO_INFO_[self.model_name]
        min_confidence = model_info["best_min_confidence"]

    detections = self.select_top_predictions(detections, min_confidence)
    return self._bbox_list_to_py_bbox_list(detections)
def forward(self, images, targets=None):
    """
    Run backbone -> RPN -> (optional) ROI heads.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): the output from the model.
            In training mode, a dict[Tensor] holding the detector and
            proposal losses. Otherwise the ROI-head result, or the RPN
            proposals when the model has no ROI heads.

    Raises:
        ValueError: in training mode when ``targets`` is None.
    """
    # Debug visualisation of the input batch (disabled):
    # from boxx import show
    # show(images.tensors.permute(0,2,3,1).int().cpu().numpy()[:,:,:,::-1]+127, figsize=(10,10))
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")

    image_list = to_image_list(images)
    features = self.backbone(image_list.tensors)
    proposals, proposal_losses = self.rpn(image_list, features, targets)

    if not self.roi_heads:
        # RPN-only models don't have roi_heads
        result = proposals
        detector_losses = {}
    else:
        _, result, detector_losses = self.roi_heads(
            features, proposals, targets)

    if not self.training:
        return result

    losses = {}
    losses.update(detector_losses)
    losses.update(proposal_losses)
    return losses
def __call__(self, batch):
    """
    Collate a list of samples into ``(images, targets, img_ids)``.

    The samples are transposed field-wise. The first field is batched with
    ``to_image_list`` using ``self.size_divisible``; the second and third
    fields are returned as tuples, unchanged.
    """
    columns = list(zip(*batch))
    batched_images = to_image_list(columns[0], self.size_divisible)
    return batched_images, columns[1], columns[2]
def forward(self, arguments, images, targets=None):
    """
    Run the dual-backbone detector (classification + regression branches).

    Per feature level, the two backbones' maps are aligned by their
    deformable convolutions, each branch is gated by a sigmoid weight map
    computed from itself and added to the other branch, and the two
    results are summed into one fused map. The fused maps feed the RPN
    and, if present, the ROI heads.

    Arguments:
        arguments (dict): training state; ``arguments["iteration"]`` is read
            in training mode only
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): the output from the model.
            In training mode, a dict[Tensor] with the detector, proposal and
            classification-head losses, plus the extra-RPN losses when
            ``self.extra`` is enabled. Otherwise the ROI-head result, or
            the RPN proposals when the model has no ROI heads.

    Raises:
        ValueError: in training mode when ``targets`` is None.
    """
    # Only the training-time heads consume the iteration counter.
    iteration = arguments["iteration"] if self.training else None

    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")

    images = to_image_list(images)
    cls_features, last_backbone_cls = self.cls_backbone(images.tensors)
    reg_features = self.reg_backbone(images.tensors)

    fused = []
    for cls_feature, reg_feature in zip(cls_features, reg_features):
        cls_feature_align = self.deform_conv_cls(cls_feature)
        reg_feature_align = self.deform_conv_reg(reg_feature)

        # torch.sigmoid replaces the deprecated F.sigmoid (same result).
        cls_feature_weight = torch.sigmoid(
            self.cls_weight_conv(cls_feature_align))
        reg_feature_weight = torch.sigmoid(
            self.reg_weight_conv(reg_feature_align))

        # Cross-branch fusion: each output is one branch plus the gated other.
        cls_output = torch.add(
            torch.mul(reg_feature_align, reg_feature_weight),
            cls_feature_align)
        reg_output = torch.add(
            torch.mul(cls_feature_align, cls_feature_weight),
            reg_feature_align)
        fused.append(torch.add(cls_output, reg_output))
    outputs = tuple(fused)

    proposals, proposal_losses = self.rpn(images, outputs, outputs, targets)

    if self.training:
        if self.extra == True:
            _, extra_proposal_losses = self.extra_rpn(
                images, None, reg_features, targets, iteration)
        cls_head_losses = self.cls_head(last_backbone_cls, targets, iteration)

    if self.roi_heads:
        # NOTE(review): the original passed an undefined name `features`
        # here, which raised NameError. The fused maps are passed for both
        # branches, mirroring the RPN call above. Confirm that this is the
        # intended input.
        _, _, result, detector_losses = self.roi_heads(
            outputs, outputs, proposals, targets)
    else:
        # RPN-only models don't have roi_heads
        result = proposals
        detector_losses = {}

    if self.training:
        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)
        if self.extra == True:
            losses.update(extra_proposal_losses)
        losses.update(cls_head_losses)
        return losses

    return result
def run_accCal(model_path, test_base_path, save_base_path, labels_dict, config_file, input_size=640, confidence_thresholds=(0.3, )):
    """
    Evaluate a detection model on a VOC2007-layout test set and print
    image-level accuracy, precision and recall per confidence threshold.

    An image counts as accurate only when every ground-truth box is matched
    by a prediction (IoU > 0.3) and no prediction is left unmatched. Class
    labels are not compared.

    Side effects:
        * The sub-directories 'all', 'recall', 'ero' and 'ori' of
          ``save_base_path`` are deleted if present and re-created with
          ``os.mkdir`` (so ``save_base_path`` itself must already exist).
        * The global ``cfg`` is merged with ``config_file``, modified and
          frozen.
        * For the first threshold only, annotated images, crops of unmatched
          predictions and copies of the offending image/annotation files are
          written to the output directories.

    Arguments:
        model_path: checkpoint path, assigned to ``cfg.MODEL.WEIGHT``
        test_base_path: root containing ``VOC2007/JPEGImages`` (``*.jpg``)
            and ``VOC2007/Annotations`` (one ``.xml`` per image)
        save_base_path: directory under which the output folders are created
        labels_dict: only referenced by commented-out code in this function
        config_file: config file merged into ``cfg``
        input_size: size passed to ``T.Resize``
        confidence_thresholds: score thresholds evaluated independently

    Returns:
        None; all results are printed.
    """
    # Start from empty output directories on every run.
    save_res_path = os.path.join(save_base_path, 'all')
    if os.path.exists(save_res_path):
        shutil.rmtree(save_res_path)
    os.mkdir(save_res_path)
    save_recall_path = os.path.join(save_base_path, 'recall')
    if os.path.exists(save_recall_path):
        shutil.rmtree(save_recall_path)
    os.mkdir(save_recall_path)
    save_ero_path = os.path.join(save_base_path, 'ero')
    if os.path.exists(save_ero_path):
        shutil.rmtree(save_ero_path)
    os.mkdir(save_ero_path)
    save_ori_path = os.path.join(save_base_path, 'ori')
    if os.path.exists(save_ori_path):
        shutil.rmtree(save_ori_path)
    os.mkdir(save_ori_path)

    test_img_path = os.path.join(test_base_path, 'VOC2007/JPEGImages')
    test_ano_path = os.path.join(test_base_path, 'VOC2007/Annotations')
    img_list = glob.glob(test_img_path + '/*.jpg')

    # Configure, build and load the model in evaluation mode.
    cfg.merge_from_file(config_file)
    cfg.MODEL.WEIGHT = model_path
    cfg.TEST.IMS_PER_BATCH = 1  # only test single image
    cfg.freeze()
    dbg_cfg = cfg  # not used below
    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(cfg.MODEL.WEIGHT)
    model.eval()

    normalize_transform = T.Normalize(
        mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD
    )
    transform = T.Compose(
        [
            T.ToPILImage(),
            T.Resize(input_size),
            T.ToTensor(),
            # presumably restores the 0-255 range expected by
            # cfg.INPUT.PIXEL_MEAN / PIXEL_STD -- TODO confirm
            T.Lambda(lambda x: x * 255),
            normalize_transform,
        ]
    )

    # Per-threshold image counters:
    #   sad_accuracy  - images with no missed box and no unmatched prediction
    #   sad_precision - images with at least one unmatched prediction
    #   sad_recall    - images with at least one unmatched ground-truth box
    sad_accuracy = [0] * len(confidence_thresholds)
    sad_precision = [0] * len(confidence_thresholds)
    sad_recall = [0] * len(confidence_thresholds)
    spend_time = []  # per-image inference time in seconds
    for idx, img_name in enumerate(img_list):
        progress(int(idx/len(img_list) * 100))
        base_img_name = os.path.split(img_name)[-1]
        frame = cv2.imread(img_name)
        ori_frame = copy.deepcopy(frame)  # untouched copy, used for crops
        h, w = frame.shape[:2]
        image = transform(frame)
        image_list = to_image_list(image, cfg.DATALOADER.SIZE_DIVISIBILITY)
        image_list = image_list.to(cfg.MODEL.DEVICE)
        # Only the forward pass and the transfer to the CPU are timed.
        start_time = time.time()
        with torch.no_grad():
            predictions = model(image_list)
            prediction = predictions[0].to("cpu")
        end_time = time.time()
        spend_time.append(end_time - start_time)
        # Back to the original image size, boxes as (x1, y1, x2, y2).
        prediction = prediction.resize((w, h)).convert("xyxy")
        # scores = prediction.get_field("scores")
        # keep = torch.nonzero(scores > confidence_threshold).squeeze(1)
        # prediction = prediction[keep]
        scores = prediction.get_field("scores")
        # NOTE: this rebinds the outer loop variable `idx`; enumerate
        # assigns it again at the start of the next iteration.
        _, idx = scores.sort(0, descending=True)
        prediction = prediction[idx]
        scores = prediction.get_field("scores").numpy()
        labels = prediction.get_field("labels").numpy()
        bboxes = prediction.bbox.numpy().astype(np.int32)
        bboxes_area = (bboxes[:, 2] - bboxes[:, 0]) * (bboxes[:, 3] - bboxes[:, 1])
        for ii, confidence_threshold in enumerate(confidence_thresholds):
            # Keep predictions above the threshold with a non-degenerate box.
            _keep = np.where((scores > confidence_threshold) & (bboxes_area > 0), True, False)
            _scores = scores[_keep].tolist()
            _labels = labels[_keep].tolist()
            _bboxes = bboxes[_keep].tolist()
            _labels, _bboxes, _scores = soft_nms(_labels, _bboxes, _scores, confidence_threshold)
            # Images are only drawn/saved for the first threshold.
            if ii == 0:
                for i, b in enumerate(_bboxes):
                    # save all
                    frame = cv2.rectangle(frame, (b[0], b[1]), (b[2], b[3]), (100, 220, 200), 2)
                    frame = cv2.putText(frame, str(_labels[i]) + '-' + str(int(_scores[i] * 100)), (b[0], b[1]), 1, 1, (0, 0, 255), 1)
                # cv2.imwrite(os.path.join(save_res_path, base_img_name), frame)
            # Working copies: matched predictions are removed from these,
            # so whatever remains afterwards is an unmatched prediction.
            boxes_list_tmp = copy.deepcopy(_bboxes)
            classes_list_tmp = copy.deepcopy(_labels)
            score_list_tmp = copy.deepcopy(_scores)
            fg_cnt = 0  # number of ground-truth boxes seen (not used below)
            recall_flag = False  # set when a ground-truth box is missed
            xml_name = base_img_name[:-4] + '.xml'
            anno_path = os.path.join(test_ano_path, xml_name)
            tree = ET.parse(anno_path)
            root = tree.getroot()
            rc_box = []  # missed ground-truth boxes
            # Annotated image size; fall back to the real size when it is 0.
            for siz in root.findall('size'):
                width_ = siz.find('width').text
                height_ = siz.find('height').text
                if not int(width_) or not int(height_):
                    width_ = w
                    height_ = h
            for obj in root.findall('object'):
                name = obj.find('name').text
                # class_tmp = get_cls(name, labels_dict)
                for bndbox in obj.findall('bndbox'):
                    xmin = bndbox.find('xmin').text
                    ymin = bndbox.find('ymin').text
                    xmax = bndbox.find('xmax').text
                    ymax = bndbox.find('ymax').text
                    # Rescale the annotated box to the loaded image size.
                    tmp_bbox = [int(int(xmin) * w / int(width_)),
                                int(int(ymin) * h / int(height_)),
                                int(int(xmax) * w / int(width_)),
                                int(int(ymax) * h / int(height_))]
                    map_flag = False
                    # Match against the first remaining prediction with
                    # IoU > 0.3 and consume it.
                    for bbox_idx in range(len(boxes_list_tmp)):
                        min_area, box_s, min_flag, iou_score = \
                            get_iou(tmp_bbox, boxes_list_tmp[bbox_idx])
                        if iou_score > 0.3:
                            map_flag = True
                            del classes_list_tmp[bbox_idx]
                            del boxes_list_tmp[bbox_idx]
                            del score_list_tmp[bbox_idx]
                            break
                    # No match found: the box is a missed detection and
                    # counts against the recall / detection rate.
                    if not map_flag:
                        recall_flag = True
                        rc_box.append(tmp_bbox)
                    fg_cnt = fg_cnt + 1
            if recall_flag:
                sad_recall[ii] += 1
                if ii == 0:
                    for box_idx in range(len(rc_box)):
                        x1, y1, x2, y2 = rc_box[box_idx]
                        rca_frame = cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), 4)
                    cv2.imwrite(os.path.join(save_recall_path, base_img_name), rca_frame)
                    shutil.copy(img_name, os.path.join(save_ori_path, base_img_name))
                    shutil.copy(anno_path, os.path.join(save_ori_path, xml_name))
            # print("sad_recall: " + str(sad_recall))
            # Leftover predictions are false detections (the ground truth
            # has no such box) and count against the precision.
            if len(classes_list_tmp) > 0:
                sad_precision[ii] += 1
                if ii == 0:
                    for box_idx in range(len(boxes_list_tmp)):
                        x1, y1, x2, y2 = boxes_list_tmp[box_idx]
                        ero_frame = cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 4)
                        err_rect_name = base_img_name[:-4] + '_' + str(box_idx) + '.jpg'
                        cv2.imwrite(os.path.join(save_ero_path, err_rect_name), ori_frame[y1: y2, x1: x2, :])
                    cv2.imwrite(os.path.join(save_ero_path, base_img_name), ero_frame)
                    shutil.copy(img_name, os.path.join(save_ori_path, base_img_name))
                    shutil.copy(anno_path, os.path.join(save_ori_path, xml_name))
            if not recall_flag and len(classes_list_tmp) == 0:
                sad_accuracy[ii] += 1
            # print("cur sad: " + str(sad))
            # print("fg_cnt: " + str(fg_cnt))
            # print("pred_cnt: " + str(len(classes_list_tmp)))
    # An image only counts as correct when all of its boxes are detected
    # correctly: one missing box is a miss, one extra box is a false
    # detection. mAP is not considered.
    print('\nfps is : ', 1 / np.average(spend_time))
    for ii, confidence_threshold in enumerate(confidence_thresholds):
        print("confidence th is : {}".format(confidence_threshold))
        accuracy = float(sad_accuracy[ii] / len(img_list))
        print("accuracy is : {}".format(accuracy))
        precision = 1 - float(sad_precision[ii] / len(img_list))
        print("precision is : {}".format(precision))
        recall = 1 - float(sad_recall[ii] / len(img_list))
        print("recall is : {}\n".format(recall))