Python to_image_list示例，fcos_core.structures.image_list.to_image_list Python示例

示例#1

0

显示文件

文件： bbox_aug.py 项目： qilei123/FCOS

def im_detect_bbox(model, images, target_scale, target_max_size, device):
    """
    Run bbox detection on the original (non-flipped) images.

    Every image is resized with ``target_scale`` / ``target_max_size``,
    converted to a tensor and normalized with the ``cfg.INPUT`` settings.
    The results are batched by ``to_image_list`` using
    ``cfg.DATALOADER.SIZE_DIVISIBILITY`` and moved to ``device`` before being
    passed to ``model``; whatever ``model`` returns is returned unchanged.
    """
    resize = T.Resize(target_scale, target_max_size)
    to_tensor = TT.ToTensor()
    normalize = T.Normalize(
        mean=cfg.INPUT.PIXEL_MEAN,
        std=cfg.INPUT.PIXEL_STD,
        to_bgr255=cfg.INPUT.TO_BGR255,
    )
    preprocess = TT.Compose([resize, to_tensor, normalize])

    tensors = [preprocess(img) for img in images]
    batch = to_image_list(tensors, cfg.DATALOADER.SIZE_DIVISIBILITY)
    return model(batch.to(device))

示例#2

0

显示文件

文件： predictor.py 项目： kovakimy/FCOS

    def compute_prediction(self, original_image):
        """
        Run the model on one image and print pre-/post-processing timings.

        Arguments:
            original_image (np.ndarray): an image as returned by OpenCV

        Returns:
            prediction (BoxList): the detected objects, resized to the size
                of ``original_image``. Additional information of the
                detection properties can be found in the fields of the
                BoxList via `prediction.fields()`
        """
        # apply pre-processing to image
        time1 = time.time()
        image = self.transforms(original_image)
        # convert to an ImageList, padded so that it is divisible by
        # cfg.DATALOADER.SIZE_DIVISIBILITY
        image_list = to_image_list(image,
                                   self.cfg.DATALOADER.SIZE_DIVISIBILITY)
        image_list = image_list.to(self.device)
        razn = time.time() - time1
        print("Pre-Processing time = {}\n".format(razn))

        # compute predictions
        with torch.no_grad():
            predictions = self.model(image_list)
        predictions = [o.to(self.cpu_device) for o in predictions]

        # always single image is passed at a time
        prediction = predictions[0]

        # reshape prediction (a BoxList) into the original image size.
        # Take the first two axes instead of "everything but the last" so a
        # 2-D (single-channel) array unpacks just like an H x W x C one.
        time1 = time.time()
        height, width = original_image.shape[:2]
        prediction = prediction.resize((width, height))
        razn = time.time() - time1
        print("Post-Pocessing time = {}\n".format(razn))

        if prediction.has_field("mask"):
            # if we have masks, paste the masks in the right position
            # in the image, as defined by the bounding boxes
            masks = prediction.get_field("mask")
            # always single image is passed at a time
            masks = self.masker([masks], [prediction])[0]
            prediction.add_field("mask", masks)
        return prediction

示例#3

0

显示文件

    def forward(self, images, targets=None):
        """
        Run backbone, the ``dsc`` module, the RPN and (if set) the ROI heads.

        Arguments:
            images (list[Tensor] or ImageList): images to be processed
            targets (list[BoxList]): ground-truth boxes present in the image (optional)

        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] which contains the losses.
                During testing, it returns list[BoxList] contains additional fields
                like `scores`, `labels` and `mask` (for Mask R-CNN models).

        Raises:
            ValueError: in training mode when ``targets`` is None.
        """
        if targets is None and self.training:
            raise ValueError("In training mode, targets should be passed")

        image_list = to_image_list(images)
        feature_maps = self.dsc(self.backbone(image_list.tensors))

        # NOTE: a later experiment (passing the features through
        # ``self.enhance`` and concatenating them with a second feature set)
        # used to sit here and is currently disabled.

        proposals, rpn_losses = self.rpn(image_list, feature_maps, targets)
        if self.roi_heads:
            _, result, head_losses = self.roi_heads(
                feature_maps, proposals, targets)
        else:
            # RPN-only models don't have roi_heads
            result, head_losses = proposals, {}

        if not self.training:
            return result

        # proposal losses are merged last, so they win on key clashes
        losses = dict(head_losses)
        losses.update(rpn_losses)
        return losses

示例#4

0

显示文件

文件： generalized_rcnn.py 项目： kovakimy/FCOS

    def forward(self, images, targets=None):
        """
        Run the detector and print the wall-clock time of each stage.

        Arguments:
            images (list[Tensor] or ImageList): images to be processed
            targets (list[BoxList]): ground-truth boxes present in the image (optional)

        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] which contains the losses.
                During testing, it returns list[BoxList] contains additional fields
                like `scores`, `labels` and `mask` (for Mask R-CNN models).

        Raises:
            ValueError: in training mode when ``targets`` is None.
        """
        if targets is None and self.training:
            raise ValueError("In training mode, targets should be passed")
        image_list = to_image_list(images)

        started = time.time()
        feature_maps = self.backbone(image_list.tensors)
        elapsed = time.time() - started
        print("Backbone time = {}\n".format(elapsed))

        started = time.time()
        proposals, rpn_losses = self.rpn(image_list, feature_maps, targets)
        elapsed = time.time() - started
        print("RPN time = {}\n".format(elapsed))

        if self.roi_heads:
            print("roi_heads\n")
            _, result, head_losses = self.roi_heads(
                feature_maps, proposals, targets)
        else:
            # RPN-only models don't have roi_heads; the timed section only
            # covers passing the proposals through.
            started = time.time()
            result, head_losses = proposals, {}
            elapsed = time.time() - started
            print("HEAD time = {}\n".format(elapsed))

        if not self.training:
            return result

        losses = dict(head_losses)
        losses.update(rpn_losses)
        return losses

示例#5

0

显示文件

    def forward(self, images, targets=None, weights_normal=None, timer=None):
        """
        Run the detector, optionally forwarding ``weights_normal``.

        Arguments:
            images (list[Tensor] or ImageList): images to be processed
            targets (list[BoxList]): ground-truth boxes present in the image (optional)
            weights_normal: optional extra argument. When truthy it is passed
                to ``roi_heads`` if ROI heads exist, otherwise to ``rpn``.
            timer: accepted but not used by this method.

        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] which contains the losses.
                During testing, it returns list[BoxList] contains additional fields
                like `scores`, `labels` and `mask` (for Mask R-CNN models).

        Raises:
            ValueError: in training mode when ``targets`` is None.
        """
        if targets is None and self.training:
            raise ValueError("In training mode, targets should be passed")

        image_list = to_image_list(images)
        feature_maps = self.backbone(image_list.tensors)

        has_weights = bool(weights_normal)

        # if roi_heads, don't search rpn: the weights only reach the RPN for
        # RPN-only models.
        rpn_args = (image_list, feature_maps, targets)
        if has_weights and not self.roi_heads:
            rpn_args += (weights_normal, )
        proposals, rpn_losses = self.rpn(*rpn_args)

        if self.roi_heads:
            head_args = (feature_maps, proposals, targets)
            if has_weights:
                head_args += (weights_normal, )
            _, result, head_losses = self.roi_heads(*head_args)
        else:
            # RPN-only models don't have roi_heads
            result, head_losses = proposals, {}

        if not self.training:
            return result

        losses = dict(head_losses)
        losses.update(rpn_losses)
        return losses

示例#6

0

显示文件

文件： generalized_rcnn.py 项目： yinghdb/EmbedMask

    def forward(self, images, targets=None, benchmark=False, timers=None):
        """
        Run the detector, optionally timing the backbone stage.

        Arguments:
            images (list[Tensor] or ImageList): images to be processed
            targets (list[BoxList]): ground-truth boxes present in the image (optional)
            benchmark: when truthy and ``timers`` is given, ``timers[0]`` is
                started before and stopped after the backbone (after a CUDA
                synchronize). Also forwarded to ``rpn`` and ``roi_heads``.
            timers: indexable collection of timers with ``tic()`` / ``toc()``;
                forwarded to ``rpn`` and ``roi_heads`` as well.

        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] which contains the losses.
                During testing, it returns list[BoxList] contains additional fields
                like `scores`, `labels` and `mask` (for Mask R-CNN models).

        Raises:
            ValueError: in training mode when ``targets`` is None.
        """
        time_backbone = bool(benchmark) and timers is not None

        if time_backbone:
            timers[0].tic()
        if targets is None and self.training:
            raise ValueError("In training mode, targets should be passed")
        image_list = to_image_list(images)
        feature_maps = self.backbone(image_list.tensors)
        if time_backbone:
            torch.cuda.synchronize()
            timers[0].toc()

        proposals, rpn_losses = self.rpn(
            image_list, feature_maps, targets,
            benchmark=benchmark, timers=timers)

        if self.roi_heads:
            _, result, head_losses = self.roi_heads(
                feature_maps, proposals, targets,
                benchmark=benchmark, timers=timers)
        else:
            # RPN-only models don't have roi_heads
            result, head_losses = proposals, {}

        if not self.training:
            return result

        losses = dict(head_losses)
        losses.update(rpn_losses)
        return losses

示例#7

0

显示文件

文件： generalized_rcnn.py 项目： jszgz/FCOS-YOOO

    def forward(self, images, targets=None):
        """
        Run the detector, with an extra "event" RPN when YOOO is enabled.

        Arguments:
            images (list[Tensor] or ImageList): images to be processed
            targets (list[BoxList]): ground-truth boxes present in the image (optional)

        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] which contains the losses.
                During testing, it returns list[BoxList] contains additional fields
                like `scores`, `labels` and `mask` (for Mask R-CNN models).
                With ``cfg.MODEL.YOOO_ON`` and no ROI heads, the test-time
                result is the tuple ``(proposals, proposals_event)``.

        Raises:
            ValueError: in training mode when ``targets`` is None.
        """
        if targets is None and self.training:
            raise ValueError("In training mode, targets should be passed")

        image_list = to_image_list(images)
        feature_maps = self.backbone(image_list.tensors)
        proposals, rpn_losses = self.rpn(image_list, feature_maps, targets)

        yooo_on = self.cfg.MODEL.YOOO_ON
        if yooo_on:
            # second RPN run on the same features
            proposals_event, event_losses = self.rpn_event(
                image_list, feature_maps, targets)

        if self.roi_heads:
            _, result, head_losses = self.roi_heads(
                feature_maps, proposals, targets)
        else:
            # RPN-only models don't have roi_heads
            head_losses = {}
            result = (proposals, proposals_event) if yooo_on else proposals

        if not self.training:
            return result

        losses = dict(head_losses)
        losses.update(rpn_losses)
        if yooo_on:
            losses.update(event_losses)
        return losses

示例#8

0

显示文件

    def compute_prediction(self, original_image):
        """
        Run the backbone and FCOS head stored in ``self.model`` on one image.

        Arguments:
            original_image (np.ndarray): an image as returned by OpenCV

        Returns:
            prediction (BoxList): the detected objects, resized to the size
                of ``original_image``. Additional information of the
                detection properties can be found in the fields of the
                BoxList via `prediction.fields()`
        """
        # apply pre-processing to image
        image = self.transforms(original_image)
        # convert to an ImageList, padded so that it is divisible by
        # cfg.DATALOADER.SIZE_DIVISIBILITY
        image_list = to_image_list(image,
                                   self.cfg.DATALOADER.SIZE_DIVISIBILITY)
        image_list = image_list.to(self.device)

        # self.model is indexed by name here: backbone and FCOS head are
        # called separately.
        model_backbone = self.model["backbone"]
        model_fcos = self.model["fcos"]

        # compute predictions
        with torch.no_grad():
            features = model_backbone(image_list.tensors)
            predictions, _, _ = model_fcos(image_list,
                                           features,
                                           targets=None,
                                           return_maps=False)
        predictions = [o.to(self.cpu_device) for o in predictions]
        # always single image is passed at a time
        prediction = predictions[0]

        # reshape prediction (a BoxList) into the original image size.
        # Take the first two axes instead of "everything but the last" so a
        # 2-D (single-channel) array unpacks just like an H x W x C one.
        height, width = original_image.shape[:2]
        prediction = prediction.resize((width, height))

        if prediction.has_field("mask"):
            # if we have masks, paste the masks in the right position
            # in the image, as defined by the bounding boxes
            masks = prediction.get_field("mask")
            # always single image is passed at a time
            masks = self.masker([masks], [prediction])[0]
            prediction.add_field("mask", masks)
        return prediction

示例#9

0

显示文件

文件： generalized_rcnn.py 项目： amsword/FCOS

    def forward(self, images, targets=None):
        """
        Run the detector, skipping the ROI heads when there are no proposals.

        Arguments:
            images (list[Tensor] or ImageList): images to be processed
            targets (list[BoxList]): ground-truth boxes present in the image (optional)

        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] which contains the losses.
                During testing, it returns list[BoxList] contains additional fields
                like `scores`, `labels` and `mask` (for Mask R-CNN models).

        Raises:
            ValueError: in training mode when ``targets`` is None.
        """
        if targets is None and self.training:
            raise ValueError("In training mode, targets should be passed")

        image_list = to_image_list(images)
        feature_maps = self.backbone(image_list.tensors)
        proposals, rpn_losses = self.rpn(image_list, feature_maps, targets)

        if not self.roi_heads:
            # RPN-only models don't have roi_heads
            result, head_losses = proposals, {}
        elif len(proposals) == 0:
            # Nothing to feed the ROI heads: log it and hand back one empty
            # BoxList per image instead.
            import logging
            logging.info('# proposal is 0')
            from maskrcnn_benchmark.structures.bounding_box import BoxList
            result = [
                BoxList.create_empty_list(size)
                for size in image_list.image_sizes
            ]
            head_losses = {}
        else:
            _, result, head_losses = self.roi_heads(
                feature_maps, proposals, targets)

        if not self.training:
            return result

        losses = dict(head_losses)
        losses.update(rpn_losses)
        return losses

示例#10

0

显示文件

文件： bbox_aug.py 项目： qilei123/FCOS

def im_detect_bbox_hflip(model, images, target_scale, target_max_size, device):
    """
    Run bbox detection on the horizontally flipped images.

    Same signature as ``im_detect_bbox``. The preprocessing additionally
    applies ``TT.RandomHorizontalFlip(1.0)`` (flip probability 1.0), and each
    returned box list is passed through ``transpose(0)`` to undo the flip.
    """
    steps = [
        T.Resize(target_scale, target_max_size),
        TT.RandomHorizontalFlip(1.0),
        TT.ToTensor(),
        T.Normalize(
            mean=cfg.INPUT.PIXEL_MEAN,
            std=cfg.INPUT.PIXEL_STD,
            to_bgr255=cfg.INPUT.TO_BGR255,
        ),
    ]
    preprocess = TT.Compose(steps)

    flipped = [preprocess(img) for img in images]
    batch = to_image_list(flipped, cfg.DATALOADER.SIZE_DIVISIBILITY)
    detections = model(batch.to(device))

    # Invert the detections computed on the flipped image
    return [boxes.transpose(0) for boxes in detections]

示例#11

0

显示文件

def foward_detector(model, images, targets=None, return_maps=False):
    """
    Run the backbone and FCOS head held in ``model``.

    ``model`` is indexed with the keys ``"backbone"`` and ``"fcos"``.

    When the FCOS head is in training mode, returns ``(losses, f, m)``:
    ``losses`` is a copy of the head's loss dict, ``f`` maps "P3".."P7" to
    the backbone feature at index 0..4, and ``m`` maps each layer name to
    ``{map_type: score_maps[map_type][layer_index]}``. Otherwise returns the
    head's proposals.
    """
    layer_index = {"P3": 0, "P4": 1, "P5": 2, "P6": 3, "P7": 4}
    backbone = model["backbone"]
    fcos = model["fcos"]

    image_list = to_image_list(images)
    features = backbone(image_list.tensors)

    f = {name: features[i] for name, i in layer_index.items()}

    # Training without targets = training G on the target domain; with
    # targets = source domain. Without training mode this is inference.
    target_domain = fcos.training and targets is None

    proposals, proposal_losses, score_maps = fcos(
        image_list, features, targets=targets, return_maps=return_maps)
    if target_domain:
        assert len(proposal_losses) == 1 and proposal_losses[
            "zero"] == 0  # loss_dict should be empty dict

    if not fcos.training:
        # inference
        return proposals

    # training
    m = {
        name: {kind: score_maps[kind][i] for kind in score_maps}
        for name, i in layer_index.items()
    }
    losses = dict(proposal_losses)
    return losses, f, m

示例#12

0

显示文件

文件： fcos.py 项目： qilei123/FCOS

    def detect(self, im, min_confidence=None):
        """
        Detect objects in one image and return them as Python bbox objects.

        :param im (np.ndarray): an image as returned by OpenCV
        :param min_confidence: threshold handed to ``select_top_predictions``;
            when None, the ``"best_min_confidence"`` entry registered for
            ``self.model_name`` in ``_MODEL_NAMES_TO_INFO_`` is used.
        :return: the result of ``self._bbox_list_to_py_bbox_list`` applied to
            the selected predictions
        """
        # convert to an ImageList, padded so that it is divisible by
        # cfg.DATALOADER.SIZE_DIVISIBILITY
        batch = to_image_list(self.transforms(im),
                              self.cfg.DATALOADER.SIZE_DIVISIBILITY)
        batch = batch.to(self.device)

        # compute predictions
        with torch.no_grad():
            raw_outputs = self.model(batch)
        cpu_outputs = [out.to(self.cpu_device) for out in raw_outputs]

        # exactly one image goes in, so exactly one result must come out
        assert len(cpu_outputs) == 1
        boxes = cpu_outputs[0]

        threshold = min_confidence
        if threshold is None:
            threshold = _MODEL_NAMES_TO_INFO_[self.model_name]["best_min_confidence"]

        selected = self.select_top_predictions(boxes, threshold)
        return self._bbox_list_to_py_bbox_list(selected)

示例#13

0

显示文件

文件： generalized_rcnn.py 项目： zhoulw13/FCOS

    def forward(self, images, targets=None):
        """
        Run backbone, RPN and (if set) the ROI heads.

        Arguments:
            images (list[Tensor] or ImageList): images to be processed
            targets (list[BoxList]): ground-truth boxes present in the image (optional)

        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] which contains the losses.
                During testing, it returns list[BoxList] contains additional fields
                like `scores`, `labels` and `mask` (for Mask R-CNN models).

        Raises:
            ValueError: in training mode when ``targets`` is None.
        """
        if targets is None and self.training:
            raise ValueError("In training mode, targets should be passed")

        image_list = to_image_list(images)
        feature_maps = self.backbone(image_list.tensors)
        proposals, rpn_losses = self.rpn(image_list, feature_maps, targets)

        if self.roi_heads:
            _, result, head_losses = self.roi_heads(feature_maps, proposals, targets)
        else:
            # RPN-only models don't have roi_heads
            result, head_losses = proposals, {}

        if not self.training:
            return result

        # proposal losses are merged last, so they win on key clashes
        losses = dict(head_losses)
        losses.update(rpn_losses)
        return losses

示例#14

0

显示文件

 def __call__(self, batch):
     """
     Collate a batch of samples into ``(images, targets, img_ids)``.

     The samples are transposed into per-field columns; column 0 is batched
     with ``to_image_list`` using ``self.size_divisible``, columns 1 and 2
     are returned as they are.
     """
     columns = list(zip(*batch))
     return (
         to_image_list(columns[0], self.size_divisible),
         columns[1],
         columns[2],
     )

示例#15

0

显示文件

文件： generalized_rcnn.py 项目： Hwang64/CrabNet

    def forward(self, arguments, images, targets=None):
        """
        Run the two-backbone detector (classification + regression branch).

        Each pair of classification/regression feature maps is passed through
        its deformable conv, gated with a sigmoid of its weight conv, and the
        two cross-gated sums are added into one fused map per level. The
        fused maps feed the RPN.

        Arguments:
            arguments: mapping from which ``arguments["iteration"]`` is read
                in training mode (it is not accessed otherwise)
            images (list[Tensor] or ImageList): images to be processed
            targets (list[BoxList]): ground-truth boxes present in the image (optional)

        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] which contains the losses.
                During testing, it returns list[BoxList] contains additional fields
                like `scores`, `labels` and `mask` (for Mask R-CNN models).

        Raises:
            ValueError: in training mode when ``targets`` is None.
        """
        if self.training:
            iteration = arguments["iteration"]
        if self.training and targets is None:
            raise ValueError("In training mode, targets should be passed")

        images = to_image_list(images)
        cls_features, last_backbone_cls = self.cls_backbone(images.tensors)
        reg_features = self.reg_backbone(images.tensors)

        outputs = []
        for cls_feature, reg_feature in zip(cls_features, reg_features):
            cls_feature_align = self.deform_conv_cls(cls_feature)
            reg_feature_align = self.deform_conv_reg(reg_feature)

            # torch.sigmoid replaces the deprecated F.sigmoid; same values.
            cls_feature_weight = torch.sigmoid(
                self.cls_weight_conv(cls_feature_align))
            reg_feature_weight = torch.sigmoid(
                self.reg_weight_conv(reg_feature_align))

            # each branch receives the other branch's gated features
            cls_output = torch.add(
                torch.mul(reg_feature_align, reg_feature_weight),
                cls_feature_align)
            reg_output = torch.add(
                torch.mul(cls_feature_align, cls_feature_weight),
                reg_feature_align)
            outputs.append(torch.add(cls_output, reg_output))

        outputs = tuple(outputs)
        proposals, proposal_losses = self.rpn(images, outputs, outputs,
                                              targets)

        # Kept as an equality test: the type of self.extra is not visible
        # from this method.
        if self.extra == True and self.training:
            _, extra_proposal_losses = self.extra_rpn(
                images, None, reg_features, targets, iteration)
        if self.training:
            cls_head_losses = self.cls_head(last_backbone_cls, targets,
                                            iteration)

        if self.roi_heads:
            # NOTE(review): `features` is never assigned in this method;
            # unless it is a module-level name this raises NameError whenever
            # roi_heads is set. Candidates are (cls_features, reg_features)
            # or the fused `outputs` - confirm against roi_heads' signature.
            _, _, result, detector_losses = self.roi_heads(
                features, features, proposals, targets)
        else:
            # RPN-only models don't have roi_heads
            result = proposals
            detector_losses = {}

        if self.training:
            losses = {}
            losses.update(detector_losses)
            losses.update(proposal_losses)
            if self.extra == True:
                losses.update(extra_proposal_losses)
            losses.update(cls_head_losses)
            return losses

        return result

示例#16

0

显示文件

文件： class_pr_cal.py 项目： jdnie/FCOS-DeepShift

def _reset_output_dir(path):
    """Delete ``path`` if it exists and recreate it empty, parents included."""
    if os.path.exists(path):
        shutil.rmtree(path)
    os.makedirs(path)
    return path


def _load_gt_boxes(anno_path, img_w, img_h):
    """
    Read the ground-truth boxes from one VOC-style annotation XML.

    Box coordinates are rescaled from the size recorded in the XML to
    ``img_w`` x ``img_h``. If the XML has no ``size`` element, or records a
    zero width/height, the image size is used and nothing is rescaled.
    Objects without a ``bndbox`` are skipped.
    """
    root = ET.parse(anno_path).getroot()

    xml_w, xml_h = img_w, img_h
    for size in root.findall('size'):
        xml_w = int(size.find('width').text)
        xml_h = int(size.find('height').text)
    if not xml_w or not xml_h:
        xml_w, xml_h = img_w, img_h

    gt_boxes = []
    for obj in root.findall('object'):
        bndboxes = obj.findall('bndbox')
        if not bndboxes:
            continue
        # if an object carries several boxes, the last one is the one used
        bndbox = bndboxes[-1]
        xmin = int(bndbox.find('xmin').text)
        ymin = int(bndbox.find('ymin').text)
        xmax = int(bndbox.find('xmax').text)
        ymax = int(bndbox.find('ymax').text)
        gt_boxes.append([int(xmin * img_w / xml_w),
                         int(ymin * img_h / xml_h),
                         int(xmax * img_w / xml_w),
                         int(ymax * img_h / xml_h)])
    return gt_boxes


def run_accCal(model_path,
               test_base_path,
               save_base_path,
               labels_dict,
               config_file,
               input_size=640,
               confidence_thresholds=(0.3, )):
    """
    Evaluate a detector image by image on a VOC2007-layout test set.

    For every ``*.jpg`` under ``<test_base_path>/VOC2007/JPEGImages`` the
    model is run, detections are filtered per confidence threshold and passed
    through ``soft_nms``, then greedily matched (IoU > 0.3) against the boxes
    in the matching annotation XML. The counting is per image, not per box:

    * an image with at least one unmatched ground-truth box is a miss;
    * an image with at least one unmatched detection is a false detection;
    * an image with neither counts as accurate.

    Printed per threshold: accuracy, "precision" (1 - false-detection image
    ratio) and "recall" (1 - missed image ratio), plus an overall fps figure.
    mAP is not computed. For the first threshold only, annotated images,
    false-detection crops and the source image/XML are written below
    ``save_base_path`` (sub-directories ``recall``, ``ero``, ``ori``).

    Arguments:
        model_path: checkpoint path, assigned to ``cfg.MODEL.WEIGHT``
        test_base_path: directory containing the ``VOC2007`` folder
        save_base_path: output directory; the ``all``, ``recall``, ``ero``
            and ``ori`` sub-directories are wiped and recreated
        labels_dict: not used by this function
        config_file: config file merged into the global ``cfg``
        input_size: size passed to ``T.Resize``
        confidence_thresholds: score thresholds to evaluate

    Raises:
        ValueError: if no ``.jpg`` image is found (previously this ended in a
            ZeroDivisionError after the whole run).
    """
    # 'all' is still (re)created although writing into it is disabled below
    _reset_output_dir(os.path.join(save_base_path, 'all'))
    save_recall_path = _reset_output_dir(os.path.join(save_base_path, 'recall'))
    save_ero_path = _reset_output_dir(os.path.join(save_base_path, 'ero'))
    save_ori_path = _reset_output_dir(os.path.join(save_base_path, 'ori'))

    test_img_path = os.path.join(test_base_path, 'VOC2007/JPEGImages')
    test_ano_path = os.path.join(test_base_path, 'VOC2007/Annotations')
    img_list = glob.glob(test_img_path + '/*.jpg')
    if not img_list:
        raise ValueError("no .jpg images found in {}".format(test_img_path))
    num_images = len(img_list)

    cfg.merge_from_file(config_file)
    cfg.MODEL.WEIGHT = model_path
    cfg.TEST.IMS_PER_BATCH = 1  # only test single image
    cfg.freeze()

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(cfg.MODEL.WEIGHT)
    model.eval()

    normalize_transform = T.Normalize(
        mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD
    )
    transform = T.Compose(
        [
            T.ToPILImage(),
            T.Resize(input_size),
            T.ToTensor(),
            T.Lambda(lambda x: x * 255),
            normalize_transform,
        ]
    )

    # Per-threshold counters of images (not boxes)
    sad_accuracy = [0] * len(confidence_thresholds)
    sad_precision = [0] * len(confidence_thresholds)
    sad_recall = [0] * len(confidence_thresholds)
    spend_time = []
    for idx, img_name in enumerate(img_list):
        progress(int(idx / num_images * 100))
        base_img_name = os.path.split(img_name)[-1]
        frame = cv2.imread(img_name)
        ori_frame = copy.deepcopy(frame)  # clean copy used for the crops

        h, w = frame.shape[:2]
        image = transform(frame)
        image_list = to_image_list(image, cfg.DATALOADER.SIZE_DIVISIBILITY)
        image_list = image_list.to(cfg.MODEL.DEVICE)

        start_time = time.time()
        with torch.no_grad():
            predictions = model(image_list)
        prediction = predictions[0].to("cpu")
        end_time = time.time()
        spend_time.append(end_time - start_time)

        prediction = prediction.resize((w, h)).convert("xyxy")
        scores = prediction.get_field("scores")
        # sort detections by descending score
        _, order = scores.sort(0, descending=True)
        prediction = prediction[order]
        scores = prediction.get_field("scores").numpy()
        labels = prediction.get_field("labels").numpy()
        bboxes = prediction.bbox.numpy().astype(np.int32)
        bboxes_area = (bboxes[:, 2] - bboxes[:, 0]) * (bboxes[:, 3] - bboxes[:, 1])

        # The annotation does not depend on the threshold: parse it once.
        xml_name = base_img_name[:-4] + '.xml'
        anno_path = os.path.join(test_ano_path, xml_name)
        gt_boxes = _load_gt_boxes(anno_path, w, h)

        for ii, confidence_threshold in enumerate(confidence_thresholds):
            # drop low-score and degenerate (zero/negative area) boxes
            _keep = (scores > confidence_threshold) & (bboxes_area > 0)
            _scores = scores[_keep].tolist()
            _labels = labels[_keep].tolist()
            _bboxes = bboxes[_keep].tolist()
            _labels, _bboxes, _scores = soft_nms(_labels, _bboxes, _scores, confidence_threshold)

            if ii == 0:
                # draw every kept detection on the working frame
                for i, b in enumerate(_bboxes):
                    frame = cv2.rectangle(frame,
                                          (b[0], b[1]), (b[2], b[3]),
                                          (100, 220, 200), 2)
                    frame = cv2.putText(frame,
                                        str(_labels[i]) + '-' + str(int(_scores[i] * 100)),
                                        (b[0], b[1]), 1, 1,
                                        (0, 0, 255), 1)

            # Working copies: matched detections are removed as we go, so
            # whatever is left afterwards is a false detection.
            boxes_list_tmp = copy.deepcopy(_bboxes)
            classes_list_tmp = copy.deepcopy(_labels)

            rc_box = []
            for gt_box in gt_boxes:
                for bbox_idx, pred_box in enumerate(boxes_list_tmp):
                    _, _, _, iou_score = get_iou(gt_box, pred_box)
                    if iou_score > 0.3:
                        del classes_list_tmp[bbox_idx]
                        del boxes_list_tmp[bbox_idx]
                        break
                else:
                    # No detection matched this ground-truth box: a miss,
                    # which counts against recall.
                    rc_box.append(gt_box)
            recall_flag = bool(rc_box)

            if recall_flag:
                sad_recall[ii] += 1
                if ii == 0:
                    for x1, y1, x2, y2 in rc_box:
                        rca_frame = cv2.rectangle(frame,
                                                  (int(x1), int(y1)), (int(x2), int(y2)),
                                                  (255, 0, 0), 4)
                    cv2.imwrite(os.path.join(save_recall_path, base_img_name), rca_frame)
                    shutil.copy(img_name, os.path.join(save_ori_path, base_img_name))
                    shutil.copy(anno_path, os.path.join(save_ori_path, xml_name))

            # Detections left unmatched have no ground-truth box: false
            # detections, which count against precision.
            if len(classes_list_tmp) > 0:
                sad_precision[ii] += 1
                if ii == 0:
                    for box_idx, (x1, y1, x2, y2) in enumerate(boxes_list_tmp):
                        ero_frame = cv2.rectangle(frame,
                                                  (int(x1), int(y1)), (int(x2), int(y2)),
                                                  (0, 0, 255), 4)
                        err_rect_name = base_img_name[:-4] + '_' + str(box_idx) + '.jpg'
                        cv2.imwrite(os.path.join(save_ero_path, err_rect_name),
                                    ori_frame[y1: y2, x1: x2, :])
                    cv2.imwrite(os.path.join(save_ero_path, base_img_name), ero_frame)
                    shutil.copy(img_name, os.path.join(save_ori_path, base_img_name))
                    shutil.copy(anno_path, os.path.join(save_ori_path, xml_name))

            if not recall_flag and len(classes_list_tmp) == 0:
                sad_accuracy[ii] += 1

    # An image only counts as accurate when every box is right: one box
    # missing is a miss, one box too many is a false detection. mAP is not
    # considered.
    print('\nfps is : ', 1 / np.average(spend_time))
    for ii, confidence_threshold in enumerate(confidence_thresholds):
        print("confidence th is : {}".format(confidence_threshold))
        accuracy = float(sad_accuracy[ii] / num_images)
        print("accuracy is : {}".format(accuracy))
        precision = 1 - float(sad_precision[ii] / num_images)
        print("precision is : {}".format(precision))
        recall = 1 - float(sad_recall[ii] / num_images)
        print("recall is : {}\n".format(recall))