Exemplo n.º 1
0
    def detect_image(self, image):
        """Detect faces in ``image`` and draw boxes, scores and landmarks.

        The image is run through ``self.net`` at its own resolution, the
        outputs are decoded against anchors generated for that resolution,
        filtered with non-max suppression and drawn on a copy of the input.

        Args:
            image: H x W x C image supporting ``.copy()`` and ``np.array``
                conversion (assumed to be a numpy image in the layout
                ``preprocess_input`` expects -- TODO confirm).

        Returns:
            A tuple ``(annotated_image, face_count)``.
        """
        # Keep an untouched copy to draw the detections on.
        canvas = image.copy()

        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        # Factors that map normalized coordinates back to pixel coordinates:
        # (w, h) repeated for the 2 box corners and the 5 landmark points.
        box_scale = torch.Tensor([im_width, im_height] * 2)
        landmark_scale = torch.Tensor([im_width, im_height] * 5)

        # HWC -> CHW, then add the batch dimension.
        batch = torch.from_numpy(preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0)
        # Prior boxes for this exact image size.
        priors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()

        with torch.no_grad():
            if self.cuda:
                box_scale = box_scale.cuda()
                landmark_scale = landmark_scale.cuda()
                batch = batch.cuda()
                priors = priors.cuda()

            loc, conf, landms = self.net(batch)  # forward pass
            variance = self.cfg['variance']

            boxes = decode(loc.data.squeeze(0), priors, variance) * box_scale
            boxes = boxes.cpu().numpy()

            # Column 1 is kept as a (N, 1) score column.
            scores = conf.data.squeeze(0)[:, 1:2].cpu().numpy()

            points = decode_landm(landms.data.squeeze(0), priors, variance) * landmark_scale
            points = points.cpu().numpy()

            detections = non_max_suppression(
                np.concatenate([boxes, scores, points], -1), self.confidence)

        # One color per landmark, in the order the landmarks are stored.
        landmark_colors = ((0, 0, 255), (0, 255, 255), (255, 0, 255),
                           (0, 255, 0), (255, 0, 0))
        for det in detections:
            # Format the score before the row is truncated to integers.
            label = "{:.4f}".format(det[4])
            det = [int(value) for value in det]
            cv2.rectangle(canvas, (det[0], det[1]), (det[2], det[3]), (0, 0, 255), 2)
            cv2.putText(canvas, label, (det[0], det[1] + 12),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

            # Landmarks occupy columns 5..14 as (x, y) pairs.
            for idx, color in enumerate(landmark_colors):
                cv2.circle(canvas, (det[5 + 2 * idx], det[6 + 2 * idx]), 1, color, 4)

        return canvas, len(detections)
Exemplo n.º 2
0
    def get_FPS(self, image, test_interval):
        """Return the mean time in seconds of one detection pass.

        One untimed pass is executed first, then ``test_interval`` timed
        passes (network forward, decoding, NMS and rescaling) are run on the
        same preprocessed input.

        Args:
            image: H x W x C image convertible with ``np.array``.
            test_interval: number of timed iterations.

        Returns:
            Elapsed wall-clock seconds divided by ``test_interval``.
        """
        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)
        # (w, h) repeated for the 2 box corners and the 5 landmark points.
        box_scale = [im_width, im_height] * 2
        landmark_scale = [im_width, im_height] * 5

        if self.letterbox_image:
            image = np.array(
                letterbox_image(image, [self.input_shape[1], self.input_shape[0]]),
                np.float32)
        else:
            # Without letterboxing the anchors depend on the actual image size.
            self.anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()

        def run_once(batch):
            # Full detection pass on an already prepared input batch.
            loc, conf, landms = self.net(batch)
            boxes = decode(loc.data.squeeze(0), self.anchors, self.cfg['variance'])
            boxes = boxes.cpu().numpy()

            scores = conf.data.squeeze(0)[:, 1:2].cpu().numpy()

            points = decode_landm(landms.data.squeeze(0), self.anchors, self.cfg['variance'])
            points = points.cpu().numpy()

            dets = np.concatenate([boxes, scores, points], -1)
            dets = non_max_suppression(dets, self.confidence)
            if len(dets) > 0:
                if self.letterbox_image:
                    dets = retinaface_correct_boxes(
                        dets,
                        np.array([self.input_shape[0], self.input_shape[1]]),
                        np.array([im_height, im_width]))

                dets[:, :4] = dets[:, :4] * box_scale
                dets[:, 5:] = dets[:, 5:] * landmark_scale

        with torch.no_grad():
            image = torch.from_numpy(preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0)
            if self.cuda:
                self.anchors = self.anchors.cuda()
                image = image.cuda()
            # Untimed first pass.
            run_once(image)

        t1 = time.time()
        for _ in range(test_interval):
            with torch.no_grad():
                run_once(image)
        t2 = time.time()
        return (t2 - t1) / test_interval
Exemplo n.º 3
0
 def __init__(self, **kwargs):
     """Initialise the detector from class defaults plus keyword overrides.

     Args:
         **kwargs: attribute overrides (e.g. ``backbone``,
             ``letterbox_image``, ``input_shape``) applied on top of
             ``self._defaults`` before the model is built.
     """
     self.__dict__.update(self._defaults)
     # Apply caller overrides; previously they were accepted but discarded.
     for name, value in kwargs.items():
         setattr(self, name, value)

     # Pick the backbone-specific configuration.
     if self.backbone == "mobilenet":
         self.cfg = cfg_mnet
     else:
         self.cfg = cfg_re50
     self.generate()
     # With letterboxing the network input size is fixed, so the anchors
     # can be computed once here.
     if self.letterbox_image:
         self.anchors = Anchors(self.cfg, image_size=[self.input_shape[0], self.input_shape[1]]).get_anchors()
Exemplo n.º 4
0
    def __init__(self, anchors=None):
        """Store the anchor configuration and create the anchor generator.

        Args:
            anchors: anchor configuration, passed unchanged to ``Anchors``.
        """
        super(SiamRPN, self).__init__()

        # Networks and the anchor cache start unset
        # (presumably assigned by subclasses / later calls -- TODO confirm).
        self.features = None
        self.rpn_model = None
        self.all_anchors = None

        self.anchors = anchors  # raw anchor configuration
        generator = Anchors(anchors)
        self.anchor = generator
        self.anchor_num = generator.anchor_num
Exemplo n.º 5
0
 def __init__(self, anchors=None, o_sz=127, g_sz=127):
     """Store anchor settings and sizes; sub-networks are left unset.

     Args:
         anchors: anchor configuration with ``"ratios"`` and ``"scales"``
             entries. NOTE: the default ``None`` fails at the subscripts
             below with a ``TypeError``.
         o_sz: stored as ``self.o_sz``.
         g_sz: stored as ``self.g_sz``.
     """
     super(SiamMask, self).__init__()
     self.anchors = anchors  # raw anchor configuration
     # One anchor per (ratio, scale) combination.
     ratio_count = len(anchors["ratios"])
     scale_count = len(anchors["scales"])
     self.anchor_num = ratio_count * scale_count
     self.anchor = Anchors(anchors)

     self.o_sz, self.g_sz = o_sz, g_sz

     # Sub-networks and the anchor cache start unset.
     self.features = self.rpn_model = self.mask_model = None
     self.all_anchors = None
Exemplo n.º 6
0
    def __init__(self, anchors=None, o_sz=63, g_sz=127):
        """Store anchor settings, sizes and the upsampling layer.

        Args:
            anchors: anchor configuration with ``"ratios"`` and ``"scales"``
                entries. NOTE: the default ``None`` fails at the subscripts
                below with a ``TypeError``.
            o_sz: stored as ``self.o_sz`` (described as the input size by
                the original author).
            g_sz: stored as ``self.g_sz`` (described as the output size by
                the original author); also the target size of ``upSample``.
        """
        super(SiamMask, self).__init__()
        self.anchors = anchors  # raw anchor configuration
        # Number of anchors: one per (ratio, scale) combination.
        ratio_count = len(anchors["ratios"])
        scale_count = len(anchors["scales"])
        self.anchor_num = ratio_count * scale_count
        self.anchor = Anchors(anchors)  # anchor generator

        # Feature extractor, RPN and segmentation networks start unset.
        self.features = self.rpn_model = self.mask_model = None

        self.o_sz, self.g_sz = o_sz, g_sz
        # Bilinear upsampling of 2-D data to (g_sz, g_sz).
        self.upSample = nn.UpsamplingBilinear2d(size=[g_sz, g_sz])

        self.all_anchors = None
Exemplo n.º 7
0
    def __init__(self, anchors=None, o_sz=63, g_sz=127):
        """Set up anchor bookkeeping, sizes and the bilinear upsampler.

        Args:
            anchors: anchor configuration providing ``"ratios"`` and
                ``"scales"``. NOTE: leaving it at the default ``None``
                raises ``TypeError`` at the subscripts below.
            o_sz: value stored in ``self.o_sz``.
            g_sz: value stored in ``self.g_sz`` and used as the square
                output size of ``self.upSample``.
        """
        super(SiamMask, self).__init__()

        self.anchors = anchors  # raw anchor configuration
        n_ratios = len(anchors["ratios"])
        n_scales = len(anchors["scales"])
        self.anchor_num = n_ratios * n_scales
        self.anchor = Anchors(anchors)

        # Sub-networks are not created here.
        self.features = None
        self.rpn_model = None
        self.mask_model = None

        self.o_sz, self.g_sz = o_sz, g_sz
        target_size = [g_sz, g_sz]
        self.upSample = nn.UpsamplingBilinear2d(size=target_size)

        self.all_anchors = None
Exemplo n.º 8
0
def generate_anchor(cfg, score_size):
    """Generate the anchors for every cell of a square score map.

    :param cfg: anchor configuration passed to ``Anchors``
    :param score_size: side length of the score map (number of grid cells
        per axis)
    :return: array of shape ``(anchor_num * score_size**2, 4)`` holding
        ``(cx, cy, w, h)`` rows
    """
    generator = Anchors(cfg)
    # Base anchors as corner coordinates (x1, y1, x2, y2).
    corners = generator.anchors
    x1, y1, x2, y2 = corners[:, 0], corners[:, 1], corners[:, 2], corners[:, 3]
    # Convert to centre / width / height form.
    base = np.stack([(x1 + x2) * 0.5, (y1 + y2) * 0.5, x2 - x1, y2 - y1], 1)

    stride = generator.stride
    base_count = base.shape[0]
    cell_count = score_size * score_size

    # Replicate every base anchor once per grid cell.
    tiled = np.tile(base, cell_count).reshape((-1, 4))

    # Shift the grid so that its coordinates are centred on the origin.
    origin = -(score_size // 2) * stride
    axis = [origin + stride * step for step in range(score_size)]
    grid_x, grid_y = np.meshgrid(axis, axis)

    # Repeat the flattened grid once per base anchor.
    centres_x = np.tile(grid_x.flatten(), (base_count, 1)).flatten()
    centres_y = np.tile(grid_y.flatten(), (base_count, 1)).flatten()

    # Overwrite the centre columns with the grid positions.
    tiled[:, 0] = centres_x.astype(np.float32)
    tiled[:, 1] = centres_y.astype(np.float32)
    return tiled
Exemplo n.º 9
0
def generate_anchor(cfg, score_size):
    """Build ``(cx, cy, w, h)`` anchors for all positions of the score map.

    Per the original author's notes, ``score_size`` is 25 by default and
    ``cfg`` is the ``model.anchors`` dict from test.py, with a stride of 8.

    Returns an array of shape ``(anchor_num * score_size * score_size, 4)``.
    """
    # Instantiating Anchors provides the base anchors in .anchors,
    # shape (anchor_num, 4), laid out as (x1, y1, x2, y2).
    anchor_source = Anchors(cfg)
    corner_boxes = anchor_source.anchors
    x1, y1, x2, y2 = (corner_boxes[:, 0], corner_boxes[:, 1],
                      corner_boxes[:, 2], corner_boxes[:, 3])
    # (anchor_num, 4) in (cx, cy, w, h) form.
    boxes = np.stack([(x1 + x2) * 0.5, (y1 + y2) * 0.5, x2 - x1, y2 - y1], 1)

    total_stride = anchor_source.stride
    anchor_num = boxes.shape[0]

    # Broadcast the base anchors over the grid: each base anchor is repeated
    # score_size * score_size times, the groups stacked one after another.
    positions = score_size * score_size
    boxes = np.tile(boxes, positions).reshape((-1, 4))

    # Grid offsets centred on zero, e.g. -96 .. 96 for score_size 25, stride 8.
    start = -(score_size // 2) * total_stride
    offsets = [start + total_stride * k for k in range(score_size)]
    # Both grids have shape (score_size, score_size).
    grid_x, grid_y = np.meshgrid(offsets, offsets)

    # Flatten each grid and repeat it anchor_num times so that it lines up
    # with the rows of ``boxes``.
    flat_x = np.tile(grid_x.flatten(), (anchor_num, 1)).flatten()
    flat_y = np.tile(grid_y.flatten(), (anchor_num, 1)).flatten()

    boxes[:, 0] = flat_x.astype(np.float32)
    boxes[:, 1] = flat_y.astype(np.float32)
    return boxes
Exemplo n.º 10
0
 def __init__(self, model, device):
     """Wrap ``model`` and build the anchors from the global ``cfg``.

     Args:
         model: the detection model to wrap.
         device: stored as ``self.device``.
     """
     super(DetectionWrapper, self).__init__()
     self.model = model
     self.device = device

     # Anchor settings all come from the module-level configuration.
     anchor_args = (
         cfg.MIN_LEVEL,
         cfg.MAX_LEVEL,
         cfg.NUM_SCALES,
         cfg.ASPECT_RATIOS,
         cfg.ANCHOR_SCALE,
         cfg.MODEL.IMAGE_SIZE,
     )
     self.anchors = Anchors(*anchor_args)
     self._anchor_cache = None
Exemplo n.º 11
0
    def detect_image(self, image):
        """Detect faces and draw boxes, scores and landmarks on a copy.

        Args:
            image: H x W x C image supporting ``.copy()`` and ``np.array``
                conversion.

        Returns:
            The annotated copy of the image; the unmodified copy when
            nothing is detected.
        """
        canvas = image.copy()

        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        # (w, h) repeated for the 2 box corners and the 5 landmark points.
        box_scale = [im_width, im_height] * 2
        landmark_scale = [im_width, im_height] * 5

        # Preprocess / normalize and add the batch dimension.
        photo = np.expand_dims(preprocess_input(image), 0)
        priors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()

        preds = self.retinaface.predict(photo)
        # Decode the predictions and apply non-max suppression.
        results = self.bbox_util.detection_out(
            preds, priors, confidence_threshold=self.confidence)

        if len(results) <= 0:
            return canvas

        results = np.array(results)
        results[:, :4] = results[:, :4] * box_scale
        results[:, 5:] = results[:, 5:] * landmark_scale

        # One color per landmark, in storage order.
        landmark_colors = ((0, 0, 255), (0, 255, 255), (255, 0, 255),
                           (0, 255, 0), (255, 0, 0))
        for det in results:
            # Format the score before truncating the row to integers.
            label = "{:.4f}".format(det[4])
            det = [int(value) for value in det]
            cv2.rectangle(canvas, (det[0], det[1]), (det[2], det[3]),
                          (0, 0, 255), 2)
            cv2.putText(canvas, label, (det[0], det[1] + 12),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

            # Landmarks occupy columns 5..14 as (x, y) pairs.
            for idx, color in enumerate(landmark_colors):
                cv2.circle(canvas, (det[5 + 2 * idx], det[6 + 2 * idx]), 1, color, 4)
        return canvas
Exemplo n.º 12
0
    def __init__(self, num_classes, block, layers):
        """Build the ResNet backbone, FPN, prediction heads and losses.

        Args:
            num_classes: number of classes for ``ClassificationModel``.
            block: residual block class; ``BasicBlock`` or ``Bottleneck``.
            layers: number of blocks per stage; entries 0..3 are used.

        Raises:
            ValueError: if ``block`` is neither ``BasicBlock`` nor
                ``Bottleneck``.
        """
        # Set before the stages are built (presumably consumed by
        # _make_layer -- TODO confirm).
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # The FPN input widths are the output channels of the last conv in
        # the final block of each stage; that conv's name depends on the
        # block type.
        if block == BasicBlock:
            last_conv = "conv2"
        elif block == Bottleneck:
            last_conv = "conv3"
        else:
            raise ValueError(f"Block type {block} not understood")

        stages = (self.layer1, self.layer2, self.layer3, self.layer4)
        fpn_sizes = [getattr(stage[depth - 1], last_conv).out_channels
                     for stage, depth in zip(stages, layers)]

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2], fpn_sizes[3])

        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256, num_classes=num_classes)
        self.contextModel = LevelAttentionModel(256)

        self.anchors = Anchors()

        self.regressBoxes = BBoxTransform()

        self.clipBoxes = ClipBoxes()

        self.contextLoss = losses.Con()

        self.focalLoss = losses.FocalLoss()

        # Conv weights ~ N(0, sqrt(2 / (k_h * k_w * out_channels)));
        # batch-norm layers start as identity (weight 1, bias 0).
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        prior = 0.01

        # Classification output: zero weights and a bias of
        # -log((1 - prior) / prior), i.e. an initial sigmoid output of
        # ``prior`` if the head ends in a sigmoid (TODO confirm).
        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))

        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)

        # The attention head is registered as ``contextModel`` above; the
        # previous ``self.levelattentionModel`` reference was never assigned
        # and raised AttributeError during construction.
        self.contextModel.conv5.weight.data.fill_(0)
        self.contextModel.conv5.bias.data.fill_(0)

        self.freeze_bn()
Exemplo n.º 13
0
 def __init__(self, **kwargs):
     """Initialise the detector from class defaults plus keyword overrides.

     Args:
         **kwargs: attribute overrides (e.g. ``backbone``, ``nms_iou``,
             ``input_shape``) applied on top of ``self._defaults`` before
             the helpers and the model are built.
     """
     self.__dict__.update(self._defaults)
     # Apply caller overrides; previously they were accepted but discarded.
     for name, value in kwargs.items():
         setattr(self, name, value)

     # Pick the backbone-specific configuration.
     if self.backbone == "mobilenet":
         self.cfg = cfg_mnet
     else:
         self.cfg = cfg_re50
     self.bbox_util = BBoxUtility(nms_thresh=self.nms_iou)
     self.generate()
     # Anchors for the configured input size.
     self.anchors = Anchors(self.cfg, image_size=(self.input_shape[0], self.input_shape[1])).get_anchors()
Exemplo n.º 14
0
 def __init__(self, model, device):
     """Wrap ``model`` for evaluation and precompute the anchor boxes.

     Args:
         model: detection model; switched to eval mode here.
         device: stored as ``self.device`` and passed to ``Anchors``.
     """
     super(DetectionEvalWrapper, self).__init__()
     self.model = model
     self.device = device

     # Only the generated boxes are kept, not the Anchors object itself.
     anchor_source = Anchors(
         cfg.MIN_LEVEL,
         cfg.MAX_LEVEL,
         cfg.NUM_SCALES,
         cfg.ASPECT_RATIOS,
         cfg.ANCHOR_SCALE,
         cfg.MODEL.IMAGE_SIZE,
         device,
     )
     self.anchor_boxes = anchor_source.boxes

     self.model.eval()
Exemplo n.º 15
0
 def __init__(self, model, device, criterion):
     """Wrap ``model`` for training with an anchor labeler and a loss.

     Args:
         model: detection model; switched to train mode here.
         device: stored as ``self.device`` and passed to ``Anchors``.
         criterion: stored as ``self.criterion``.
     """
     super(DetectionTrainWrapper, self).__init__()
     self.model = model
     self.device = device

     # Anchors are built from the module-level configuration and handed to
     # the labeler together with the class count.
     anchor_source = Anchors(
         cfg.MIN_LEVEL,
         cfg.MAX_LEVEL,
         cfg.NUM_SCALES,
         cfg.ASPECT_RATIOS,
         cfg.ANCHOR_SCALE,
         cfg.MODEL.IMAGE_SIZE,
         device,
     )
     self.anchor_labeler = AnchorsLabeler(anchor_source, cfg.NUM_CLASSES)

     self.criterion = criterion
     self.model.train()
 def update(self, newparam=None, anchors=None):
     """Apply parameter overrides and anchor settings, then call ``renew``.

     Args:
         newparam: optional mapping of attribute name to value; every entry
             is set on ``self``.
         anchors: optional anchor source, either a config ``dict`` (turned
             into an ``Anchors`` object) or an ``Anchors`` instance. Any
             other type is ignored.
     """
     for key, value in (newparam or {}).items():
         setattr(self, key, value)

     if anchors is not None:
         anchor_obj = Anchors(anchors) if isinstance(anchors, dict) else anchors
         if isinstance(anchor_obj, Anchors):
             # Mirror the anchor settings onto this object.
             self.total_stride = anchor_obj.stride
             self.ratios = anchor_obj.ratios
             self.scales = anchor_obj.scales
             self.round_dight = anchor_obj.round_dight

     # Always called, even when nothing was updated.
     self.renew()
Exemplo n.º 17
0
def generate_anchor(cfg, score_size):
    """Build ``(cx, cy, w, h)`` anchors for every position of the score map.

    Values recorded by the original author in a debugging session:
        cfg = {'stride': 8, 'ratios': [0.33, 0.5, 1, 2, 3], 'scales': [8],
               'round_dight': 0}
        score_size = 25
    which gives 5 base anchors and a result of shape (3125, 4).
    """
    generator = Anchors(cfg)
    corner_anchors = generator.anchors
    # For the values above, generator.anchors is float32 with shape (5, 4):
    #   [[-52., -16.,  52.,  16.],
    #    [-44., -20.,  44.,  20.],
    #    [-32., -32.,  32.,  32.],
    #    [-20., -40.,  20.,  40.],
    #    [-16., -48.,  16.,  48.]]

    x1, y1, x2, y2 = (corner_anchors[:, 0], corner_anchors[:, 1],
                      corner_anchors[:, 2], corner_anchors[:, 3])
    # Corner form -> centre / size form.
    result = np.stack([(x1 + x2) * 0.5, (y1 + y2) * 0.5, x2 - x1, y2 - y1], 1)

    stride = generator.stride          # 8 for the values above
    base_count = result.shape[0]       # 5 for the values above

    # One copy of each base anchor per grid cell.
    result = np.tile(result, score_size * score_size).reshape((-1, 4))

    origin = -(score_size // 2) * stride   # -96 for the values above
    ticks = [origin + stride * step for step in range(score_size)]
    grid_x, grid_y = np.meshgrid(ticks, ticks)

    # For the values above both have shape (3125,):
    #   xs -> [-96, -88, -80, ...,  80,  88,  96]
    #   ys -> [-96, -96, -96, ...,  96,  96,  96]
    xs = np.tile(grid_x.flatten(), (base_count, 1)).flatten()
    ys = np.tile(grid_y.flatten(), (base_count, 1)).flatten()

    result[:, 0] = xs.astype(np.float32)
    result[:, 1] = ys.astype(np.float32)

    # For the values above the result is float32 with shape (3125, 4):
    #   [[-96., -96., 104.,  32.],
    #    [-88., -96., 104.,  32.],
    #    ...,
    #    [ 96.,  96.,  32.,  96.]]
    return result
Exemplo n.º 18
0
    def get_FPS(self, image, test_interval):
        """Return the mean time in seconds of one predict-and-decode pass.

        One untimed pass is run first, followed by ``test_interval`` timed
        passes on the same preprocessed input.

        Args:
            image: H x W x C image convertible with ``np.array``.
            test_interval: number of timed iterations.

        Returns:
            Elapsed wall-clock seconds divided by ``test_interval``.
        """
        # Convert to a numpy array and read the input height and width.
        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        if self.letterbox_image:
            # Letterboxing pads the image with gray bars so it can be
            # resized without distortion.
            target_size = [self.input_shape[1], self.input_shape[0]]
            image = letterbox_image(image, target_size)
        else:
            # Anchors must match the actual image size.
            self.anchors = Anchors(
                self.cfg, image_size=(im_height, im_width)).get_anchors()

        # Preprocess / normalize and add the batch dimension.
        photo = np.expand_dims(preprocess_input(image), 0)

        def predict_and_decode():
            # Network prediction followed by decoding of the outputs.
            outputs = [pred.numpy() for pred in self.get_pred(photo)]
            return self.bbox_util.detection_out(
                outputs, self.anchors, confidence_threshold=self.confidence)

        # Untimed first pass.
        predict_and_decode()

        started = time.time()
        for _ in range(test_interval):
            predict_and_decode()
        finished = time.time()

        return (finished - started) / test_interval
Exemplo n.º 19
0
    def detect_image(self, image):
        """Detect faces and return the raw detections in pixel coordinates.

        Note that this method overwrites ``self.confidence`` with 0.02.

        Args:
            image: H x W x C image convertible with ``np.array``.

        Returns:
            An array with one row per detection (box in columns 0..3, score
            in column 4, landmarks from column 5 on), or an empty array when
            nothing is detected.
        """
        self.confidence = 0.02
        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        # Scales used to convert predictions to the size of the original
        # image: (w, h) for the 2 box corners and the 5 landmark points.
        box_scale = [im_width, im_height] * 2
        landmark_scale = [im_width, im_height] * 5

        if self.letterbox_image:
            # Letterboxing pads with gray bars for a distortion-free resize.
            image = letterbox_image(image, [self.input_shape[1], self.input_shape[0]])
        else:
            self.anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()

        # Preprocess / normalize and add the batch dimension.
        photo = np.expand_dims(preprocess_input(image), 0)

        outputs = [pred.numpy() for pred in self.get_pred(photo)]
        # Decode the predictions.
        results = self.bbox_util.detection_out(
            outputs, self.anchors, confidence_threshold=self.confidence)

        # Nothing detected: return an empty array.
        if len(results) <= 0:
            return np.array([])

        results = np.array(results)
        if self.letterbox_image:
            # Remove the effect of the gray bars added by letterboxing.
            network_shape = np.array([self.input_shape[0], self.input_shape[1]])
            image_shape = np.array([im_height, im_width])
            results = retinaface_correct_boxes(results, network_shape, image_shape)

        results[:, :4] = results[:, :4] * box_scale
        results[:, 5:] = results[:, 5:] * landmark_scale

        return results
Exemplo n.º 20
0
    def get_FPS(self, image, test_interval):
        """Return the mean time in seconds of one full detection pass.

        One untimed pass is run first, followed by ``test_interval`` timed
        passes (prediction, decoding and rescaling) on the same input.

        Args:
            image: H x W x C image convertible with ``np.array``.
            test_interval: number of timed iterations.

        Returns:
            Elapsed wall-clock seconds divided by ``test_interval``.
        """
        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        # (w, h) repeated for the 2 box corners and the 5 landmark points.
        box_scale = [im_width, im_height] * 2
        landmark_scale = [im_width, im_height] * 5

        if self.letterbox_image:
            # Letterboxing pads with gray bars for a distortion-free resize.
            image = letterbox_image(image, [self.input_shape[1], self.input_shape[0]])
        else:
            self.anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()

        photo = np.expand_dims(preprocess_input(image), 0)

        def detect_once():
            # Predict, decode and map the detections back to the image.
            preds = self.retinaface.predict(photo)
            results = self.bbox_util.detection_out(
                preds, self.anchors, confidence_threshold=self.confidence)

            if len(results) > 0:
                results = np.array(results)
                if self.letterbox_image:
                    # Remove the effect of the gray bars.
                    results = retinaface_correct_boxes(
                        results,
                        np.array([self.input_shape[0], self.input_shape[1]]),
                        np.array([im_height, im_width]))

                results[:, :4] = results[:, :4] * box_scale
                results[:, 5:] = results[:, 5:] * landmark_scale

        # Untimed first pass.
        detect_once()

        started = time.time()
        for _ in range(test_interval):
            detect_once()
        finished = time.time()
        return (finished - started) / test_interval
Exemplo n.º 21
0
def generate_anchor(cfg, score_size):
    """Return ``(cx, cy, w, h)`` anchors for all cells of the score map.

    Args:
        cfg: anchor configuration passed to ``Anchors``.
        score_size: side length of the square score map.

    Returns:
        Array of shape ``(anchor_num * score_size * score_size, 4)``; each
        base anchor is repeated for every grid cell.
    """
    source = Anchors(cfg)
    boxes = source.anchors
    left, top, right, bottom = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    # Corner form -> centre / size form.
    sized = np.stack(
        [(left + right) * 0.5, (top + bottom) * 0.5, right - left, bottom - top], 1)

    stride = source.stride
    n_base = sized.shape[0]

    # Repeat each base anchor once per grid cell.
    out = np.tile(sized, score_size * score_size).reshape((-1, 4))

    # Grid coordinates centred on zero.
    first = -(score_size // 2) * stride
    coords = [first + stride * idx for idx in range(score_size)]
    mesh_x, mesh_y = np.meshgrid(coords, coords)

    # Repeat the flattened grids once per base anchor.
    col_x = np.tile(mesh_x.flatten(), (n_base, 1)).flatten()
    col_y = np.tile(mesh_y.flatten(), (n_base, 1)).flatten()

    out[:, 0] = col_x.astype(np.float32)
    out[:, 1] = col_y.astype(np.float32)
    return out
Exemplo n.º 22
0
    def __init__(self, num_classes, phi, pretrain_weights=False):
        """Assemble backbone, FPN, heads and anchors.

        Args:
            num_classes: number of classes for ``ClassificationModel``.
            phi: backbone variant index; must be one of 0..4.
            pretrain_weights: forwarded to ``Resnet`` and stored on ``self``.

        Raises:
            KeyError: if ``phi`` has no entry in the channel table.
        """
        super(Retinanet, self).__init__()
        self.pretrain_weights = pretrain_weights
        self.backbone_net = Resnet(phi, pretrain_weights)

        # FPN input channel counts per backbone variant.
        narrow = [128, 256, 512]
        wide = [512, 1024, 2048]
        channel_table = {0: narrow, 1: narrow, 2: wide, 3: wide, 4: wide}
        fpn_sizes = channel_table[phi]

        self.fpn = PyramidFeatures(*fpn_sizes)
        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256, num_classes=num_classes)
        self.anchors = Anchors()
        self._init_weights()
Exemplo n.º 23
0
    def __init__(self, num_classes=80, phi=0, load_weights=False):
        """Assemble BiFPN, box/class heads, anchors and the backbone.

        Args:
            num_classes: number of classes for the classification head.
            phi: EfficientDet version; indexes the per-version tables below
                (0..7).
            load_weights: forwarded to ``EfficientNet``.
        """
        super(EfficientDetBackbone, self).__init__()
        # phi is the EfficientDet version.
        self.phi = phi
        # EfficientNet version used by each EfficientDet version.
        self.backbone_phi = [0, 1, 2, 3, 4, 5, 6, 6]
        # Channel count used by the BiFPN.
        self.fpn_num_filters = [64, 88, 112, 160, 224, 288, 384, 384]
        # Number of stacked BiFPN cells.
        self.fpn_cell_repeats = [3, 4, 5, 6, 7, 7, 8, 8]
        # Number of repeated conv layers in the heads.
        self.box_class_repeats = [3, 3, 3, 4, 4, 4, 5, 5]
        # Base anchor scale.
        self.anchor_scale = [4., 4., 4., 4., 4., 4., 4., 5.]
        num_anchors = 9
        conv_channel_coef = {
            0: [40, 112, 320],
            1: [40, 112, 320],
            2: [48, 120, 352],
            3: [48, 136, 384],
            4: [56, 160, 448],
            5: [64, 176, 512],
            6: [72, 200, 576],
            7: [72, 200, 576],
        }

        fpn_width = self.fpn_num_filters[phi]
        head_depth = self.box_class_repeats[phi]
        use_attention = bool(phi < 6)

        # Only the first cell gets the "first" flag (third argument).
        bifpn_cells = [
            BiFPN(fpn_width,
                  conv_channel_coef[phi],
                  cell_idx == 0,
                  attention=use_attention)
            for cell_idx in range(self.fpn_cell_repeats[phi])
        ]
        self.bifpn = nn.Sequential(*bifpn_cells)

        self.num_classes = num_classes
        self.regressor = BoxNet(in_channels=fpn_width,
                                num_anchors=num_anchors,
                                num_layers=head_depth)

        self.classifier = ClassNet(in_channels=fpn_width,
                                   num_anchors=num_anchors,
                                   num_classes=num_classes,
                                   num_layers=head_depth)
        self.anchors = Anchors(anchor_scale=self.anchor_scale[phi])

        self.backbone_net = EfficientNet(self.backbone_phi[phi], load_weights)
Exemplo n.º 24
0
    def __init__(self, num_classes=80, phi=0, load_weight=False):
        """Assemble BiFPN, box/class heads, anchors and the backbone.

        Args:
            num_classes: number of classes for the classification head.
            phi: model version; indexes the per-version tables below (0..7).
            load_weight: forwarded to ``EfficientNet``.
        """
        super(EfficientDet_BackBone, self).__init__()
        self.phi = phi
        # Per-version lookup tables, indexed by phi.
        self.backbone_phi = [0, 1, 2, 3, 4, 5, 6, 6]
        self.fpn_num_filters = [64, 88, 112, 160, 224, 288, 384, 384]
        self.fpn_cell_repeats = [3, 4, 5, 6, 7, 7, 8, 8]
        self.box_class_repeats = [3, 3, 3, 4, 4, 4, 5, 5]
        self.anchor_scale = [4., 4., 4., 4., 4., 4., 4., 5.]

        num_anchors = 9
        # Channel counts of P3-P5 at the backbone output, per version
        # (per the original author's note).
        conv_channel_coef = {
            0: [40, 112, 320],
            1: [40, 112, 320],
            2: [48, 120, 352],
            3: [48, 136, 384],
            4: [56, 160, 448],
            5: [64, 176, 512],
            6: [72, 200, 576],
            7: [72, 200, 576],
        }

        fpn_width = self.fpn_num_filters[phi]
        head_depth = self.box_class_repeats[phi]
        use_attention = bool(phi < 6)

        # Only the first cell gets the "first" flag (third argument).
        bifpn_cells = [
            BiFPN(fpn_width,
                  conv_channel_coef[phi],
                  cell_idx == 0,
                  attention=use_attention)
            for cell_idx in range(self.fpn_cell_repeats[phi])
        ]
        self.bifpn = nn.Sequential(*bifpn_cells)

        self.num_classes = num_classes

        self.regressor = Box_Block(in_channels=fpn_width,
                                   num_anchors=num_anchors,
                                   num_layers=head_depth)
        self.classifier = Class_Block(in_channels=fpn_width,
                                      num_anchors=num_anchors,
                                      num_layers=head_depth,
                                      num_classes=num_classes)
        self.anchors = Anchors(anchor_scale=self.anchor_scale[phi])
        # NOTE(review): attribute is spelled "bockbone_net"; kept as is
        # because other code may refer to it by this name.
        self.bockbone_net = EfficientNet(self.backbone_phi[phi],
                                         load_weight=load_weight)
Exemplo n.º 25
0
    def __init__(self, pretrain=False, anchors=None, o_sz=127, g_sz=127):
        """Build the complete model: features, RPN, mask and refine parts.

        Args:
            pretrain: forwarded to ``ResDown``.
            anchors: anchor configuration with ``"ratios"`` and ``"scales"``
                entries. NOTE: the default ``None`` fails at the subscripts
                below with a ``TypeError``.
            o_sz: stored as ``self.o_sz``.
            g_sz: stored as ``self.g_sz``; also the square output size of
                ``self.upSample``.
        """
        super(Custom, self).__init__()
        self.anchors = anchors  # raw anchor configuration
        # One anchor per (ratio, scale) combination.
        n_ratios = len(anchors["ratios"])
        n_scales = len(anchors["scales"])
        self.anchor_num = n_ratios * n_scales
        self.anchor = Anchors(anchors)

        self.o_sz, self.g_sz = o_sz, g_sz

        # Bilinear resize to (g_sz, g_sz) with aligned corners.
        self.upSample = nn.Upsample(size=[g_sz, g_sz],
                                    mode='bilinear',
                                    align_corners=True)

        self.features = ResDown(pretrain=pretrain)
        self.rpn_model = UP(anchor_num=self.anchor_num,
                            feature_in=256,
                            feature_out=256)
        self.mask_model = MaskCorr()
        self.refine_model = Refine()

        self.all_anchors = None
Exemplo n.º 26
0
class SiamMask(nn.Module):
    """Skeleton of a Siamese tracker with an RPN head and a mask head.

    The constructor leaves ``features``, ``rpn_model`` and ``mask_model`` set
    to ``None``; the methods below call them, so they have to be assigned
    before use (presumably by a subclass -- confirm against the callers).
    """

    def __init__(self, anchors=None, o_sz=127, g_sz=127):
        """Store the anchor configuration and create the anchor generator.

        Args:
            anchors: anchor configuration; it is indexed with ``"ratios"`` and
                ``"scales"`` and handed to ``Anchors``. The default ``None``
                cannot be used because it is subscripted right away.
            o_sz: stored unchanged as ``self.o_sz``.
            g_sz: stored unchanged as ``self.g_sz``.
        """
        super(SiamMask, self).__init__()
        self.anchors = anchors  # anchor_cfg
        # One anchor per (ratio, scale) combination.
        self.anchor_num = len(self.anchors["ratios"]) * len(self.anchors["scales"])
        self.anchor = Anchors(anchors)
        self.features = None
        self.rpn_model = None
        self.mask_model = None
        self.o_sz = o_sz
        self.g_sz = g_sz
        self.all_anchors = None

    def set_all_anchors(self, image_center, size):
        """Regenerate the anchor grid and cache it as a list of four tensors.

        Nothing is changed when ``self.anchor.generate_all_anchors`` returns a
        falsy value. Otherwise ``self.all_anchors`` becomes
        ``[cx, cy, w, h]``, each a float32 tensor taken from the leading axis
        of ``self.anchor.all_anchors[1]``.

        The tensors are placed on the CUDA device when one is available and
        stay on the CPU otherwise, so the method no longer fails on hosts
        without a GPU.
        """
        # cx,cy,w,h
        if not self.anchor.generate_all_anchors(image_center, size):
            return
        all_anchors = self.anchor.all_anchors[1]  # cx, cy, w, h
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        anchor_tensor = torch.from_numpy(all_anchors).float().to(device)
        self.all_anchors = [anchor_tensor[i] for i in range(4)]

    def feature_extractor(self, x):
        """Run the backbone ``self.features`` on ``x`` and return its output."""
        return self.features(x)

    def rpn(self, template, search):
        """Return ``(pred_cls, pred_loc)`` from ``self.rpn_model``."""
        pred_cls, pred_loc = self.rpn_model(template, search)
        return pred_cls, pred_loc

    def mask(self, template, search):
        """Return the output of ``self.mask_model`` for the given pair."""
        pred_mask = self.mask_model(template, search)
        return pred_mask

    def template(self, z):
        """Extract template features and derive the RPN kernels from them.

        The features are kept in ``self.zf``; the returned pair is whatever
        ``self.rpn_model.template`` produces (classification and localisation
        kernels, going by the variable names).
        """
        self.zf = self.feature_extractor(z)
        cls_kernel, loc_kernel = self.rpn_model.template(self.zf)
        return cls_kernel, loc_kernel

    def track(self, x, cls_kernel=None, loc_kernel=None, softmax=False):
        """Run the RPN in tracking mode on the search image ``x``.

        Args:
            x: input passed to the backbone.
            cls_kernel: forwarded to ``self.rpn_model.track``.
            loc_kernel: forwarded to ``self.rpn_model.track``.
            softmax: when true, the classification output is passed through
                ``self.softmax``.

        Returns:
            ``(rpn_pred_cls, rpn_pred_loc)``.
        """
        xf = self.feature_extractor(x)
        rpn_pred_cls, rpn_pred_loc = self.rpn_model.track(xf, cls_kernel, loc_kernel)
        if softmax:
            # NOTE(review): no ``softmax`` attribute is defined in this class;
            # presumably a subclass provides it -- confirm, otherwise this
            # branch raises AttributeError.
            rpn_pred_cls = self.softmax(rpn_pred_cls)
        return rpn_pred_cls, rpn_pred_loc
Exemplo n.º 27
0
    def __init__(self, **kwargs):
        """Initialise the detector from the class defaults plus overrides.

        Every entry of ``self._defaults`` is copied onto the instance first;
        each keyword argument then overrides (or adds) the attribute of the
        same name. Afterwards the backbone configuration, the box utility and
        the prior boxes are prepared and ``self.generate()`` is called.
        """
        self.__dict__.update(self._defaults)
        for name, value in kwargs.items():
            setattr(self, name, value)

        # Configuration that matches the selected backbone.
        self.cfg = cfg_mnet if self.backbone == "mobilenet" else cfg_re50

        # Box utility and prior boxes for the configured input size.
        self.bbox_util = BBoxUtility(nms_thresh=self.nms_iou)
        input_size = (self.input_shape[0], self.input_shape[1])
        anchor_generator = Anchors(self.cfg, image_size=input_size)
        self.anchors = anchor_generator.get_anchors()

        self.generate()
Exemplo n.º 28
0
    def __init__(self, num_classes, phi, pretrained=False):
        """Build the backbone, the feature pyramid and the two prediction heads.

        Args:
            num_classes: number of classes given to ``ClassificationModel``.
            phi: backbone selector; passed to ``Resnet`` and used as the key
                (0-4) of the channel table below.
            pretrained: stored on the instance and forwarded to ``Resnet``.
        """
        super(retinanet, self).__init__()
        self.pretrained = pretrained

        # The backbone yields three feature levels, C3, C4 and C5.
        # Per the original notes, for a 600x600x3 input and resnet50:
        #   C3     75,75,512
        #   C4     38,38,1024
        #   C5     19,19,2048
        self.backbone_net = Resnet(phi, pretrained)

        # Channel counts of (C3, C4, C5) for every supported ``phi``.
        narrow_channels = [128, 256, 512]
        wide_channels = [512, 1024, 2048]
        channels_by_phi = {
            0: narrow_channels,
            1: narrow_channels,
            2: wide_channels,
            3: wide_channels,
            4: wide_channels,
        }
        c3_channels, c4_channels, c5_channels = channels_by_phi[phi]

        # The FPN turns them into five levels (shapes from the original notes):
        #   P3     75,75,256
        #   P4     38,38,256
        #   P5     19,19,256
        #   P6     10,10,256
        #   P7     5,5,256
        self.fpn = PyramidFeatures(c3_channels, c4_channels, c5_channels)

        # P3-P7 are fed to the heads, which give the box-regression and the
        # classification predictions; results of all levels are stacked.
        head_channels = 256
        self.regressionModel = RegressionModel(head_channels)
        self.classificationModel = ClassificationModel(
            head_channels, num_classes=num_classes)

        self.anchors = Anchors()
        self._init_weights()
Exemplo n.º 29
0
    def __init__(self, cfg, anchor_cfg, num_epoch=1):
        """Build the training dataset from ``cfg`` and the anchor settings.

        Args:
            cfg: dataset configuration. Optional keys read here are
                ``template_size``, ``origin_size``, ``search_size``,
                ``base_size``, ``size``, ``crop_size``, ``template_small``,
                ``anchor_target`` and ``num``; ``datasets`` and
                ``augmentation`` are required. ``cfg`` is modified in place:
                a missing ``anchor_target`` entry is created and ``mark`` /
                ``start`` are written into every sub-dataset entry.
            anchor_cfg: configuration forwarded to ``Anchors``.
            num_epoch: multiplier applied to the number of samples.

        Raises:
            Exception: if the configured sizes do not agree with the anchor
                stride, or if ``cfg`` has no ``datasets`` entry.
        """
        super(DataSets, self).__init__()
        global logger
        logger = logging.getLogger('global')

        # Anchor generator.
        self.anchors = Anchors(anchor_cfg)

        # Size defaults ...
        self.template_size = 127
        self.origin_size = 127
        self.search_size = 255
        self.size = 17
        self.base_size = 0
        self.crop_size = 0
        # ... each of which may be overridden by the configuration.
        for key in ('template_size', 'origin_size', 'search_size',
                    'base_size', 'size'):
            if key in cfg:
                setattr(self, key, cfg[key])

        # The score-map size must be consistent with the other sizes.
        expected_size = ((self.search_size - self.template_size)
                         / self.anchors.stride + 1 + self.base_size)
        if expected_size != self.size:
            raise Exception("size not match!")  # TODO: calculate size online

        if 'crop_size' in cfg:
            self.crop_size = cfg['crop_size']
        self.template_small = bool(
            'template_small' in cfg and cfg['template_small'])

        # Generate the anchors for the search region.
        self.anchors.generate_all_anchors(
            im_c=self.search_size // 2, size=self.size)

        # Layer that produces the per-anchor targets (cls, reg, mask).
        if 'anchor_target' not in cfg:
            cfg['anchor_target'] = {}
        self.anchor_target = AnchorTargetLayer(cfg['anchor_target'])

        # Sub-datasets.
        if 'datasets' not in cfg:
            raise Exception('DataSet need "{}"'.format('datasets'))

        self.all_data = []
        self.num = 0
        offset = 0
        for mark in cfg['datasets']:
            sub_cfg = cfg['datasets'][mark]
            sub_cfg['mark'] = mark
            sub_cfg['start'] = offset
            # Load the sub-dataset.
            subset = SubDataSet(sub_cfg)
            subset.log()
            self.all_data.append(subset)
            offset += subset.num        # real video number
            self.num += subset.num_use  # the number used for subset shuffle

        # Data augmentation.
        aug_cfg = cfg['augmentation']
        self.template_aug = Augmentation(aug_cfg['template'])
        self.search_aug = Augmentation(aug_cfg['search'])
        self.gray = aug_cfg['gray']
        self.neg = aug_cfg['neg']
        self.inner_neg = aug_cfg['inner_neg'] if 'inner_neg' in aug_cfg else 0

        self.pick = None  # list to save id for each img
        if 'num' in cfg:  # number used in training for all dataset
            self.num = int(cfg['num'])
        self.num *= num_epoch
        self.shuffle()

        self.infos = {
            'template': self.template_size,
            'search': self.search_size,
            'template_small': self.template_small,
            'gray': self.gray,
            'neg': self.neg,
            'inner_neg': self.inner_neg,
            'crop_size': self.crop_size,
            'anchor_target': self.anchor_target.__dict__,
            'num': self.num // num_epoch,
        }
        summary = json.dumps(self.infos, indent=4)
        logger.info('dataset informations: \n{}'.format(summary))
Exemplo n.º 30
0
class DataSets(Dataset):
    def __init__(self, cfg, anchor_cfg, num_epoch=1):
        """Build the training dataset from ``cfg`` and the anchor settings.

        Args:
            cfg: dataset configuration. Optional keys read here are
                ``template_size``, ``origin_size``, ``search_size``,
                ``base_size``, ``size``, ``crop_size``, ``template_small``,
                ``anchor_target`` and ``num``; ``datasets`` and
                ``augmentation`` are required. ``cfg`` is modified in place:
                a missing ``anchor_target`` entry is created and ``mark`` /
                ``start`` are written into every sub-dataset entry.
            anchor_cfg: configuration forwarded to ``Anchors``.
            num_epoch: multiplier applied to the number of samples.

        Raises:
            Exception: if the configured sizes do not agree with the anchor
                stride, or if ``cfg`` has no ``datasets`` entry.
        """
        super(DataSets, self).__init__()
        global logger
        logger = logging.getLogger('global')

        # Anchor generator.
        self.anchors = Anchors(anchor_cfg)

        # Size defaults ...
        self.template_size = 127
        self.origin_size = 127
        self.search_size = 255
        self.size = 17
        self.base_size = 0
        self.crop_size = 0
        # ... each of which may be overridden by the configuration.
        for key in ('template_size', 'origin_size', 'search_size',
                    'base_size', 'size'):
            if key in cfg:
                setattr(self, key, cfg[key])

        # The score-map size must be consistent with the other sizes.
        expected_size = ((self.search_size - self.template_size)
                         / self.anchors.stride + 1 + self.base_size)
        if expected_size != self.size:
            raise Exception("size not match!")  # TODO: calculate size online

        if 'crop_size' in cfg:
            self.crop_size = cfg['crop_size']
        self.template_small = bool(
            'template_small' in cfg and cfg['template_small'])

        # Generate the anchors for the search region.
        self.anchors.generate_all_anchors(
            im_c=self.search_size // 2, size=self.size)

        # Layer that produces the per-anchor targets (cls, reg, mask).
        if 'anchor_target' not in cfg:
            cfg['anchor_target'] = {}
        self.anchor_target = AnchorTargetLayer(cfg['anchor_target'])

        # Sub-datasets.
        if 'datasets' not in cfg:
            raise Exception('DataSet need "{}"'.format('datasets'))

        self.all_data = []
        self.num = 0
        offset = 0
        for mark in cfg['datasets']:
            sub_cfg = cfg['datasets'][mark]
            sub_cfg['mark'] = mark
            sub_cfg['start'] = offset
            # Load the sub-dataset.
            subset = SubDataSet(sub_cfg)
            subset.log()
            self.all_data.append(subset)
            offset += subset.num        # real video number
            self.num += subset.num_use  # the number used for subset shuffle

        # Data augmentation.
        aug_cfg = cfg['augmentation']
        self.template_aug = Augmentation(aug_cfg['template'])
        self.search_aug = Augmentation(aug_cfg['search'])
        self.gray = aug_cfg['gray']
        self.neg = aug_cfg['neg']
        self.inner_neg = aug_cfg['inner_neg'] if 'inner_neg' in aug_cfg else 0

        self.pick = None  # list to save id for each img
        if 'num' in cfg:  # number used in training for all dataset
            self.num = int(cfg['num'])
        self.num *= num_epoch
        self.shuffle()

        self.infos = {
            'template': self.template_size,
            'search': self.search_size,
            'template_small': self.template_small,
            'gray': self.gray,
            'neg': self.neg,
            'inner_neg': self.inner_neg,
            'crop_size': self.crop_size,
            'anchor_target': self.anchor_target.__dict__,
            'num': self.num // num_epoch,
        }
        summary = json.dumps(self.infos, indent=4)
        logger.info('dataset informations: \n{}'.format(summary))

    def imread(self, path):
        """Read an image and rescale it when origin and template sizes differ.

        Args:
            path: file path handed to ``cv2.imread``.

        Returns:
            A pair ``(img, scale)``. When ``self.origin_size`` equals
            ``self.template_size`` the image is returned as read and ``scale``
            is ``1.0``. Otherwise the image is resized to an ``nsize`` x
            ``nsize`` square and ``scale`` is ``nsize`` divided by the width
            of the image *before* resizing.
        """
        img = cv2.imread(path)
        if self.origin_size == self.template_size:
            # No rescaling needed.
            return img, 1.0

        def map_size(exe, size):
            # Maps an image extent ``size`` by the ratio between the requested
            # exemplar size and the original exemplar size (both taken +1).
            return int(round(((exe + 1) / (self.origin_size + 1) * (size+1) - 1)))

        # Remember the source width: ``img`` is rebound to the resized image
        # below, and dividing by the resized width would always give 1.0.
        source_width = img.shape[1]
        nsize = map_size(self.template_size, source_width)
        img = cv2.resize(img, (nsize, nsize))
        return img, nsize / source_width

    def shuffle(self):
        """Rebuild ``self.pick``, the shuffled list of sample indices.

        Each pass concatenates the result of ``shuffle()`` of every
        sub-dataset in ``self.all_data``, shuffles that list with
        ``sample_random`` and appends it to the pick list. Passes are repeated
        until the list holds at least ``self.num`` entries.

        Raises:
            ValueError: if a pass produces no indices at all while more are
                still required; without this check the loop would never end.
        """
        pick = []
        while len(pick) < self.num:
            # Gather one round of indices from all sub-datasets.
            round_pick = []
            for subset in self.all_data:
                round_pick += subset.shuffle()
            if not round_pick:
                raise ValueError(
                    "cannot shuffle: the sub-datasets provide no samples "
                    "but {} are required".format(self.num))
            # Mix the sub-datasets together, then extend the overall list.
            sample_random.shuffle(round_pick)
            pick += round_pick
        self.pick = pick
        logger.info("shuffle done!")
        logger.info("dataset length {}".format(self.num))

    def __len__(self):
        return self.num

    def find_dataset(self, index):
        "查找数据"
        for dataset in self.all_data:
            if dataset.start + dataset.num > index:
                # 返回索引范围内的数据
                return dataset, index - dataset.start

    def __getitem__(self, index, debug=False):
        """Assemble one training sample (template / search pair plus targets).

        Args:
            index: position in the shuffled list ``self.pick``.
            debug: not used by the active code; it is only referenced by the
                commented-out drawing block below.

        Returns:
            An 8-tuple ``(template, search, cls, delta, delta_weight, bbox,
            mask, mask_weight)``. ``template`` and ``search`` are the
            augmented images transposed with ``(2, 0, 1)`` and cast to
            float32; ``cls``, ``delta`` and ``delta_weight`` come from
            ``self.anchor_target``; ``bbox``, ``mask`` and ``mask_weight`` are
            float32 arrays, with ``mask`` holding the values -1 and 1.
        """
        # Map the position in the shuffled list to a global sample index.
        index = self.pick[index]
        # Find the owning sub-dataset and the index local to it.
        dataset, index = self.find_dataset(index)
        # Grayscale flag: set when self.gray exceeds a random draw.
        gray = self.gray and self.gray > random.random()
        # Negative-pair flag: set when self.neg exceeds a random draw.
        neg = self.neg and self.neg > random.random()
        # Negative pair
        if neg:
            # Template target from the current sub-dataset.
            template = dataset.get_random_target(index)
            # Depending on inner_neg, the search target comes either from the
            # same sub-dataset or from a randomly chosen sub-dataset.
            if self.inner_neg and self.inner_neg > random.random():
                search = dataset.get_random_target()
            else:
                search = random.choice(self.all_data).get_random_target()
        else:
            # Positive pair
            template, search = dataset.get_positive_pair(index)
        # Crop the central part of an image.
        def center_crop(img, size):
            # Only shape[1] is inspected; the same slice is applied to both
            # axes (assumes a square image -- TODO confirm).
            shape = img.shape[1]
            # Already the requested size: return unchanged.
            if shape == size: return img
            # Otherwise slice around the centre. The slice spans
            # 2 * (size // 2) + 1 elements, i.e. `size` when size is odd.
            c = shape // 2
            l = c - size // 2
            r = c + size // 2 + 1
            return img[l:r, l:r]
        # Read the template image (the returned scale is not used below).
        template_image, scale_z = self.imread(template[0])
        # With template_small set, crop the template to template_size.
        if self.template_small:
            template_image = center_crop(template_image, self.template_size)
        # Read the search image (the returned scale is not used below).
        search_image, scale_x = self.imread(search[0])
        # A mask is loaded only for datasets that have one and for positive pairs.
        if dataset.has_mask and not neg:
            # Read the mask as a single channel and binarise it (> 0).
            search_mask = (cv2.imread(search[2], 0) > 0).astype(np.float32)
        else:
            # Otherwise use an all-zero mask of the search image's height/width.
            search_mask = np.zeros(search_image.shape[:2], dtype=np.float32)
        # With a positive crop_size, crop both the search image and its mask.
        if self.crop_size > 0:
            search_image = center_crop(search_image, self.crop_size)
            search_mask = center_crop(search_mask, self.crop_size)
        # Build a box centred in `image`; `shape` describes the target's box.
        def toBBox(image, shape):
            # Image height and width.
            imh, imw = image.shape[:2]
            # Width/height of the target: four values are treated as corner
            # coordinates (differences are taken), otherwise as (w, h).
            if len(shape) == 4:
                w, h = shape[2]-shape[0], shape[3]-shape[1]
            else:
                w, h = shape
            # Amount of context added around the target.
            context_amount = 0.5
            # Template (exemplar) size.
            exemplar_size = self.template_size  # 127
            # Width and height with context added.
            wc_z = w + context_amount * (w+h)
            hc_z = h + context_amount * (w+h)
            # Side length of the equivalent square.
            s_z = np.sqrt(wc_z * hc_z)
            # Scale that maps that side length to the exemplar size
            # (local to this helper).
            scale_z = exemplar_size / s_z
            # Scaled width and height.
            w = w*scale_z
            h = h*scale_z
            # The box is centred at the image centre.
            cx, cy = imw//2, imh//2
            bbox = center2corner(Center(cx, cy, w, h))
            return bbox
        # Boxes for the template image and the search image.
        template_box = toBBox(template_image, template[1])
        search_box = toBBox(search_image, search[1])
        # Augment the template (only the image result is kept).
        template, _, _ = self.template_aug(template_image, template_box, self.template_size, gray=gray)
        # Augment the search image together with its box and mask.
        search, bbox, mask = self.search_aug(search_image, search_box, self.search_size, gray=gray, mask=search_mask)

        # Disabled debug visualisation:
        # def draw(image, box, name):
        #     image = image.copy()
        #     x1, y1, x2, y2 = map(lambda x: int(round(x)), box)
        #     cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0))
        #     cv2.imwrite(name, image)
        #
        # if debug:
        #     draw(template_image, template_box, "debug/{:06d}_ot.jpg".format(index))
        #     draw(search_image, search_box, "debug/{:06d}_os.jpg".format(index))
        #     draw(template, _, "debug/{:06d}_t.jpg".format(index))
        #     draw(search, bbox, "debug/{:06d}_s.jpg".format(index))
        # Per-anchor targets for the augmented box.
        cls, delta, delta_weight = self.anchor_target(self.anchors, bbox, self.size, neg)
        if dataset.has_mask and not neg:
            # Mask weight: maximum of cls over its first axis (axis kept).
            mask_weight = cls.max(axis=0, keepdims=True)
        else:
            # No mask supervision: all-zero weights.
            mask_weight = np.zeros([1, cls.shape[1], cls.shape[2]], dtype=np.float32)
        # Move the last axis of both images to the front and cast to float32.
        template, search = map(lambda x: np.transpose(x, (2, 0, 1)).astype(np.float32), [template, search])
        # Threshold the mask at 0.5 and map it to {-1, 1}; a leading axis is added.
        mask = (np.expand_dims(mask, axis=0) > 0.5) * 2 - 1  # 1*H*W
        # Return the sample.
        return template, search, cls, delta, delta_weight, np.array(bbox, np.float32), \
               np.array(mask, np.float32), np.array(mask_weight, np.float32)