    def __call__(self, image, bbox, size, gray=False):
        """Apply the augmentation pipeline and return the patch with its box.

        :param image: source crop. The original author's note says it is a
            511x511 image with the target already aligned to the centre.
        :param bbox: target box (including context) in ``image`` coordinates.
        :param size: side length of the output patch (the original note gives
            127 for the template and 255 for the search region).
        :param gray: when true, run the grayscale augmentation first.
        :return: ``(image, bbox)`` after all augmentations.
        """
        img_h, img_w = image.shape[:2]
        # Nominal crop window centred in the image. Center is (x, y, w, h),
        # so x must come from the width and y from the height; for the
        # square crops described above both orders give the same value.
        crop_bbox = center2corner(Center(img_w // 2, img_h // 2,
                                         size - 1, size - 1))

        # gray augmentation
        if gray:
            image = self._gray_aug(image)

        # shift scale augmentation: this is where the patch is actually
        # cropped (and rescaled) and the box is remapped to patch coordinates
        image, bbox = self._shift_scale_aug(image, bbox, crop_bbox, size)

        # color augmentation
        if self.color > np.random.random():
            image = self._color_aug(image)

        # blur augmentation: the target/patch area ratio is handed to the
        # blur helper so it can limit the kernel size (original note: an
        # oversized kernel would wash out the target)
        _, _, w, h = corner2center(bbox)
        area_ratio = (w * h * 1.0) / (size * size)
        if self.blur > np.random.random():
            image = self._blur_aug(image, area_ratio)

        # flip augmentation
        if self.flip and self.flip > np.random.random():
            image, bbox = self._flip_aug(image, bbox)
        return image, bbox
    def perturb(self, bbox, sz):
        """Rebuild ``bbox`` from its centre and extent via ``center2corner``.

        :param bbox: object exposing ``x1``, ``y1``, ``x2``, ``y2``.
        :param sz: unused; kept so existing callers keep working.
        :return: whatever ``center2corner`` returns for ``[cx, cy, w, h]``.
        """
        left, top = bbox.x1, bbox.y1
        right, bottom = bbox.x2, bbox.y2

        center_x = (left + right) / 2
        center_y = (top + bottom) / 2
        # Absolute differences, so the extent is non-negative even if the
        # corners arrive swapped.
        box_w = np.abs(left - right)
        box_h = np.abs(top - bottom)

        return center2corner(np.array([center_x, center_y, box_w, box_h]))
    def _get_bbox(self, image, shape):
        """Return the target box, rescaled to exemplar scale, centred in ``image``.

        :param image: array whose first two dimensions are (height, width).
        :param shape: either four values ``[x1, y1, x2, y2]`` or two values
            ``[w, h]`` describing the target.
        :return: result of ``center2corner`` for the centred, rescaled box
            (original note: in ``[x1, y1, x2, y2]`` form).
        """
        imh, imw = image.shape[:2]
        if len(shape) == 4:
            # corner form: derive width and height from the two corners
            w, h = shape[2] - shape[0], shape[3] - shape[1]
        else:
            # already (w, h)
            w, h = shape

        # Fraction of (w + h) added around the target as context; the
        # padded region is what is treated as the template area.
        context_amount = 0.5
        exemplar_size = cfg.TRAIN.EXEMPLAR_SIZE
        wc_z = w + context_amount * (w + h)
        hc_z = h + context_amount * (w + h)
        s_z = np.sqrt(wc_z * hc_z)
        # The padded template area is resized to exemplar_size when the
        # dataset is cropped, so w and h are scaled by the same factor.
        scale_z = exemplar_size / s_z
        w = w * scale_z
        h = h * scale_z
        # Original note: the template is aligned to the image centre when
        # the dataset is built, so the box is centred there.
        cx, cy = imw // 2, imh // 2
        bbox = center2corner(Center(cx, cy, w, h))
        return bbox
    def _shift_scale_aug(self, image, bbox, crop_bbox, size):
        """Randomly rescale and shift the crop window, then crop and remap the box.

        :param image: source image to crop from.
        :param bbox: ground-truth box (with context) in ``image`` coordinates.
        :param crop_bbox: nominal crop window in ``image`` coordinates.
        :param size: side length of the output patch, forwarded to
            ``_crop_roi`` (original note: 127 for the template, 255 for the
            search region).
        :return: ``(image, bbox)`` where ``image`` is the cropped patch and
            ``bbox`` is expressed relative to the crop window's top-left
            corner, divided by the scale factors when scaling is enabled.
        """
        im_h, im_w = image.shape[:2]

        # adjust crop bounding box (size first, then position)
        crop_bbox_center = corner2center(crop_bbox)
        if self.scale:
            scale_x = (1.0 + Augmentation.random() * self.scale)
            scale_y = (1.0 + Augmentation.random() * self.scale)
            h, w = crop_bbox_center.h, crop_bbox_center.w
            # Cap the factors so the scaled window is never larger than
            # the image itself.
            scale_x = min(scale_x, float(im_w) / w)
            scale_y = min(scale_y, float(im_h) / h)
            # Center takes (x, y, w, h); the centre stays put, only the
            # extent is rescaled.
            crop_bbox_center = Center(crop_bbox_center.x,
                                      crop_bbox_center.y,
                                      crop_bbox_center.w * scale_x,
                                      crop_bbox_center.h * scale_y)

        crop_bbox = center2corner(crop_bbox_center)

        if self.shift:
            # Original note: the SiamRPN++ paper discusses how a large
            # shift range helps against position bias during training.
            sx = Augmentation.random() * self.shift
            sy = Augmentation.random() * self.shift

            x1, y1, x2, y2 = crop_bbox

            # Clamp the shift: the inner min keeps x2 + sx (y2 + sy) within
            # the right (bottom) border, the outer max keeps x1 + sx
            # (y1 + sy) from going below zero.
            sx = max(-x1, min(im_w - 1 - x2, sx))
            sy = max(-y1, min(im_h - 1 - y2, sy))

            crop_bbox = Corner(x1 + sx, y1 + sy, x2 + sx, y2 + sy)

        # adjust target bounding box: re-express the ground truth relative
        # to the top-left corner of the final crop window
        x1, y1 = crop_bbox.x1, crop_bbox.y1
        bbox = Corner(bbox.x1 - x1, bbox.y1 - y1,
                      bbox.x2 - x1, bbox.y2 - y1)

        if self.scale:
            # undo the window rescaling so the box matches the output patch
            bbox = Corner(bbox.x1 / scale_x, bbox.y1 / scale_y,
                          bbox.x2 / scale_x, bbox.y2 / scale_y)

        # cut the window out of the image
        image = self._crop_roi(image, crop_bbox, size)
        return image, bbox
    def generate_all_anchors(self, im_c, size):
        """Build the anchors for every cell of a ``size`` x ``size`` grid.

        im_c: image center (original note: centre of the search image, 255 // 2)
        size: side length of the grid (original note: the 17x17 output map)

        Returns False without recomputing when ``im_c`` and ``size`` match
        the cached values. Otherwise stores a ``(corners, centers)`` pair in
        ``self.all_anchors`` and returns True.
        """
        if self.image_center == im_c and self.size == size:
            return False
        self.image_center = im_c
        self.size = size

        # Offset that places the first grid cell so that the whole grid is
        # centred on im_c at input resolution.
        a0x = im_c - size // 2 * self.stride
        ori = np.array([a0x] * 4, dtype=np.float32)
        # anchors of the first (top-left) cell; self.anchors is indexed as
        # [:, 0..3] below, i.e. one row of four corner values per anchor
        zero_anchors = self.anchors + ori

        # one (anchor_num, 1, 1) column per corner coordinate
        x1, y1, x2, y2 = [
            zero_anchors[:, k].reshape(self.anchor_num, 1, 1)
            for k in range(4)
        ]
        cx, cy, w, h = corner2center([x1, y1, x2, y2])

        # per-cell displacement along x (last axis) and y (middle axis)
        steps = np.arange(0, size) * self.stride
        cx = cx + steps.reshape(1, 1, -1)
        cy = cy + steps.reshape(1, -1, 1)

        # broadcast every component to (anchor_num, size, size)
        zero = np.zeros((self.anchor_num, size, size), dtype=np.float32)
        cx, cy, w, h = [component + zero for component in (cx, cy, w, h)]
        x1, y1, x2, y2 = center2corner([cx, cy, w, h])
        self.all_anchors = (np.stack([x1, y1, x2, y2]).astype(np.float32),
                            np.stack([cx, cy, w, h]).astype(np.float32))
        return True
    def __call__(self, image, bbox, size, gray=False):
        """Apply the augmentation pipeline and return the patch with its box.

        :param image: source image; only its first two dimensions
            (height, width) are read here.
        :param bbox: target box in ``image`` coordinates.
        :param size: side length of the output patch (per the original
            comment, 127 for the template and 255 for the search region).
        :param gray: when true, run the grayscale augmentation first.
        :return: ``(image, bbox)`` after all augmentations.
        """
        img_h, img_w = image.shape[:2]
        # Nominal crop window centred in the image. Center is (x, y, w, h),
        # so x is taken from the width and y from the height; for square
        # inputs both orders give the same value.
        crop_bbox = center2corner(Center(img_w // 2, img_h // 2,
                                         size - 1, size - 1))

        # gray augmentation
        if gray:
            image = self._gray_aug(image)

        # shift scale augmentation (both are handled by the same helper)
        image, bbox = self._shift_scale_aug(image, bbox, crop_bbox, size)

        # color augmentation
        if self.color > np.random.random():
            image = self._color_aug(image)

        # blur augmentation
        if self.blur > np.random.random():
            image = self._blur_aug(image)

        # flip augmentation
        if self.flip and self.flip > np.random.random():
            image, bbox = self._flip_aug(image, bbox)
        return image, bbox
    def track(self, img):
        """Run one tracking step on ``img`` and update the tracker state.

        args:
            img(np.ndarray): BGR image
        return:
            dict with the keys 'bbox', 'best_score', 'best_idx', 'pscore',
            'score' and 'xf'; 'bbox' is a list [x, y, width, height] whose
            x, y are ``cx - width / 2`` and ``cy - height / 2``.

        Side effects: updates ``self.center_pos``, ``self.size`` and
        ``self.xf_crops``.
        """
        # size of the search window derived from the current target size
        w_z = self.size[0] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size)
        h_z = self.size[1] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size)
        s_z = np.sqrt(w_z * h_z)
        scale_z = cfg.TRACK.EXEMPLAR_SIZE / s_z
        s_x = s_z * (cfg.TRACK.INSTANCE_SIZE / cfg.TRACK.EXEMPLAR_SIZE)
        x_crop = self.get_subwindow(img, self.center_pos,
                                    round(s_x), self.channel_average)

        # Per the original note, the model output holds:
        # 'cls': cls,
        # 'loc': loc,
        # 'xf': xf,
        # 'mask': mask if cfg.MASK.MASK else None
        outputs = self.model.track(x_crop, self.xf_crops)

        score = self._convert_score(outputs['cls'])
        pred_bbox = self._convert_bbox(outputs['loc'], self.anchors)

        def change(r):
            # symmetric ratio: always >= 1 regardless of direction
            return np.maximum(r, 1. / r)

        def sz(w, h):
            # size of a box after padding both sides by half of (w + h)
            pad = (w + h) * 0.5
            return np.sqrt((w + pad) * (h + pad))

        # scale penalty
        s_c = change(sz(pred_bbox[2, :], pred_bbox[3, :]) /
                     (sz(self.size[0]*scale_z, self.size[1]*scale_z)))

        # aspect ratio penalty
        r_c = change((self.size[0]/self.size[1]) /
                     (pred_bbox[2, :]/pred_bbox[3, :]))
        penalty = np.exp(-(r_c * s_c - 1) * cfg.TRACK.PENALTY_K)
        pscore = penalty * score

        # window penalty
        pscore = pscore * (1 - cfg.TRACK.WINDOW_INFLUENCE) + \
            self.window * cfg.TRACK.WINDOW_INFLUENCE
        best_idx = np.argmax(pscore)

        bbox = pred_bbox[:, best_idx]
        # NOTE(review): `iou` is computed but not used in the visible code;
        # kept in case a caller-facing use was intended — confirm or remove.
        iou = IoU(center2corner(bbox), center2corner(np.transpose(self.anchors)))
        # in-place division: this also rescales that column of pred_bbox,
        # which is not read again below
        bbox /= scale_z
        lr = penalty[best_idx] * score[best_idx] * cfg.TRACK.LR

        # predicted centre is an offset from the previous centre
        cx = bbox[0] + self.center_pos[0]
        cy = bbox[1] + self.center_pos[1]

        # smooth bbox
        width = self.size[0] * (1 - lr) + bbox[2] * lr
        height = self.size[1] * (1 - lr) + bbox[3] * lr

        # clip boundary
        cx, cy, width, height = self._bbox_clip(cx, cy, width,
                                                height, img.shape[:2])

        # update state
        self.center_pos = np.array([cx, cy])
        self.size = np.array([width, height])

        bbox = [cx - width / 2,
                cy - height / 2,
                width,
                height]
        best_score = score[best_idx]

        # crop search region for feature transform
        # NOTE(review): assumes the flattened score is laid out as
        # 5 x 25 x 25 — TODO confirm against the model head.
        _, iy, ix = np.unravel_index(best_idx, [5, 25, 25])
        iy += 3
        ix += 3
        # 7x7 window around the best location on each 'xf' feature map
        self.xf_crops = [o[:, :, iy-3:iy+4, ix-3:ix+4].contiguous() for o in outputs['xf']]

        return {
                'bbox': bbox,
                'best_score': best_score,
                'best_idx': best_idx,
                'pscore': pscore,
                'score': score,
                'xf': outputs['xf'],
               }
    def __call__(self, image, bbox, size, data, gray=False):
        shape = image.shape
        cv2.imwrite('511.jpg', image)  # image:[511,511,3]

        if data == 'template':
            image1 = np.zeros((127, 127, 3))
            for i in range(127):
                for j in range(127):
                    for k in range(3):
                        if k == 0:
                            image1[i, j, k] = 87
                        elif k == 1:
                            image1[i, j, k] = 135
                        elif k == 2:
                            image1[i, j, k] = 123

        crop_bbox = center2corner(
            Center(shape[0] // 2, shape[1] // 2, size - 1, size - 1))
        # gray augmentation
        if gray:
            image = self._gray_aug(image)

        # shift scale augmentation
        image, bbox = self._shift_scale_aug(image, bbox, crop_bbox, size)
        #cv2.imwrite('127_255.jpg', image)  # image:[127,127,3] 或 [255,255,3]
        crop_bbox = center2corner(
            Center(shape[0] // 2, shape[1] // 2, size - 1, size - 1))
        # color augmentation
        if self.color > np.random.random():
            image = self._color_aug(image)

        # blur augmentation
        if self.blur > np.random.random():
            image = self._blur_aug(image)

        # flip augmentation
        if self.flip and self.flip > np.random.random():
            image, bbox = self._flip_aug(image, bbox)

        if data == 'template':
            # visual bounding box
            cv2.rectangle(image, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (0, 0, 255),
                          thickness=1)  # 红[0,0,255]
            cv2.imwrite('127_bbox.jpg', image)  # image:[255,255,3]

            image_l = image1
            image_t = image1.copy()
            image_b = image1.copy()
            image_r = image1.copy()
            image_l[int(bbox[1]):(int(bbox[3])), int(bbox[0]):(int(bbox[0]+cfg.corners.crop_size)), :] =\
                image[int(bbox[1]):(int(bbox[3])), int(bbox[0]):(int(bbox[0]+cfg.corners.crop_size)), :]
            cv2.imwrite('crop_l.jpg', image_l)  # image:[255,255,3]

            #cv2.imwrite('127_bbox——2.jpg', image)  # image:[255,255,3]
            #cv2.imwrite('127_bbox--3.jpg', image_t)  # image:[255,255,3]
            image_t[int(bbox[1]):(int(bbox[1]+cfg.corners.crop_size)), int(bbox[0]):(int(bbox[2])), :] = \
                image[int(bbox[1]):(int(bbox[1]+cfg.corners.crop_size)), int(bbox[0]):(int(bbox[2])), :]
            cv2.imwrite('crop_t.jpg', image_t)  # image:[255,255,3]

            image_b[(int(bbox[3] - cfg.corners.crop_size)):int(bbox[3]), int(bbox[0]):(int(bbox[2])), :] = \
                image[(int(bbox[3] - cfg.corners.crop_size)):int(bbox[3]), int(bbox[0]):(int(bbox[2])), :]
            cv2.imwrite('crop_b.jpg', image_b)  # image:[255,255,3]

            image_r[int(bbox[1]):(int(bbox[3])), (int(bbox[2]-cfg.corners.crop_size)):int(bbox[2]), :] = \
                image[int(bbox[1]):(int(bbox[3])), (int(bbox[2]-cfg.corners.crop_size)):int(bbox[2]), :]
            cv2.imwrite('crop_r.jpg', image_r)  # image:[255,255,3]

        if data == 'search':
            attentions = [
                np.zeros((1, cfg.atts.att_size, cfg.atts.att_size),
            ]  # 25 为attention map大小
            # tl_heats_map
            tl_heats = np.zeros(
                (1, cfg.corners.cor_size, cfg.corners.cor_size),
                dtype=np.float32)  # [1,25,25]
            br_heats = np.zeros(
                (1, cfg.corners.cor_size, cfg.corners.cor_size),
            # tl_valids
            tl_regrs = np.zeros((cfg.corners.offs_max_objects, 2),
            br_regrs = np.zeros((cfg.corners.offs_max_objects, 2),
            tl_tags = np.zeros((cfg.corners.offs_max_objects), dtype=np.int64)
            br_tags = np.zeros((cfg.corners.offs_max_objects), dtype=np.int64)
            tl_valids = np.zeros(
                (1, cfg.corners.cor_size, cfg.corners.cor_size),
                dtype=np.float32)  # [1,25,25]
            br_valids = np.zeros(
                (1, cfg.corners.cor_size, cfg.corners.cor_size),
            tag_masks = np.ones((cfg.corners.offs_max_objects), dtype=np.uint8)
            tag_lens = 0

            #atts_map, x_int, y_int, x_float, y_float = self.create_attention_mask(attentions, cfg.TRAIN.ratios, bbox) # image:[255,255,3] x_int,y_int为目标中心点坐标
            atts_map = []

            xtl, ytl = bbox[0], bbox[1]  # 图大小为255的坐标
            xbr, ybr = bbox[2], bbox[3]

            det_height = int(ybr) - int(ytl)
            det_width = int(xbr) - int(xtl)
            det_max = max(det_height, det_width)

            min_scale = 16
            valid = det_max >= min_scale  # min_scale:16

            fxtl = (xtl * cfg.corners.Ratios)  # width_ratio:由255-->25的缩放比例
            fytl = (ytl * cfg.corners.Ratios)
            fxbr = (xbr * cfg.corners.Ratios)
            fybr = (ybr * cfg.corners.Ratios)

            xtl = int(fxtl)
            ytl = int(fytl)
            xbr = int(fxbr)
            ybr = int(fybr)

            width = bbox[2] - bbox[0]
            height = bbox[3] - bbox[1]

            # visual bounding box
            #cv2.rectangle(image, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255), thickness=1)  # 红[0,0,255]
            #cv2.imwrite('255.jpg', image) # image:[255,255,3]

            width = math.ceil(width * cfg.corners.Ratios)
            height = math.ceil(height * cfg.corners.Ratios)

            if cfg.corners.gaussian_rad == -1:
                radius = gaussian_radius((height, width),
                radius = max(0, int(radius))
                radius = cfg.corners.gaussian_rad

            if valid:
                draw_gaussian(tl_heats[0], [xtl, ytl], radius)
                draw_gaussian(br_heats[0], [xbr, ybr], radius)
                tl_regrs[0, :] = [fxtl - xtl, fytl - ytl]  # tl_regrs:[5,128,2]
                br_regrs[0, :] = [fxbr - xbr, fybr - ybr]
                tl_tags[0] = max(
                    min(ytl * cfg.corners.cor_size + xtl,
                        cfg.corners.cor_size * cfg.corners.cor_size -
                        1))  # 坐标索引 ytl为取整后
                br_tags[0] = max(
                    min(ybr * cfg.corners.cor_size + xbr,
                        cfg.corners.cor_size * cfg.corners.cor_size - 1))
                draw_gaussian(tl_valids[b_ind, category], [xtl, ytl],
                              radius)  # 得到上左masked_heatmap
                draw_gaussian(br_valids[b_ind, category], [xbr, ybr], radius)

            tl_valids = (tl_valids == 0).astype(np.float32)
            br_valids = (br_valids == 0).astype(np.float32)

            #tag_masks[:1] = 1

            atts_map, tl_heats, br_heats, tl_valids, br_valids, tag_masks, tl_regrs, br_regrs, tl_tags, br_tags = [], [],\
            [], [], [], [], [], [], [], []
        if x_int:
            tag_masks = np.ones((cfg.offs.max_objects), dtype=np.uint8)
            tl_regrs  = np.zeros((cfg.offs.max_objects, 2), dtype=np.float32)  # max_objects:1
            tl_regrs[0, :] = [x_float - x_int, y_float - y_int]  # tl_regrs:[5,128,2]
            tl_tags = np.zeros((cfg.offs.max_objects), dtype=np.int64)
            tl_tags[0] = y_int * cfg.offs.off_size + x_int  # 坐标索引 ytl为取整后
            tl_heats, br_heats, tl_valids, br_valids, tag_masks, tl_regrs, br_regrs, tl_tags, br_tags = [], [], [], [], [], [], [], [], []
        if data == 'template':
            return image_t, image_l, image_b, image_r, bbox, atts_map, tl_heats, br_heats, tl_valids, br_valids, tag_masks, tl_regrs, br_regrs, tl_tags,\
            return image, bbox, atts_map, tl_heats, br_heats, tl_valids, br_valids, tag_masks, tl_regrs, br_regrs, tl_tags, \