def __call__(self, image, bbox, size, gray=False):
    '''
    :param image: cropped image, 511*511; the template has already been aligned to the image center
    :param bbox: box with context, in the 511-crop coordinate system
    :param size: network input size, 127*127 for the template or 255*255 for the search region
    :param gray: whether to apply gray-scale augmentation
    :return:
    '''
    shape = image.shape  # fixed size 511*511
    # To cut the search region out of the image, compute the top-left and
    # bottom-right corners of a size*size box centered on the template.
    crop_bbox = center2corner(Center(shape[0]//2, shape[1]//2,
                                     size-1, size-1))

    # gray augmentation (if selected, convert the color image to gray,
    # then back to a 3-channel "color" image)
    if gray:
        image = self._gray_aug(image)

    # shift scale augmentation
    # the actual cropping and the simple rescaling happen here
    image, bbox = self._shift_scale_aug(image, bbox, crop_bbox, size)

    # color augmentation
    if self.color > np.random.random():
        image = self._color_aug(image)

    # blur augmentation; cap the kernel size by the target/region area ratio,
    # otherwise an overly large blur kernel would wipe out the target
    _, _, w, h = corner2center(bbox)
    area_ratio = (w * h * 1.0) / (size * size)
    if self.blur > np.random.random():
        image = self._blur_aug(image, area_ratio)

    # flip augmentation
    if self.flip and self.flip > np.random.random():
        image, bbox = self._flip_aug(image, bbox)
    return image, bbox
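# --- Hedged sketch: how the crop box above is laid out ------------------------
# Assumes the Center/Corner namedtuples and center2corner below as simplified
# stand-ins for the repo's bbox helpers; only the numbers matter here.
from collections import namedtuple

Center = namedtuple('Center', 'x y w h')
Corner = namedtuple('Corner', 'x1 y1 x2 y2')

def center2corner(c):
    # convert (cx, cy, w, h) to (x1, y1, x2, y2)
    return Corner(c.x - c.w / 2, c.y - c.h / 2, c.x + c.w / 2, c.y + c.h / 2)

# For the 511x511 crop and a 255x255 search region, the crop box is centered:
crop_bbox = center2corner(Center(511 // 2, 511 // 2, 255 - 1, 255 - 1))
print(crop_bbox)  # Corner(x1=128.0, y1=128.0, x2=382.0, y2=382.0)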
def perturb(self, bbox, sz):
    # cx, cy, w, h = get_axis_aligned_bbox(np.array(bbox))
    cx = (bbox.x1 + bbox.x2) / 2
    cy = (bbox.y1 + bbox.y2) / 2
    w = np.abs(bbox.x1 - bbox.x2)
    h = np.abs(bbox.y1 - bbox.y2)
    # w = np.abs(sz - w) / 2
    # h = np.abs(sz - h) / 2
    # cx = np.abs(sz - cx)
    # cy = np.abs(sz - cy)
    #
    # rx, ry = np.random.random(size=2)
    #
    # if sz/4 < cx < 3*sz/4 and rx > 0.5:
    #     cx = sz - cx
    # if sz/4 < cy < 3*sz/4 and ry > 0.5:
    #     cy = sz - cy
    # bbox = [cx-w/2, cy-h/2, cx-w/2, cy+h/2, cx+w/2, cy-h/2, cx+w/2, cy+h/2]
    # bbox = np.array([cx - w, cy - h, w//2, h//2])
    bbox = np.array([cx, cy, w, h])
    return center2corner(bbox)
def _get_bbox(self, image, shape):
    '''
    The template is assumed to sit at the center of the image. The ground-truth bbox plus
    0.5x context is taken as the template region, rescaled to 127*127; the function returns
    the rescaled template box relative to the image center.
    Note: although `shape` carries a ground-truth bbox in original-image coordinates,
    the input image here is the 511*511 crop with the target already centered. Of the four
    values [x1, y1, x2, y2] only w = x2 - x1 and h = y2 - y1 are actually used, and only to
    recover the width/height of the (context-padded) target inside the 511*511 crop; the
    target center was already aligned to the crop center during dataset preparation.
    :param image:
    :param shape:
    :return:
    '''
    imh, imw = image.shape[:2]
    if len(shape) == 4:
        w, h = shape[2] - shape[0], shape[3] - shape[1]
    else:
        w, h = shape
    # The scaling below keeps the aspect ratio, so the target is not deformed. Because the
    # final template is square, the short side is padded with background, which is
    # unfriendly to very elongated boxes (a lot of background gets included).
    context_amount = 0.5  # context ratio: the gt box plus this proportion of context forms the template region
    exemplar_size = cfg.TRAIN.EXEMPLAR_SIZE
    wc_z = w + context_amount * (w + h)
    hc_z = h + context_amount * (w + h)
    s_z = np.sqrt(wc_z * hc_z)
    # The network template is 127*127; the context-padded template region was resized to
    # 127*127 when the dataset was cropped, so w and h are scaled by the same factor.
    scale_z = exemplar_size / s_z
    w = w * scale_z
    h = h * scale_z
    cx, cy = imw // 2, imh // 2  # the template region was aligned to the image center at crop time
    bbox = center2corner(Center(cx, cy, w, h))
    return bbox  # box centered in the 511*511 crop, returned as [x1, y1, x2, y2]
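# --- Hedged sketch: worked example of the context/exemplar scaling above ------
# Purely illustrative numbers; EXEMPLAR_SIZE = 127 matches the usual config,
# the bbox width/height are made up.
import numpy as np

w, h = 60.0, 100.0                   # ground-truth width/height in the 511 crop
context_amount = 0.5
exemplar_size = 127

wc_z = w + context_amount * (w + h)  # 60 + 0.5*160 = 140
hc_z = h + context_amount * (w + h)  # 100 + 0.5*160 = 180
s_z = np.sqrt(wc_z * hc_z)           # sqrt(140*180) ~ 158.7, side of the square context crop
scale_z = exemplar_size / s_z        # ~ 0.80, factor mapping that crop onto 127*127
print(s_z, scale_z, w * scale_z, h * scale_z)  # target becomes ~48 x 80 in the template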
def _shift_scale_aug(self, image, bbox, crop_bbox, size):
    '''
    Apply shift and scale jitter to the crop box, then return the cropped image region
    together with the adjusted ground-truth bbox.
    :param image:
    :param bbox: context-padded ground-truth box, in the 511-crop coordinate system
    :param crop_bbox: box to crop, 127*127 or 255*255
    :param size: desired crop size: 127*127 for the template, 255*255 for the search region
    :return: the ROI image cut out with the augmented crop_bbox, and the ground-truth bbox
             adjusted accordingly and expressed in the cropped-image coordinate system
    '''
    im_h, im_w = image.shape[:2]

    # adjust crop bounding box
    crop_bbox_center = corner2center(crop_bbox)  # jitter the size and position of the crop box
    if self.scale:
        scale_x = (1.0 + Augmentation.random() * self.scale)
        scale_y = (1.0 + Augmentation.random() * self.scale)
        h, w = crop_bbox_center.h, crop_bbox_center.w
        # take the minimum so that the scaled crop box does not exceed the image
        scale_x = min(scale_x, float(im_w) / w)
        scale_y = min(scale_y, float(im_h) / h)
        crop_bbox_center = Center(crop_bbox_center.x,
                                  crop_bbox_center.y,
                                  crop_bbox_center.w * scale_x,
                                  crop_bbox_center.h * scale_y)
    crop_bbox = center2corner(crop_bbox_center)

    if self.shift:
        # SiamRPN++ discusses how shifting over the maximum range partially removes
        # the spatial bias the network would otherwise learn
        sx = Augmentation.random() * self.shift
        sy = Augmentation.random() * self.shift
        # print("shift", self.shift, sx, sy)
        x1, y1, x2, y2 = crop_bbox
        # min(im_w - 1 - x2, sx) keeps x2 + sx inside the right image border;
        # max(-x1, ...) keeps x1 + sx from dropping below 0, i.e. inside the left border
        sx = max(-x1, min(im_w - 1 - x2, sx))
        sy = max(-y1, min(im_h - 1 - y2, sy))
        crop_bbox = Corner(x1 + sx, y1 + sy,
                           x2 + sx, y2 + sy)

    # adjust target bounding box: the crop box transform is fixed above, so the
    # ground-truth box must be adjusted in the same way
    x1, y1 = crop_bbox.x1, crop_bbox.y1
    # express the gt bbox relative to the crop box's top-left corner, i.e. in the
    # coordinate system of the 127*127 or 255*255 output image
    bbox = Corner(bbox.x1 - x1, bbox.y1 - y1,
                  bbox.x2 - x1, bbox.y2 - y1)
    if self.scale:
        bbox = Corner(bbox.x1 / scale_x, bbox.y1 / scale_y,
                      bbox.x2 / scale_x, bbox.y2 / scale_y)

    image = self._crop_roi(image, crop_bbox, size)  # cut out the crop region
    return image, bbox
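# --- Hedged sketch: the shift clamp used above, on made-up numbers ------------
# Shows why sx = max(-x1, min(im_w - 1 - x2, sx)) keeps the shifted crop box
# inside the image on both sides.
im_w = 511
x1, x2 = 128.0, 382.0          # crop box before shifting
for sx in (-200.0, -50.0, 300.0):
    clamped = max(-x1, min(im_w - 1 - x2, sx))
    print(sx, '->', clamped, 'new box:', (x1 + clamped, x2 + clamped))
# -200 is clamped to -128 (left edge lands on 0), +300 to +128 (right edge on 510),
# -50 passes through unchanged.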
def _get_bbox(self, image, shape):
    imh, imw = image.shape[:2]
    if len(shape) == 4:
        w, h = shape[2] - shape[0], shape[3] - shape[1]
    else:
        w, h = shape
    context_amount = 0.5
    exemplar_size = cfg.TRAIN.EXEMPLAR_SIZE
    wc_z = w + context_amount * (w + h)
    hc_z = h + context_amount * (w + h)
    s_z = np.sqrt(wc_z * hc_z)
    scale_z = exemplar_size / s_z
    w = w * scale_z
    h = h * scale_z
    cx, cy = imw // 2, imh // 2
    bbox = center2corner(Center(cx, cy, w, h))
    return bbox
def _get_bbox(self, s_z):
    # imh, imw = image.shape[:2]
    # if len(shape) == 4:
    #     w, h = shape[2]-shape[0], shape[3]-shape[1]
    # else:
    #     w, h = shape
    # context_amount = cfg.TRACK.CONTEXT_AMOUNT
    exemplar_size = cfg.TRACK.EXEMPLAR_SIZE
    # wc_z = w + context_amount * (w+h)
    # hc_z = h + context_amount * (w+h)
    # s_z = np.sqrt(wc_z * hc_z)
    scale_z = exemplar_size / s_z
    w, h = self.size
    imh, imw = cfg.TRACK.INSTANCE_SIZE, cfg.TRACK.INSTANCE_SIZE
    w = w * scale_z
    h = h * scale_z
    cx, cy = imw // 2, imh // 2
    bbox = center2corner(Center(cx, cy, w, h))
    return bbox
def generate_all_anchors(self, im_c, size):
    """
    Given the input image size and the RPN feature-map size, plus the single-location
    anchors produced by generate_anchors, build the anchors for every position of the
    RPN output map.
    im_c: image center (center of the search region, e.g. 255 // 2)
    size: size of the feature map after the correlation, e.g. 17 * 17
    """
    if self.image_center == im_c and self.size == size:
        return False
    self.image_center = im_c
    self.size = size

    # x of the top-left correlation position at input resolution: the correlation map is
    # aligned with the search-region center, so this is the center hit by the template's
    # first convolution; +/- 0.5*w then gives the anchor's top-left / bottom-right corners
    a0x = im_c - size // 2 * self.stride
    ori = np.array([a0x] * 4, dtype=np.float32)
    zero_anchors = self.anchors + ori  # anchors for the top-left position, shape [n, 4]

    x1 = zero_anchors[:, 0]  # shape [n]
    y1 = zero_anchors[:, 1]
    x2 = zero_anchors[:, 2]
    y2 = zero_anchors[:, 3]

    # reshape to [n, 1, 1]; the middle 1 is the scale dimension (only one scale is used here)
    x1, y1, x2, y2 = map(lambda x: x.reshape(self.anchor_num, 1, 1),
                         [x1, y1, x2, y2])
    cx, cy, w, h = corner2center([x1, y1, x2, y2])  # each of shape [anchor_num, 1, 1]

    # offsets relative to the top-left position
    disp_x = np.arange(0, size).reshape(1, 1, -1) * self.stride  # shape [1, 1, size]
    disp_y = np.arange(0, size).reshape(1, -1, 1) * self.stride  # shape [1, size, 1]

    cx = cx + disp_x  # shape [anchor_num, 1, size]
    cy = cy + disp_y  # shape [anchor_num, size, 1]

    # broadcast: generate anchors for every position
    zero = np.zeros((self.anchor_num, size, size), dtype=np.float32)
    cx, cy, w, h = map(lambda x: x + zero, [cx, cy, w, h])
    x1, y1, x2, y2 = center2corner([cx, cy, w, h])

    # two anchor layouts are produced: corner form (x1, y1, x2, y2) and center form
    # (cx, cy, w, h), each of shape [4, anchor_num, size, size]
    self.all_anchors = (np.stack([x1, y1, x2, y2]).astype(np.float32),
                        np.stack([cx, cy, w, h]).astype(np.float32))
    return True
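# --- Hedged sketch: the broadcasting pattern used above, in isolation ---------
# anchor_num, size and stride are illustrative values; the point is that a
# (anchor_num, 1, 1) column of centers plus (1, 1, size) / (1, size, 1)
# displacement rows broadcasts to a full (anchor_num, size, size) grid.
import numpy as np

anchor_num, size, stride = 5, 25, 8
im_c = 255 // 2
a0x = im_c - size // 2 * stride                       # x of the leftmost anchor center

cx = np.full((anchor_num, 1, 1), float(a0x))          # per-anchor center x at the corner
disp_x = np.arange(size).reshape(1, 1, -1) * stride   # column offsets
disp_y = np.arange(size).reshape(1, -1, 1) * stride   # row offsets

cx_grid = cx + disp_x                                  # (anchor_num, 1, size)
cy_grid = cx + disp_y                                  # (anchor_num, size, 1)
zero = np.zeros((anchor_num, size, size))
cx_grid, cy_grid = cx_grid + zero, cy_grid + zero      # both (anchor_num, size, size)
print(cx_grid.shape, cy_grid.shape, cx_grid[0, 0, :3])  # first three center xs: 31, 39, 47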
def generate_all_anchors(self, im_c, size):
    """
    im_c: image center - center of the search image
    size: image size - size of the output feature map
    """
    if self.image_center == im_c and self.size == size:
        return False
    self.image_center = im_c
    self.size = size

    a0x = im_c - size // 2 * self.stride
    ori = np.array([a0x] * 4, dtype=np.float32)
    zero_anchors = self.anchors + ori  # all in corner-coordinate form

    x1 = zero_anchors[:, 0]
    y1 = zero_anchors[:, 1]
    x2 = zero_anchors[:, 2]
    y2 = zero_anchors[:, 3]

    x1, y1, x2, y2 = map(lambda x: x.reshape(self.anchor_num, 1, 1),
                         [x1, y1, x2, y2])
    cx, cy, w, h = corner2center([x1, y1, x2, y2])

    disp_x = np.arange(0, size).reshape(1, 1, -1) * self.stride
    disp_y = np.arange(0, size).reshape(1, -1, 1) * self.stride

    cx = cx + disp_x
    cy = cy + disp_y

    # broadcast
    zero = np.zeros((self.anchor_num, size, size), dtype=np.float32)  # 5 x 25 x 25
    cx, cy, w, h = map(lambda x: x + zero, [cx, cy, w, h])  # store the anchor positions
    x1, y1, x2, y2 = center2corner([cx, cy, w, h])  # convert center form back to corner form

    # np.stack turns each list into an array with one extra leading dimension,
    # i.e. [4, anchor_num, size, size] in corner and in center form
    self.all_anchors = (np.stack([x1, y1, x2, y2]).astype(np.float32),
                        np.stack([cx, cy, w, h]).astype(np.float32))
    return True
def _shift_scale_aug(self, image, bbox, crop_bbox, size):
    im_h, im_w = image.shape[:2]

    # adjust crop bounding box
    crop_bbox_center = corner2center(crop_bbox)
    if self.scale:
        scale_x = (1.0 + Augmentation.random() * self.scale)
        scale_y = (1.0 + Augmentation.random() * self.scale)
        h, w = crop_bbox_center.h, crop_bbox_center.w
        scale_x = min(scale_x, float(im_w) / w)
        scale_y = min(scale_y, float(im_h) / h)
        crop_bbox_center = Center(crop_bbox_center.x,
                                  crop_bbox_center.y,
                                  crop_bbox_center.w * scale_x,
                                  crop_bbox_center.h * scale_y)
    crop_bbox = center2corner(crop_bbox_center)

    if self.shift:
        sx = Augmentation.random() * self.shift
        sy = Augmentation.random() * self.shift
        x1, y1, x2, y2 = crop_bbox
        sx = max(-x1, min(im_w - 1 - x2, sx))
        sy = max(-y1, min(im_h - 1 - y2, sy))
        crop_bbox = Corner(x1 + sx, y1 + sy,
                           x2 + sx, y2 + sy)

    # adjust target bounding box
    x1, y1 = crop_bbox.x1, crop_bbox.y1
    bbox = Corner(bbox.x1 - x1, bbox.y1 - y1,
                  bbox.x2 - x1, bbox.y2 - y1)
    if self.scale:
        bbox = Corner(bbox.x1 / scale_x, bbox.y1 / scale_y,
                      bbox.x2 / scale_x, bbox.y2 / scale_y)

    image = self._crop_roi(image, crop_bbox, size)
    return image, bbox
def generate_all_anchors(self, im_c, size):
    """
    Question: What's the difference between generate_all_anchors and generate_anchors?
    im_c: image center
    size: image size
    """
    if self.image_center == im_c and self.size == size:
        return False
    self.image_center = im_c
    self.size = size

    a0x = im_c - size // 2 * self.stride
    ori = np.array([a0x] * 4, dtype=np.float32)
    zero_anchors = self.anchors + ori

    x1 = zero_anchors[:, 0]
    y1 = zero_anchors[:, 1]
    x2 = zero_anchors[:, 2]
    y2 = zero_anchors[:, 3]

    x1, y1, x2, y2 = map(lambda x: x.reshape(self.anchor_num, 1, 1),
                         [x1, y1, x2, y2])
    cx, cy, w, h = corner2center([x1, y1, x2, y2])

    disp_x = np.arange(0, size).reshape(1, 1, -1) * self.stride
    disp_y = np.arange(0, size).reshape(1, -1, 1) * self.stride

    cx = cx + disp_x
    cy = cy + disp_y

    # broadcast
    zero = np.zeros((self.anchor_num, size, size), dtype=np.float32)
    cx, cy, w, h = map(lambda x: x + zero, [cx, cy, w, h])
    x1, y1, x2, y2 = center2corner([cx, cy, w, h])

    self.all_anchors = (np.stack([x1, y1, x2, y2]).astype(np.float32),
                        np.stack([cx, cy, w, h]).astype(np.float32))
    return True
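# --- Hedged sketch: one way the per-location base anchors can be produced -----
# A rough answer to the docstring question above: generate_anchors builds the
# anchor_num base boxes (one per aspect ratio x scale) around a single point,
# while generate_all_anchors tiles those boxes over the size x size score map.
# The stride/ratios/scales below are illustrative, not read from this repo's config.
import math
import numpy as np

def base_anchors(stride=8, ratios=(0.33, 0.5, 1, 2, 3), scales=(8,)):
    anchors = np.zeros((len(ratios) * len(scales), 4), dtype=np.float32)
    area = stride * stride
    count = 0
    for r in ratios:
        ws = int(math.sqrt(area / r))
        hs = int(ws * r)
        for s in scales:
            w, h = ws * s, hs * s
            anchors[count] = [-w * 0.5, -h * 0.5, w * 0.5, h * 0.5]  # centered at the origin
            count += 1
    return anchors

print(base_anchors().shape)  # (5, 4): one corner-form box per ratio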
def __call__(self, image, bbox, size, gray=False):
    shape = image.shape
    crop_bbox = center2corner(
        Center(shape[0] // 2, shape[1] // 2,
               size - 1, size - 1))
    # gray augmentation
    if gray:
        image = self._gray_aug(image)

    # shift scale augmentation - the original image is cropped to 255 x 255 x 3 here
    image, bbox = self._shift_scale_aug(image, bbox, crop_bbox, size)

    # color augmentation
    if self.color > np.random.random():
        image = self._color_aug(image)

    # blur augmentation
    if self.blur > np.random.random():
        image = self._blur_aug(image)

    # flip augmentation
    if self.flip and self.flip > np.random.random():
        image, bbox = self._flip_aug(image, bbox)
    return image, bbox
def generate_all_anchors(self, im_c, size):
    """
    im_c: image center
    size: image size
    """
    if self.image_center == im_c and self.size == size:
        return False
    self.image_center = im_c
    self.size = size

    a0x = im_c - size // 2 * self.stride
    ori = np.array([a0x] * 4, dtype=np.float32)
    zero_anchors = self.anchors + ori

    x1 = zero_anchors[:, 0]
    y1 = zero_anchors[:, 1]
    x2 = zero_anchors[:, 2]
    y2 = zero_anchors[:, 3]

    x1, y1, x2, y2 = map(partial(reshape_anchor_1_1, anchor_num=self.anchor_num),
                         [x1, y1, x2, y2])
    cx, cy, w, h = corner2center([x1, y1, x2, y2])

    disp_x = np.arange(0, size).reshape(1, 1, -1) * self.stride
    disp_y = np.arange(0, size).reshape(1, -1, 1) * self.stride

    cx = cx + disp_x
    cy = cy + disp_y

    # broadcast
    zero = np.zeros((self.anchor_num, size, size), dtype=np.float32)
    cx, cy, w, h = map(partial(add_zero, zero=zero), [cx, cy, w, h])
    x1, y1, x2, y2 = center2corner([cx, cy, w, h])

    self.all_anchors = (np.stack([x1, y1, x2, y2]).astype(np.float32),
                        np.stack([cx, cy, w, h]).astype(np.float32))
    return True
def __call__(self, image, bbox, size, gray=False):
    shape = image.shape
    # size for template and search region are defined as: [127, 255]
    crop_bbox = center2corner(Center(shape[0]//2, shape[1]//2,
                                     size-1, size-1))
    # gray augmentation
    if gray:
        image = self._gray_aug(image)

    # shift scale augmentation, two types augmentation!
    image, bbox = self._shift_scale_aug(image, bbox, crop_bbox, size)

    # color augmentation
    if self.color > np.random.random():
        image = self._color_aug(image)

    # blur augmentation
    if self.blur > np.random.random():
        image = self._blur_aug(image)

    # flip augmentation
    if self.flip and self.flip > np.random.random():
        image, bbox = self._flip_aug(image, bbox)
    return image, bbox
def track(self, img):
    """
    args:
        img(np.ndarray): BGR image
    return:
        bbox(list): [x, y, width, height]
    """
    w_z = self.size[0] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size)
    h_z = self.size[1] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size)
    s_z = np.sqrt(w_z * h_z)
    scale_z = cfg.TRACK.EXEMPLAR_SIZE / s_z
    s_x = s_z * (cfg.TRACK.INSTANCE_SIZE / cfg.TRACK.EXEMPLAR_SIZE)
    x_crop = self.get_subwindow(img, self.center_pos,
                                cfg.TRACK.INSTANCE_SIZE,
                                round(s_x), self.channel_average)

    # outputs keys:
    #   'cls': cls,
    #   'loc': loc,
    #   'xf': xf,
    #   'mask': mask if cfg.MASK.MASK else None
    outputs = self.model.track(x_crop, self.xf_crops)

    score = self._convert_score(outputs['cls'])
    pred_bbox = self._convert_bbox(outputs['loc'], self.anchors)

    def change(r):
        return np.maximum(r, 1. / r)

    def sz(w, h):
        pad = (w + h) * 0.5
        return np.sqrt((w + pad) * (h + pad))

    # scale penalty
    s_c = change(sz(pred_bbox[2, :], pred_bbox[3, :]) /
                 (sz(self.size[0] * scale_z, self.size[1] * scale_z)))

    # aspect ratio penalty
    r_c = change((self.size[0] / self.size[1]) /
                 (pred_bbox[2, :] / pred_bbox[3, :]))
    penalty = np.exp(-(r_c * s_c - 1) * cfg.TRACK.PENALTY_K)
    pscore = penalty * score

    # window penalty
    # pscore *= self.window
    pscore = pscore * (1 - cfg.TRACK.WINDOW_INFLUENCE) + \
        self.window * cfg.TRACK.WINDOW_INFLUENCE
    best_idx = np.argmax(pscore)

    bbox = pred_bbox[:, best_idx]
    iou = IoU(center2corner(bbox), center2corner(np.transpose(self.anchors)))
    bbox /= scale_z
    lr = penalty[best_idx] * score[best_idx] * cfg.TRACK.LR

    cx = bbox[0] + self.center_pos[0]
    cy = bbox[1] + self.center_pos[1]

    # smooth bbox
    width = self.size[0] * (1 - lr) + bbox[2] * lr
    height = self.size[1] * (1 - lr) + bbox[3] * lr

    # clip boundary
    cx, cy, width, height = self._bbox_clip(cx, cy, width,
                                            height, img.shape[:2])

    # update state
    self.center_pos = np.array([cx, cy])
    self.size = np.array([width, height])

    bbox = [cx - width / 2,
            cy - height / 2,
            width,
            height]
    best_score = score[best_idx]

    # crop search region for feature transform
    _, iy, ix = np.unravel_index(best_idx, [5, 25, 25])
    iy += 3
    ix += 3
    self.xf_crops = [o[:, :, iy-3:iy+4, ix-3:ix+4].contiguous()
                     for o in outputs['xf']]

    return {
        'bbox': bbox,
        'best_score': best_score,
        'best_idx': best_idx,
        'pscore': pscore,
        'score': score,
        'xf': outputs['xf'],
    }
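# --- Hedged sketch: the scale / aspect-ratio penalty above on scalar inputs ---
# pred_w, pred_h, prev_w, prev_h and penalty_k are made-up values; the formulas
# are the ones used inside track() above.
import numpy as np

def change(r):
    return np.maximum(r, 1. / r)

def sz(w, h):
    pad = (w + h) * 0.5
    return np.sqrt((w + pad) * (h + pad))

prev_w, prev_h = 64.0, 48.0   # previous target size, already scaled by scale_z
pred_w, pred_h = 80.0, 40.0   # one predicted box
penalty_k = 0.04

s_c = change(sz(pred_w, pred_h) / sz(prev_w, prev_h))   # scale change, always >= 1
r_c = change((prev_w / prev_h) / (pred_w / pred_h))     # aspect-ratio change, always >= 1
penalty = np.exp(-(r_c * s_c - 1) * penalty_k)          # 1.0 when nothing changed, < 1 otherwise
print(s_c, r_c, penalty)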
def track(self, img):
    """
    args:
        img(np.ndarray): BGR image
    return:
        bbox(list): [x, y, width, height]
    """
    w_z = self.size[0] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size)
    h_z = self.size[1] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size)
    s_z = np.sqrt(w_z * h_z)
    scale_z = cfg.TRACK.EXEMPLAR_SIZE / s_z
    s_x = s_z * (cfg.TRACK.INSTANCE_SIZE / cfg.TRACK.EXEMPLAR_SIZE)
    x_crop = self.get_subwindow(img, self.center_pos,
                                cfg.TRACK.INSTANCE_SIZE,
                                round(s_x), self.channel_average)

    outputs = self.model.track(x_crop)

    score = self._convert_score(outputs['cls'])
    pred_bbox = self._convert_bbox(outputs['loc'], self.anchors)

    def change(r):
        return np.maximum(r, 1. / r)

    def sz(w, h):
        pad = (w + h) * 0.5
        return np.sqrt((w + pad) * (h + pad))

    # scale penalty
    s_c = change(sz(pred_bbox[2, :], pred_bbox[3, :]) /
                 (sz(self.size[0] * scale_z, self.size[1] * scale_z)))

    # aspect ratio penalty
    r_c = change((self.size[0] / self.size[1]) /
                 (pred_bbox[2, :] / pred_bbox[3, :]))
    penalty = np.exp(-(r_c * s_c - 1) * cfg.TRACK.PENALTY_K)
    pscore = penalty * score

    # window penalty
    pscore = pscore * (1 - cfg.TRACK.WINDOW_INFLUENCE) + \
        self.window * cfg.TRACK.WINDOW_INFLUENCE
    best_idx = np.argmax(pscore)

    bbox = pred_bbox[:, best_idx] / scale_z
    lr = penalty[best_idx] * score[best_idx] * cfg.TRACK.LR

    cx = bbox[0] + self.center_pos[0]
    cy = bbox[1] + self.center_pos[1]

    # smooth bbox
    width = self.size[0] * (1 - lr) + bbox[2] * lr
    height = self.size[1] * (1 - lr) + bbox[3] * lr

    # clip boundary
    cx, cy, width, height = self._bbox_clip(cx, cy, width,
                                            height, img.shape[:2])

    # update state
    self.center_pos = np.array([cx, cy])
    self.size = np.array([width, height])

    bbox = [cx - width / 2,
            cy - height / 2,
            width,
            height]
    best_score = score[best_idx]

    # build the label that is later used to obtain the gradient
    # bbox0: the bbox on x_crop
    bbox0 = pred_bbox[:, best_idx]
    imh, imw = cfg.TRACK.INSTANCE_SIZE, cfg.TRACK.INSTANCE_SIZE
    w = width * scale_z
    h = height * scale_z
    cx, cy = imw // 2, imh // 2
    cx = bbox0[0] + cx
    cy = bbox0[1] + cy
    bbox0 = center2corner(Center(cx, cy, w, h))

    return {
        'bbox': bbox,
        'best_score': best_score,
        'bbox0': bbox0,
        'cls_feas': outputs['cls_feas'] if 'cls_feas' in outputs.keys() else None,
        'loc_feas': outputs['loc_feas'] if 'loc_feas' in outputs.keys() else None
    }
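# --- Hedged sketch: how the cosine window used for the 'window penalty' is built ---
# This mirrors the usual SiamRPN-style tracker initialization (a Hanning window per
# score-map cell, repeated once per anchor); the sizes and window_influence below
# are assumptions, not read from this repo's cfg.
import numpy as np

score_size, anchor_num = 25, 5
hanning = np.hanning(score_size)
window = np.outer(hanning, hanning)              # (25, 25), peaks at the center
window = np.tile(window.flatten(), anchor_num)   # one copy per anchor -> (3125,)

window_influence = 0.44
score = np.random.rand(window.size)              # stand-in for penalty * score
pscore = score * (1 - window_influence) + window * window_influence
print(window.shape, pscore.shape)                # scores near the center get boosted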
def __call__(self, image, bbox, size, data, gray=False):
    shape = image.shape
    cv2.imwrite('511.jpg', image)  # image: [511, 511, 3]

    if data == 'template':
        # constant background (B, G, R) = (87, 135, 123) for the template strips
        image1 = np.zeros((127, 127, 3))
        image1[:, :, 0] = 87
        image1[:, :, 1] = 135
        image1[:, :, 2] = 123

    crop_bbox = center2corner(
        Center(shape[0] // 2, shape[1] // 2,
               size - 1, size - 1))
    # gray augmentation
    if gray:
        image = self._gray_aug(image)

    # shift scale augmentation
    image, bbox = self._shift_scale_aug(image, bbox, crop_bbox, size)
    # cv2.imwrite('127_255.jpg', image)  # image: [127, 127, 3] or [255, 255, 3]
    crop_bbox = center2corner(
        Center(shape[0] // 2, shape[1] // 2,
               size - 1, size - 1))

    # color augmentation
    if self.color > np.random.random():
        image = self._color_aug(image)

    # blur augmentation
    if self.blur > np.random.random():
        image = self._blur_aug(image)

    # flip augmentation
    if self.flip and self.flip > np.random.random():
        image, bbox = self._flip_aug(image, bbox)

    if data == 'template':
        # visualize the bounding box
        cv2.rectangle(image, (int(bbox[0]), int(bbox[1])),
                      (int(bbox[2]), int(bbox[3])), (0, 0, 255), thickness=1)  # red (0, 0, 255)
        cv2.imwrite('127_bbox.jpg', image)  # image: [255, 255, 3]

        image_l = image1  # note: aliases image1, while the others are copies
        image_t = image1.copy()
        image_b = image1.copy()
        image_r = image1.copy()

        image_l[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[0] + cfg.corners.crop_size), :] = \
            image[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[0] + cfg.corners.crop_size), :]
        cv2.imwrite('crop_l.jpg', image_l)  # image: [255, 255, 3]
        # cv2.imwrite('127_bbox——2.jpg', image)
        # cv2.imwrite('127_bbox--3.jpg', image_t)

        image_t[int(bbox[1]):int(bbox[1] + cfg.corners.crop_size), int(bbox[0]):int(bbox[2]), :] = \
            image[int(bbox[1]):int(bbox[1] + cfg.corners.crop_size), int(bbox[0]):int(bbox[2]), :]
        cv2.imwrite('crop_t.jpg', image_t)

        image_b[int(bbox[3] - cfg.corners.crop_size):int(bbox[3]), int(bbox[0]):int(bbox[2]), :] = \
            image[int(bbox[3] - cfg.corners.crop_size):int(bbox[3]), int(bbox[0]):int(bbox[2]), :]
        cv2.imwrite('crop_b.jpg', image_b)

        image_r[int(bbox[1]):int(bbox[3]), int(bbox[2] - cfg.corners.crop_size):int(bbox[2]), :] = \
            image[int(bbox[1]):int(bbox[3]), int(bbox[2] - cfg.corners.crop_size):int(bbox[2]), :]
        cv2.imwrite('crop_r.jpg', image_r)

    if data == 'search':
        attentions = [np.zeros((1, cfg.atts.att_size, cfg.atts.att_size),
                               dtype=np.float32)]  # 25 is the attention-map size

        # corner heat maps
        tl_heats = np.zeros((1, cfg.corners.cor_size, cfg.corners.cor_size),
                            dtype=np.float32)  # [1, 25, 25]
        br_heats = np.zeros((1, cfg.corners.cor_size, cfg.corners.cor_size),
                            dtype=np.float32)

        # corner offsets, embedding tags and valid masks
        tl_regrs = np.zeros((cfg.corners.offs_max_objects, 2), dtype=np.float32)
        br_regrs = np.zeros((cfg.corners.offs_max_objects, 2), dtype=np.float32)
        tl_tags = np.zeros((cfg.corners.offs_max_objects), dtype=np.int64)
        br_tags = np.zeros((cfg.corners.offs_max_objects), dtype=np.int64)
        tl_valids = np.zeros((1, cfg.corners.cor_size, cfg.corners.cor_size),
                             dtype=np.float32)  # [1, 25, 25]
        br_valids = np.zeros((1, cfg.corners.cor_size, cfg.corners.cor_size),
                             dtype=np.float32)
        tag_masks = np.ones((cfg.corners.offs_max_objects), dtype=np.uint8)
        tag_lens = 0

        # atts_map, x_int, y_int, x_float, y_float = self.create_attention_mask(
        #     attentions, cfg.TRAIN.ratios, bbox)  # image: [255, 255, 3]; (x_int, y_int) is the target center
        atts_map = []

        xtl, ytl = bbox[0], bbox[1]  # coordinates in the 255-sized image
        xbr, ybr = bbox[2], bbox[3]
        det_height = int(ybr) - int(ytl)
        det_width = int(xbr) - int(xtl)
        det_max = max(det_height, det_width)
        min_scale = 16
        valid = det_max >= min_scale  # min_scale: 16

        fxtl = (xtl * cfg.corners.Ratios)  # width ratio: scaling factor from 255 down to 25
        fytl = (ytl * cfg.corners.Ratios)
        fxbr = (xbr * cfg.corners.Ratios)
        fybr = (ybr * cfg.corners.Ratios)

        xtl = int(fxtl)
        ytl = int(fytl)
        xbr = int(fxbr)
        ybr = int(fybr)

        width = bbox[2] - bbox[0]
        height = bbox[3] - bbox[1]
        # visualize the bounding box
        # cv2.rectangle(image, (int(bbox[0]), int(bbox[1])),
        #               (int(bbox[2]), int(bbox[3])), (0, 0, 255), thickness=1)  # red (0, 0, 255)
        # cv2.imwrite('255.jpg', image)  # image: [255, 255, 3]
        width = math.ceil(width * cfg.corners.Ratios)
        height = math.ceil(height * cfg.corners.Ratios)

        if cfg.corners.gaussian_rad == -1:
            radius = gaussian_radius((height, width), cfg.corners.gaussian_iou)
            radius = max(0, int(radius))
        else:
            radius = cfg.corners.gaussian_rad

        if valid:
            draw_gaussian(tl_heats[0], [xtl, ytl], radius)
            draw_gaussian(br_heats[0], [xbr, ybr], radius)
            tl_regrs[0, :] = [fxtl - xtl, fytl - ytl]  # tl_regrs: [5, 128, 2]
            br_regrs[0, :] = [fxbr - xbr, fybr - ybr]
            tl_tags[0] = max(0, min(ytl * cfg.corners.cor_size + xtl,
                                    cfg.corners.cor_size * cfg.corners.cor_size - 1))  # flattened index; ytl already floored
            br_tags[0] = max(0, min(ybr * cfg.corners.cor_size + xbr,
                                    cfg.corners.cor_size * cfg.corners.cor_size - 1))
        else:
            # the original code indexed tl_valids[b_ind, category], but b_ind/category are
            # undefined here; with heat maps shaped [1, 25, 25] the first map is meant
            draw_gaussian(tl_valids[0], [xtl, ytl], radius)  # top-left masked heat map
            draw_gaussian(br_valids[0], [xbr, ybr], radius)
            tl_valids = (tl_valids == 0).astype(np.float32)
            br_valids = (br_valids == 0).astype(np.float32)
            # tag_masks[:1] = 1
    else:
        atts_map, tl_heats, br_heats, tl_valids, br_valids, tag_masks, \
            tl_regrs, br_regrs, tl_tags, br_tags = [], [], [], [], [], [], [], [], [], []

    '''
    if x_int:
        tag_masks = np.ones((cfg.offs.max_objects), dtype=np.uint8)
        tl_regrs = np.zeros((cfg.offs.max_objects, 2), dtype=np.float32)  # max_objects: 1
        tl_regrs[0, :] = [x_float - x_int, y_float - y_int]  # tl_regrs: [5, 128, 2]
        tl_tags = np.zeros((cfg.offs.max_objects), dtype=np.int64)
        tl_tags[0] = y_int * cfg.offs.off_size + x_int  # flattened index; y_int already floored
    else:
        tl_heats, br_heats, tl_valids, br_valids, tag_masks, tl_regrs, br_regrs, tl_tags, br_tags = \
            [], [], [], [], [], [], [], [], []
    '''

    if data == 'template':
        return image_t, image_l, image_b, image_r, bbox, atts_map, tl_heats, br_heats, \
            tl_valids, br_valids, tag_masks, tl_regrs, br_regrs, tl_tags, br_tags
    else:
        return image, bbox, atts_map, tl_heats, br_heats, tl_valids, br_valids, \
            tag_masks, tl_regrs, br_regrs, tl_tags, br_tags
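# --- Hedged sketch: a minimal CornerNet-style draw_gaussian -------------------
# Illustrates what draw_gaussian(tl_heats[0], [xtl, ytl], radius) above does:
# splat a 2D Gaussian peak onto the heat map, keeping the element-wise maximum.
# This is a simplified re-implementation, not the repo's exact helper.
import numpy as np

def draw_gaussian(heatmap, center, radius, k=1):
    diameter = 2 * radius + 1
    sigma = diameter / 6.0
    ax = np.arange(-radius, radius + 1)
    xx, yy = np.meshgrid(ax, ax)
    gaussian = np.exp(-(xx ** 2 + yy ** 2) / (2 * sigma ** 2))

    x, y = int(center[0]), int(center[1])
    h, w = heatmap.shape
    left, right = min(x, radius), min(w - x, radius + 1)
    top, bottom = min(y, radius), min(h - y, radius + 1)

    # write back into the heat map through a view, keeping the max of old and new values
    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
    masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right]
    np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
    return heatmap

heat = np.zeros((25, 25), dtype=np.float32)
draw_gaussian(heat, [10, 12], radius=2)
print(heat.max(), heat[12, 10])  # 1.0 at the corner location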