def __init__(self,
             alpha=0.25,
             gamma=2.0,
             lambda_p=5.0,
             temperature=1. / 3,
             strides=None,
             iou_type='giou'):
    """Store the loss hyper-parameters and build the box / IoU helpers.

    :param alpha: kept as ``self.alpha``.
    :param gamma: kept as ``self.gamma``.
    :param lambda_p: kept as ``self.lambda_p``.
    :param temperature: kept as ``self.temperature``.
    :param strides: one stride per feature level; ``None`` selects
        ``[8, 16, 32, 64, 128]``.
    :param iou_type: forwarded to ``IOULoss`` (which is always built with
        ``coord_type='ltrb'``).
    """
    # Scalar hyper-parameters are stored untouched.
    self.alpha, self.gamma = alpha, gamma
    self.lambda_p, self.temperature = lambda_p, temperature

    # A fresh default list is created per instance (no shared mutable default).
    self.strides = [8, 16, 32, 64, 128] if strides is None else strides

    self.box_coder = BoxCoder()
    self.iou_loss_func = IOULoss(iou_type=iou_type, coord_type='ltrb')
def __init__(self, radius=1, strides=None, layer_limits=None):
    """Configure the matching radius, per-level strides and scale ranges.

    :param radius: kept as ``self.radius`` (selects the matching method).
    :param strides: one stride per feature level; ``None`` selects
        ``[8, 16, 32, 64, 128]``. Stored as a tensor.
    :param layer_limits: boundaries between the scale ranges of consecutive
        levels; ``None`` selects ``[64, 128, 256, 512]``.
    """
    if strides is None:
        strides = [8, 16, 32, 64, 128]
    if layer_limits is None:
        layer_limits = [64, 128, 256, 512]

    self.radius = radius
    self.box_coder = BoxCoder()
    self.strides = torch.tensor(strides)

    # FCOS gives every level its own scale range so that one sample is not
    # matched to several targets. Each boundary is the upper bound of one
    # level and the lower bound of the next, hence every value is duplicated
    # and the sequence is padded with -1 / INF before pairing. With the
    # defaults the result has shape [5, 2]:
    #   [[-1, 64], [64, 128], [128, 256], [256, 512], [512, INF]]
    doubled_limits = np.repeat(np.array(layer_limits), 2).tolist()
    flat_bounds = [-1.] + doubled_limits + [INF]
    self.layer_limits = torch.tensor(flat_bounds).view(-1, 2)
def __init__(self,
             alpha=0.25,
             gamma=2.0,
             lambda_p=5.0,
             temperature=1. / 3,
             strides=None,
             iou_type='giou',
             positive_weights=0.1,
             negative_weights=1.0):
    """Store the loss hyper-parameters and build the box / IoU helpers.

    :param alpha: kept as ``self.alpha``.
    :param gamma: kept as ``self.gamma``.
    :param lambda_p: kept as ``self.lambda_p``; per the original author's
        note it balances the positive / negative sample loss weight.
    :param temperature: kept as ``self.temperature``; per the original
        author's note it emphasises high-confidence locations.
    :param strides: one stride per feature level; ``None`` selects
        ``[8, 16, 32, 64, 128]``.
    :param iou_type: forwarded to ``IOULoss`` (always built with
        ``coord_type='ltrb'``).
    :param positive_weights: kept as ``self.positive_weights``.
    :param negative_weights: kept as ``self.negative_weights``.
    """
    self.alpha, self.gamma = alpha, gamma
    self.lambda_p, self.temperature = lambda_p, temperature

    # Weights of the positive / negative loss terms. The original author
    # notes that the paper does not state them explicitly (to be decided).
    self.positive_weights, self.negative_weights = (positive_weights,
                                                    negative_weights)

    # A fresh default list is created per instance (no shared mutable default).
    self.strides = [8, 16, 32, 64, 128] if strides is None else strides

    self.box_coder = BoxCoder()
    self.iou_loss_func = IOULoss(iou_type=iou_type, coord_type='ltrb')
class FCOSLossBuilder(object):
    """Build per-image FCOS regression and label targets for every grid point."""

    def __init__(self, radius=1, strides=None, layer_limits=None):
        """Configure the matching radius, per-level strides and scale ranges.

        :param radius: ``0`` keeps every point inside a gt box; any other
            value additionally restricts points to a window of
            ``radius * stride`` around the gt centre (see ``__call__``).
        :param strides: one stride per feature level; ``None`` selects
            ``[8, 16, 32, 64, 128]``.
        :param layer_limits: boundaries between the scale ranges of
            consecutive levels; ``None`` selects ``[64, 128, 256, 512]``.
        """
        self.radius = radius
        self.box_coder = BoxCoder()
        if strides is None:
            strides = [8, 16, 32, 64, 128]
        self.strides = torch.tensor(strides)
        if layer_limits is None:
            layer_limits = [64, 128, 256, 512]
        # FCOS gives every level its own scale range so that one sample is
        # not matched to several targets. Every boundary is duplicated (upper
        # bound of one level, lower bound of the next) and the sequence is
        # padded with -1 / INF. With the defaults, shape [5, 2]:
        #   [[-1, 64], [64, 128], [128, 256], [256, 512], [512, INF]]
        expand_limits = np.array(layer_limits)[None].repeat(2).tolist()
        self.layer_limits = torch.tensor([-1.] + expand_limits +
                                         [INF]).view(-1, 2)

    @torch.no_grad()
    def __call__(self, bs, grids, targets):
        """Match every grid point of every image to at most one gt box.

        :param bs: batch size.
        :param grids: list (one entry per feature level) of tensors shaped
            ``[h, w, 2]`` holding ``(x, y)`` in input-image scale.
        :param targets: ``[gts, 7]`` tensor with columns
            ``(batch_id, weights, label_id, x1, y1, x2, y2)``, boxes in
            input-image scale.
        :return: ``(batch_reg_targets, batch_label_targets)``, two lists of
            length ``bs``.

            * reg targets: ``[num_grids, 4]`` ``(l, t, r, b)`` of the matched
              gt box. Negative samples may still carry positive values, so
              they must be filtered with the label targets. For an image
              without any gt box the entry is an empty tensor.
            * label targets: ``[num_grids]`` class id of the matched gt box,
              or ``-1`` for a negative sample.
        """
        device = grids[0].device
        self.layer_limits = self.layer_limits.to(device)
        self.strides = self.strides.to(device)

        # [num_grids, 5] ==> (xc, yc, min_limit, max_limit, stride), where
        # num_grids is the number of grid points over all feature maps.
        expand_grid = torch.cat([
            torch.cat([
                grid,
                layer_limit.expand_as(grid),
                stride.expand_as(grid[..., [0]])
            ], dim=-1).view(-1, 5)
            for grid, layer_limit, stride in zip(grids, self.layer_limits,
                                                 self.strides)
        ], dim=0)
        num_grids = len(expand_grid)
        # Row selector reused for every image (built once, on the right device).
        grid_rows = torch.arange(num_grids, device=device)
        grid_xy = expand_grid[:, None, [0, 1]]

        batch_reg_targets = list()
        batch_label_targets = list()
        for bi in range(bs):
            # [num_gts, 6] ==> (weights, label_id, x1, y1, x2, y2)
            batch_targets = targets[targets[:, 0] == bi, 1:]

            # No target in the image: everything is a negative sample.
            if len(batch_targets) == 0:
                # Keep the placeholder on the same device as every other
                # output so that later concatenation does not mix devices.
                batch_reg_targets.append(torch.empty(0, device=device))
                batch_label_targets.append(
                    torch.ones(size=(num_grids, ),
                               device=device,
                               dtype=torch.float32) * -1)
                continue

            # Encode every grid point against every gt box; indexed below as
            # [num_grids, num_gts, 4] (l, t, r, b) in input-image scale.
            reg_target_per_img = self.box_coder.encoder(
                expand_grid, batch_targets[:, 2:])

            # Filter 1: location.
            #   radius == 0: drop points that are not inside the gt box.
            #   otherwise:   drop points that are not inside the window
            #   (gt_xc - radius*stride, gt_yc - radius*stride,
            #    gt_xc + radius*stride, gt_yc + radius*stride)
            #   clipped to the gt box.
            if self.radius == 0:
                valid_in_box = reg_target_per_img.min(dim=2)[0] > 0
            else:
                gt_min_xy = batch_targets[None, :, [2, 3]]
                gt_max_xy = batch_targets[None, :, [4, 5]]
                # [num_gts, 2] ==> (xc, yc)
                gt_center = (batch_targets[:, [2, 3]] +
                             batch_targets[:, [4, 5]]) / 2.0
                # [num_grids, 1, 2]: window half-size of each point's level
                reach = expand_grid[:, None, [4, 4]] * self.radius
                # [1, num_gts, 2] -/+ [num_grids, 1, 2] = [num_grids, num_gts, 2]
                limit_gt_min_xy = gt_center[None, :, :] - reach
                limit_gt_max_xy = gt_center[None, :, :] + reach
                # Clip the window to the gt box.
                limit_gt_min_xy = torch.where(limit_gt_min_xy > gt_min_xy,
                                              limit_gt_min_xy, gt_min_xy)
                limit_gt_max_xy = torch.where(limit_gt_max_xy < gt_max_xy,
                                              limit_gt_max_xy, gt_max_xy)
                left_top = grid_xy - limit_gt_min_xy
                right_bottom = limit_gt_max_xy - grid_xy
                valid_in_box = torch.cat([left_top, right_bottom],
                                         dim=2).min(dim=2)[0] > 0

            # Filter 2: scale. Drop pairs whose largest regression distance
            # is outside the range assigned to the point's level.
            max_reg_targets_per_img = reg_target_per_img.max(
                dim=2)[0]  # [num_grids, num_gts]
            is_card_in_level = (
                max_reg_targets_per_img >= expand_grid[:, [2]]) & (
                    max_reg_targets_per_img <= expand_grid[:, [3]])

            gt_area = (batch_targets[:, 4] - batch_targets[:, 2]) * (
                batch_targets[:, 5] - batch_targets[:, 3])  # [num_gts]
            locations_to_gt_area = gt_area[None, :].repeat(
                num_grids, 1)  # [num_grids, num_gts]
            # Rejected pairs get an infinite area so they never win the min.
            locations_to_gt_area[~valid_in_box] = INF
            locations_to_gt_area[~is_card_in_level] = INF

            # Filter 3: ambiguity. A point matching several gt boxes is
            # assigned to the one with the smallest area.
            min_area, gt_idx = locations_to_gt_area.min(dim=1)  # [num_grids]
            reg_target_per_img = reg_target_per_img[
                grid_rows, gt_idx]  # [num_grids, 4] (l, t, r, b)
            labels_per_img = batch_targets[:, 1][gt_idx]
            # Points for which every pair was rejected are negatives.
            labels_per_img[min_area == INF] = -1

            batch_reg_targets.append(reg_target_per_img)
            batch_label_targets.append(labels_per_img)

        return batch_reg_targets, batch_label_targets
class FCOSAutoAssignLoss(object):
    """AutoAssign-style loss: weighted positive term plus focal negative term."""

    def __init__(self,
                 alpha=0.25,
                 gamma=2.0,
                 lambda_p=5.0,
                 temperature=1. / 3,
                 strides=None,
                 iou_type='giou',
                 positive_weights=0.1,
                 negative_weights=1.0):
        """Store the hyper-parameters and build the box / IoU helpers.

        :param alpha: the negative term is scaled by ``1 - alpha``.
        :param gamma: exponent applied to the negative probability.
        :param lambda_p: scale of the IoU loss inside ``exp(-lambda_p * iou_loss)``.
        :param temperature: divisor inside ``exp(joint_prob / temperature)``.
        :param strides: one stride per feature level; ``None`` selects
            ``[8, 16, 32, 64, 128]``.
        :param iou_type: forwarded to ``IOULoss`` (``coord_type='ltrb'``).
        :param positive_weights: final multiplier of the positive loss.
        :param negative_weights: final multiplier of the negative loss.
        """
        self.alpha = alpha
        self.gamma = gamma
        self.lambda_p = lambda_p
        self.temperature = temperature
        self.positive_weights = positive_weights
        self.negative_weights = negative_weights
        if strides is None:
            strides = [8, 16, 32, 64, 128]
        self.strides = strides
        self.box_coder = BoxCoder()
        self.iou_loss_func = IOULoss(iou_type=iou_type, coord_type='ltrb')

    @staticmethod
    def _flatten_levels(level_maps, bi, channels):
        """Concatenate image ``bi`` of every ``[bs, c, h, w]`` map as ``[grid_num, c]``."""
        return torch.cat([
            level_map[bi].permute(1, 2, 0).contiguous().view(-1, channels)
            for level_map in level_maps
        ], dim=0)

    def __call__(self, cls_predicts, box_predicts, implicits, grids, gaussian,
                 targets):
        """Compute the loss for one batch.

        :param cls_predicts: list(cls_predict), cls_predict ``[bs, cls, h, w]``
        :param box_predicts: list(box_predict), box_predict ``[bs, 4, h, w]``
        :param implicits: list(implicit), implicit ``[bs, 1, h, w]``
        :param grids: list(grid), grid ``[h, w, 2]``
        :param gaussian: ``[cls, 4]``; columns ``:2`` are used as the mean and
            columns ``2:`` as the spread of the centre prior.
        :param targets: ``[gt, 7]`` ``(bs, weights, label_id, x1, y1, x2, y2)``
        :return: ``(total_loss, stack([negative, positive]).detach(),
            len(targets))``. When ``targets`` is empty, ``total_loss`` is the
            negative loss alone and the positive entry is ``0``.
        """
        device = cls_predicts[0].device
        bs = cls_predicts[0].shape[0]
        cls_num = cls_predicts[0].shape[1]

        # expand_grid [grid_num, 3] (xc, yc, stride)
        expand_grid = torch.cat([
            torch.cat([
                grid_item,
                torch.tensor(data=stride_item,
                             device=device,
                             dtype=torch.float).expand_as(grid_item[..., [0]])
            ], dim=-1).view(-1, 3)
            for stride_item, grid_item in zip(self.strides, grids)
        ], dim=0)

        # Promote half-precision logits to fp32. New lists are built so the
        # caller's containers are left untouched (and tuples are accepted).
        cls_predicts = [
            item.float() if item.dtype == torch.float16 else item
            for item in cls_predicts
        ]
        implicits = [
            item.float() if item.dtype == torch.float16 else item
            for item in implicits
        ]

        negative_loss_list = list()
        positive_loss_list = list()
        for bi in range(bs):
            # [grid_num, cls_num]
            batch_cls_predicts = self._flatten_levels(cls_predicts, bi,
                                                      cls_num).sigmoid()
            # [grid_num, 1]
            batch_implicit = self._flatten_levels(implicits, bi, 1).sigmoid()
            # Clamped so that log(1 - p) below stays finite.
            batch_join_predicts = (batch_cls_predicts * batch_implicit).clamp(
                1e-6, 1 - 1e-6)
            # [grid_num, 4]
            batch_box_predicts = self._flatten_levels(box_predicts, bi, 4)

            # [gt_num, 6] (weights, label_idx, x1, y1, x2, y2)
            batch_targets = targets[targets[:, 0] == bi, 1:]
            if len(batch_targets) == 0:
                # No gt box: every location only contributes the negative term.
                negative_loss = -(1 - self.alpha) * (
                    batch_join_predicts**self.gamma) * (
                        1 - batch_join_predicts).log()
                negative_loss_list.append(negative_loss.sum())
                continue

            # ---- positive loss ------------------------------------------
            gt_xy = (batch_targets[:, [2, 3]] + batch_targets[:, [4, 5]]) / 2
            # Offset of every grid point to every gt centre, in stride
            # units: [grid_num, gt_num, 2]
            xy_offset = (expand_grid[:, None, :2] -
                         gt_xy[None, :, :]) / expand_grid[:, None, [2]]
            # [grid_num, gt_num, 4]
            batch_reg_targets = self.box_coder.encode(expand_grid[..., :2],
                                                      batch_targets[..., 2:])
            # (grid, gt) pairs whose four regression distances are all > 0.
            grid_idx, gt_idx = (batch_reg_targets.min(dim=-1)[0] >
                                0).nonzero(as_tuple=False).t()
            gt_cls_idx = batch_targets[gt_idx, 1].long()

            cls_prob = batch_join_predicts[grid_idx, gt_cls_idx]
            iou_loss = self.iou_loss_func(
                batch_box_predicts[grid_idx, :],
                batch_reg_targets[grid_idx, gt_idx, :])
            loc_prob = (-self.lambda_p * iou_loss).exp()
            joint_prob = cls_prob * loc_prob
            confidence = (joint_prob / self.temperature).exp()

            # Centre prior: exp(-sum((d - mu)^2) / (2 * sum(sigma^2))).
            gaussian_delta_mu = -((xy_offset[grid_idx, gt_idx, :] -
                                   gaussian[gt_cls_idx, :2])**2).sum(-1)
            gaussian_delta_theta = 2 * (gaussian[gt_cls_idx, 2:]**2).sum(-1)
            gaussian_weights = (gaussian_delta_mu / gaussian_delta_theta).exp()
            positive_weights = confidence * gaussian_weights

            positive_loss = torch.tensor(data=0., device=device)
            for unique_gt_idx in gt_idx.unique():
                # Weights are normalised over the candidates of one gt box.
                grid_idx_mask = gt_idx == unique_gt_idx
                instance_weights = positive_weights[
                    grid_idx_mask] / positive_weights[grid_idx_mask].sum()
                instance_loss = -(instance_weights *
                                  joint_prob[grid_idx_mask]).sum().log()
                positive_loss += instance_loss
            positive_loss_list.append(positive_loss)

            # ---- negative loss ------------------------------------------
            decode_box = self.box_coder.decoder(expand_grid[..., :2],
                                                batch_box_predicts).detach()
            predict_targets_iou = box_iou(decode_box, batch_targets[..., 2:])
            max_iou = predict_targets_iou.max(dim=-1)[0]  # [grid_num]
            # 1 / (1 - iou) is infinite for a perfect prediction and would
            # turn the normalisation below into NaN, so bound the denominator
            # by the smallest positive normal value of the dtype.
            func_iou = 1 / (1 - max_iou).clamp(
                min=torch.finfo(max_iou.dtype).tiny)
            # Rescale to [0, 1]: the highest IoU maps to ~0.
            func_iou = 1 - (func_iou - 1) / (func_iou.max() - 1 + 1e-10)

            # Weight 1 everywhere except at the candidate (grid, class) cells.
            negative_weights = torch.ones(size=(expand_grid.shape[0], cls_num),
                                          device=device).float()
            negative_weights[grid_idx, gt_cls_idx] = func_iou[grid_idx]
            weighted_negative_prob = negative_weights * batch_join_predicts
            negative_loss = -(1 - self.alpha) * (
                weighted_negative_prob**self.gamma) * (
                    1 - weighted_negative_prob).log()
            negative_loss_list.append(negative_loss.sum())

        # Both terms are normalised by the number of gt boxes in the batch.
        total_negative_loss = torch.stack(negative_loss_list).sum() / max(
            1, len(targets))
        if len(targets) == 0:
            return total_negative_loss, \
                   torch.stack([total_negative_loss,
                                torch.tensor(0., device=device)]).detach(), \
                   len(targets)
        total_positive_loss = torch.stack(positive_loss_list).sum() / max(
            1, len(targets))
        total_negative_loss = total_negative_loss * self.negative_weights
        total_positive_loss = total_positive_loss * self.positive_weights
        total_loss = total_negative_loss + total_positive_loss
        return total_loss, torch.stack(
            [total_negative_loss,
             total_positive_loss]).detach(), len(targets)
class FCOSAutoAssignLoss(object):
    """AutoAssign-style loss: weighted positive term plus focal negative term."""

    def __init__(self,
                 alpha=0.25,
                 gamma=2.0,
                 lambda_p=5.0,
                 temperature=1. / 3,
                 strides=None,
                 iou_type='giou',
                 positive_weights=0.1,
                 negative_weights=1.0):
        """Store the hyper-parameters and build the box / IoU helpers.

        :param alpha: final mixing weight; the positive loss is scaled by
            ``alpha`` and the negative loss by ``1 - alpha``.
        :param gamma: exponent applied to the negative probability.
        :param lambda_p: scale of the IoU loss inside
            ``exp(-lambda_p * iou_loss)``; per the original author's note it
            balances the positive / negative sample loss weight.
        :param temperature: divisor inside ``exp(joint_prob / temperature)``;
            per the original author's note it emphasises high-confidence
            locations.
        :param strides: one stride per feature level; ``None`` selects
            ``[8, 16, 32, 64, 128]``.
        :param iou_type: forwarded to ``IOULoss`` (``coord_type='ltrb'``).
        :param positive_weights: stored only; ``__call__`` does not read it.
        :param negative_weights: stored only; ``__call__`` does not read it.
        """
        self.alpha = alpha
        self.gamma = gamma
        self.lambda_p = lambda_p
        self.temperature = temperature
        # Weights of the positive / negative loss terms. The original author
        # notes that the paper does not state them explicitly (to be decided).
        self.positive_weights = positive_weights
        self.negative_weights = negative_weights
        if strides is None:
            strides = [8, 16, 32, 64, 128]
        self.strides = strides
        self.box_coder = BoxCoder()
        self.iou_loss_func = IOULoss(iou_type=iou_type, coord_type='ltrb')

    @staticmethod
    def _flatten_levels(level_maps, bi, channels):
        """Concatenate image ``bi`` of every ``[bs, c, h, w]`` map as ``[grid_num, c]``."""
        return torch.cat([
            level_map[bi].permute(1, 2, 0).contiguous().view(-1, channels)
            for level_map in level_maps
        ], dim=0)

    def __call__(self, cls_predicts, box_predicts, implicits, grids, gaussian,
                 targets):
        """Compute the loss for one batch.

        :param cls_predicts: list(cls_predict), cls_predict ``[bs, num_cls, h, w]``
        :param box_predicts: list(box_predict), box_predict ``[bs, 4, h, w]``
        :param implicits: list(implicit), implicit ``[bs, 1, h, w]``
        :param grids: list(grid), grid ``[h, w, 2]`` with ``(xc, yc)`` in
            input-image scale.
        :param gaussian: ``[cls, 4]`` ==> ``(ux, uy, theta_x, theta_y)``
        :param targets: ``[gt, 7]`` ``(bs, weights, label_id, x1, y1, x2, y2)``
        :return: ``(total_loss, stack([negative, positive]).detach(),
            len(targets))``. When ``targets`` is empty, ``total_loss`` is the
            unscaled negative loss and the positive entry is ``0``.
        """
        device = cls_predicts[0].device
        bs = cls_predicts[0].shape[0]
        cls_num = cls_predicts[0].shape[1]

        # expand_grid [grid_num, 3] ==> (xc, yc, stride)
        expand_grid = torch.cat([
            torch.cat([
                grid_item,
                torch.tensor(data=stride_item,
                             device=device,
                             dtype=torch.float).expand_as(grid_item[..., [0]])
            ], dim=-1).view(-1, 3)
            for stride_item, grid_item in zip(self.strides, grids)
        ], dim=0)

        # Promote half-precision logits to fp32. New lists are built so the
        # caller's containers are left untouched (and tuples are accepted).
        cls_predicts = [
            item.float() if item.dtype == torch.float16 else item
            for item in cls_predicts
        ]
        implicits = [
            item.float() if item.dtype == torch.float16 else item
            for item in implicits
        ]

        negative_loss_list = list()
        positive_loss_list = list()
        for bi in range(bs):
            # [grid_num, cls_num], after sigmoid
            batch_cls_predicts = self._flatten_levels(cls_predicts, bi,
                                                      cls_num).sigmoid()
            # [grid_num, 1]
            batch_implicit = self._flatten_levels(implicits, bi, 1).sigmoid()
            # Joint prediction = classification * implicit objectness,
            # [grid_num, cls_num]; clamped so log(1 - p) stays finite.
            batch_join_predicts = (batch_cls_predicts * batch_implicit).clamp(
                1e-6, 1 - 1e-6)
            # [grid_num, 4]
            batch_box_predicts = self._flatten_levels(box_predicts, bi, 4)

            # [gt_num, 6] ==> (weights, label_id, x1, y1, x2, y2)
            batch_targets = targets[targets[:, 0] == bi, 1:]
            # Without any target the loss is the negative focal term alone.
            if len(batch_targets) == 0:
                negative_loss = -1 * (batch_join_predicts**self.gamma) * (
                    1 - batch_join_predicts).log()
                negative_loss_list.append(negative_loss.sum())
                continue

            # ---- positive loss ------------------------------------------
            gt_xy = (batch_targets[:, [2, 3]] + batch_targets[:, [4, 5]]) / 2.
            # d = (grid_xy - gt_xy) / stride, input of the centre prior:
            # [grid_num, gt_num, 2]
            xy_offset = (expand_grid[:, None, :2] -
                         gt_xy[None, :, :]) / expand_grid[:, None, [2]]
            # Regression target of every grid point: [grid_num, gt_num, 4]
            batch_reg_targets = self.box_coder.encode(expand_grid[..., :2],
                                                      batch_targets[..., 2:])
            # Every point strictly inside a gt box is a positive candidate.
            # Both index tensors have shape [N], N = number of such pairs.
            grid_idx, gt_idx = (batch_reg_targets.min(dim=-1)[0] >
                                0).nonzero(as_tuple=False).t()
            gt_cls_idx = batch_targets[gt_idx, 1].long()

            cls_prob = batch_join_predicts[grid_idx, gt_cls_idx]  # [N]
            iou_loss = self.iou_loss_func(
                batch_box_predicts[grid_idx, :],
                batch_reg_targets[grid_idx, gt_idx, :])
            loc_prob = (-self.lambda_p * iou_loss).exp()  # P_loc
            joint_prob = cls_prob * loc_prob  # P+ = P_cls * P_loc
            # C(P): confidence weighting function.
            confidence = (joint_prob / self.temperature).exp()

            # Centre prior G(d) = exp(-sum((d - u)^2) / (2 * sum(theta^2)))
            # with d = xy_offset; u and theta come from ``gaussian``.
            gaussian_delta_mu = -((xy_offset[grid_idx, gt_idx, :] -
                                   gaussian[gt_cls_idx, :2])**2).sum(-1)
            gaussian_delta_theta = 2 * (gaussian[gt_cls_idx, 2:]**2).sum(-1)
            gaussian_weights = (gaussian_delta_mu / gaussian_delta_theta).exp()
            # w+ = C(P) * G(d), shape [N]
            positive_weights = confidence * gaussian_weights

            positive_loss = torch.tensor(data=0., device=device)
            for unique_gt_idx in gt_idx.unique():
                # Weights are normalised over the candidates of one gt box.
                gt_idx_mask = gt_idx == unique_gt_idx
                instance_weights = positive_weights[
                    gt_idx_mask] / positive_weights[gt_idx_mask].sum()
                instance_loss = -(instance_weights *
                                  joint_prob[gt_idx_mask]).sum().log()
                positive_loss += instance_loss
            positive_loss_list.append(positive_loss)

            # ---- negative loss ------------------------------------------
            # [grid_num, 4] ==> (x1, y1, x2, y2)
            decode_box = self.box_coder.decoder(expand_grid[..., :2],
                                                batch_box_predicts).detach()
            # [grid_num, gt_num]
            predict_targets_iou = box_iou(decode_box, batch_targets[..., 2:])
            # Best IoU between each predicted box and all gt boxes: [grid_num]
            max_iou = predict_targets_iou.max(dim=-1)[0]
            # 1 / (1 - iou) is infinite for a perfect prediction and would
            # turn the normalisation below into NaN, so bound the denominator
            # by the smallest positive normal value of the dtype.
            func_iou = 1 / (1 - max_iou).clamp(
                min=torch.finfo(max_iou.dtype).tiny)
            # Rescale to [0, 1]: the highest IoU maps to ~0, IoU 0 maps to 1.
            func_iou = 1 - (func_iou - 1) / (func_iou.max() - 1 + 1e-10)

            # Every point outside all gt boxes keeps w- = 1.0.
            negative_weights = torch.ones(
                size=(expand_grid.shape[0], cls_num),
                device=device).float()  # [grid_num, cls_num]
            negative_weights[grid_idx, gt_cls_idx] = func_iou[grid_idx]
            weighted_negative_prob = negative_weights * batch_join_predicts
            negative_loss = -1 * (weighted_negative_prob**self.gamma) * (
                1 - weighted_negative_prob).log()
            negative_loss_list.append(negative_loss.sum())

        # Both terms are normalised by the number of gt boxes in the batch.
        total_negative_loss = torch.stack(negative_loss_list).sum() / max(
            1, len(targets))
        if len(targets) == 0:
            return total_negative_loss, torch.stack(
                [total_negative_loss,
                 torch.tensor(0., device=device)]).detach(), len(targets)
        total_positive_loss = torch.stack(positive_loss_list).sum() / max(
            1, len(targets))
        total_negative_loss = total_negative_loss * (1 - self.alpha)
        total_positive_loss = total_positive_loss * self.alpha
        total_loss = total_negative_loss + total_positive_loss
        return total_loss, torch.stack(
            [total_negative_loss,
             total_positive_loss]).detach(), len(targets)