def test_smooth_l1(self):
    program = Program()
    with program_guard(program):
        x = layers.data(name='x', shape=[4], dtype='float32')
        y = layers.data(name='label', shape=[4], dtype='float32')
        loss = layers.smooth_l1(x, y)
        self.assertIsNotNone(loss)
        print(str(program))
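# For reference, a minimal NumPy sketch of the smooth L1 definition the test above
# exercises (assuming fluid's default sigma=1; the helper name and the per-row
# summation are illustrative, not the op's exact implementation):
import numpy as np

def smooth_l1_np(x, y, sigma=1.0):
    """0.5*(sigma*d)^2 where |d| < 1/sigma^2, else |d| - 0.5/sigma^2, summed per row."""
    d = x - y
    abs_d = np.abs(d)
    thresh = 1.0 / (sigma * sigma)
    per_elem = np.where(abs_d < thresh, 0.5 * (sigma * d) ** 2, abs_d - 0.5 * thresh)
    return per_elem.sum(axis=-1, keepdims=True)

# Example: smooth_l1_np(np.array([[0.2, -0.1, 1.5, 0.0]]), np.zeros((1, 4)))
# -> [[0.02 + 0.005 + 1.0 + 0.0]] = [[1.025]]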
def __call__(self,
             predictions,
             labels_pos_mask,         # Shape: [batch_size, 19248, 1]
             labels_neg_mask,         # Shape: [batch_size, 19248, 1]
             labels_allboxes_vector,  # Shape: [batch_size, 19248, 8]
             segment_t,               # list, Shape: [batch_size, 19248, 1]
             label_masks,
             labels_best_truth_idx,
             labels_pos_index,
             labels_pos_cid,          # Shape: [batch_size, 19248]
             labels_pos_cid2,         # Shape: [batch_size, 19248]
             priors,
             class_vectors,
             batch_size,
             use_maskiou=True,
             use_ce_loss=True,
             use_ghm_c_loss=False,
             use_focal_loss=False,
             use_ohem_loss=False):
    pred_allboxes_encode_x0y0x1y1 = predictions['loc']    # Shape: [batch_size, 19248, 4]
    pred_allboxes_conf = predictions['conf']              # Shape: [batch_size, 19248, 1+80]
    pred_allboxes_mask_coef = predictions['mask']         # Shape: [batch_size, 19248, num_prototypes=32]
    pred_proto = predictions['proto']                     # Shape: [batch_size, s4=138, s4=138, num_prototypes=32]
    pred_segm = predictions['segm']                       # Shape: [batch_size, num_classes=80, s8=69, s8=69]
    labels_allboxes_x0y0x1y1 = labels_allboxes_vector[:, :, 0:4]         # Shape: [batch_size, 19248, 4]
    labels_allboxes_decode_x0y0x1y1 = labels_allboxes_vector[:, :, 4:8]  # Shape: [batch_size, 19248, 4]
    losses = {}

    # 1. bbox loss, computed only for positive examples.
    # bbox_alpha = 1.5
    # bbox_loss = P.smooth_l1(P.reshape(pred_allboxes_encode_x0y0x1y1, (-1, 4)),
    #                         P.reshape(labels_allboxes_x0y0x1y1, (-1, 4)))
    # bbox_loss = P.reshape(labels_pos_mask, (-1, 1)) * bbox_loss
    # bbox_loss = P.reduce_sum(bbox_loss) * bbox_alpha
    # losses['B'] = bbox_loss

    # 1. bbox loss as CIoU loss.
    pred_x0y0x1y1 = []
    for idx in range(batch_size):
        temp = decode(pred_allboxes_encode_x0y0x1y1[idx], priors)
        pred_x0y0x1y1.append(temp)
    pred_x0y0x1y1 = P.concat(pred_x0y0x1y1, axis=0)                   # Shape: [batch_size*num_priors, 4]
    pred_x0y0x1y1 = P.reshape(pred_x0y0x1y1, (batch_size, -1, 4))     # Shape: [batch_size, num_priors, 4]
    ciou = P.reshape(
        self.bbox_ciou(pred_x0y0x1y1, labels_allboxes_decode_x0y0x1y1),
        (batch_size, -1, 1))                                          # (batch_size, num_priors, 1)

    # Weight of each predicted box's ciou_loss = 2 - (ground-truth area / image area).
    gt_area = (labels_allboxes_decode_x0y0x1y1[:, :, 2:3] - labels_allboxes_decode_x0y0x1y1[:, :, 0:1]) * \
              (labels_allboxes_decode_x0y0x1y1[:, :, 3:4] - labels_allboxes_decode_x0y0x1y1[:, :, 1:2])
    bbox_loss_scale = 2.0 - gt_area
    ciou_loss = labels_pos_mask * bbox_loss_scale * (1 - ciou)

    bbox_alpha = 1.5
    ciou_loss = P.reduce_sum(ciou_loss) * bbox_alpha
    losses['B'] = ciou_loss

    # 2. mask loss, computed only for positive examples.
    mask_h = P.shape(pred_proto)[1]
    mask_w = P.shape(pred_proto)[2]
    loss_m = 0
    maskiou_t_list = []
    maskiou_net_input_list = []
    label_t_list = []
    for idx in range(batch_size):
        # e.g. [[0], [0], [0], [0], [0], [0], [0], [0]]. For each of the 8 positive
        # samples, pick the index of its best-matching gt (an index into
        # label_x0y0x1y1cid[idx]). With a single gt, every index is 0.
        labels_pos_index[idx].stop_gradient = True
        cur_gt = P.gather(labels_best_truth_idx[idx], labels_pos_index[idx])  # (?, 1)
        cur_gt.stop_gradient = True
        cur_x0y0x1y1 = P.gather(labels_allboxes_decode_x0y0x1y1[idx],
                                labels_pos_index[idx])  # (?, 4)

        proto_masks = pred_proto[idx]  # (138, 138, 32)
        # pred_mask_coef has shape (batch_size, 19248, 32); pick the mask
        # coefficients predicted by the 8 positive samples.
        proto_coef = P.gather(pred_allboxes_mask_coef[idx], labels_pos_index[idx])  # (?, 32)

        # (?, 138, 138): the ground-truth masks matched by the 8 positive samples.
        # They all match the same gt, so it is the same mask repeated 8 times.
        mask_t = P.gather(label_masks[idx], cur_gt)  # (?, 138, 138)
        # (?, ): the ground-truth class ids matched by the 8 positive samples.
        # They all match the same gt, so it is the same cid repeated 8 times.
        label_t = P.gather(labels_pos_cid[idx], labels_pos_index[idx])  # (?, )

        # Size: (138, 138, ?) = prototypes x transposed coefficients
        pred_masks = P.matmul(proto_masks, proto_coef, transpose_y=True)
        pred_masks = P.sigmoid(pred_masks)  # sigmoid activation
        pred_masks = crop(pred_masks, cur_x0y0x1y1)
        pred_masks = P.transpose(pred_masks, perm=[2, 0, 1])

        # Binary cross-entropy; a tiny constant avoids NaN from log(0).
        masks_pos_loss = mask_t * (0 - P.log(pred_masks + 1e-9))
        masks_neg_loss = (1 - mask_t) * (0 - P.log(1 - pred_masks + 1e-9))
        pre_loss = (masks_pos_loss + masks_neg_loss)
        pre_loss = P.reduce_sum(pre_loss, dim=[1, 2])

        # The smaller the gt area, the larger the weight of its mask loss.
        cur_cxcywh = center_size(cur_x0y0x1y1)
        gt_box_width = cur_cxcywh[:, 2]
        gt_box_height = cur_cxcywh[:, 3]
        pre_loss = pre_loss / (gt_box_width * gt_box_height)
        loss_m += P.reduce_sum(pre_loss)

        if use_maskiou:
            # Masks in mask_t with area <= 5*5 would normally be discarded:
            # discard_mask_area = 5*5
            '''
            The GPU build of PaddlePaddle 1.6.2 has a bug: if `select` is [None]
            and is then passed to gather(), the error
                cudaGetLastError invalid configuration argument errno: 9
            is raised. The CPU build runs fine.
            To avoid this, `select` must never be [None], so no area filtering is
            done here and all of mask_t is kept.
            '''
            discard_mask_area = -1
            gt_mask_area = P.reduce_sum(mask_t, dim=[1, 2])
            gt_mask_area.stop_gradient = True
            select = P.where(gt_mask_area > discard_mask_area)
            select.stop_gradient = True
            pred_masks = P.gather(pred_masks, select)
            mask_t = P.gather(mask_t, select)
            label_t = P.gather(label_t, select)
            label_t.stop_gradient = True

            maskiou_net_input = P.reshape(
                pred_masks, (P.shape(pred_masks)[0], 1, mask_h, mask_w))
            pred_masks = P.cast(pred_masks > 0.5, 'float32')  # binarize (round to 0/1)
            maskiou_t = self._mask_iou(pred_masks, mask_t)    # (8, )
            maskiou_net_input_list.append(maskiou_net_input)  # (8, 1, 138, 138)
            maskiou_t_list.append(maskiou_t)                  # (8, )
            label_t_list.append(label_t)                      # (8, )
    mask_alpha = 6.125
    losses['M'] = loss_m * mask_alpha / mask_h / mask_w

    # Remaining maskiou bookkeeping.
    if use_maskiou:
        maskiou_net_input = P.concat(maskiou_net_input_list,
                                     axis=0)  # (21, 1, 138, 138): masks predicted by the 21 positives
        maskiou_t = P.concat(maskiou_t_list,
                             axis=0)  # (21, ): IoU between the predicted and gt masks of the 21 positives
        label_t = P.concat(label_t_list, axis=0)  # (21, ): class ids of the 21 positives
        label_t.stop_gradient = True  # because it is an integer tensor?
        maskiou_targets = [maskiou_net_input, maskiou_t, label_t]

    # 3. conf loss.
    conf_alpha = 1.0
    if use_ce_loss:
        conf_loss = self.ce_conf_loss(pred_allboxes_conf, labels_pos_mask,
                                      labels_neg_mask, class_vectors,
                                      labels_pos_cid2, gt_area)
    elif use_ghm_c_loss:
        conf_loss = self.ghm_c_loss(pred_allboxes_conf, labels_pos_mask,
                                    labels_neg_mask, class_vectors,
                                    labels_pos_cid2)
    elif use_focal_loss:
        conf_loss = self.focal_conf_loss(pred_allboxes_conf, labels_pos_mask,
                                         labels_neg_mask, class_vectors,
                                         labels_pos_cid2)
    elif use_ohem_loss:
        conf_loss = self.ohem_conf_loss(pred_allboxes_conf, batch_size,
                                        labels_neg_mask, labels_pos_mask,
                                        labels_pos_index, class_vectors,
                                        labels_pos_cid)
    losses['C'] = conf_loss * conf_alpha

    # 4. mask IoU loss, computed only for positive examples.
    if use_maskiou:
        # maskiou_net_input (21, 1, 138, 138): masks predicted by the 21 positives
        # maskiou_t         (21, ): IoU between the predicted and gt masks of the 21 positives
        # label_t           (21, ): class ids of the 21 positives
        maskiou_net_input, maskiou_t, label_t = maskiou_targets
        maskiou_p = maskiou_net(maskiou_net_input, self.num_classes - 1)
        maskiou_p = P.reduce_max(maskiou_p, dim=[2, 3])  # global max pooling, (21, 80)
        temp_mask = P.gather(class_vectors, label_t)     # one-hot mask (21, 81)
        temp_mask = temp_mask[:, 1:]                     # drop the background column, (21, 80)
        maskiou_p = temp_mask * maskiou_p                # keep only the true-class channel, (21, 80)
        maskiou_p = P.reduce_sum(maskiou_p, dim=1, keep_dim=True)  # (21, 1)
        loss_i = P.smooth_l1(
            maskiou_p, P.reshape(maskiou_t, (P.shape(maskiou_t)[0], 1)))
        maskiou_alpha = 25.0
        losses['I'] = maskiou_alpha * P.reduce_sum(loss_i)

    # 5. semantic segmentation loss.
    mask_h = P.shape(pred_segm)[2]
    mask_w = P.shape(pred_segm)[3]
    loss_s = 0.0
    for idx in range(batch_size):
        cur_segment = pred_segm[idx]  # (80, 69, 69)
        l = P.sigmoid_cross_entropy_with_logits(cur_segment, segment_t[idx])
        loss_s += P.reduce_sum(l)
    semantic_segmentation_alpha = 1.0
    losses['S'] = loss_s / mask_h / mask_w * semantic_segmentation_alpha

    total_num_pos = P.cast(P.reduce_sum(labels_pos_mask), 'float32')
    for k in losses:
        if k not in ('S', ):
            losses[k] /= total_num_pos
        else:
            losses[k] /= batch_size
    total_loss = 0.0
    for k in losses:
        total_loss += losses[k]

    # Loss keys:
    #  - B: box localization loss
    #  - M: mask loss
    #  - C: class confidence loss
    #  - I: mask IoU loss
    #  - S: semantic segmentation loss
    # return losses['M'], losses['C']
    return losses, total_loss
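# For clarity, a minimal NumPy sketch of the YOLACT-style mask assembly and the
# per-mask binary cross-entropy computed in the loop above (helper names are
# illustrative assumptions; a sketch, not the Paddle graph itself):
import numpy as np

def assemble_masks_np(proto, coef):
    """proto: (H, W, P) prototypes; coef: (K, P) coefficients of K positive boxes.
    Returns (K, H, W) sigmoid-activated masks, mirroring matmul/sigmoid/transpose."""
    masks = proto @ coef.T                # (H, W, K) = prototypes x coef^T
    masks = 1.0 / (1.0 + np.exp(-masks))  # sigmoid activation
    return masks.transpose(2, 0, 1)       # (K, H, W)

def mask_bce_np(pred, gt, eps=1e-9):
    """Pixel-summed binary cross-entropy per mask; eps avoids log(0) -> NaN."""
    pos = gt * -np.log(pred + eps)
    neg = (1.0 - gt) * -np.log(1.0 - pred + eps)
    return (pos + neg).sum(axis=(1, 2))   # (K,)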
def __call__(self,
             location,
             confidence,
             gt_box,
             gt_label,
             landmark_predict,
             lmk_label,
             lmk_ignore_flag,
             prior_box,
             prior_box_var=None):
    def _reshape_to_2d(var):
        return layers.flatten(x=var, axis=2)

    helper = LayerHelper('ssd_loss')  #, **locals())
    # Only support mining_type == 'max_negative' now.
    mining_type = 'max_negative'
    # The max `sample_size` of negative box, used only
    # when mining_type is `hard_example`.
    sample_size = None
    num, num_prior, num_class = confidence.shape
    conf_shape = layers.shape(confidence)

    # 1. Find the matched bounding box for each prior box.
    # 1.1 Compute IoU similarity between ground-truth boxes and prior boxes.
    iou = iou_similarity(x=gt_box, y=prior_box)
    # 1.2 Compute matched bounding boxes via the bipartite matching algorithm.
    matched_indices, matched_dist = bipartite_match(iou, self.match_type,
                                                    self.overlap_threshold)

    # 2. Compute confidence for mining hard examples.
    # 2.1. Get the target label based on matched indices.
    gt_label = layers.reshape(x=gt_label,
                              shape=(len(gt_label.shape) - 1) * (0, ) + (-1, 1))
    gt_label.stop_gradient = True
    target_label, _ = target_assign(gt_label,
                                    matched_indices,
                                    mismatch_value=self.background_label)
    # 2.2. Compute confidence loss.
    # Reshape confidence to a 2D tensor.
    confidence = _reshape_to_2d(confidence)
    target_label = tensor.cast(x=target_label, dtype='int64')
    target_label = _reshape_to_2d(target_label)
    target_label.stop_gradient = True
    conf_loss = layers.softmax_with_cross_entropy(confidence, target_label)

    # 3. Mine hard examples.
    actual_shape = layers.slice(conf_shape, axes=[0], starts=[0], ends=[2])
    actual_shape.stop_gradient = True
    conf_loss = layers.reshape(x=conf_loss,
                               shape=(-1, 0),
                               actual_shape=actual_shape)
    conf_loss.stop_gradient = True
    neg_indices = helper.create_variable_for_type_inference(dtype='int32')
    updated_matched_indices = helper.create_variable_for_type_inference(
        dtype=matched_indices.dtype)
    helper.append_op(type='mine_hard_examples',
                     inputs={
                         'ClsLoss': conf_loss,
                         'LocLoss': None,
                         'MatchIndices': matched_indices,
                         'MatchDist': matched_dist,
                     },
                     outputs={
                         'NegIndices': neg_indices,
                         'UpdatedMatchIndices': updated_matched_indices
                     },
                     attrs={
                         'neg_pos_ratio': self.neg_pos_ratio,
                         'neg_dist_threshold': self.neg_overlap,
                         'mining_type': mining_type,
                         'sample_size': sample_size,
                     })

    # 4. Assign classification and regression targets.
    # 4.1. Encode bboxes according to the prior boxes.
    encoded_bbox = box_coder(prior_box=prior_box,
                             prior_box_var=prior_box_var,
                             target_box=gt_box,
                             code_type='encode_center_size')
    # 4.2. Assign regression targets.
    target_bbox, target_loc_weight = target_assign(
        encoded_bbox,
        updated_matched_indices,
        mismatch_value=self.background_label)
    # 4.3. Assign classification targets.
    target_label, target_conf_weight = target_assign(
        gt_label,
        updated_matched_indices,
        negative_indices=neg_indices,
        mismatch_value=self.background_label)
    target_loc_weight = target_loc_weight * target_label

    encoded_lmk_label = self.decode_lmk(lmk_label, prior_box, prior_box_var)
    target_lmk, target_lmk_weight = target_assign(
        encoded_lmk_label,
        updated_matched_indices,
        mismatch_value=self.background_label)
    lmk_ignore_flag = layers.reshape(
        x=lmk_ignore_flag,
        shape=(len(lmk_ignore_flag.shape) - 1) * (0, ) + (-1, 1))
    target_ignore, nouse = target_assign(
        lmk_ignore_flag,
        updated_matched_indices,
        mismatch_value=self.background_label)
    target_lmk_weight = target_lmk_weight * target_ignore

    landmark_predict = _reshape_to_2d(landmark_predict)
    target_lmk = _reshape_to_2d(target_lmk)
    target_lmk_weight = _reshape_to_2d(target_lmk_weight)
    lmk_loss = layers.smooth_l1(landmark_predict, target_lmk)
    lmk_loss = lmk_loss * target_lmk_weight
    target_lmk.stop_gradient = True
    target_lmk_weight.stop_gradient = True
    target_ignore.stop_gradient = True
    nouse.stop_gradient = True

    # 5. Compute loss.
    # 5.1 Compute confidence loss.
    target_label = _reshape_to_2d(target_label)
    target_label = tensor.cast(x=target_label, dtype='int64')
    conf_loss = layers.softmax_with_cross_entropy(confidence, target_label)
    target_conf_weight = _reshape_to_2d(target_conf_weight)
    conf_loss = conf_loss * target_conf_weight
    # target_label and target_conf_weight do not carry gradient.
    target_label.stop_gradient = True
    target_conf_weight.stop_gradient = True

    # 5.2 Compute regression loss.
    location = _reshape_to_2d(location)
    target_bbox = _reshape_to_2d(target_bbox)
    loc_loss = layers.smooth_l1(location, target_bbox)
    target_loc_weight = _reshape_to_2d(target_loc_weight)
    loc_loss = loc_loss * target_loc_weight
    # target_bbox and target_loc_weight do not carry gradient.
    target_bbox.stop_gradient = True
    target_loc_weight.stop_gradient = True

    # 5.3 Compute the overall weighted loss.
    loss = self.conf_loss_weight * conf_loss + self.loc_loss_weight * loc_loss + 0.4 * lmk_loss
    # Reshape to [N, Np], where N is the batch size and Np is the prior box number.
    loss = layers.reshape(x=loss, shape=(-1, 0), actual_shape=actual_shape)
    loss = layers.reduce_sum(loss, dim=1, keep_dim=True)
    if self.normalize:
        normalizer = layers.reduce_sum(target_loc_weight) + 1
        loss = loss / normalizer
    return loss
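# The mine_hard_examples op above selects negatives with mining_type
# 'max_negative'. A hedged NumPy sketch of that selection rule (illustrative
# only: it ignores the neg_dist_threshold attribute and the op's index
# bookkeeping):
import numpy as np

def mine_max_negative(conf_loss, pos_mask, neg_pos_ratio=3.0):
    """conf_loss: (N, Np) per-prior classification loss; pos_mask: (N, Np) bool.
    Keeps, per image, the neg_pos_ratio * num_positives negatives with the
    highest confidence loss."""
    neg_loss = np.where(pos_mask, -np.inf, conf_loss)  # exclude positives
    order = np.argsort(-neg_loss, axis=1)              # hardest negatives first
    rank = np.argsort(order, axis=1)                   # descending-loss rank of each prior
    num_neg = (pos_mask.sum(axis=1, keepdims=True) * neg_pos_ratio).astype(int)
    return (rank < num_neg) & ~pos_mask                # bool mask of selected negatives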