def compare(bbox, other):
    """Print, per layer, how well the boxes in ``bbox`` are covered by ``other``.

    ``bbox`` and ``other`` are iterated in lockstep, one tensor per layer.
    Each tensor is written into columns ``2:`` of a zero ``(n, 4)`` tensor
    (columns 0 and 1 stay zero), converted with ``point_form`` and compared
    with ``jaccard``. For every row of the ``bbox`` layer the best overlap
    against the ``other`` layer is taken; the layer average is printed,
    followed by the average over all layers.

    Args:
        bbox: iterable of per-layer tensors.
            # assumes each is (n, 2) width/height pairs — TODO confirm
        other: iterable of per-layer tensors, paired with ``bbox`` by position.

    Returns:
        None. Results are only printed.
    """
    means = []
    for k, (p1, p2) in enumerate(zip(bbox, other)):
        exp_p1 = torch.zeros(p1.size(0), 4)
        exp_p2 = torch.zeros(p2.size(0), 4)
        exp_p1[:, 2:] = p1
        exp_p2[:, 2:] = p2

        overlaps = jaccard(
            point_form(exp_p1),
            point_form(exp_p2),
        )  # size [num_p1, num_p2]
        best_overlap, _ = overlaps.max(1)
        means.append(best_overlap.mean().item())
        print("Layer %d avg overlap = %.4f" % (k, means[-1]))

    # With no layers there is nothing to average; report NaN instead of
    # failing with ZeroDivisionError.
    mean_overlap = sum(means) / len(means) if means else float("nan")
    print("Mean Avg Overlap = %.4f" % mean_overlap)
def __call__(self, anchors: torch.Tensor, truths):
    """Score how well ``anchors`` cover the ground-truth boxes ``truths``.

    Ground-truth boxes whose width or height is below ``self.igs`` are
    ignored. For each remaining box the best IoU over all anchors is taken,
    and the loss combines the logs of three statistics of those best IoUs:
    their mean, the mean restricted to boxes whose best IoU is <= 0.5, and
    the mean of their cube roots (weighted three times).

    While ``self.decay`` is above 0.01 the loss is blended with the negative
    log of the mean best IoU per anchor, and ``self.decay`` is multiplied by
    0.9 on every such call (this mutates the instance).

    Args:
        anchors: anchor boxes; passed through ``point_form`` before the IoU.
        truths: ground-truth boxes; columns ``2:`` minus columns ``:2`` are
            used as width/height.
            # presumably corner form (x1, y1, x2, y2) — TODO confirm

    Returns:
        The loss as a tensor.
    """
    # Discard objects that are too small in either dimension.
    sizes = truths[:, 2:] - truths[:, :2]
    too_small = (sizes[:, 0] < self.igs) | (sizes[:, 1] < self.igs)
    truths = truths[~too_small]

    # IoU between every kept truth and every anchor.
    overlaps = jaccard(truths, point_form(anchors))  # size [num_truths, num_priors]

    # Best anchor IoU for each ground truth.
    best_per_truth, _ = overlaps.max(1)

    mean_iou = best_per_truth.mean()
    # NOTE(review): if no truth has best IoU <= 0.5 this is the mean of an
    # empty tensor (NaN), which propagates into the loss — confirm intent.
    poorly_matched = best_per_truth <= 0.5
    mean_iou_poor = best_per_truth[poorly_matched].mean()
    mean_cbrt_iou = best_per_truth.pow(1. / 3).mean()

    log_total = mean_iou.log() + mean_iou_poor.log() + mean_cbrt_iou.log() * 3
    loss = -log_total / 3.

    if self.decay > 0.01:
        # Warm-up term: best truth IoU for each anchor.
        best_per_anchor, _ = overlaps.max(0)
        l0 = -best_per_anchor.mean().log()
        loss = l0 * self.decay + loss * (1. - self.decay)
        self.decay *= 0.9
    return loss
def forward(self, locs: torch.Tensor, params: torch.Tensor, truths, variance=(0.1, 0.2)):
    """Compute the IoU-weighted prior-selection loss.

    Priors are built by concatenating ``locs`` with the first two columns of
    ``params``; the last column of ``params`` is squashed with a sigmoid to
    give one weight (alpha) per prior. Each prior is matched to its best
    ground truth, and the best prior of every ground truth is forced to keep
    that truth's overlap. Priors whose overlap exceeds ``self.thresh`` get
    filter weight 1, best priors get ``self.k``, all others are dropped.

    Args:
        locs: prior centres.  # assumes shape [num_priors, 2] — TODO confirm
        params: per-prior parameters; columns ``:2`` complete the prior box,
            column ``-1`` is the alpha logit.
        truths: ground-truth boxes passed to ``jaccard`` as given.
        variance: unused here; kept so the signature stays compatible.

    Returns:
        ``(sum(alpha * filter * overlap) + self.beta * sum(alpha)) / sum(filter)``
        as a tensor. Gradients flow only through the alphas because matching
        runs under ``torch.no_grad()``.
    """
    sigmoid_alphas = params[:, -1].sigmoid()  # size [num_priors]
    priors = torch.cat([locs, params[:, :2]], dim=1)  # size [num_priors, 4]

    with torch.no_grad():
        overlaps = jaccard(
            truths,
            point_form(priors),
        )  # size [num_truths, num_priors]

        # [num_priors] best ground-truth overlap for each prior
        best_truth_overlap, _ = overlaps.max(0)
        # [num_truths] best prior for each ground truth
        best_prior_overlap, best_prior_idx = overlaps.max(1)

        # A prior that is the best match of some truth keeps that overlap.
        best_truth_overlap[best_prior_idx] = best_prior_overlap

        # Allocate the filter on the same device as the overlaps so the
        # boolean/index assignments below also work for non-CPU inputs.
        x_filter = torch.zeros(best_truth_overlap.size(),
                               device=best_truth_overlap.device)
        x_filter[best_truth_overlap > self.thresh] = 1.
        x_filter[best_prior_idx] = self.k

        # Keep only priors with a non-zero filter weight.
        msk = x_filter > 1e-7
        x_filter = x_filter[msk]
        best_truth_overlap = best_truth_overlap[msk]

    weighted = (sigmoid_alphas[msk] * x_filter * best_truth_overlap).sum()
    return (weighted + self.beta * sigmoid_alphas.sum()) / x_filter.sum()
def forward(self, locs: torch.Tensor, params: torch.Tensor, truths, variance=(0.1, 0.2)):
    """Compute the smooth-L1-weighted prior-selection loss.

    Priors are built by concatenating ``locs`` with the first two columns of
    ``params``; the last column of ``params`` is squashed with a sigmoid to
    give one weight (alpha) per prior. Each prior is matched to a ground
    truth (the best prior of every truth is forced onto that truth), the
    matched truth is encoded against the prior with ``encode``, and a
    smooth-L1 penalty (quadratic below 1, linear above) is summed per prior.

    Args:
        locs: prior centres.  # assumes shape [num_priors, 2] — TODO confirm
        params: per-prior parameters; columns ``:2`` complete the prior box,
            column ``-1`` is the alpha logit.
        truths: ground-truth boxes, indexed by the matched truth index.
        variance: forwarded unchanged to ``encode``.

    Returns:
        ``(sum(alpha * filter * l1) + self.beta * sum(alpha)) / sum(filter)``
        as a tensor, where filter is 1 for priors with overlap above
        ``self.thresh``, ``self.k`` for best priors and 0 otherwise.
    """
    sigmoid_alphas = params[:, -1].sigmoid()  # size [num_priors]
    priors = torch.cat([locs, params[:, :2]], dim=1)  # size [num_priors, 4]

    # NOTE(review): only the matching is placed under no_grad here, so
    # gradients can flow through `encode` into `priors` — confirm this is
    # the intended extent of the no_grad block.
    with torch.no_grad():
        overlaps = jaccard(
            truths,
            point_form(priors),
        )  # size [num_truths, num_priors]

        # [num_priors] best ground truth for each prior
        best_truth_overlap, best_truth_idx = overlaps.max(0)
        # [num_truths] best prior for each ground truth
        _, best_prior_idx = overlaps.max(1)

        # The best prior of truth i is matched to truth i. The index tensor
        # is created on the same device as the target so this also works
        # for non-CPU inputs.
        best_truth_idx[best_prior_idx] = torch.arange(
            best_prior_idx.size(0), dtype=torch.long,
            device=best_truth_idx.device)

        # Per-prior filter weight, allocated next to the overlaps.
        x_filter = torch.zeros(best_truth_overlap.size(),
                               device=best_truth_overlap.device)
        x_filter[best_truth_overlap > self.thresh] = 1.
        x_filter[best_prior_idx] = self.k

    # Encode matched truths against the priors and apply smooth L1.
    encoded_dis = torch.abs(encode(truths[best_truth_idx], priors, variance))
    l1_tensor = torch.where(encoded_dis < 1.,
                            0.5 * encoded_dis ** 2,
                            encoded_dis - 0.5)
    l1_tensor = l1_tensor.sum(dim=1)

    weighted = (sigmoid_alphas * x_filter * l1_tensor).sum()
    return (weighted + self.beta * sigmoid_alphas.sum()) / x_filter.sum()
def __gen_priors(params, cfg, ret_point_form=True):
    """Tile every per-layer prior shape over that layer's feature-map grid.

    For layer ``k`` with grid size ``f = cfg['feature_maps'][k]`` each cell
    ``(i, j)`` gets a centre ``((j + 0.5) / f_k, (i + 0.5) / f_k)`` with
    ``f_k = cfg['min_dim'] / cfg['steps'][k]``. Every row ``p`` of
    ``params[k]`` contributes one box per cell whose last two entries are
    ``p[:2]``. Boxes are stacked per (layer, prior shape) and clamped to
    ``[0, 1]``.

    Args:
        params: per-layer tensors; ``params[k]`` is iterated row by row.
        cfg: mapping with keys ``'min_dim'``, ``'feature_maps'``, ``'steps'``.
        ret_point_form: when true, each stacked tensor is additionally passed
            through ``point_form`` after clamping.

    Returns:
        A list (one entry per layer) of lists (one entry per prior shape) of
        tensors with one row per grid cell.
    """
    from itertools import product

    image_size = cfg['min_dim']
    feature_maps = cfg['feature_maps']
    steps = cfg['steps']

    layers = []
    for k, f in enumerate(feature_maps):
        per_shape = [[] for _ in range(params[k].size(0))]
        layers.append(per_shape)
        for i, j in product(range(f), repeat=2):
            f_k = image_size / steps[k]
            # unit centre x, y of the current cell
            cx = (j + 0.5) / f_k
            cy = (i + 0.5) / f_k
            for boxes, p in zip(per_shape, params[k]):
                box = torch.zeros(4)
                box[0], box[1] = cx, cy
                box[2:] = p[:2]
                boxes.append(box)

    def _finalize(boxes):
        # Stack one prior shape's boxes, clamp, and optionally convert.
        stacked = torch.stack(boxes).clamp_(max=1, min=0)
        return point_form(stacked) if ret_point_form else stacked

    return [[_finalize(boxes) for boxes in layer] for layer in layers]
def match(truths, priors, all_priors, mask_t, idx):
    """Mark the priors that overlap any ground-truth box of one image.

    A prior is marked when its IoU with at least one ground truth exceeds
    0.35. Ground truths are scanned in order and scanning stops at the first
    row whose elements sum to zero.
    # presumably all-zero rows are padding — verify against the caller

    Args:
        truths: ground-truth boxes, one row per object.
        priors: prior boxes; passed through ``point_form`` before the IoU.
        all_priors: not used by the matching; kept so existing callers do
            not need to change.
        mask_t: output tensor; ``mask_t[idx]`` is overwritten with the
            per-prior mask.
        idx: index of the current image in ``mask_t``.

    Returns:
        None. The result is written into ``mask_t`` in place.
    """
    iou_threshold = 0.35

    iou_map = jaccard(point_form(priors), truths)  # size [num_priors, num_truths]
    feature_size = int(iou_map.shape[0])

    # Count, per prior, how many truths it overlaps. Allocated on the IoU
    # map's device so the function does not require CUDA.
    hits = torch.zeros([feature_size], dtype=torch.int64,
                       device=iou_map.device)
    for k in range(truths.shape[0]):
        if torch.sum(truths[k]) == 0.:
            break
        hits += (iou_map[:, k] > iou_threshold).long()

    mask_t[idx] = hits > 0
def show_priors(background_pth, locs, params, thresh, name='prior boxes', show=True):
    """Draw the highest-scoring priors on a background image and save it.

    The image is resized to 800x800. Priors are ranked by the last column of
    ``params`` (descending) and the first ``thresh`` of them are drawn as red
    rectangles. The result is always written to ``'<name>.jpg'``; when
    ``show`` is true it is also displayed in a window until a key is pressed.

    Args:
        background_pth: path of the background image.
        locs: prior centres, indexed by the ranking.
        params: per-prior parameters; columns ``:2`` complete the box and
            column ``-1`` is the ranking score.
        thresh: number of top-ranked priors to draw (used as a slice bound).
        name: window title and output file stem.
        show: whether to open a preview window.

    Raises:
        FileNotFoundError: if the background image cannot be read.
    """
    canvas_size = 800
    color_red = (0, 0, 255)

    img = cv2.imread(background_pth)
    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        raise FileNotFoundError(
            'cannot read background image: %s' % background_pth)
    img = cv2.resize(img, (canvas_size, canvas_size))

    params = params.detach()
    _, idx_lst = params[:, -1].sort(descending=True)
    idx_lst = idx_lst[:thresh]

    priors = torch.cat([locs[idx_lst], params[idx_lst][:, :2]], dim=1)
    priors = point_form(priors) * float(canvas_size)

    # OpenCV expects integer pixel coordinates, so convert the tensor rows
    # to plain Python numbers and truncate them.
    for xx1, yy1, xx2, yy2 in priors.detach().tolist():
        cv2.rectangle(img, (int(xx1), int(yy1)), (int(xx2), int(yy2)),
                      color_red, thickness=1)

    if show:
        cv2.imshow(name, img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    cv2.imwrite('%s.jpg' % name, img)
def _analyze(anchs, gts, log=True):
    """Collect anchor-quality metrics from ``AnchorsAnalyzer`` into a tensor.

    Args:
        anchs: anchors; passed through ``point_form`` before analysis.
        gts: ground-truth boxes handed to ``AnchorsAnalyzer`` unchanged.
        log: when true, print every metric with its label.

    Returns:
        A 1-D tensor with, in order: approx loss, power mean (1/3),
        geometric mean IoU, mean best IoU, recall, power mean (3) and mean
        best ground-truth IoU.
    """
    _t = AnchorsAnalyzer(point_form(anchs), gts)

    # Labels sit next to the values they describe so the printed names
    # cannot drift from the tensor layout when entries are added or removed.
    metrics = (
        ('approx loss', _t.get_approx_loss()),
        ('power mean 1/3', _t.get_power_mean(1 / 3)),
        ('mean log', _t.get_geometric_mean_iou()),
        ('mean best', _t.get_mean_best_ious()),
        ('recall', _t.get_recall()),
        ('specialty', _t.get_power_mean(3)),
        ('mean best gt', _t.get_mean_best_gt_iou()),
    )
    ret = torch.tensor([value for _, value in metrics])

    if log:
        for (label, _), value in zip(metrics, ret):
            print('%s = %.4f' % (label, value.item()))
    return ret
def forward(self, locs: torch.Tensor, params: torch.Tensor, truths, variance=(0.1, 0.2)):
    """Compute the IoU-weighted prior-selection loss and print diagnostics.

    Priors are built by concatenating ``locs`` with the first two columns of
    ``params``; the last column of ``params`` is squashed with a sigmoid to
    give one weight (alpha) per prior. Priors whose best overlap exceeds
    ``self.thresh`` get filter weight 1, the best prior of every ground
    truth gets ``self.k``, all others are dropped. The number of kept
    priors, the number of best priors below the threshold, and the three
    terms of the loss are printed on every call.

    Args:
        locs: prior centres.  # assumes shape [num_priors, 2] — TODO confirm
        params: per-prior parameters; columns ``:2`` complete the prior box,
            column ``-1`` is the alpha logit.
        truths: ground-truth boxes passed to ``jaccard`` as given.
        variance: unused here; kept so the signature stays compatible.

    Returns:
        ``(sum(alpha * filter * overlap) + self.beta * sum(alpha)) / sum(filter)``
        as a tensor.
    """
    sigmoid_alphas = params[:, -1].sigmoid()  # size [num_priors]
    priors = torch.cat([locs, params[:, :2]], dim=1)  # size [num_priors, 4]

    with torch.no_grad():
        overlaps = jaccard(
            truths,
            point_form(priors),
        )  # size [num_truths, num_priors]

        # [num_priors] best ground-truth overlap for each prior
        best_truth_overlap, _ = overlaps.max(0)
        # [num_truths] best prior for each ground truth
        best_prior_overlap, best_prior_idx = overlaps.max(1)

        # A prior that is the best match of some truth keeps that overlap.
        best_truth_overlap[best_prior_idx] = best_prior_overlap

        # Allocate the filter on the same device as the overlaps so the
        # boolean/index assignments below also work for non-CPU inputs.
        x_filter = torch.zeros(best_truth_overlap.size(),
                               device=best_truth_overlap.device)
        x_filter[best_truth_overlap > self.thresh] = 1.
        x_filter[best_prior_idx] = self.k

        # Keep only priors with a non-zero filter weight.
        msk = x_filter > 1e-7
        x_filter = x_filter[msk]
        best_truth_overlap = best_truth_overlap[msk]

    # log info
    print('%d best truths after filtering' % (x_filter > 1e-4).sum().item())
    print('%d best priors, of which %d priors fail to meet iou threshold'
          % (best_prior_idx.size(0),
             (best_prior_overlap <= self.thresh).sum().item()))

    # Compute each term once and reuse it for both the loss and the log line.
    weighted = (sigmoid_alphas[msk] * x_filter * best_truth_overlap).sum()
    alpha_sum = sigmoid_alphas.sum()
    filter_sum = x_filter.sum()
    ret = (weighted + self.beta * alpha_sum) / filter_sum

    print("loss fn: (%.2f(1st term) + %.2f(2nd term)) / %.2f(3rd term) = %.2f"
          % (weighted.item(), self.beta * alpha_sum.item(),
             filter_sum.item(), ret.item()))

    # return loss value
    return ret
def forward(self, predictions, priors, targets):
    """Multibox Loss

    Matches priors to ground truth per image, then computes a localization
    loss over positive priors and a confidence loss, both divided by the
    number of positive priors (at least 1).

    Args:
        predictions (tuple): A tuple containing loc preds and conf preds
            from SSD net.
            conf shape: torch.size(batch_size,num_priors,num_classes)
            loc shape: torch.size(batch_size,num_priors,4)
        priors: prior boxes, shape: torch.size(num_priors,4)
        targets: per-image ground truth, indexed by batch position. Each
            entry has either 5 columns (box columns, then the label) or
            6 columns (box columns, label, mixup weight); the mode is
            decided from ``targets[0]``.

    Returns:
        (loss_l, loss_c): localization and confidence loss.
    """
    loc_data, conf_data = predictions
    priors = priors
    num = loc_data.size(0)
    num_priors = (priors.size(0))
    num_classes = self.num_classes

    # match priors (default boxes) and ground truth boxes
    # NOTE(review): torch.Tensor(...) / torch.LongTensor(...) allocate
    # without initialising; the values are presumably filled in by
    # match / match_mixup below — confirm.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    if targets[0].shape[1] == 6:  # mixup
        weight_t = torch.Tensor(num, num_priors)
    for idx in range(num):
        defaults = priors.data
        if targets[idx].shape[1] == 6:  # mixup
            truths = targets[idx][:, :-2].data
            labels = targets[idx][:, -2].data
            weight_loss = targets[idx][:, -1].data
            match_mixup(self.threshold, truths, defaults, self.variance,
                        labels, loc_t, conf_t, idx, weight_t, weight_loss,
                        self.giou)
        elif targets[idx].shape[1] == 5:  # no mixup
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx, self.giou)
        else:
            # NOTE(review): an unexpected column count is only reported;
            # no matching is run for this image and processing continues.
            print('The shape of targets is error')
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # wrap targets
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    # priors with a non-zero class target count as positives
    pos = conf_t > 0
    mix_up = (False, True)[targets[0].shape[1] == 6]
    pos_weight = None
    weights_conf = None

    # Localization Loss (Smooth L1)
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    if self.giou:
        prior_giou = point_form(priors)  # [x,y,h,w]->[x0,y0,x1,y1]
        prior_giou = prior_giou.unsqueeze(0).expand(num, num_priors, 4)
        prior_giou = prior_giou[pos_idx].view(-1, 4)
        reg_loss = GIoUloss()
        loss_l = reg_loss(loc_p, prior_giou, loc_t)
    else:
        if mix_up:
            # NOTE(review): moved to CUDA unconditionally (not gated by GPU),
            # and only on this non-GIoU path.
            weight_t = weight_t.cuda()
            weight_t = Variable(weight_t, requires_grad=False)
            pos_weight = weight_t[pos].view(-1, 1)
        reg_loss = SmoothL1_Mixup_Balance_loss(mixup=mix_up,
                                               balance=self.balance_l1,
                                               size_average=False)
        loss_l = reg_loss(loc_p, loc_t, pos_weight)

    # Confidence Loss
    if self.sigmoid_focal:
        # if use original focal loss, please modify the output of the test in models/SSD.py to the sigmoid
        batch_conf = conf_data.view(-1, self.num_classes)
        # one-hot encoding of the target class for every prior
        label_onehot = batch_conf.clone().zero_().scatter(
            1, conf_t.view(-1, 1), 1)
        alpha = self.alpha * label_onehot + (1 - self.alpha) * (
            1 - label_onehot)
        p = torch.sigmoid(batch_conf)
        # probability assigned to the correct binary outcome per class
        pt = torch.where(label_onehot == 1, p, 1 - p)
        loss_c = -alpha * ((1 - pt)**self.gamma) * torch.log(pt)
        loss_c = loss_c.sum()
        num_pos = pos.long().sum(1, keepdim=True)
    else:
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Hard Negative Mining
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        # sorting the sort indices yields each prior's rank by descending loss
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        # negatives per image: negpos_ratio * positives, capped at num_priors - 1
        num_neg = torch.clamp(self.negpos_ratio * num_pos,
                              max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        if self.label_smooth:
            p = conf_t.clone().view(-1, 1).float()
            # NOTE(review): .cuda() is unconditional here (not gated by GPU).
            lp = torch.where(p < 1, p + 1,
                             torch.tensor(self.label_pos).cuda())
            label = batch_conf.clone().zero_().scatter_(
                1, conf_t.view(-1, 1), lp)
            label[:, 1:][pos.clone().view(-1, 1).flatten()] += self.label_neg
            label_ohem = (pos + neg).view(-1, 1).expand_as(batch_conf)
            targets_weighted = label[label_ohem.gt(0)].view(
                -1, self.num_classes)
        else:
            targets_weighted = conf_t[(pos + neg).gt(0)]
        if mix_up:
            weights_conf = weight_t[(pos + neg).gt(0)]
            # non-positive weights are shifted up by 1.0
            weights_conf = torch.where(weights_conf > 0, weights_conf,
                                       weights_conf + 1.0).view(-1, 1)
        conf_loss = Crossentropy_Mixup_SoftmaxFocal_LableSmooth_loss(
            mixup=mix_up,
            focal_loss=self.softmax_focal,
            gamma=2.0,
            alpha=1.0,
            label_smooth=self.label_smooth,
            size_average=False)
        loss_c = conf_loss(conf_p, targets_weighted, weights_conf)

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    # N is the total number of positives, floored at 1 to avoid dividing by zero
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c