# 예제 #1 (Example #1)
# 0
    def forward(self, pred, prior_box, targets):
        """SSD MultiBox loss: Smooth-L1 localization loss on matched priors
        plus cross-entropy confidence loss with hard negative mining.

        Args:
            pred: tuple ``(loc_data, conf_data)`` from the SSD head.
                loc_data:  (batch, num_priors, 4) box regression predictions.
                conf_data: (batch, num_priors, num_classes) class scores.
            prior_box: (num_priors, 4) default boxes.
            targets: list of per-image ground truth, each (num_objs, 5) with
                the class label in the last column.

        Returns:
            (loss_l, loss_c): localization and confidence losses, each
            normalized by the total number of positive (matched) priors.
        """
        loc_data, conf_data = pred
        batch_num = loc_data.size(0)
        prior_box_num = prior_box.size(0)

        # Match priors (default boxes) and ground-truth boxes; `match` writes
        # encoded regression targets / labels into loc_t / conf_t in place.
        loc_t = torch.Tensor(batch_num, prior_box_num, 4)
        conf_t = torch.LongTensor(batch_num, prior_box_num)
        for idx in range(batch_num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            defaults = prior_box.data
            match(self.threshold, truths, defaults, self.variance, labels, loc_t, conf_t, idx)

        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
        # Wrap targets; no gradient flows through the matched targets.
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        # Priors assigned a non-background label are positives.
        pos = conf_t > 0

        # Localization Loss (Smooth L1) over positives only.
        # Shape: [batch, num_priors, 4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        # `reduction='sum'` replaces the deprecated `size_average=False`.
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Per-prior classification loss (softmax CE via the log-sum-exp
        # trick), used only to rank negatives for hard negative mining.
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

        # Hard Negative Mining: zero-out positives, then keep the
        # `negpos_ratio * num_pos` highest-loss negatives per image.
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(batch_num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)  # rank of each prior by its loss
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence loss over positives plus the mined negatives.
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        # `reduction='sum'` replaces the deprecated `size_average=False`.
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        # N is clamped to at least 1 to avoid dividing by zero when no
        # prior matched any ground-truth box.
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
 def forward(self, predict, truth, weight=None):
     """Classification loss with three modes: focal-weighted CE, label-smoothed
     CE, or plain softmax cross-entropy, all built on `log_sum_exp`.

     Args:
         predict: (N, num_classes) raw class scores.
         truth: (N,) integer class labels; when `self.label_smooth` is set,
             presumably a (N, num_classes) soft target distribution — confirm
             against the caller.
         weight: (N,) per-sample mixup weights; required iff `self.mixup`.

     Returns:
         Scalar loss: mean over samples if `self.size_average`, else the sum.
     """
     if self.mixup:
         # Mixup requires one weight per sample.
         assert predict.shape[0] == truth.shape[0] == weight.shape[0]
     else:
         assert predict.shape[0] == truth.shape[0]
     if self.softmax_focal:
         # using OHEM and focal loss with CE: down-weight easy examples by
         # alpha * (1 - p)^gamma, where `focal_sum_exp` presumably yields
         # per-class probabilities — confirm against its definition.
         soft_score = focal_sum_exp(predict)
         pro = self.alpha * (1 - soft_score)**self.gamma
         cmsloss = (log_sum_exp(predict) - predict.gather(
             1, truth.view(-1, 1))) * pro.gather(1, truth.view(-1, 1))
     elif self.label_smooth:
         # Label smoothing: cross-entropy against the soft distribution
         # `truth`, summed over the class dimension.
         cmsloss = (log_sum_exp(predict, label_smooth=True) * truth).sum(
             1, keepdim=True)
     else:
         # Standard CE via the log-sum-exp trick: -x_y + log(sum(exp(x))).
         cmsloss = log_sum_exp(predict) - predict.gather(
             1, truth.view(-1, 1))
     if self.mixup:
         # Scale each sample's loss by its mixup weight.
         cmsloss = cmsloss * weight
     if self.size_average:
         return cmsloss.mean()
     else:
         return cmsloss.sum()
# 예제 #3 (Example #3)
# 0
    def run(self, bbox_regressions_list, classifications_list, priors,
            all_priors, targets):
        """Build per-level classification / localization sample masks via
        prior matching and hard negative mining.

        Args:
            bbox_regressions_list: per-level box regressions, each
                (num, num_priors_level, 4).
            classifications_list: per-level class scores, each
                (num, num_priors_level, num_classes).
            priors: list of per-level prior boxes.
            all_priors: priors over all levels, passed through to `match`.
            targets: per-image ground truth; columns [:4] are boxes.

        Returns:
            (loc_mask_list, cls_mask_list): per-level boolean masks over
            anchor groups, detached from the autograd graph.
        """
        cls_mask_list = []
        loc_mask_list = []

        for i in range(len(bbox_regressions_list)):
            prior = priors[i].cuda()
            bbox_regression = bbox_regressions_list[i]
            conf_data = classifications_list[i]
            num = bbox_regression.size(0)
            num_priors = int(prior.size(0))

            # Per-prior match labels, filled in place by `match`.
            mask_t = torch.LongTensor(num, num_priors)

            for idx in range(num):
                truths = targets[idx][:, :4].data
                defaults = prior.data
                match(truths, defaults, all_priors.cuda().data, mask_t, idx)
            if GPU:
                mask_t = mask_t.cuda()

            # Compute max conf across batch for hard negative mining
            batch_conf = conf_data.view(-1, self.num_classes)
            loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
                1, mask_t.view(-1, 1))

            zeros = torch.tensor(0).cuda()
            # Matched (non-background) priors are positives.
            pos = mask_t > zeros

            # Hard Negative Mining: zero positives, rank negatives by loss,
            # keep the hardest negpos_ratio * num_pos per image.
            loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
            loss_c = loss_c.view(num, -1)
            _, loss_idx = loss_c.sort(1, descending=True)
            _, idx_rank = loss_idx.sort(1)

            num_pos = pos.long().sum(1, keepdim=True)
            num_neg = torch.clamp(self.negpos_ratio * num_pos,
                                  max=pos.size(1) - 1)
            neg = idx_rank < num_neg.expand_as(idx_rank)

            # Collapse prior-level masks to anchor-group level: a group is
            # selected if any of its `num_anchors` anchors is selected.
            cls_mask = torch.sum(
                (pos + neg).view(num, -1, self.num_anchors), 2) > 0
            loc_mask = torch.sum(pos.view(num, -1, self.num_anchors), 2) > 0

            cls_mask_list.append(cls_mask.detach())
            loc_mask_list.append(loc_mask.detach())

        return loc_mask_list, cls_mask_list
# 예제 #4 (Example #4)
# 0
    def forward(self, predictions, targets):
        """SSD MultiBox loss: Smooth-L1 localization loss on matched priors
        plus cross-entropy confidence loss with hard negative mining.

        Args:
            predictions: tuple ``(loc_data, conf_data)``.
                loc_data:  (n_images, n_priors, 4) box regressions.
                conf_data: (n_images, n_priors, num_classes) class scores.
            targets: list of per-image ground truth, each (n_objs, 5) with
                the class label in the last column.

        Returns:
            (loss_l, loss_c), each normalized by the total positive count N.
        """
        loc_data, conf_data = predictions
        num_images = loc_data.size(0)
        num_priors = (self.priors.size(0))

        # Per-image matching of priors to ground truth; boxlib.match returns
        # the encoded regression targets and per-prior labels for one image.
        loc_t = torch.Tensor(num_images, num_priors, 4)
        conf_t = torch.LongTensor(num_images, num_priors)
        priors = self.priors.data
        for idx in range(num_images):
            gt_boxes = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            loc, conf = boxlib.match(gt_boxes, priors, labels, self.args.overlap_th, self.args.variance)
            loc_t[idx], conf_t[idx] = loc, conf
        if self.args.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        
        # calculate location loss over positive (matched) priors only
        pos = conf_t > 0
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)  # (n_images, n_priors, 4)
        loc_p   = loc_data[pos_idx].view(-1, 4)
        loc_t   = loc_t[pos_idx].view(-1, 4)
        loss_l  = F.smooth_l1_loss(loc_p, loc_t, reduction='sum') # (cx, cy, w, h)

        # hard negative mining: rank negatives by their per-prior CE loss and
        # keep the hardest np_ratio * num_pos per image
        batch_conf = conf_data.view(-1, self.args.num_classes) # conf_data.shape = (n_images, n_priors, 2)
        loss_c = boxlib.log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1)) # calculate the -log(softmax()) = - x_y + log_sum_exp 
        loss_c[pos.view(-1, 1)] = 0
        loss_c = loss_c.view(num_images, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)  # rank of each prior by loss magnitude
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.args.np_ratio * num_pos, max=pos.size(1)-1)
        neg = idx_rank < num_neg.expand_as(idx_rank)
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        
        # confidence loss over positives plus the mined negatives
        scores = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.args.num_classes)
        np_gty = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(scores, np_gty, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + α * Lloc(x,l,g)) / N
        # N is clamped to 1 so an all-background batch cannot divide by zero.
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N

        return loss_l, loss_c
# 예제 #5 (Example #5)
# 0
    def forward(self, predictions, priors, targets):
        """Multibox loss with landmark regression (RetinaFace-style).

        Args:
            predictions (tuple): (loc_data, conf_data, landm_data) from the net.
                loc_data shape:   torch.size(batch_size, num_priors, 4)
                conf_data shape:  torch.size(batch_size, num_priors, num_classes)
                landm_data shape: torch.size(batch_size, num_priors, 10)
            priors: prior boxes, shape torch.size(num_priors, 4).
            targets: ground truth per image (num_objs, 15): columns [0:4] box,
                [4:14] landmark coordinates, [-1] class label.

        Returns:
            (loss_l, loss_c, loss_lm): localization, classification, and
            combined landmark losses, each normalized by its positive count.
        """

        #import pdb; pdb.set_trace()
        loc_data, conf_data, landm_data = predictions
        priors = priors
        num = loc_data.size(0)
        num_priors = (priors.size(0))

        # match priors (default boxes) and ground truth boxes; `match` fills
        # loc_t / conf_t / landm_t in place for image `idx`.
        loc_t = torch.Tensor(num, num_priors, 4)
        landm_t = torch.Tensor(num, num_priors, 10)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :4].data
            labels = targets[idx][:, -1].data
            landms = targets[idx][:, 4:14].data
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels, landms, loc_t, conf_t, landm_t, idx)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
            landm_t = landm_t.cuda()
        zeros = torch.tensor(0).cuda()
        # landm Loss (wing loss) on the first four landmark coordinates
        # (two points) of priors with any positive label.
        # Shape: [batch,num_priors,10]
        pos1 = conf_t > zeros
        num_pos_landm = pos1.long().sum(1, keepdim=True)
        N1 = max(num_pos_landm.data.sum().float(), 1)
        pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
        #import pdb
        #pdb.set_trace()
        landm_p = landm_data[pos_idx1].view(-1, 10)[:,:4]
        landm_t1 = landm_t[pos_idx1].view(-1, 10)[:,:4]

        #s1 = torch.ones(1,2)
        #s2 = torch.ones(1,4)*3
        #s = torch.cat([s1,s2],dim=-1).cuda()


        loss_landm = self.wingloss(landm_p, landm_t1)
        #loss_landm = F.smooth_l1_loss(landm_p, landm_t1, reduction='sum')


        # Second landmark term on coordinates [4:] restricted to priors
        # labeled exactly 1 — presumably the masked-face (MAFA) class;
        # confirm the label scheme against `match`.
        one = torch.tensor(1).cuda()
        pos_mafa = conf_t == one
        num_pos_landm2 = pos_mafa.long().sum(1, keepdim=True)
        N2 = max(num_pos_landm2.data.sum().float(), 1)
        pos_idx2 = pos_mafa.unsqueeze(pos_mafa.dim()).expand_as(landm_data)

        landm_p_mafa = landm_data[pos_idx2].view(-1, 10)[:,4:]
        landm_t1_mafa = landm_t[pos_idx2].view(-1, 10)[:,4:]

        # Per-coordinate weights: last four coordinates weighted 3x.
        s1 = torch.ones(1,2)
        s2 = torch.ones(1,4)*3
        s = torch.cat([s1,s2],dim=-1).cuda()

        loss_landm_mafa = self.wingloss(landm_p_mafa*s,landm_t1_mafa*s)
        #loss_landm_mafa = F.smooth_l1_loss(landm_p_mafa*s,landm_t1_mafa*s,reduction='sum')
        #loss_landm = self.wingloss(landm_p*s, landm_t*s)
        #loss_landm = self.adaptivewingloss(landm_p, landm_t)
        # Collapse all non-background labels to 1: classification below is
        # binary face / background.
        pos = conf_t != zeros
        conf_t[pos] = 1

        # eye landmark loss
        #pos2 = pos.unsqueeze(pos.dim()).expand_as(landm_data)
        #lm_eye_p = landm_data[pos2].view(-1, 10)[:,:4]
        #lm_eye_t = landm_t[pos2].view(-1, 10)[:,:4]

        #loss_landm_eye = F.smooth_l1_loss(lm_eye_p, lm_eye_t, reduction='sum')

        # Localization Loss (Smooth L1) over positives only.
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

        # Hard Negative Mining: keep the negpos_ratio hardest negatives.
        loss_c[pos.view(-1, 1)] = 0 # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
        neg = idx_rank < num_neg.expand_as(idx_rank)
        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes)
        targets_weighted = conf_t[(pos+neg).gt(0)]

        one = torch.tensor(1).cuda()

        #import pdb
        #pdb.set_trace()
        # Margin trick: subtract 0.1 from the positive-class logit of true
        # positives, which forces the model to score them more confidently.
        tmp = torch.where(targets_weighted==one,torch.tensor(0.1).cuda(),torch.tensor(0.0).cuda())
        conf_p[:,1] = conf_p[:,1] - tmp



        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        loss_landm /= N1
        loss_landm_mafa /= N2
        loss_lm = loss_landm + loss_landm_mafa
        return loss_l, loss_c, loss_lm
# 예제 #6 (Example #6)
# 0
    def forward(self, predictions, targets, big_ssd_preds=None, distill_mask=None):
        """SSD MultiBox loss with optional knowledge distillation from a
        larger teacher SSD, and extra weighting (`self.bpw`) on each ground
        truth box's best-matching prior.

        Args:
            predictions: (loc_data, conf_data, priors) from the student net.
            targets: per-image ground truth, each (num_objs, 5), label last.
            big_ssd_preds: teacher (loc, conf, priors); required when
                `self._enable_distill` is set.
            distill_mask: optional prior-index mask aligning teacher priors
                to the student's (the two nets can differ in prior count).

        Returns:
            (loss_l, loss_c), or with distillation enabled
            (loss_l, loss_c, loss_c_distill, loss_l_distill).
        """
        loc_data, conf_data, priors = predictions
        if self._enable_distill:
            assert big_ssd_preds is not None
        num = loc_data.size(0)
        priors = priors[:loc_data.size(1), :]
        num_priors = (priors.size(0))

        # match priors (default boxes) and ground truth boxes; `match` fills
        # loc_t / conf_t in place and returns a per-prior mask of the
        # best-matching priors for this image.
        loc_t = torch.zeros(num, num_priors, 4)
        conf_t = torch.zeros(num, num_priors, dtype=torch.long)

        best_priors_msk = []
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            if self.use_half:
                truths = truths.half()
            defaults = priors.data
            pmsk = match(self.threshold, truths, defaults, self.variance, labels, loc_t, conf_t, idx)
            best_priors_msk.append(pmsk)
        best_priors_msk = torch.stack(best_priors_msk)
        if self.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        # wrap targets: no gradient flows through the matched targets
        loc_t.requires_grad = False
        conf_t.requires_grad = False

        pos = conf_t > 0

        # Sanity check: every best-matching prior must also be a positive.
        assert (pos & best_priors_msk == best_priors_msk).min().item() == 1
        # Localization Loss (Smooth L1); best priors get weight `self.bpw`.
        # Shape: [batch,num_priors,4]
        loc_p = loc_data[pos].view(-1, 4)
        loc_t = loc_t[pos].view(-1, 4)
        pos_idx_l = pos
        msk = best_priors_msk[pos]
        loss_l = F.smooth_l1_loss(loc_p[~msk], loc_t[~msk], reduction='sum')
        loss_l += self.bpw * F.smooth_l1_loss(loc_p[msk], loc_t[msk], reduction='sum')

        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

        # Hard Negative Mining: zero positives, then rank negatives by loss.
        loss_c = loss_c.view(pos.size(0), pos.size(1))
        loss_c[pos] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples;
        # best priors again up-weighted by `self.bpw`.
        chosen_idx = pos | neg
        conf_p = conf_data[chosen_idx].view(-1, self.num_classes)
        targets_weighted = conf_t[pos | neg]
        msk = best_priors_msk[pos | neg]
        loss_c = F.cross_entropy(conf_p[~msk], targets_weighted[~msk], reduction='sum')
        loss_c += self.bpw * F.cross_entropy(conf_p[msk], targets_weighted[msk], reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        # N counts each best prior with weight bpw instead of 1.

        N = pos.sum() + best_priors_msk.sum() * (self.bpw - 1.)
        loss_l /= N
        loss_c /= N
        if self._enable_distill:
            big_loc_data, big_conf_data, _ = big_ssd_preds
            inv_temperature = 1 / 1.
            # The teacher and student have different prior-box counts: the
            # vgg-lite student has no detections on the 38x38 feature map.
            if distill_mask is not None:
                big_loc_data = big_loc_data[:, distill_mask]
                big_conf_data = big_conf_data[:, distill_mask]
            big_conf_p = big_conf_data[chosen_idx].view(-1, self.num_classes)
            y_softmax = F.log_softmax(conf_p * inv_temperature, dim=1)
            y_big_softmax = F.softmax(big_conf_p, dim=1)

            big_loc_p = big_loc_data[pos_idx_l].view(-1, 4)
            # same as loss_c and loss_l: soft cross-entropy against the
            # teacher's distribution, Smooth-L1 against the teacher's boxes.
            loss_c_distill = -(y_big_softmax * y_softmax).sum(dim=1).sum()
            loss_l_distill = F.smooth_l1_loss(loc_p, big_loc_p, reduction='sum')
            loss_c_distill /= N
            loss_l_distill /= N
            return loss_l, loss_c, loss_c_distill, loss_l_distill
        return loss_l, loss_c
# 예제 #7 (Example #7)
# 0
    def forward(self, predictions, targets):
        """SSD MultiBox loss: Smooth-L1 localization loss on matched priors
        plus cross-entropy confidence loss with hard negative mining.

        Args:
            predictions: (loc_data, conf_data, priors) tuple.
                loc_data:  (num, num_priors, 4) box regressions.
                conf_data: (num, num_priors, num_classes) class scores.
                priors:    (num_priors, 4) default boxes.
            targets: list of per-image ground truth, each (num_objs, 5) with
                the class label in the last column.

        Returns:
            (loss_l, loss_c), each normalized by the positive-prior count N.
        """
        loc_data, conf_data, priors = predictions
        # Batch size and number of priors.
        num = loc_data.size(0)
        num_priors = priors.size(0)
        # Target buffers that `match` fills in place.
        loc_t = torch.zeros(num, num_priors, 4).type(torch.FloatTensor)
        conf_t = torch.zeros(num, num_priors).long()

        if self.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
            priors = priors.cuda()

        for idx in range(num):
            # Ground-truth boxes and labels for this image.
            truths = targets[idx][:, :-1]
            labels = targets[idx][:, -1]

            # Skip images with no annotated objects.
            if len(truths) == 0:
                continue

            defaults = priors
            # Match ground-truth boxes against priors: a prior with high
            # enough overlap is assigned to detect that ground-truth box.
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)

        # No gradient flows through the matched targets.
        #   loc_t   (num, num_priors, 4)
        #   conf_t  (num, num_priors)
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        # conf_t > 0 marks priors assigned to a real object (positives).
        pos = conf_t > 0

        # Number of positives per image, shape (num, 1).
        num_pos = pos.sum(dim=1, keepdim=True)

        # Localization loss over positives only.
        #   pos_idx (num, num_priors, 4)
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)

        # `reduction='sum'` replaces the deprecated `size_average=False`.
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')
        # Per-prior classification loss (softmax CE via the log-sum-exp
        # trick), used only to rank negatives for hard negative mining.
        #   batch_conf  (num * num_priors, num_classes)
        #   loss_c      (num, num_priors)
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))
        loss_c = loss_c.view(num, -1)

        # Exclude positives from mining: only hard negatives are ranked.
        loss_c[pos] = 0
        # Double argsort yields each prior's rank by descending loss.
        #   loss_idx    (num, num_priors)
        #   idx_rank    (num, num_priors)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        # Positives per image.
        #   num_pos     (num, )
        #   neg         (num, num_priors)
        num_pos = pos.long().sum(1, keepdim=True)
        # Cap negatives at negpos_ratio * positives; if an image has no
        # positives, fall back to a fixed number of hard negatives.
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        num_neg[num_neg.eq(0)] = self.negatives_for_hard
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Expand the selection masks over the class dimension.
        #   pos_idx   (num, num_priors, num_classes)
        #   neg_idx   (num, num_priors, num_classes)
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)

        # Confidence loss over the selected positives and negatives.
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        # `reduction='sum'` replaces the deprecated `size_average=False`.
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Normalize by the positive count, clamped to at least one.
        N = torch.max(num_pos.data.sum(), torch.ones_like(num_pos.data.sum()))
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
# 예제 #8 (Example #8)
# 0
    def forward(self, predictions, priors, targets, conf_target_stand_dist):
        """Multibox Loss with an auxiliary binary (object/background) head
        and a distribution-alignment ("dist") loss.

        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            and binary conf preds from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                bin_conf: torch.size(batch_size,num_priors,2)
                priors shape: torch.size(num_priors,4)

            ground_truth (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).
            conf_target_stand_dist: per-prior reference class distributions
                used by the dist loss; reshaped to
                (batch * num_priors, num_classes) below.

        Returns:
            (loss_l, loss_c, loss_bin, dist_loss, pos, neg_binary, neg_multi)
        """

        loc_data, conf_data, bin_conf_data = predictions
        priors = priors
        num = loc_data.size(0)
        num_priors = (priors.size(0))
        num_classes = self.num_classes

        # match priors (default boxes) and ground truth boxes; fills
        # loc_t / conf_t / bin_conf_t in place per image.
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        bin_conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            defaults = priors.data
            match_3_terms(self.threshold, truths, defaults, self.variance,
                          labels, loc_t, conf_t, bin_conf_t, idx)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
            bin_conf_t = bin_conf_t.cuda()
        # wrap targets: no gradient flows through the matched targets
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)
        bin_conf_t = Variable(bin_conf_t, requires_grad=False)

        # Positives come from the binary head; priors with a negative
        # multi-class label appear to be ignore-style entries ("fake pos"),
        # re-labelled background below — confirm against match_3_terms.
        pos = bin_conf_t > 0
        fake_pos = pos + (conf_t < 0)
        conf_t[conf_t < 0] = 0

        # Localization Loss (Smooth L1), positives only
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Compute max binary_conf across batch for hard negative mining
        batch_bin_conf = bin_conf_data.view(-1, 2)
        loss_bin = log_sum_exp(batch_bin_conf) - batch_bin_conf.gather(
            1, bin_conf_t.view(-1, 1))

        # Hard Negative Mining on the binary head
        loss_bin[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        #loss_bin[fake_pos.view(-1, 1)] = 0  # filter out fake pos boxes for now
        loss_bin = loss_bin.view(num, -1)
        _, loss_idx = loss_bin.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)
        neg_binary = neg
        '''
        loss_bin = loss_bin.view(-1,1)
        loss_bin[fake_pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_bin = loss_bin.view(num, -1)
        _, loss_idx = loss_bin.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)
        # neg_binary = neg
        '''
        # Binary confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(bin_conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(bin_conf_data)
        bin_conf_p = bin_conf_data[(pos_idx + neg_idx).gt(0)].view(-1, 2)
        targets_weighted = bin_conf_t[(pos + neg).gt(0)]
        loss_bin = F.cross_entropy(bin_conf_p,
                                   targets_weighted,
                                   reduction='sum')

        # Compute max conf across batch for hard negative mining
        # Confidence Loss Including Positive and Negative Examples
        # the version 0922 that I use the neg samples from binary classification as the neg for multi-classification
        '''
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes)
        targets_weighted = conf_t[(pos+neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
        '''
        # Compute max conf across batch for hard negative mining
        # (multi-class head, mined independently of the binary head).
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))
        #########################################################################
        # used fake_pos or not

        # Hard Negative Mining
        #loss_c[fake_pos.view(-1, 1)] = 0  # filter out fake pos boxes for now
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)
        neg_multi = neg
        '''
        loss_c = loss_c.view(-1, 1)
        loss_c[fake_pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)
        #neg_multi = neg
        '''
        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]

        #soft_label_val =smooth_label(targets_weighted, self.num_classes, eps=0.001)
        #loss_c = crossentropy(conf_p, soft_label_val)
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # for dist loss: reward the reference distribution's mass on the true
        # class for positives, penalize its background mass for negatives
        # (weighted 1/5).
        conf_t_new = conf_t.view(-1)
        nonzero_index = torch.nonzero(conf_t_new)
        conf_t_new = conf_t_new[nonzero_index]
        conf_target_stand_dist = conf_target_stand_dist.view(
            -1, self.num_classes)
        pos_conf_target_stand_dist = conf_target_stand_dist[
            nonzero_index].squeeze(1)
        pos_dist = torch.gather(pos_conf_target_stand_dist,
                                dim=1,
                                index=conf_t_new)

        neg_nonzero_index = torch.nonzero(neg.view(-1))
        neg_conf_target_stand_dist = conf_target_stand_dist[
            neg_nonzero_index].squeeze(1)
        neg_dist = neg_conf_target_stand_dist[:, 0]

        dist_loss = -pos_dist.sum() + 1 / 5 * neg_dist.sum()

        # weight
        '''
        conf_p_t = conf_data[(pos_idx).gt(0)].view(-1, self.num_classes)
        targets_weighted = conf_t[(pos).gt(0)]
        loss_c = F.cross_entropy(conf_p_t, targets_weighted, reduction='sum')

        conf_p_f = conf_data[(neg_idx).gt(0)].view(-1, self.num_classes)
        targets_weighted = conf_t[(neg).gt(0)]
        loss_c = loss_c + F.cross_entropy(conf_p_f, targets_weighted, reduction='sum') * 0.2
        '''
        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g) + \beta Lbinconf(x, c)) / N
        # N is clamped to 1 to avoid division by zero with no positives.
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        loss_bin /= N
        dist_loss /= N
        return loss_l, loss_c, loss_bin, dist_loss, pos, neg_binary, neg_multi
# 예제 #9 (Example #9)
# 0
    def forward(self, predictions, targets):
        """SSD MultiBox loss: Smooth-L1 localization loss on matched priors
        plus cross-entropy confidence loss with hard negative mining.

        Args:
            predictions: (loc_data, conf_data, priors).
                loc_data:  (num, num_priors, 4) box regressions.
                conf_data: (num, num_priors, num_classes) class scores.
                priors:    (num_priors, 4) default boxes.
            targets: per-image ground truth, each (num_objs, 5) with the
                class label in the last column.

        Returns:
            (loss_l, loss_c), each normalized by the positive count N.
        """
        # Regression predictions, class scores, and prior boxes.
        loc_data, conf_data, priors = predictions
        # Batch size.
        num = loc_data.size(0)
        # Keep only as many priors as there are predictions.
        priors = priors[:loc_data.size(1), :]
        num_priors = priors.size(0)
        num_classes = self.num_classes
        # Target buffers filled in place by `match`.
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            # Ground-truth boxes and labels for this image.
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            defaults = priors.data
            # Assign each prior a regression target and a label.
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        if self.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()

        # No gradient flows through the matched targets.
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        # conf_t > 0 marks priors assigned to a real object (positives).
        pos = conf_t > 0
        # Positives per image.
        num_pos = pos.sum(dim=1, keepdim=True)
        # Localization loss over positives only.
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        # `reduction='sum'` replaces the deprecated `size_average=False`.
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Per-prior classification loss (softmax CE via the log-sum-exp
        # trick), used only to rank negatives for hard negative mining.
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))
        loss_c = loss_c.view(num, -1)

        # Positives are excluded from mining.
        loss_c[pos] = 0
        # Double argsort yields each prior's rank by descending loss.
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        # Positives per image.
        num_pos = pos.long().sum(1, keepdim=True)
        # Cap negatives at negpos_ratio * positives.
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence loss over the selected positives and negatives.
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        # `reduction='sum'` replaces the deprecated `size_average=False`.
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        # Bug fix: N was previously unclamped, so a batch with no positive
        # priors divided by zero; clamp to at least 1 like the sibling losses.
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
# 예제 #10 (Example #10)
# 0
    def forward(self, predictions, priors, targets, teacher_data):
        """Multibox loss with knowledge distillation from a teacher model.

        Args:
            predictions (tuple): (loc, conf, landm) preds from the student net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                landm shape: torch.size(batch_size,num_priors,10)
            priors: default boxes, shape torch.size(num_priors,4).
            targets (tensor): Ground truth boxes, landmarks and labels for a
                batch, shape [batch_size,num_objs,15]
                (4 bbox coords + 10 landmark coords + 1 label).
            teacher_data (tuple): (loc, conf) predictions of a teacher
                network, used as extra regression/soft-classification targets.

        Returns:
            (loss_l, loss_c, loss_landm): localization loss (optionally
            augmented by a teacher-gated MSE term), confidence loss (cross
            entropy plus a weighted soft distillation term), landmark loss.
        """
        loc_teacher, conf_teacher = teacher_data
        loc_data, conf_data, landm_data = predictions
        priors = priors
        num = loc_data.size(0)  # batch size
        num_priors = (priors.size(0))

        # match priors (default boxes) and ground truth boxes; match() fills
        # loc_t / conf_t / landm_t in place, one image at a time.
        loc_t = torch.Tensor(num, num_priors, 4)
        landm_t = torch.Tensor(num, num_priors, 10)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :4].data  # gt boxes
            labels = targets[idx][:, -1].data  # gt class labels
            landms = targets[idx][:, 4:14].data  # gt landmarks (5 points x/y)
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  landms, loc_t, conf_t, landm_t, idx)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
            landm_t = landm_t.cuda()

        zeros = torch.tensor(0).cuda()
        # landm Loss (Smooth L1)
        # Shape: [batch,num_priors,10]
        # NOTE(review): conf_t > 0 vs conf_t != 0 below implies negative
        # labels exist — presumably matches without landmark annotations;
        # confirm against the match() implementation.
        pos1 = conf_t > zeros
        num_pos_landm = pos1.long().sum(1, keepdim=True)
        # Clamp to 1 so an annotation-free batch does not divide by zero.
        N1 = max(num_pos_landm.data.sum().float(), 1)
        pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
        landm_p = landm_data[pos_idx1].view(-1, 10)
        landm_t = landm_t[pos_idx1].view(-1, 10)
        loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')

        # Any non-background prior (label != 0) is a positive for the
        # box/class losses; collapse its label to class 1.
        pos = conf_t != zeros
        conf_t[pos] = 1

        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)

        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loc_t2 = loc_teacher[pos_idx].view(-1, 4)
        loss_fn = torch.nn.MSELoss(reduction='sum')
        loss1 = loss_fn(loc_p, loc_t)  # student vs ground truth
        loss2 = loss_fn(loc_t2, loc_t)  # teacher vs ground truth
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')
        # Penalize the student further only when it localizes worse than
        # the teacher.
        if loss1 > loss2:
            loss_l = loss_l + self.r_weight * loss1

        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Hard Negative Mining: rank negatives by loss, keep the hardest.
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        m = nn.Softmax(dim=1)
        loss_fn2 = nn.BCEWithLogitsLoss(reduction='sum')
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        conf_t2 = conf_teacher[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        # Soft distillation between student and teacher class distributions.
        # NOTE(review): BCEWithLogitsLoss applies a sigmoid to its first
        # argument, which is already a softmax output here — confirm this
        # double squashing is intended.
        loss_soft = loss_fn2(m(conf_p), m(conf_t2))
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        loss_c = loss_c + self.c_weight * loss_soft
        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        loss_landm /= N1

        return loss_l, loss_c, loss_landm
예제 #11
0
    def forward(self, predictions, priors, targets, pass_index=None):
        """Multibox Loss with optional early-reject ("ER") prior filtering.

        Args:
            predictions (tuple): (loc preds, conf preds) from the SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
            priors: default boxes, shape (num_priors,4) or, for per-image
                priors, (batch_size,num_priors,4).
            targets (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).
            pass_index: optional mask of priors that survived the previous
                early-reject pass; when given, localization loss and hard
                negative mining are restricted to those priors.

        Returns:
            (loss_l, loss_c, index): localization loss, confidence loss and,
            on the first pass (pass_index is None), the mask of priors to
            keep next time (positives plus not-confidently-background ones).
        """

        loc_data, conf_data = predictions
        num = loc_data.size(0)  # batch size
        # Per-image priors carry an extra leading batch dimension.
        num_priors = (priors.size(0)) if priors.dim() == 2 else priors.size(1)

        # match priors (default boxes) and ground truth boxes; match() fills
        # loc_t / conf_t in place, one image at a time.
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)

        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            if priors.dim() == 3:
                defaults = priors.data[idx, :, :]
            else:
                defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        # Restrict the localization branch to surviving priors.
        if pass_index is not None:
            pass_index_data = pass_index.data
            loc_t = loc_t[pass_index_data.unsqueeze(2).expand_as(loc_t)].view(
                -1, 4)
            conf_t1 = conf_t[pass_index_data]
            loc_data = loc_data[pass_index_data.unsqueeze(2).expand_as(
                loc_data)].view(-1, 4)
            print(conf_t1.shape[0] / num)  # average surviving priors / image

        # wrap targets (no gradient through ground truth)
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        # Localization Loss (Smooth L1) over positive priors only.
        # Shape: [batch,num_priors,4]
        if pass_index is not None:
            conf_t1 = Variable(conf_t1, requires_grad=False)
            pos = conf_t1 > 0
        else:
            pos = conf_t > 0
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        # MODERNIZED: reduction='sum' replaces the deprecated
        # size_average=False (already used by other variants in this file).
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        pos = conf_t > 0  # positives over the full prior set, for mining

        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # ER: on the first pass, keep priors whose background probability is
        # below 0.99 (i.e. not confidently background) as candidates.
        if pass_index is None:
            x_max = batch_conf.data.max()  # for numerical stability
            temp = torch.exp(batch_conf[:, 0] - x_max) / torch.sum(
                torch.exp(batch_conf - x_max), 1)
            temp = temp < 0.99
            temp_idx = temp.view(num, -1)

        # Hard Negative Mining
        loss_c[pos.view(-1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        if pass_index is not None:
            # BUGFIX: `1 - mask` fails on modern bool masks and `~` is wrong
            # for legacy uint8 masks; `== 0` inverts correctly for both.
            loss_c[pass_index_data == 0] = 0
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        # BUGFIX: clamp N so a batch without positives does not divide by
        # zero (the other loss variants in this file already do this).
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N

        if pass_index is None:
            index = (pos + temp_idx).gt(0)
        else:
            index = None
        return loss_l, loss_c, index
예제 #12
0
        # wrap targets
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t,requires_grad=False)

        pos = conf_t > 0

        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1,4)
        loc_t = loc_t[pos_idx].view(-1,4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)

        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1,self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1,1))

        # Hard Negative Mining
        loss_c[pos.view(-1)] = 0 # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _,loss_idx = loss_c.sort(1, descending=True)
        _,idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1,keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes)
        targets_weighted = conf_t[(pos+neg).gt(0)]
    def forward(self, predictions, priors, targets):
        """Multibox Loss with an extra 5-point landmark regression term.

        Args:
            predictions (tuple): (loc_data, conf_data, loc_five_data) from
            the net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                loc_five shape: torch.size(batch_size,num_priors,10)
            priors: default boxes, shape torch.size(num_priors,4).

            ground_truth (tensor): Ground truth boxes, label and landmark
                coords for a batch, shape: [batch_size,num_objs,15]
                (cols 0-3 bbox, col 4 label, cols 5-14 landmark coords).

        Returns:
            (loss_l, loss_c, loss_coords): box, class and landmark losses,
            each normalized by the number of positive priors.
        """

        loc_data, conf_data, loc_five_data = predictions
        priors = priors
        num = loc_data.size(0)  # batch size
        num_priors = (
            priors.size(0)
        )  # total number of anchors per image

        # match priors (default boxes) and ground truth boxes; buffers are
        # filled in place by match_() below.
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(
            num,
            num_priors)  # class label assigned to each prior by matching
        loc_five = torch.Tensor(num, num_priors, 10)  # for 5 location points
        for idx in range(num):
            truths = targets[idx][:, :4].data  # bbox, x1, y1, x2, y2: 0-1
            truths_five = targets[idx][:, 5:].data  # 10 landmark coords
            labels = targets[idx][:, 4].data  # class labels
            defaults = priors.data
            # match_() fills loc_t / conf_t / loc_five for image idx.
            match_(self.threshold, truths, truths_five, defaults,
                   self.variance, labels, loc_t, conf_t, loc_five,
                   idx)  # key important
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
            loc_five = loc_five.cuda()
        # loc_t and loc_five hold the encoded targets once matching is done.
        pos = conf_t > 0  # positives: priors matched to an object
        # NOTE(review): debug print left in the training path.
        print("pos conf_t:", conf_t.shape, pos.shape,
              pos.sum())
        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(
            loc_data)  # positive mask broadcast over box coords
        pos_idx_five = pos.unsqueeze(pos.dim()).expand_as(loc_five_data)
        loc_p = loc_data[pos_idx].view(-1,
                                       4)  # choose positive loc_p from pred
        loc_t = loc_t[pos_idx].view(
            -1, 4)  # get correspond loc_t to loc_p which have matched
        loc_five = loc_five[pos_idx_five].view(-1, 10)
        loc_f = loc_five_data[pos_idx_five].view(-1, 10)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')
        # Landmark loss: MSE between predicted and target 5-point coords.
        loss_coords = F.mse_loss(loc_five, loc_f, reduction='sum')

        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Hard Negative Mining: rank negatives by loss, keep the hardest.
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        # NOTE(review): conf_p may be empty when a batch has neither
        # positives nor mined negatives; debug print left in place.
        if conf_p.shape[0] == 0:
            print(pos_idx.shape, neg_idx.shape, conf_p)
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
        """
        try:
            loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
        except:
            print("wwwwwww", targets_weighted.max())    # may is -9223372036854775808  ???
        
        """

        # Sum of losses: L(x,c,l,g) = (Lconf(x,c) + αLloc(x,l,g) + βLloc_f(x,l,g_f)) / N
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        loss_coords /= N

        return loss_l, loss_c, loss_coords
예제 #14
0
    def forward(self, preds, targets):
        """Compute the SSD MultiBox loss for one batch.

        Args:
            preds (tuple): (loc_data, conf_data, priors) from the net.
                loc_data: (batch, num_priors, 4) predicted box offsets.
                conf_data: (batch, num_priors, num_classes) class logits.
                priors: (num_priors, 4) default boxes.
            targets (list): per-image ground truth, each of shape
                [num_objs, 5] where the last column is the class label.

        Returns:
            (loss_l, loss_c): localization and (class-weighted) confidence
            losses, each normalized by the number of positive priors.
        """
        loc_data, conf_data, priors = preds
        bs = loc_data.size(0)  # batch size
        priors = priors[:loc_data.size(1), :]
        num_priors = (priors.size(0))
        num_classes = self.num_classes

        # match priors (default boxes) and ground truth boxes; match() fills
        # loc_t (encoded offsets) and conf_t (labels) in place.
        loc_t = torch.Tensor(bs, num_priors, 4)
        conf_t = torch.LongTensor(bs, num_priors)
        for idx in range(bs):
            truths = targets[idx][:, :-1].data
            label = targets[idx][:, -1].data
            defaults = priors.data
            match(self.threshold, truths, label, defaults, self.variance,
                  loc_t, conf_t, idx)

        loc_t = loc_t.to(self.device)
        conf_t = conf_t.to(self.device)
        pos = conf_t > 0  # priors matched to an object
        num_pos = pos.sum(dim=1, keepdim=True)

        # Localization Loss (Smooth L1) over positive priors only.
        # Shape: [batch, num_priors, 4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Per-prior classification loss used to rank hard negatives:
        # the closer the logits, the larger this loss.
        batch_conf = conf_data.view(-1, num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Hard Negative Mining: keep only the highest-loss negatives.
        loss_c = loss_c.view(bs, -1)
        loss_c[pos] = 0  # filter out pos boxes for now
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(
            1)  # smaller rank means larger loss
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p,
                                 targets_weighted,
                                 weight=self.weight,
                                 reduction='sum')

        # sum of losses: L(x, c, l, g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        # BUGFIX: clamp N to 1 so a batch with no positives does not divide
        # by zero (matches the other loss variants in this file).
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N

        return loss_l, loss_c
예제 #15
0
    def forward(self, predictions, targets):
        """SSD MultiBox loss: smooth-L1 localization + hard-mined cross entropy.

        Args:
            predictions (tuple): (loc_data, conf_data, priors).
                loc_data: torch.Size([batch, 8732, 4]) predicted offsets.
                conf_data: torch.Size([batch, 8732, num_classes]) logits.
                priors: torch.Size([8732, 4]) default boxes.
            targets (list): per-image ground truth, each of shape
                [num_objs, 5] where the last column is the class label.

        Returns:
            (loss_l, loss_c) normalized by the number of positive priors.
        """
        loc_data, conf_data, priors = predictions
        num = loc_data.size(0)  # batch size
        priors = priors[:loc_data.size(1), :]  # all default boxes
        num_priors = (priors.size(0))  # e.g. 8732
        # Buffers that match() fills in place with encoded offsets / labels.
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)

        if self.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
            priors = priors.cuda()

        for idx in range(num):
            truths = targets[idx][:, :-1]  # gt boxes of one image
            labels = targets[idx][:, -1]  # gt labels of one image
            defaults = priors
            # Assign each prior its best-matching ground truth.
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        # Targets are fixed; no gradient flows through them.
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        # Priors with conf_t > 0 were matched to an object (positives).
        pos = conf_t > 0
        num_pos = pos.sum(dim=1, keepdim=True)  # positives per image
        # Localization loss (Smooth L1) over positives only.
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)  # predicted offsets
        loc_t = loc_t[pos_idx].view(-1, 4)  # encoded gt offsets
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Per-prior classification loss used to rank hard negatives.
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))
        loss_c = loss_c.view(num, -1)

        loss_c[pos] = 0  # ignore positives while ranking negatives
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        # Keep at most negpos_ratio negatives per positive.
        num_neg = torch.clamp(self.negpos_ratio * num_pos,
                              max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence loss over positives plus mined hard negatives.
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted,
                                 reduction='sum')

        # BUGFIX: clamp N so a batch without positives does not divide by
        # zero (the other loss variants in this file already do this).
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
    def forward(self, predictions, priors, targets):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4)

            ground_truth (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).

        Returns:
            (loss_l, loss_c) normalized by the number of matched priors.
        """

        loc_data, conf_data = predictions
        num = loc_data.size(0)  # batch size
        num_priors = (priors.size(0))
        num_classes = self.num_classes

        # Match each image's priors against its ground-truth boxes; match()
        # fills loc_t (encoded offsets) and conf_t (class labels) in place.
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data  # first 4 columns: gt bbox
            labels = targets[idx][:, -1].data  # last column: gt label
            defaults = priors.data  # SSD default boxes
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        # wrap targets (no gradient through ground truth)
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        pos = conf_t > 0  # only positives contribute to the loc loss

        # Localization Loss (Smooth L1)
        # loc Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)  # predicted offsets (positives)
        loc_t = loc_t[pos_idx].view(-1, 4)  # encoded gt offsets
        # MODERNIZED: reduction='sum' replaces the deprecated
        # size_average=False (already used by other variants in this file).
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Per-prior classification loss used to rank hard negatives.
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Hard Negative Mining: keep only the highest-loss negatives.
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        # Keep at most negpos_ratio negatives per positive (e.g. 3:1).
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence loss over positives plus mined hard negatives.
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N

        N = max(num_pos.data.sum().float(), 1)  # matched default boxes
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
예제 #17
0
    def forward(self, predictions, priors, targets):
        """Compute the SSD MultiBox loss.

        Args:
            predictions (tuple): (loc preds, conf preds) from the net.
                loc shape: torch.size(batch_size, num_priors, 4)
                conf shape: torch.size(batch_size, num_priors, num_classes)
            priors: default boxes, shape torch.size(num_priors, 4).
            targets (tensor): ground truth per image,
                shape [batch_size, num_objs, 5] (last column is the label).

        Returns:
            (loss_l, loss_c): smooth-L1 localization loss and hard-mined
            cross-entropy confidence loss, both divided by the number of
            positive priors (clamped to at least 1).
        """
        loc_data, conf_data = predictions
        batch_size = loc_data.size(0)
        prior_count = priors.size(0)

        # Buffers filled in place by match(): encoded offsets and labels.
        loc_t = torch.Tensor(batch_size, prior_count, 4)
        conf_t = torch.LongTensor(batch_size, prior_count)
        for img in range(batch_size):
            gt_boxes = targets[img][:, :-1].data
            gt_labels = targets[img][:, -1].data
            match(self.threshold, gt_boxes, priors.data, self.variance,
                  gt_labels, loc_t, conf_t, img)
        if GPU:
            loc_t, conf_t = loc_t.cuda(), conf_t.cuda()

        # A prior is positive when matching assigned it a non-background
        # label; in face detection almost all priors stay negative.
        pos = conf_t > 0

        # --- Localization loss (Smooth L1) over positives only ---
        loc_mask = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        pred_offsets = loc_data[loc_mask].view(-1, 4)
        target_offsets = loc_t[loc_mask].view(-1, 4)
        loss_l = F.smooth_l1_loss(pred_offsets, target_offsets,
                                  reduction='sum')

        # --- Rank every prior by its classification loss ---
        # log_sum_exp(logits) - logit_of_true_class is the per-prior
        # softmax cross entropy, computed batch-wide.
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Hard negative mining: zero out positives, then keep the
        # highest-loss negatives, at most negpos_ratio per positive.
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(batch_size, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos,
                              max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # --- Confidence loss over positives plus mined negatives ---
        pos_mask = pos.unsqueeze(2).expand_as(conf_data)
        neg_mask = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_mask + neg_mask).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # L(x,c,l,g) = (Lconf(x,c) + αLloc(x,l,g)) / N
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N

        return loss_l, loss_c
    def forward(self, predictions, targets):
        """SSD MultiBox loss (box regression + hard-mined classification).

        Args:
            predictions (tuple): (loc_data, conf_data, priors).
                loc_data: (batch, num_priors, 4) predicted offsets.
                conf_data: (batch, num_priors, num_classes) class logits.
                priors: (num_priors, 4) default boxes.
            targets (list): per-image ground truth, each of shape
                [num_objs, 5] where the last column is the class label.

        Returns:
            (loss_l, loss_c) normalized by the number of positive priors.
        """
        loc_data, conf_data, priors = predictions
        num = loc_data.size(0)  # batch size
        priors = priors[:loc_data.size(1), :]  # all default boxes
        num_priors = (priors.size(0))
        # Buffers that match() fills in place.
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data  # gt boxes
            labels = targets[idx][:, -1].data  # gt labels
            defaults = priors.data  # default boxes
            # Assign each prior its best-matching ground truth.
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        # if self.use_gpu:
        #     loc_t = loc_t.cuda()
        #     conf_t = conf_t.cuda()

        # Priors with conf_t > 0 contain an object (positives).
        pos = conf_t > 0
        num_pos = pos.sum(dim=1, keepdim=True)  # positives per image

        # Localization loss over positives only; background priors carry no
        # box-regression signal.
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Per-prior classification loss used to rank hard negatives.
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))
        loss_c = loss_c.view(num, -1)

        loss_c[pos] = 0  # ignore positives while ranking negatives
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        # Balance positives and negatives at roughly 1:negpos_ratio — out of
        # thousands of priors only a handful are positive.
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence loss over positives plus mined hard negatives.
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted,
                                 reduction='sum')

        # BUGFIX: clamp N so a batch without positives does not divide by
        # zero (the other loss variants in this file already do this).
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
예제 #19
0
    def forward(self, predictions, priors, targets):
        """Multibox loss for joint box / class / facial-landmark prediction.

        Args:
            predictions (tuple): (loc_data, conf_data, landm_data) where
                loc_data:   (batch, num_priors, 4) box regressions,
                conf_data:  (batch, num_priors, num_classes) class scores,
                landm_data: (batch, num_priors, 10) landmark regressions.
            priors (tensor): (num_priors, 4) default boxes.
            targets (list): per-image tensors of shape (num_faces, 15):
                4 bbox coords + 10 landmark coords + 1 has-landmark flag.

        Returns:
            (loss_l, loss_c, loss_landm): localization, classification and
            landmark losses, each normalized by its own positive count.
        """
        loc_data, conf_data, landm_data = predictions
        num = loc_data.size(0)        # batch size
        num_priors = priors.size(0)   # number of anchors

        # Match priors (default boxes) against the ground truth; match()
        # fills loc_t / conf_t / landm_t in place, one image at a time.
        loc_t = torch.Tensor(num, num_priors, 4)
        landm_t = torch.Tensor(num, num_priors, 10)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :4].data     # bbox
            labels = targets[idx][:, -1].data     # has-landmark flag
            landms = targets[idx][:, 4:14].data   # landmark coordinates
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  landms, loc_t, conf_t, landm_t, idx)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
            landm_t = landm_t.cuda()

        # Allocate the zero constant on the same device as the targets
        # instead of calling .cuda() unconditionally, so the loss also
        # works on CPU-only machines when GPU is False.
        zeros = torch.tensor(0, device=conf_t.device)

        # ----- Landmark loss (smooth L1) ---------------------------------
        # Only faces annotated with landmarks (label > 0) contribute;
        # faces without landmarks are labeled -1 by the matcher.
        pos1 = conf_t > zeros
        num_pos_landm = pos1.long().sum(1, keepdim=True)
        N1 = max(num_pos_landm.data.sum().float(), 1)  # guard div-by-zero
        pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
        landm_p = landm_data[pos_idx1].view(-1, 10)
        landm_t = landm_t[pos_idx1].view(-1, 10)
        loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')

        # ----- Localization loss (smooth L1) -----------------------------
        # Any non-background prior (label != 0, including -1) is a positive
        # for the box branch.
        pos = conf_t != zeros
        conf_t[pos] = 1  # collapse to binary face / background labels

        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # ----- Classification loss with hard negative mining -------------
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

        # Exclude positives, then rank the negatives by loss.
        loss_c[pos.view(-1, 1)] = 0
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        # Cap negatives at negpos_ratio times the positives.
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Cross-entropy over positives plus the mined hard negatives.
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # L(x,c,l,g) = (Lconf(x,c) + αLloc(x,l,g)) / N; guard against N == 0.
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        loss_landm /= N1

        return loss_l, loss_c, loss_landm
예제 #20
0
    def forward(self, predictions, priors, targets):
        """Multibox loss with an auxiliary objectness (binary) branch.

        Args:
            predictions (tuple): (loc_data, conf_data, obj_data) where
                loc_data:  (batch, num_priors, 4) box regressions,
                conf_data: (batch, num_priors, num_classes) class scores,
                obj_data:  (batch, num_priors, 2) object/background scores.
            priors (tensor): (num_priors, 4) default boxes.
            targets (list): per-image (num_objs, 5) tensors, label last.

        Returns:
            (loss_l, loss_c, loss_obj): localization loss, classification
            loss and the down-weighted objectness loss.
        """
        loc_data, conf_data, obj_data = predictions
        num = loc_data.size(0)        # batch size
        num_priors = priors.size(0)

        # Match priors (default boxes) against the ground truth.
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        # Binary objectness target: 1 wherever any object class was matched.
        # (Hoisted out of the loop — it only needs the final conf_t.)
        obj_t = conf_t.clone()
        obj_t[conf_t > 0] = 1
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
            obj_t = obj_t.cuda()
        # wrap targets
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)
        obj_t = Variable(obj_t, requires_grad=False)

        # Detached objectness score for prior filtering (no gradient flow).
        obj_conf_data = obj_data[:, :, 1].detach()
        pos = conf_t > 0
        # Filter out positives whose objectness score is below obj_score.
        # Intended truth table (matched, low_score) -> keep:
        #   1 1 -> 0   # focus: drop low-confidence positives
        #   1 0 -> 1
        #   0 1 -> 0
        #   0 0 -> 0
        # BUG FIX: the original computed (pos + neg_positive) > 2, which is
        # never true for 0/1 masks, so the filter silently did nothing.
        low_score = obj_conf_data < self.obj_score
        pos = pos & ~low_score

        # If filtering removed every positive, fall back to the raw matches
        # so the losses below stay well-defined.
        if pos.data.long().sum() == 0:
            pos = conf_t > 0

        # Localization Loss (Smooth L1) over positives only.
        # Shape: [batch, num_priors, 4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Objectness loss with hard negative mining on the binary branch.
        pos_obj = obj_t > 0
        batch_obj_conf = obj_data.view(-1, 2)
        loss_obj = log_sum_exp(batch_obj_conf) - batch_obj_conf.gather(
            1, obj_t.view(-1, 1))

        # loss_obj is (num * num_priors, 1) here, so reshape the mask to
        # match (the original indexed with a (num, num_priors) mask).
        loss_obj[pos_obj.view(-1, 1)] = 0
        loss_obj = loss_obj.view(num, -1)
        _, loss_obj_idx = loss_obj.sort(1, descending=True)
        _, idx_obj_rank = loss_obj_idx.sort(1)
        num_obj_pos = pos_obj.long().sum(1, keepdim=True)
        num_obj_neg = torch.clamp(self.negpos_ratio * num_obj_pos,
                                  max=pos_obj.size(1) - 1)
        neg_obj = idx_obj_rank < num_obj_neg.expand_as(idx_obj_rank)

        pos_obj_idx = pos_obj.unsqueeze(2).expand_as(obj_data)
        neg_obj_idx = neg_obj.unsqueeze(2).expand_as(obj_data)

        conf_obj_p = obj_data[(pos_obj_idx + neg_obj_idx).gt(0)].view(-1, 2)
        targets_weighted = obj_t[(pos_obj + neg_obj).gt(0)]
        loss_obj = F.cross_entropy(conf_obj_p,
                                   targets_weighted,
                                   reduction='sum')

        # Classification loss with hard negative mining.
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence loss over positives and mined hard negatives.
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x,c) + αLloc(x,l,g)) / N.
        N = max(num_pos.data.sum(), 1)
        loss_l /= N * 1.0
        loss_c /= N * 1.0
        # The objectness term is normalized by its own negative count and
        # down-weighted by 0.4 as an auxiliary loss.
        N1 = max(num_obj_neg.data.sum(), 1)
        loss_obj /= N1
        loss_obj = 0.4 * loss_obj

        return loss_l, loss_c, loss_obj
예제 #21
0
    def forward(self, predictions, priors, targets):
        """Multibox loss with a decomposed binary/multi-class head and
        per-class re-weighting experiments.

        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                bin_conf: torch.size(batch_size,num_priors,2)
                priors shape: torch.size(num_priors,4)

            ground_truth (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).

        Returns:
            (loss_l, loss_cls, loss_b, pos, neg_binary, neg_multi): the
            localization, multi-class and binary losses plus the positive
            mask and the two mined-negative masks.
        """

        loc_data, conf_data, bin_conf_data = predictions
        priors = priors
        num = loc_data.size(0)          # batch size
        num_priors = (priors.size(0))
        num_classes = self.num_classes

        # match priors (default boxes) and ground truth boxes;
        # match_3_terms fills loc_t / conf_t / bin_conf_t in place.
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        bin_conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            defaults = priors.data
            match_3_terms(self.threshold, truths, defaults, self.variance,
                          labels, loc_t, conf_t, bin_conf_t, idx)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
            bin_conf_t = bin_conf_t.cuda()
        # wrap targets
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)
        bin_conf_t = Variable(bin_conf_t, requires_grad=False)

        # Positives are priors matched to any object in the binary target.
        pos = bin_conf_t > 0

        # Localization Loss (Smooth L1), positives only
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)

        #[0.,  47.,  54.,  54.,  48.,  69.,  45.,  91.,  41., 189.,  51.,  57.,  51.,  43.,  48., 514.,  80., 107.,  68.,  32.,  65.]

        # Per-element smooth L1 (reduction='none') so the regression loss
        # can be re-weighted per ground-truth class below.
        loss_l_elements = F.smooth_l1_loss(loc_p, loc_t,
                                           reduction='none').sum(1)
        conf_t_label = conf_t[conf_t > 0]
        # Per-class positive counts over this batch.
        conf_t_stat = torch.zeros(num_classes).cuda()
        for index in range(1, num_classes):
            conf_t_stat[index] = (conf_t_label == index).sum()
        # Inverse-frequency weights for classes present in the batch.
        conf_t_weights = torch.zeros(num_classes).cuda()
        for index in range(1, num_classes):
            if conf_t_stat[index] > 0:
                conf_t_weights[index] = 1. / conf_t_stat[index]
        loss_l = 0
        #conf_t_weights = torch.exp(1-torch.tensor([0., 10., 12., 12., 10., 15., 11., 17., 16., 34., 14., 19., 15., 12., 10., 66., 16., 10.,
        # 17., 10., 15.])/66).cuda()
        # NOTE(review): the two assignments below overwrite the
        # inverse-frequency weights computed above; the effective weights
        # are all-ones, so the exp(...) tensor is dead code — confirm
        # which weighting scheme is intended.
        conf_t_weights = torch.exp(
            torch.tensor([
                2, 1.3930, 1.2377, 2.3323, 1.7176, 2.0606, 1.3950, 1.0631,
                1.7324, 1.3473, 1.6245, 1.1843, 2.2063, 1.3083, 1.3608, 1.2330,
                1.8409, 1.6459, 1.3869, 1.4504, 1.2606
            ])).cuda()
        conf_t_weights = torch.ones(21).cuda()
        # Weighted sum of the per-element regression losses, by class.
        for index in range(1, num_classes):
            loss_l += conf_t_weights[index] * (
                loss_l_elements[conf_t_label == index]).sum()

        # Compute max binary_conf across batch for hard negative mining
        batch_bin_conf = bin_conf_data.view(-1, 2)
        loss_bin = log_sum_exp(batch_bin_conf) - batch_bin_conf.gather(
            1, bin_conf_t.view(-1, 1))

        # Hard Negative Mining on the binary (object/background) branch
        loss_bin[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_bin = loss_bin.view(num, -1)
        # Double argsort ranks each prior's loss within its image.
        _, loss_idx = loss_bin.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)
        neg_binary = neg  # returned so callers can reuse the mined mask

        # Binary confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(bin_conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(bin_conf_data)
        bin_conf_p = bin_conf_data[(pos_idx + neg_idx).gt(0)].view(-1, 2)
        targets_weighted = bin_conf_t[(pos + neg).gt(0)]
        # Per-element CE so the binary loss can be re-weighted per class.
        loss_bin_elements = F.cross_entropy(bin_conf_p,
                                            targets_weighted,
                                            reduction='none')

        # Multi-class labels of the selected (pos + mined neg) priors.
        conf_t_f_label_for_bin = conf_t[(pos + neg) > 0]
        conf_t_f_stat_for_bin = torch.zeros(num_classes).cuda()
        for index in range(0, num_classes):
            conf_t_f_stat_for_bin[index] = (
                conf_t_f_label_for_bin == index).sum()
        conf_t_f_weights_for_bin = torch.zeros(num_classes).cuda()
        for index in range(0, num_classes):
            if conf_t_f_stat_for_bin[index] > 0:
                conf_t_f_weights_for_bin[
                    index] = 1. / conf_t_f_stat_for_bin[index]
        loss_b = 0
        #conf_t_f_weights_for_bin = torch.exp(1 - torch.tensor(
        #    [0., 10., 12., 12., 10., 15., 11., 17., 16., 34., 14., 19., 15., 12., 10., 66., 16., 10.,
        #    17., 10., 15.]) / 66).cuda()
        # NOTE(review): as above, the weights computed from batch counts
        # are overwritten; the effective weights are all-ones with the
        # background class (index 0) tripled.
        conf_t_f_weights_for_bin = torch.exp(
            torch.tensor([
                2, 1.3930, 1.2377, 2.3323, 1.7176, 2.0606, 1.3950, 1.0631,
                1.7324, 1.3473, 1.6245, 1.1843, 2.2063, 1.3083, 1.3608, 1.2330,
                1.8409, 1.6459, 1.3869, 1.4504, 1.2606
            ])).cuda()
        conf_t_f_weights_for_bin = torch.ones(21).cuda()
        conf_t_f_weights_for_bin[0] *= 3
        for index in range(0, num_classes):
            loss_b += conf_t_f_weights_for_bin[index] * (
                loss_bin_elements[conf_t_f_label_for_bin == index]).sum()

        # Compute max binary_conf across batch for hard negative mining
        batch_bin_conf = bin_conf_data.view(-1, 2)
        batch_conf = conf_data.view(-1, self.num_classes - 1)

        # Recompose full (num_classes)-way logits from the binary branch
        # and the (num_classes - 1)-way object branch:
        #   P_k = conf_k + bin_object,  P_0 = bin_background + logsumexp(conf)
        P_k = (batch_conf[:, ].t() + batch_bin_conf[:, 1]).t()
        P_0 = batch_bin_conf[:, 0].unsqueeze(1) + torch.log(
            torch.exp(batch_conf).sum(dim=1, keepdim=True))
        P_logit = torch.cat((P_0, P_k), dim=1).view(num, -1, self.num_classes)

        # Compute max conf across batch for hard negative mining
        batch_P_logit = P_logit.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_P_logit) - batch_P_logit.gather(
            1, conf_t.view(-1, 1))

        # Hard Negative Mining (negatives mined but unused below — the
        # multi-class loss is computed on positives only; see neg_multi)
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)
        neg_multi = neg

        # Confidence Loss Including Positive and Negative Examples
        '''
        pos_idx = pos.unsqueeze(2).expand_as(P_logit)
        neg_idx = neg.unsqueeze(2).expand_as(P_logit)
        conf_p = P_logit[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_cls = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
        '''
        pos_idx = pos.unsqueeze(2).expand_as(P_logit)
        conf_p = P_logit[(pos_idx).gt(0)].view(-1, self.num_classes)
        targets_weighted = conf_t[(pos).gt(0)]
        # Per-element CE for per-class re-weighting of the composed logits.
        loss_mul_elements = F.cross_entropy(conf_p,
                                            targets_weighted,
                                            reduction='none')

        conf_t_f_label_for_mul = conf_t[(pos) > 0]
        conf_t_f_stat_for_mul = torch.zeros(num_classes).cuda()
        for index in range(1, num_classes):
            conf_t_f_stat_for_mul[index] = (
                conf_t_f_label_for_mul == index).sum()
        conf_t_f_weights_for_mul = torch.zeros(num_classes).cuda()
        for index in range(1, num_classes):
            if conf_t_f_stat_for_mul[index] > 0:
                conf_t_f_weights_for_mul[
                    index] = 1. / conf_t_f_stat_for_mul[index]
        loss_cls = 0
        #conf_t_f_weights_for_mul = torch.exp(1 - torch.tensor(
        #    [0., 10., 12., 12., 10., 15., 11., 17., 16., 34., 14., 19., 15., 12., 10., 66., 16., 10.,
        #     17., 10., 15.]) / 66).cuda()
        # NOTE(review): same overwrite pattern — effective weights are ones.
        conf_t_f_weights_for_mul = torch.exp(
            torch.tensor([
                2, 1.3930, 1.2377, 2.3323, 1.7176, 2.0606, 1.3950, 1.0631,
                1.7324, 1.3473, 1.6245, 1.1843, 2.2063, 1.3083, 1.3608, 1.2330,
                1.8409, 1.6459, 1.3869, 1.4504, 1.2606
            ])).cuda()
        conf_t_f_weights_for_mul = torch.ones(21).cuda()
        for index in range(1, num_classes):
            loss_cls += conf_t_f_weights_for_mul[index] * (
                loss_mul_elements[conf_t_f_label_for_mul == index]).sum()

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g) + \beta Lbinconf(x, c)) / N

        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_cls /= N
        loss_b /= N
        return loss_l, loss_cls, loss_b, pos, neg_binary, neg_multi
예제 #22
0
    def forward(self, predictions, priors, targets):
        """Standard SSD multibox loss (localization + classification).

        Args:
            predictions (tuple): (loc_data, conf_data) where
                loc_data:  (batch, num_priors, 4) box regressions,
                conf_data: (batch, num_priors, num_classes) class scores.
            priors (tensor): (num_priors, 4) default boxes.
            targets (list): per-image (num_objs, 5) tensors, label last.

        Returns:
            (loss_l, loss_c): localization and confidence losses, each
            normalized by the number of positive priors.
        """
        loc_data, conf_data = predictions
        num = loc_data.size(0)        # batch size
        num_priors = priors.size(0)

        # Match priors (default boxes) against the ground truth; match()
        # fills loc_t / conf_t in place, one image at a time.
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        # wrap targets
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        # Priors matched to an object (positives).
        pos = conf_t > 0

        # Localization Loss (Smooth L1), positives only.
        # Shape: [batch, num_priors, 4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Per-prior softmax cross-entropy for hard negative mining.
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

        # Hard Negative Mining: exclude positives, rank negatives by loss.
        loss_c[pos.view(-1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        # Cap negatives at negpos_ratio times the positives.
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence loss over positives and mined hard negatives.
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x,c) + αLloc(x,l,g)) / N.
        # Guard against a batch with no positives (division by zero).
        N = max(num_pos.data.sum(), 1)
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
예제 #23
0
    def forward(self,
                predictions,
                targets,
                use_arm=False,
                filter_object=False,
                filter_score=0,
                debug=False):
        """Multibox loss for a RefineDet-style two-stage detector.

        Args:
            predictions (tuple): (arm_loc, arm_conf, odm_loc, odm_conf,
                priors) from the network.
                conf shape:   (batch_size, num_priors, num_classes)
                loc shape:    (batch_size, num_priors, 4)
                priors shape: (num_priors, 4)
            targets (list): per-image (num_objs, 5) tensors, label last.
            use_arm (bool): compute the ODM loss against ARM-refined
                anchors instead of the ARM loss itself.
            filter_object (bool): with use_arm, ignore priors whose ARM
                objectness score is at or below self.object_score.
            filter_score (int): unused; kept for interface compatibility.
            debug (bool): print the number of positive priors.

        Returns:
            (loss_l, loss_c): normalized localization / confidence losses.
        """
        if use_arm:
            arm_loc_data, arm_conf_data, loc_data, conf_data, priors = predictions
        else:
            loc_data, conf_data, _, _, priors = predictions
        num = loc_data.size(0)   # batch size
        priors = priors[:loc_data.size(1), :]
        num_priors = priors.size(0)

        # Match priors (default boxes) against the ground truth.
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        defaults = priors.data
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data

            # Binary mode: collapse every object class to a single label.
            if self.num_classes == 2:
                labels = labels > 0
            if use_arm:
                # Match against anchors refined by the ARM regressions.
                refine_match(self.threshold,
                             truths,
                             defaults,
                             self.variance,
                             labels,
                             loc_t,
                             conf_t,
                             idx,
                             arm_loc_data[idx].data,
                             use_weight=False)
            else:
                match(self.threshold, truths, defaults, self.variance, labels,
                      loc_t, conf_t, idx)

        if self.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        # wrap targets
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        if use_arm and filter_object:
            # Drop positives the ARM already classifies as background
            # (detached so no gradient flows through the filter).
            P = F.softmax(arm_conf_data, 2)
            arm_conf_data_temp = P[:, :, 1]
            object_score_index = arm_conf_data_temp <= self.object_score
            pos = conf_t > 0
            pos[object_score_index.detach()] = 0
        else:
            pos = conf_t > 0

        num_pos = pos.sum(1, keepdim=True)

        # Localization Loss (Smooth L1), positives only.
        # Shape: [batch, num_priors, 4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        if debug:
            if use_arm:
                print("odm pos num: ", str(loc_t.size(0)), str(loc_t.size(1)))
            else:
                print("arm pos num", str(loc_t.size(0)), str(loc_t.size(1)))

        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Per-prior softmax cross-entropy for hard negative mining.
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Hard Negative Mining: exclude positives, rank negatives by loss.
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence loss over positives and mined hard negatives.
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)

        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)

        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Guard against a batch with no positives (division by zero).
        N = max(num_pos.data.sum(), 1)
        loss_l /= float(N)
        loss_c /= float(N)
        return loss_l, loss_c
예제 #24
0
    def forward(self, predictions, targets):
        """Multibox loss with optional online hard example mining (OHEM).

        Args:
            predictions (tuple): (loc_data, conf_data, priors) where
                loc_data:  (batch, num_priors, 4) box regressions,
                conf_data: (batch, num_priors, num_classes) class scores,
                priors:    (num_priors, 4) default boxes.
            targets (list): per-image (num_objs, 5) tensors, label last.

        Returns:
            (loss_l, loss_c): normalized localization / confidence losses.
        """
        loc_data, conf_data, priors = predictions
        num = loc_data.size(0)   # batch size
        priors = priors[:loc_data.size(1), :]
        num_priors = priors.size(0)

        # Match priors (default boxes) against the ground truth.
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            # Binary mode: collapse every object class to a single label.
            if self.num_classes == 2:
                labels = labels > 0
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()

        pos = conf_t > 0
        num_pos = pos.sum(1, keepdim=True)

        if self.OHEM:
            # Per-prior softmax cross-entropy for hard negative mining.
            batch_conf = conf_data.view(-1, self.num_classes)

            loss_hard = log_sum_exp(batch_conf) - batch_conf.gather(
                1, conf_t.view(-1, 1))
            # Hard Negative Mining: exclude positives, rank by loss.
            loss_hard[pos.view(-1, 1)] = 0  # filter out pos boxes for now
            loss_hard = loss_hard.view(num, -1)
            _, loss_idx = loss_hard.sort(1, descending=True)
            _, idx_rank = loss_idx.sort(1)
            num_pos = pos.long().sum(1, keepdim=True)
            if num_pos.data.sum() > 0:
                num_neg = torch.clamp(self.negpos_ratio * num_pos,
                                      max=pos.size(1) - 1)
            else:
                # No positives in the batch: mine a fixed 15 negatives per
                # image.  Sized by the actual batch (the original
                # hard-coded 32) and placed on the same device as pos so
                # the comparison below does not fail on GPU.
                fake_num_pos = torch.ones(num, 1, dtype=torch.long,
                                          device=pos.device) * 15
                num_neg = torch.clamp(self.negpos_ratio * fake_num_pos,
                                      max=pos.size(1) - 1)
            neg = idx_rank < num_neg.expand_as(idx_rank)

            # Confidence loss over positives and mined hard negatives.
            pos_idx = pos.unsqueeze(2).expand_as(conf_data)
            neg_idx = neg.unsqueeze(2).expand_as(conf_data)
            conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
                -1, self.num_classes)
            targets_weighted = conf_t[(pos + neg).gt(0)]
            loss_c = F.cross_entropy(conf_p,
                                     targets_weighted,
                                     reduction='sum')
        else:
            # BUG FIX: the original referenced the undefined name conf_p
            # here (NameError).  Without OHEM, use every prior.
            loss_c = F.cross_entropy(conf_data.view(-1, self.num_classes),
                                     conf_t.view(-1), reduction='sum')
        # Localization Loss (Smooth L1), positives only.
        # Shape: [batch, num_priors, 4]
        if num_pos.data.sum() > 0:
            pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
            loc_p = loc_data[pos_idx].view(-1, 4)
            loc_t = loc_t[pos_idx].view(-1, 4)
            loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')
            N = num_pos.data.sum()
        else:
            # No matched priors: zero regression loss on the right device.
            loss_l = torch.zeros(1, device=loc_data.device)
            N = 1.0
        loss_l /= float(N)
        loss_c /= float(N)
        return loss_l, loss_c
예제 #25
0
    def forward(self, predictions, priors, targets):
        """Compute the RetinaFace multi-task loss (box, class, landmarks).

        Args:
            predictions: tuple (loc_data, conf_data, landm_data) from the net.
            priors: prior (default) boxes, shape (num_priors, 4).
            targets: per-image ground truth; assumed layout per row:
                [:, :4] box, [:, 4:14] landmarks, [:, -1] label
                -- TODO confirm against the dataset loader.

        Returns:
            (loss_l, loss_c, loss_landm): localization, classification and
            landmark losses, each normalized by its positive count.
        """
        #--------------------------------------------------------------------#
        #   Unpack the three prediction heads: box regression, class
        #   confidence, and facial-landmark regression.
        #--------------------------------------------------------------------#
        loc_data, conf_data, landm_data = predictions
        #--------------------------------------------------#
        #   Batch size and number of prior (default) boxes.
        #--------------------------------------------------#
        priors = priors
        num = loc_data.size(0)
        num_priors = (priors.size(0))

        #--------------------------------------------------#
        #   Target buffers that `match` fills in-place.
        #--------------------------------------------------#
        loc_t = torch.Tensor(num, num_priors, 4)
        landm_t = torch.Tensor(num, num_priors, 10)
        conf_t = torch.LongTensor(num, num_priors)

        for idx in range(num):
            # Ground-truth boxes, labels and landmarks for this image.
            truths = targets[idx][:, :4].data
            labels = targets[idx][:, -1].data
            landms = targets[idx][:, 4:14].data

            # Prior boxes are shared by every image in the batch.
            defaults = priors.data
            #--------------------------------------------------#
            #   Match ground truth to priors: a prior whose
            #   overlap with a ground-truth box is high enough
            #   (self.threshold) becomes responsible for it.
            #--------------------------------------------------#
            match(self.threshold, truths, defaults, self.variance, labels, landms, loc_t, conf_t, landm_t, idx)
            
        #--------------------------------------------------#
        #   Move targets to GPU when requested.
        #   loc_t   (num, num_priors, 4)
        #   conf_t  (num, num_priors)
        #   landm_t (num, num_priors, 10)
        #--------------------------------------------------#
        zeros = torch.tensor(0)
        if self.cuda:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
            landm_t = landm_t.cuda()
            zeros = zeros.cuda()

        #------------------------------------------------------------------------#
        #   Faces WITH landmark annotations are labelled 1; faces WITHOUT
        #   landmarks are labelled -1.  Hence:
        #     landmark loss uses  pos1 = conf_t > zeros   (label == 1 only)
        #     box loss uses       pos  = conf_t != zeros  (labels 1 and -1)
        #------------------------------------------------------------------------#  
        pos1 = conf_t > zeros
        pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
        landm_p = landm_data[pos_idx1].view(-1, 10)
        landm_t = landm_t[pos_idx1].view(-1, 10)
        loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')
        
        pos = conf_t != zeros
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        #--------------------------------------------------#
        #   Classification: fold label -1 into class 1 so
        #   cross entropy only sees {0: background, 1: face}.
        #   batch_conf  (num * num_priors, 2)
        #   loss_c      (num, num_priors)
        #--------------------------------------------------#
        conf_t[pos] = 1
        batch_conf = conf_data.view(-1, self.num_classes)
        # Per-prior classification loss, used only to rank hard negatives.
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

        # Positives are excluded from mining; only hard negatives get ranked.
        loss_c[pos.view(-1, 1)] = 0
        loss_c = loss_c.view(num, -1)
        #--------------------------------------------------#
        #   Double argsort gives each prior's rank by loss.
        #   loss_idx    (num, num_priors)
        #   idx_rank    (num, num_priors)
        #--------------------------------------------------#
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        #--------------------------------------------------#
        #   Count positives per image.
        #   num_pos     (num, )
        #   neg         (num, num_priors)
        #--------------------------------------------------#
        num_pos = pos.long().sum(1, keepdim=True)
        # Cap negatives at negpos_ratio per positive.
        num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        #--------------------------------------------------#
        #   Expand the masks over the class dimension.
        #   pos_idx   (num, num_priors, num_classes)
        #   neg_idx   (num, num_priors, num_classes)
        #--------------------------------------------------#
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        
        # Cross entropy over the selected positives plus hard negatives.
        conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes)
        targets_weighted = conf_t[(pos+neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Normalize box/class losses by the positive count (at least 1).
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N

        # Landmark loss is normalized by its own positive count.
        num_pos_landm = pos1.long().sum(1, keepdim=True)
        N1 = max(num_pos_landm.data.sum().float(), 1)
        loss_landm /= N1
        return loss_l, loss_c, loss_landm
예제 #26
0
    def forward(self, predictions, priors, targets):
        """Multi-task face loss: Smooth-L1 box regression, cross-entropy
        classification with hard negative mining, and Smooth-L1 landmark
        regression.

        Landmark loss only uses priors whose matched label is positive
        (label -1 marks faces without landmark annotations); labels -1 are
        folded into class 1 before classification.  Box/class losses are
        normalized by the positive count, landmarks by their own count.
        """
        loc_data, conf_data, landm_data = predictions
        batch_size = loc_data.size(0)
        n_priors = priors.size(0)

        # Per-prior regression/classification targets, filled by `match`.
        loc_t = torch.Tensor(batch_size, n_priors, 4)
        landm_t = torch.Tensor(batch_size, n_priors, 10)
        conf_t = torch.LongTensor(batch_size, n_priors)
        for i in range(batch_size):
            gt = targets[i]
            match(self.threshold, gt[:, :4].data, priors.data, self.variance,
                  gt[:, -1].data, gt[:, 4:14].data, loc_t, conf_t, landm_t, i)

        zeros = torch.tensor(0)
        if self.cuda:
            loc_t, conf_t, landm_t, zeros = (loc_t.cuda(), conf_t.cuda(),
                                             landm_t.cuda(), zeros.cuda())

        # Landmark loss over priors with a strictly positive label.
        landm_mask = conf_t > zeros
        n_landm_pos = landm_mask.long().sum(1, keepdim=True)
        N1 = max(n_landm_pos.data.sum().float(), 1)

        landm_sel = landm_mask.unsqueeze(-1).expand_as(landm_data)
        loss_landm = F.smooth_l1_loss(landm_data[landm_sel].view(-1, 10),
                                      landm_t[landm_sel].view(-1, 10),
                                      reduction='sum')

        # Any non-background prior counts as a face for box/class losses;
        # collapse label -1 (face without landmarks) into class 1.
        face_mask = conf_t != zeros
        conf_t[face_mask] = 1

        # Localization loss over positives.
        loc_sel = face_mask.unsqueeze(-1).expand_as(loc_data)
        loss_l = F.smooth_l1_loss(loc_data[loc_sel].view(-1, 4),
                                  loc_t[loc_sel].view(-1, 4),
                                  reduction='sum')

        # Rank priors by classification loss for hard negative mining.
        flat_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(flat_conf) - flat_conf.gather(1, conf_t.view(-1, 1))
        loss_c[face_mask.view(-1, 1)] = 0
        loss_c = loss_c.view(batch_size, -1)
        _, loss_order = loss_c.sort(1, descending=True)
        _, rank = loss_order.sort(1)
        n_pos = face_mask.long().sum(1, keepdim=True)
        n_neg = torch.clamp(self.negpos_ratio * n_pos,
                            max=face_mask.size(1) - 1)
        neg_mask = rank < n_neg.expand_as(rank)

        # Classification loss over positives plus mined hard negatives.
        keep = (face_mask.unsqueeze(2).expand_as(conf_data)
                + neg_mask.unsqueeze(2).expand_as(conf_data)).gt(0)
        conf_p = conf_data[keep].view(-1, self.num_classes)
        conf_target = conf_t[(face_mask + neg_mask).gt(0)]
        loss_c = F.cross_entropy(conf_p, conf_target, reduction='sum')

        # Normalize (positive counts clamped to at least 1).
        N = max(n_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        loss_landm /= N1

        return loss_l, loss_c, loss_landm
예제 #27
0
    def forward(self, predictions, targets):
        """SSD MultiBox loss: Smooth-L1 localization plus cross-entropy
        classification with hard negative mining.

        Args:
            predictions: tuple (loc_data, conf_data, priors) from the net.
                loc is (batch, num_priors, 4); conf is
                (batch, num_priors, num_classes); priors is (num_priors, 4).
            targets: per-image ground truth, each (num_objs, 5) with the
                class label in the last column.

        Returns:
            (loss_l, loss_c), each summed over the selected priors and
            normalized by the number of positives (clamped to at least 1).
        """
        loc_data, conf_data, priors = predictions
        # get batch_size
        num = loc_data.size(0)
        # default boxes actually covered by this prediction grid
        priors = priors[:loc_data.size(1), :]
        num_priors = (priors.size(0))

        # Build regression/classification targets; `match` fills the
        # buffers in place, one image at a time.
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data   # ground-truth boxes
            labels = targets[idx][:, -1].data    # ground-truth labels
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        if self.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()

        # wrap targets: no gradients flow into them
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        # Positive priors: matched to a non-background class.
        pos = conf_t > 0

        # Localization Loss (Smooth L1) over positives only.
        # Shape: [batch, num_priors, 4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        # reduction='sum' replaces the deprecated size_average=False
        # (removed in modern PyTorch).
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        batch_conf = conf_data.view(-1, self.num_classes)
        # Per-prior softmax loss, used only to rank negatives.
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))
        loss_c = loss_c.view(num, -1)

        # Hard negative mining: drop positives, rank the rest by loss
        # via a double argsort.
        loss_c[pos] = 0
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        # Cap negatives at negpos_ratio per positive.
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence loss over positives + mined negatives.
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Guard against batches with no positives (avoids divide-by-zero),
        # matching the normalization used by the other losses in this file.
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
    def forward(self, predictions, priors, targets):
        """Multibox loss for a face / masked-face RetinaFace-style head.

        Args:
            predictions (tuple): (loc_data, conf_data, landm_data) from the net.
                conf shape: torch.size(batch_size, num_priors, num_classes)
                loc shape: torch.size(batch_size, num_priors, 4)
                landm shape: torch.size(batch_size, num_priors, 10)
            priors: prior boxes, shape torch.size(num_priors, 4).
            targets (tensor): ground truth boxes, landmarks and labels,
                shape [batch_size, num_objs, 15]; labels are -1 (face
                without landmarks), 1 (face) and 2 (masked face).

        Returns:
            (loss_l, loss_c, loss_landm) normalized losses.
        """
        loc_data, conf_data, landm_data = predictions

        priors = priors
        num = loc_data.size(0)
        num_priors = (priors.size(0))

        # match priors (default boxes) and ground truth boxes
        loc_t = torch.Tensor(num, num_priors, 4)
        landm_t = torch.Tensor(num, num_priors, 10)
        conf_t = torch.LongTensor(num, num_priors)

        for idx in range(num):
            truths = targets[idx][:, :4].data
            labels = targets[
                idx][:,
                     -1].data  # labels take values -1, 1 and 2; 1/2 are the classes
            landms = targets[idx][:, 4:14].data
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  landms, loc_t, conf_t, landm_t, idx)

        if 1:  # HACK: GPU unconditionally assumed -- should be a config flag
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
            landm_t = landm_t.cuda()

        # Label -1 marks faces without landmark annotations; only strictly
        # positive labels contribute to the landmark-positive count.
        zeros = torch.tensor(0).cuda()
        pos = conf_t > zeros
        num_pos_landm = pos.long().sum(1, keepdim=True)
        N1 = max(num_pos_landm.data.sum().float(), 1)

        # 1. Priors matched to the plain-face class (label 1): all ten
        #    landmark coordinates contribute to the loss.
        face_tensor = torch.tensor(1).cuda()
        # Shape: [batch,num_priors,10]
        face_pos = conf_t == face_tensor
        face_pos_idx = face_pos.unsqueeze(face_pos.dim()).expand_as(
            landm_data)  # e.g. 32, 16800, 10
        face_landm_p = landm_data[face_pos_idx].view(-1, 10)
        face_landm_t = landm_t[face_pos_idx].view(-1, 10)

        # 2. Priors matched to the masked-face class (label 2): zero out
        #    coordinates 4: in both prediction and target (presumably the
        #    landmarks covered by the mask -- confirm which points those are).
        # conf_t holds the matched class per prior: `match` above writes 1
        # for priors matched to class-1 boxes and 2 for class-2 boxes.
        mask_tensor = torch.tensor(2).cuda()
        mask_pos = conf_t == mask_tensor
        mask_pos_idx = mask_pos.unsqueeze(mask_pos.dim()).expand_as(landm_data)
        mask_landm_p = landm_data[mask_pos_idx].view(-1, 10)
        mask_landm_t = landm_t[mask_pos_idx].view(-1, 10)

        # NOTE(review): boolean indexing above yields copies, so these
        # zero-writes do not modify landm_data / landm_t themselves.
        mask_landm_p[:, 4:] = 0
        mask_landm_t[:, 4:] = 0

        landm_p = torch.cat([face_landm_p, mask_landm_p], 0)
        landm_t = torch.cat([face_landm_t, mask_landm_t], 0)

        # NOTE(review): default 'mean' reduction here, yet loss_landm is
        # still divided by N1 below -- double normalization? confirm intent.
        loss_landm = F.smooth_l1_loss(landm_p, landm_t)
        ##############################################################################################################
        # Any non-background prior (labels -1, 1, 2) is a positive for the
        # box-regression loss.
        zeros = torch.tensor(0).cuda()
        pos = conf_t != zeros

        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t)

        ##############################################################################################################
        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        # The label file contains -1 labels; fold them into class 1 so class
        # ids form a valid 0..num_classes-1 range for gather/cross_entropy.
        no_landmark_pos = conf_t < zeros
        conf_t[no_landmark_pos] = 1
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Hard Negative Mining
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        # NOTE(review): mean reduction plus the /N below differs from the
        # usual sum-reduction SSD loss -- confirm this scaling is intended.
        loss_c = F.cross_entropy(conf_p, targets_weighted)
        ##############################################################################################################

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        loss_landm /= N1

        return loss_l, loss_c, loss_landm
예제 #29
0
    def forward(self,
                odm_data,
                priors,
                targets,
                arm_data=None,
                filter_object=False):
        """Multibox loss for the ODM branch of RefineDet.

        Args:
            odm_data (tuple): (loc_data, conf_data) from the ODM head.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
            priors: prior boxes, shape torch.size(num_priors,4).
            targets (tensor): ground truth boxes and labels for a batch,
                shape [batch_size,num_objs,5] (last idx is the label).
            arm_data (tuple): ARM branch output (arm_loc, arm_conf); when
                given, priors are refined by arm_loc before matching.
            filter_object: if True, priors whose ARM objectness score is
                <= self.object_score are removed from the positive set.

        Returns:
            (loss_l, loss_c) normalized by the number of positives.
        """
        loc_data, conf_data = odm_data
        if arm_data:
            arm_loc, arm_conf = arm_data
        priors = priors.data
        num = loc_data.size(0)
        num_priors = (priors.size(0))

        # match priors (default boxes) and ground truth boxes
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            # Binary detection collapses every class to "object".
            if self.num_classes == 2:
                labels = labels > 0
            if arm_data:
                # Match against ARM-refined anchors.
                refine_match(self.threshold, truths, priors, self.variance,
                             labels, loc_t, conf_t, idx, arm_loc[idx].data)
            else:
                match(self.threshold, truths, priors, self.variance, labels,
                      loc_t, conf_t, idx)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        # wrap targets (no gradients flow into them)
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        pos = conf_t > 0
        if arm_data and filter_object:
            # Negative anchor filtering: drop priors the ARM already
            # classified as background with high confidence.
            arm_conf_data = arm_conf.data[:, :, 1]
            object_score_index = arm_conf_data <= self.object_score
            pos[object_score_index] = 0

        # Localization Loss (Smooth L1), positives only.
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        # reduction='sum' replaces the deprecated size_average=False
        # (removed in modern PyTorch).
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Hard Negative Mining
        # BUG FIX: loss_c is (num*num_priors, 1) at this point, so the mask
        # must be flattened to match; `loss_c[pos] = 0` with a
        # (num, num_priors) mask raises a shape error in modern PyTorch.
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        # Guard against batches with no positives (avoids divide-by-zero).
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
예제 #30
0
    def forward(self, predictions, priors, targets):
        """SSD multibox loss: Smooth-L1 box regression on positive priors
        plus cross-entropy classification with hard negative mining.

        Args:
            predictions (tuple): (loc_data, conf_data) from the net;
                loc is (batch, num_priors, 4), conf is
                (batch, num_priors, num_classes).
            priors: default boxes, shape (num_priors, 4).
            targets: per-image ground truth, each (num_objs, 5) with the
                class label in the last column.

        Returns:
            (loss_l, loss_c), each summed and divided by the positive
            count (clamped to at least 1).
        """
        loc_data, conf_data = predictions
        batch_size = loc_data.size(0)
        n_priors = priors.size(0)

        # Encode ground truth into per-prior targets via `match`.
        loc_t = torch.Tensor(batch_size, n_priors, 4)
        conf_t = torch.LongTensor(batch_size, n_priors)
        for i in range(batch_size):
            match(self.threshold, targets[i][:, :-1].data, priors.data,
                  self.variance, targets[i][:, -1].data, loc_t, conf_t, i)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()

        positive = conf_t > 0

        # Box regression over positive priors only.
        box_sel = positive.unsqueeze(-1).expand_as(loc_data)
        loss_l = F.smooth_l1_loss(loc_data[box_sel].view(-1, 4),
                                  loc_t[box_sel].view(-1, 4),
                                  reduction='sum')

        # Per-prior classification loss, used only to rank negatives.
        flat_conf = conf_data.view(-1, self.num_classes)
        mining_loss = log_sum_exp(flat_conf) - flat_conf.gather(
            1, conf_t.view(-1, 1))

        # Hard negative mining: zero out positives, then compute each
        # prior's rank by descending loss via a double argsort.
        mining_loss[positive.view(-1, 1)] = 0
        mining_loss = mining_loss.view(batch_size, -1)
        _, by_loss = mining_loss.sort(1, descending=True)
        _, rank = by_loss.sort(1)
        n_pos = positive.long().sum(1, keepdim=True)
        n_neg = torch.clamp(self.negpos_ratio * n_pos,
                            max=positive.size(1) - 1)
        negative = rank < n_neg.expand_as(rank)

        # Final classification loss over positives + mined negatives.
        sel = (positive.unsqueeze(2).expand_as(conf_data)
               + negative.unsqueeze(2).expand_as(conf_data)).gt(0)
        conf_p = conf_data[sel].view(-1, self.num_classes)
        conf_target = conf_t[(positive + negative).gt(0)]
        loss_c = F.cross_entropy(conf_p, conf_target, reduction='sum')

        # L(x,c,l,g) = (Lconf(x,c) + αLloc(x,l,g)) / N, with N >= 1.
        N = max(n_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N

        return loss_l, loss_c
예제 #31
0
    def forward(self, predictions, priors, targets):
        """SSD multibox loss.

        Args:
            predictions (tuple): (loc_data, conf_data) from the net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
            priors: default boxes, shape torch.size(num_priors,4).
            targets: ground truth boxes and labels for a batch,
                shape [batch_size,num_objs,5] (last idx is the label).

        Returns:
            (loss_l, loss_c) normalized by the positive count.
        """

        loc_data, conf_data = predictions
        priors = priors
        num = loc_data.size(0)
        num_priors = (priors.size(0)
                      )  # e.g. 11620: total default boxes over all feature-map grids
        num_classes = self.num_classes

        # match priors (default boxes) and ground truth boxes
        """
        The loop below converts the raw targets into the network's learning
        targets -- the tensors the loss is computed against: loc_t, conf_t.
        Note: match() mutates these tensors in place, which is why it has
        no return value.
        """
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data  # ground-truth boxes
            labels = targets[idx][:, -1].data  # ground-truth labels
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)  # gt and default boxes
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        # wrap targets
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)
        """
        conf_t > 0 is equivalent to torch.gt(conf_t, 0) or conf_t.gt(0):
        it returns a mask shaped like conf_t, 1 where the condition holds,
        0 elsewhere.
        """
        pos = conf_t > 0  # positive-prior mask (ignores background)

        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)  # predictions
        loc_t = loc_t[pos_idx].view(-1, 4)  # encoded offsets to learn
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Compute max conf across batch for hard negative mining
        """
        conf_data shape: [batch, num_priors, num_classes]
        batch_conf shape: [batch*num_priors, num_classes]
        (cross_entropy in PyTorch expects a 2-D [N, C] input)
        """
        batch_conf = conf_data.view(-1, self.num_classes)  #predictions
        """
        conf_t has shape [batch, num_priors]; positives hold their class id,
        everything else is 0.  Tensor.gather(dim, index) picks, along `dim`,
        the x[class] term of the cross entropy:
        loss(x, class) = -x[class] + log(sum_j exp(x[j]))
        """
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))  # used to rank candidate negatives

        # Hard Negative Mining
        """
        Zero the positives' loss, then sort the loss per image and keep the
        top self.negpos_ratio * num_pos entries as hard negatives.
        """
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes when picking negatives
        """
        Reshape loss_c to [batch, num_priors]; the double argsort below
        computes each prior's rank by descending loss.
        """
        loss_c = loss_c.view(num, -1)

        a_, loss_idx = loss_c.sort(1, descending=True)
        b_, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos,
                              max=pos.size(1) - 1)  # clamp the negative count
        neg = idx_rank < num_neg.expand_as(idx_rank)  # negative-prior mask

        # Confidence Loss Including Positive and Negative Examples
        """
        The steps above only exist to build pos_idx and neg_idx.
        conf_data shape: [batch, num_priors, num_classes]
        """
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        """
        (pos_idx + neg_idx).gt(0) collapses any overlap between the chosen
        positives and negatives back into a single boolean mask.
        """
        # e.g. torch.gt(x, 1): elementwise "greater than 1"
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N

        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c