Пример #1
0
    def forward(self, predictions, targets):
        """Compute the SSD multibox loss.

        Args:
            predictions: tuple of (loc preds, conf preds, default-box list).
                loc: (batch, num_dbox, 4), conf: (batch, num_dbox, classes).
            targets: per-image ground-truth tensors; the last column of each
                is the class label, the first four are (xmin, ymin, xmax, ymax).

        Returns:
            (loss_loc, loss_conf): localization and confidence losses, each
            normalized by the total number of positive default boxes.
        """
        loc_data, conf_data, dbox_list = predictions

        # Shapes: loc_data (batch, num_dbox, 4), conf_data (batch, num_dbox, classes)
        num_batch = loc_data.size(0)
        num_dbox = loc_data.size(1)  # e.g. 8732 default boxes
        num_classes = conf_data.size(2)

        # Buffers filled in place by match(): encoded offsets and matched labels.
        conf_t_label = torch.LongTensor(num_batch, num_dbox).to(self.device)
        loc_t = torch.Tensor(num_batch, num_dbox, 4).to(self.device)

        variances = [0.1, 0.2]
        for batch_idx in range(num_batch):
            # (xmin, ymin, xmax, ymax) boxes and class labels for this image.
            truths = targets[batch_idx][:, :-1].to(self.device)
            labels = targets[batch_idx][:, -1].to(self.device)
            dbox = dbox_list.to(self.device)
            match(self.jaccard_threshold, truths, dbox, variances, labels,
                  loc_t, conf_t_label, batch_idx)

        # ---- Localization loss (Smooth L1) over positive boxes only ----
        pos_mask = conf_t_label > 0
        pos_idx = pos_mask.unsqueeze(pos_mask.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_loc = F.smooth_l1_loss(loc_p, loc_t, reduction="sum")

        # ---- Per-box cross entropy, used to rank hard negatives ----
        batch_conf = conf_data.view(-1, num_classes)
        loss_conf = F.cross_entropy(batch_conf,
                                    conf_t_label.view(-1),
                                    reduction="none").view(num_batch, -1)

        # idx_rank[b, i] is the rank of box i when per-box losses in image b
        # are sorted descending (the classic double-argsort trick).
        _, loss_idx = loss_conf.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)

        # Keep at most neg_pos negatives per positive box.
        num_pos = pos_mask.long().sum(1, keepdim=True)
        num_neg = torch.clamp(num_pos * self.neg_pos, max=num_dbox)
        neg_mask = idx_rank < (num_neg).expand_as(idx_rank)

        # Gather confidences and labels for positives plus mined negatives.
        pos_idx_mask = pos_mask.unsqueeze(2).expand_as(conf_data)
        neg_idx_mask = neg_mask.unsqueeze(2).expand_as(conf_data)
        conf_t_pre = conf_data[(pos_idx_mask + neg_idx_mask).gt(0)].view(
            -1, num_classes)
        conf_t_label_ = conf_t_label[(pos_mask + neg_mask).gt(0)]
        loss_conf = F.cross_entropy(conf_t_pre, conf_t_label_, reduction="sum")

        # Normalize both losses by the total positive count.
        N = num_pos.sum()
        return loss_loc / N, loss_conf / N
    def forward(self,
                predictions,
                targets,
                use_arm=False,
                filter_object=False,
                debug=False):
        """Multibox Loss (RefineDet-style, with optional ARM branch).

        Args:
            predictions (tuple): (arm_loc, arm_conf, odm_loc, odm_conf, priors)
            from the net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4)
            targets (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).
            use_arm (bool): match against ARM-refined boxes (ODM stage).
            filter_object (bool): suppress positives whose ARM objectness
                score is at most ``self.object_score``.
            debug (bool): print matching statistics.

        Returns:
            (loss_l, loss_c) when ``use_arm`` is True, otherwise
            (loss_l, loss_c, loss_l_repul) including the repulsion loss.
        """
        if use_arm:
            arm_loc_data, arm_conf_data, loc_data, conf_data, priors = predictions
        else:
            loc_data, conf_data, _, _, priors = predictions
        num = loc_data.size(0)
        priors = priors[:loc_data.size(1), :]
        num_priors = (priors.size(0))
        num_classes = self.num_classes

        # Match priors (default boxes) and ground truth boxes; the buffers
        # below are filled in place per image.
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        loc_g = torch.Tensor(num, num_priors, 4)
        defaults = priors.data
        for idx in range(num):
            predicts = loc_data[idx].data
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            if self.num_classes == 2:
                # Binary (object vs background) task.
                labels = labels > 0
            if use_arm:
                bbox_weight = refine_match(
                    self.threshold,
                    truths,
                    defaults,
                    self.variance,
                    labels,
                    loc_t,
                    conf_t,
                    idx,
                    arm_loc_data[idx].data,
                    use_weight=False)
            else:
                match(self.threshold, predicts, truths, defaults, self.variance, labels,
                      loc_t, loc_g, conf_t, idx)

        loc_t = loc_t.cuda()
        loc_g = loc_g.cuda()
        conf_t = conf_t.cuda()

        # Wrap targets: no gradients flow into them.
        loc_t = Variable(loc_t, requires_grad=False)
        loc_g = Variable(loc_g, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        if use_arm and filter_object:
            # Drop positives that the ARM branch scores as background.
            P = F.softmax(arm_conf_data, 2)
            arm_conf_data_temp = P[:, :, 1]
            object_score_index = arm_conf_data_temp <= self.object_score
            pos = conf_t > 0
            pos[object_score_index.detach()] = 0
        else:
            pos = conf_t > 0
        num_pos = pos.sum(1, keepdim=True)
        if debug:
            if use_arm:
                print("odm pos num: ", str(loc_t.size(0)), str(loc_t.size(1)))
            else:
                print("arm pos num", str(loc_t.size(0)), str(loc_t.size(1)))

        if self.OHEM:
            # Compute max conf across batch for hard negative mining.
            batch_conf = conf_data.view(-1, self.num_classes)

            loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
                1, conf_t.view(-1, 1))

            # Hard Negative Mining.
            loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
            loss_c = loss_c.view(num, -1)
            _, loss_idx = loss_c.sort(1, descending=True)
            _, idx_rank = loss_idx.sort(1)
            num_pos = pos.long().sum(1, keepdim=True)

            if num_pos.data.sum() > 0:
                num_neg = torch.clamp(
                    self.negpos_ratio * num_pos, max=pos.size(1) - 1)
            else:
                # BUGFIX: the fake positive count must match the batch size.
                # The original hard-coded 32, which breaks expand_as below
                # for any other batch size.
                fake_num_pos = torch.ones(num, 1).long() * 15
                num_neg = torch.clamp(
                    self.negpos_ratio * fake_num_pos, max=pos.size(1) - 1)
            neg = idx_rank < num_neg.expand_as(idx_rank)

            # Confidence Loss Including Positive and Negative Examples.
            pos_idx = pos.unsqueeze(2).expand_as(conf_data)
            neg_idx = neg.unsqueeze(2).expand_as(conf_data)
            conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
                -1, self.num_classes)

            targets_weighted = conf_t[(pos + neg).gt(0)]
            loss_c = F.cross_entropy(
                conf_p, targets_weighted, size_average=False)
        else:
            # BUGFIX: the original referenced an undefined `conf_p` here
            # (NameError). Without OHEM, score every box against its
            # matched label.
            loss_c = F.cross_entropy(
                conf_data.view(-1, self.num_classes), conf_t.view(-1),
                size_average=False)

        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]
        if num_pos.data.sum() > 0:
            pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
            loc_p = loc_data[pos_idx].view(-1, 4)
            loc_t = loc_t[pos_idx].view(-1, 4)
            loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)
            if not use_arm:
                loc_g = loc_g[pos_idx].view(-1, 4)
                priors = priors.expand_as(pos_idx)
                priors = priors[pos_idx].view(-1, 4)
                repul_loss = RepulsionLoss(sigma=0., variance=self.variance)
                loss_l_repul = repul_loss(loc_p, loc_g, priors)

            N = num_pos.data.sum()
        else:
            loss_l = torch.zeros(1)
            # BUGFIX: loss_l_repul was undefined on this path when
            # use_arm is False, crashing at the division below.
            loss_l_repul = torch.zeros(1)
            N = 1.0

        loss_l /= float(N)
        loss_c /= float(N)
        if not use_arm:
            loss_l_repul /= float(N)
            return loss_l, loss_c, loss_l_repul
        return loss_l, loss_c
Пример #3
0
    def forward(self, predictions, priors, targets):
        """Multibox Loss.

        Args:
            predictions (tuple): A tuple containing loc preds and conf preds
                from the SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
            priors (tensor): prior boxes, shape: torch.size(num_priors,4)
            targets (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).

        Returns:
            (loss_l, loss_c): localization and confidence losses, each
            divided by max(number of positive priors, 1).
        """

        loc_data, conf_data = predictions  # loc_data shape: tensor.Size(64, 21824, 4)
        #                                   # conf_data shape: torch.Size([64, 21824, 2])
        priors = priors  # priors shape: torch.Size([21824, 4])
        # priors: tensor([[0.x, 0.x, 0.x, 0.x], [0.x, 0.x, 0.x, 0.x], ...])
        num = loc_data.size(0)  # num: 64, this is batch size
        num_priors = (priors.size(0)
                      )  # num_priors: 21824, total number of anchors

        # match priors (default boxes) and ground truth boxes;
        # loc_t / conf_t are filled in place, one image at a time
        loc_t = torch.Tensor(num, num_priors,
                             4)  # loc_t: torch.Size([64, 21824, 4])
        conf_t = torch.LongTensor(
            num, num_priors)  # conf_t: torch.Size([64, 21824])
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            defaults = priors.data

            # threshold: 0.35
            # variance: [0.1, 0.2]
            #  idx : 0, 1, ...., or 63 which image
            # loc_t: [64, 21824, 4]
            # conf_t: [64, 21824, 2]
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()

        # conf[best_truth_overlap < threshold] = 0
        #                   dim = 21824, which is also the prior number
        # conf_t: tensor([[0, 0, ....],
        #                  [0, 0, 0, ....]
        #                  ...])
        # conf_t.shape: torch.Size([64, 21824])
        # loc_t     torch.Size([64, 21824, 4])
        pos = conf_t > 0  # torch.Size(64, 21824)
        # pos: tensor([[False, False, ...],     num = 64
        #             [False, False, ...]]),  # almost  all false

        # Localization Loss (Smooth L1) over positive priors only
        # Shape: [batch,num_priors,4]
        """ here, loc_data = torch.Size([645, 21824, 4]) """
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(
            loc_data)  # torch.Size([64, 21824, 4])
        # pos_idx: tensor([[[False, False, False, False]]])
        loc_p = loc_data[pos_idx].view(-1, 4)
        # loc_p: positive predicted sample (prior)s location, tensor([[1.074, -0.836, -0.934, 0.414]])
        # loc_p.shape: torch.Size([1186, 4]), torch.Size([num of True, 4])
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')
        """ now we are dueling with classes """
        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        # conf_data.shape: torch.Size([64, 21824, 2])
        # batch_conf.shape: torch.Size(64x21824=1396736, 2)
        # batch_conf
        # tensor([[0.0473, -0.1172], [0.1001, 0.2789], ...])
        # conf_t.shape: torch.Size([64, 21824]),
        # conf_t: almost all 0
        #
        # log_sum_exp: log(softmax(batch_conf)); per-prior classification loss
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Hard Negative Mining: rank priors by loss within each image
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        # cap negatives at negpos_ratio per positive
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        # max(..., 1) guards against batches with no positives
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N

        return loss_l, loss_c
Пример #4
0
    def forward(self, predictions, targets):
        """SSD multibox loss: Smooth-L1 localization + hard-negative-mined CE.

        Args:
            predictions: (loc_data, conf_data, priors) from the SSD net.
            targets: per-image ground truth, shape [num_objs, 5]; the last
                column is the class label.

        Returns:
            (loss_l, loss_c), each normalized by the number of positives.
        """
        loc_data, conf_data, priors = predictions
        # batch size
        num = loc_data.size(0)
        # keep only the priors actually predicted
        priors = priors[:loc_data.size(1), :]
        num_priors = (priors.size(0))
        num_classes = self.num_classes

        # Target buffers filled in place by match(), one image at a time.
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            # ground-truth boxes
            truths = targets[idx][:, :-1].data
            # ground-truth labels
            labels = targets[idx][:, -1].data
            # prior (default) boxes
            defaults = priors.data
            # match ground truth to priors
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        if self.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()

        # Wrap targets: no gradients flow into them.
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        # Priors with a non-background label are positives.
        pos = conf_t > 0
        num_pos = pos.sum(dim=1, keepdim=True)
        # Localization loss (Smooth L1) over positives only.
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)

        batch_conf = conf_data.view(-1, self.num_classes)
        # Per-prior classification loss used to rank hard negatives.
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))
        loss_c = loss_c.view(num, -1)

        loss_c[pos] = 0  # exclude positives while mining negatives
        # Rank priors by loss within each image (double argsort).
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        # Positives per image.
        num_pos = pos.long().sum(1, keepdim=True)
        # Cap negatives at negpos_ratio per positive.
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence loss over positives plus mined negatives.
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)

        # BUGFIX: guard against division by zero when a batch contains no
        # positive priors (the original divided by an unguarded sum).
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
Пример #5
0
    def forward(self, predictions, priors, targets):
        """Multibox loss for face detection with landmarks (RetinaFace-style).

        Args:
            predictions (tuple): (loc preds, conf preds, landmark preds).
            priors (tensor): prior boxes, shape (num_priors, 4).
            targets (tensor): per-image ground truth; columns are
                [x1, y1, x2, y2, 10 landmark coords, label].

        Returns:
            (loss_l, loss_c, loss_landm): box, class, and landmark losses.
        """
        #--------------------------------------------------------------------#
        #   Unpack the three predictions: box regression, class confidence,
        #   and facial-landmark regression.
        #--------------------------------------------------------------------#
        loc_data, conf_data, landm_data = predictions
        #--------------------------------------------------#
        #   Batch size and number of prior (anchor) boxes.
        #--------------------------------------------------#
        priors = priors
        num = loc_data.size(0)
        num_priors = (priors.size(0))

        #--------------------------------------------------#
        #   Target buffers, filled in place by match().
        #--------------------------------------------------#
        loc_t = torch.Tensor(num, num_priors, 4)
        landm_t = torch.Tensor(num, num_priors, 10)
        conf_t = torch.LongTensor(num, num_priors)

        for idx in range(num):
            # Ground-truth boxes, labels, and landmarks for this image.
            truths = targets[idx][:, :4].data
            labels = targets[idx][:, -1].data
            landms = targets[idx][:, 4:14].data

            # Prior (default) boxes.
            defaults = priors.data
            #--------------------------------------------------#
            #   Match ground truth to priors: a prior that overlaps a
            #   ground-truth box strongly enough is considered matched
            #   and becomes responsible for detecting that box.
            #--------------------------------------------------#
            match(self.threshold, truths, defaults, self.variance, labels,
                  landms, loc_t, conf_t, landm_t, idx)

        #--------------------------------------------------#
        #   Move targets to the GPU when enabled.
        #   loc_t   (num, num_priors, 4)
        #   conf_t  (num, num_priors)
        #   landm_t (num, num_priors, 10)
        #--------------------------------------------------#
        zeros = torch.tensor(0)
        if self.cuda:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
            landm_t = landm_t.cuda()
            zeros = zeros.cuda()

        #------------------------------------------------------------------------#
        #   Faces WITH landmark annotations are labeled 1; faces WITHOUT
        #   landmarks are labeled -1.  Hence the landmark loss uses
        #   pos1 = conf_t > zeros, while the box loss uses pos = conf_t != zeros.
        #------------------------------------------------------------------------#
        pos1 = conf_t > zeros
        pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
        landm_p = landm_data[pos_idx1].view(-1, 10)
        landm_t = landm_t[pos_idx1].view(-1, 10)
        loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')

        pos = conf_t != zeros
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        #--------------------------------------------------#
        #   batch_conf  (num * num_priors, 2)
        #   loss_c      (num, num_priors)
        #--------------------------------------------------#
        conf_t[pos] = 1  # collapse -1/1 face labels to class 1
        batch_conf = conf_data.view(-1, self.num_classes)
        # Per-prior classification loss used to find hard-to-classify priors.
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Exclude positives: only hard NEGATIVES are mined.
        loss_c[pos.view(-1, 1)] = 0
        loss_c = loss_c.view(num, -1)
        #--------------------------------------------------#
        #   loss_idx    (num, num_priors)
        #   idx_rank    (num, num_priors)
        #--------------------------------------------------#
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        #--------------------------------------------------#
        #   Count the positives in each image.
        #   num_pos     (num, )
        #   neg         (num, num_priors)
        #--------------------------------------------------#
        num_pos = pos.long().sum(1, keepdim=True)
        # Cap the number of negatives at negpos_ratio per positive.
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        #--------------------------------------------------#
        #   pos_idx   (num, num_priors, num_classes)
        #   neg_idx   (num, num_priors, num_classes)
        #--------------------------------------------------#
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)

        # Gather the selected positives and negatives; compute confidence loss.
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Normalize; max(..., 1) guards against empty-positive batches.
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N

        num_pos_landm = pos1.long().sum(1, keepdim=True)
        N1 = max(num_pos_landm.data.sum().float(), 1)
        loss_landm /= N1
        return loss_l, loss_c, loss_landm
Пример #6
0
    def forward(self, predictions, targets):
        """SSD multibox loss with hard negative mining.

        Args:
            predictions: (loc_data, conf_data, priors) from the net.
            targets: per-image ground truth; the last column is the label.

        Returns:
            (loss_l, loss_c): localization and confidence losses, each
            normalized by the number of positive priors.
        """
        # Regression preds, class confidences, prior boxes.
        loc_data, conf_data, priors = predictions
        # Batch size.
        num = loc_data.size(0)
        # Keep only the priors actually predicted.
        priors = priors[:loc_data.size(1), :]
        # Number of priors.
        num_priors = (priors.size(0))
        # Target buffers filled in place by match().
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)

        if self.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
            priors = priors.cuda()

        for idx in range(num):
            # Ground-truth boxes.
            truths = targets[idx][:, :-1]
            # Ground-truth labels.
            labels = targets[idx][:, -1]
            # Prior (default) boxes.
            defaults = priors
            # Match ground truth to priors; fills loc_t / conf_t in place.
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        # Wrap targets: no gradients flow into them.
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        # conf_t > 0 marks priors that contain an object.
        pos = conf_t > 0
        # Number of positives per image.
        num_pos = pos.sum(dim=1, keepdim=True)
        # Localization loss (Smooth L1) over positives only.
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)

        batch_conf = conf_data.view(-1, self.num_classes)
        # Per-prior classification loss (softmax CE without reduction).
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))
        loss_c = loss_c.view(num, -1)

        loss_c[pos] = 0  # exclude positives while ranking negatives
        # Rank priors by loss within each image (double argsort).
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        # Positives per image.
        num_pos = pos.long().sum(1, keepdim=True)
        # Cap negatives at negpos_ratio per positive.
        num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence loss over positives plus mined negatives.
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1, self.num_classes)
        targets_weighted = conf_t[(pos+neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)

        # BUGFIX: guard against division by zero when the batch has no
        # positive priors (the original divided by an unguarded sum).
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
    def forward(self, predictions, priors, targets):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds and conf preds
            from the SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
            priors (tensor): prior boxes, shape: torch.size(num_priors,4)
            targets (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).

        Returns:
            (loss_l, loss_c): localization and confidence losses, each
            divided by max(number of positives, 1).
        """

        loc_data, conf_data = predictions
        num = loc_data.size(0)
        num_priors = (priors.size(0))

        # Match priors (default boxes) and ground truth boxes; the buffers
        # below are filled in place per image.
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        # wrap targets
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        pos = conf_t > 0
        num_pos = pos.sum(dim=1, keepdim=True)

        # Localization Loss (Smooth L1) over positives only.
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # NOTE: experimental GIoU and focal-loss variants (previously kept
        # here as commented-out code) were removed; recover them from
        # version control if needed.

        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

        # Hard Negative Mining
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        # Categorical cross entropy over the selected boxes.
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
Пример #8
0
    def forward(self, predictions, priors, targets):
        """Multibox Loss for face detection with landmarks (RetinaFace-style).

        Args:
            predictions (tuple): (loc preds, conf preds, landmark preds)
            from the net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                landm shape: torch.size(batch_size,num_priors,10)
            priors (tensor): prior boxes, shape: torch.size(num_priors,4)
            targets (tensor): Ground truth for a batch; columns are
                [x1, y1, x2, y2, 10 landmark coords, label].

        Returns:
            (loss_l, loss_c, loss_landm): box, class, and landmark losses.
        """

        loc_data, conf_data, landm_data = predictions
        num = loc_data.size(0)
        num_priors = (priors.size(0))

        # Match priors (default boxes) and ground truth boxes; the buffers
        # below are filled in place per image.
        loc_t = torch.Tensor(num, num_priors, 4)
        landm_t = torch.Tensor(num, num_priors, 10)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :4].data
            labels = targets[idx][:, -1].data
            landms = targets[idx][:, 4:14].data
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  landms, loc_t, conf_t, landm_t, idx)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
            landm_t = landm_t.cuda()

        # BUGFIX: the original called .cuda() here unconditionally, which
        # crashes on CPU-only runs even when GPU is False; allocate the
        # comparison scalar on conf_t's device instead.
        zeros = torch.tensor(0, device=conf_t.device)
        # landm Loss (Smooth L1)
        # Shape: [batch,num_priors,10]
        # label > 0 marks a face WITH annotated landmarks.
        pos1 = conf_t > zeros
        num_pos_landm = pos1.long().sum(1, keepdim=True)
        N1 = max(num_pos_landm.data.sum().float(), 1)
        pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
        landm_p = landm_data[pos_idx1].view(-1, 10)
        landm_t = landm_t[pos_idx1].view(-1, 10)
        loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')

        # label != 0 (i.e. 1 or -1) marks any face; collapse to class 1.
        pos = conf_t != zeros
        conf_t[pos] = 1

        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Hard Negative Mining
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        loss_landm /= N1

        return loss_l, loss_c, loss_landm
Пример #9
0
    def forward(self,
                odm_data,
                priors,
                loc_targets,
                cls_targets,
                arm_data=None,
                filter_object=False):
        """Multibox loss for the ODM branch (RefineDet-style).

        Args:
            odm_data (tuple): (loc_data, conf_data) from the ODM head.
                loc_data shape:  [batch_size, num_priors, 4]
                conf_data shape: [batch_size, num_priors, num_classes]
            priors (tensor): prior (default) boxes, shape [num_priors, 4].
            loc_targets (list): per-image ground-truth boxes.
            cls_targets (list): per-image ground-truth labels (0-based; they
                are shifted by +1 here so that 0 means background).
            arm_data (tuple, optional): (arm_loc, arm_conf) from the ARM
                branch; when given, priors are refined by arm_loc before
                matching.
            filter_object (bool): when True, priors whose ARM objectness
                score is <= self.object_score are removed from the positive
                set (treated as easy negatives).

        Returns:
            (loss_l, loss_c): localization and confidence losses, both
            normalized by the number of positive priors.
        """
        loc_data, conf_data = odm_data
        if arm_data:
            arm_loc, arm_conf = arm_data

        num = loc_data.size(0)
        num_priors = priors.size(0)

        # match priors (default boxes) and ground truth boxes; buffers are
        # filled in place by match()/refine_match(), so they must live on the
        # same device as truths/labels below
        loc_t = torch.Tensor(num, num_priors, 4).to(self.opt.device)
        conf_t = torch.Tensor(num, num_priors).to(self.opt.device)
        for idx in range(num):
            truths = loc_targets[idx]
            labels = cls_targets[idx] + 1  # background as 0

            truths = truths.to(self.opt.device)
            labels = labels.to(self.opt.device)

            # binary (object vs background) detection collapses all classes
            if self.num_classes == 2:
                labels = labels > 0
            if arm_data:
                refine_match(self.threshold, truths, priors, self.variance,
                             labels, loc_t, conf_t, idx, arm_loc[idx])
            else:
                match(self.threshold, truths, priors, self.variance, labels,
                      loc_t, conf_t, idx)

        if arm_data and filter_object:
            # drop positives whose ARM objectness score is too low
            arm_conf_data = arm_conf[:, :, 1]
            pos = conf_t > 0
            object_score_index = arm_conf_data <= self.object_score
            pos[object_score_index] = 0
        else:
            pos = conf_t > 0

        # Localization Loss (Smooth L1), positives only
        # Shape: [batch, num_priors, 4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loc_t = loc_t.detach()
        # reduction='sum' replaces the deprecated size_average=False
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Per-prior confidence loss, used only to rank negatives
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1,
            conf_t.view(-1, 1).long())

        # Hard Negative Mining
        # BUGFIX: the original used pos.view(-1).long(), which interprets the
        # 0/1 values as *row indices* (zeroing only rows 0 and 1); a boolean
        # mask with the same shape as loss_c is required to mask positives.
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1).detach()
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p,
                                 targets_weighted.long(),
                                 reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        # Guard against a batch with no positives (division by zero).
        N = max(num_pos.sum().item(), 1)
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
Пример #10
0
    def forward(self, predictions, priors, targets, using_gpu):
        """Multibox loss for face detection (box + class + landmarks).

        Args:
            predictions (tuple): (loc_data, conf_data, landm_data) from net.
                conf shape:  torch.size(batch_size, num_priors, num_classes)
                loc shape:   torch.size(batch_size, num_priors, 4)
                landm shape: torch.size(batch_size, num_priors, 10)
            priors (tensor): prior boxes, shape torch.size(num_priors, 4).
            targets (list): per-image ground truth, shape [num_objs, 15]:
                columns 0:4 box, 4:14 landmarks, last column label.
            using_gpu (bool): move matching buffers to CUDA when True.

        Returns:
            (loss_l, loss_c, loss_landm): box, class and landmark losses,
            each normalized by its own positive count.
        """

        loc_data, conf_data, landm_data = predictions
        priors = priors
        num = loc_data.size(0)  # num = batch_size
        num_priors = (priors.size(0))

        # match priors (default boxes) and ground truth boxes; the buffers
        # below are filled in place by match()
        loc_t = torch.Tensor(num, num_priors, 4)
        landm_t = torch.Tensor(num, num_priors, 10)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :4].data  # [num_objs, 4]
            labels = targets[idx][:, -1].data  # [num_objs]
            landms = targets[idx][:, 4:14].data  # [num_objs, 10]
            defaults = priors.data
            # key step: match priors to ground truth; results are written
            # into loc_t / conf_t / landm_t in place
            match(self.threshold, truths, defaults, self.variance, labels,
                  landms, loc_t, conf_t, landm_t, idx)

        zeros = torch.tensor(0)
        if using_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
            landm_t = landm_t.cuda()
            zeros = zeros.cuda()

        # landm Loss (Smooth L1)
        # Shape: [batch,num_priors,10]
        pos1 = conf_t > zeros  # priors matched with label > 0 (most are 0)
        # count of priors contributing to the landmark loss
        num_pos_landm = pos1.long().sum(1, keepdim=True)
        N1 = max(num_pos_landm.data.sum().float(), 1)  # guard against zero
        pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
        landm_p = landm_data[pos_idx1].view(-1, 10)
        landm_t = landm_t[pos_idx1].view(-1, 10)
        loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')

        # every non-zero label (presumably including -1 for faces without
        # landmark annotation -- TODO confirm against the matcher) counts as
        # a positive for the box/class losses; collapse them all to class 1
        pos = conf_t != zeros
        conf_t[pos] = 1

        # Localization Loss (Smooth L1), positives only
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        # conf_t: [batch, num_priors]
        # loss_c: [batch*num_priors, 1], per-prior classification loss
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Hard Negative Mining
        # rank the negatives by loss and keep only the hardest ones
        # first zero out positives so only negatives are ranked
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        # reshape [batch*num_priors, 1] -> [batch, num_priors]
        loss_c = loss_c.view(num, -1)
        # sort losses in descending order, keeping the ordering indices
        _, loss_idx = loss_c.sort(1, descending=True)
        # rank of each prior within the descending order
        _, idx_rank = loss_idx.sort(1)
        # num_pos: [batch, 1], positives per image
        num_pos = pos.long().sum(1, keepdim=True)
        # number of negatives = negpos_ratio x positives (capped)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        # select the hardest negatives
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        # gather the predictions selected by pos_idx / neg_idx
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        # normalize each loss before returning
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        loss_landm /= N1

        return loss_l, loss_c, loss_landm
Пример #11
0
    def forward(self, predicts, targets):
        """SSD MultiBox loss.

        Args:
            predicts (tuple): (loc_data, conf_data, priors) from the SSD net.
                loc_data shape:  [batch_size, num_priors, 4]
                conf_data shape: [batch_size, num_priors, num_classes]
                priors shape:    [num_priors, 4]
            targets (list): per-image tensor [num_objs, 5]; columns 0..3 are
                the box, the last column is the class label.

        Returns:
            LossTuple: (loss_l, loss_c, total_loss), localization and
            confidence losses normalized by the number of positive priors.
        """
        # localization predictions, class confidences, prior (default) boxes
        loc_data, conf_data, priors = predicts
        num = loc_data.size(0)  # batch size
        # keep exactly as many priors as there are predictions
        # (normally they are already equal)
        priors = priors[:loc_data.size(1), :]
        num_priors = priors.size(0)
        # buffers filled in place by match() below
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)

        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
        priors = priors.cuda()

        for idx in range(num):
            truths = targets[idx][:, :-1]  # ground-truth boxes of one image
            labels = targets[idx][:, -1]  # ground-truth labels
            defaults = priors
            # match() writes the encoded targets into loc_t / conf_t in place
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        # matched targets never receive gradients
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        # priors with conf_t > 0 were matched to an object (positives)
        pos = conf_t > 0
        num_pos = pos.sum(dim=1, keepdim=True)

        # Localization loss (Smooth L1) over positives only
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        # reduction='sum' replaces the deprecated size_average=False
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Per-prior confidence loss, used only to rank negatives
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))
        loss_c = loss_c.view(num, -1)

        # Hard negative mining: ignore positives, keep the hardest negatives
        loss_c[pos] = 0
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        # positives per image
        num_pos = pos.long().sum(1, keepdim=True)
        # cap the number of negatives at negpos_ratio * positives
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence loss over positives and the mined negatives
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Normalize by the number of positives; guard against a batch with
        # no matched priors (the original divided by zero in that case).
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        total_loss = loss_l + loss_c
        losses = [loss_l, loss_c, total_loss]
        return LossTuple(*losses)
Пример #12
0
    def forward(self, predictions, priors, targets):
        """Multibox loss with a separate objectness branch.

        Args:
            predictions (tuple): (loc_data, conf_data, obj_data) from the net.
                loc_data shape:  [batch_size, num_priors, 4]
                conf_data shape: [batch_size, num_priors, num_classes - 1]
                obj_data shape:  [batch_size, num_priors, 2]
            priors (tensor): prior boxes, shape [num_priors, 4].
            targets (list): per-image tensor; the first columns are the box,
                the last two columns are (label, per-object weight) -- see
                the slicing in the matching loop below.

        Returns:
            dict: {'loss_box_reg', 'loss_cls', 'loss_obj'}, each normalized
            by the weighted number of positives.
        """
        # loc_data[batch_size, num_priors, 4]
        # conf_data[batch_size, num_priors, num_classes]
        # obj_data[batch_size, num_priors, 2]
        loc_data, conf_data, obj_data = predictions

        device = loc_data.device
        targets = [anno.to(device) for anno in targets]
        num = loc_data.size(0)
        num_priors = priors.size(0)

        # match priors (default boxes) and ground truth boxes; buffers are
        # filled in place by match()
        loc_t = torch.Tensor(num, num_priors, 4).to(device)
        conf_t = torch.Tensor(num, num_priors, 2).to(device)
        obj_t = torch.BoolTensor(num, num_priors).to(device)

        # match priors with gt
        for idx in range(num):  # batch_size
            truths = targets[idx][:, :-2].data  # [obj_num, 4]
            labels = targets[idx][:, -2:].data  # [obj_num, 2]: (label, weight)
            defaults = priors.data  # [num_priors,4]
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, obj_t, idx)

        pos = (conf_t[:, :, 0] > 0).bool()  # [num, num_priors]
        # weighted positive count: channel 1 of conf_t carries the per-prior weight
        num_pos = (conf_t[:, :, 1] * pos.float()).sum(1, keepdim=True).long()

        # Localization Loss (Smooth L1), weighted per positive prior
        # Shape: [batch,num_priors,4]
        loc_p = loc_data[pos]
        loc_t = loc_t[pos]
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='none')
        weight_pos = conf_t[pos][:, 1]
        loss_l = torch.sum(torch.sum(loss_l, dim=1) * weight_pos)

        # Compute object loss across batch for hard negative mining
        with torch.no_grad():
            loss_obj = F.cross_entropy(obj_data.view(-1, 2),
                                       obj_t.long().view(-1),
                                       reduction='none')
            # Hard Negative Mining
            loss_obj[obj_t.view(
                -1
            )] = 0  # filter out pos boxes (label>0) and ignored boxes (label=-1) for now
            loss_obj = loss_obj.view(num, -1)
            _, loss_idx = loss_obj.sort(1, descending=True)
            _, idx_rank = loss_idx.sort(1)
            num_neg = torch.clamp(self.negpos_ratio * num_pos,
                                  max=num_priors - 1)
            neg = idx_rank < num_neg.expand_as(idx_rank)  # [num, num_priors]

        # Object Loss Including Positive and Negative Examples
        mask = pos | neg
        weight = conf_t[mask][:, 1]
        loss_obj = torch.sum(
            F.cross_entropy(
                obj_data[mask], obj_t[mask].long(), reduction='none') * weight)

        # Confidence Loss (cosine distance to classes center)
        # pos [num, num_priors]
        # conf_data [num, num_priors, feature_dim]
        batch_conf = conf_data.view(-1, self.num_classes - 1)

        # Combine objectness and class logits into one num_classes-way logit:
        # class 0 (background) takes the "no object" logit plus log-sum-exp
        # of the class scores; classes 1..K take the "object" logit + score.
        batch_obj = obj_data.view(-1, 2)  # [num*num_priors, 2]
        logit_0 = batch_obj[:, 0].unsqueeze(1) + torch.log(
            torch.exp(batch_conf).sum(dim=1, keepdim=True))
        logit_k = batch_obj[:,
                            1].unsqueeze(1).expand_as(batch_conf) + batch_conf
        logit = torch.cat((logit_0, logit_k), 1)

        # Confidence Loss Including Positive and Negative Examples
        logit = logit.view(num, -1, self.num_classes)
        loss_c = torch.sum(
            F.cross_entropy(
                logit[mask], conf_t[mask][:, 0].long(), reduction='none') *
            weight)

        # Normalize; the clamp guards against division by zero when a batch
        # contains no positives (the original produced NaN/Inf there).
        N = num_pos.sum().clamp(min=1)
        loss_l /= N
        loss_c /= N
        loss_obj /= N

        return {
            'loss_box_reg': loss_l,
            'loss_cls': loss_c,
            'loss_obj': loss_obj
        }
Пример #13
0
    def forward(self, predictions, priors, targets):
        """SSD Multibox loss.

        Args:
            predictions (tuple): (loc_data, conf_data) from the SSD net.
                loc_data shape:  [batch_size, num_priors, 4]
                conf_data shape: [batch_size, num_priors, num_classes]
            priors (tensor): prior boxes, shape [num_priors, 4].
            targets (list): per-image ground truth [num_objs, 5]; the last
                column is the class label.

        Returns:
            (loss_l, loss_c): localization and confidence losses (cast to
            double), normalized by the number of positive priors.
        """
        loc_data, conf_data = predictions
        num = loc_data.size(0)
        num_priors = priors.size(0)

        # match priors (default boxes) and ground truth boxes; the buffers
        # are filled in place by match()
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        # wrap targets: no gradients flow into the matched targets
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        pos = conf_t > 0

        # Localization Loss (Smooth L1), positives only
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        # reduction='sum' replaces the deprecated size_average=False
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Per-prior confidence loss, used only to rank negatives
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Hard Negative Mining
        # reshape to [num, num_priors] *before* masking so the boolean mask
        # matches pos (see lzx1413/PytorchSSD issue #10); the second view of
        # the original was redundant and has been dropped
        loss_c = loss_c.view(num, -1)
        loss_c[pos] = 0  # filter out pos boxes for now
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        # Guard N against a batch with no positives (division by zero).
        N = max(num_pos.data.sum().double(), 1)
        loss_l = loss_l.double()
        loss_c = loss_c.double()
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
Пример #14
0
    def forward(self, predictions, priors, targets):
        """SSD multibox loss with optional mixup weighting, GIoU regression,
        focal loss and label smoothing.

        Args:
            predictions (tuple): (loc_data, conf_data) from the SSD net.
                conf shape: torch.size(batch_size, num_priors, num_classes)
                loc shape:  torch.size(batch_size, num_priors, 4)
            priors (tensor): prior boxes, shape torch.size(num_priors, 4).
            targets (list): per-image ground truth; shape [num_objs, 5]
                (box + label), or [num_objs, 6] when mixup is used
                (box + label + mixup weight).

        Returns:
            (loss_l, loss_c): localization and confidence losses normalized
            by the number of positive priors.
        """
        loc_data, conf_data = predictions
        priors = priors
        num = loc_data.size(0)
        num_priors = (priors.size(0))
        num_classes = self.num_classes

        # match priors (default boxes) and ground truth boxes; buffers are
        # filled in place by match()/match_mixup()
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        if targets[0].shape[1] == 6:  # mixup
            weight_t = torch.Tensor(num, num_priors)
        for idx in range(num):
            defaults = priors.data
            if targets[idx].shape[1] == 6:  # mixup: last column is the weight
                truths = targets[idx][:, :-2].data
                labels = targets[idx][:, -2].data
                weight_loss = targets[idx][:, -1].data
                match_mixup(self.threshold, truths, defaults, self.variance,
                            labels, loc_t, conf_t, idx, weight_t, weight_loss,
                            self.giou)
            elif targets[idx].shape[1] == 5:  # no mixup
                truths = targets[idx][:, :-1].data
                labels = targets[idx][:, -1].data
                match(self.threshold, truths, defaults, self.variance, labels,
                      loc_t, conf_t, idx, self.giou)
            else:
                print('The shape of targets is error')

        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        # wrap targets: no gradients flow into the matched targets
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        pos = conf_t > 0

        # mixup flag is decided from the first image's target shape
        # NOTE(review): assumes all images in the batch agree -- verify
        mix_up = (False, True)[targets[0].shape[1] == 6]
        pos_weight = None
        weights_conf = None

        # Localization Loss (Smooth L1)
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)

        if self.giou:
            # GIoU regression needs the priors in corner form
            prior_giou = point_form(priors)  # [x,y,h,w]->[x0,y0,x1,y1]
            prior_giou = prior_giou.unsqueeze(0).expand(num, num_priors, 4)
            prior_giou = prior_giou[pos_idx].view(-1, 4)
            reg_loss = GIoUloss()
            loss_l = reg_loss(loc_p, prior_giou, loc_t)
        else:
            if mix_up:
                # per-prior mixup weights for the positive boxes
                weight_t = weight_t.cuda()
                weight_t = Variable(weight_t, requires_grad=False)
                pos_weight = weight_t[pos].view(-1, 1)

            reg_loss = SmoothL1_Mixup_Balance_loss(mixup=mix_up,
                                                   balance=self.balance_l1,
                                                   size_average=False)
            loss_l = reg_loss(loc_p, loc_t, pos_weight)

        # Confidence Loss
        if self.sigmoid_focal:
            # if use original focal loss, please modify the output of the test in models/SSD.py to the sigmoid
            batch_conf = conf_data.view(-1, self.num_classes)
            # one-hot encoding of the matched labels
            label_onehot = batch_conf.clone().zero_().scatter(
                1, conf_t.view(-1, 1), 1)
            # focal loss: alpha-balanced, (1 - pt)^gamma modulated BCE
            alpha = self.alpha * label_onehot + (1 - self.alpha) * (
                1 - label_onehot)
            p = torch.sigmoid(batch_conf)
            pt = torch.where(label_onehot == 1, p, 1 - p)
            loss_c = -alpha * ((1 - pt)**self.gamma) * torch.log(pt)
            loss_c = loss_c.sum()
            num_pos = pos.long().sum(1, keepdim=True)
        else:
            batch_conf = conf_data.view(-1, self.num_classes)
            # per-prior loss used only to rank negatives
            loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
                1, conf_t.view(-1, 1))

            # Hard Negative Mining
            loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
            loss_c = loss_c.view(num, -1)
            _, loss_idx = loss_c.sort(1, descending=True)
            _, idx_rank = loss_idx.sort(1)
            num_pos = pos.long().sum(1, keepdim=True)
            # number of negatives = negpos_ratio x positives (capped)
            num_neg = torch.clamp(self.negpos_ratio * num_pos,
                                  max=pos.size(1) - 1)
            neg = idx_rank < num_neg.expand_as(idx_rank)

            # Confidence Loss Including Positive and Negative Examples
            pos_idx = pos.unsqueeze(2).expand_as(conf_data)
            neg_idx = neg.unsqueeze(2).expand_as(conf_data)
            conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
                -1, self.num_classes)
            if self.label_smooth:
                # smoothed soft labels: background rows get value 1 at their
                # label, positive rows get label_pos at the gt class and
                # label_neg added over the foreground classes
                p = conf_t.clone().view(-1, 1).float()
                lp = torch.where(p < 1, p + 1,
                                 torch.tensor(self.label_pos).cuda())
                label = batch_conf.clone().zero_().scatter_(
                    1, conf_t.view(-1, 1), lp)
                label[:, 1:][pos.clone().view(-1,
                                              1).flatten()] += self.label_neg
                # keep only the rows selected by hard negative mining
                label_ohem = (pos + neg).view(-1, 1).expand_as(batch_conf)
                targets_weighted = label[label_ohem.gt(0)].view(
                    -1, self.num_classes)
            else:
                targets_weighted = conf_t[(pos + neg).gt(0)]
            if mix_up:
                # mixup weights for the selected (pos + neg) priors; zero
                # weights (unmatched priors) fall back to 1.0
                weights_conf = weight_t[(pos + neg).gt(0)]
                weights_conf = torch.where(weights_conf > 0, weights_conf,
                                           weights_conf + 1.0).view(-1, 1)

            conf_loss = Crossentropy_Mixup_SoftmaxFocal_LableSmooth_loss(
                mixup=mix_up,
                focal_loss=self.softmax_focal,
                gamma=2.0,
                alpha=1.0,
                label_smooth=self.label_smooth,
                size_average=False)
            loss_c = conf_loss(conf_p, targets_weighted, weights_conf)

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N

        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
Пример #15
0
    def forward(self, predictions, priors, targets):
        """Multibox loss with optional IoU-weighted (soft-label) confidence.

        Args:
            predictions (tuple): (loc_data, conf_data) from the SSD net.
                loc_data shape:  [batch_size, num_priors, 4]
                conf_data shape: [batch_size, num_priors, num_classes]
            priors (tensor): prior boxes, shape [num_priors, 4].
            targets (list): per-image ground truth [num_objs, 5]; the last
                column is the class label.

        Returns:
            (loss_l, loss_c): localization and confidence losses normalized
            by the number of positive priors. When self.soft_label is set,
            the per-prior confidence loss of positives is weighted by the
            match IoU reported by match().
        """
        loc_data, conf_data = predictions
        num = loc_data.size(0)
        num_priors = priors.size(0)

        # buffers filled in place by match(); ious holds the match IoU per prior
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        ious = torch.Tensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            defaults = priors.data
            match(self.threshold, self.size_range, self.iou_param,
                  self.adapt_param, self.iou_type, truths, defaults,
                  self.variance, labels, loc_t, conf_t, idx, ious)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
            ious = ious.cuda()
        # wrap targets: no gradients flow into the matched targets
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        pos = conf_t > 0

        # Localization Loss (Smooth L1), positives only
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        # reduction='sum' replaces the deprecated size_average=False, for
        # consistency with the cross_entropy calls below
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Per-prior confidence loss, used only to rank negatives
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Hard Negative Mining
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        if self.soft_label:
            # Use the full gradient of negative samples and IoU-weighted
            # gradients of positive samples: negatives get weight 1,
            # positives keep their match IoU as weight.
            ious[neg] = 1
            target_ious = ious[pos + neg]
            loss_c = F.cross_entropy(conf_p,
                                     targets_weighted,
                                     reduction='none')
            loss_c = torch.sum(loss_c * target_ious)
        else:
            loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
    def forward(self,
                odm_data,
                priors,
                targets,
                arm_data=None,
                filter_object=False):
        """RefineDet ODM multibox loss.

        Args:
            odm_data (tuple): (loc_data, conf_data) from the ODM head.
            priors (tensor): prior boxes, shape [num_priors, 4].
            targets (list): per-image ground truth [num_objs, 5]; the last
                column is the class label.
            arm_data (tuple, optional): (arm_loc, arm_conf) from the ARM
                branch; when given, priors are refined by arm_loc before
                matching.
            filter_object (bool): when True, priors whose ARM objectness
                probability is <= self.object_score are excluded from the
                positive set.

        Returns:
            (loss_l, loss_c): localization and confidence losses normalized
            by the number of positive priors.
        """
        loc_data, conf_data = odm_data
        if arm_data:
            arm_loc, arm_conf = arm_data
        priors = priors.detach()
        num = loc_data.size(0)
        num_priors = priors.size(0)

        # match priors (default boxes) and ground truth boxes; buffers are
        # filled in place by match()/refine_match()
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].detach()
            labels = targets[idx][:, -1].detach()
            # binary (object vs background) detection collapses all classes
            if self.num_classes == 2:
                labels = labels > 0

            if arm_data:
                refine_match(self.threshold, truths, priors, self.variance,
                             labels, loc_t, conf_t, idx, arm_loc[idx].detach())
            else:
                match(self.threshold, truths, priors, self.variance, labels,
                      loc_t, conf_t, idx)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()

        if arm_data and filter_object:
            # drop positives whose ARM objectness probability is too low
            P = F.softmax(arm_conf, 2)
            arm_conf_tmp = P[:, :, 1]
            object_score_index = arm_conf_tmp <= self.object_score
            pos = conf_t > 0
            pos[object_score_index.detach()] = 0
        else:
            pos = conf_t > 0

        # Localization Loss (Smooth L1), positives only
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Per-prior confidence loss; ignore_index=-1 skips priors the
        # matcher marked as ignored
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = F.cross_entropy(batch_conf,
                                 conf_t.view(-1),
                                 ignore_index=-1,
                                 reduction='none')
        loss_c = loss_c.view(num, -1)

        # Hard Negative Mining: keep the positive losses first, then zero
        # them so only negatives get ranked
        pos_loss_c = loss_c[pos]
        loss_c[pos] = 0  # filter out pos boxes for now
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)
        neg_loss_c = loss_c[neg]

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        loss_c = pos_loss_c.sum() + neg_loss_c.sum()
        # Guard against a batch with no positives (the original divided by
        # zero and produced NaN in that case).
        N = max(num_pos.data.sum().float(), 1)
        loss_l = loss_l / N
        loss_c = loss_c / N
        return loss_l, loss_c
    def forward(self, predictions, priors, targets):
        """Multibox loss for a face detector with landmark regression.

        Args:
            predictions (tuple): (loc_data, conf_data, landm_data) from the net.
                loc_data shape:   torch.Size([batch_size, num_priors, 4])
                conf_data shape:  torch.Size([batch_size, num_priors, num_classes])
                landm_data shape: torch.Size([batch_size, num_priors, 10])
            priors (tensor): prior (default) boxes, shape [num_priors, 4].
            targets (list[tensor]): per-image ground truth; columns are
                0:4 box, 4:14 landmarks, last column the class label.

        Returns:
            tuple: (loss_l, loss_c, loss_landm) — localization,
            classification and landmark losses, each normalized by the
            relevant positive-prior count.
        """
        loc_data, conf_data, landm_data = predictions
        num = loc_data.size(0)       # batch size
        num_priors = priors.size(0)  # number of prior boxes

        # Buffers that match() fills in-place with per-prior regression
        # targets, class labels and landmark targets.
        loc_t = torch.Tensor(num, num_priors, 4)
        landm_t = torch.Tensor(num, num_priors, 10)
        conf_t = torch.LongTensor(num, num_priors)

        for idx in range(num):
            truths = targets[idx][:, :4].data    # ground-truth boxes
            labels = targets[idx][:, -1].data    # ground-truth labels
            landms = targets[idx][:, 4:14].data  # ground-truth landmarks
            defaults = priors.data
            # Match ground truth to priors; writes into the *_t buffers
            # at batch index idx.
            match(self.threshold, truths, defaults, self.variance, labels,
                  landms, loc_t, conf_t, landm_t, idx)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
            landm_t = landm_t.cuda()

        zeros = torch.tensor(0).cuda()

        # Landmark loss (Smooth L1), only on priors whose matched label is
        # strictly positive (non-positive labels mark faces w/o landmarks).
        # Shape: [batch, num_priors, 10]
        pos1 = conf_t > zeros
        num_pos_landm = pos1.long().sum(1, keepdim=True)  # positives per image
        N1 = max(num_pos_landm.data.sum().float(), 1)  # avoid div-by-zero
        pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
        landm_p = landm_data[pos_idx1].view(-1, 10)
        landm_t = landm_t[pos_idx1].view(-1, 10)
        loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')

        # For box/conf losses every non-background prior is a positive;
        # collapse all face labels to class 1.
        pos = conf_t != zeros
        conf_t[pos] = 1

        # Localization loss (Smooth L1) over positive priors only.
        # Shape: [batch, num_priors, 4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)   # predicted positive boxes
        loc_t = loc_t[pos_idx].view(-1, 4)      # matched target boxes
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Hard negative mining:
        #   1. score every prior by its classification loss (the less
        #      confidently background is predicted, the larger the loss);
        #   2. rank the negatives by that loss in descending order;
        #   3. keep only the top-k so that neg:pos ≈ negpos_ratio:1.

        # Per-prior softmax cross-entropy written via log-sum-exp.
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Exclude positives so only negatives get ranked.
        loss_c[pos.view(-1, 1)] = 0
        loss_c = loss_c.view(num, -1)
        # Sorting twice yields each element's rank in descending loss order.
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        # Number of positives per image determines how many negatives to keep.
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)  # top-k negatives

        # Confidence loss over positives plus mined negatives.
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        N = max(num_pos.data.sum().float(), 1)  # avoid div-by-zero
        loss_l /= N
        loss_c /= N
        loss_landm /= N1

        return loss_l, loss_c, loss_landm
Пример #18
0
    def forward(self, predictions, priors, targets):
        """SSD multibox loss: localization + hard-negative-mined confidence.

        Args:
            predictions (tuple): (loc_data, conf_data) from the SSD net.
                loc_data shape:  torch.Size([batch_size, num_priors, 4])
                conf_data shape: torch.Size([batch_size, num_priors, num_classes])
            priors (tensor): prior (default) boxes, shape [num_priors, 4].
            targets (list[tensor]): per-image ground truth of shape
                [num_objs, 5]; the last column is the class label.

        Returns:
            tuple: (loss_l, loss_c) — localization and classification
            losses, each normalized by the number of positive priors.
        """
        loc_data, conf_data = predictions
        num = loc_data.size(0)       # batch size
        num_priors = priors.size(0)

        # Buffers that match() fills in-place: regression targets and
        # class labels for every prior box.
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()

        pos = conf_t > 0  # priors matched to a ground-truth object

        # Localization loss (Smooth L1) over positive priors only.
        # Shape: [batch, num_priors, 4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Per-prior classification loss via log-sum-exp, used only to
        # rank candidates for hard negative mining.
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Hard negative mining: zero out positives, rank the remaining
        # priors by loss, keep the top-k so neg:pos ≈ negpos_ratio:1.
        loss_c[pos.view(-1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        # Sorting twice yields each prior's rank in descending loss order.
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence loss over positives plus mined negatives.
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        # Guard against a batch with no positives (division by zero).
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
Пример #19
0
    def forward(self, predictions, priors, targets):
        """Multibox loss with landmark, visibility and hard-example terms.

        Args:
            predictions (tuple): (loc_data, conf_data, landm_data,
                visible_data) from the net.
                loc_data shape:     torch.Size([batch, num_priors, 4])
                conf_data shape:    torch.Size([batch, num_priors, num_classes])
                landm_data shape:   torch.Size([batch, num_priors, 10])
                visible_data shape: torch.Size([batch, num_priors, 5])
            priors (tensor): prior (default) boxes, shape [num_priors, 4].
            targets (list[tensor]): per-image ground truth; columns are
                0:4 box, 4:14 landmarks, -7 label, -6 angle,
                -5: the 5 visibility flags.

        Returns:
            tuple: (loss_l, loss_c, loss_landm, loss_vis).
        """
        loc_data, conf_data, landm_data, visible_data = predictions
        num = loc_data.size(0)       # batch size
        num_priors = priors.size(0)

        # Buffers that match() fills in-place with per-prior targets.
        loc_t = torch.Tensor(num, num_priors, 4)
        landm_t = torch.Tensor(num, num_priors, 10)
        conf_t = torch.LongTensor(num, num_priors)
        angle_t = torch.LongTensor(num, num_priors)
        visible_t = torch.Tensor(num, num_priors, 5)

        for idx in range(num):
            # Target layout for labels that carry an angle annotation.
            truths = targets[idx][:, :4].data
            labels = targets[idx][:, -7].data
            landms = targets[idx][:, 4:14].data
            angles = targets[idx][:, -6].data
            visible = targets[idx][:, -5:].data
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  landms, loc_t, conf_t, landm_t, idx, angles, angle_t,
                  visible, visible_t)

        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
            landm_t = landm_t.cuda()
            angle_t = angle_t.cuda()
            visible_t = visible_t.cuda()

        zeros = torch.tensor(0).cuda()

        # Landmark loss on priors whose matched label is strictly positive.
        pos1 = conf_t > zeros
        num_pos_landm = pos1.long().sum(1, keepdim=True)  # positives per image
        N1 = max(num_pos_landm.data.sum().float(), 1)  # avoid div-by-zero
        pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
        landm_p = landm_data[pos_idx1].view(-1, 10)
        landm_t = landm_t[pos_idx1].view(-1, 10)

        # Only visible landmarks contribute (target value -1 = invisible).
        mask = torch.logical_not(landm_t == -1)

        # Hard example mining over the landmark losses: keep the 50% of
        # positive priors with the largest mean masked loss and sum those.
        loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='none')
        loss_landm_mask = mask * loss_landm
        loss_landm_mask_mean = torch.mean(loss_landm_mask, -1)
        loss_landm_mask_sum = torch.sum(loss_landm_mask, -1)
        size = int(0.5 * loss_landm_mask.shape[0])
        _, topk_idx = torch.topk(loss_landm_mask_mean, k=size)
        loss_landm = torch.sum(loss_landm_mask_sum[topk_idx])
        N2 = size  # normalizer for the mined landmark loss

        # Visibility loss: sigmoid + BCE on positive priors.
        vis_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(visible_data)
        vis_p = visible_data[vis_idx1].view(-1, 5)
        vis_t = visible_t[vis_idx1].view(-1, 5)
        vis_p = torch.sigmoid(vis_p)
        criterions = nn.BCELoss(reduction='sum')
        loss_vis = criterions(vis_p, vis_t)

        # For box/conf losses every non-background prior is a positive;
        # collapse all object labels to class 1.
        pos = conf_t != zeros
        conf_t[pos] = 1

        # Localization loss (Smooth L1) over positive priors only.
        # Shape: [batch, num_priors, 4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Per-prior classification loss via log-sum-exp, used only to
        # rank candidates for hard negative mining.
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Hard negative mining: drop positives, rank negatives by loss,
        # keep the top-k so neg:pos ≈ negpos_ratio:1.
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)  # rank of each prior in the sort
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence loss over positives plus mined negatives.
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        N = max(num_pos.data.sum().float(), 1)  # avoid div-by-zero
        loss_l /= N
        loss_c /= N
        loss_landm /= N2
        loss_vis /= N1

        return loss_l, loss_c, loss_landm, loss_vis