def forward(self, odm_data, priors, targets, arm_data=None, filter_object=False):
    """Multibox Loss

    Args:
        odm_data (tuple): loc preds and conf preds from the ODM branch.
            conf shape: torch.Size(batch_size, num_priors, num_classes)
            loc shape:  torch.Size(batch_size, num_priors, 4)
        priors (tensor): Prior boxes, shape: torch.Size(num_priors, 4).
        targets (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size, num_objs, 5] (last idx is the label).
        arm_data (tuple): ARM branch output containing arm_loc and arm_conf.
        filter_object (bool): Whether to filter out predictions whose ARM
            confidence score is below self.object_score.
    """
    loc_data, conf_data = odm_data
    if arm_data:
        arm_loc, arm_conf = arm_data
    priors = priors.data
    num = loc_data.size(0)
    num_priors = priors.size(0)

    # Match priors (default boxes) and ground truth boxes.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        # Binary detection: collapse all classes to object vs. background.
        if self.num_classes == 2:
            labels = labels > 0
        if arm_data:
            refine_match(self.threshold, truths, priors, self.variance,
                         labels, loc_t, conf_t, idx, arm_loc[idx].data)
        else:
            match(self.threshold, truths, priors, self.variance,
                  labels, loc_t, conf_t, idx)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # Wrap targets; no gradient flows into the matched targets.
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    if arm_data and filter_object:
        # Suppress positives whose ARM objectness score is too low.
        arm_conf_data = arm_conf.data[:, :, 1]
        pos = conf_t > 0
        object_score_index = arm_conf_data <= self.object_score
        pos[object_score_index] = 0
    else:
        pos = conf_t > 0

    # Localization Loss (Smooth L1), over positives only.
    # Shape: [batch, num_priors, 4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)

    # Per-prior confidence loss, used to rank priors for hard negative mining.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

    # Hard Negative Mining: reshape to [batch, num_priors] *before* masking
    # so the boolean index lines up with pos.
    loss_c = loss_c.view(num, -1)
    loss_c[pos] = 0  # filter out pos boxes for now
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss including positive and hard negative examples.
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)

    # Sum of losses: L(x,c,l,g) = (Lconf(x,c) + αLloc(x,l,g)) / N
    N = num_pos.data.sum()
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
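# The variant above calls log_sum_exp() without defining it. Below is a
# minimal sketch of the helper it presumably refers to, in the numerically
# stable form common to ssd.pytorch-style code bases; this definition is an
# assumption, not taken from this file.
import torch

def log_sum_exp(x):
    """Stable log(sum(exp(x))) over the class dimension of an (N, C) tensor."""
    x_max = x.data.max()  # subtracting the global max keeps exp() from overflowing
    return torch.log(torch.sum(torch.exp(x - x_max), 1, keepdim=True)) + x_max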
def forward(self, predictions, targets, use_arm=False, filter_object=False,
            filter_score=0, debug=False):
    """Multibox Loss

    Args:
        predictions (tuple): loc preds, conf preds, and prior boxes from the
            net (plus ARM loc/conf preds, unpacked when use_arm is True).
            conf shape:   torch.Size(batch_size, num_priors, num_classes)
            loc shape:    torch.Size(batch_size, num_priors, 4)
            priors shape: torch.Size(num_priors, 4)
        targets (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size, num_objs, 5] (last idx is the label).
    """
    if use_arm:
        arm_loc_data, arm_conf_data, loc_data, conf_data, priors = predictions
    else:
        loc_data, conf_data, _, _, priors = predictions
    num = loc_data.size(0)
    priors = priors[:loc_data.size(1), :]
    num_priors = priors.size(0)
    num_classes = self.num_classes

    # Match priors (default boxes) and ground truth boxes.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    defaults = priors.data
    for idx in range(num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        if self.num_classes == 2:
            labels = labels > 0
        if use_arm:
            bbox_weight = refine_match(self.threshold, truths, defaults,
                                       self.variance, labels, loc_t, conf_t,
                                       idx, arm_loc_data[idx].data,
                                       use_weight=False)
        else:
            match(self.threshold, truths, defaults, self.variance,
                  labels, loc_t, conf_t, idx)
    if self.use_gpu:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # Wrap targets; no gradient flows into the matched targets.
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    if use_arm and filter_object:
        # Suppress positives whose ARM objectness probability is too low.
        P = F.softmax(arm_conf_data, 2)
        arm_conf_data_temp = P[:, :, 1]
        object_score_index = arm_conf_data_temp <= self.object_score
        pos = conf_t > 0
        pos[object_score_index.detach()] = 0
    else:
        pos = conf_t > 0

    # Localization Loss (Smooth L1), over positives only.
    # Shape: [batch, num_priors, 4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    if debug:
        if use_arm:
            print("odm pos num:", loc_t.size(0))
        else:
            print("arm pos num:", loc_t.size(0))
    loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)

    # Per-prior confidence loss, used to rank priors for hard negative mining.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

    # Hard Negative Mining
    loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss including positive and hard negative examples.
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)

    N = num_pos.data.sum()
    loss_l /= float(N)
    loss_c /= float(N)
    return loss_l, loss_c
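# All the variants here rank priors for hard negative mining with a double
# argsort: sorting once orders priors by loss, sorting the resulting indices
# again yields each prior's rank in that ordering. A standalone sketch of the
# same trick with toy numbers (hypothetical values, for illustration only):
import torch

loss_c = torch.tensor([[0.1, 0.9, 0.4, 0.7]])  # conf loss per prior, one image
_, loss_idx = loss_c.sort(1, descending=True)  # priors ordered by loss: [1, 3, 2, 0]
_, idx_rank = loss_idx.sort(1)                 # rank of each prior:     [3, 0, 2, 1]
num_neg = 2
neg = idx_rank < num_neg                       # keep the two hardest negatives
print(neg)  # tensor([[False,  True, False,  True]]) -> priors 1 and 3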
def forward(self, odm_data, priors, targets, arm_data=None, filter_object=False):
    """Multibox Loss (same interface as the first variant, ported to
    post-0.4 PyTorch APIs: detach() instead of .data/Variable, and
    reduction= instead of size_average=)."""
    loc_data, conf_data = odm_data
    if arm_data:
        arm_loc, arm_conf = arm_data
    priors = priors.detach()
    num = loc_data.size(0)
    num_priors = priors.size(0)

    # Match priors (default boxes) and ground truth boxes.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].detach()
        labels = targets[idx][:, -1].detach()
        # Binary detection: collapse all classes to object vs. background.
        if self.num_classes == 2:
            labels = labels > 0
        if arm_data:
            refine_match(self.threshold, truths, priors, self.variance,
                         labels, loc_t, conf_t, idx, arm_loc[idx].detach())
        else:
            match(self.threshold, truths, priors, self.variance,
                  labels, loc_t, conf_t, idx)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()

    if arm_data and filter_object:
        # Suppress positives whose ARM objectness probability is too low.
        P = F.softmax(arm_conf, 2)
        arm_conf_tmp = P[:, :, 1]
        object_score_index = arm_conf_tmp <= self.object_score
        pos = conf_t > 0
        pos[object_score_index.detach()] = 0
    else:
        pos = conf_t > 0

    # Localization Loss (Smooth L1), over positives only.
    # Shape: [batch, num_priors, 4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Per-prior confidence loss for hard negative mining; priors marked
    # with ignore_index=-1 during matching contribute nothing.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = F.cross_entropy(batch_conf, conf_t.view(-1),
                             ignore_index=-1, reduction='none')
    loss_c = loss_c.view(num, -1)

    # Hard Negative Mining: save the positives' losses, then zero them out
    # so only negatives compete in the ranking.
    pos_loss_c = loss_c[pos]
    loss_c[pos] = 0  # filter out pos boxes for now
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)
    neg_loss_c = loss_c[neg]

    # Sum of losses: L(x,c,l,g) = (Lconf(x,c) + αLloc(x,l,g)) / N
    loss_c = pos_loss_c.sum() + neg_loss_c.sum()
    N = num_pos.data.sum().float()
    loss_l = loss_l / N
    loss_c = loss_c / N
    return loss_l, loss_c
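# The filter_object branch shared by all three variants drops positives whose
# ARM objectness probability falls below self.object_score, so the ODM is not
# trained on anchors the ARM has already rejected as background. A standalone
# sketch of that step with toy tensors (the shapes and the 0.01 threshold are
# assumptions for illustration):
import torch
import torch.nn.functional as F

arm_conf = torch.randn(2, 5, 2)        # (batch, num_priors, {background, object}) logits
conf_t = torch.randint(0, 3, (2, 5))   # matched ODM class target per prior
object_score = 0.01

P = F.softmax(arm_conf, dim=2)         # per-prior class probabilities
arm_obj = P[:, :, 1]                   # probability that the anchor contains an object
pos = conf_t > 0                       # priors matched to some ground-truth box
pos[arm_obj <= object_score] = False   # suppress anchors the ARM rejected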