def forward(self, x, targets=None, img_dim=None): # Tensors for cuda support FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor self.img_dim = img_dim num_samples = x.size(0) grid_size = x.size(2) prediction = (x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size).permute(0, 1, 3, 4, 2).contiguous()) # Get outputs x = torch.sigmoid(prediction[..., 0]) # Center x y = torch.sigmoid(prediction[..., 1]) # Center y w = prediction[..., 2] # Width h = prediction[..., 3] # Height pred_conf = torch.sigmoid(prediction[..., 4]) # Conf pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred. # If grid size does not match current we compute new offsets if grid_size != self.grid_size: self.compute_grid_offsets(grid_size, cuda=x.is_cuda) # Add offset and scale with anchors pred_boxes = FloatTensor(prediction[..., :4].shape) pred_boxes[..., 0] = x.data + self.grid_x pred_boxes[..., 1] = y.data + self.grid_y pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h output = torch.cat( ( pred_boxes.view(num_samples, -1, 4) * self.stride, pred_conf.view(num_samples, -1, 1), pred_cls.view(num_samples, -1, self.num_classes), ), -1, ) if targets is None: return output, 0 else: iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets( pred_boxes=pred_boxes, pred_cls=pred_cls, target=targets, anchors=self.scaled_anchors, ignore_thres=self.ignore_thres, ) # Loss : Mask outputs to ignore non-existing objects (except with conf. loss) loss_x = self.mse_loss(x[obj_mask], tx[obj_mask]) loss_y = self.mse_loss(y[obj_mask], ty[obj_mask]) loss_w = self.mse_loss(w[obj_mask], tw[obj_mask]) loss_h = self.mse_loss(h[obj_mask], th[obj_mask]) loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask]) loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask]) loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask]) total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls # Metrics cls_acc = 100 * class_mask[obj_mask].mean() conf_obj = pred_conf[obj_mask].mean() conf_noobj = pred_conf[noobj_mask].mean() conf50 = (pred_conf > 0.5).float() iou50 = (iou_scores > 0.5).float() iou75 = (iou_scores > 0.75).float() detected_mask = conf50 * class_mask * tconf precision = torch.sum(iou50 * detected_mask) / \ (conf50.sum() + 1e-16) recall50 = torch.sum(iou50 * detected_mask) / \ (obj_mask.sum() + 1e-16) recall75 = torch.sum(iou75 * detected_mask) / \ (obj_mask.sum() + 1e-16) self.metrics = { "loss": to_cpu(total_loss).item(), "x": to_cpu(loss_x).item(), "y": to_cpu(loss_y).item(), "w": to_cpu(loss_w).item(), "h": to_cpu(loss_h).item(), "conf": to_cpu(loss_conf).item(), "cls": to_cpu(loss_cls).item(), "cls_acc": to_cpu(cls_acc).item(), "recall50": to_cpu(recall50).item(), "recall75": to_cpu(recall75).item(), "precision": to_cpu(precision).item(), "conf_obj": to_cpu(conf_obj).item(), "conf_noobj": to_cpu(conf_noobj).item(), "grid_size": grid_size, } return output, total_loss
def forward(self, x, targets=None): nA = self.num_anchors nB = x.size(0) nG = x.size(2) stride = self.image_dim / nG # Tensors for cuda support FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor prediction = x.view(nB, nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous() # Get outputs x = torch.sigmoid(prediction[..., 0]) # Center x y = torch.sigmoid(prediction[..., 1]) # Center y w = prediction[..., 2] # Width h = prediction[..., 3] # Height pred_conf = torch.sigmoid(prediction[..., 4]) # Conf pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred. # Calculate offsets for each grid grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).type(FloatTensor) grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).type(FloatTensor) scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors]) anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1)) anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1)) # Add offset and scale with anchors pred_boxes = FloatTensor(prediction[..., :4].shape) pred_boxes[..., 0] = x.data + grid_x pred_boxes[..., 1] = y.data + grid_y pred_boxes[..., 2] = torch.exp(w.data) * anchor_w pred_boxes[..., 3] = torch.exp(h.data) * anchor_h # Training if targets is not None: if x.is_cuda: self.mse_loss = self.mse_loss.cuda() self.bce_loss = self.bce_loss.cuda() self.ce_loss = self.ce_loss.cuda() nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets( pred_boxes=pred_boxes.cpu().data, pred_conf=pred_conf.cpu().data, pred_cls=pred_cls.cpu().data, target=targets.cpu().data, anchors=scaled_anchors.cpu().data, num_anchors=nA, num_classes=self.num_classes, grid_size=nG, ignore_thres=self.ignore_thres, img_dim=self.image_dim, ) nProposals = int((pred_conf > 0.5).sum().item()) recall = float(nCorrect / nGT) if nGT else 1 precision = float(nCorrect / nProposals) # Handle masks mask = Variable(mask.type(ByteTensor)) conf_mask = Variable(conf_mask.type(ByteTensor)) # Handle target variables tx = Variable(tx.type(FloatTensor), requires_grad=False) ty = Variable(ty.type(FloatTensor), requires_grad=False) tw = Variable(tw.type(FloatTensor), requires_grad=False) th = Variable(th.type(FloatTensor), requires_grad=False) tconf = Variable(tconf.type(FloatTensor), requires_grad=False) tcls = Variable(tcls.type(LongTensor), requires_grad=False) # Get conf mask where gt and where there is no gt conf_mask_true = mask conf_mask_false = conf_mask - mask # Mask outputs to ignore non-existing objects loss_x = self.mse_loss(x[mask], tx[mask]) loss_y = self.mse_loss(y[mask], ty[mask]) loss_w = self.mse_loss(w[mask], tw[mask]) loss_h = self.mse_loss(h[mask], th[mask]) loss_conf = self.bce_loss(pred_conf[conf_mask_false], tconf[conf_mask_false]) + self.bce_loss( pred_conf[conf_mask_true], tconf[conf_mask_true] ) loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1)) loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls return ( loss, loss_x.item(), loss_y.item(), loss_w.item(), loss_h.item(), loss_conf.item(), loss_cls.item(), recall, precision, ) else: # If not in training phase return predictions output = torch.cat( ( pred_boxes.view(nB, -1, 4) * stride, pred_conf.view(nB, -1, 1), pred_cls.view(nB, -1, self.num_classes), ), -1, ) return output