Example #1
    def forward(self, outputs, tg):
        # tg: targets
        outputs['hm_mc'] = _sigmoid(outputs['hm_mc'])
        outputs['hm_ver'] = _sigmoid(outputs['hm_ver'])

        # Normalize dimension
        # TODO: What happens if norm_dim < 0? We can't apply the log operator then.
        # tg['dim'] = self.normalize_dim(tg['dim'])
        # tg['dim'] = F.log(tg['dim'])  # take the log of the normalized dimension

        # Follow depth loss in CenterNet
        outputs['depth'] = 1. / (_sigmoid(outputs['depth']) + 1e-9) - 1.

        l_hm_mc = self.focal_loss(outputs['hm_mc'], tg['hm_mc'])
        l_hm_ver = self.focal_loss(outputs['hm_ver'], tg['hm_ver'])
        # output, mask, ind, target
        l_vercoor = self.vercoor_l1(outputs['vercoor'], tg['ver_coor_mask'],
                                    tg['indices_center'], tg['ver_coor'])
        l_cenoff = self.l1_loss(outputs['cenoff'], tg['obj_mask'],
                                tg['indices_center'], tg['cen_offset'])
        l_veroff = self.l1_loss(outputs['veroff'], tg['ver_offset_mask'],
                                tg['indices_vertexes'], tg['ver_offset'])
        # TODO: What happens if norm_dim < 0? We can't apply the log operator then.
        # Use the dimension loss (l1_loss) from CenterNet instead of the l2_loss in the paper
        l_dim = self.l1_loss(outputs['dim'], tg['obj_mask'],
                             tg['indices_center'], tg['dim'])
        # output, mask, ind, rotbin, rotres
        l_rot = self.rot_loss(outputs['rot'], tg['obj_mask'],
                              tg['indices_center'], tg['rotbin'], tg['rotres'])
        # Use the depth loss (l1_loss) from CenterNet instead of the l2_loss in the paper
        # l_depth = self.l2_loss(torch.log(outputs['depth']), tg['obj_mask'], tg['indices_center'], torch.log(tg['depth']))
        l_depth = self.l1_loss(outputs['depth'], tg['obj_mask'],
                               tg['indices_center'], tg['depth'])
        l_boxwh = self.l1_loss(outputs['wh'], tg['obj_mask'],
                               tg['indices_center'], tg['wh'])

        total_loss = l_hm_mc * self.weight_hm_mc + l_hm_ver * self.weight_hm_ver + l_vercoor * self.weight_vercoor + \
                     l_cenoff * self.weight_cenoff + l_veroff * self.weight_veroff + l_dim * self.weight_dim + \
                     l_rot * self.weight_rot + l_depth * self.weight_depth + l_boxwh * self.weight_wh

        loss_stats = {
            'total_loss': to_cpu(total_loss).item(),
            'hm_mc_loss': to_cpu(l_hm_mc).item(),
            'hm_ver_loss': to_cpu(l_hm_ver).item(),
            'ver_coor_loss': to_cpu(l_vercoor).item(),
            'cen_offset_loss': to_cpu(l_cenoff).item(),
            'ver_offset_loss': to_cpu(l_veroff).item(),
            'dim_loss': to_cpu(l_dim).item(),
            'rot_loss': to_cpu(l_rot).item(),
            'depth_loss': to_cpu(l_depth).item(),
            'wh_loss': to_cpu(l_boxwh).item()
        }

        return total_loss, loss_stats
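
Example #1 leans on a few helpers that are not shown. Below is a minimal sketch, assuming the usual CenterNet-style implementations of _sigmoid, to_cpu, and the (output, mask, ind, target) masked-L1 interface; the clamp bounds and the normalization constant are assumed defaults, not taken from this code.

import torch
import torch.nn as nn
import torch.nn.functional as F

def _sigmoid(x):
    # Clamped sigmoid so the focal loss never sees exact 0 or 1
    # (the 1e-4 bounds are the usual CenterNet defaults; assumed here).
    return torch.clamp(x.sigmoid_(), min=1e-4, max=1 - 1e-4)

def to_cpu(tensor):
    # Detach from the autograd graph and move to CPU for logging.
    return tensor.detach().cpu()

def _transpose_and_gather_feat(feat, ind):
    # feat: (B, C, H, W); ind: (B, K) flat indices into H*W.
    # Returns the C-dim features at the K locations: (B, K, C).
    feat = feat.permute(0, 2, 3, 1).contiguous()
    feat = feat.view(feat.size(0), -1, feat.size(3))
    ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), feat.size(2))
    return feat.gather(1, ind)

class MaskedL1Loss(nn.Module):
    # Hedged sketch of the assumed l1_loss(output, mask, ind, target):
    # gather predictions at the K object indices, zero out padded slots,
    # and normalize by the number of valid entries.
    def forward(self, output, mask, ind, target):
        pred = _transpose_and_gather_feat(output, ind)
        mask = mask.unsqueeze(2).expand_as(pred).float()
        loss = F.l1_loss(pred * mask, target * mask, reduction='sum')
        return loss / (mask.sum() + 1e-4)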
Example #2
def iou_pred_vs_target_boxes(pred_boxes, target_boxes, nG, GIoU=False, DIoU=False, CIoU=False):
    assert pred_boxes.size() == target_boxes.size(), "Mismatched sizes of pred_boxes and target_boxes"
    device = pred_boxes.device
    pred_boxes_cpu = to_cpu(pred_boxes).numpy()
    target_boxes_cpu = to_cpu(target_boxes).numpy()
    target_boxes_cpu[:, :4] *= nG  # scale up x, y, w, l

    x, y, w, l, im, re = target_boxes_cpu.transpose(1, 0)
    yaw = np.arctan2(im, re)
    target_conners = bev_utils.get_corners_vectorize(x, y, w, l, yaw)
    target_polygons = cvt_box_2_polygon(target_conners)
    target_areas = [polygon_.area for polygon_ in target_polygons]

    x, y, w, l, im, re = pred_boxes_cpu.transpose(1, 0)
    yaw = np.arctan2(im, re)
    pred_conners = bev_utils.get_corners_vectorize(x, y, w, l, yaw)
    pred_polygons = cvt_box_2_polygon(pred_conners)
    pred_areas = [polygon_.area for polygon_ in pred_polygons]

    ious = []
    iou_losses = []
    n_boxes = target_boxes_cpu.shape[0]
    # TODO: consider vectorizing this loop
    for box_idx in range(n_boxes):
        pred_cons, t_cons = pred_conners[box_idx], target_conners[box_idx]
        pred_poly, t_poly = pred_polygons[box_idx], target_polygons[box_idx]
        pred_area, t_area = pred_areas[box_idx], target_areas[box_idx]
        intersection = pred_poly.intersection(t_poly).area
        union = pred_area + t_area - intersection
        iou = intersection / (union + 1e-16)

        if GIoU:
            convex_conners = np.concatenate((pred_cons, t_cons))
            hull = ConvexHull(convex_conners)
            convex_conners = convex_conners[hull.vertices]
            convex_polygon = Polygon([(convex_conners[i, 0], convex_conners[i, 1]) for i in range(len(convex_conners))]).buffer(0)
            convex_area = convex_polygon.area
            l_iou = 1. - (iou - (convex_area - union) / (convex_area + 1e-16))
        else:
            l_iou = 1. - iou

        if DIoU or CIoU:
            raise NotImplementedError

        ious.append(iou)
        iou_losses.append(l_iou)

    return torch.tensor(ious, device=device, dtype=torch.float), torch.tensor(iou_losses, device=device, dtype=torch.float)
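
cvt_box_2_polygon is not shown; presumably it converts each (4, 2) corner array into a shapely Polygon. A minimal sketch under that assumption, where buffer(0) repairs self-intersecting rings, mirroring its use in the GIoU branch above:

from shapely.geometry import Polygon

def cvt_box_2_polygon(corners_batch):
    # corners_batch: (n_boxes, 4, 2) array of box corners.
    # buffer(0) fixes invalid (self-intersecting) polygons.
    return [Polygon(corners).buffer(0) for corners in corners_batch]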
Example #3
    def compute_grid_offsets(self, grid_size):
        self.grid_size = grid_size
        g = self.grid_size
        self.stride = self.img_size / self.grid_size
        # Calculate offsets for each grid
        self.grid_x = torch.arange(g, device=self.device, dtype=torch.float).repeat(g, 1).view(1, 1, g, g)
        self.grid_y = torch.arange(g, device=self.device, dtype=torch.float).repeat(g, 1).t().view(1, 1, g, g)
        self.scaled_anchors = torch.tensor(
            [(a_w / self.stride, a_h / self.stride, im, re)
             for a_w, a_h, im, re in self.anchors],
            device=self.device,
            dtype=torch.float)
        self.anchor_w = self.scaled_anchors[:, 0:1].view(
            (1, self.num_anchors, 1, 1))
        self.anchor_h = self.scaled_anchors[:, 1:2].view(
            (1, self.num_anchors, 1, 1))

        # Pre-compute polygons and areas of anchors
        self.scaled_anchors_polygons = get_polygons_fix_xy(
            to_cpu(self.scaled_anchors).numpy(), fix_xy=100)
        self.scaled_anchors_areas = [
            polygon_.area for polygon_ in self.scaled_anchors_polygons
        ]
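
get_polygons_fix_xy is not shown either. A speculative sketch of its assumed behavior: place each (w, l, im, re) box at a fixed center so that anchor and target shapes can be compared by IoU independent of position.

import numpy as np
from shapely.geometry import Polygon

def get_polygons_fix_xy(boxes_wlimre, fix_xy=100):
    # Hypothetical helper: each row is (w, l, im, re); the box is placed
    # at center (fix_xy, fix_xy) with yaw recovered via arctan2(im, re).
    polygons = []
    for w, l, im, re in boxes_wlimre:
        yaw = np.arctan2(im, re)
        c, s = np.cos(yaw), np.sin(yaw)
        # Corner offsets of an axis-aligned w x l box, rotated by yaw
        dx = np.array([w, w, -w, -w]) / 2.0
        dy = np.array([l, -l, -l, l]) / 2.0
        xs = fix_xy + c * dx - s * dy
        ys = fix_xy + s * dx + c * dy
        polygons.append(Polygon(list(zip(xs, ys))).buffer(0))
    return polygons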
Example #4
def iou_pred_vs_target_boxes(pred_boxes, target_boxes, nG, GIoU=False, DIoU=False, CIoU=False):
    assert pred_boxes.size() == target_boxes.size(), "Mismatched sizes of pred_boxes and target_boxes"
    device = pred_boxes.device
    pred_boxes_cpu = to_cpu(pred_boxes).numpy()
    target_boxes_cpu = to_cpu(target_boxes).numpy()
    target_boxes_cpu[:, :4] *= nG  # scale up x, y, w, l

    ious = []
    # TODO: consider vectorizing this loop
    for pred_box, target_box in zip(pred_boxes_cpu, target_boxes_cpu):
        iou = iou_rotated_11_boxes(pred_box, target_box)
        if GIoU or DIoU or CIoU:
            raise NotImplementedError

        ious.append(iou)

    return torch.tensor(ious, device=device, dtype=torch.float)
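
iou_rotated_11_boxes (one prediction against one target) is not shown; here is a minimal shapely-based sketch of the assumed behavior:

import numpy as np
from shapely.geometry import Polygon

def iou_rotated_11_boxes(box1, box2):
    # Hypothetical sketch: IoU of two rotated BEV boxes given as
    # (x, y, w, l, im, re), with yaw recovered via arctan2(im, re).
    polygons = []
    for x, y, w, l, im, re in (box1, box2):
        yaw = np.arctan2(im, re)
        c, s = np.cos(yaw), np.sin(yaw)
        dx = np.array([w, w, -w, -w]) / 2.0
        dy = np.array([l, -l, -l, l]) / 2.0
        polygons.append(Polygon(list(zip(x + c * dx - s * dy,
                                         y + s * dx + c * dy))).buffer(0))
    poly1, poly2 = polygons
    intersection = poly1.intersection(poly2).area
    return intersection / (poly1.area + poly2.area - intersection + 1e-16)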
Example #5
    def forward(self, outputs, tg):
        # tg: targets
        outputs['hm_cen'] = _sigmoid(outputs['hm_cen'])
        outputs['hm_conners'] = _sigmoid(outputs['hm_conners'])
        outputs['cen_offset'] = _sigmoid(outputs['cen_offset'])
        outputs['direction'] = _sigmoid(outputs['direction'])

        l_hm_cen = self.focal_loss(outputs['hm_cen'], tg['hm_cen'])
        l_hm_conners = self.focal_loss(outputs['hm_conners'], tg['hm_conners'])

        l_cen_offset = self.l1_loss(outputs['cen_offset'], tg['obj_mask'],
                                    tg['indices_center'], tg['cen_offset'])
        l_direction = self.l1_loss(outputs['direction'], tg['obj_mask'],
                                   tg['indices_center'], tg['direction'])
        # Apply the balanced L1 loss for z-coordinate and dimension regression
        l_z_coor = self.l1_loss_balanced(outputs['z_coor'], tg['obj_mask'],
                                         tg['indices_center'], tg['z_coor'])
        l_dim = self.l1_loss_balanced(outputs['dim'], tg['obj_mask'],
                                      tg['indices_center'], tg['dim'])

        total_loss = l_hm_cen * self.weight_hm_cen + l_cen_offset * self.weight_cenoff + \
                     l_dim * self.weight_dim + l_direction * self.weight_direction + \
                     l_z_coor * self.weight_z_coor + l_hm_conners * self.weight_hm_conners

        loss_stats = {
            'total_loss': to_cpu(total_loss).item(),
            'hm_cen_loss': to_cpu(l_hm_cen).item(),
            'hm_conners_loss': to_cpu(l_hm_conners).item(),
            'cen_offset_loss': to_cpu(l_cen_offset).item(),
            'dim_loss': to_cpu(l_dim).item(),
            'direction_loss': to_cpu(l_direction).item(),
            'z_coor_loss': to_cpu(l_z_coor).item(),
        }

        return total_loss, loss_stats
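
self.focal_loss here and in Example #1 follows the CenterNet heatmap focal loss. A sketch of the standard formulation; the alpha=2 / beta=4 exponents are the usual defaults, assumed rather than read from this code.

import torch
import torch.nn as nn

class FocalLoss(nn.Module):
    # Positives are cells where gt == 1; all other cells are
    # down-weighted by (1 - gt)^4 near annotated centers.
    def forward(self, pred, gt):
        pos_inds = gt.eq(1).float()
        neg_inds = gt.lt(1).float()
        neg_weights = torch.pow(1 - gt, 4)

        pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds
        neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds

        num_pos = pos_inds.sum()
        if num_pos == 0:
            return -neg_loss.sum()
        return -(pos_loss.sum() + neg_loss.sum()) / num_pos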
Example #6
    def forward(self, input, source_image, targets=None):
        # batch_size, c, h, w
        img_size = source_image.size(2)
        ind = 0
        #self.loss = None
        outputs = dict()
        loss = 0.
        yolo_outputs = []
        for block_wrapper in self.blocks:
            block = block_wrapper.dict_block

            if block['type'] == 'yolo':
                x, layer_loss = self.models[ind](input[ind], targets, img_size,
                                                 self.use_giou_loss)
                loss += layer_loss
                yolo_outputs.append(x)
                ind = ind + 1

        yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1))
        #yolo_outputs = torch.cat(yolo_outputs, 1)
        #yolo_outputs = x

        return yolo_outputs if targets is None else (loss, yolo_outputs)
Example #7
    def build_targets(self, out_boxes, pred_cls, target, anchors):
        """ Built yolo targets to compute loss
        :param out_boxes: [num_samples or batch, num_anchors, grid_size, grid_size, 6]
        :param pred_cls: [num_samples or batch, num_anchors, grid_size, grid_size, num_classes]
        :param target: [num_boxes, 8]
        :param anchors: [num_anchors, 4]
        :return:
        """
        nB, nA, nG, _, nC = pred_cls.size()
        n_target_boxes = target.size(0)

        # Create output tensors on "device"
        obj_mask = torch.zeros((nB, nA, nG, nG), device=self.device, dtype=torch.uint8)
        noobj_mask = torch.ones((nB, nA, nG, nG), device=self.device, dtype=torch.uint8)
        class_mask = torch.zeros((nB, nA, nG, nG), device=self.device, dtype=torch.float)
        iou_scores = torch.zeros((nB, nA, nG, nG), device=self.device, dtype=torch.float)
        tx = torch.zeros((nB, nA, nG, nG), device=self.device, dtype=torch.float)
        ty = torch.zeros((nB, nA, nG, nG), device=self.device, dtype=torch.float)
        tw = torch.zeros((nB, nA, nG, nG), device=self.device, dtype=torch.float)
        th = torch.zeros((nB, nA, nG, nG), device=self.device, dtype=torch.float)
        tim = torch.zeros((nB, nA, nG, nG), device=self.device, dtype=torch.float)
        tre = torch.zeros((nB, nA, nG, nG), device=self.device, dtype=torch.float)
        tcls = torch.zeros((nB, nA, nG, nG, nC), device=self.device, dtype=torch.float)
        tconf = obj_mask.float()
        iou_losses = torch.zeros((1,), device=self.device, dtype=torch.float)

        if n_target_boxes > 0:  # Make sure that there is at least 1 box
            # Convert to position relative to box
            target_boxes = target[:, 2:8]

            gxy = target_boxes[:, :2] * nG  # scale up x, y
            gwh = target_boxes[:, 2:4] * nG  # scale up w, l
            gimre = target_boxes[:, 4:]

            targets_polygons = get_polygons_fix_xy(
                to_cpu(target_boxes[:, 2:6] * nG).numpy(), fix_xy=100)
            targets_areas = [polygon_.area for polygon_ in targets_polygons]

            # Get anchors with best iou
            ious = iou_rotated_boxes_vs_anchors(self.scaled_anchors_polygons,
                                                self.scaled_anchors_areas,
                                                targets_polygons,
                                                targets_areas)
            best_ious, best_n = ious.max(0)

            b, target_labels = target[:, :2].long().t()

            gx, gy = gxy.t()
            gw, gh = gwh.t()
            gim, gre = gimre.t()
            gi, gj = gxy.long().t()
            # Set masks
            obj_mask[b, best_n, gj, gi] = 1
            noobj_mask[b, best_n, gj, gi] = 0

            # Set noobj mask to zero where iou exceeds ignore threshold
            for i, anchor_ious in enumerate(ious.t()):
                noobj_mask[b[i], anchor_ious > self.ignore_thresh, gj[i], gi[i]] = 0

            # Coordinates
            tx[b, best_n, gj, gi] = gx - gx.floor()
            ty[b, best_n, gj, gi] = gy - gy.floor()
            # Width and height
            tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16)
            th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16)
            # Im and real part
            tim[b, best_n, gj, gi] = gim
            tre[b, best_n, gj, gi] = gre

            # One-hot encoding of label
            tcls[b, best_n, gj, gi, target_labels] = 1
            class_mask[b, best_n, gj, gi] = (
                pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float()
            ious, iou_losses = iou_pred_vs_target_boxes(
                out_boxes[b, best_n, gj, gi], target_boxes, nG, GIoU=True)
            iou_scores[b, best_n, gj, gi] = ious
            tconf = obj_mask.float()

        return iou_scores, iou_losses, class_mask, obj_mask.type(torch.bool), noobj_mask.type(torch.bool), \
               tx, ty, tw, th, tim, tre, tcls, tconf
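
The tw/th targets use the standard YOLO log-ratio encoding, which the matching forward() inverts with exp. A quick round-trip check on hypothetical values:

import torch

anchor_w = torch.tensor(3.2)                 # hypothetical anchor width (grid units)
gw = torch.tensor(4.0)                       # hypothetical ground-truth width
tw_enc = torch.log(gw / anchor_w + 1e-16)    # encoding used in build_targets
decoded = torch.exp(tw_enc) * anchor_w       # decoding used in forward()
assert torch.allclose(decoded, gw, atol=1e-4)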
Example #8
    def forward(self, x, targets=None, img_size=608):
        """
        :param x: [num_samples or batch, num_anchors * (6 + 1 + num_classes), grid_size, grid_size]
        :param targets: [num boxes, 8] (box_idx, class, x, y, w, l, sin(yaw), cos(yaw))
        :param img_size: default 608
        :return:
        """
        self.img_size = img_size
        self.device = x.device
        num_samples, _, _, grid_size = x.size()

        prediction = x.view(num_samples, self.num_anchors,
                            self.num_classes + 7, grid_size, grid_size)
        prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()
        # prediction size: [num_samples, num_anchors, grid_size, grid_size, num_classes + 7]

        # Get outputs
        pred_x = torch.sigmoid(prediction[..., 0])
        pred_y = torch.sigmoid(prediction[..., 1])
        pred_w = prediction[..., 2]  # Width
        pred_h = prediction[..., 3]  # Height
        pred_im = prediction[..., 4]  # angle imaginary part
        pred_re = prediction[..., 5]  # angle real part
        pred_conf = torch.sigmoid(prediction[..., 6])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 7:])  # Cls pred.

        # If the grid size does not match the current one, compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size)

        # Add offset and scale with anchors
        # pred_boxes size: [num_samples, num_anchors, grid_size, grid_size, 6]
        out_boxes = torch.empty(prediction[..., :6].shape,
                                device=self.device,
                                dtype=torch.float)
        out_boxes[..., 0] = pred_x.clone().detach() + self.grid_x
        out_boxes[..., 1] = pred_y.clone().detach() + self.grid_y
        out_boxes[..., 2] = torch.exp(pred_w.clone().detach()).clamp(max=1E3) * self.anchor_w
        out_boxes[..., 3] = torch.exp(pred_h.clone().detach()).clamp(max=1E3) * self.anchor_h
        out_boxes[..., 4] = pred_im.clone().detach()
        out_boxes[..., 5] = pred_re.clone().detach()

        output = torch.cat((
            out_boxes[..., :4].view(num_samples, -1, 4) * self.stride,
            out_boxes[..., 4:6].view(num_samples, -1, 2),
            pred_conf.clone().view(num_samples, -1, 1),
            pred_cls.clone().view(num_samples, -1, self.num_classes),
        ), dim=-1)
        # output size: [num_samples, num boxes, 7 + num_classes]

        if targets is None:
            return output, 0
        else:
            reduction = 'mean'
            iou_scores, iou_losses, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tim, tre, tcls, tconf = self.build_targets(
                out_boxes=out_boxes,
                pred_cls=pred_cls,
                target=targets,
                anchors=self.scaled_anchors)

            loss_box = iou_losses.sum() if reduction == 'sum' else iou_losses.mean()
            loss_im = F.mse_loss(pred_im[obj_mask],
                                 tim[obj_mask],
                                 reduction=reduction)
            loss_re = F.mse_loss(pred_re[obj_mask],
                                 tre[obj_mask],
                                 reduction=reduction)
            loss_im_re = (
                1. - torch.sqrt(pred_im[obj_mask]**2 + pred_re[obj_mask]**2)
            )**2  # as tim^2 + tre^2 = 1
            loss_im_re_red = loss_im_re.sum() if reduction == 'sum' else loss_im_re.mean()
            loss_eular = loss_im + loss_re + loss_im_re_red

            loss_conf_obj = F.binary_cross_entropy(pred_conf[obj_mask],
                                                   tconf[obj_mask],
                                                   reduction=reduction)
            loss_conf_noobj = F.binary_cross_entropy(pred_conf[noobj_mask],
                                                     tconf[noobj_mask],
                                                     reduction=reduction)
            loss_obj = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
            loss_cls = F.binary_cross_entropy(pred_cls[obj_mask],
                                              tcls[obj_mask],
                                              reduction=reduction)
            total_loss = loss_box * self.lbox_scale + loss_obj * self.lobj_scale + loss_cls * self.lcls_scale + loss_eular * self.leular_scale

            # Metrics (store loss values using tensorboard)
            cls_acc = 100 * class_mask[obj_mask].mean()
            conf_obj = pred_conf[obj_mask].mean()
            conf_noobj = pred_conf[noobj_mask].mean()
            conf50 = (pred_conf > 0.5).float()
            iou50 = (iou_scores > 0.5).float()
            iou75 = (iou_scores > 0.75).float()
            detected_mask = conf50 * class_mask * tconf
            precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
            recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
            recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

            self.metrics = {
                "loss": to_cpu(total_loss).item(),
                'loss_box': to_cpu(loss_box).item(),
                'loss_eular': to_cpu(loss_eular).item(),
                'loss_im': to_cpu(loss_im).item(),
                'loss_re': to_cpu(loss_re).item(),
                "loss_obj": to_cpu(loss_obj).item(),
                "loss_cls": to_cpu(loss_cls).item(),
                "cls_acc": to_cpu(cls_acc).item(),
                "recall50": to_cpu(recall50).item(),
                "recall75": to_cpu(recall75).item(),
                "precision": to_cpu(precision).item(),
                "conf_obj": to_cpu(conf_obj).item(),
                "conf_noobj": to_cpu(conf_noobj).item()
            }

            return output, total_loss
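
The loss_im_re term pushes each predicted (im, re) pair toward the unit circle, since the targets satisfy sin^2(yaw) + cos^2(yaw) = 1. A small check with hypothetical values:

import torch

for im, re in [(0.6, 0.8), (0.3, 0.3)]:
    im, re = torch.tensor(im), torch.tensor(re)
    reg = (1. - torch.sqrt(im ** 2 + re ** 2)) ** 2
    print(float(reg))  # ~0.0 on the unit circle, > 0 off it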
Example #9
    def forward(self, x, targets=None, img_size=608):
        """
        :param x: [num_samples or batch, num_anchors * (6 + 1 + num_classes), grid_size, grid_size]
        :param targets: [num boxes, 8] (box_idx, class, x, y, w, l, sin(yaw), cos(yaw))
        :param img_size: default 608
        :return:
        """
        self.img_size = img_size
        self.device = x.device
        num_samples, _, _, grid_size = x.size()

        prediction = x.view(num_samples, self.num_anchors,
                            self.num_classes + 7, grid_size, grid_size)
        prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()
        # prediction size: [num_samples, num_anchors, grid_size, grid_size, num_classes + 7]

        # Get outputs
        x = torch.sigmoid(prediction[..., 0]) * self.scale_x_y - 0.5 * (
            self.scale_x_y - 1)  # Center x
        y = torch.sigmoid(prediction[..., 1]) * self.scale_x_y - 0.5 * (
            self.scale_x_y - 1)  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        im = prediction[..., 4]  # angle imaginary part
        re = prediction[..., 5]  # angle real part
        pred_conf = torch.sigmoid(prediction[..., 6])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 7:])  # Cls pred.

        # If the grid size does not match the current one, compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size)

        # Add offset and scale with anchors
        # pred_boxes size: [num_samples, num_anchors, grid_size, grid_size, 6]
        pred_boxes = torch.empty(prediction[..., :6].shape,
                                 device=self.device,
                                 dtype=torch.float)
        pred_boxes[..., 0] = x.detach() + self.grid_x
        pred_boxes[..., 1] = y.detach() + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.detach()) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.detach()) * self.anchor_h
        pred_boxes[..., 4] = im.detach()
        pred_boxes[..., 5] = re.detach()

        output = torch.cat((
            pred_boxes[..., :4].view(num_samples, -1, 4) * self.stride,
            pred_boxes[..., 4:].view(num_samples, -1, 2),
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ), dim=-1)
        # output size: [num_samples, num boxes, 7 + num_classes]

        if targets is None:
            return output, 0
        else:
            obj_mask, noobj_mask, tx, ty, tw, th, tim, tre, tcls, tconf = self.build_targets(
                pred_cls=pred_cls, target=targets, anchors=self.scaled_anchors)

            # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
            loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
            loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
            loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
            loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
            loss_im = self.mse_loss(im[obj_mask], tim[obj_mask])
            loss_re = self.mse_loss(re[obj_mask], tre[obj_mask])
            loss_eular = loss_im + loss_re
            loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
            loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                            tconf[noobj_mask])
            loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
            loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
            total_loss = loss_x + loss_y + loss_w + loss_h + loss_eular + loss_conf + loss_cls

            # Metrics (store loss values using tensorboard)
            self.metrics = {
                "loss": to_cpu(total_loss).item(),
                "x": to_cpu(loss_x).item(),
                "y": to_cpu(loss_y).item(),
                "w": to_cpu(loss_w).item(),
                "h": to_cpu(loss_h).item(),
                "im": to_cpu(loss_im).item(),
                "re": to_cpu(loss_re).item(),
                "conf": to_cpu(loss_conf).item(),
                "cls": to_cpu(loss_cls).item()
            }

            return output, total_loss
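
scale_x_y is the YOLOv4-style grid-sensitivity fix: stretching the sigmoid lets predicted centers reach the cell borders exactly. A quick illustration with hypothetical raw outputs:

import torch

t = torch.tensor([-10.0, 0.0, 10.0])  # raw network outputs
for scale_x_y in (1.0, 1.1):
    offset = torch.sigmoid(t) * scale_x_y - 0.5 * (scale_x_y - 1)
    print(scale_x_y, offset)  # 1.0 spans (0, 1); 1.1 spans (-0.05, 1.05)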
Example #10
    def forward(self, x, targets=None):
        # batch_size, c, h, w
        img_size = x.size(2)
        ind = -2
        self.loss = None
        outputs = dict()
        loss = 0.
        yolo_outputs = []
        for block in self.blocks:
            ind = ind + 1
            # if ind > 0:
            #    return x

            if block['type'] == 'net':
                continue
            elif block['type'] in [
                    'convolutional', 'maxpool', 'reorg', 'upsample', 'avgpool',
                    'softmax', 'connected'
            ]:
                x = self.models[ind](x)
                outputs[ind] = x
            elif block['type'] == 'route':
                layers = block['layers'].split(',')
                layers = [
                    int(i) if int(i) > 0 else int(i) + ind for i in layers
                ]
                if len(layers) == 1:
                    if 'groups' not in block.keys() or int(
                            block['groups']) == 1:
                        x = outputs[layers[0]]
                        outputs[ind] = x
                    else:
                        groups = int(block['groups'])
                        group_id = int(block['group_id'])
                        _, b, _, _ = outputs[layers[0]].shape  # b: channel count
                        x = outputs[layers[0]][:, b // groups * group_id:b // groups * (group_id + 1)]
                        outputs[ind] = x
                elif len(layers) == 2:
                    x1 = outputs[layers[0]]
                    x2 = outputs[layers[1]]
                    x = torch.cat((x1, x2), 1)
                    outputs[ind] = x
                elif len(layers) == 4:
                    x1 = outputs[layers[0]]
                    x2 = outputs[layers[1]]
                    x3 = outputs[layers[2]]
                    x4 = outputs[layers[3]]
                    x = torch.cat((x1, x2, x3, x4), 1)
                    outputs[ind] = x
                else:
                    print("rounte number > 2 ,is {}".format(len(layers)))

            elif block['type'] == 'shortcut':
                from_layer = int(block['from'])
                activation = block['activation']
                from_layer = from_layer if from_layer > 0 else from_layer + ind
                x1 = outputs[from_layer]
                x2 = outputs[ind - 1]
                x = x1 + x2
                if activation == 'leaky':
                    x = F.leaky_relu(x, 0.1, inplace=True)
                elif activation == 'relu':
                    x = F.relu(x, inplace=True)
                outputs[ind] = x
            elif block['type'] == 'yolo':
                x, layer_loss = self.models[ind](x, targets, img_size,
                                                 self.use_giou_loss)
                loss += layer_loss
                yolo_outputs.append(x)
            elif block['type'] == 'cost':
                continue
            else:
                print('unknown type %s' % (block['type']))
        yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1))

        return yolo_outputs if targets is None else (loss, yolo_outputs)
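
The route branch with groups/group_id implements the darknet CSP-style channel split: the source layer's channels are divided into `groups` equal chunks and chunk `group_id` is forwarded. An equivalent sketch with hypothetical shapes:

import torch

x = torch.randn(1, 64, 8, 8)  # hypothetical feature map
groups, group_id = 2, 1
chunk = torch.chunk(x, groups, dim=1)[group_id]  # channels 32..63
assert chunk.shape == (1, 32, 8, 8)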
Example #11
    def forward(self, x, targets=None, img_size=608, use_giou_loss=False):
        """
        :param x: [num_samples or batch, num_anchors * (8 + 1 + num_classes), grid_size, grid_size]
        :param targets: [num boxes, 9] (box_idx, class, x, y, z, h, w, l, yaw)
        :param img_size: default 608
        :return:
        """
        self.img_size = img_size
        self.use_giou_loss = use_giou_loss
        self.device = x.device
        num_samples, _, _, grid_size = x.size()

        prediction = x.view(num_samples, self.num_anchors,
                            self.num_classes + 9, grid_size, grid_size)
        prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()
        # prediction size: [num_samples, num_anchors, grid_size, grid_size, num_classes + 9]

        # Get outputs
        pred_x = torch.sigmoid(prediction[..., 0])
        pred_y = torch.sigmoid(prediction[..., 1])
        pred_z = torch.sigmoid(prediction[..., 2])
        pred_h = prediction[..., 3]  # Height
        pred_w = prediction[..., 4]  # Width
        pred_l = prediction[..., 5]  # Length
        pred_im = prediction[..., 6]  # angle imaginary part, sin(yaw)
        pred_re = prediction[..., 7]  # angle real part, cos(yaw)
        pred_conf = torch.sigmoid(prediction[..., 8])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 9:])  # Cls pred.

        # If the grid size does not match the current one, compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size)

        # Add offset and scale with anchors
        # pred_boxes size: [num_samples, num_anchors, grid_size, grid_size, 8]
        pred_boxes = torch.empty(prediction[..., :8].shape,
                                 device=self.device,
                                 dtype=torch.float)
        pred_boxes[..., 0] = pred_x + self.grid_x
        pred_boxes[..., 1] = pred_y + self.grid_y
        pred_boxes[..., 2] = pred_z  # z within a single cell; no grid offset added

        pred_boxes[..., 3] = torch.exp(pred_h).clamp(max=1E3) * self.anchor_h
        pred_boxes[..., 4] = torch.exp(pred_w).clamp(max=1E3) * self.anchor_w
        pred_boxes[..., 5] = torch.exp(pred_l).clamp(max=1E3) * self.anchor_l
        pred_boxes[..., 6] = pred_im
        pred_boxes[..., 7] = pred_re

        output = torch.cat(
            (
                pred_boxes[..., :2].view(num_samples, -1, 2) *
                self.stride,  # x, y
                pred_boxes[..., 2:3].view(num_samples, -1, 1),  # z
                pred_boxes[..., 3:6].view(num_samples, -1, 3) *
                self.stride,  # h, w, l
                pred_boxes[..., 6:8].view(num_samples, -1, 2),  # im, re
                pred_conf.view(num_samples, -1, 1),  # conf
                pred_cls.view(num_samples, -1, self.num_classes),  # classes
            ),
            dim=-1)
        # output size: [num_samples, num boxes, 9 + num_classes]

        if targets is None:
            return output, 0
        else:
            self.reduction = 'mean'
            iou_scores, giou_loss, class_mask, obj_mask, noobj_mask, tx, ty, tz, th, tw, tl, tim, tre, tcls, tconf = self.build_targets(
                pred_boxes=pred_boxes,
                pred_cls=pred_cls,
                target=targets,
                anchors=self.scaled_anchors)

            loss_x = F.mse_loss(pred_x[obj_mask],
                                tx[obj_mask],
                                reduction=self.reduction)
            loss_y = F.mse_loss(pred_y[obj_mask],
                                ty[obj_mask],
                                reduction=self.reduction)
            loss_z = F.mse_loss(pred_z[obj_mask],
                                tz[obj_mask],
                                reduction=self.reduction)

            loss_h = F.mse_loss(pred_h[obj_mask],
                                th[obj_mask],
                                reduction=self.reduction)
            loss_w = F.mse_loss(pred_w[obj_mask],
                                tw[obj_mask],
                                reduction=self.reduction)
            loss_l = F.mse_loss(pred_l[obj_mask],
                                tl[obj_mask],
                                reduction=self.reduction)

            loss_im = F.mse_loss(pred_im[obj_mask],
                                 tim[obj_mask],
                                 reduction=self.reduction)
            loss_re = F.mse_loss(pred_re[obj_mask],
                                 tre[obj_mask],
                                 reduction=self.reduction)

            loss_box = loss_x + loss_y + loss_z + loss_h + loss_w + loss_l + loss_re + loss_im

            loss_conf_obj = F.binary_cross_entropy(pred_conf[obj_mask],
                                                   tconf[obj_mask],
                                                   reduction=self.reduction)
            loss_conf_noobj = F.binary_cross_entropy(pred_conf[noobj_mask],
                                                     tconf[noobj_mask],
                                                     reduction=self.reduction)
            loss_cls = F.binary_cross_entropy(pred_cls[obj_mask],
                                              tcls[obj_mask],
                                              reduction=self.reduction)

            if self.use_giou_loss:
                loss_obj = loss_conf_obj + loss_conf_noobj
                total_loss = giou_loss * self.lgiou_scale + loss_obj * self.lobj_scale + loss_cls * self.lcls_scale
            else:
                loss_obj = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
                total_loss = loss_box + loss_obj + loss_cls

            # Metrics (store loss values using tensorboard)
            cls_acc = 100 * class_mask[obj_mask].mean()
            conf_obj = pred_conf[obj_mask].mean()
            conf_noobj = pred_conf[noobj_mask].mean()
            conf50 = (pred_conf > 0.5).float()
            iou50 = (iou_scores > 0.5).float()
            iou75 = (iou_scores > 0.75).float()
            detected_mask = conf50 * class_mask * tconf
            precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
            recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
            recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

            self.metrics = {
                "loss": to_cpu(total_loss).item(),
                "iou_score": to_cpu(iou_scores[obj_mask].mean()).item(),
                'giou_loss': to_cpu(giou_loss).item(),
                'loss_x': to_cpu(loss_x).item(),
                'loss_y': to_cpu(loss_y).item(),
                'loss_z': to_cpu(loss_z).item(),
                'loss_h': to_cpu(loss_h).item(),
                'loss_w': to_cpu(loss_w).item(),
                'loss_l': to_cpu(loss_l).item(),
                'loss_im': to_cpu(loss_im).item(),
                'loss_re': to_cpu(loss_re).item(),
                "loss_obj": to_cpu(loss_obj).item(),
                "loss_cls": to_cpu(loss_cls).item(),
                "cls_acc": to_cpu(cls_acc).item(),
                "recall50": to_cpu(recall50).item(),
                "recall75": to_cpu(recall75).item(),
                "precision": to_cpu(precision).item(),
                "conf_obj": to_cpu(conf_obj).item(),
                "conf_noobj": to_cpu(conf_noobj).item()
            }

            return output, total_loss
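
In the GIoU branch, the target builder is assumed to return giou_loss = 1 - GIoU = 1 - IoU + (C - U)/C, matching the computation shown in Example #2. A toy number check with axis-aligned squares (hypothetical values):

from shapely.geometry import box

# Two unit squares that overlap by half
p1, p2 = box(0, 0, 1, 1), box(0.5, 0, 1.5, 1)
inter = p1.intersection(p2).area             # 0.5
union = p1.area + p2.area - inter            # 1.5
iou = inter / union                          # 1/3
hull = p1.union(p2).convex_hull              # smallest enclosing convex region, area 1.5
giou = iou - (hull.area - union) / hull.area
print(1. - giou)                             # GIoU loss, == 2/3 here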
Example #12
    def forward(self, x, targets=None, img_size=608):
        """
        :param x: [num_samples or batch, num_anchors * (6 + 1 + num_classes), grid_size, grid_size]
        :param targets: [num boxes, 8] (box_idx, class, x, y, w, l, sin(yaw), cos(yaw))
        :param img_size: default 608
        :return:
        """
        self.img_size = img_size
        self.device = x.device
        num_samples, _, _, grid_size = x.size()

        prediction = x.view(num_samples, self.num_anchors,
                            self.num_classes + 7, grid_size, grid_size)
        prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()
        # prediction size: [num_samples, num_anchors, grid_size, grid_size, num_classes + 7]

        # Get outputs
        pred_x = torch.sigmoid(prediction[..., 0])
        pred_y = torch.sigmoid(prediction[..., 1])
        pred_w = prediction[..., 2]  # Width
        pred_h = prediction[..., 3]  # Height
        pred_im = prediction[..., 4]  # angle imaginary part
        pred_re = prediction[..., 5]  # angle real part
        pred_conf = torch.sigmoid(prediction[..., 6])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 7:])  # Cls pred.

        # If the grid size does not match the current one, compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size)

        # Add offset and scale with anchors
        # pred_boxes size: [num_samples, num_anchors, grid_size, grid_size, 6]
        out_boxes = torch.empty(prediction[..., :6].shape,
                                device=self.device,
                                dtype=torch.float)
        out_boxes[..., 0] = pred_x.clone().detach() + self.grid_x
        out_boxes[..., 1] = pred_y.clone().detach() + self.grid_y
        out_boxes[..., 2] = torch.exp(pred_w.clone().detach()) * self.anchor_w
        out_boxes[..., 3] = torch.exp(pred_h.clone().detach()) * self.anchor_h
        out_boxes[..., 4] = pred_im.clone().detach()
        out_boxes[..., 5] = pred_re.clone().detach()

        output = torch.cat((
            out_boxes[..., :4].view(num_samples, -1, 4) * self.stride,
            out_boxes[..., 4:6].view(num_samples, -1, 2),
            pred_conf.clone().view(num_samples, -1, 1),
            pred_cls.clone().view(num_samples, -1, self.num_classes),
        ), dim=-1)
        # output size: [num_samples, num boxes, 7 + num_classes]

        if targets is None:
            return output, 0
        else:
            reduction = 'mean'
            iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tim, tre, tcls, tconf = self.build_targets(
                out_boxes=out_boxes,
                pred_cls=pred_cls,
                target=targets,
                anchors=self.scaled_anchors)

            iou_masked = iou_scores[obj_mask]  # size: (n_target_boxes,)
            loss_box = (1. - iou_masked).sum() if reduction == 'sum' else (1. - iou_masked).mean()

            loss_conf_obj = F.binary_cross_entropy(pred_conf[obj_mask],
                                                   tconf[obj_mask],
                                                   reduction=reduction)
            loss_conf_noobj = F.binary_cross_entropy(pred_conf[noobj_mask],
                                                     tconf[noobj_mask],
                                                     reduction=reduction)
            loss_obj = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
            loss_cls = F.binary_cross_entropy(pred_cls[obj_mask],
                                              tcls[obj_mask],
                                              reduction=reduction)
            total_loss = loss_box * self.lbox_scale + loss_obj * self.lobj_scale + loss_cls * self.lcls_scale

            # Metrics (store loss values using tensorboard)
            self.metrics = {
                "loss": to_cpu(total_loss).item(),
                'loss_box': to_cpu(loss_box).item(),
                "loss_obj": to_cpu(loss_obj).item(),
                "loss_cls": to_cpu(loss_cls).item()
            }

            return output, total_loss
Example #13
    def forward(self, x, targets=None, img_size=608):
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        num_samples = x.size(0)
        grid_size = x.size(2)

        prediction = (x.view(num_samples, self.num_anchors,
                             self.num_classes + 7, grid_size,
                             grid_size).permute(0, 1, 3, 4, 2).contiguous())

        # Get outputs
        x = torch.sigmoid(prediction[..., 0]) * self.scale_x_y - 0.5 * (
            self.scale_x_y - 1)  # Center x
        y = torch.sigmoid(prediction[..., 1]) * self.scale_x_y - 0.5 * (
            self.scale_x_y - 1)  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        im = prediction[..., 4]  # angle imaginary part
        re = prediction[..., 5]  # angle real part
        pred_conf = torch.sigmoid(prediction[..., 6])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 7:])  # Cls pred.

        # If the grid size does not match the current one, compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # Add offset and scale with anchors
        pred_boxes = FloatTensor(prediction[..., :6].shape)
        pred_boxes[..., 0] = x.detach() + self.grid_x
        pred_boxes[..., 1] = y.detach() + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.detach()) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.detach()) * self.anchor_h
        pred_boxes[..., 4] = im.detach()
        pred_boxes[..., 5] = re.detach()

        output = torch.cat(
            (
                # pred_boxes.view(num_samples, -1, 6) * self.stride,
                pred_boxes[..., :4].view(num_samples, -1, 4) * self.stride,
                pred_boxes[..., 4:].view(num_samples, -1, 2),
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            dim=-1,
        )

        if targets is None:
            return output, 0
        else:
            iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tim, tre, tcls, tconf = self.build_targets(
                pred_boxes=pred_boxes,
                pred_cls=pred_cls,
                target=targets,
                anchors=self.scaled_anchors)

            # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
            loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
            loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
            loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
            loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
            loss_im = self.mse_loss(im[obj_mask], tim[obj_mask])
            loss_re = self.mse_loss(re[obj_mask], tre[obj_mask])
            loss_eular = loss_im + loss_re
            loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
            loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                            tconf[noobj_mask])
            loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
            loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
            total_loss = loss_x + loss_y + loss_w + loss_h + loss_eular + loss_conf + loss_cls

            # Metrics
            cls_acc = 100 * class_mask[obj_mask].mean()
            conf_obj = pred_conf[obj_mask].mean()
            conf_noobj = pred_conf[noobj_mask].mean()
            conf50 = (pred_conf > 0.5).float()
            iou50 = (iou_scores > 0.5).float()
            iou75 = (iou_scores > 0.75).float()
            detected_mask = conf50 * class_mask * tconf
            precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
            recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
            recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

            self.metrics = {
                "loss": to_cpu(total_loss).item(),
                "x": to_cpu(loss_x).item(),
                "y": to_cpu(loss_y).item(),
                "w": to_cpu(loss_w).item(),
                "h": to_cpu(loss_h).item(),
                "im": to_cpu(loss_im).item(),
                "re": to_cpu(loss_re).item(),
                "conf": to_cpu(loss_conf).item(),
                "cls": to_cpu(loss_cls).item(),
                "cls_acc": to_cpu(cls_acc).item(),
                "recall50": to_cpu(recall50).item(),
                "recall75": to_cpu(recall75).item(),
                "precision": to_cpu(precision).item(),
                "conf_obj": to_cpu(conf_obj).item(),
                "conf_noobj": to_cpu(conf_noobj).item(),
                "grid_size": grid_size,
            }

            return output, total_loss