예제 #1
0
 def forward(self, x, targets=None):
     """Run the input through every configured layer and collect YOLO outputs.

     Args:
         x: Input batch. ``x.shape[2]`` is forwarded to each YOLO layer as
             the image dimension.
         targets: Optional ground truth handed to each YOLO layer.

     Returns:
         The YOLO outputs concatenated along dim 1 and passed through
         ``to_cpu`` when ``targets`` is ``None``; otherwise the tuple
         ``(loss, yolo_outputs)`` where ``loss`` is the sum of the per-layer
         losses.
     """
     img_dim = x.shape[2]
     total_loss = 0
     layer_outputs = []
     detections = []
     for module_def, module in zip(self.module_defs, self.module_list):
         kind = module_def["type"]
         if kind in ("convolutional", "upsample", "maxpool"):
             x = module(x)
         elif kind == "route":
             # Concatenate earlier layer outputs along dim 1.
             sources = [
                 layer_outputs[int(index)]
                 for index in module_def["layers"].split(",")
             ]
             x = torch.cat(sources, 1)
         elif kind == "shortcut":
             # Residual connection: previous output plus the referenced one.
             source_index = int(module_def["from"])
             x = layer_outputs[-1] + layer_outputs[source_index]
         elif kind == "yolo":
             x, layer_loss = module[0](x, targets, img_dim)
             total_loss += layer_loss
             detections.append(x)
         # Every layer's output is kept so route/shortcut can look it up.
         layer_outputs.append(x)
     detections = to_cpu(torch.cat(detections, 1))
     if targets is None:
         return detections
     return total_loss, detections
예제 #2
0
    def forward(self, x, targets=None):
        """Apply each configured layer in order and gather the YOLO outputs.

        Args:
            x: Input batch; ``x.shape[2]`` is passed to the YOLO layers as
                the image dimension.
            targets: Optional ground truth forwarded to the YOLO layers.

        Returns:
            ``yolo_outputs`` (concatenated on dim 1, passed through
            ``to_cpu``) when ``targets`` is ``None``; otherwise
            ``(loss, yolo_outputs)``.
        """
        img_dim = x.shape[2]
        loss = 0
        layer_outputs = []
        yolo_outputs = []
        plain_layers = ("convolutional", "upsample", "maxpool")
        for module_def, module in zip(self.module_defs, self.module_list):
            layer_type = module_def["type"]
            if layer_type == "yolo":
                x, layer_loss = module[0](x, targets, img_dim)
                loss += layer_loss
                yolo_outputs.append(x)
            elif layer_type == "shortcut":
                layer_i = int(module_def["from"])
                x = layer_outputs[-1] + layer_outputs[layer_i]
            elif layer_type == "route":
                routed = []
                for layer_i in module_def["layers"].split(","):
                    routed.append(layer_outputs[int(layer_i)])
                x = torch.cat(routed, dim=1)
                # TODO: a disabled alternative for this branch passed a single
                # referenced layer through unchanged and, when concatenation
                # failed, downscaled the second referenced output by 0.5 with
                # F.interpolate before retrying (darknet reorg / stride 2).
            elif layer_type in plain_layers:
                x = module(x)
            # Keep every output so later route/shortcut layers can index it.
            layer_outputs.append(x)
        yolo_outputs = to_cpu(torch.cat(yolo_outputs, dim=1))
        if targets is not None:
            return loss, yolo_outputs
        return yolo_outputs
예제 #3
0
    def forward(self, x, targets=None):
        """Evaluate the layer stack and return the combined YOLO outputs.

        Args:
            x: Input batch; its size along dim 2 is given to the YOLO layers
                as the image dimension.
            targets: Optional ground truth passed on to the YOLO layers.

        Returns:
            The dim-1 concatenation of all YOLO layer outputs (after
            ``to_cpu``) if ``targets`` is ``None``, else
            ``(loss, yolo_outputs)``.
        """
        img_dim = x.shape[2]
        loss = 0
        layer_outputs = []
        yolo_outputs = []

        def gather(spec):
            # Look up the previously stored outputs named in a 'a,b,...' spec.
            return [layer_outputs[int(token)] for token in spec.split(',')]

        for module_def, module in zip(self.module_defs, self.module_list):
            block_type = module_def['type']
            if block_type in ('convolutional', 'upsample', 'maxpool'):
                x = module(x)
            elif block_type == 'route':
                x = torch.cat(gather(module_def['layers']), 1)
            elif block_type == 'shortcut':
                skip_from = int(module_def['from'])
                x = layer_outputs[-1] + layer_outputs[skip_from]
            elif block_type == 'yolo':
                x, layer_loss = module[0](x, targets, img_dim)
                loss += layer_loss
                yolo_outputs.append(x)
            layer_outputs.append(x)

        yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1))
        if targets is None:
            return yolo_outputs
        return (loss, yolo_outputs)
예제 #4
0
 def forward(self, x, targets=None):
     """Feed ``x`` through all layers and return the stacked YOLO outputs.

     Args:
         x: Input batch. Only ``x.shape[2]`` is read as the image size
             (the original author notes the input is assumed square).
         targets: Optional ground truth forwarded to each YOLO layer.

     Returns:
         The YOLO layer outputs concatenated on dim 1 and passed through
         ``to_cpu`` when ``targets`` is ``None``; otherwise
         ``(loss, yolo_outputs)``.
     """
     # Depends on the input image size; only the height is considered.
     img_dim = x.shape[2]
     accumulated_loss = 0
     layer_outputs = []
     yolo_outputs = []
     for module_def, module in zip(self.module_defs, self.module_list):
         layer_kind = module_def["type"]
         if layer_kind in ("convolutional", "upsample", "maxpool"):
             x = module(x)
         elif layer_kind == "route":
             wanted = map(int, module_def["layers"].split(","))
             x = torch.cat([layer_outputs[k] for k in wanted], 1)
         elif layer_kind == "shortcut":
             layer_i = int(module_def["from"])
             x = layer_outputs[-1] + layer_outputs[layer_i]
         elif layer_kind == "yolo":
             # Author's note: layers [82, 94, 106] for yolov3. The module is
             # indexed with [0] (noted by the author to be an nn.Sequential).
             yolo_layer = module[0]
             x, layer_loss = yolo_layer(x, targets, img_dim)
             accumulated_loss += layer_loss
             yolo_outputs.append(x)
         # Store the output of every block.
         layer_outputs.append(x)
     # Only the YOLO layers' outputs are returned.
     yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1))
     if targets is None:
         return yolo_outputs
     return (accumulated_loss, yolo_outputs)
예제 #5
0
    test_embed1 = DataLoadDf(df_test_embed1, encode_function_label, transform=Compose(trans_embedding))
    test_embed_loader1 = DataLoader(test_embed1, batch_size=batch_size_classif, shuffle=False,
                                    num_workers=num_workers,
                                    drop_last=False)

    test_df10 = dfs["test10"]
    test_dl10 = DataLoadDf(test_df10, encode_function_label, transform=Compose(trans_fr_sc_embed))
    embed_set10 = "final_test10"
    test_embed_dir10 = os.path.join(embed_dir, embed_set10)
    df_test_embed10, _ = calculate_embedding(test_dl10, model_triplet, savedir=test_embed_dir10,
                                             concatenate="append")
    test_embed10 = DataLoadDf(df_test_embed10, encode_function_label, transform=Compose(trans_embedding))
    test_embed_loader10 = DataLoader(test_embed10, batch_size=batch_size_classif, shuffle=False,
                                     num_workers=num_workers, drop_last=False)

    model_triplet = to_cpu(model_triplet)
    classif_model = to_cuda_if_available(classif_model)
    classif_model.eval()
    mean_test_results1 = measure_classif(classif_model, test_embed_loader1,
                                         classes=classes,
                                         suffix_print="test1")

    mean_test_results10 = measure_classif(classif_model, test_embed_loader10,
                                          classes=classes,
                                          suffix_print="test10")

    print(f"Time of the program: {time.time() - t}")
    from orion.client import report_results

    report_results(
        [dict(
예제 #6
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode raw YOLO head activations and, given targets, compute the loss.

        Args:
            x: Raw feature map. It is viewed as
                (num_samples, num_anchors, num_classes + 5, grid, grid), so
                dim 1 must hold ``num_anchors * (num_classes + 5)`` values and
                only ``x.size(2)`` is read as the grid size.
            targets: Ground truth forwarded to ``build_targets``. When
                ``None`` no loss or metrics are computed.
            img_dim: Stored on ``self.img_dim``; not otherwise used here.

        Returns:
            ``(output, 0)`` when ``targets`` is ``None``, otherwise
            ``(output, total_loss)``. ``output`` has shape
            (num_samples, num_anchors * grid * grid, num_classes + 5): four
            box values multiplied by ``self.stride``, then the confidence,
            then the class scores.

        Side effects:
            Sets ``self.img_dim``; calls ``self.compute_grid_offsets`` when
            the grid size differs from ``self.grid_size``; sets
            ``self.metrics`` when ``targets`` is given.
        """
        # Legacy typed constructor matching the device of the input.
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)

        # Split the channel axis into (anchor, attribute) and move the
        # attributes last, giving
        # (num_samples, num_anchors, grid_size, grid_size, num_classes + 5),
        # e.g. (1, 3, 13, 13, 85) for one image, 3 anchors, 80 classes.
        prediction = (
            x.view(num_samples, self.num_anchors, self.num_classes + 5,
                   grid_size, grid_size)
            .permute(0, 1, 3, 4, 2)
            # Materialise the permuted layout so the .view() calls on the
            # slices below are valid.
            .contiguous()
        )

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])  # sigmoid of the x offset
        y = torch.sigmoid(prediction[..., 1])  # sigmoid of the y offset
        w = prediction[..., 2]  # raw width term
        h = prediction[..., 3]  # raw height term
        pred_conf = torch.sigmoid(prediction[..., 4])
        pred_cls = torch.sigmoid(prediction[..., 5:])

        # If the grid size does not match the cached one, compute new offsets.
        # NOTE(review): the original author notes self.grid_size starts at 0,
        # so the first call always takes this branch - confirm in __init__.
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # Add the cell offsets and scale with the anchors. The .data accesses
        # keep the decoded boxes out of the autograd graph.
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        output = torch.cat(
            (
                pred_boxes.view(num_samples, -1, 4) * self.stride,
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        if targets is None:
            return output, 0

        iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        # Loss: mask outputs to ignore non-existing objects (except for the
        # confidence loss, which also has a no-object term).
        loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                        tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # Metrics
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        detected_mask = conf50 * class_mask * tconf
        # The 1e-16 terms guard against division by zero.
        precision = torch.sum(
            iou50 * detected_mask) / (conf50.sum() + 1e-16)
        recall50 = torch.sum(
            iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
        recall75 = torch.sum(
            iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

        self.metrics = {
            "loss": to_cpu(total_loss).item(),
            "x": to_cpu(loss_x).item(),
            "y": to_cpu(loss_y).item(),
            "w": to_cpu(loss_w).item(),
            "h": to_cpu(loss_h).item(),
            "conf": to_cpu(loss_conf).item(),
            "cls": to_cpu(loss_cls).item(),
            "cls_acc": to_cpu(cls_acc).item(),
            "recall50": to_cpu(recall50).item(),
            "recall75": to_cpu(recall75).item(),
            "precision": to_cpu(precision).item(),
            "conf_obj": to_cpu(conf_obj).item(),
            "conf_noobj": to_cpu(conf_noobj).item(),
            "grid_size": grid_size,
        }

        return output, total_loss
예제 #7
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode raw YOLO head activations and, given targets, compute the loss.

        Args:
            x: Raw feature map. It is viewed as
                (num_samples, num_anchors, num_classes + 5, grid, grid); only
                ``x.size(2)`` is read as the grid size.
            targets: Ground truth forwarded to ``build_targets``. When
                ``None`` no loss or metrics are computed.
            img_dim: Stored on ``self.img_dim``; not otherwise used here.

        Returns:
            ``(output, 0)`` when ``targets`` is ``None``, otherwise
            ``(output, total_loss)``. ``output`` has shape
            (num_samples, num_anchors * grid * grid, num_classes + 5) with the
            four box values multiplied by ``self.stride``.

        Side effects:
            Sets ``self.img_dim``; may call ``self.compute_grid_offsets``;
            sets ``self.metrics`` when ``targets`` is given and at least one
            entry of ``obj_mask`` is set. In the no-object early return
            ``self.metrics`` is left untouched.
        """
        # Legacy typed constructor matching the device of the input.
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)

        # (num_samples, num_anchors, grid_size, grid_size, num_classes + 5)
        prediction = (x.view(num_samples, self.num_anchors,
                             self.num_classes + 5, grid_size,
                             grid_size).permute(0, 1, 3, 4, 2).contiguous())

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

        # If grid size does not match current we compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # Add offset and scale with anchors. The .data accesses keep the
        # decoded boxes out of the autograd graph.
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        output = torch.cat(
            (
                pred_boxes.view(num_samples, -1, 4) * self.stride,
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        if targets is None:
            return output, 0

        iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        # build_targets does not return tconf here, so derive it from
        # obj_mask. It must exist before the early return below, which
        # reads it.
        tconf = obj_mask.float()

        if not obj_mask.any():
            # No matched object: only the no-object confidence term applies.
            total_loss = self.noobj_scale * self.bce_loss(
                pred_conf[noobj_mask], tconf[noobj_mask])
            return output, total_loss

        # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
        loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                        tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # Metrics
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        detected_mask = conf50 * class_mask * tconf
        # The 1e-16 terms guard against division by zero.
        precision = torch.sum(
            iou50 * detected_mask) / (conf50.sum() + 1e-16)
        recall50 = torch.sum(
            iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
        recall75 = torch.sum(
            iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

        self.metrics = {
            "loss": to_cpu(total_loss).item(),
            "x": to_cpu(loss_x).item(),
            "y": to_cpu(loss_y).item(),
            "w": to_cpu(loss_w).item(),
            "h": to_cpu(loss_h).item(),
            "conf": to_cpu(loss_conf).item(),
            "cls": to_cpu(loss_cls).item(),
            "cls_acc": to_cpu(cls_acc).item(),
            "recall50": to_cpu(recall50).item(),
            "recall75": to_cpu(recall75).item(),
            "precision": to_cpu(precision).item(),
            "conf_obj": to_cpu(conf_obj).item(),
            "conf_noobj": to_cpu(conf_noobj).item(),
            "grid_size": grid_size,
        }

        return output, total_loss
예제 #8
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode raw YOLO head activations, log shapes, and compute the loss.

        Args:
            x: Raw feature map. It is viewed as
                (num_samples, num_anchors, num_classes + 5, grid, grid); only
                ``x.size(2)`` is read as the grid size.
            targets: Ground truth forwarded to ``build_targets``. When
                ``None`` no loss or metrics are computed.
            img_dim: Stored on ``self.img_dim``; not otherwise used here.

        Returns:
            ``(output, 0)`` when ``targets`` is ``None``, otherwise
            ``(output, total_loss)``. ``output`` has shape
            (num_samples, num_anchors * grid * grid, num_classes + 5) with the
            four box values multiplied by ``self.stride``.

        Side effects:
            Sets ``self.img_dim``; may call ``self.compute_grid_offsets``;
            emits three ``logger.info`` records per call; sets
            ``self.metrics`` when ``targets`` is given.
        """
        # Legacy typed constructor matching the device of the input.
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)

        logger.info(
            f"YOLOLayer input: {x.size(0)}, {x.size(1)}, {x.size(2)}, {x.size(3)}"
        )

        # (num_samples, num_anchors, grid_size, grid_size, num_classes + 5)
        prediction = (x.view(num_samples, self.num_anchors,
                             self.num_classes + 5, grid_size,
                             grid_size).permute(0, 1, 3, 4, 2).contiguous())
        logger.info(
            f"After resize, prediction: {prediction.size(0)}, {prediction.size(1)}, {prediction.size(2)}, {prediction.size(3)}, {prediction.size(4)}"
        )

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])
        y = torch.sigmoid(prediction[..., 1])
        w = prediction[..., 2]
        h = prediction[..., 3]
        pred_conf = torch.sigmoid(prediction[..., 4])
        pred_cls = torch.sigmoid(prediction[..., 5:])

        # If grid size does not match current we compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # Add offset and scale with anchors. The .data accesses keep the
        # decoded boxes out of the autograd graph.
        pred_bboxes = FloatTensor(prediction[..., :4].shape)
        pred_bboxes[..., 0] = x.data + self.grid_x
        pred_bboxes[..., 1] = y.data + self.grid_y
        # Multiply by anchor_w / anchor_h (author's note: these are the
        # scaled anchors).
        pred_bboxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_bboxes[..., 3] = torch.exp(h.data) * self.anchor_h

        output = torch.cat((
            pred_bboxes.view(num_samples, -1, 4) * self.stride,
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ), -1)
        logger.info(
            f"YOLOLayer output: {output.size(0)}, {output.size(1)}, {output.size(2)}\n"
        )

        if targets is None:
            return output, 0

        iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
            pred_boxes=pred_bboxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        # Loss: mask outputs to ignore non-existing objects (except for the
        # confidence loss). Box terms use MSE on the raw x/y/w/h values,
        # not on the decoded boxes.
        loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(h[obj_mask], th[obj_mask])

        # Confidence uses BCE; the object and no-object terms are weighted
        # separately by obj_scale / noobj_scale.
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                        tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj

        # Classification BCE
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])

        # Total loss: coordinate + confidence + classification terms
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # Metrics
        # cls_acc: mean of class_mask over the obj_mask positions, times 100.
        # NOTE(review): class_mask semantics come from build_targets - confirm.
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        # detected_mask: product of the conf > 0.5 indicator, class_mask and
        # tconf.
        detected_mask = conf50 * class_mask * tconf
        # The 1e-16 terms guard against division by zero.
        precision = torch.sum(
            iou50 * detected_mask) / (conf50.sum() + 1e-16)
        recall50 = torch.sum(
            iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
        recall75 = torch.sum(
            iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

        self.metrics = {
            "loss": to_cpu(total_loss).item(),
            "x": to_cpu(loss_x).item(),
            "y": to_cpu(loss_y).item(),
            "w": to_cpu(loss_w).item(),
            "h": to_cpu(loss_h).item(),
            "conf": to_cpu(loss_conf).item(),
            "cls": to_cpu(loss_cls).item(),
            "cls_acc": to_cpu(cls_acc).item(),
            "recall50": to_cpu(recall50).item(),
            "recall75": to_cpu(recall75).item(),
            "precision": to_cpu(precision).item(),
            "conf_obj": to_cpu(conf_obj).item(),
            "conf_noobj": to_cpu(conf_noobj).item(),
            "grid_size": grid_size,
        }

        return output, total_loss
예제 #9
0
    def forward(self, x, targets=None, img_dim=None):
        """Turn raw YOLO head activations into detections and an optional loss.

        Args:
            x: Raw feature map. It is viewed as
                (num_samples, num_anchors, num_classes + 5, grid, grid); only
                ``x.size(2)`` is read as the grid size. The original author
                notes the three heads receive (N, 255, 13, 13),
                (N, 255, 26, 26) and (N, 255, 52, 52).
            targets: Ground truth forwarded to ``build_targets``. When
                ``None`` no loss or metrics are computed.
            img_dim: Stored on ``self.img_dim``; not otherwise used here.

        Returns:
            ``(output, 0)`` when ``targets`` is ``None``, otherwise
            ``(output, total_loss)``. ``output`` has shape
            (num_samples, num_anchors * grid * grid, num_classes + 5) with the
            four box values multiplied by ``self.stride``.
        """
        # Tensor constructors matching the device of the input.
        # NOTE(review): LongTensor and ByteTensor are not referenced below.
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
        ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor

        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)

        # Separate anchors from per-anchor attributes, then move the
        # attributes to the last axis:
        # (N, num_anchors, grid_size, grid_size, num_classes + 5),
        # e.g. (N, 3, 13, 13, 85). contiguous() yields a tensor with
        # contiguous memory for the views taken below.
        per_anchor = x.view(num_samples, self.num_anchors,
                            self.num_classes + 5, grid_size, grid_size)
        prediction = per_anchor.permute(0, 1, 3, 4, 2).contiguous()

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

        # Recompute the cached offsets whenever the grid size changes.
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # Add offset and scale with anchors
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        scaled_boxes = pred_boxes.view(num_samples, -1, 4) * self.stride
        flat_conf = pred_conf.view(num_samples, -1, 1)
        flat_cls = pred_cls.view(num_samples, -1, self.num_classes)
        output = torch.cat((scaled_boxes, flat_conf, flat_cls), -1)

        if targets is None:
            return output, 0

        # Author's notes on the build_targets results (build_targets is not
        # visible here - confirm against its implementation):
        #   iou_scores: IoU between predicted and true boxes at labelled
        #       object positions
        #   class_mask: whether the class was predicted correctly at those
        #       positions
        #   obj_mask: the best-IoU box in each grid cell containing an object
        #   noobj_mask: everything except the best-IoU boxes and the boxes
        #       whose IoU exceeds 0.5
        #   tx, ty, tw, th: centre coordinates and size of labelled objects
        #   tcls: class labels
        #   tconf: obj_mask.float()
        iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
        loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                        tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # Metrics
        cls_acc = 100 * class_mask[obj_mask].mean()  # class accuracy (%)
        conf_obj = pred_conf[obj_mask].mean()  # mean confidence on obj_mask
        conf_noobj = pred_conf[noobj_mask].mean()  # mean confidence on noobj_mask
        conf50 = (pred_conf > 0.5).float()  # confidence above 0.5
        iou50 = (iou_scores > 0.5).float()  # IoU above 0.5
        iou75 = (iou_scores > 0.75).float()  # IoU above 0.75
        detected_mask = conf50 * class_mask * tconf
        # The 1e-16 terms guard against division by zero.
        object_count = obj_mask.sum() + 1e-16
        hits50 = torch.sum(iou50 * detected_mask)
        precision = hits50 / (conf50.sum() + 1e-16)
        recall50 = hits50 / object_count  # recall at IoU 0.5
        recall75 = torch.sum(iou75 * detected_mask) / object_count  # recall at IoU 0.75

        tracked = (
            ("loss", total_loss),
            ("x", loss_x),
            ("y", loss_y),
            ("w", loss_w),
            ("h", loss_h),
            ("conf", loss_conf),
            ("cls", loss_cls),
            ("cls_acc", cls_acc),
            ("recall50", recall50),
            ("recall75", recall75),
            ("precision", precision),
            ("conf_obj", conf_obj),
            ("conf_noobj", conf_noobj),
        )
        metrics = {name: to_cpu(value).item() for name, value in tracked}
        metrics["grid_size"] = grid_size
        self.metrics = metrics

        return output, total_loss
예제 #10
0
def compute_loss(predictions, targets, model):  # predictions, targets, model
    """Accumulate box, objectness and class losses over every prediction layer.

    Args:
        predictions: Sequence of per-layer raw prediction tensors. Each one is
            indexed below as ``[image, anchor, grid_y, grid_x, channel]`` with
            channels 0-3 = box, 4 = objectness, 5+ = class logits.
        targets: Ground-truth tensor. Only ``targets.device`` is read here;
            the contents are consumed by ``build_targets``.
        model: Forwarded to ``build_targets``. ``model.hyperparams`` is read
            and ``model.gr`` is overwritten with 1 whenever a layer has at
            least one matched target.

    Returns:
        A pair ``(loss * batch_size, components)`` where ``components`` is
        ``to_cpu`` applied to the concatenation ``[box, obj, cls, total]``.
    """
    dev = targets.device
    cls_loss = torch.zeros(1, device=dev)
    box_loss = torch.zeros(1, device=dev)
    obj_loss = torch.zeros(1, device=dev)

    # Per-layer target classes, target boxes, index tuples and anchors.
    target_cls, target_box, target_idx, layer_anchors = build_targets(
        predictions, targets, model)
    _ = model.hyperparams  # read for parity with the original; not used below

    # Both criteria are plain BCE-with-logits with a unit positive weight.
    cls_criterion = nn.BCEWithLogitsLoss(
        pos_weight=torch.tensor([1.0], device=dev))
    obj_criterion = nn.BCEWithLogitsLoss(
        pos_weight=torch.tensor([1.0], device=dev))

    # Label smoothing (https://arxiv.org/pdf/1902.04103.pdf eqn 3); eps=0.0
    # is passed, so smoothing is requested with zero strength.
    positive_label, negative_label = smooth_BCE(eps=0.0)

    # Focal loss is switched off by the hard-coded gamma of 0.
    focal_gamma = 0  #hyperparams['fl_gamma']  # focal loss gamma
    if focal_gamma > 0:
        cls_criterion = FocalLoss(cls_criterion, focal_gamma)
        obj_criterion = FocalLoss(obj_criterion, focal_gamma)

    # Per-layer weight applied to the objectness term (P3-P6).
    layer_weights = [4.0, 1.0, 0.4, 0.1]

    for idx, layer_pred in enumerate(predictions):
        img_idx, anchor_idx, gy, gx = target_idx[idx]
        obj_target = torch.zeros_like(layer_pred[..., 0], device=dev)
        matched_count = img_idx.shape[0]

        if matched_count:
            # Predictions at the cells/anchors that own a target.
            matched = layer_pred[img_idx, anchor_idx, gy, gx]

            # Box regression: decode centre and size, then score with CIoU.
            centre = matched[:, :2].sigmoid() * 2. - 0.5
            size = (matched[:, 2:4].sigmoid() * 2)**2 * layer_anchors[idx]
            decoded_box = torch.cat((centre, size), 1)
            iou = bbox_iou(decoded_box.T,
                           target_box[idx],
                           x1y1x2y2=False,
                           CIoU=True)
            box_loss += (1.0 - iou).mean()

            model.gr = 1

            # Objectness target: blend of 1 and the (detached) IoU by model.gr.
            iou_ratio = iou.detach().clamp(0).type(obj_target.dtype)
            obj_target[img_idx, anchor_idx, gy,
                       gx] = (1.0 - model.gr) + model.gr * iou_ratio

            # Classification: smoothed one-hot targets against class logits.
            class_logits = matched[:, 5:]
            class_target = torch.full_like(class_logits,
                                           negative_label,
                                           device=dev)
            class_target[range(matched_count), target_cls[idx]] = positive_label
            cls_loss += cls_criterion(class_logits, class_target)

        obj_loss += obj_criterion(layer_pred[..., 4],
                                  obj_target) * layer_weights[idx]

    # Fixed gains for the three components.
    box_loss *= 0.05 * (3. / 2)
    obj_loss *= (3. / 2)
    cls_loss *= 0.31

    # Batch size is taken from the last layer's objectness target.
    batch_size = obj_target.shape[0]
    total = box_loss + obj_loss + cls_loss

    components = torch.cat((box_loss, obj_loss, cls_loss, total))
    return total * batch_size, to_cpu(components)
예제 #11
0
    def forward(self, x, targets=None, img_dim=None, Half=False):
        """Decode a YOLO head's raw feature map and optionally compute its loss.

        Args:
            x: Raw head output. It is reshaped with ``view`` to
                ``(num_samples, num_anchors, num_classes + 5, grid, grid)``,
                so its channel count must be
                ``num_anchors * (num_classes + 5)`` and its last two
                dimensions must both equal ``x.size(2)``.
            targets: Ground truth forwarded to ``build_targets``. ``None``
                selects inference mode and skips the loss entirely.
            img_dim: Input image size; stored on ``self.img_dim`` and passed
                to ``compute_grid_offsets``.
            Half: Forwarded unchanged to ``compute_grid_offsets``.

        Returns:
            ``(output, 0)`` when ``targets`` is ``None``, otherwise
            ``(output, total_loss)``. ``output`` has shape
            ``(num_samples, num_anchors * grid * grid, 5 + num_classes)``:
            box (centre x, centre y, width, height) scaled by ``self.stride``,
            then confidence, then per-class scores.

        Side effects:
            Sets ``self.img_dim``; in training mode also sets ``self.metrics``.
        """
        # Example from the original notes: with a 416x416 input the heads see
        # grids such as 13x13 or 26x26 with 255 channels, where
        # 255 = 3 * (4 + 1 + 80): 3 boxes per cell, 4 box coordinates,
        # 1 confidence and 80 classes. Box order is centre x, centre y,
        # width, height.
        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)

        # prediction: (batch, num_anchors, grid, grid, num_classes + 5)
        prediction = (
            x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
                .permute(0, 1, 3, 4, 2)  # move the channel axis last
                .contiguous()
        )

        # Centre x/y, confidence and class scores are squashed into (0, 1)
        # with a sigmoid. Width and height stay raw because they feed exp()
        # below and may legitimately exceed 1.
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf (box confidence)
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

        # If grid size does not match current we compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, img_dim, cuda=x.is_cuda, Half=Half)

        # pred_boxes holds the boxes predicted by the network, in grid units.
        # Allocate it on the same device and with the same dtype as the
        # prediction: the previous code always fell back to
        # torch.cuda.FloatTensor for non-half inputs, which breaks CPU runs.
        pred_boxes = prediction.new_empty(prediction[..., :4].shape)
        # Add offset and scale with anchors (.data keeps the boxes detached).
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        output = torch.cat(
            (
                pred_boxes.view(num_samples, -1, 4) * self.stride,
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        # The presence of targets distinguishes training from inference.
        if targets is None:
            return output, 0
        else:
            iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
                pred_boxes=pred_boxes,
                pred_cls=pred_cls,
                target=targets,
                anchors=self.scaled_anchors,
                ignore_thres=self.ignore_thres,
            )

            # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
            loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
            loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
            loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
            loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
            loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
            loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
            # Positive and negative samples carry their own weights
            # (obj_scale, noobj_scale) in the confidence loss.
            loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
            loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
            total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            # Metrics
            cls_acc = 100 * class_mask[obj_mask].mean()
            conf_obj = pred_conf[obj_mask].mean()
            conf_noobj = pred_conf[noobj_mask].mean()
            conf50 = (pred_conf > 0.5).float()
            iou50 = (iou_scores > 0.5).float()
            iou75 = (iou_scores > 0.75).float()
            detected_mask = conf50 * class_mask * tconf
            precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
            recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
            recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

            self.metrics = {
                "loss": to_cpu(total_loss).item(),
                "x": to_cpu(loss_x).item(),
                "y": to_cpu(loss_y).item(),
                "w": to_cpu(loss_w).item(),
                "h": to_cpu(loss_h).item(),
                "conf": to_cpu(loss_conf).item(),
                "cls": to_cpu(loss_cls).item(),
                "cls_acc": to_cpu(cls_acc).item(),
                "recall50": to_cpu(recall50).item(),
                "recall75": to_cpu(recall75).item(),
                "precision": to_cpu(precision).item(),
                "conf_obj": to_cpu(conf_obj).item(),
                "conf_noobj": to_cpu(conf_noobj).item(),
                "grid_size": grid_size,
            }

            return output, total_loss
예제 #12
0
                           transform=Compose(list_trans_val))
    embed_set10 = "final_test10"
    test_embed_dir10 = os.path.join(embed_dir, embed_set10)
    df_test_embed10, _ = calculate_embedding(test_dl10,
                                             model,
                                             savedir=test_embed_dir10,
                                             concatenate="append")
    test_embed10 = DataLoadDf(df_test_embed10,
                              many_hot_encoder.encode_weak,
                              transform=Compose(trans_embedding))
    test_embed_loader10 = DataLoader(test_embed10,
                                     batch_size=cfg.batch_size_classif,
                                     shuffle=False,
                                     num_workers=num_workers,
                                     drop_last=False)

    model = to_cpu(model)
    classif_model = to_cuda_if_available(classif_model)
    classif_model.eval()
    mean_test_results1 = measure_classif(classif_model,
                                         test_embed_loader1,
                                         classes=classes,
                                         suffix_print="test1")

    mean_test_results10 = measure_classif(classif_model,
                                          test_embed_loader10,
                                          classes=classes,
                                          suffix_print="test10")

    print(f"Time of the program: {time.time() - t}")
예제 #13
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode a YOLO head's raw feature map and optionally compute its loss.

        Args:
            x: Raw head output in NCHW layout. It is reshaped with ``view`` to
                ``(num_samples, num_anchors, num_classes + 5, grid, grid)``
                (e.g. n x 255 x 13 x 13 -> n x 3 x 85 x 13 x 13).
            targets: Ground truth forwarded to ``build_targets``. ``None``
                selects inference mode and skips the loss entirely.
            img_dim: Input image size; stored on ``self.img_dim``.

        Returns:
            ``(output, 0)`` when ``targets`` is ``None``, otherwise
            ``(output, total_loss)``. ``output`` has shape
            ``(num_samples, num_anchors * grid * grid, 5 + num_classes)``:
            box (x_ctr, y_ctr, w, h) scaled by ``self.stride``, then
            objectness, then per-class scores.

        Side effects:
            Sets ``self.img_dim``. In training mode also sets ``self.metrics``
            and overwrites ``self.noobj_scale`` with 100000.
        """
        # Follow the input's device instead of assuming CUDA, so the layer
        # also works on CPU (compute_grid_offsets is already told x.is_cuda).
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)

        # convert predictions
        # note: NCHW format -> grid_y, grid_x
        # nx255x13x13 -> nx3x85x13x13 -> nx3x13x13x85
        # 85: tx_ctr, ty_ctr, tw, th, objectness, 80 class
        prediction = (x.view(num_samples, self.num_anchors,
                             self.num_classes + 5, grid_size,
                             grid_size).permute(0, 1, 3, 4, 2).contiguous())

        # get and parse outputs
        # x, y, w, h, pred_conf: [batch_size, anchors, grid_y, grid_x]
        # pred_cls:              [batch_size, anchors, grid_y, grid_x, cls]
        x = torch.sigmoid(prediction[..., 0])  # tx_ctr range: (0, 1)
        y = torch.sigmoid(prediction[..., 1])  # ty_ctr range: (0, 1)
        w = prediction[..., 2]  # tw
        h = prediction[..., 3]  # th
        pred_conf = torch.sigmoid(prediction[..., 4])  # objectness
        pred_cls = torch.sigmoid(prediction[..., 5:])  # class scores

        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # add offset and scale with anchors (.data keeps the boxes detached)
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + self.grid_x  # x_ctr in grid units
        pred_boxes[..., 1] = y.data + self.grid_y  # y_ctr in grid units
        # width / height w.r.t. the current feature map dimension
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        # output shape: [num_samples, anchors * grid * grid, 5 + num_classes]
        output = torch.cat(
            (
                # (x_ctr, y_ctr, w, h) rescaled by the layer stride
                pred_boxes.view(num_samples, -1, 4) * self.stride,
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        if targets is None:
            return output, 0
        else:
            # (tx, ty, tw, th): target offsets
            iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
                pred_boxes=pred_boxes,
                pred_cls=pred_cls,
                target=targets,
                anchors=self.scaled_anchors,
                ignore_thres=self.ignore_thres,
            )

            # Positive samples: localisation + class + objectness losses.
            # Masking with obj_mask keeps only the cells that own a target,
            # e.g. x[obj_mask] has one entry per positive sample.
            loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
            loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
            loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
            loss_h = self.mse_loss(h[obj_mask], th[obj_mask])

            # Class loss via self.bce_loss; pred_cls[obj_mask] and
            # tcls[obj_mask] are both [n_positive, num_classes].
            loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])

            # Objectness loss on positive cells.
            loss_conf_obj = self.bce_loss(pred_conf[obj_mask],
                                          tconf[obj_mask])

            # Negative samples: no-object loss.
            loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                            tconf[noobj_mask])

            # Combine. (Original author's note: this weighting is
            # "unreasonable".)
            loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
            total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            # metrics
            cls_acc = 100 * class_mask[obj_mask].mean()
            conf_obj = pred_conf[obj_mask].mean()
            conf_noobj = pred_conf[noobj_mask].mean()
            conf50 = (pred_conf > 0.5).float()
            iou50 = (iou_scores > 0.5).float()
            # Was `> 0.5`, which made recall75 a copy of recall50.
            iou75 = (iou_scores > 0.75).float()
            # objectness > 0.5 and predicted class is correct
            detected_mask = conf50 * class_mask * tconf
            # precision = TP / (TP + FP), with TP + FP = all objectness > 0.5
            precision = torch.sum(iou50 * detected_mask) / (
                conf50.sum() + 1e-16)
            # recall = TP / (TP + FN), with TP + FN = all positive samples
            recall50 = torch.sum(iou50 * detected_mask) / (
                obj_mask.sum() + 1e-16)
            recall75 = torch.sum(
                iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

            self.metrics = {
                "grid_size": grid_size,
                "loss": to_cpu(total_loss).item(),
                "loss-tx": to_cpu(loss_x).item(),
                "loss-ty": to_cpu(loss_y).item(),
                "loss-tw": to_cpu(loss_w).item(),
                "loss-th": to_cpu(loss_h).item(),
                "loss-conf": to_cpu(loss_conf).item(),
                "loss-cls": to_cpu(loss_cls).item(),
                "loss-obj": to_cpu(loss_conf_obj).item(),
                "loss-noobj x scale":
                to_cpu(loss_conf_noobj * self.noobj_scale).item(),
                "loss-noobj": to_cpu(loss_conf_noobj).item(),
                "cls_acc": to_cpu(cls_acc).item(),
                "recall50": to_cpu(recall50).item(),
                "recall75": to_cpu(recall75).item(),
                "precision": to_cpu(precision).item(),
                "conf_obj": to_cpu(conf_obj).item(),
                "conf_noobj": to_cpu(conf_noobj).item(),
            }

            # NOTE(review): this overwrites the configured noobj_scale after
            # every training forward pass, so only the first call uses the
            # original value. Kept as-is because training may rely on it;
            # confirm whether this is intentional.
            self.noobj_scale = 100000

            return output, total_loss
예제 #14
0
    def forward(self, x, target=None, img_dim=None):
        """Decode a YOLO head's raw feature map and optionally compute its loss.

        Args:
            x: Raw head output. It is reshaped with ``view`` to
                ``(num_samples, num_anchors, num_classes + 5, grid, grid)``.
            target: Ground truth forwarded to ``build_targets``. ``None``
                selects inference mode and skips the loss entirely.
            img_dim: Input image size; stored on ``self.img_dim``.

        Returns:
            ``(output, 0)`` when ``target`` is ``None``, otherwise
            ``(output, total_loss)``. ``output`` has shape
            ``(num_samples, num_anchors * grid * grid, 5 + num_classes)``.

        Side effects:
            Sets ``self.img_dim``; in training mode also sets ``self.metrics``.
        """
        # Legacy tensor constructor matching the input's device.
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        self.img_dim = img_dim
        num_samples = x.size(0)
        # Spatial size of the feature map (x is laid out as N, C, H, W).
        grid_size = x.size(2)

        # Split channels per anchor, then move the per-box values last:
        # (N, A, C + 5, G, G) -> (N, A, G, G, C + 5)
        prediction = (
            x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
            .permute(0, 1, 3, 4, 2)
            .contiguous()
        )

        # get output
        x = torch.sigmoid(prediction[..., 0])
        y = torch.sigmoid(prediction[..., 1])
        w = prediction[..., 2]
        h = prediction[..., 3]
        pred_conf = torch.sigmoid(prediction[..., 4])
        # Slice `5:` keeps the class axis; indexing `5` would drop it and
        # return only the first class score.
        pred_cls = torch.sigmoid(prediction[..., 5:])

        # if the grid size does not match current we compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # add offset and scale with anchors (.data keeps the boxes detached)
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        output = torch.cat(
            (
                pred_boxes.view(num_samples, -1, 4) * self.stride,
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        if target is None:
            return output, 0
        else:
            iou_score, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
                pred_boxes=pred_boxes,
                pred_cls=pred_cls,
                target=target,
                anchors=self.scaled_anchors,
                ignore_thres=self.ignore_thres,
            )

            # Box and class losses only look at cells that own a target.
            loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
            loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
            loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
            loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
            loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
            loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
            loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
            # Class scores are compared with the class targets (tcls), not
            # with the objectness targets (tconf).
            loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
            total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            # metrics
            cls_acc = 100 * class_mask[obj_mask].mean()
            conf_obj = pred_conf[obj_mask].mean()
            conf_noobj = pred_conf[noobj_mask].mean()
            conf50 = (pred_conf > 0.5).float()
            iou50 = (iou_score > 0.5).float()
            iou75 = (iou_score > 0.75).float()
            detected_mask = conf50 * class_mask * tconf
            precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
            recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
            recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

            self.metrics = {
                "loss": to_cpu(total_loss).item(),
                "x": to_cpu(loss_x).item(),
                "y": to_cpu(loss_y).item(),
                "w": to_cpu(loss_w).item(),
                "h": to_cpu(loss_h).item(),
                "conf": to_cpu(loss_conf).item(),
                "cls": to_cpu(loss_cls).item(),
                "cls_acc": to_cpu(cls_acc).item(),
                "recall50": to_cpu(recall50).item(),
                "recall75": to_cpu(recall75).item(),
                "precision": to_cpu(precision).item(),
                "conf_obj": to_cpu(conf_obj).item(),
                "conf_noobj": to_cpu(conf_noobj).item(),
                "grid_size": grid_size,
            }

            return output, total_loss
예제 #15
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode a three-parameter (x, y, diameter) YOLO head and optionally its loss.

        Each box is described by centre x, centre y and one size value ``d``,
        so the per-anchor channel count is ``num_classes + 4`` (3 box values,
        1 confidence, then the class scores).

        Args:
            x: Raw head output, reshaped with ``view`` to
                ``(num_samples, num_anchors, num_classes + 4, grid, grid)``.
            targets: Ground truth forwarded to ``build_targets``. ``None``
                selects inference mode and skips the loss entirely.
            img_dim: Input image size; stored on ``self.img_dim``.

        Returns:
            ``(output, 0)`` when ``targets`` is ``None``, otherwise
            ``(output, total_loss)``. ``output`` has shape
            ``(num_samples, num_anchors * grid * grid, 4 + num_classes)``.

        Side effects:
            Sets ``self.img_dim``; in training mode also sets ``self.metrics``.
        """
        # Legacy tensor constructor matching the input's device.
        on_gpu = x.is_cuda
        FloatTensor = torch.cuda.FloatTensor if on_gpu else torch.FloatTensor

        self.img_dim = img_dim
        num_samples, grid_size = x.size(0), x.size(2)

        # (N, A * (C + 4), G, G) -> (N, A, C + 4, G, G) -> (N, A, G, G, C + 4)
        per_anchor = x.view(num_samples, self.num_anchors,
                            self.num_classes + 4, grid_size, grid_size)
        prediction = per_anchor.permute(0, 1, 3, 4, 2).contiguous()

        # Channel layout: 0 = centre x, 1 = centre y, 2 = diameter,
        # 3 = confidence, 4+ = class scores (no separate height channel).
        centre_x = torch.sigmoid(prediction[..., 0])
        centre_y = torch.sigmoid(prediction[..., 1])
        d = prediction[..., 2]
        pred_conf = torch.sigmoid(prediction[..., 3])
        pred_cls = torch.sigmoid(prediction[..., 4:])

        # Recompute the cached offsets when the grid size changed.
        if self.grid_size != grid_size:
            self.compute_grid_offsets(grid_size, cuda=on_gpu)

        # Boxes in grid units; only the diameter is scaled by the anchor
        # (anchor_w is used for it). .data keeps the boxes detached.
        pred_boxes = FloatTensor(prediction[..., :3].shape)
        pred_boxes[..., 0] = centre_x.data + self.grid_x
        pred_boxes[..., 1] = centre_y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(d.data) * self.anchor_w

        flat_boxes = pred_boxes.view(num_samples, -1, 3) * self.stride
        flat_conf = pred_conf.view(num_samples, -1, 1)
        flat_cls = pred_cls.view(num_samples, -1, self.num_classes)
        output = torch.cat((flat_boxes, flat_conf, flat_cls), -1)

        # Inference: nothing more to do.
        if targets is None:
            return output, 0

        # Training: build_targets returns the usual ten values; the width
        # target (seventh) is used as the diameter target and the height
        # target (eighth) is ignored.
        (iou_scores, class_mask, obj_mask, noobj_mask,
         tx, ty, td, _unused_th, tcls, tconf) = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        # Box and class losses only look at cells that own a target; the
        # confidence loss also covers the no-object cells.
        loss_x = self.mse_loss(centre_x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(centre_y[obj_mask], ty[obj_mask])
        loss_d = self.mse_loss(d[obj_mask], td[obj_mask])
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                        tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
        # The diameter term enters with half weight.
        total_loss = loss_x + loss_y + 0.5 * loss_d + loss_conf + loss_cls

        # Metrics
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        detected_mask = conf50 * class_mask * tconf
        hits50 = torch.sum(iou50 * detected_mask)
        hits75 = torch.sum(iou75 * detected_mask)
        positives = obj_mask.sum() + 1e-16
        precision = hits50 / (conf50.sum() + 1e-16)
        recall50 = hits50 / positives
        recall75 = hits75 / positives

        def scalar(value):
            # Move a tensor to the CPU and unwrap it to a Python number.
            return to_cpu(value).item()

        self.metrics = {
            "loss": scalar(total_loss),
            "x": scalar(loss_x),
            "y": scalar(loss_y),
            "d": scalar(loss_d),
            "conf": scalar(loss_conf),
            "cls": scalar(loss_cls),
            "cls_acc": scalar(cls_acc),
            "recall50": scalar(recall50),
            "recall75": scalar(recall75),
            "precision": scalar(precision),
            "conf_obj": scalar(conf_obj),
            "conf_noobj": scalar(conf_noobj),
            "grid_size": grid_size,
        }

        return output, total_loss
예제 #16
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode a pose head (projected point, depth, quaternion) and optionally its loss.

        The per-anchor channels are: u, v (projected point on the image
        plane), Z (3D depth coordinate), Qw, Qx, Qy, Qz (quaternion
        Qw + Qx*i + Qy*j + Qz*k), confidence, then the class scores, giving
        ``num_classes + 8`` channels.

        Args:
            x: Raw head output, reshaped with ``view`` to
                ``(num_samples, num_anchors, num_classes + 8, grid, grid)``.
            targets: Ground truth forwarded to ``build_targets``. ``None``
                selects inference mode and skips the loss entirely.
            img_dim: Input image size; stored on ``self.img_dim``.

        Returns:
            ``(output, 0)`` when ``targets`` is ``None``, otherwise
            ``(output, total_loss)``. ``output`` has shape
            ``(num_samples, num_anchors * grid * grid, 8 + num_classes)``;
            only the first two columns (u, v) are scaled by ``self.stride``.

        Side effects:
            Sets ``self.img_dim``; in training mode also sets ``self.metrics``.
        """
        # Legacy tensor constructor matching the input's device.
        on_gpu = x.is_cuda
        FloatTensor = torch.cuda.FloatTensor if on_gpu else torch.FloatTensor

        self.img_dim = img_dim
        num_samples, grid_size = x.size(0), x.size(2)

        # (N, A * (C + 8), G, G) -> (N, A, C + 8, G, G) -> (N, A, G, G, C + 8)
        per_anchor = x.view(num_samples, self.num_anchors,
                            self.num_classes + 8, grid_size, grid_size)
        prediction = per_anchor.permute(0, 1, 3, 4, 2).contiguous()

        # Split the channel axis into its named components.
        u = torch.sigmoid(prediction[..., 0])
        v = torch.sigmoid(prediction[..., 1])
        Z = prediction[..., 2]
        Qw, Qx, Qy, Qz = (prediction[..., k] for k in (3, 4, 5, 6))
        pred_conf = torch.sigmoid(prediction[..., 7])
        pred_cls = torch.sigmoid(prediction[..., 8:])

        # Recompute the cached offsets when the grid size changed.
        if self.grid_size != grid_size:
            self.compute_grid_offsets(grid_size, cuda=on_gpu)

        # Decoded pose values; .data keeps them detached from the graph.
        # Qw is squashed with a sigmoid, the vector part with tanh.
        pred_uvZQ = FloatTensor(prediction[..., :7].shape)
        pred_uvZQ[..., 0] = u.data + self.grid_x
        pred_uvZQ[..., 1] = v.data + self.grid_y
        pred_uvZQ[..., 2] = Z.data
        pred_uvZQ[..., 3] = torch.sigmoid(Qw.data)
        for channel, component in ((4, Qx), (5, Qy), (6, Qz)):
            pred_uvZQ[..., channel] = torch.tanh(component.data)

        flat_uv = pred_uvZQ[..., :2].view(num_samples, -1, 2) * self.stride
        flat_zq = pred_uvZQ[..., 2:].view(num_samples, -1, 5)
        flat_conf = pred_conf.view(num_samples, -1, 1)
        flat_cls = pred_cls.view(num_samples, -1, self.num_classes)
        output = torch.cat((flat_uv, flat_zq, flat_conf, flat_cls), -1)

        # Inference: nothing more to do.
        if targets is None:
            return output, 0

        (z_scores, class_mask, obj_mask, noobj_mask,
         tu, tv, tZ, tQw, tQx, tQy, tQz, tcls, tconf) = build_targets(
            pred_uvZQ=pred_uvZQ,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.anchors,
            ignore_thres=self.ignore_thres,
        )

        # Regression and class losses only look at cells that own a target;
        # u, v and Z are weighted ten times more than the quaternion terms.
        loss_u = 10 * self.mse_loss(u[obj_mask], tu[obj_mask])
        loss_v = 10 * self.mse_loss(v[obj_mask], tv[obj_mask])
        loss_Z = 10 * self.mse_loss(Z[obj_mask], tZ[obj_mask])
        loss_Qw = self.mse_loss(Qw[obj_mask], tQw[obj_mask])
        loss_Qx = self.mse_loss(Qx[obj_mask], tQx[obj_mask])
        loss_Qy = self.mse_loss(Qy[obj_mask], tQy[obj_mask])
        loss_Qz = self.mse_loss(Qz[obj_mask], tQz[obj_mask])
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
        total_loss = loss_u + loss_v + loss_Z + loss_Qw + loss_Qx + loss_Qy + loss_Qz + loss_conf + loss_cls

        # Metrics: recall counts confident, correctly classified detections
        # whose z score is below the given threshold.
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        z5 = (z_scores < 0.5).float()
        z05 = (z_scores < 0.05).float()
        detected_mask = conf50 * class_mask * tconf
        positives = obj_mask.sum() + 1e-16
        recall5 = torch.sum(z5 * detected_mask) / positives
        recall05 = torch.sum(z05 * detected_mask) / positives

        def scalar(value):
            # Move a tensor to the CPU and unwrap it to a Python number.
            return to_cpu(value).item()

        self.metrics = {
            "loss": scalar(total_loss),
            "u": scalar(loss_u),
            "v": scalar(loss_v),
            "Z": scalar(loss_Z),
            "Qw": scalar(loss_Qw),
            "Qx": scalar(loss_Qx),
            "Qy": scalar(loss_Qy),
            "Qz": scalar(loss_Qz),
            "conf": scalar(loss_conf),
            "cls": scalar(loss_cls),
            "cls_acc": scalar(cls_acc),
            "recall5": scalar(recall5),
            "recall05": scalar(recall05),
            "conf_obj": scalar(conf_obj),
            "conf_noobj": scalar(conf_noobj),
            "grid_size": grid_size,
        }

        return output, total_loss
예제 #17
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode one YOLO head and, when targets are supplied, compute its loss.

        Args:
            x: Raw head output. It is reshaped below as
                (batch, num_anchors * (num_classes + 5), grid, grid).
            targets: Ground-truth tensor passed straight to ``build_targets``,
                or ``None`` for inference.
            img_dim: Input image size; stored on ``self.img_dim``.

        Returns:
            ``(output, 0)`` when ``targets`` is ``None``, otherwise
            ``(output, total_loss)``. ``output`` has shape
            (batch, num_anchors * grid * grid, 5 + num_classes); its four box
            columns are multiplied by ``self.stride``.

        Side effects:
            Prints debug information, sets ``self.img_dim`` and, on the
            training path, overwrites ``self.metrics``.
        """
        print("^" * 30)
        print("yolo layer input: ", x.shape)
        # targets is None on the inference path, so only read .shape when it
        # is actually a tensor (the unguarded access used to raise here).
        print("targets: ", None if targets is None else targets.shape)
        print("img_dim: ", img_dim)

        # Pick the legacy tensor constructor matching the input's device type.
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        # Size of the image that was fed to the model.
        self.img_dim = img_dim
        num_samples = x.size(0)
        # Side length of the feature map.
        grid_size = x.size(2)

        # (N, A*(C+5), G, G) -> (N, A, C+5, G, G) -> (N, A, G, G, C+5)
        prediction = (x.view(num_samples, self.num_anchors,
                             self.num_classes + 5, grid_size,
                             grid_size).permute(0, 1, 3, 4, 2).contiguous())

        # Split the last axis into its components.
        x = torch.sigmoid(prediction[..., 0])
        y = torch.sigmoid(prediction[..., 1])
        w = prediction[..., 2]
        h = prediction[..., 3]
        pred_conf = torch.sigmoid(prediction[..., 4])
        pred_cls = torch.sigmoid(prediction[..., 5:])

        # If grid size does not match current we compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # Add the cell offsets to the centres and scale the anchors by exp(w/h).
        # The original author's note attributes the exp() form to the YOLOv3 paper.
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        output = torch.cat(
            (
                # Boxes are in grid units here; multiplying by the stride
                # presumably maps them to input-image pixels -- TODO confirm.
                pred_boxes.view(num_samples, -1, 4) * self.stride,
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        if targets is None:
            return output, 0

        # Convert the raw targets into per-anchor masks and regression targets.
        iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )
        # Original author's note: without this conversion a warning is reported
        # when the masks are used for indexing.
        obj_mask = obj_mask.bool()
        noobj_mask = noobj_mask.bool()

        # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
        loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                        tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # Metrics
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        detected_mask = conf50 * class_mask * tconf
        # 1e-16 keeps the ratios finite when a denominator is zero.
        precision = torch.sum(
            iou50 * detected_mask) / (conf50.sum() + 1e-16)
        recall50 = torch.sum(
            iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
        recall75 = torch.sum(
            iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

        self.metrics = {
            "loss": to_cpu(total_loss).item(),
            "x": to_cpu(loss_x).item(),
            "y": to_cpu(loss_y).item(),
            "w": to_cpu(loss_w).item(),
            "h": to_cpu(loss_h).item(),
            "conf": to_cpu(loss_conf).item(),
            "cls": to_cpu(loss_cls).item(),
            "cls_acc": to_cpu(cls_acc).item(),
            "recall50": to_cpu(recall50).item(),
            "recall75": to_cpu(recall75).item(),
            "precision": to_cpu(precision).item(),
            "conf_obj": to_cpu(conf_obj).item(),
            "conf_noobj": to_cpu(conf_noobj).item(),
            "grid_size": grid_size,
        }

        return output, total_loss
예제 #18
0
    def forward(self, x, targets=None, img_dim=None):
        """Turn this YOLO head's raw feature map into detections, plus a loss when training.

        Args:
            x: Head output, viewed below as
                (batch, num_anchors * (5 + num_classes), grid, grid).
            targets: Ground-truth tensor handed to ``build_targets``; ``None``
                selects the inference path.
            img_dim: Input image size, recorded on ``self.img_dim``.

        Returns:
            ``(output, 0)`` without targets, else ``(output, total_loss)``.
            ``output`` is (batch, num_anchors * grid * grid, 5 + num_classes)
            with the four box columns multiplied by ``self.stride``.
        """
        on_gpu = x.is_cuda
        # Legacy tensor constructor matching the input's device type.
        FloatTensor = torch.cuda.FloatTensor if on_gpu else torch.FloatTensor

        self.img_dim = img_dim
        batch, grid = x.size(0), x.size(2)

        # (batch, A*(5+C), G, G) -> (batch, A, 5+C, G, G) -> (batch, A, G, G, 5+C)
        raw = x.view(batch, self.num_anchors, 5 + self.num_classes, grid, grid)
        raw = raw.permute(0, 1, 3, 4, 2).contiguous()

        # Per-anchor, per-cell components; all but the class scores are
        # (batch, A, G, G), the class scores carry a trailing class axis.
        centre_x = torch.sigmoid(raw[..., 0])
        centre_y = torch.sigmoid(raw[..., 1])
        log_w = raw[..., 2]
        log_h = raw[..., 3]
        pred_conf = torch.sigmoid(raw[..., 4])
        pred_cls = torch.sigmoid(raw[..., 5:])

        # Refresh the cached offsets/anchors whenever the grid size changes.
        if grid != self.grid_size:
            self.compute_grid_offsets(grid, cuda=on_gpu)

        # Boxes in grid units: sigmoid offset plus cell index for the centre,
        # anchor size scaled by exp() for width/height (broadcast over the batch).
        pred_boxes = FloatTensor(raw[..., :4].shape)
        pred_boxes[..., 0] = centre_x.data + self.grid_x
        pred_boxes[..., 1] = centre_y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(log_w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(log_h.data) * self.anchor_h

        # Flatten anchors and cells; the original note says the stride factor
        # scales boxes back to the input size.
        flat_boxes = pred_boxes.view(batch, -1, 4) * self.stride
        flat_conf = pred_conf.view(batch, -1, 1)
        flat_cls = pred_cls.view(batch, -1, self.num_classes)
        output = torch.cat((flat_boxes, flat_conf, flat_cls), -1)

        # Inference: no ground truth, so there is nothing to score.
        if targets is None:
            return output, 0

        # Original author's notes on build_targets (not verifiable from here):
        #   targets is (num, 6) = (batch_index, cls, cx, cy, w, h);
        #   obj_mask marks the anchor matched to each ground-truth box;
        #   noobj_mask marks anchors whose IoU stays below the ignore threshold;
        #   tx/ty/tw/th are regression targets, tcls one-hot classes,
        #   tconf the target confidence.
        (iou_scores, class_mask, obj_mask, noobj_mask,
         tx, ty, tw, th, tcls, tconf) = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        mse, bce = self.mse_loss, self.bce_loss

        # Coordinate and size losses, restricted to matched anchors.
        loss_x = mse(centre_x[obj_mask], tx[obj_mask])
        loss_y = mse(centre_y[obj_mask], ty[obj_mask])
        loss_w = mse(log_w[obj_mask], tw[obj_mask])
        loss_h = mse(log_h[obj_mask], th[obj_mask])

        # Confidence loss: matched and unmatched anchors are weighted separately.
        loss_conf_obj = bce(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = bce(pred_conf[noobj_mask], tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj

        # Classification loss on matched anchors.
        loss_cls = bce(pred_cls[obj_mask], tcls[obj_mask])

        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # Metrics
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        detected_mask = conf50 * class_mask * tconf

        # The mask is cast to float before being summed for the recall terms.
        obj_mask = obj_mask.float()
        n_objects = obj_mask.sum() + 1e-16
        hits50 = torch.sum(iou50 * detected_mask)
        precision = hits50 / (conf50.sum() + 1e-16)
        recall50 = hits50 / n_objects
        recall75 = torch.sum(iou75 * detected_mask) / n_objects

        scalar_metrics = (
            ("loss", total_loss),
            ("x", loss_x),
            ("y", loss_y),
            ("w", loss_w),
            ("h", loss_h),
            ("conf", loss_conf),
            ("cls", loss_cls),
            ("cls_acc", cls_acc),
            ("recall50", recall50),
            ("recall75", recall75),
            ("precision", precision),
            ("conf_obj", conf_obj),
            ("conf_noobj", conf_noobj),
        )
        metrics = {key: to_cpu(value).item() for key, value in scalar_metrics}
        metrics["grid_size"] = grid
        self.metrics = metrics

        return output, total_loss
예제 #19
0
 def forward(self, x, targets=None):
     """Run the backbone and both detection branches stage by stage.

     ``self.conv1`` and ``self.maxpool`` are applied first and stored as
     ``"stage_0"``. Every name in ``self.stages`` is then executed in order
     and its output is cached under that name. Most stages consume the
     running tensor; the stages listed in the tables below instead read a
     specific earlier output, so those outputs ("conv5", "stage_2", ...)
     must have been produced by an earlier entry of ``self.stages``.

     Args:
         x: Input batch; ``x.shape[2]`` is taken as the image size.
         targets: Ground truth forwarded to the detection stages, or ``None``.

     Returns:
         The detections of all detection stages concatenated along dim 1 and
         passed through ``to_cpu`` when ``targets`` is ``None``; otherwise
         ``(loss, detections)`` where ``loss`` sums the per-stage losses.
     """
     # stage name -> name of the earlier output it consumes
     named_inputs = {
         "yolo1_conv1": "conv5",
         "yolo1_conv2": "yolo1_conv1",
         "yolo1_conv3": "yolo1_conv2",
         "yolo2_conv1": "yolo2_route1",
         "yolo2_upsample": "yolo2_conv1",
         "yolo2_conv2": "yolo2_route2",
         "yolo2_conv3": "yolo2_conv2",
     }
     # Detection stages additionally take (targets, img_dim) and return a loss.
     detection_inputs = {
         "yolo1_detection": "yolo1_conv3",
         "yolo2_detection": "yolo2_conv3",
     }

     img_dim = x.shape[2]
     layer_outputs, yolo_outputs = {}, []
     loss = 0

     x = self.conv1(x)
     x = self.maxpool(x)
     layer_outputs["stage_0"] = x

     for stage_name in self.stages:
         if stage_name in detection_inputs:
             # getattr() goes through normal attribute lookup instead of
             # calling the __getattr__ fallback hook directly.
             detection_layer = getattr(self, stage_name)
             source = layer_outputs[detection_inputs[stage_name]]
             x, layer_loss = detection_layer(source, targets, img_dim)
             loss += layer_loss
             yolo_outputs.append(x)
         elif stage_name in named_inputs:
             feature_layer = getattr(self, stage_name)
             x = feature_layer(layer_outputs[named_inputs[stage_name]])
         elif stage_name == "yolo2_route1":
             # Pure routing: branch off from the first head's first conv.
             x = layer_outputs["yolo1_conv1"]
         elif stage_name == "yolo2_route2":
             # Concatenate an earlier backbone output with the upsampled branch
             # along dim 1.
             x = torch.cat(
                 (layer_outputs["stage_2"], layer_outputs["yolo2_upsample"]), 1)
         else:
             # Ordinary sequential stage: feed the running tensor.
             feature_layer = getattr(self, stage_name)
             x = feature_layer(x)
         layer_outputs[stage_name] = x

     yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1))
     return yolo_outputs if targets is None else (loss, yolo_outputs)
예제 #20
0
    def forward(self, pred, targets=None):
        """Compute the training loss and metrics for already-decoded predictions.

        When ``targets`` is given, this sets ``self.grid_size``,
        ``self.stride``, ``self.grid_x``, ``self.grid_y`` and ``self.metrics``
        and returns ``(pred, total_loss)``. When ``targets`` is ``None`` it
        returns ``pred`` alone (not a tuple) and touches no state.

        Args:
            pred: Prediction tensor, returned unchanged. The view() below
                treats it as (n, num_anchors, grid, grid, num_classes + 5).
            targets: Ground truth forwarded to ``build_targets``, or ``None``.
        """
        if targets is not None:
            # Original author's note: the YOLO layer input is the feature map, of shape
            # n x ((5 + num_classes) * 3) x 13 x 13.
            # NOTE(review): this conflicts with the layout noted at the view() call
            # below -- confirm which shape callers actually pass.
            # Tensors for cuda support
            FloatTensor = torch.cuda.FloatTensor if pred.is_cuda else torch.FloatTensor
            LongTensor = torch.cuda.LongTensor if pred.is_cuda else torch.LongTensor  # not used in this method
            num_samples = pred.size(0)  # number of samples in the batch
            import math
            # NOTE(review): if pred is n x (num_anchors*grid*grid) x (num_classes+5),
            # size(2) is the per-box vector length rather than the cell count -- verify
            # the intended dimension.
            grid_size = int(math.sqrt(pred.size(2) /
                                      self.num_anchors))  # original note: expected to be 13, 26 or 52
            self.grid_size = grid_size
            self.stride = self.img_dim / grid_size
            prediction = (
                pred.view(
                    num_samples, self.num_anchors, grid_size, grid_size,
                    self.num_classes +
                    5)  # original note: pred is n x (num_anchors*grid*grid) x 85
                .contiguous()
                # contiguous() only copies when the tensor is not already contiguous.
            )
            # grid_x[0, 0, j, i] == i (column index) and grid_y[0, 0, j, i] == j (row index).
            self.grid_x = FloatTensor([i for j in range(self.grid_size) for i in range(self.grid_size)]) \
                .view([1, 1, self.grid_size, self.grid_size])
            self.grid_y = FloatTensor([j for j in range(self.grid_size) for i in range(self.grid_size)]) \
                .view([1, 1, self.grid_size, self.grid_size])
            pred_boxes = prediction[..., 0:4]
            # Centre expressed in grid units relative to its cell, then shifted by 0.5
            # and halved. NOTE(review): presumably the inverse of the decoding used
            # elsewhere in the model -- confirm.
            pred_x = (pred_boxes[..., 0] / self.stride - self.grid_x + 0.5) / 2
            pred_y = (pred_boxes[..., 1] / self.stride - self.grid_y + 0.5) / 2
            pred_w = pred_boxes[..., 2]
            pred_h = pred_boxes[..., 3]
            scaled_anchors = FloatTensor([
                (a_w, a_h) for a_w, a_h in self.anchors
            ])  # despite the name, these are the anchors exactly as stored (no scaling)
            anchor_w = scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1))
            anchor_h = scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1))
            # Width/height as half the square root of their ratio to the anchor size.
            pred_w = torch.sqrt(pred_w / anchor_w) / 2
            pred_h = torch.sqrt(pred_h / anchor_h) / 2
            pred_conf = prediction[..., 4]
            pred_cls = prediction[..., 5:]

            # Original author's note: the code below is mainly for performance evaluation.
            scaled_anchors = FloatTensor([
                (a_w / self.stride, a_h / self.stride)
                for a_w, a_h in self.anchors
            ])  # anchors divided by the stride, i.e. expressed in grid units
            iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
                pred_boxes=pred_boxes,
                pred_cls=pred_cls,
                target=targets,
                anchors=scaled_anchors,
                ignore_thres=self.ignore_thres,
                stride=self.stride,
            )

            # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
            loss_x = self.mse_loss(pred_x[obj_mask == 1], tx[obj_mask == 1])
            loss_y = self.mse_loss(pred_y[obj_mask == 1], ty[obj_mask == 1])
            loss_w = self.mse_loss(pred_w[obj_mask == 1], tw[obj_mask == 1])
            loss_h = self.mse_loss(pred_h[obj_mask == 1], th[obj_mask == 1])
            # The confidence targets are read from obj_mask itself; tconf is not used here.
            loss_conf_obj = self.bce_loss(pred_conf[obj_mask == 1],
                                          obj_mask[obj_mask == 1])
            loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask == 1],
                                            obj_mask[noobj_mask == 1])
            # The no-object term carries a hard-coded weight of 100.
            loss_conf = loss_conf_obj + 100 * loss_conf_noobj
            loss_cls = self.bce_loss(pred_cls[obj_mask == 1],
                                     tcls[obj_mask == 1])
            total_loss = loss_x * self.lambda_xy + loss_y * self.lambda_xy + \
                         loss_w * self.lambda_wh + loss_h * self.lambda_wh + \
                         loss_conf * self.lambda_conf + loss_cls * self.lambda_cls

            # Metrics used to evaluate the model
            cls_acc = 100 * class_mask[obj_mask == 1].mean()
            conf_obj = pred_conf[obj_mask == 1].mean()
            conf_noobj = pred_conf[noobj_mask == 1].mean()
            conf50 = (pred_conf > 0.5).float()
            iou50 = (iou_scores > 0.5).float()
            iou75 = (iou_scores > 0.75).float()
            detected_mask = conf50 * class_mask
            # 1e-16 keeps the ratios finite when a denominator is zero.
            precision = torch.sum(iou50 * detected_mask) / (
                conf50.sum() + 1e-16)  # precision
            recall50 = torch.sum(iou50 * detected_mask) / (
                obj_mask.sum() + 1e-16)  # recall
            recall75 = torch.sum(
                iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

            self.metrics = {
                "loss": to_cpu(total_loss).item(),
                "x": to_cpu(loss_x).item(),
                "y": to_cpu(loss_y).item(),
                "w": to_cpu(loss_w).item(),
                "h": to_cpu(loss_h).item(),
                "conf": to_cpu(loss_conf).item(),
                "cls": to_cpu(loss_cls).item(),
                "cls_acc": to_cpu(cls_acc).item(),
                "recall50": to_cpu(recall50).item(),
                "recall75": to_cpu(recall75).item(),
                "precision": to_cpu(precision).item(),
                "conf_obj": to_cpu(conf_obj).item(),
                "conf_noobj": to_cpu(conf_noobj).item(),
                "grid_size": grid_size,
            }

            # The input tensor is handed back unchanged together with the loss.
            return pred, total_loss
        else:
            # Inference: pass the predictions through untouched (single value, no loss).
            return pred
                optimizer.step()
                # Reset gradients
                optimizer.zero_grad()

            # ############
            # Log progress
            # ############
            log_str = ""
            log_str += AsciiTable(
                [
                    ["Type", "Value"],
                    ["IoU loss", float(loss_components[0])],
                    ["Object loss", float(loss_components[1])],
                    ["Class loss", float(loss_components[2])],
                    ["Loss", float(loss_components[3])],
                    ["Batch loss", to_cpu(loss).item()],
                ]).table

            if batch_i % print_frequency == 0:
                if verbose:
                    log_and_print(log_file,
                                  f'\nEpoch: {epoch}; Batch: {batch_i}; images:{batch_i * batch_size}; {format_timelapsed(time() - start)} elapsed')
                    log_and_print(log_file, f'Learning Rate: {lr}')
                    log_and_print(log_file, log_str)

                # Tensorboard logging
                '''
                tensorboard_log = [
                    ("train/iou_loss", float(loss_components[0])),
                    ("train/obj_loss", float(loss_components[1])),
                    ("train/class_loss", float(loss_components[2])),
예제 #22
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode one YOLO head and, when targets are supplied, compute its loss.

        Args:
            x: Raw head output, viewed below as
                (batch, num_anchors * (num_classes + 5), grid, grid).
            targets: Ground-truth tensor forwarded to ``build_targets``, or
                ``None`` for inference.
            img_dim: Input image size; stored on ``self.img_dim``.

        Returns:
            ``(output, 0)`` when ``targets`` is ``None``, otherwise
            ``(output, total_loss)``. ``output`` has shape
            (batch, num_anchors * grid * grid, 5 + num_classes) with the box
            columns multiplied by ``self.stride``. The training path also
            overwrites ``self.metrics``.
        """
        # Legacy tensor constructor matching the input's device type.
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        # Input image size.
        self.img_dim = img_dim
        # x is laid out as (N, C, H, W).
        num_samples = x.size(0)
        # Side length of the current feature map.
        grid_size = x.size(2)

        # Reshape to [num_samples, num_anchors, grid_size, grid_size, num_classes + 5].
        prediction = (
            x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
            .permute(0, 1, 3, 4, 2)
            .contiguous()
        )

        # The ellipsis indexes the innermost (num_classes + 5) axis.
        # x, y are the box-centre offsets within the cell (the cell index is
        # added below); w, h are log-scale factors applied to the anchor size
        # via exp() below.
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

        # If grid size does not match current we compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # Turn (x, y, w, h) back into box coordinates in grid units:
        # add the cell offsets and scale the anchors.
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        output = torch.cat(
            (
                pred_boxes.view(num_samples, -1, 4) * self.stride,
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        if targets is None:
            return output, 0

        # Original author's notes (not verifiable from this method):
        #   targets is an (n, 6) tensor [image index, 0, cx, cy, dw, dh];
        #   obj_mask selects the anchors with the highest IoU against a target;
        #   noobj_mask excludes anchors whose IoU exceeds the ignore threshold.
        iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        obj_mask = obj_mask.bool()  # convert int8 to bool
        noobj_mask = noobj_mask.bool()  # convert int8 to bool

        # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
        # The loss has four parts:
        #   1. box regression error (x, y, w, h) on matched anchors
        #   2. objectness error on matched anchors
        #   3. objectness error on no-object anchors
        #   4. classification error on matched anchors

        # Part 1
        loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(h[obj_mask], th[obj_mask])

        # Part 2
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        # Part 3
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
        # Combine both objectness terms with their configured weights.
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj

        # Part 4
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # Metrics
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        detected_mask = conf50 * class_mask * tconf
        # 1e-16 keeps the ratios finite when a denominator is zero.
        precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
        recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
        recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

        self.metrics = {
            "loss": to_cpu(total_loss).item(),
            "x": to_cpu(loss_x).item(),
            "y": to_cpu(loss_y).item(),
            "w": to_cpu(loss_w).item(),
            "h": to_cpu(loss_h).item(),
            "conf": to_cpu(loss_conf).item(),
            "cls": to_cpu(loss_cls).item(),
            "cls_acc": to_cpu(cls_acc).item(),
            "recall50": to_cpu(recall50).item(),
            "recall75": to_cpu(recall75).item(),
            "precision": to_cpu(precision).item(),
            "conf_obj": to_cpu(conf_obj).item(),
            "conf_noobj": to_cpu(conf_noobj).item(),
            "grid_size": grid_size,
        }

        return output, total_loss
    def forward(self, x, targets=None, img_dim=None):
        """Decode one YOLO head and, when targets are supplied, compute its loss.

        Args:
            x: Raw head output, viewed below as
                (batch, num_anchors * (num_classes + 5), grid, grid).
            targets: Ground-truth tensor forwarded to ``build_targets``, or
                ``None`` for inference.
            img_dim: Input image size; stored on ``self.img_dim``.

        Returns:
            ``(output, 0)`` when ``targets`` is ``None``, otherwise
            ``(output, total_loss)``. ``output`` has shape
            (batch, num_anchors * grid * grid, 5 + num_classes) with the box
            columns multiplied by ``self.stride``. The training path also
            overwrites ``self.metrics``.
        """
        # Legacy tensor constructor matching the input's device type.
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)

        # (N, A*(C+5), G, G) -> (N, A, C+5, G, G) -> (N, A, G, G, C+5)
        prediction = (x.view(num_samples, self.num_anchors,
                             self.num_classes + 5, grid_size,
                             grid_size).permute(0, 1, 3, 4, 2).contiguous())

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])  # center x
        y = torch.sigmoid(prediction[..., 1])  # center y
        w = prediction[..., 2]  # width
        h = prediction[..., 3]  # height
        pred_conf = torch.sigmoid(prediction[..., 4])  # objectness confidence
        pred_cls = torch.sigmoid(prediction[..., 5:])  # class predictions

        # If the grid size does not match the current one, compute new offsets.
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # Add the cell offsets and scale with the anchors.
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        output = torch.cat(
            (
                pred_boxes.view(num_samples, -1, 4) * self.stride,
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        if targets is None:
            return output, 0

        iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        # Index with boolean masks: masks of another integer dtype (presumably
        # uint8 from build_targets -- TODO confirm) trigger a deprecation
        # warning when used for indexing. .bool() is a no-op if they already
        # are boolean.
        obj_mask = obj_mask.bool()
        noobj_mask = noobj_mask.bool()

        # Loss: mask the outputs to ignore non-existing objects (except for the
        # confidence loss).
        loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                        tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # Metrics
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        detected_mask = conf50 * class_mask * tconf
        # 1e-16 keeps the ratios finite when a denominator is zero.
        precision = torch.sum(
            iou50 * detected_mask) / (conf50.sum() + 1e-16)
        recall50 = torch.sum(
            iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
        recall75 = torch.sum(
            iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

        self.metrics = {
            "loss": to_cpu(total_loss).item(),
            "x": to_cpu(loss_x).item(),
            "y": to_cpu(loss_y).item(),
            "w": to_cpu(loss_w).item(),
            "h": to_cpu(loss_h).item(),
            "conf": to_cpu(loss_conf).item(),
            "cls": to_cpu(loss_cls).item(),
            "cls_acc": to_cpu(cls_acc).item(),
            "recall50": to_cpu(recall50).item(),
            "recall75": to_cpu(recall75).item(),
            "precision": to_cpu(precision).item(),
            "conf_obj": to_cpu(conf_obj).item(),
            "conf_noobj": to_cpu(conf_noobj).item(),
            "grid_size": grid_size,
        }

        return output, total_loss
예제 #24
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode one YOLO head and, if targets are supplied, compute its loss.

        Shape examples in the comments follow the original author's running
        example (13x13 grid, 3 anchors, 80 classes, 416 px input); they are
        illustrations, not requirements.

        Args:
            x: Raw head output. It is viewed as
                (batch, num_anchors, num_classes + 5, grid, grid),
                e.g. b x 255 x 13 x 13.
            targets: Ground-truth boxes, passed unchanged to
                ``build_targets``. ``None`` selects inference mode.
            img_dim: Stored on ``self.img_dim``; not otherwise used here.

        Returns:
            ``(output, 0)`` when ``targets`` is None, otherwise
            ``(output, total_loss)``. ``output`` has shape
            (batch, num_anchors * grid * grid, 5 + num_classes): decoded boxes
            multiplied by ``self.stride``, then objectness, then class scores.
            In training mode ``self.metrics`` is replaced as a side effect.
        """
        # Legacy tensor constructor matching the input's device type.
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        self.img_dim = img_dim
        batch = x.size(0)
        grid_size = x.size(2)  # feature-map side, e.g. 13, 26 or 52

        # (b, 3*85, 13, 13) -> (b, 3, 85, 13, 13) -> (b, 3, 13, 13, 85)
        prediction = x.view(
            batch, self.num_anchors, self.num_classes + 5, grid_size, grid_size
        )
        prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()

        # Raw network outputs (the quantities the loss regresses on).
        # Centres are squashed into (0, 1); width/height stay in log space.
        cx = torch.sigmoid(prediction[..., 0])          # (b, 3, 13, 13)
        cy = torch.sigmoid(prediction[..., 1])          # (b, 3, 13, 13)
        raw_w = prediction[..., 2]                      # (b, 3, 13, 13)
        raw_h = prediction[..., 3]                      # (b, 3, 13, 13)
        pred_conf = torch.sigmoid(prediction[..., 4])   # (b, 3, 13, 13)
        pred_cls = torch.sigmoid(prediction[..., 5:])   # (b, 3, 13, 13, 80)

        # Offsets are cached per grid size; recompute them when the size
        # changes. Per the original notes this (1) yields the top-left
        # coordinate of every cell, e.g. grid_x rows [0, 1, ..., 12] and
        # grid_y columns [0, 1, ..., 12], both shaped (1, 1, 13, 13), and
        # (2) rescales the anchors from pixel units (0, 416) to grid units
        # (0, 13), giving anchor_w / anchor_h shaped (1, 3, 1, 1).
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # Decode: add the cell offset to the centres and scale the anchors by
        # exp(w), exp(h). The result is in grid units. ``.data`` keeps the
        # decoded boxes out of the autograd graph; per the original notes they
        # only serve as the final output and help build_targets create masks.
        pred_boxes = FloatTensor(prediction[..., :4].shape)  # (b, 3, 13, 13, 4)
        decoded = (
            cx.data + self.grid_x,
            cy.data + self.grid_y,
            torch.exp(raw_w.data) * self.anchor_w,
            torch.exp(raw_h.data) * self.anchor_h,
        )
        for channel, values in enumerate(decoded):
            pred_boxes[..., channel] = values

        # Multiplying by the stride (32 for the 13x13 grid) maps the boxes
        # from grid units back to input-image pixels.
        flat_boxes = pred_boxes.view(batch, -1, 4) * self.stride
        flat_conf = pred_conf.view(batch, -1, 1)
        flat_cls = pred_cls.view(batch, -1, self.num_classes)
        output = torch.cat((flat_boxes, flat_conf, flat_cls), -1)

        if targets is None:
            return output, 0

        # Encode the ground truth. Per the original notes:
        #   iou_scores : (b, A, G, G) IoU between pred_boxes and ground truth
        #   class_mask : (b, A, G, G) true where the predicted class is right
        #   obj_mask   : (b, A, G, G) 1 where a positive sample is located
        #   noobj_mask : (b, A, G, G) 1 where the cell is surely a negative;
        #                0 means "positive or ignored" (IoU above
        #                ignore_thres takes no part in the loss)
        #   tx, ty, tw, th : (b, A, G, G) regression targets
        #   tcls       : (b, A, G, G, n_classes)
        (iou_scores, class_mask, obj_mask, noobj_mask,
         tx, ty, tw, th, tcls, tconf) = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        # Regression loss on the raw offsets, restricted to object cells.
        loss_x, loss_y, loss_w, loss_h = (
            self.mse_loss(raw[obj_mask], encoded[obj_mask])
            for raw, encoded in ((cx, tx), (cy, ty), (raw_w, tw), (raw_h, th))
        )

        # Objectness is a 0/1 classification; the two populations are
        # weighted separately to balance them.
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
        loss_conf = (self.obj_scale * loss_conf_obj
                     + self.noobj_scale * loss_conf_noobj)

        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])

        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # Metrics (monitoring only, not part of the loss).
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        # Confident, correctly classified, and located on an object cell.
        detected_mask = conf50 * class_mask * tconf
        hits50 = torch.sum(iou50 * detected_mask)
        hits75 = torch.sum(iou75 * detected_mask)
        n_objects = obj_mask.sum() + 1e-16
        precision = hits50 / (conf50.sum() + 1e-16)
        recall50 = hits50 / n_objects
        recall75 = hits75 / n_objects

        reported = (
            ("loss", total_loss),
            ("x", loss_x),
            ("y", loss_y),
            ("w", loss_w),
            ("h", loss_h),
            ("conf", loss_conf),
            ("cls", loss_cls),
            ("cls_acc", cls_acc),
            ("recall50", recall50),
            ("recall75", recall75),
            ("precision", precision),
            ("conf_obj", conf_obj),
            ("conf_noobj", conf_noobj),
        )
        metrics = {name: to_cpu(value).item() for name, value in reported}
        metrics["grid_size"] = grid_size
        self.metrics = metrics

        return output, total_loss
예제 #25
0
def yolo_loss(x, y, w, h, xdir, ydir, pred_boxes, pred_conf, pred_cls, targets,
              scaled_anchors, ignore_thres, clf_criterion, reg_criterion,
              obj_scale, noobj_scale, regr_weights, grid_size1):
    """Compute the YOLO loss with two extra direction regressions, plus metrics.

    Args:
        x, y, w, h: Raw box regression outputs, indexed by ``obj_mask``.
        xdir, ydir: Raw direction (rotation) regression outputs.
        pred_boxes: Decoded boxes, forwarded to ``build_targets``.
        pred_conf: Objectness scores; must be a tensor.
        pred_cls: Class scores, or ``None`` when there is no class head.
        targets: Ground truth, forwarded to ``build_targets``.
        scaled_anchors: Anchors, forwarded to ``build_targets``.
        ignore_thres: Ignore threshold, forwarded to ``build_targets``.
        clf_criterion: Loss callable used for the class scores.
        reg_criterion: Loss callable used for every regression term.
        obj_scale, noobj_scale: Passed to ``focal_loss`` as
            ``(noobj_scale, obj_scale)``.
        regr_weights: Multiplier applied to the sum of regression losses.
        grid_size1: Reported verbatim under ``metrics["grid_size"]``.

    Returns:
        ``(total_loss, metrics)`` where ``metrics`` is a dict of Python
        scalars (plus ``grid_size``).
    """
    (iou_scores, class_mask, obj_mask, noobj_mask,
     tx, ty, tw, th, txdir, tydir, tcls, tconf) = build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=scaled_anchors,
        ignore_thres=ignore_thres,
    )

    # Box regression: only cells that contain an object contribute.
    loss_x = reg_criterion(x[obj_mask], tx[obj_mask])
    loss_y = reg_criterion(y[obj_mask], ty[obj_mask])
    loss_w = reg_criterion(w[obj_mask], tw[obj_mask])
    loss_h = reg_criterion(h[obj_mask], th[obj_mask])
    # Rotation regression.
    loss_xdir = reg_criterion(xdir[obj_mask], txdir[obj_mask])
    loss_ydir = reg_criterion(ydir[obj_mask], tydir[obj_mask])

    # Objectness uses every cell, not just the masked ones.
    loss_conf = focal_loss(pred_conf, tconf, (noobj_scale, obj_scale))

    # Decide on the presence of a class head explicitly. The previous
    # ``loss_cls == 0`` test also fired when a real class loss happened to be
    # exactly zero. Its zero placeholders were int64 tensors on the globally
    # available device, which may differ from the device the inputs live on.
    if pred_cls is not None:
        loss_cls = clf_criterion(pred_cls[obj_mask], tcls[obj_mask])
        cls_acc = 100 * class_mask[obj_mask].mean()
    else:
        # Float zeros on the same device/dtype as the predictions.
        loss_cls = pred_conf.new_zeros(())
        cls_acc = pred_conf.new_zeros(())

    regression = loss_x + loss_y + loss_w + loss_h + loss_xdir + loss_ydir
    total_loss = regr_weights * regression + loss_conf + loss_cls

    # Metrics (monitoring only).
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    detected_mask = conf50 * class_mask * tconf
    precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
    recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
    recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

    metrics = {
        "loss": to_cpu(total_loss).item(),
        "x": to_cpu(loss_x).item(),
        "y": to_cpu(loss_y).item(),
        "w": to_cpu(loss_w).item(),
        "h": to_cpu(loss_h).item(),
        "conf": to_cpu(loss_conf).item(),
        "cls": to_cpu(loss_cls).item(),
        "cls_acc": to_cpu(cls_acc).item(),
        "recall50": to_cpu(recall50).item(),
        "recall75": to_cpu(recall75).item(),
        "precision": to_cpu(precision).item(),
        "conf_obj": to_cpu(conf_obj).item(),
        "conf_noobj": to_cpu(conf_noobj).item(),
        "grid_size": grid_size1,
        'rotation': to_cpu(loss_xdir + loss_ydir).item(),
    }
    return total_loss, metrics
예제 #26
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode a YOLO head that also predicts angle classes; optionally compute its loss.

        Args:
            x: Raw head output. It is viewed as
                (batch, num_anchors, num_classes + num_angles + 5, grid, grid).
            targets: Ground truth, passed unchanged to ``build_targets``.
                ``None`` selects inference mode.
            img_dim: Stored on ``self.img_dim``; not otherwise used here.

        Returns:
            ``(output, 0)`` when ``targets`` is None, otherwise
            ``(output, total_loss)``. ``output`` has shape
            (batch, num_anchors * grid * grid, 5 + num_classes + num_angles):
            boxes multiplied by ``self.stride``, objectness, class scores,
            angle-class scores. In training mode ``self.metrics`` is replaced
            as a side effect.
        """
        # Legacy tensor constructor matching the input's device type.
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        self.img_dim = img_dim
        batch = x.size(0)
        grid_size = x.size(2)
        n_cls = self.num_classes

        # Move the per-anchor channel axis last.
        prediction = x.view(
            batch, self.num_anchors, n_cls + self.num_angles + 5,
            grid_size, grid_size,
        )
        prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()

        # Channel layout: x, y, w, h, conf, classes..., angle classes...
        cx = torch.sigmoid(prediction[..., 0])
        cy = torch.sigmoid(prediction[..., 1])
        raw_w = prediction[..., 2]
        raw_h = prediction[..., 3]
        pred_conf = torch.sigmoid(prediction[..., 4])
        pred_cls = torch.sigmoid(prediction[..., 5:5 + n_cls])
        pred_angle_cls = torch.sigmoid(prediction[..., 5 + n_cls:])

        # Cached offsets are only valid for the grid size they were built for.
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # Decode boxes in grid units; ``.data`` detaches them from autograd.
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        decoded = (
            cx.data + self.grid_x,
            cy.data + self.grid_y,
            torch.exp(raw_w.data) * self.anchor_w,
            torch.exp(raw_h.data) * self.anchor_h,
        )
        for channel, values in enumerate(decoded):
            pred_boxes[..., channel] = values

        output = torch.cat(
            (
                pred_boxes.view(batch, -1, 4) * self.stride,
                pred_conf.view(batch, -1, 1),
                pred_cls.view(batch, -1, n_cls),
                pred_angle_cls.view(batch, -1, self.num_angles),
            ),
            -1,
        )

        if targets is None:
            return output, 0

        (iou_scores, class_mask, angle_mask, obj_mask, noobj_mask,
         tx, ty, tw, th, tacls, tcls, tconf) = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            pred_angle_cls=pred_angle_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        # Hard-coded per-angle-class weights (19 entries). They broadcast
        # against the last axis of the masked angle predictions.
        # NOTE(review): this appears to require num_angles == 19 - confirm.
        angle_weights = torch.tensor([
            1.12424274, 13.3361754, 75.7716263, 50.10983982, 61.6845070,
            71.0974026, 73.73063973, 22.52880658, 8.14052045, 5.87707998,
            25.49243306, 10.36837121, 26.4468599, 77.92882562, 100.44954128,
            82.9469697, 35.20578778, 8.97826978, 1.,
        ]).type(FloatTensor)

        # Box regression on object cells only.
        loss_x, loss_y, loss_w, loss_h = (
            self.mse_loss(raw[obj_mask], encoded[obj_mask])
            for raw, encoded in ((cx, tx), (cy, ty), (raw_w, tw), (raw_h, th))
        )

        # Weighted angle classification: element-wise BCE, scaled per class,
        # divided by 100, then averaged.
        angle_bce = nn.BCELoss(reduction='none')(
            pred_angle_cls[obj_mask], tacls[obj_mask])
        loss_conf_angle = (angle_bce * angle_weights / 100).mean()

        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
        loss_conf = (self.obj_scale * loss_conf_obj
                     + self.noobj_scale * loss_conf_noobj)
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
        total_loss = (loss_x + loss_y + loss_w + loss_h
                      + loss_conf + loss_cls + loss_conf_angle)

        # Metrics (monitoring only).
        cls_acc = 100 * class_mask[obj_mask].mean()
        angle_acc = 100 * angle_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        detected_mask = conf50 * class_mask * tconf
        hits50 = torch.sum(iou50 * detected_mask)
        hits75 = torch.sum(iou75 * detected_mask)
        n_objects = obj_mask.sum() + 1e-16
        precision = hits50 / (conf50.sum() + 1e-16)
        recall50 = hits50 / n_objects
        recall75 = hits75 / n_objects

        reported = (
            ("loss", total_loss),
            ("x", loss_x),
            ("y", loss_y),
            ("w", loss_w),
            ("h", loss_h),
            ("angle_acc", angle_acc),
            ("angle", loss_conf_angle),
            ("conf", loss_conf),
            ("cls", loss_cls),
            ("cls_acc", cls_acc),
            ("recall50", recall50),
            ("recall75", recall75),
            ("precision", precision),
            ("conf_obj", conf_obj),
            ("conf_noobj", conf_noobj),
        )
        metrics = {name: to_cpu(value).item() for name, value in reported}
        metrics["grid_size"] = grid_size
        self.metrics = metrics

        return output, total_loss
    def forward(self, x, targets=None, img_dim=None):
        """Decode one YOLO head on the input's own device; optionally compute its loss.

        The original author's notes give inputs such as [8, 255, 13, 13],
        [8, 255, 26, 26] and [8, 255, 52, 52], where
        255 = n_anchors * (5 + n_classes) = 3 * 85.

        Args:
            x: Raw head output. It is viewed as
                (batch, num_anchors, num_classes + 5, grid, grid).
            targets: Ground truth, passed unchanged to ``build_targets``.
                ``None`` selects inference mode.
            img_dim: Stored on ``self.img_dim``; not otherwise used here.

        Returns:
            ``(output, 0)`` when ``targets`` is None, otherwise the 3-tuple
            ``(output, total_loss, self.metrics)``. Note that the two modes
            return tuples of different length. ``output`` has shape
            (batch, num_anchors * grid * grid, 5 + num_classes).
        """
        # Device index of the input (None for CPU tensors); it is handed to
        # the offset computation and to the box allocation below.
        device_id = x.device.index
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        self.img_dim = img_dim
        batch = x.size(0)
        grid_size = x.size(2)

        # (bs, 3*85, G, G) -> (bs, 3, 85, G, G) -> (bs, 3, G, G, 85)
        prediction = x.view(
            batch, self.num_anchors, self.num_classes + 5, grid_size, grid_size
        )
        prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()

        # Raw outputs: sigmoid-squashed centres, log-space width/height.
        cx = torch.sigmoid(prediction[..., 0])
        cy = torch.sigmoid(prediction[..., 1])
        raw_w = prediction[..., 2]
        raw_h = prediction[..., 3]
        pred_conf = torch.sigmoid(prediction[..., 4])
        pred_cls = torch.sigmoid(prediction[..., 5:])

        # Cached offsets are only valid for the grid size they were built for.
        if grid_size != self.grid_size:
            self.compute_grid_offsets(
                grid_size, cuda=x.is_cuda, device_id=device_id)

        # Decode boxes in grid units; ``.data`` detaches them from autograd.
        pred_boxes = FloatTensor(prediction[..., :4].shape, device=device_id)
        decoded = (
            cx.data + self.grid_x,
            cy.data + self.grid_y,
            torch.exp(raw_w.data) * self.anchor_w,
            torch.exp(raw_h.data) * self.anchor_h,
        )
        for channel, values in enumerate(decoded):
            pred_boxes[..., channel] = values

        # (bs, anchors * grid * grid, 5 + num_classes)
        flat_boxes = pred_boxes.view(batch, -1, 4) * self.stride
        flat_conf = pred_conf.view(batch, -1, 1)
        flat_cls = pred_cls.view(batch, -1, self.num_classes)
        output = torch.cat((flat_boxes, flat_conf, flat_cls), -1)

        if targets is None:
            return output, 0

        (iou_scores, class_mask, obj_mask, noobj_mask,
         tx, ty, tw, th, tcls, tconf) = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        # Box regression on object cells only.
        loss_x, loss_y, loss_w, loss_h = (
            self.mse_loss(raw[obj_mask], encoded[obj_mask])
            for raw, encoded in ((cx, tx), (cy, ty), (raw_w, tw), (raw_h, th))
        )
        # Objectness: object and no-object cells are weighted separately.
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
        loss_conf = (self.obj_scale * loss_conf_obj
                     + self.noobj_scale * loss_conf_noobj)
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # Metrics (monitoring only).
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        detected_mask = conf50 * class_mask * tconf
        hits50 = torch.sum(iou50 * detected_mask)
        hits75 = torch.sum(iou75 * detected_mask)
        n_objects = obj_mask.sum() + 1e-16
        precision = hits50 / (conf50.sum() + 1e-16)
        recall50 = hits50 / n_objects
        recall75 = hits75 / n_objects

        reported = (
            ("loss", total_loss),
            ("x", loss_x),
            ("y", loss_y),
            ("w", loss_w),
            ("h", loss_h),
            ("conf", loss_conf),
            ("cls", loss_cls),
            ("cls_acc", cls_acc),
            ("recall50", recall50),
            ("recall75", recall75),
            ("precision", precision),
            ("conf_obj", conf_obj),
            ("conf_noobj", conf_noobj),
        )
        metrics = {name: to_cpu(value).item() for name, value in reported}
        metrics["grid_size"] = grid_size
        self.metrics = metrics

        return output, total_loss, self.metrics
예제 #28
0
def train_classifier(train_loader, classif_model, optimizer_classif, many_hot_encoder=None,
                     valid_loader=None, state=None,
                     dir_model="model", result_path="res", recompute=True):
    """Train a classifier with BCE loss, tracking per-epoch macro F-measure.

    Training runs when ``recompute`` is true or no model file exists at the
    target path; otherwise the stored model is loaded. Per-epoch results are
    written to ``result_path`` as a tab-separated file in both cases (empty
    when nothing was trained).

    Args:
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        classif_model: Model to train; called as ``classif_model(inputs)``.
        optimizer_classif: Optimizer stepping the model parameters.
        many_hot_encoder: Label encoder. Despite the ``None`` default it is
            required: ``state_dict()`` and ``labels`` are read from it.
        valid_loader: Optional validation loader. Without it the validation
            score is reported as ``-1`` and the per-class string as ``""``.
        state: Dict updated in place with the epoch, model/optimizer state
            dicts, loss and results. A new dict is created when omitted.
        dir_model: Directory in which models are saved (created if needed).
        result_path: Path of the tab-separated results file.
        recompute: Train even if a saved model already exists.

    Returns:
        ``(classif_model, state)``. When ``cfg.save_best`` is set, or when the
        stored model is reused, both come from ``load_model``.
    """
    # The former ``state={}`` default was one dict shared by every call.
    if state is None:
        state = {}

    criterion_bce = nn.BCELoss()
    classif_model, criterion_bce = to_cuda_if_available(classif_model, criterion_bce)
    print(classif_model)

    early_stopping_call = EarlyStopping(patience=cfg.early_stopping, val_comp="sup",
                                        init_patience=cfg.first_early_wait)
    save_best_call = SaveBest(val_comp="sup")
    print(optimizer_classif)

    # One dict per epoch; turned into a DataFrame once at the end
    # (DataFrame.append no longer exists in pandas >= 2.0).
    epoch_rows = []

    create_folder(dir_model)
    if cfg.save_best:
        model_path_sup1 = os.path.join(dir_model, "best_model")
    else:
        model_path_sup1 = os.path.join(dir_model, "epoch_" + str(cfg.n_epoch_classifier))
    print("path of model : " + model_path_sup1)

    state['many_hot_encoder'] = many_hot_encoder.state_dict()

    if not os.path.exists(model_path_sup1) or recompute:
        n_class = len(many_hot_encoder.labels)
        for epoch_ in range(cfg.n_epoch_classifier):
            print(classif_model.training)
            start = time.time()
            batch_losses = []
            for i, samples in enumerate(train_loader):
                inputs, pred_labels = samples
                if i == 0:
                    LOG.debug("classif input shape: {}".format(inputs.shape))

                # zero the parameter gradients
                optimizer_classif.zero_grad()
                inputs = to_cuda_if_available(inputs)

                # forward + backward + optimize; the loss is evaluated on
                # CPU because the labels are never moved to the GPU.
                weak_out = to_cpu(classif_model(inputs))
                loss_bce = criterion_bce(weak_out, pred_labels)
                batch_losses.append(loss_bce.item())
                loss_bce.backward()
                optimizer_classif.step()

            # Count batches from the collected losses rather than the loop
            # variable, which is unbound when the loader is empty.
            n_batches = len(batch_losses)
            loss_mean_bce = np.mean(batch_losses) if batch_losses else float("nan")

            classif_model.eval()
            macro_f_measure_train = get_f_measure_by_class(classif_model, n_class,
                                                           train_loader)
            if valid_loader is not None:
                macro_f_measure = get_f_measure_by_class(classif_model, n_class,
                                                         valid_loader)
                mean_macro_f_measure = np.mean(macro_f_measure)
                class_macro_valid = np.array_str(macro_f_measure, precision=2)
            else:
                # No validation data: previously this path hit a NameError
                # on the undefined per-class validation scores.
                mean_macro_f_measure = -1
                class_macro_valid = ""
            classif_model.train()
            print("Time to train an epoch: {}".format(time.time() - start))
            # print statistics
            print('[%d / %d, %5d] loss: %.3f' %
                  (epoch_ + 1, cfg.n_epoch_classifier, n_batches, loss_mean_bce))

            results = {"train_loss": loss_mean_bce,
                       "macro_measure_train": np.mean(macro_f_measure_train),
                       "class_macro_train": np.array_str(macro_f_measure_train, precision=2),
                       "macro_measure_valid": mean_macro_f_measure,
                       "class_macro_valid": class_macro_valid,
                       }
            for key in results:
                LOG.info("\t\t ---->  {} : {}".format(key, results[key]))

            epoch_rows.append(results)

            # ##########
            # # Callbacks
            # ##########
            state['epoch'] = epoch_ + 1
            # setdefault: a fresh state has no "model" / "optimizer" entries.
            state.setdefault("model", {})["state_dict"] = classif_model.state_dict()
            state.setdefault("optimizer", {})["state_dict"] = optimizer_classif.state_dict()
            state["loss"] = loss_mean_bce
            state.update(results)

            if cfg.early_stopping is not None:
                if early_stopping_call.apply(mean_macro_f_measure):
                    print("EARLY STOPPING")
                    break

            if cfg.save_best and save_best_call.apply(mean_macro_f_measure):
                save_model(state, model_path_sup1)

        if cfg.save_best:
            LOG.info(
                "best model at epoch : {} with macro {}".format(save_best_call.best_epoch, save_best_call.best_val))
            LOG.info("loading model from: {}".format(model_path_sup1))
            classif_model, state = load_model(model_path_sup1, return_optimizer=False, return_state=True)
        else:
            model_path_sup1 = os.path.join(dir_model, "epoch_" + str(cfg.n_epoch_classifier))
            save_model(state, model_path_sup1)
        LOG.debug("model path: {}".format(model_path_sup1))
        LOG.debug('Finished Training')
    else:
        classif_model, state = load_model(model_path_sup1, return_optimizer=False, return_state=True)
    LOG.info("#### End classif")
    pd.DataFrame(epoch_rows).to_csv(result_path, sep="\t", header=True, index=False)

    return classif_model, state
예제 #29
0
    def forward(self, x, targets=None, img_dim=None, cls=None):

        # Tensors for cuda support
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
        ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor

        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)

        prediction = (x.view(num_samples, self.num_anchors,
                             self.num_classes + 5, grid_size,
                             grid_size).permute(0, 1, 3, 4, 2).contiguous())

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
        # Softmax instead of sigmoid, since only one class will be present
        pred_cls = prediction[..., 5:]  # Cls pred.

        # If grid size does not match current we compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # Add offset and scale with anchors
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        output = torch.cat(
            (
                pred_boxes.view(num_samples, -1, 4) * self.stride,
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        # Weight the grid-wise predictions acc. to the object confidence
        weighted_class_scores = pred_conf.unsqueeze(dim=-1) * pred_cls
        weighted_class_scores = weighted_class_scores.sum(dim=(1, 2, 3))

        if targets is None:
            return output, weighted_class_scores, 0
        else:
            iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
                pred_boxes=pred_boxes,
                pred_cls=pred_cls,
                target=targets,
                anchors=self.scaled_anchors,
                ignore_thres=self.ignore_thres,
            )

            # Calculate these only if obj_mask is non-empty
            if obj_mask.sum() > 0:
                # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
                loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
                loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
                loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
                loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
                loss_conf_obj = self.bce_loss(pred_conf[obj_mask],
                                              tconf[obj_mask])
                loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                                tconf[noobj_mask])
                loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
                loss_cls = self.ce_loss(
                    pred_cls[obj_mask].view(-1, self.num_classes),
                    tcls[obj_mask].long().view(-1))
                detection_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
            else:
                detection_loss = 0.

            # Classification loss
            classification_loss = self.ce_loss(weighted_class_scores, cls)

            total_loss = detection_loss + classification_loss

            # Calculate these only if obj_mask is non-empty
            if obj_mask.sum() > 0:
                # Metrics
                cls_acc = 100 * class_mask[obj_mask].mean()
                conf_obj = pred_conf[obj_mask].mean()
                conf_noobj = pred_conf[noobj_mask].mean()
                conf50 = (pred_conf > 0.5).float()
                iou50 = (iou_scores > 0.5).float()
                iou75 = (iou_scores > 0.75).float()
                detected_mask = conf50 * class_mask * tconf
                precision = torch.sum(
                    iou50 * detected_mask) / (conf50.sum() + 1e-16)
                recall50 = torch.sum(
                    iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
                recall75 = torch.sum(
                    iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

                self.metrics = {
                    "loss": to_cpu(total_loss).item(),
                    "x": to_cpu(loss_x).item(),
                    "y": to_cpu(loss_y).item(),
                    "w": to_cpu(loss_w).item(),
                    "h": to_cpu(loss_h).item(),
                    "conf": to_cpu(loss_conf).item(),
                    "cls": to_cpu(loss_cls).item(),
                    "cls_acc": to_cpu(cls_acc).item(),
                    "recall50": to_cpu(recall50).item(),
                    "recall75": to_cpu(recall75).item(),
                    "precision": to_cpu(precision).item(),
                    "conf_obj": to_cpu(conf_obj).item(),
                    "conf_noobj": to_cpu(conf_noobj).item(),
                    "grid_size": grid_size,
                }
            _classification_loss = classification_loss.clone()
            self.metrics['classification_loss'] = to_cpu(
                _classification_loss).item()
            _weighted_class_scores = weighted_class_scores.clone()
            self.metrics['batch_acc'] = to_cpu(
                torch.sum(torch.argmax(_weighted_class_scores, dim=-1) ==
                          cls)).item() / len(cls)

            return output, weighted_class_scores, total_loss
예제 #30
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode raw detection-head activations and optionally compute the loss.

        The input is viewed as ``(num_samples, num_anchors, num_classes + 7,
        grid_size, grid_size)`` where ``grid_size = x.size(2)``. Along the
        channel axis the entries are: 0 centre x, 1 centre y, 2 width,
        3 height, 4 angle imaginary part, 5 angle real part, 6 objectness,
        7+ class scores.

        Args:
            x: Raw activations of the preceding layer.
            targets: Ground truth forwarded to ``build_targets``. When
                ``None`` no loss is computed.
            img_dim: Stored on ``self.img_dim``; not otherwise used here.

        Returns:
            ``(output, loss)``. ``output`` has shape
            ``(num_samples, -1, 7 + num_classes)`` and concatenates the box
            (x, y, w, h) multiplied by ``self.stride``, the (im, re) pair,
            the objectness and the class scores. ``loss`` is ``0`` when
            ``targets`` is ``None`` or when a ``RuntimeError`` is raised
            while building targets / computing the loss; otherwise it is the
            summed loss tensor.

        Side effects:
            Sets ``self.img_dim``; may call ``self.compute_grid_offsets``;
            sets ``self.metrics`` when the loss is computed successfully.
        """
        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)

        prediction = (
            x.view(num_samples, self.num_anchors, self.num_classes + 7, grid_size, grid_size)
            .permute(0, 1, 3, 4, 2)
            .contiguous()
        )

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        im = prediction[..., 4]  # angle imaginary part
        re = prediction[..., 5]  # angle real part
        pred_conf = torch.sigmoid(prediction[..., 6])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 7:])  # Cls pred.

        # If grid size does not match current we compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # Add offset and scale with anchors.
        # Allocate on the input's own device (float32, as before) rather than
        # through the legacy per-type constructors; every channel is filled
        # below, so the uninitialised memory is never read.
        pred_boxes = torch.empty(
            prediction[..., :6].shape,
            dtype=torch.float32,
            device=prediction.device,
        )
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h
        # NOTE(review): unlike the four channels above, im/re are written
        # without `.data` -- confirm whether that is intentional.
        pred_boxes[..., 4] = im
        pred_boxes[..., 5] = re

        # Only the (x, y, w, h) part is multiplied by the stride; the angle
        # components are passed through unchanged.
        output = torch.cat(
            (
                pred_boxes[..., :4].view(num_samples, -1, 4) * self.stride,
                pred_boxes[..., 4:].view(num_samples, -1, 2),
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        if targets is None:
            return output, 0

        # Kevin: this try/except makes sure that when ious is empty in
        # build_targets (look at utils/utils.py), this function knows how to
        # handle it and returns (output, 0) instead. The guarded region is
        # kept as wide as it originally was so callers see the same fallback.
        try:
            iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tim, tre, tcls, tconf = build_targets(
                pred_boxes=pred_boxes,
                pred_cls=pred_cls,
                target=targets,
                anchors=self.scaled_anchors,
                ignore_thres=self.ignore_thres,
            )

            # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
            loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
            loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
            loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
            loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
            loss_im = self.mse_loss(im[obj_mask], tim[obj_mask])
            loss_re = self.mse_loss(re[obj_mask], tre[obj_mask])
            loss_eular = loss_im + loss_re
            loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
            loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
            loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
            loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
            total_loss = loss_x + loss_y + loss_w + loss_h + loss_eular + loss_conf + loss_cls

            # Metrics
            cls_acc = 100 * class_mask[obj_mask].mean()
            conf_obj = pred_conf[obj_mask].mean()
            conf_noobj = pred_conf[noobj_mask].mean()
            conf50 = (pred_conf > 0.5).float()
            iou50 = (iou_scores > 0.5).float()
            iou75 = (iou_scores > 0.75).float()
            detected_mask = conf50 * class_mask * tconf
            # Shared numerator of precision and recall50.
            hits50 = torch.sum(iou50 * detected_mask)
            hits75 = torch.sum(iou75 * detected_mask)
            # The 1e-16 terms guard against division by zero.
            precision = hits50 / (conf50.sum() + 1e-16)
            recall50 = hits50 / (obj_mask.sum() + 1e-16)
            recall75 = hits75 / (obj_mask.sum() + 1e-16)

            self.metrics = {
                "loss": to_cpu(total_loss).item(),
                "x": to_cpu(loss_x).item(),
                "y": to_cpu(loss_y).item(),
                "w": to_cpu(loss_w).item(),
                "h": to_cpu(loss_h).item(),
                "im": to_cpu(loss_im).item(),
                "re": to_cpu(loss_re).item(),
                "conf": to_cpu(loss_conf).item(),
                "cls": to_cpu(loss_cls).item(),
                "cls_acc": to_cpu(cls_acc).item(),
                "recall50": to_cpu(recall50).item(),
                "recall75": to_cpu(recall75).item(),
                "precision": to_cpu(precision).item(),
                "conf_obj": to_cpu(conf_obj).item(),
                "conf_noobj": to_cpu(conf_noobj).item(),
                "grid_size": grid_size,
            }

            return output, total_loss
        except RuntimeError as err:
            print(err)
            return output, 0