def draw_bounding_boxes(self, image, detections):
     """Draw a labelled box on ``image`` for every detection.

     Args:
         image: Image passed to the OpenCV drawing calls; ``image.shape[:2]``
             is handed to ``DetectionUtils.rescale_boxes`` as the target size.
         detections: ``None``, an empty collection, or an iterable of
             7-value rows ``(x1, y1, x2, y2, conf, cls_conf, cls_pred)``.

     Returns:
         ``image`` unchanged when there is nothing to draw, otherwise the
         value returned by the last successful OpenCV drawing call.
     """
     import logging

     # len() is kept instead of a truthiness test — presumably detections
     # can be a tensor/array whose truth value is ambiguous; TODO confirm.
     if detections is None or len(detections) == 0:
         return image
     detections = DetectionUtils.rescale_boxes(detections, self.image_size,
                                               image.shape[:2])
     for x1, y1, x2, y2, _conf, cls_conf, cls_pred in detections:
         try:
             class_index = int(cls_pred)
             color = self.colors[class_index]
             text = self.classes[class_index] + ' ' + str(
                 int(cls_conf * 100)) + '%'
             # OpenCV expects integer pixel coordinates. The unpacked values
             # are not guaranteed to be Python ints, and `top - height / 2`
             # below is always a float, so convert explicitly.
             left, top = int(x1), int(y1)
             right, bottom = int(x2), int(y2)
             width, height = cv2.getTextSize(text,
                                             self.font,
                                             self.font_scale,
                                             thickness=1)[0]
             # Outline of the detected object.
             image = cv2.rectangle(image, (left, top), (right, bottom),
                                   color, 2)
             # Filled strip above the top-left corner as label background.
             image = cv2.rectangle(image, (left, top),
                                   (left + width + 5, top - height - 10),
                                   color, cv2.FILLED)
             image = cv2.putText(image, text,
                                 (left, int(top - height / 2)),
                                 self.font, self.font_scale,
                                 (255, 255, 255), 1)
         except (IndexError, TypeError, ValueError, OverflowError,
                 cv2.error) as err:
             # Drawing stays best-effort: one bad detection must not stop
             # the others, but the reason is no longer discarded.
             logging.getLogger(__name__).debug(
                 "Skipping detection that could not be drawn: %s", err)
     return image
 def detect(self, image):
     """Run the model on a single image and return its detections.

     The image is converted with ``ImageUtils.get_image_tensor`` using
     ``self.image_size``, given a leading batch dimension, and cast to a CUDA
     float tensor when CUDA is available (CPU float tensor otherwise).

     Returns:
         The first element of the ``DetectionUtils.non_max_suppression``
         result, i.e. the entry for the one image in the batch.
     """
     if torch.cuda.is_available():
         tensor_type = torch.cuda.FloatTensor
     else:
         tensor_type = torch.FloatTensor

     batch = ImageUtils.get_image_tensor(image, self.image_size)
     batch = batch.unsqueeze(0)
     batch = Variable(batch.type(tensor_type))

     self.model.eval()
     with torch.no_grad():
         raw_output = self.model(batch)
         per_image = DetectionUtils.non_max_suppression(
             raw_output, self.confidence_threshold, self.nms_threshold)
     # Only one image was submitted, so only its entry is returned.
     return per_image[0]
 def __init__(self,
              root_path="./",
              confidence_threshold=0.8,
              nms_threshold=0.4,
              image_size=416):
     """Set up paths, thresholds, drawing settings, class names and model.

     Args:
         root_path: Prefix for the asset and config paths. A missing
             trailing separator is added, so ``"/opt/app"`` and
             ``"/opt/app/"`` behave the same. An empty string is left
             empty and yields relative paths.
         confidence_threshold: Stored and later passed to non-max
             suppression.
         nms_threshold: Stored and later passed to non-max suppression.
         image_size: Stored as the size used to prepare model inputs.
     """
     # The paths below are built by plain concatenation, so a non-empty
     # root must end with a separator or it fuses with the first path
     # component (e.g. "/opt/appconfig/yolov3.cfg").
     if root_path and not root_path.endswith(('/', '\\')):
         root_path += '/'
     self.image_folder = root_path + 'assets/images/'
     self.model_cfg = root_path + 'config/yolov3.cfg'
     self.weights_path = root_path + 'config/yolov3.weights'
     self.class_path = root_path + 'config/voc.names'
     self.confidence_threshold = confidence_threshold
     self.nms_threshold = nms_threshold
     self.image_size = image_size
     self.font_scale = 1
     self.font = cv2.FONT_HERSHEY_PLAIN
     self.classes = DetectionUtils.load_classes(self.class_path)
     # One random colour per class, plus one spare entry.
     self.colors = [(randint(0, 255), randint(0, 255), randint(0, 255))
                    for _ in range(len(self.classes) + 1)]
     self.model = self.__load_model()
 def __init__(self,
              confidence_threshold=0.8,
              nms_threshold=0.4,
              image_size=416,
              tiny=False):
     """Configure the detector from files under ``Constants.ROOT_PATH``.

     Args:
         confidence_threshold: Stored as ``self.confidence_threshold``.
         nms_threshold: Stored as ``self.nms_threshold``.
         image_size: Stored as ``self.image_size``.
         tiny: When true, the ``yolov3-tiny`` cfg/weights pair is used
             instead of the ``yolov3`` pair.
     """
     root = Constants.ROOT_PATH
     if tiny:
         cfg_file = 'config/yolov3-tiny.cfg'
         weights_file = 'config/yolov3-tiny.weights'
     else:
         cfg_file = 'config/yolov3.cfg'
         weights_file = 'config/yolov3.weights'
     self.model_cfg = root + cfg_file
     self.weights_path = root + weights_file
     self.class_path = root + 'config/coco.names'

     self.confidence_threshold = confidence_threshold
     self.nms_threshold = nms_threshold
     self.image_size = image_size

     # Text settings used when labelling boxes.
     self.font_scale = 1
     self.font = cv2.FONT_HERSHEY_PLAIN

     self.classes = DetectionUtils.load_classes(self.class_path)
     # Every class shares the same fixed colour.
     self.colors = [(0, 175, 0)] * len(self.classes)
     self.model = self.__load_model()
예제 #5
0
 def forward(self, x, targets=None):
     """Pass ``x`` through every module and gather the YOLO-layer outputs.

     Each entry of ``self.module_defs`` is paired with the module at the same
     position in ``self.module_list``; the definition's ``"type"`` decides
     how the module is applied. The result of every step is recorded so that
     later ``route`` and ``shortcut`` steps can refer back to it.

     Args:
         x: Input tensor; ``x.shape[2]`` is forwarded to the YOLO layers as
             the image dimension.
         targets: Optional targets forwarded to the YOLO layers.

     Returns:
         The concatenated YOLO outputs (after ``DetectionUtils.to_cpu``)
         when ``targets`` is ``None``, otherwise ``(loss, outputs)``.
     """
     img_dim = x.shape[2]
     loss = 0
     history = []       # result of every step, indexed by position
     yolo_results = []  # outputs of the "yolo" steps only
     for definition, layer in zip(self.module_defs, self.module_list):
         kind = definition["type"]
         if kind == "yolo":
             x, layer_loss = layer[0](x, targets, img_dim)
             loss += layer_loss
             yolo_results.append(x)
         elif kind == "shortcut":
             source = int(definition["from"])
             x = history[-1] + history[source]
         elif kind == "route":
             picked = [
                 history[int(index)]
                 for index in definition["layers"].split(",")
             ]
             x = torch.cat(picked, 1)
         elif kind in ("convolutional", "upsample", "maxpool"):
             x = layer(x)
         history.append(x)

     merged = DetectionUtils.to_cpu(torch.cat(yolo_results, 1))
     if targets is None:
         return merged
     return (loss, merged)
예제 #6
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode raw layer input into box predictions and, optionally, a loss.

        Args:
            x: Input tensor that is viewed as
                ``(samples, num_anchors, num_classes + 5, grid, grid)``.
            targets: Optional targets. When given, the loss is computed and
                ``self.metrics`` is refreshed.
            img_dim: Stored on ``self.img_dim``.

        Returns:
            ``(output, 0)`` when ``targets`` is ``None``, otherwise
            ``(output, total_loss)``. The last axis of ``output`` holds the
            four box values multiplied by ``self.stride``, then the
            confidence, then the per-class scores.
        """
        # Tensor constructor matching the device of the input.
        on_cuda = x.is_cuda
        if on_cuda:
            FloatTensor = torch.cuda.FloatTensor
        else:
            FloatTensor = torch.FloatTensor

        self.img_dim = img_dim
        n_samples = x.size(0)
        n_cells = x.size(2)

        # Move the per-box attribute axis to the end.
        prediction = x.view(n_samples, self.num_anchors,
                            self.num_classes + 5, n_cells, n_cells)
        prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()

        # Split the attribute axis into its components.
        center_x = torch.sigmoid(prediction[..., 0])
        center_y = torch.sigmoid(prediction[..., 1])
        raw_w = prediction[..., 2]
        raw_h = prediction[..., 3]
        pred_conf = torch.sigmoid(prediction[..., 4])
        pred_cls = torch.sigmoid(prediction[..., 5:])

        # Recompute the cached offsets when the grid size has changed.
        if n_cells != self.grid_size:
            self.compute_grid_offsets(n_cells, cuda=on_cuda)

        # Add grid offsets to the centres and scale sizes by the anchors.
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = center_x.data + self.grid_x
        pred_boxes[..., 1] = center_y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(raw_w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(raw_h.data) * self.anchor_h

        flat_boxes = pred_boxes.view(n_samples, -1, 4) * self.stride
        flat_conf = pred_conf.view(n_samples, -1, 1)
        flat_cls = pred_cls.view(n_samples, -1, self.num_classes)
        output = torch.cat((flat_boxes, flat_conf, flat_cls), -1)

        # Inference: nothing to compare against, so no loss.
        if targets is None:
            return output, 0

        (iou_scores, class_mask, obj_mask, noobj_mask,
         tx, ty, tw, th, tcls, tconf) = DetectionUtils.build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        # Box and class terms use only object cells; the confidence term
        # also includes the no-object cells.
        loss_x = self.mse_loss(center_x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(center_y[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(raw_w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(raw_h[obj_mask], th[obj_mask])
        conf_loss_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        conf_loss_noobj = self.bce_loss(pred_conf[noobj_mask],
                                        tconf[noobj_mask])
        loss_conf = (self.obj_scale * conf_loss_obj
                     + self.noobj_scale * conf_loss_noobj)
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # Metrics
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        detected_mask = conf50 * class_mask * tconf
        hits50 = torch.sum(iou50 * detected_mask)
        hits75 = torch.sum(iou75 * detected_mask)
        n_objects = obj_mask.sum()
        precision = hits50 / (conf50.sum() + 1e-16)
        recall50 = hits50 / (n_objects + 1e-16)
        recall75 = hits75 / (n_objects + 1e-16)

        def as_scalar(value):
            # Move to the CPU and unwrap into a Python number.
            return DetectionUtils.to_cpu(value).item()

        self.metrics = {
            "loss": as_scalar(total_loss),
            "x": as_scalar(loss_x),
            "y": as_scalar(loss_y),
            "w": as_scalar(loss_w),
            "h": as_scalar(loss_h),
            "conf": as_scalar(loss_conf),
            "cls": as_scalar(loss_cls),
            "cls_acc": as_scalar(cls_acc),
            "recall50": as_scalar(recall50),
            "recall75": as_scalar(recall75),
            "precision": as_scalar(precision),
            "conf_obj": as_scalar(conf_obj),
            "conf_noobj": as_scalar(conf_noobj),
            "grid_size": n_cells,
        }

        return output, total_loss