Пример #1
0
    def video_prediction_function(frame_array,
                                  iou_thres_nms=0.45,
                                  conf_thres_nms=0.001):
        n_frames = len(frame_array)
        preds = []
        for i in range(0, n_frames, batch_size):
            frames = []
            for frame in frame_array[i:min(i + batch_size, n_frames)]:
                from utils.datasets import letterbox

                img = letterbox(frame, new_shape=(imgsz, imgsz))[0]
                img = np.array([img, img, img])
                img = np.ascontiguousarray(img)
                frames.append(img)
            frames = np.array(frames)

            # Convert img to torch
            img = torch.from_numpy(frames).to(device)
            img = (img.half() if device.type != "cpu" else img.float()
                   )  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)
            # Inference
            pred = model(img, augment=False)[0]
            # Apply NMS
            pred = non_max_suppression(pred,
                                       iou_thres=iou_thres_nms,
                                       conf_thres=conf_thres_nms)
            _ = []

            for j, det in enumerate(pred):
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          frame_array[i + j].shape)
                det = det.cpu().numpy()
                det[:, :4] = xyxy2xywhn(
                    det[:, :4],
                    w=frame_array[i + j].shape[1],
                    h=frame_array[i + j].shape[0],
                )
                _.append(det)
            preds += _
        return preds
Пример #2
0
    def __getitem__(self, index):
        index = self.indices[index]  # linear, shuffled, or image_weights

        hyp = self.hyp
        mosaic = self.mosaic and random.random() < hyp['mosaic']
        if mosaic:
            # Load mosaic
            img, labels = load_mosaic(self, index)
            shapes = None

            # MixUp augmentation
            if random.random() < hyp['mixup']:
                img, labels = mixup(
                    img, labels,
                    *load_mosaic(self, random.randint(0, self.n - 1)))

        else:
            # Load image
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox
            shape = self.batch_shapes[self.batch[
                index]] if self.rect else self.img_size  # final letterboxed shape
            img, ratio, pad = letterbox(img,
                                        shape,
                                        auto=False,
                                        scaleup=self.augment)
            shapes = (h0, w0), (
                (h / h0, w / w0), pad)  # for COCO mAP rescaling

            labels = self.labels[index].copy()
            if labels.size:  # normalized xywh to pixel xyxy format
                labels[:, 1:] = xywhn2xyxy(labels[:, 1:],
                                           ratio[0] * w,
                                           ratio[1] * h,
                                           padw=pad[0],
                                           padh=pad[1])

            if self.augment:
                img, labels = random_perspective(
                    img,
                    labels,
                    degrees=hyp['degrees'],
                    translate=hyp['translate'],
                    scale=hyp['scale'],
                    shear=hyp['shear'],
                    perspective=hyp['perspective'])

        nl = len(labels)  # number of labels
        if nl:
            labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5],
                                        w=img.shape[1],
                                        h=img.shape[0],
                                        clip=True,
                                        eps=1E-3)

        if self.augment:
            # Albumentations
            img, labels = self.albumentations(img, labels)

            # HSV color-space
            augment_hsv(img,
                        hgain=hyp['hsv_h'],
                        sgain=hyp['hsv_s'],
                        vgain=hyp['hsv_v'])

            # Flip up-down
            if random.random() < hyp['flipud']:
                img = np.flipud(img)
                if nl:
                    labels[:, 2] = 1 - labels[:, 2]

            # Flip left-right
            if random.random() < hyp['fliplr']:
                img = np.fliplr(img)
                if nl:
                    labels[:, 1] = 1 - labels[:, 1]

            # Cutouts
            # if random.random() < 0.9:
            #     labels = cutout(img, labels)

        labels_out = torch.zeros((nl, 6))
        if nl:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, self.img_files[index], shapes
Пример #3
0
    def __getitem__(self, index):
        index = self.indices[index]  # linear, shuffled, or image_weights

        hyp = self.hyp
        mosaic = self.mosaic and random.random() < hyp['mosaic']
        if mosaic:
            # Load mosaic
            img, labels = load_mosaic(self, index)
            shapes = None

            # MixUp https://arxiv.org/pdf/1710.09412.pdf
            if random.random() < hyp['mixup']:
                img2, labels2 = load_mosaic(self, random.randint(0, self.n - 1))
                r = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
                img = (img * r + img2 * (1 - r)).astype(np.uint8)
                labels = np.concatenate((labels, labels2), 0)

        else:
            # Load image
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            labels = self.labels[index].copy()
            if labels.size:  # normalized xywh to pixel xyxy format
                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])

        if self.augment:
            # Augment imagespace
            if not mosaic:
                img, labels = random_perspective(img, labels,
                                                 degrees=hyp['degrees'],
                                                 translate=hyp['translate'],
                                                 scale=hyp['scale'],
                                                 shear=hyp['shear'],
                                                 perspective=hyp['perspective'])

            # Augment colorspace
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            # Apply cutouts
            # if random.random() < 0.9:
            #     labels = cutout(img, labels)

        nL = len(labels)  # number of labels
        if nL:
            labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0])  # xyxy to xywh normalized

        if self.augment:
            # flip up-down
            if random.random() < hyp['flipud']:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

            # flip left-right
            if random.random() < hyp['fliplr']:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3 x img_height x img_width
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, self.img_files[index], shapes