Python letterbox 예제들, yolov5.utils.augmentations.letterbox Python 예제들

예제 #1

0

파일 보기

    def __next__(self):
        """Return the next batch holding one letterboxed frame per stream.

        Returns:
            A tuple ``(self.sources, batch, img0, None, '')`` where ``batch``
            is the stacked, channel-reordered array of letterboxed frames and
            ``img0`` is the list of frames it was built from.

        Raises:
            StopIteration: if any reader thread is no longer alive, or if
                ``cv2.waitKey`` reports that 'q' was pressed.
        """
        self.count += 1

        # The keyboard is only polled when every reader thread is still alive.
        stream_died = any(not t.is_alive() for t in self.threads)
        if stream_died or cv2.waitKey(1) == ord('q'):  # q to quit
            cv2.destroyAllWindows()
            raise StopIteration

        # Letterbox every frame of a shallow snapshot of the frame list
        img0 = self.imgs.copy()
        resized = []
        for frame in img0:
            padded = letterbox(frame,
                               self.img_size,
                               stride=self.stride,
                               auto=self.rect and self.auto)
            resized.append(padded[0])

        # Stack along a new leading axis, then BGR to RGB, BHWC to BCHW
        batch = np.stack(resized, 0)
        batch = np.transpose(batch[..., ::-1], (0, 3, 1, 2))
        batch = np.ascontiguousarray(batch)

        return self.sources, batch, img0, None, ''

예제 #2

0

파일 보기

파일: yolo_detector.py 프로젝트: RI-SE/smirk

    def detect_pedestrians(self, camera_frame: np.ndarray) -> List[BoundingBox]:
        """Run the detector on one camera frame and return its bounding boxes.

        The frame is letterboxed to ``self.IMG_SIZE``, converted to a
        half-precision tensor scaled by 1/255 on ``self.device``, passed
        through ``self.model`` and filtered with non-max suppression. The
        first four columns of the surviving detections are rescaled to the
        shape of ``camera_frame`` and converted to integers.

        Args:
            camera_frame: Image array indexed as (height, width, channel),
                as implied by the ``transpose(2, 0, 1)`` below.

        Returns:
            One ``self.detection_to_bounding_box`` result per detection.
        """
        # Pre-process: letterbox, channels first, add a batch axis
        padded = letterbox(camera_frame,
                           self.IMG_SIZE,
                           stride=self.model.stride)[0]
        chw = np.ascontiguousarray(padded.transpose(2, 0, 1))

        im = torch.from_numpy(np.expand_dims(chw, 0)).to(self.device)
        im = im.half()
        im /= 255

        # Inference
        raw_pred = self.model(im)

        # NMS (only the first, and only, image of the batch is kept)
        per_image = non_max_suppression(raw_pred,
                                        conf_thres=self.conf,
                                        iou_thres=self.NMS_IOU_THRESHOLD)
        pred = per_image[0]

        # Scale predictions to original image coordinates
        scaled = scale_coords(im.shape[2:], pred[:, :4], camera_frame.shape)
        pred_boxes = scaled.round().cpu().numpy().astype(int)

        boxes = []
        for det in pred_boxes:
            boxes.append(self.detection_to_bounding_box(det))
        return boxes

예제 #3

0

파일 보기

파일: eval_outlier.py 프로젝트: RI-SE/smirk

def extract_box(row):
    """Crop the first predicted box out of a row's image and normalise it.

    Args:
        row: Object exposing ``predictions`` (its first element must support
            ``.cpu().numpy()``) and ``img_path`` (a path ``Image.open`` can
            read).

    Returns:
        The crop resized by ``letterbox`` with target ``(160, 64)``,
        ``auto=False`` and ``scaleFill=True``, divided by 255 and cast to
        ``float32``.
    """
    first_pred = row.predictions[0].cpu().numpy()

    # The first four prediction values are used as the crop rectangle.
    cropped = Image.open(row.img_path).crop(first_pred[:4])
    im_box = np.array(cropped)

    resized = letterbox(im_box, (160, 64), auto=False, scaleFill=True)[0]
    scaled = resized / 255
    return scaled.astype(np.float32)

예제 #4

0

파일 보기

    def is_accepted(self, camera_frame: np.ndarray,
                    predicted_box_crop: np.ndarray, distance: float) -> bool:
        """Decide whether a detection is accepted.

        Detections with ``distance <= 10`` are always accepted. Otherwise the
        crop is resized to ``self.IMG_SIZE``, scaled by 1/255 and passed to
        ``self.model.predict``; the detection is accepted when the model does
        not flag it as an outlier.

        Args:
            camera_frame: Not used by this method.
            predicted_box_crop: Image crop of the predicted box.
            distance: Distance associated with the detection.

        Returns:
            ``True`` if accepted, ``False`` if flagged as an outlier.
        """
        if distance <= 10:
            return True

        resized_box = letterbox(predicted_box_crop,
                                self.IMG_SIZE,
                                auto=False,
                                scaleFill=True)[0]

        # Single-element batch of the normalised crop
        batch = np.expand_dims(resized_box / 255, 0).astype(np.float32)
        pred: Dict[Any, Any] = self.model.predict(
            batch,
            outlier_type="instance",
            return_feature_score=False,
            return_instance_score=False,
        )

        flagged = pred["data"]["is_outlier"][0]
        return not bool(flagged)

예제 #5

0

파일 보기

    def __next__(self):
        """Return the next image or video frame, letterboxed for inference.

        For a video entry a frame is read from ``self.cap``. When the current
        video yields no frame, the loader moves on to the following file and
        keeps doing so until a frame is read or the file list is exhausted,
        so an empty or unreadable video is skipped. For an image entry the
        file is read with ``cv2.imread``.

        Returns:
            A tuple ``(path, img, img0, self.cap, s)`` where ``img`` is the
            letterboxed, channel-first, contiguous array, ``img0`` is the
            frame as read, and ``s`` is a progress string.

        Raises:
            StopIteration: when all files have been consumed.
            AssertionError: when an image file cannot be read.
        """
        if self.count == self.nf:
            raise StopIteration
        path = self.files[self.count]

        if self.video_flag[self.count]:
            # Read video
            self.mode = 'video'
            ret_val, img0 = self.cap.read()
            # Loop (rather than retry once) so that a next video which also
            # yields no frame does not leak img0=None into letterbox().
            while not ret_val:
                self.count += 1
                self.cap.release()
                if self.count == self.nf:  # last video
                    raise StopIteration
                path = self.files[self.count]
                self.new_video(path)
                ret_val, img0 = self.cap.read()

            self.frame += 1
            s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: '

        else:
            # Read image
            self.count += 1
            img0 = cv2.imread(path)  # BGR
            assert img0 is not None, f'Image Not Found {path}'
            s = f'image {self.count}/{self.nf} {path}: '

        # Padded resize
        img = letterbox(img0,
                        self.img_size,
                        stride=self.stride,
                        auto=self.auto)[0]

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        return path, img, img0, self.cap, s

예제 #6

0

파일 보기

    def __next__(self):
        """Return the next webcam frame, mirrored and letterboxed.

        Returns:
            A tuple ``('webcam.jpg', img, img0, None, s)`` where ``img0`` is
            the horizontally flipped frame, ``img`` is its letterboxed,
            channel-first, contiguous version and ``s`` is a progress string.

        Raises:
            StopIteration: when 'q' is pressed; the capture is released and
                all OpenCV windows are closed first.
            AssertionError: when the camera fails to deliver a frame.
        """
        self.count += 1
        if cv2.waitKey(1) == ord('q'):  # q to quit
            self.cap.release()
            cv2.destroyAllWindows()
            raise StopIteration

        # Read frame. Check the read succeeded before touching the frame:
        # on failure img0 is None and cv2.flip would raise an OpenCV error
        # that hides the intended 'Camera Error' message.
        ret_val, img0 = self.cap.read()
        assert ret_val, f'Camera Error {self.pipe}'
        img0 = cv2.flip(img0, 1)  # flip left-right

        # Print
        img_path = 'webcam.jpg'
        s = f'webcam {self.count}: '

        # Padded resize
        img = letterbox(img0, self.img_size, stride=self.stride)[0]

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        return img_path, img, img0, None, s

예제 #7

0

파일 보기

    def __getitem__(self, index):
        """Load, optionally augment, and return the sample at ``index``.

        Two loading paths exist. With mosaic enabled (and selected by a random
        draw against ``hyp['mosaic']``) the sample comes from ``load_mosaic``,
        optionally blended with a second mosaic via ``mixup``. Otherwise a
        single image is loaded, letterboxed, and its labels are converted
        from normalised xywh to pixel xyxy.

        Afterwards the labels are converted back with ``xyxy2xywhn`` and, when
        ``self.augment`` is set, Albumentations, HSV jitter and random flips
        are applied.

        Returns:
            A tuple ``(image_tensor, labels_out, image_path, shapes)``.
            ``labels_out`` has shape ``(nl, 6)`` with column 0 left at zero;
            ``shapes`` is ``None`` on the mosaic path.
        """
        index = self.indices[index]  # linear, shuffled, or image_weights

        hyp = self.hyp
        # Short-circuit: random.random() is only drawn when self.mosaic is truthy.
        mosaic = self.mosaic and random.random() < hyp['mosaic']
        if mosaic:
            # Load mosaic
            img, labels = load_mosaic(self, index)
            # No original/letterbox shape information is reported for mosaics.
            shapes = None

            # MixUp augmentation
            if random.random() < hyp['mixup']:
                # Blend with a second mosaic built around a random index.
                img, labels = mixup(
                    img, labels,
                    *load_mosaic(self, random.randint(0, self.n - 1)))

        else:
            # Load image
            # (h0, w0) and (h, w) are two sizes returned by load_image;
            # presumably original and resized height/width (confirm in load_image).
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox
            shape = self.batch_shapes[self.batch[
                index]] if self.rect else self.img_size  # final letterboxed shape
            # Upscaling of small images is only allowed when augmenting.
            img, ratio, pad = letterbox(img,
                                        shape,
                                        auto=False,
                                        scaleup=self.augment)
            shapes = (h0, w0), (
                (h / h0, w / w0), pad)  # for COCO mAP rescaling

            # Work on a copy so the stored labels are not modified.
            labels = self.labels[index].copy()
            if labels.size:  # normalized xywh to pixel xyxy format
                labels[:, 1:] = xywhn2xyxy(labels[:, 1:],
                                           ratio[0] * w,
                                           ratio[1] * h,
                                           padw=pad[0],
                                           padh=pad[1])

            if self.augment:
                # Geometric augmentation is only applied on the non-mosaic path here.
                img, labels = random_perspective(
                    img,
                    labels,
                    degrees=hyp['degrees'],
                    translate=hyp['translate'],
                    scale=hyp['scale'],
                    shear=hyp['shear'],
                    perspective=hyp['perspective'])

        nl = len(labels)  # number of labels
        if nl:
            # Convert box columns 1-4 using the current image width and height.
            labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5],
                                        w=img.shape[1],
                                        h=img.shape[0],
                                        clip=True,
                                        eps=1E-3)

        if self.augment:
            # Albumentations
            img, labels = self.albumentations(img, labels)
            nl = len(labels)  # update after albumentations

            # HSV color-space
            # Return value is unused; presumably img is modified in place (confirm).
            augment_hsv(img,
                        hgain=hyp['hsv_h'],
                        sgain=hyp['hsv_s'],
                        vgain=hyp['hsv_v'])

            # Flip up-down
            if random.random() < hyp['flipud']:
                img = np.flipud(img)
                if nl:
                    # Mirror label column 2 (presumably the normalised y centre).
                    labels[:, 2] = 1 - labels[:, 2]

            # Flip left-right
            if random.random() < hyp['fliplr']:
                img = np.fliplr(img)
                if nl:
                    # Mirror label column 1 (presumably the normalised x centre).
                    labels[:, 1] = 1 - labels[:, 1]

            # Cutouts
            # labels = cutout(img, labels, p=0.5)

        # Column 0 stays zero; the labels fill columns 1-5.
        # NOTE(review): column 0 is presumably filled in later by the batch
        # collate step; confirm against the caller.
        labels_out = torch.zeros((nl, 6))
        if nl:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        # Makes the array contiguous after the transpose/reverse views (and flips).
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, self.img_files[index], shapes

예제 #8

0

파일 보기

    def __init__(self,
                 sources='streams.txt',
                 img_size=640,
                 stride=32,
                 auto=True):
        """Open every stream source and start one reader thread per stream.

        Args:
            sources: Either the path of an existing text file listing one
                source per line (blank lines ignored), or a single source
                string. A source made only of decimal digits is treated as
                a local camera index.
            img_size: Target size passed to ``letterbox``.
            stride: Stride passed to ``letterbox``.
            auto: ``auto`` flag passed to ``letterbox``.

        Raises:
            AssertionError: if a source cannot be opened.
        """
        self.mode = 'stream'
        self.img_size = img_size
        self.stride = stride

        if os.path.isfile(sources):
            with open(sources) as f:
                sources = [
                    x.strip() for x in f.read().strip().splitlines()
                    if x.strip()
                ]
        else:
            sources = [sources]

        n = len(sources)
        self.imgs = [None] * n
        self.fps = [0] * n
        self.frames = [0] * n
        self.threads = [None] * n
        self.sources = [clean_str(x)
                        for x in sources]  # clean source names for later
        self.auto = auto
        for i, s in enumerate(sources):  # index, source
            # Start thread to read frames from video stream
            st = f'{i + 1}/{n}: {s}... '
            if 'youtube.com/' in s or 'youtu.be/' in s:  # if source is YouTube video
                check_requirements(('pafy', 'youtube_dl'))
                import pafy
                s = pafy.new(s).getbest(preftype="mp4").url  # YouTube URL
            # i.e. s = '0' local webcam. int() replaces the former eval():
            # no code is evaluated from the source string, and inputs such
            # as '007' that eval() rejected are accepted. isdecimal() matches
            # exactly the digit strings int() can parse.
            s = int(s) if s.isdecimal() else s
            cap = cv2.VideoCapture(s)
            assert cap.isOpened(), f'{st}Failed to open {s}'
            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            self.fps[i] = max(cap.get(cv2.CAP_PROP_FPS) % 100,
                              0) or 30.0  # 30 FPS fallback
            self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)),
                                 0) or float('inf')  # infinite stream fallback

            _, self.imgs[i] = cap.read()  # guarantee first frame
            self.threads[i] = Thread(target=self.update,
                                     args=([i, cap, s]),
                                     daemon=True)
            LOGGER.info(
                f"{st} Success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)"
            )
            self.threads[i].start()
        LOGGER.info('')  # newline

        # check for common shapes
        s = np.stack([
            letterbox(x, self.img_size, stride=self.stride,
                      auto=self.auto)[0].shape for x in self.imgs
        ])
        self.rect = np.unique(
            s, axis=0).shape[0] == 1  # rect inference if all shapes equal
        if not self.rect:
            LOGGER.warning(
                'WARNING: Stream shapes differ. For optimal performance supply similarly-shaped streams.'
            )