def __next__(self): self.count += 1 if not all(x.is_alive() for x in self.threads) or cv2.waitKey( 1) == ord('q'): # q to quit cv2.destroyAllWindows() raise StopIteration # Letterbox img0 = self.imgs.copy() img = [ letterbox(x, self.img_size, stride=self.stride, auto=self.rect and self.auto)[0] for x in img0 ] # Stack img = np.stack(img, 0) # Convert img = img[..., ::-1].transpose( (0, 3, 1, 2)) # BGR to RGB, BHWC to BCHW img = np.ascontiguousarray(img) return self.sources, img, img0, None, ''
def detect_pedestrians(self, camera_frame: np.ndarray) -> List[BoundingBox]: # Pre-process im = letterbox(camera_frame, self.IMG_SIZE, stride=self.model.stride)[0] im = np.ascontiguousarray(im.transpose(2, 0, 1)) im = torch.from_numpy(np.expand_dims(im, 0)).to(self.device) im = im.half() im /= 255 # Inference pred = self.model(im) # NMS pred = non_max_suppression( pred, conf_thres=self.conf, iou_thres=self.NMS_IOU_THRESHOLD )[0] # Scale predictions to original image coordinates pred_boxes = ( scale_coords(im.shape[2:], pred[:, :4], camera_frame.shape) .round() .cpu() .numpy() .astype(int) ) return [self.detection_to_bounding_box(det) for det in pred_boxes]
def extract_box(row): pred = row.predictions[0].cpu().numpy() im_box = np.array(Image.open(row.img_path).crop(pred[:4])) im_box_letter, *_ = letterbox(im_box, (160, 64), auto=False, scaleFill=True) return (im_box_letter / 255).astype(np.float32)
def is_accepted(self, camera_frame: np.ndarray, predicted_box_crop: np.ndarray, distance: float) -> bool: if distance <= 10: return True resized_box, *_ = letterbox(predicted_box_crop, self.IMG_SIZE, auto=False, scaleFill=True) pred: Dict[Any, Any] = self.model.predict( np.expand_dims(resized_box / 255, 0).astype(np.float32), outlier_type="instance", return_feature_score=False, return_instance_score=False, ) is_outlier = bool(pred["data"]["is_outlier"][0]) return not is_outlier
def __next__(self): if self.count == self.nf: raise StopIteration path = self.files[self.count] if self.video_flag[self.count]: # Read video self.mode = 'video' ret_val, img0 = self.cap.read() if not ret_val: self.count += 1 self.cap.release() if self.count == self.nf: # last video raise StopIteration else: path = self.files[self.count] self.new_video(path) ret_val, img0 = self.cap.read() self.frame += 1 s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: ' else: # Read image self.count += 1 img0 = cv2.imread(path) # BGR assert img0 is not None, f'Image Not Found {path}' s = f'image {self.count}/{self.nf} {path}: ' # Padded resize img = letterbox(img0, self.img_size, stride=self.stride, auto=self.auto)[0] # Convert img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB img = np.ascontiguousarray(img) return path, img, img0, self.cap, s
def __next__(self): self.count += 1 if cv2.waitKey(1) == ord('q'): # q to quit self.cap.release() cv2.destroyAllWindows() raise StopIteration # Read frame ret_val, img0 = self.cap.read() img0 = cv2.flip(img0, 1) # flip left-right # Print assert ret_val, f'Camera Error {self.pipe}' img_path = 'webcam.jpg' s = f'webcam {self.count}: ' # Padded resize img = letterbox(img0, self.img_size, stride=self.stride)[0] # Convert img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB img = np.ascontiguousarray(img) return img_path, img, img0, None, s
def __getitem__(self, index): index = self.indices[index] # linear, shuffled, or image_weights hyp = self.hyp mosaic = self.mosaic and random.random() < hyp['mosaic'] if mosaic: # Load mosaic img, labels = load_mosaic(self, index) shapes = None # MixUp augmentation if random.random() < hyp['mixup']: img, labels = mixup( img, labels, *load_mosaic(self, random.randint(0, self.n - 1))) else: # Load image img, (h0, w0), (h, w) = load_image(self, index) # Letterbox shape = self.batch_shapes[self.batch[ index]] if self.rect else self.img_size # final letterboxed shape img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) shapes = (h0, w0), ( (h / h0, w / w0), pad) # for COCO mAP rescaling labels = self.labels[index].copy() if labels.size: # normalized xywh to pixel xyxy format labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) if self.augment: img, labels = random_perspective( img, labels, degrees=hyp['degrees'], translate=hyp['translate'], scale=hyp['scale'], shear=hyp['shear'], perspective=hyp['perspective']) nl = len(labels) # number of labels if nl: labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3) if self.augment: # Albumentations img, labels = self.albumentations(img, labels) nl = len(labels) # update after albumentations # HSV color-space augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v']) # Flip up-down if random.random() < hyp['flipud']: img = np.flipud(img) if nl: labels[:, 2] = 1 - labels[:, 2] # Flip left-right if random.random() < hyp['fliplr']: img = np.fliplr(img) if nl: labels[:, 1] = 1 - labels[:, 1] # Cutouts # labels = cutout(img, labels, p=0.5) labels_out = torch.zeros((nl, 6)) if nl: labels_out[:, 1:] = torch.from_numpy(labels) # Convert img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB img = np.ascontiguousarray(img) return torch.from_numpy(img), labels_out, self.img_files[index], shapes
def __init__(self, sources='streams.txt', img_size=640, stride=32, auto=True): self.mode = 'stream' self.img_size = img_size self.stride = stride if os.path.isfile(sources): with open(sources) as f: sources = [ x.strip() for x in f.read().strip().splitlines() if len(x.strip()) ] else: sources = [sources] n = len(sources) self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [ 0 ] * n, [None] * n self.sources = [clean_str(x) for x in sources] # clean source names for later self.auto = auto for i, s in enumerate(sources): # index, source # Start thread to read frames from video stream st = f'{i + 1}/{n}: {s}... ' if 'youtube.com/' in s or 'youtu.be/' in s: # if source is YouTube video check_requirements(('pafy', 'youtube_dl')) import pafy s = pafy.new(s).getbest(preftype="mp4").url # YouTube URL s = eval(s) if s.isnumeric() else s # i.e. s = '0' local webcam cap = cv2.VideoCapture(s) assert cap.isOpened(), f'{st}Failed to open {s}' w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) self.fps[i] = max(cap.get(cv2.CAP_PROP_FPS) % 100, 0) or 30.0 # 30 FPS fallback self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf') # infinite stream fallback _, self.imgs[i] = cap.read() # guarantee first frame self.threads[i] = Thread(target=self.update, args=([i, cap, s]), daemon=True) LOGGER.info( f"{st} Success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)" ) self.threads[i].start() LOGGER.info('') # newline # check for common shapes s = np.stack([ letterbox(x, self.img_size, stride=self.stride, auto=self.auto)[0].shape for x in self.imgs ]) self.rect = np.unique( s, axis=0).shape[0] == 1 # rect inference if all shapes equal if not self.rect: LOGGER.warning( 'WARNING: Stream shapes differ. For optimal performance supply similarly-shaped streams.' )