def __next__(self):
    self.count += 1
    if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'):  # q to quit
        cv2.destroyAllWindows()
        raise StopIteration

    # Letterbox
    img0 = self.imgs.copy()
    img = [letterbox(x, self.img_size, stride=self.stride, auto=self.rect and self.auto)[0] for x in img0]

    # Stack
    img = np.stack(img, 0)

    # Convert
    img = img[..., ::-1].transpose((0, 3, 1, 2))  # BGR to RGB, BHWC to BCHW
    img = np.ascontiguousarray(img)

    return self.sources, img, img0, None, ''
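# Every loader in this file funnels frames through letterbox(). For reference, a
# minimal sketch of that helper following YOLOv5's utils/augmentations.py; argument
# defaults may differ slightly between YOLOv5 versions.
import cv2
import numpy as np


def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    # Resize and pad image to new_shape while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old); only scale down unless scaleup is allowed
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle: pad only up to the next stride multiple
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)
    elif scaleFill:  # stretch to exactly new_shape, no padding
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]

    dw /= 2  # split padding between the two sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # pad with gray
    return im, ratio, (dw, dh)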
def __init__(self, sources='streams.txt', img_size=640, stride=32):
    self.mode = 'stream'
    self.img_size = img_size
    self.stride = stride

    if os.path.isfile(sources):
        with open(sources, 'r') as f:
            sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())]
    else:
        sources = [sources]

    n = len(sources)
    self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n
    self.sources = [clean_str(x) for x in sources]  # clean source names for later
    for i, s in enumerate(sources):  # index, source
        # Start thread to read frames from video stream
        print(f'{i + 1}/{n}: {s}... ', end='')
        if 'youtube.com/' in s or 'youtu.be/' in s:  # if source is YouTube video
            check_requirements(('pafy', 'youtube_dl'))
            import pafy
            s = pafy.new(s).getbest(preftype="mp4").url  # YouTube URL
        s = eval(s) if s.isnumeric() else s  # i.e. s = '0' local webcam
        cap = cv2.VideoCapture(s)
        assert cap.isOpened(), f'Failed to open {s}'
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.fps[i] = max(cap.get(cv2.CAP_PROP_FPS) % 100, 0) or 30.0  # 30 FPS fallback
        self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf')  # infinite stream fallback
        _, self.imgs[i] = cap.read()  # guarantee first frame
        self.threads[i] = Thread(target=self.update, args=([i, cap]), daemon=True)
        print(f" success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)")
        self.threads[i].start()
    print('')  # newline

    # check for common shapes
    s = np.stack([letterbox(x, self.img_size, stride=self.stride)[0].shape for x in self.imgs], 0)  # shapes
    self.rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal
    if not self.rect:
        print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
def __next__(self):
    if self.count == 1:
        raise StopIteration
    self.count += 1
    img0 = self.img

    # Padded resize
    img = letterbox(img0, self.img_size, stride=self.stride, auto=self.auto)[0]

    # Convert
    img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
    img = np.ascontiguousarray(img)

    return img, img0, self.cap
def __next__(self):
    if self.count == self.nf:
        raise StopIteration
    path = self.files[self.count]

    if self.source_img:
        # Get image: when images are passed in directly, they arrive one at a time, stored in self.files
        self.count += 1
        img0 = path  # BGR
        path = "img"
        assert img0 is not None, f'Image Not Found {path}'
        s = f'image {self.count}/{self.nf} {path}: '
    elif self.video_flag[self.count]:
        # Read video
        self.mode = 'video'
        ret_val, img0 = self.cap.read()
        while not ret_val:
            self.count += 1
            self.cap.release()
            if self.count == self.nf:  # last video
                raise StopIteration
            else:
                path = self.files[self.count]
                self.new_video(path)
                ret_val, img0 = self.cap.read()
        self.frame += 1
        s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: '
    else:
        # Read image
        self.count += 1
        img0 = cv2.imread(path)  # BGR
        assert img0 is not None, f'Image Not Found {path}'
        s = f'image {self.count}/{self.nf} {path}: '

    # Padded resize
    img = letterbox(img0, self.img_size, stride=self.stride, auto=self.auto)[0]

    # Convert
    img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
    img = np.ascontiguousarray(img)

    return path, img, img0, self.cap, s
def __init__(self, sources='streams.txt', img_size=640, stride=32, auto=True):
    self.mode = 'stream'
    self.img_size = img_size
    self.stride = stride

    if os.path.isfile(sources):
        with open(sources) as f:
            sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())]
    else:
        sources = [sources]

    n = len(sources)
    self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n
    self.sources = [clean_str(x) for x in sources]  # clean source names for later
    self.auto = auto
    for i, s in enumerate(sources):  # index, source
        # Start thread to read frames from video stream
        st = f'{i + 1}/{n}: {s}... '
        if urlparse(s).hostname in ('www.youtube.com', 'youtube.com', 'youtu.be'):  # if source is YouTube video
            check_requirements(('pafy', 'youtube_dl==2020.12.2'))
            import pafy
            s = pafy.new(s).getbest(preftype="mp4").url  # YouTube URL
        s = eval(s) if s.isnumeric() else s  # i.e. s = '0' local webcam
        if s == 0:
            assert not is_colab(), '--source 0 webcam unsupported on Colab. Rerun command in a local environment.'
            assert not is_kaggle(), '--source 0 webcam unsupported on Kaggle. Rerun command in a local environment.'
        cap = cv2.VideoCapture(s)
        assert cap.isOpened(), f'{st}Failed to open {s}'
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)  # warning: may return 0 or nan
        self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf')  # infinite stream fallback
        self.fps[i] = max((fps if math.isfinite(fps) else 0) % 100, 0) or 30  # 30 FPS fallback
        _, self.imgs[i] = cap.read()  # guarantee first frame
        self.threads[i] = Thread(target=self.update, args=([i, cap, s]), daemon=True)
        LOGGER.info(f"{st} Success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)")
        self.threads[i].start()
    LOGGER.info('')  # newline

    # check for common shapes
    s = np.stack([letterbox(x, self.img_size, stride=self.stride, auto=self.auto)[0].shape for x in self.imgs])
    self.rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal
    if not self.rect:
        LOGGER.warning('WARNING: Stream shapes differ. For optimal performance supply similarly-shaped streams.')
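# Neither __init__ above includes the update() method its threads target. A sketch
# following the YOLOv5 implementation (matching the three-argument version used
# directly above), which keeps self.imgs[i] refreshed with the latest frame:
def update(self, i, cap, stream):
    # Read frames from stream `i` in a daemon thread
    n, f = 0, self.frames[i]  # frame number, frame limit
    while cap.isOpened() and n < f:
        n += 1
        cap.grab()
        success, im = cap.retrieve()
        if success:
            self.imgs[i] = im
        else:
            LOGGER.warning('WARNING: Video stream unresponsive, please check your IP camera connection.')
            self.imgs[i] = np.zeros_like(self.imgs[i])
            cap.open(stream)  # re-open stream if signal was lost
        time.sleep(1 / self.fps[i])  # pace reads to the stream's FPS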
def to_yolov5_dataloader(images_gen, stride, img_size=1024):
    """
    Mimics the output of YOLOv5's ImageLoader class, but instead of reading
    from a filepath, the images are loaded from the Pluto API.
    """
    for capture, image in images_gen:
        # To numpy (RGB PIL image -> BGR array)
        img0 = np.array(image)[:, :, ::-1].copy()

        # Padded resize
        img = letterbox(img0, img_size, stride=stride, auto=False)[0]

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        yield capture, f"/tmp/{capture.ImgName}.png", img, img0
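# Usage sketch (hypothetical): `fetch_captures` stands in for whatever Pluto API
# call yields (capture, PIL.Image) pairs, and `model` for a loaded YOLOv5 network;
# neither is defined in this file.
for capture, img_path, img, img0 in to_yolov5_dataloader(fetch_captures(), stride=32, img_size=1024):
    im = torch.from_numpy(img).float() / 255  # normalize to 0.0 - 1.0
    pred = model(im[None])[0]  # add batch dimension before inference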
def __next__(self):
    if self.count == self.nf:
        raise StopIteration
    path = self.files[self.count]

    if self.video_flag[self.count]:
        # Read video
        self.mode = 'video'
        ret_val, img0 = self.cap.read()
        if not ret_val:
            self.count += 1
            self.cap.release()
            if self.count == self.nf:  # last video
                raise StopIteration
            else:
                path = self.files[self.count]
                self.new_video(path)
                ret_val, img0 = self.cap.read()
        self.frame += 1
        print(f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: ', end='')
    else:
        # Read image
        self.count += 1
        img0 = cv2.imread(path)  # BGR
        assert img0 is not None, 'Image Not Found ' + path
        print(f'image {self.count}/{self.nf} {path}: ', end='')

    # Padded resize
    img = letterbox(img0, self.img_size, stride=self.stride, auto=self.auto)[0]

    # Convert
    img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
    img = np.ascontiguousarray(img)

    return path, img, img0, self.cap
def __next__(self):
    self.count += 1
    if cv2.waitKey(1) == ord('q'):  # q to quit
        self.cap.release()
        cv2.destroyAllWindows()
        raise StopIteration

    # Read frame
    ret_val, img0 = self.cap.read()
    img0 = cv2.flip(img0, 1)  # flip left-right

    # Print
    assert ret_val, f'Camera Error {self.pipe}'
    img_path = 'webcam.jpg'
    print(f'webcam {self.count}: ', end='')

    # Padded resize
    img = letterbox(img0, self.img_size, stride=self.stride)[0]

    # Convert
    img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
    img = np.ascontiguousarray(img)

    return img_path, img, img0, None
def __getitem__(self, index):
    index = self.indices[index]  # linear, shuffled, or image_weights

    hyp = self.hyp
    mosaic = self.mosaic and random.random() < hyp['mosaic']
    if mosaic:
        # Load mosaic
        img, labels = load_mosaic(self, index)
        shapes = None

        # MixUp augmentation
        if random.random() < hyp['mixup']:
            img, labels = mixup(img, labels, *load_mosaic(self, random.randint(0, self.n - 1)))

    else:
        # Load image
        img, (h0, w0), (h, w) = load_image(self, index)

        # Letterbox
        shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
        img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
        shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

        labels = self.labels[index].copy()
        if labels.size:  # normalized xywh to pixel xyxy format
            labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])

        if self.augment:
            img, labels = random_perspective(img,
                                             labels,
                                             degrees=hyp['degrees'],
                                             translate=hyp['translate'],
                                             scale=hyp['scale'],
                                             shear=hyp['shear'],
                                             perspective=hyp['perspective'])

    nl = len(labels)  # number of labels
    if nl:
        labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3)

    if self.augment:
        # Albumentations
        img, labels = self.albumentations(img, labels)
        nl = len(labels)  # update after Albumentations, which may drop boxes

        # HSV color-space
        augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

        # Flip up-down
        if random.random() < hyp['flipud']:
            img = np.flipud(img)
            if nl:
                labels[:, 2] = 1 - labels[:, 2]

        # Flip left-right
        if random.random() < hyp['fliplr']:
            img = np.fliplr(img)
            if nl:
                labels[:, 1] = 1 - labels[:, 1]

    labels_out = torch.zeros((nl, 6))
    if nl:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # Convert
    img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
    img = np.ascontiguousarray(img)

    return torch.from_numpy(img), labels_out, self.img_files[index], shapes
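# The label round-trip above uses xywhn2xyxy / xyxy2xywhn. A minimal numpy sketch
# of the forward direction, following the YOLOv5 helper: normalized
# (x_center, y_center, w, h) -> pixel (x1, y1, x2, y2), shifted by letterbox padding.
def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
    y = np.copy(x)
    y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw  # top-left x
    y[:, 1] = h * (x[:, 1] - x[:, 3] / 2) + padh  # top-left y
    y[:, 2] = w * (x[:, 0] + x[:, 2] / 2) + padw  # bottom-right x
    y[:, 3] = h * (x[:, 1] + x[:, 3] / 2) + padh  # bottom-right y
    return y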
conf_thres = 0.4
classes = None
agnostic_nms = False
line_thickness = 3

path = "yolov5/zidane.jpg"
img0 = cv2.imread(path)
image_size = (640, 640)
stride = 32

# Padded resize (stride must be passed by keyword: the third positional
# argument of letterbox is the padding color, not the stride)
img = letterbox(img0, image_size, stride=stride, auto=False)[0]

# Convert
img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
img = np.ascontiguousarray(img)
img = torch.from_numpy(img)
img = img.float()
img /= 255  # 0 - 255 to 0.0 - 1.0
if len(img.shape) == 3:
    img = img[None]  # expand for batch dim
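# A sketch of the inference steps that could follow the preprocessing above,
# assuming a model fetched via torch.hub and the non_max_suppression / scale_coords
# helpers from YOLOv5's utils; treat this as illustrative, not canonical.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)  # raw model, no auto-preprocessing
model.eval()
with torch.no_grad():
    pred = model(img)[0]  # raw predictions for the single-image batch
pred = non_max_suppression(pred, conf_thres, iou_thres=0.45, classes=classes, agnostic=agnostic_nms)
for det in pred:  # one (n, 6) tensor of xyxy, conf, cls per image
    if len(det):
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()  # map back to img0 pixels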
def generate_detections_one_image(self, img_original, image_id, detection_threshold):
    """Apply the detector to an image.

    Args:
        img_original: the PIL Image object with EXIF rotation taken into account
        image_id: a path to identify the image; will be in the "file" field of the output object
        detection_threshold: confidence above which to include the detection proposal

    Returns:
    A dict with the following fields, see the 'images' key in
    https://github.com/microsoft/CameraTraps/tree/master/api/batch_processing#batch-processing-api-output-format
        - 'file' (always present)
        - 'max_detection_conf'
        - 'detections', which is a list of detection objects containing keys 'category', 'conf' and 'bbox'
        - 'failure'
    """
    result = {'file': image_id}
    detections = []
    max_conf = 0.0

    try:
        img_original = np.asarray(img_original)

        # padded resize
        img = letterbox(img_original, new_shape=PTDetector.IMAGE_SIZE,
                        stride=PTDetector.STRIDE, auto=True)[0]  # JIT requires auto=False
        img = img.transpose((2, 0, 1))  # HWC to CHW; PIL Image is RGB already
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img)
        img = img.to(self.device)
        img = img.float()
        img /= 255

        if len(img.shape) == 3:  # always true for now, TODO add inference using larger batch size
            img = torch.unsqueeze(img, 0)

        pred: list = self.model(img)[0]

        # NMS
        pred = non_max_suppression(prediction=pred, conf_thres=detection_threshold)

        # format detections/bounding boxes
        gn = torch.tensor(img_original.shape)[[1, 0, 1, 0]]  # normalization gain whwh

        for det in pred:
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img_original.shape).round()

                for *xyxy, conf, cls in reversed(det):
                    # normalized center-x, center-y, width and height
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()
                    api_box = ct_utils.convert_yolo_to_xywh(xywh)

                    conf = ct_utils.truncate_float(conf.tolist(), precision=CONF_DIGITS)

                    # MegaDetector output format's categories start at 1, but this model's start at 0
                    cls = int(cls.tolist()) + 1
                    if cls not in (1, 2, 3):
                        raise KeyError(f'{cls} is not a valid class.')

                    detections.append({
                        'category': str(cls),
                        'conf': conf,
                        'bbox': ct_utils.truncate_float_array(api_box, precision=COORD_DIGITS)
                    })
                    max_conf = max(max_conf, conf)

    except Exception as e:
        result['failure'] = FAILURE_INFER
        print('PTDetector: image {} failed during inference: {}'.format(image_id, str(e)))

    result['max_detection_conf'] = max_conf
    result['detections'] = detections

    return result
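# For reference, the ct_utils.convert_yolo_to_xywh call above converts YOLO's
# normalized (x_center, y_center, w, h) box into the API's normalized
# (x_min, y_min, w, h). A minimal sketch of that conversion:
def convert_yolo_to_xywh(yolo_box):
    x_center, y_center, width_of_box, height_of_box = yolo_box
    x_min = x_center - width_of_box / 2.0  # move origin from box center to top-left corner
    y_min = y_center - height_of_box / 2.0
    return [x_min, y_min, width_of_box, height_of_box]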
def detect_image(
        self,
        img_cv,
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        classes=None,  # filter by class
        agnostic_nms=False,  # class-agnostic NMS
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
):
    device = select_device('')

    ### Preprocess based on what's inside LoadImages
    # Padded resize
    img = letterbox(img_cv, 640, stride=32)[0]

    # Convert
    img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
    img = np.ascontiguousarray(img)
    ### Preprocess finish

    img = torch.from_numpy(img).to(device)
    img = img.float()
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if len(img.shape) == 3:
        img = img[None]  # expand for batch dim

    # Inference
    pred = self.model(img, augment=False, visualize=False)[0]

    # NMS
    pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)

    # Process predictions
    bbox_pred = []
    s, im0 = '', img_cv.copy()
    for i, det in enumerate(pred):  # detections per image
        s += '%gx%g ' % img.shape[2:]  # print string
        if len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

            # Visualize results
            for *xyxy, conf, cls in reversed(det):
                c = int(cls)  # integer class
                label = None if hide_labels else (self.names[c] if hide_conf else f'{self.names[c]} {conf:.2f}')
                plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=line_thickness)

        bbox_pred.append(det.cpu().numpy()[:, :-1])  # bbox pred after rescaling back to the original image size

    return bbox_pred[0], im0
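# Usage sketch (hypothetical): `Detector` stands in for whatever class hosts
# detect_image above, with self.model and self.names set up at construction time.
detector = Detector(weights='yolov5s.pt')  # illustrative constructor, not a real API
frame = cv2.imread('bus.jpg')  # any BGR image
boxes, annotated = detector.detect_image(frame, conf_thres=0.4)  # boxes: (n, 5) array of xyxy + conf
cv2.imwrite('annotated.jpg', annotated)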