def detect(self, img_path):
    # Load image as an RGB tensor in [0, 1]
    img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))
    _, H, W = img.shape
    # Pad to square so the aspect ratio survives the resize to 416x416
    img, pad = pad_to_square(img, 0)
    imgs = resize(img, 416).unsqueeze(0).to('cuda')
    with torch.no_grad():
        output = self.net(imgs)
        output = non_max_suppression(output, 0.5, 0.5)[0]
    detections = []
    if output is None:  # nothing survived NMS
        return detections
    # Map boxes from the 416x416 padded frame back to the original image
    output = rescale_boxes(output, 416, (H, W)).cpu().numpy()
    boxes, confidences, class_ids = [], [], []
    for x1, y1, x2, y2, conf, cls_conf, cls_pred in output:
        # (x, y, w, h) lists kept for downstream consumers such as trackers
        boxes.append([x1, y1, int(x2 - x1), int(y2 - y1)])
        confidences.append(float(cls_conf))
        class_ids.append(int(cls_pred))
        class_ = self.get_class(int(cls_pred))
        box_2d = [(int(x1), int(y1)), (int(x2), int(y2))]
        detections.append(Detection(box_2d, class_))
    return detections
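# Every snippet in this file calls pad_to_square() and resize() without
# defining them. A minimal sketch of the assumed helpers, matching the
# PyTorch-YOLOv3 (eriklindernoren) utilities these snippets appear to use:
import torch.nn.functional as F

def pad_to_square(img, pad_value):
    # Pad the shorter side of a (C, H, W) tensor so it becomes square.
    c, h, w = img.shape
    dim_diff = abs(h - w)
    pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
    # F.pad takes (left, right, top, bottom) for the last two dims
    pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
    img = F.pad(img, pad, "constant", value=pad_value)
    return img, pad

def resize(image, size):
    # Nearest-neighbor resize of a single (C, H, W) image
    return F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0)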
def predict(self, image, device='cuda', img_size=416, conf_thres=0.6,
            nms_thres=0.2, normalize=False, normalize_input=True):
    """Utility method for getting preprocessed bboxes.

    image   - PIL image or (H, W, 3) array
    returns - bboxes (N, 7): x1, y1, x2, y2, conf, cls_conf, cls_pred
    """
    if normalize_input:
        img = transforms.ToTensor()(image)  # scales to [0, 1]
    else:
        img = torch.from_numpy(np.array(image)).float().permute(2, 0, 1)
    padded_img, pad = pad_to_square(img, 0)
    warped_img = F.interpolate(padded_img.unsqueeze(0), size=img_size,
                               mode="nearest").squeeze(0)
    if device:
        device = torch.device(device)
        self = self.to(device)
        warped_img = warped_img.to(device)
    result = self(warped_img.unsqueeze(0))
    outputs = non_max_suppression(result, conf_thres=conf_thres, nms_thres=nms_thres)
    pred_bboxes = outputs[0]
    if pred_bboxes is None:
        return []
    _, img_h, img_w = img.shape
    _, warped_h, warped_w = warped_img.shape  # tensors are (C, H, W)
    # Normalize coordinates back to the unpadded input image
    pred_bboxes[:, [0, 2]] /= warped_w
    pred_bboxes[:, [1, 3]] /= warped_h
    pred_bboxes[:, [0, 2]] -= pad[0] / max(img_w, img_h)  # pad[0]: left pad
    pred_bboxes[:, [1, 3]] -= pad[2] / max(img_w, img_h)  # pad[2]: top pad
    if not normalize:
        pred_bboxes[:, [0, 2]] *= max(img_w, img_h)
        pred_bboxes[:, [1, 3]] *= max(img_w, img_h)
    return pred_bboxes
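# A hedged usage sketch for predict() above, assuming it is a method on a
# loaded Darknet model; the image path is a placeholder and the (N, 7)
# column layout follows the other snippets in this file:
image = Image.open('example.jpg').convert('RGB')  # hypothetical input
bboxes = model.predict(image, conf_thres=0.6, nms_thres=0.2)
for x1, y1, x2, y2, conf, cls_conf, cls_pred in bboxes:
    print(f"class {int(cls_pred)} @ ({x1:.0f},{y1:.0f})-({x2:.0f},{y2:.0f}) conf={float(conf):.2f}")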
def infer_video(model, video_path, save_path, iou_thres, conf_thres, nms_thres,
                img_size, batch_size=1):
    model.eval()
    if os.path.isfile(video_path):
        cap = cv2.VideoCapture(video_path)
    else:
        raise Exception('no such video')
    tp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
    # Timestamped name so repeated runs don't overwrite; 'mp4v' matches the .mp4 container
    stream = cv2.VideoWriter(os.path.join(save_path, tp + '_out.mp4'),
                             cv2.VideoWriter_fourcc(*'mp4v'), 20.0,
                             (int(cap.get(3)), int(cap.get(4))))
    transform = transforms.Compose([
        transforms.ToTensor(),
        # Darknet YOLOv3 was trained on raw [0, 1] input, so ImageNet
        # normalization is left disabled:
        # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # OpenCV reads BGR; the model expects RGB
        img = transform(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).to(device)
        img, _ = pad_to_square(img, 0)
        img = F.interpolate(img.unsqueeze(0), img_size, mode='nearest').squeeze()
        img = img.unsqueeze(0)
        with torch.no_grad():
            output = model(img)
            output = non_max_suppression(output, conf_thres=conf_thres, nms_thres=nms_thres)
        if output[0] is not None:
            output = rescale_boxes(output[0], img_size, frame.shape[:2])
            frame = vis_frame(frame, output)
        stream.write(frame)
    cap.release()
    stream.release()
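# rescale_boxes(), used throughout these snippets, undoes the letterbox
# transform: it removes the square padding and scales boxes back to the
# original image. A sketch assuming the PyTorch-YOLOv3 convention:
def rescale_boxes(boxes, current_dim, original_shape):
    orig_h, orig_w = original_shape
    # Padding that was added to the shorter side, expressed at current_dim scale
    pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape))
    pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape))
    unpad_h = current_dim - pad_y
    unpad_w = current_dim - pad_x
    # Shift out the padding, then scale to original pixel coordinates
    boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w
    boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h
    boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w
    boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h
    return boxes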
def Yolo_detect(model, camInputFrame, img_size=416, conf_thres=0.8, nms_thres=0.4):
    img = transforms.ToTensor()(Image.fromarray(camInputFrame))
    # Pad to square resolution, then resize to the network input size
    img, _ = pad_to_square(img, 0)
    img = resize(img, img_size)
    input_imgs = img.unsqueeze(0).cuda()  # (1, 3, 416, 416)
    with torch.no_grad():
        detections = model(input_imgs)
        detections = non_max_suppression(detections, conf_thres, nms_thres)[0]
    if detections is not None:
        detections = rescale_boxes(detections, img_size, camInputFrame.shape[:2])
    return detections
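# Hedged usage sketch for Yolo_detect(): `model` is a loaded Darknet net and
# the frame comes from OpenCV. Note the BGR -> RGB conversion, which
# Image.fromarray does not do on its own:
cap = cv2.VideoCapture(0)
ret, frame = cap.read()
if ret:
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    dets = Yolo_detect(model, rgb)
    if dets is not None:
        for x1, y1, x2, y2, conf, cls_conf, cls_pred in dets:
            print(int(cls_pred), float(conf))
cap.release()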
ret, frame = input.read()
nums += 1
if not ret:
    break
print("--------------- reading frame " + str(nums))
frame_start_t = time.time()
# Convert the cv2 BGR frame to a PIL RGB image, then to a tensor
# img = torchvision.transforms.ToTensor()(Image.open(img_path).convert(mode="RGB"))
frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
img = torchvision.transforms.ToTensor()(frame_pil.convert(mode="RGB"))
# NEW: create a handle we can draw detections onto
draw = ImageDraw.Draw(frame_pil)
input_imgs, _ = pad_to_square(img, 0)
# Resize
input_imgs = resize(input_imgs, opt.img_size).unsqueeze(0)
# Configure input (Variable is a no-op wrapper on modern PyTorch)
input_imgs = Variable(input_imgs.type(Tensor))
tensor_t = time.time()
print("tensor conversion took: " + str(tensor_t - frame_start_t))
# Run detection
with torch.no_grad():
    detections = model(input_imgs.to(device))
    detections = non_max_suppression(detections, opt.conf_thres, opt.nms_thres)[0]
detect_t = time.time()
print("object detection took: " + str(detect_t - tensor_t))
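# The loop above creates `draw` but stops right after NMS. A hedged sketch of
# the drawing step that would follow, assuming `classes` holds the label names
# and the detections are still in the padded img_size frame (hence rescale_boxes):
if detections is not None:
    detections = rescale_boxes(detections, opt.img_size, frame.shape[:2])
    for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
        draw.rectangle([float(x1), float(y1), float(x2), float(y2)],
                       outline=(255, 0, 0), width=2)
        draw.text((float(x1), float(y1)), classes[int(cls_pred)], fill=(255, 0, 0))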
    20.0, (int(cap.get(3)), int(cap.get(4)))
)
trans = transforms.Compose([
    transforms.ToTensor(),
    # Darknet YOLOv3 expects raw [0, 1] input, so normalization stays disabled:
    # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
plt.figure()
plt.margins(0, 0)
while cap.isOpened():
    ret, frame = cap.read()
    if ret:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = Image.fromarray(frame)
        img = trans(frame)
        img, _ = pad_to_square(img, 0)
        img = F.interpolate(img.unsqueeze(0), opt.img_size,
                            mode='bilinear', align_corners=False).squeeze()
        img = img.unsqueeze(0).to(device)
        with torch.no_grad():
            detections = model(img)
            detections = non_max_suppression(detections, opt.conf_thres, opt.nms_thres)
        # Bounding-box colors
        cmap = plt.get_cmap("tab20b")
        colors = [cmap(i) for i in np.linspace(0, 1, 20)]
        # Create plot
        fig, ax = plt.subplots(1)
        ax.imshow(np.array(frame))
        # Draw bounding boxes and labels of detections
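        # A hedged sketch of the drawing step this fragment leads into,
        # following the usual PyTorch-YOLOv3 detect.py pattern; `classes`
        # (the loaded label list) and matplotlib.patches are assumed:
        if detections[0] is not None:
            dets = rescale_boxes(detections[0], opt.img_size, np.array(frame).shape[:2])
            for x1, y1, x2, y2, conf, cls_conf, cls_pred in dets:
                color = colors[int(cls_pred) % len(colors)]
                bbox = patches.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                         linewidth=2, edgecolor=color, facecolor="none")
                ax.add_patch(bbox)
                plt.text(x1, y1, s=classes[int(cls_pred)], color="white",
                         verticalalignment="top", bbox={"color": color, "pad": 0})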
def test_dataloader_getitem(self):
    unloader = transforms.ToPILImage()
    self.normalized_labels = True
    # img_path = 'e:/ML_data/images/train2014/COCO_train2014_000000000092.jpg'
    # label_path = 'e:/ML_data/labels/train2014/COCO_train2014_000000000092.txt'
    img_path = 'E:/trafficlight_detect/light_img/IMG_20181124_125821.jpg'
    label_path = 'E:/trafficlight_detect/labels/IMG_20181124_125821.txt'
    # Extract image as PyTorch tensor
    img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))
    fig, ax = plt.subplots(1)
    image_orig = unloader(img)
    ax.imshow(image_orig)
    # ax.add_patch(patches.Rectangle(xy=(225, 131), width=75, height=50, linewidth=1))
    plt.show()
    # Handle images with less than three channels
    if len(img.shape) != 3:
        img = img.unsqueeze(0)
        img = img.expand((3, img.shape[1], img.shape[2]))
    _, h, w = img.shape
    h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
    img, pad = pad_to_square(img, 0)
    _, padded_h, padded_w = img.shape
    image_padded = unloader(img)
    fig, ax = plt.subplots(1)
    ax.imshow(image_padded)
    plt.show()
    # Labels are (class, cx, cy, w, h), normalized; convert to corner coords
    boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
    x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
    y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
    x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
    y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
    fig, ax = plt.subplots(1)
    ax.imshow(image_orig)
    for box_i, (xx1, yy1, xx2, yy2) in enumerate(zip(x1, y1, x2, y2)):
        ax.add_patch(patches.Rectangle(xy=(xx1.item(), yy1.item()),
                                       width=(xx2 - xx1).item(),
                                       height=(yy2 - yy1).item(), linewidth=1))
    plt.show()
    # Adjust for added padding
    x1 += pad[0]
    y1 += pad[2]
    x2 += pad[1]
    y2 += pad[3]
    fig, ax = plt.subplots(1)
    ax.imshow(image_padded)
    for box_i, (xx1, yy1, xx2, yy2) in enumerate(zip(x1, y1, x2, y2)):
        ax.add_patch(patches.Rectangle(xy=(xx1.item(), yy1.item()),
                                       width=(xx2 - xx1).item(),
                                       height=(yy2 - yy1).item(), linewidth=1))
    plt.show()
    # Re-normalize to the padded image
    boxes[:, 1] = ((x1 + x2) / 2) / padded_w
    boxes[:, 2] = ((y1 + y2) / 2) / padded_h
    boxes[:, 3] = boxes[:, 3] * w_factor / padded_w
    boxes[:, 4] = boxes[:, 4] * h_factor / padded_h
    fig, ax = plt.subplots(1)
    ax.imshow(image_padded)
    for box_i in range(len(boxes)):
        x = boxes[box_i, 1] * padded_w
        y = boxes[box_i, 2] * padded_h
        width = boxes[box_i, 3] * padded_w
        height = boxes[box_i, 4] * padded_h  # fixed: was boxes[box_i, 3]
        ax.add_patch(patches.Circle(xy=(x.item(), y.item()), radius=10, linewidth=1))
    plt.show()
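# The label files loaded above use the standard YOLO txt format: one object
# per line, "class cx cy w h", all normalized to [0, 1] relative to the
# unpadded image. A sketch of parsing one (hypothetical) line:
line = "0 0.515 0.433 0.210 0.362"  # example values, not from a real file
cls_id, cx, cy, bw, bh = (float(v) for v in line.split())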
def YOLO():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_def", type=str, default="config/yolov3.cfg", help="path to model definition file")
    parser.add_argument("--weights_path", type=str, default="weights/yolov3.weights", help="path to weights file")
    parser.add_argument("--class_path", type=str, default="data/coco.names", help="path to class label file")
    parser.add_argument("--conf_thres", type=float, default=0.8, help="object confidence threshold")
    parser.add_argument("--nms_thres", type=float, default=0.2, help="iou threshold for non-maximum suppression")
    parser.add_argument("--batch_size", type=int, default=1, help="size of the batches")
    parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension")
    parser.add_argument("--video", type=str, required=True, help="input video")
    parser.add_argument("--display", action="store_true", default=False)
    parser.add_argument("--output", default="./output", help="output dir")
    opt = parser.parse_args()
    print(opt)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Darknet(opt.model_def, img_size=opt.img_size).to(device)
    if opt.weights_path.endswith(".weights"):
        # Load darknet weights
        model.load_darknet_weights(opt.weights_path)
    else:
        # Load checkpoint weights
        model.load_state_dict(torch.load(opt.weights_path))
    model.eval()  # Set in evaluation mode

    classes = load_classes(opt.class_path)  # Extracts class labels from file
    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    # cap = cv2.VideoCapture(0)
    cap = cv2.VideoCapture(opt.video)
    cap.set(3, 1280)
    cap.set(4, 720)
    # out = cv2.VideoWriter(
    #     "output.avi", cv2.VideoWriter_fourcc(*"MJPG"), 10.0,
    #     (darknet.network_width(netMain), darknet.network_height(netMain)))

    print("Starting the YOLO loop...")
    while True:
        try:
            prev_time = time.time()
            ret, frame_read = cap.read()
            if not ret:  # end of stream
                break
            frame = cv2.cvtColor(frame_read, cv2.COLOR_BGR2RGB)
            # Extract image as PyTorch tensor
            img = transforms.ToTensor()(frame)
            # Pad to square resolution
            img, _ = pad_to_square(img, 0)
            # Resize
            img = resize(img, opt.img_size)
            img = img.unsqueeze(0)
            # Configure input (autograd.Variable is obsolete; a typed tensor suffices)
            input_imgs = img.type(Tensor)
            # Get detections
            with torch.no_grad():
                detections = model(input_imgs)
                detections = non_max_suppression(detections, opt.conf_thres, opt.nms_thres)
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            detections = [d for d in detections if d is not None]
            if len(detections) > 0:
                # Rescale boxes to original image
                detections = rescale_boxes(detections[0], opt.img_size, frame.shape[:2])
                frame = cvDrawBoxes(frame, detections, classes)
            current_time = datetime.datetime.now()
            # Save a snapshot roughly once per second
            if int(time.time() * 10) % 10 == 0:
                str_date = datetime.datetime.strftime(current_time, "%Y%m%d")
                str_time = datetime.datetime.strftime(current_time, "%Y%m%d%H%M%S")
                os.makedirs(os.path.join(opt.output, str_date), exist_ok=True)
                cv2.imwrite(os.path.join(opt.output, str_date, str_time + ".jpg"), frame)
            # print(1 / (time.time() - prev_time))
            if opt.display:
                cv2.imshow('Demo', frame)
                cv2.waitKey(3)
        except Exception as e:
            print("fail to detect", e)
    cap.release()
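# cvDrawBoxes() is called above but not defined in this file. A hedged,
# minimal OpenCV sketch of what it is assumed to do (the name and behavior
# are inferred from the call site, not from a known library API):
def cvDrawBoxes(frame, detections, classes):
    for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
        pt1, pt2 = (int(x1), int(y1)), (int(x2), int(y2))
        cv2.rectangle(frame, pt1, pt2, (0, 255, 0), 2)
        label = f"{classes[int(cls_pred)]} {float(cls_conf):.2f}"
        cv2.putText(frame, label, (pt1[0], max(pt1[1] - 5, 0)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
    return frame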