def detect(save_img=False):
    """Run YOLOv5 inference on ``opt.source`` and show/save the annotated results.

    The input source, weights, output location and thresholds are all read
    from the module-level ``opt`` namespace.

    Args:
        save_img: Initial value of the "save annotated output" flag. It is
            forced to True for non-stream sources.
    """
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    # Create the output directory up front: cv2.imwrite() and open(..., 'a')
    # below do not create missing parent directories themselves.
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)  # resolve the compute device
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size against the model stride
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by default)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() does not return the module, so it must not be
        # chained with .to()/.eval().
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader: the loader type depends on the kind of input source
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        save_img = True
        view_img = True  # always display results for file sources
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get class names and one random box colour per class
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        # One dummy forward pass to warm the model up
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()

    # Per the original notes: path is the image/video path, img the
    # resized+padded image, im0s the original-size image(s), and vid_cap the
    # video source (None when reading still images).
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)  # add the batch axis when it is missing

        # Inference
        # Per the original notes: pred is (1, num_boxes, 5 + num_classes) with
        # xywh boxes in [..., 0:4] and objectness in [..., 4].
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        # Per the original notes: the result is a list with one
        # (num_boxes, 6) tensor per image holding xyxy box + conf + cls.
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply the optional second-stage classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from the network input size to the im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        # xyxy -> xywh, normalised by the image width/height
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    # Release the last writer so the final video file is finalised
    if isinstance(vid_writer, cv2.VideoWriter):
        vid_writer.release()

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    # Print the total run time
    print(f'Done. ({time.time() - t0:.3f}s)')
def detect(save_img=False):
    """Run YOLOv5 inference on ``opt.source`` and write results into ``opt.output``.

    The output folder is deleted and recreated on every call. All settings
    are read from the module-level ``opt`` namespace.

    Args:
        save_img: Initial value of the "save annotated output" flag. It is
            forced to True for non-stream sources.

    Raises:
        StopIteration: When the user presses ``q`` in the preview window.
    """
    import subprocess  # local import: only needed to open the result on macOS

    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    save_path = None  # stays None when the dataset yields nothing
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    # Release the last writer so the final video file is finalised
    if isinstance(vid_writer, cv2.VideoWriter):
        vid_writer.release()

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update and save_path is not None:  # MacOS
            # Argument list instead of a shell string: paths with spaces or
            # shell metacharacters are passed to `open` verbatim.
            subprocess.run(['open', save_path], check=False)

    print('Done. (%.3fs)' % (time.time() - t0))
def detect(self, opencv_img, data, save_img=False):
    """Run YOLOv5 on one OpenCV image and publish the result on ``self.pub_``.

    For each image in the batch, only the first row of the NMS output is
    turned into a ``BoundingBox`` and appended to the published
    ``BoundingBoxes`` message.

    Args:
        opencv_img: Image to run detection on; indexed as H x W x C with the
            channel axis reversed before inference (treated as BGR).
        data: Incoming message; only ``data.header`` is read and copied into
            the published message.
        save_img: Whether to write the annotated frame to the run directory.
            Forced to False unless the configured source is a stream.
    """
    # NOTE(review): these joins rewrite the instance attributes on every call;
    # they are only idempotent when package_path is absolute - confirm.
    self.weights = os.path.join(package_path, 'yolov5/weights', self.weights)
    self.source = os.path.join(package_path, 'yolov5', self.source)
    source, weights, view_img, save_txt, imgsz = self.source, self.weights, self.view_img, self.save_txt, self.img_size
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))
    self.project = os.path.join(package_path, 'yolov5', self.project)

    # Directories
    save_dir = Path(increment_path(Path(self.project) / self.name, exist_ok=self.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
    # Fixed output names: the frame arrives as an array, so there is no
    # source filename to derive them from.
    save_path = str(save_dir / 'img.jpg')
    txt_path = str(save_dir / 'labels' / 'label')

    # Initialize
    set_logging()
    device = select_device(self.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    # NOTE(review): the model is reloaded on every call; consider caching it.
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() does not return the module, so it must not be
        # chained with .to()/.eval().
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    # The dataset is constructed as before but never iterated: the frame to
    # process comes from opencv_img.
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        save_img = False
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
            next(model.parameters())))  # run once
    t0 = time.time()

    im0s = opencv_img
    # NOTE(review): letterbox size is hard-coded to 640 rather than imgsz - confirm intent.
    img = letterbox(im0s, 640, stride=stride)[0]

    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    img = np.ascontiguousarray(img)

    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Inference
    t1 = time_synchronized()
    pred = model(img, augment=False)[0]

    # Apply NMS
    pred = non_max_suppression(
        pred, self.conf_thres, self.iou_thres, classes=self.classes, agnostic=self.agnostic_nms)
    t2 = time_synchronized()

    # Apply Classifier
    if classify:
        pred = apply_classifier(pred, modelc, img, im0s)

    # Process detections
    detection_results = BoundingBoxes()
    detection_results.header = data.header
    detection_results.image_header = data.header
    for i, det in enumerate(pred):  # detections per image
        im0 = im0s
        p = self.path
        s = ''
        s += '%gx%g ' % img.shape[2:]  # print string
        gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
        if len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(
                img.shape[2:], det[:, :4], im0.shape).round()

            # Print results
            for c in det[:, -1].unique():
                n = (det[:, -1] == c).sum()  # detections per class
                s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

            # Only the first detection row is reported.
            xmin, ymin, xmax, ymax, conf, det_class = det[0]
            detection_msg = BoundingBox()
            # Store plain Python numbers rather than 0-d tensors in the message.
            detection_msg.xmin = int(xmin)
            detection_msg.xmax = int(xmax)
            detection_msg.ymin = int(ymin)
            detection_msg.ymax = int(ymax)
            detection_msg.probability = float(conf)
            detection_msg.Class = names[int(det_class)]
            detection_results.bounding_boxes.append(detection_msg)

            # Write results
            for *xyxy, conf, cls in reversed(det):
                if save_txt:  # Write to file
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    # label format
                    line = (cls, *xywh, conf) if self.save_conf else (cls, *xywh)
                    with open(txt_path + '.txt', 'a') as f:
                        f.write(('%g ' * len(line)).rstrip() % line + '\n')

                if save_img or view_img:  # Add bbox to image
                    label = f'{names[int(cls)]} {conf:.2f}'
                    plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

        # Print time (inference + NMS)
        print(f'{s}Done. ({t2 - t1:.3f}s)')

        # Stream results
        if view_img:
            cv2.imshow(str(p), im0)
            cv2.waitKey(1)  # 1 millisecond

        # Save results (image with detections). The input is always a single
        # frame and there is no video capture, so it is written as an image.
        if save_img:
            cv2.imwrite(save_path, im0)

    self.pub_.publish(detection_results)

    print(f'Done. ({time.time() - t0:.3f}s)')
def detect(source, device, keys, reporter, save_img=False):
    """Watch a stream with YOLOv5 and report frames that contain a wanted label.

    Every frame is annotated with all detections. When at least one detected
    class name is in ``keys`` and ``reporter.can_report()`` is true, the
    annotated frame is written under ``runs/detect`` and its path is passed
    to ``reporter.report``.

    Args:
        source: Stream source handed to ``LoadStreams``.
        device: Device specifier handed to ``select_device``.
        keys: Container of class names that should trigger a report.
        reporter: Object providing ``can_report()`` and ``report(file_path)``.
        save_img: Unused; kept for interface compatibility.
    """
    weights = "yolov5s.pt"
    img_size = 640
    conf_thres = 0.25
    iou_thres = 0.45
    classes = None
    agnostic_nms = False
    augment = False
    project = 'runs/detect'
    imgsz = img_size

    # Directories
    save_dir = Path(Path(project))  # not increment run
    # cv2.imwrite() does not create missing directories, so make sure the
    # report folder exists before the first report.
    save_dir.mkdir(parents=True, exist_ok=True)

    # Initialize
    set_logging()
    device = select_device(device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() does not return the module, so it must not be
        # chained with .to()/.eval().
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    cudnn.benchmark = True  # set True to speed up constant image size inference
    dataset = LoadStreams(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            # The dataset is always a LoadStreams instance, so the frames are
            # indexed per stream regardless of how the source string looks.
            # NOTE(review): assumes LoadStreams yields a list of frames - confirm.
            s, im0 = '%g: ' % i, im0s[i].copy()
            s += '%gx%g ' % img.shape[2:]  # print string

            # Reset per image so a match from an earlier frame cannot trigger
            # a report for this one.
            matched_label = None
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    name = names[int(cls)]
                    label = f'{name} {conf:.2f}'
                    # Any wanted label in the frame counts; a later
                    # non-matching box must not cancel an earlier match.
                    if name and name in keys:
                        matched_label = name
                    plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            cprint.info(f'{s}Done. ({t2 - t1:.3f}s)')

            # Report the annotated frame
            if matched_label is not None and reporter.can_report():
                logging.info("get label in keys: " + matched_label)
                file_path = str(save_dir / f'report_{t1}.jpg')
                cv2.imwrite(file_path, im0)
                reporter.report(file_path)

    cprint.info(f'Done to close. ({time.time() - t0:.3f}s)')
def detect(
        weights='mdp/weights/weights.pt',
        source_address='http://localhost:8008',  # 192.168.15.1
        img_size=416,
        conf_thres=0.01,
        iou_thres=0.5,
        device='',
        classes=None,
        agnostic_nms=False,
        augment=False,
        update=False,
        scale_percent=50):
    """Run YOLOv5 on the MJPEG stream served at ``source_address`` and display the result.

    The stream URL is ``source_address + '/stream.mjpg'``. For each frame, at
    most the first detection is processed: its label id and normalized xywh
    box are printed, the box is validated with ``check_bounding_box`` and,
    when valid, drawn with the text that helper returns.

    NOTE(review): the statement nesting below was reconstructed from a
    whitespace-collapsed source; confirm the placement of the ``break``
    statements against the original file.

    Args:
        weights: Path of the model weights passed to ``attempt_load``.
        source_address: Base URL of the image server.
        img_size: Requested inference size, validated by ``check_img_size``.
        conf_thres: Confidence threshold passed to ``non_max_suppression``.
        iou_thres: IoU threshold passed to ``non_max_suppression``.
        device: Device specifier passed to ``select_device``.
        classes: Optional class filter passed to ``non_max_suppression``.
        agnostic_nms: Passed as ``agnostic`` to ``non_max_suppression``.
        augment: Passed as ``augment`` to the model call.
        update: Not read in this function.
        scale_percent: Not read in this function.

    Raises:
        StopIteration: When ``q`` is pressed in the preview window.
    """
    source = source_address + '/stream.mjpg'
    label_server = source_address + '/labels'  # only referenced by the commented-out POST below
    predicted_label = None
    imgsz = img_size
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    if webcam:
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    row_num = 0  # not read anywhere below
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   conf_thres,
                                   iou_thres,
                                   classes=classes,
                                   agnostic=agnostic_nms)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].detach().unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                # The trailing break means only the first row of det is handled.
                for *xyxy, conf, cls in det:
                    predicted_label = names[int(cls)]
                    if predicted_label:
                        label_id = label_id_mapping.get(predicted_label)
                        # `False and ...` disables this per-label confidence filter
                        if False and conf < confidence_threshold(
                                label_id):  # fine tune for up arrow (white)
                            break
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        print(('%s ' * 5 + '\n') % (label_id, *xywh))  # label format

                        if not image_seen[predicted_label]:
                            # determine image position
                            x, y, w, h = xywh  # only used by the commented-out POST
                            # r = requests.post(label_server, json={'label': label_id, 'x': x, 'y': y})  # send result to rpi
                            # print(r.text)
                            image_seen[predicted_label] = True  # mark this label as already seen

                        label = '%s %.2f' % (label_id, conf)
                        good, text = check_bounding_box(
                            xywh, im0.shape[0], im0.shape[1])
                        if not good:
                            break  # rejected box: skip drawing
                        label = text  # the helper's text replaces the "<id> <conf>" label
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)
                    break

            cv2.imshow(p, im0)
            if cv2.waitKey(1) == ord('q'):  # q to quit
                raise StopIteration
def detect(save_img=True):
    """Run YOLOv5 inference on ``opt.source`` or on a Basler (pylon) camera.

    When ``opt.source`` is ``'own_camera'`` frames are grabbed from the first
    pylon device, detections are drawn and shown, and per-frame class counts
    and boxes are written to ``class_num.csv`` / ``box_info.csv``. The loop
    ends when ``q`` is pressed or grabbing stops. For any other source the
    regular dataset loop runs.

    Args:
        save_img: Initial value of the "save annotated output" flag. It is
            forced to True for non-stream file sources.

    Raises:
        StopIteration: When ``q`` is pressed while previewing a dataset
            source (not raised in camera mode).
    """
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() does not return the module, so it must not be
        # chained with .to()/.eval().
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    elif source == 'own_camera':
        dataset = None  # frames come straight from the pylon camera below
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    if source == 'own_camera':
        camera = pylon.InstantCamera(pylon.TlFactory.GetInstance().CreateFirstDevice())
        camera.Open()
        converter = pylon.ImageFormatConverter()

        # Grab continuously (video) with minimal delay
        camera.StartGrabbing(pylon.GrabStrategy_LatestImageOnly)

        # Convert to OpenCV BGR format
        converter.OutputPixelFormat = pylon.PixelType_BGR8packed
        converter.OutputBitAlignment = pylon.OutputBitAlignment_MsbAligned

        img_name = 0  # frame counter, also used as the collected-image name
        quit_requested = False
        try:
            while camera.IsGrabbing() and not quit_requested:
                grabResult = camera.RetrieveResult(5000, pylon.TimeoutHandling_ThrowException)
                try:
                    if not grabResult.GrabSucceeded():
                        continue

                    image = converter.Convert(grabResult)
                    frame = image.GetArray()

                    # Optionally keep every 5th raw frame as training data
                    if opt.collect == 'True' and img_name % 5 == 0:
                        cv2.imwrite("./data/pylon/" + str(img_name) + ".jpg", frame)
                    img_name += 1

                    img0 = frame.copy()
                    im0s = img0
                    # NOTE(review): letterbox size is hard-coded to 640 rather than imgsz - confirm intent.
                    img_size = 640
                    img, ratio, (dw, dh) = letterbox(img0, new_shape=img_size)
                    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
                    img = np.ascontiguousarray(img)
                    img = torch.from_numpy(img).to(device)
                    img = img.half() if half else img.float()  # uint8 to fp16/32
                    img /= 255.0  # 0 - 255 to 0.0 - 1.0
                    if img.ndimension() == 3:
                        img = img.unsqueeze(0)

                    t1 = time_synchronized()
                    pred = model(img, augment=opt.augment)[0]

                    # Apply NMS
                    pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes,
                                               agnostic=opt.agnostic_nms)
                    t2 = time_synchronized()

                    # Apply Classifier
                    if classify:
                        pred = apply_classifier(pred, modelc, img, im0s)

                    # Process detections
                    for i, det in enumerate(pred):  # detections per image
                        # `webcam` is always False for the 'own_camera' source,
                        # so the single frame is used directly.
                        s, im0 = '', im0s
                        s += '%gx%g ' % img.shape[2:]  # print string
                        if det is not None and len(det):
                            # Rescale boxes from img_size to im0 size
                            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                            # Print results
                            classes = []
                            for c in det[:, -1].unique():
                                n = (det[:, -1] == c).sum()  # detections per class
                                s += '%g %ss, ' % (n, names[int(c)])  # add to string
                                s1 = '%g' % (n)
                                classes.append([names[int(c)], s1])
                            df_cls_info = pd.DataFrame(classes, columns=['Class', 'number'])
                            df_cls_info.to_csv('class_num.csv')
                            print("\n number of object : ", len(det[:, -1]))
                            print(df_cls_info)

                            # Write results
                            box_info = []
                            for *xyxy, conf, cls in reversed(det):
                                if save_img or view_img:  # Add bbox to image
                                    label = '%s %.2f' % (names[int(cls)], conf)
                                    plot_one_box(xyxy, img0, label=label, color=colors[int(cls)], line_thickness=3)
                                    c1, c2 = (int(xyxy[0]), int(xyxy[1])), (int(xyxy[2]), int(xyxy[3]))
                                    confident = '%g' % (conf)
                                    box_info.append([label, c1, c2, confident])
                            df_box = pd.DataFrame(box_info, columns=['Class', 'top left', 'bottom_right', 'confidence'])
                            df_box.to_csv('box_info.csv')

                        # Print time (inference + NMS)
                        print('%sDone. (%.3fs)' % (s, t2 - t1))

                        im0 = cv2.resize(im0, (int(im0.shape[1] / 1.5), int(im0.shape[0] / 1.5)))
                        cv2.imshow("asd", im0)
                        if cv2.waitKey(1) == ord('q'):  # q to quit
                            # A bare break would only leave this inner loop,
                            # so flag the grab loop to stop as well.
                            quit_requested = True
                            break
                finally:
                    grabResult.Release()  # hand the buffer back to the camera
        finally:
            camera.StopGrabbing()
            camera.Close()

        if save_txt or save_img:
            print('Results saved to %s' % save_dir)

        print('Done. (%.3fs)' % (time.time() - t0))
        # There is no dataset in camera mode, so stop here instead of falling
        # through to the dataset loop.
        return

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow("asd", im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections). Still images are not
            # written in this variant; only video output is saved.
            if save_img and dataset.mode != 'images':
                if vid_path != save_path:  # new video
                    vid_path = save_path
                    if isinstance(vid_writer, cv2.VideoWriter):
                        vid_writer.release()  # release previous video writer

                    fourcc = 'mp4v'  # output video codec
                    fps = vid_cap.get(cv2.CAP_PROP_FPS)
                    w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                    h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                vid_writer.write(im0)

    # Release the last writer so the final video file is finalised
    if isinstance(vid_writer, cv2.VideoWriter):
        vid_writer.release()

    if save_txt or save_img:
        print('Results saved to %s' % save_dir)

    print('Done. (%.3fs)' % (time.time() - t0))
def detect(save_img=False):
    """Run detection on a video source and show the annotated frames live.

    Configuration is read from the module-level ``opt`` namespace
    (``source``, ``weights``, ``img_size``, ``device``, ``augment``,
    ``conf_thres``, ``iou_thres``, ``classes``, ``agnostic_nms``).  Every
    frame is pre-processed with ``img_preprocess``, run through the model and
    NMS, annotated with boxes and a status banner, and displayed at half size
    until the stream ends or ``q`` is pressed.

    Args:
        save_img: Unused; kept so existing callers keep working.
    """
    print_div('INTIL')
    source, weights, imgsz = opt.source, opt.weights, opt.img_size

    # Initialize
    print_div('GET DEVICE')
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    print_div('LOAD MODEL')
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by default)
    print_div('LOAD MODEL_CLASSIFIER')
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Get names and colors
    print_div('SET LABEL COLOR')
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Warm-up pass so the first real frame is not penalised (GPU only)
    print_div("RUN INFERENCE")
    if device.type != 'cpu':
        warmup = torch.zeros((1, 3, imgsz, imgsz), device=device)
        model(warmup.half() if half else warmup)

    rescale = 0.5  # display scale factor
    cap = cv2.VideoCapture(source)
    print_div('Start Play VIDEO')
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            t0 = time.time()
            if not ret:
                print_div('No Frame')
                break

            img, img0 = img_preprocess(frame)  # img: resized, img0: original
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            t1 = time_synchronized()
            pred = model(img, augment=opt.augment)[0]

            # Apply NMS
            pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                       classes=opt.classes, agnostic=opt.agnostic_nms)
            t2 = time_synchronized()

            # Apply second-stage classifier
            if classify:
                pred = apply_classifier(pred, modelc, img, img0)

            # Draw boxes
            for det in pred:
                s = '%gx%g ' % img.shape[2:]  # print string
                if det is not None and len(det):
                    # Rescale boxes from img_size to img0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()

                    # Per-class counts for the log line
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, names[int(c)])  # add to string

                    for *xyxy, conf, cls in reversed(det):
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, img0, label=label, color=colors[int(cls)], line_thickness=3)

                # Print results (inference + NMS)
                print_div('%sDone. (%.3fs)' % (s, t2 - t1))

            # Status banner: black strip across the top-centre of the frame
            x, y, w, h = (img0.shape[1] // 4), 25, (img0.shape[1] // 2), 30
            cv2.rectangle(img0, (x, 10), (x + w, y + h), (0, 0, 0), -1)
            re_img0 = (int(img0.shape[1] * rescale), int(img0.shape[0] * rescale))
            elapsed = max(time.time() - t0, 1e-9)  # guard against a zero-length interval
            cv2.putText(img0,
                        '{} | inference: {:.4f}s | fps: {:.4f}'.format(opt.weights[0], t2 - t1, 1 / elapsed),
                        (x + 20, y + 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            cv2.imshow('Stream_Detected', cv2.resize(img0, re_img0))
            if cv2.waitKey(1) == ord('q'):
                break
    finally:
        # Always free the capture device and windows, even if a frame raised.
        cap.release()
        cv2.destroyAllWindows()
def detect(save_img=False):
    """Run detection and append a per-image warning flag to a signal file.

    Configuration comes from the module-level ``opt`` namespace.  For every
    processed image one line is appended to the signal file: ``1`` when a
    ``no-helmet(s)`` / ``no-mask(s)`` class is present or exactly one
    ``person`` is detected, otherwise ``0``.  Annotated images/videos and
    optional label files are written under an auto-incremented run
    directory.

    Args:
        save_img: Ignored; recomputed from ``opt.nosave`` and the source.
    """
    # Path read by the external consumer of the warning flag.
    signal_path = '/home/piai/MinkyuKim/Week6~10_AI/Project/SocketConnection/detect.txt'
    alert_classes = {'no-helmet', 'no-helmets', 'no-mask', 'no-masks'}

    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = not opt.nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://')) or source == 'realsense'

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by default)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() does not return the module, so it must not be chained.
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        # NOTE(review): the RealSense loader is selected by source == '1' although
        # `webcam` above also accepts 'realsense' - confirm which is intended.
        if source == '1':
            dataset = LoadRealSense2()
        else:
            dataset = LoadStreams(source, img_size=imgsz)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            warning = False  # raised when this image contains an alert condition
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results and evaluate the alert condition per class
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    class_name = names[int(c)]
                    s += f"{n} {class_name}{'s' * (n > 1)}, "  # add to string
                    if class_name in alert_classes or (class_name == 'person' and n == 1):
                        warning = True

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Append the flag and close the file immediately so the handle is
            # never leaked and the line is flushed for the reader.
            with open(signal_path, 'a') as signal_file:
                signal_file.write(('1' if warning else '0') + '\n')
            print('stop' if warning else 'pass')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer.write(im0)

    # Finalise the last video so its container is written out completely.
    if isinstance(vid_writer, cv2.VideoWriter):
        vid_writer.release()

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
def detect(save_img=False):
    """Run detection with hard-coded settings and report the last image's count.

    Uses ``data/images`` as source, ``yolov5s.pt`` as weights, a 640 px
    inference size, confidence threshold 0.25 and IoU threshold 0.45.
    Annotated results are written to an auto-incremented ``runs/detect/exp``
    directory.

    Args:
        save_img: Whether to save annotated output; forced to True for
            non-stream sources.

    Returns:
        int: Number of objects detected in the last processed image
        (0 when it had no detections or nothing was processed).
    """
    source, weights, view_img, save_txt, imgsz = 'data/images', 'yolov5s.pt', False, False, 640  # all manually set
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(increment_path(Path('runs/detect') / 'exp', exist_ok=False))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device('')
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by default)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() does not return the module, so it must not be chained.
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    if device.type != 'cpu':
        warmup = torch.zeros((1, 3, imgsz, imgsz), device=device)
        model(warmup.half() if half else warmup)  # run once

    last_count = 0  # objects found in the most recently processed image
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=False)[0]

        # Apply NMS
        pred = non_max_suppression(pred, 0.25, 0.45, classes=None, agnostic=False)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            last_count = len(det) if det is not None else 0
            if last_count:
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh)  # label format (confidence is not saved)
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)  # window name must be a str, not a Path
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % save_dir)

    print('Done. (%.3fs)' % (time.time() - t0))
    return last_count
print('[%d, %5d] loss: %.3f' %(epoch + 1, i + 1, loss)) #running_loss = 0.0 test(net,test_loader,labels.shape[0],classes) PATH = './soccer_classify.pth' torch.save(net.state_dict(), PATH) print('Finished Training') def predict_image(model,image,test_transform): image_tensor = test_transforms(image) image_tensor.unsqueeze_(0) image_tensor=image_tensor.to(device) output = model(image_tensor) index = output.data.cpu().numpy().argmax() return index if __name__=="__main__": train=True device = select_device("0") modelc = load_classifier(name='resnet101', n=4) # initialize train_loader,test_loader,classes=load_dataset("/media/asad/adas_cv_2/caffe/train_classifier","/media/asad/adas_cv_2/caffe/train_classifier") modelc.to(device) if train: training(modelc,train_loader,test_loader,device,classes,epochs=100) else: test_transforms = transforms.Compose([transforms.Resize((224,224)),transforms.ToTensor(),transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))]) modelc.load_state_dict(torch.load("./soccer_classify.pth")) modelc.eval() image_pil = Image.open("/media/asad/adas_cv_2/caffe/patches/9000.png") cls_idx=predict_image(modelc,image_pil,test_transforms)
def detect(save_img=False):
    """Play a video source, running detection for each frame on a worker thread.

    Configuration is read from the module-level ``opt`` namespace.  Each
    frame is handed to ``multithreading`` on a new ``Thread``; the annotated
    frame it puts on the queue is shown at half size.  Playback stops when
    the stream ends or ``q`` is pressed, and the total wall time is printed
    when the stream ends.

    Args:
        save_img: Unused; kept so existing callers keep working.
    """
    print_div('INTIL')
    source, weights, imgsz = opt.source, opt.weights, opt.img_size

    # Initialize
    print_div('GET DEVICE')
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    print_div('LOAD MODEL')
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by default)
    print_div('LOAD MODEL_CLASSIFIER')
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Get names and colors
    print_div('SET LABEL COLOR')
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Warm-up pass (GPU only)
    print_div("RUN INFERENCE")
    if device.type != 'cpu':
        warmup = torch.zeros((1, 3, imgsz, imgsz), device=device)
        model(warmup.half() if half else warmup)

    cap = cv2.VideoCapture(source)
    q = Queue()  # carries the annotated frame back from the worker
    rescale = 0.5  # display scale factor
    time_count = time.time()
    try:
        while cap.isOpened():
            if cv2.waitKey(1) == ord('q'):
                return  # cleanup still runs via the finally clause

            ret, frame = cap.read()
            if not ret:
                print_div('No Frame')
                break

            worker = Thread(target=multithreading,
                            args=(frame, device, half, names, colors, model, q))
            worker.start()

            img0 = q.get()  # blocks until the worker publishes its result
            re_img0 = (int(img0.shape[1] * rescale), int(img0.shape[0] * rescale))
            cv2.imshow('Stream_Detected', cv2.resize(img0, re_img0))
            worker.join()
    finally:
        # Free the capture device and windows on every exit path.
        cap.release()
        cv2.destroyAllWindows()

    print("Spending time: " + str(time.time() - time_count))
def custom_detect(
        weights='yolov5l.pt',  # model.pt path(s)
        source='data/images',  # source folder
        imgsz=640,  # inference size (pixels)
        conf_thres=0.5,  # object confidence threshold
        iou_thres=0.45,  # IOU threshold for NMS
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # display results
        box_only=False,  # output only contains bounding boxes, not original image
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        classes: "list[int] | None" = None,  # only look for certain classes
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        update=False,  # update all models
        project='runs',  # save results to project/name
        name='',  # save results to project/name
        exist_ok=False  # existing project/name ok, do not increment
):
    """Run detection over a folder of images/videos with explicit arguments.

    Annotated output is always saved under ``project/name`` (auto-incremented
    unless ``exist_ok``).  With ``box_only`` the boxes are drawn on a black
    canvas instead of the source image.  ``update`` is accepted but not used
    by this function.
    """
    # Directories
    save_dir = Path(increment_path(Path(project) / name, exist_ok=exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by default)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() does not return the module, so it must not be chained.
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    save_img = True
    dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0)

        # Process detections
        for det in pred:  # detections per image
            p, s, frame = Path(path), '', getattr(dataset, 'frame', 0)
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if box_only:
                im0 = np.zeros_like(im0)  # black canvas so only the boxes are visible
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    # Finalise the last video so its container is written out completely.
    if isinstance(vid_writer, cv2.VideoWriter):
        vid_writer.release()

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
def detect(opt):
    """Run detection over the samples yielded by ``LoadNLMFeatures``.

    Inputs are fed to the model without the usual ``/255`` scaling and the
    predicted boxes are used without rescaling to the original image (both
    steps are deliberately disabled here).  An annotated image is always
    written to the run directory; labels and crops are written when
    ``opt.save_txt`` / ``opt.save_crop`` are set.

    Args:
        opt: Options namespace providing ``weights``, ``view_img``,
            ``save_txt``, ``img_size``, ``project``, ``name``, ``exist_ok``,
            ``device``, ``augment``, ``conf_thres``, ``iou_thres``,
            ``classes``, ``agnostic_nms``, ``save_crop``, ``save_conf``,
            ``hide_labels``, ``hide_conf`` and ``line_thickness``.
    """
    weights, view_img, save_txt, imgsz = opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = True

    # Directories
    save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    names = model.module.names if hasattr(model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by default)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() does not return the module, so it must not be chained.
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    dataset = LoadNLMFeatures(img_size=imgsz, stride=stride)

    # Run inference
    if device.type != 'cpu':
        # Warm-up tensor is laid out as (1, imgsz, imgsz, dataset.dimensions).
        model(torch.zeros(1, imgsz, imgsz, dataset.dimensions).to(device).type_as(
            next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # to fp16/32; no /255 scaling here
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for det in pred:  # detections per image
            p, s, im0, frame = Path(path), '', im0s.copy(), getattr(dataset, 'frame', 0)
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if opt.save_crop else im0  # for opt.save_crop
            if len(det):
                # Boxes are intentionally not rescaled to im0 in this variant.

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or opt.save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if opt.hide_labels else (
                            names[c] if opt.hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy, im0, label=label, color=colors(c, True),
                                     line_thickness=opt.line_thickness)
                        if opt.save_crop:
                            save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Save results (image with detections)
            if save_img:
                cv2.imwrite(save_path, im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
def detect(save_img=False):
    """Run tiled multi-scale detection and write the fused boxes as MOT results.

    Each item yielded by ``LoadImages_panda`` carries a list of tiles for one
    source image.  Every tile is run through the model and NMS, filtered,
    rescaled and shifted by its tile offset; the per-image detections are then
    fused with ``fuse_all_det`` and ``WBF_fuse``, optionally drawn and saved,
    and finally written out through ``write_results``.

    Args:
        save_img: Overwritten immediately by ``opt.save_img``; the passed
            value is not used.

    NOTE(review): the source text of this function had its whitespace
    collapsed; the nesting below is reconstructed from data flow and should be
    confirmed against the original file.
    """
    # Restrict visible GPUs before any device is selected.
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.device
    source, weights, save_img, save_txt, imgsz = opt.source, opt.weights, opt.save_img, opt.save_txt, opt.img_size
    #save_img = not opt.nosave and not source.endswith('.txt') # save inference images
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir
    print(save_dir)

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled: classify is hard-coded to False)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # NOTE(review): load_state_dict() does not return the module, so the
        # chained .to(device).eval() would fail if this branch were enabled.
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    # dataset = LoadImages_panda(source, img_size=imgsz, stride=stride, split_size=[[1000, 1000],[1500, 1500], [3000, 3000],[6000, 6000],[10000,10000]], over_lap=0.3)
    dataset = LoadImages_panda(source,
                               img_size=imgsz,
                               stride=stride,
                               split_size=[[1500, 1500], [3000, 3000],
                                           [6000, 6000], [10000, 10000]],
                               over_lap=0.3)
    #dataset = LoadImages_panda(source, img_size=imgsz, stride=stride,split_size=[], over_lap=0.3)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
    ret_matrix = np.zeros((2, 2))  # never read in this function

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    jdict = []  # never appended to here, so the final "object num." print reports 0
    results = []  # (frame_id, tlwh boxes, confidences) per source image
    for path, img_list, start_list, split_size_list, boundary_list, img0_list, im0s, vid_cap in dataset:
        det_list = []  # all detections of this source image, in full-image coordinates
        det_scale_dict = {}  # the same detections grouped by the tile's split size

        # Inference
        t1 = time_synchronized()
        for img_i in range(len(img_list)):
            img = torch.from_numpy(img_list[img_i]).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)
            pred = model(img, augment=opt.augment)[0]

            # Apply NMS
            pred_list = []
            pred_output = non_max_suppression(pred,
                                              opt.conf_thres,
                                              opt.iou_thres,
                                              classes=opt.classes,
                                              agnostic=opt.agnostic_nms)
            # NOTE(review): pred_list is always [] here, so it is first set to
            # pred_output[0] and then, when non-empty, concatenated with
            # pred_output[0] again - each tile's detections appear twice
            # before fusion. Confirm whether that is intended.
            if pred_list == []:
                pred_list = pred_output[0]
            if len(pred_output[0]):
                pred_list = torch.cat((pred_list, pred_output[0]), dim=0)
            pred_out = [pred_list]

            # Apply Classifier
            if classify:
                pred_out = apply_classifier(pred_out, modelc, img,
                                            img0_list[img_i])

            # Process detections
            for i, det in enumerate(pred_out):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0, frame = path[i], '%g: ' % i, img0_list[img_i][
                        i].copy(), dataset.count
                else:
                    p, s, im0, frame = path, '', img0_list[img_i], getattr(
                        dataset, 'frame', 0)
                p = Path(p)  # to Path
                save_path = str(save_dir / p.name)  # img.jpg
                txt_path = str(save_dir / 'labels' / p.stem) + (
                    '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
                s += '%gx%g ' % img.shape[2:]  # print string
                gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
                # Project-specific filter; presumably drops oversized or
                # boundary-touching boxes - TODO confirm against del_more.
                det = del_more(det,
                               img.shape[2:],
                               big_thres=0.3,
                               boundary=boundary_list[img_i])
                # del_small: for tiles with split size above 2000, drop boxes
                # whose shorter side is not greater than small_thres.
                if len(det) != 0:
                    small_thres = 15
                    dets_wh_thres = det[:, 2:4] - det[:, :2]
                    det_thres = torch.minimum(dets_wh_thres[:, 0],
                                              dets_wh_thres[:, 1])
                    if split_size_list[img_i] > 2000:
                        det = det[det_thres > small_thres]
                if len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                              im0.shape).round()
                    # Shift x and y coordinates by the tile's start offset.
                    det[..., 0] += start_list[img_i][0]
                    det[..., 2] += start_list[img_i][0]
                    det[..., 1] += start_list[img_i][1]
                    det[..., 3] += start_list[img_i][1]
                    # NOTE(review): once det_list is a tensor, `det_list == []`
                    # compares a tensor with a list - verify it evaluates as
                    # intended on the torch version in use.
                    if det_list == []:
                        det_list = det
                    else:
                        det_list = torch.cat((det_list, det), dim=0)
                    if split_size_list[img_i] not in det_scale_dict.keys():
                        det_scale_dict[split_size_list[img_i]] = det
                    else:
                        det_scale_dict[split_size_list[img_i]] = torch.cat(
                            (det_scale_dict[split_size_list[img_i]], det),
                            dim=0)

        # Fuse the detections gathered from all tiles of this source image.
        # NOTE(review): if no tile produced detections, det_list is still a
        # plain list and det_scale_dict is empty, so the indexing below fails.
        det_list = fuse_all_det(det_list[:, :6],
                                im0,
                                conf_thres=opt.conf_thres,
                                nms_thres=opt.iou_thres,
                                method='standard',
                                merge=False)
        scale_key = []
        for key in det_scale_dict.keys():
            scale_key.append(key)
        # Second fusion against the detections of the first-seen split size.
        det_list = WBF_fuse(im0,
                            [det_list, det_scale_dict[scale_key[0]]],
                            weights=[1, 1],
                            iou_thres=0.5,
                            conf_thres=0.5)

        # Print results
        for c in det_list[:, -1].unique():
            n = (det_list[:, -1] == c).sum()  # detections per class
            s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

        # Write results
        # The frame id is the last '_'-separated token before the extension.
        frame_id = int(path.split(".")[-2].split("_")[-1])
        id_list = []  # holds confidences despite the name
        online_tlwhs = []  # boxes converted from xyxy to (left, top, width, height)
        for *xyxy, conf, cls in reversed(det_list):
            online_tlwhs.append((float(xyxy[0]), float(xyxy[1]),
                                 float(xyxy[2]) - float(xyxy[0]),
                                 float(xyxy[3]) - float(xyxy[1])))
            id_list.append(conf)
            if save_img:  # Add bbox to image
                plot_one_box(xyxy,
                             im0s,
                             label=0,
                             color=colors[int(cls)],
                             line_thickness=3)
        results.append((frame_id, online_tlwhs, id_list))

        # Print time (per image)
        t2 = time_synchronized()
        print(f'{s}Done. ({t2 - t1:.3f}s)')
        sys.stdout.flush()

        # Save results (image with detections)
        if save_img:
            if dataset.mode == 'image':
                cv2.imwrite(save_path, im0s)
            else:  # 'video' or 'stream'
                # NOTE(review): boxes are drawn on im0s, but this branch writes
                # im0 (the last tile) - confirm which frame should be saved.
                if vid_path != save_path:  # new video
                    vid_path = save_path
                    if isinstance(vid_writer, cv2.VideoWriter):
                        vid_writer.release()  # release previous video writer
                    if vid_cap:  # video
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    else:  # stream
                        fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path += '.mp4'
                    vid_writer = cv2.VideoWriter(
                        save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                        (w, h))
                vid_writer.write(im0)

    # Results file is named after the last path component of the source.
    result_detection, id_num = write_results(
        opt.source.split("/")[-1] + ".txt", results, "mot")
    print("detection_num", result_detection)
    print("id_num", id_num)
    print(f'Done. ({time.time() - t0:.3f}s)')
    print(f'object num. ({len(jdict)})')
    sys.stdout.flush()
def detect(opt):
    """Run detection on ``opt.source`` and print where each detection lies.

    For every drawn detection the label is printed followed by a horizontal
    ("Rechts"/"Links") and vertical ("Hinter"/"Vorne") position, decided by
    comparing the box centre with the centre of the frame the box is
    expressed in.  Annotated images/videos, label files and crops are written
    below ``opt.project/opt.name`` according to the flags on ``opt``.

    Args:
        opt: Parsed options namespace.  Reads ``source``, ``weights``,
            ``view_img``, ``save_txt``, ``img_size``, ``nosave``, ``project``,
            ``name``, ``exist_ok``, ``device``, ``augment``, ``conf_thres``,
            ``iou_thres``, ``classes``, ``agnostic_nms``, ``max_det``,
            ``save_crop``, ``save_conf``, ``hide_labels``, ``hide_conf`` and
            ``line_thickness``.

    Side effects:
        Sets the module globals ``boyutlarX``/``boyutlarY`` (width/height of
        the network input tensor) and ``boxX``/``boxY`` (centre of the most
        recently drawn box, in original-image pixels).
    """
    global boyutlarX, boyutlarY, boxX, boxY

    source, weights, view_img, save_txt, imgsz = \
        opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = not opt.nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    names = model.module.names if hasattr(model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by default)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() returns a key report, not the module, so the
        # device move / eval switch must be called on the module itself.
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Published for other module code: size of the network input tensor.
        boyutlarY, boyutlarX = img.shape[2:]

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, opt.classes,
                                   opt.agnostic_nms, max_det=opt.max_det)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if opt.save_crop else im0  # for opt.save_crop

            # Boxes are rescaled to im0 pixels below, so the left/right and
            # front/back midlines must come from im0, not from the resized
            # network input.
            mid_y, mid_x = im0.shape[0] * 0.5, im0.shape[1] * 0.5

            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or opt.save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if opt.hide_labels else (
                            names[c] if opt.hide_conf else f'{names[c]} {conf:.2f}')
                        print(label, " : ", end=" ")

                        # Box centre in im0 pixel coordinates.
                        boxX = xyxy[0] + ((xyxy[2] - xyxy[0]) / 2)
                        boxY = xyxy[1] + ((xyxy[3] - xyxy[1]) / 2)
                        if mid_x < boxX:
                            print("Rechts", end=" ")
                        elif mid_x > boxX:
                            print("Links", end=" ")
                        if mid_y < boxY:
                            print("Hinter")
                        elif mid_y > boxY:
                            print("Vorne")

                        plot_one_box(xyxy, im0, label=label, color=colors(c, True),
                                     line_thickness=opt.line_thickness)
                        if opt.save_crop:
                            save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Per-image timing output is intentionally silenced.
            # print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        # NOTE: this summary string is built but never printed in this variant.
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
def run(
        weights='yolov5s.pt',  # model.pt path(s)
        source='./test_1',  # file/dir/URL/glob, 0 for webcam
        imgsz=640,  # inference size (pixels)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # = device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        update=False,  # update all models
        project='runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
):
    """Run detection over ``source`` and dump a per-image summary to ``result.json``.

    Every image with at least one detection contributes one
    ``{"image_id": <file name>, "prediction": "<n> <class>s,..."}`` entry.
    Annotated images, label files and crops are written under
    ``project/name`` according to the flags.  Only image sources are saved;
    frames from video sources are processed but not written out.

    Side effects:
        Writes ``result.json`` into the current working directory, creates
        the run directory, and prints one single-element list per image
        containing the summary/timing string.
    """
    desire_param = []  # per-image summaries serialised to result.json
    save_img = not nosave and not source.endswith('.txt')  # save inference images

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check image size
    names = model.module.names if hasattr(model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by default)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet50', n=2)  # initialize
        # load_state_dict() returns a key report, not the module, so the
        # device move / eval switch must be called on the module itself.
        modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                pr = ' '  # compact "<n> <class>," summary stored in the JSON
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string
                    pr += f"{n} {names[int(c)]}{'s' * (n > 1)},"
                desire_param.append({"image_id": p.name, "prediction": pr})

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=line_thickness)
                        if save_crop:
                            save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Print time (inference + NMS); emitted as a one-element list.
            result = [(f'{s}Done. ({t2 - t1:.3f}s)')]
            print(result)

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections); video frames are not saved.
            if save_img and dataset.mode == 'image':
                cv2.imwrite(save_path, im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)

    print(f'Done. ({time.time() - t0:.3f}s)')

    with open('result.json', 'w') as f:
        json.dump(desire_param, f)
def run(
        weights='yolov5s.pt',  # model.pt path(s)
        source='data/images',  # file/dir/URL/glob, 0 for webcam
        imgsz=640,  # inference size (pixels)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project='runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
):
    """Run detection with a ``.pt`` or ``.onnx`` model over images, videos or streams.

    The backend is chosen from the weights file suffix.  Results are drawn
    onto the frames and optionally shown, saved as images/videos, cropped, or
    written to label files under ``project/name``.

    Note:
        In this variant ``--save-txt`` always writes
        ``class x1 y1 x2 y2 conf`` in pixel coordinates of the original
        image; ``save_conf`` does not influence the label format.
    """
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    weights_path = weights[0] if isinstance(weights, list) else weights
    classify = False
    pt, onnx = weights_path.endswith('.pt'), weights_path.endswith('.onnx')  # inference type
    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
    if pt:
        model = attempt_load(weights, map_location=device)  # load FP32 model
        stride = int(model.stride.max())  # model stride
        names = model.module.names if hasattr(model, 'module') else model.names  # get class names
        if half:
            model.half()  # to FP16
        if classify:  # second-stage classifier
            modelc = load_classifier(name='resnet50', n=2)  # initialize
            # load_state_dict() returns a key report, not the module, so the
            # device move / eval switch must be called on the module itself.
            modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model'])
            modelc.to(device).eval()
    elif onnx:
        check_requirements(('onnx', 'onnxruntime'))
        import onnxruntime
        session = onnxruntime.InferenceSession(weights_path, None)
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
        bs = len(dataset)  # batch_size
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)
        bs = 1  # batch_size
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    if pt and device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        if pt:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
        elif onnx:
            img = img.astype('float32')
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if len(img.shape) == 3:
            img = img[None]  # expand for batch dim

        # Inference
        t1 = time_sync()
        if pt:
            # Keep the flag itself untouched; derive a fresh output directory
            # per input when feature visualisation is requested.
            vis_dir = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
            pred = model(img, augment=augment, visualize=vis_dir)[0]
        elif onnx:
            pred = torch.tensor(
                session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img}))

        # NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
        t2 = time_sync()

        # Second-stage classifier (optional)
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process predictions
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            imc = im0.copy() if save_crop else im0  # for save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        # Pixel-space corner format, confidence always included.
                        line = (cls, *xyxy, conf)
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=line_thickness)
                        if save_crop:
                            save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer[i].write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)

    print(f'Done. ({time.time() - t0:.3f}s)')
def detect(save_img=False):
    """Run detection and merge overlapping boxes into (at most) two-class labels.

    After NMS, detections with identical coordinates are grouped, groups
    whose boxes overlap with IoU > 0.8 are merged into one averaged box, and
    each merged box keeps its highest-confidence class plus, when present,
    the best class from the opposite class group ({0, 1, 2} vs {3, 4, 5}).
    Configuration is read from the module-level ``opt``.

    Args:
        save_img: Initial "save annotated output" flag; forced to True for
            non-webcam sources.

    Side effects:
        Deletes and recreates the ``opt.save_dir`` directory, writes
        images/videos/label files into it, optionally opens display windows.

    Raises:
        StopIteration: when 'q' is pressed in a display window.
    """
    out, source, weights, view_img, save_txt, imgsz = \
        opt.save_dir, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    # WARNING: any existing output directory is removed wholesale.
    if os.path.exists(out):  # output dir
        shutil.rmtree(out)  # delete dir
    os.makedirs(out)  # make new dir
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled: the flag is hard-coded to False)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors (one random colour per class)
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once (warm-up, GPU only)
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)  # add batch dimension

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            # Video sources get a per-frame suffix on the label file name.
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Step 1: group detections that share exactly the same
                # coordinates.  Key: "x1,y1,x2,y2" string; value: list of
                # [class, confidence] pairs.
                bbox_cls = {}
                for *xyxy, conf, cls in reversed(det):
                    xyxy = list(map(float, xyxy))
                    coord_str = ','.join(list(map(str, xyxy)))
                    bbox_cls.setdefault(coord_str, []).append([int(cls), float(conf)])
                # print('\nbefore merge: ', bbox_cls)

                # Step 2: repeatedly take the first remaining box, merge every
                # other box whose IoU with it exceeds 0.8, and move the result
                # to bbox_cls_copy.  Each pass removes the first key, so the
                # loop terminates.
                bbox_cls_copy = {}
                while len(bbox_cls.keys()) > 0:
                    keys = list(bbox_cls.keys())
                    merge_lst = []
                    box = list(map(float, keys[0].split(',')))
                    for j in range(1, len(keys)):
                        boxj = list(map(float, keys[j].split(',')))
                        iou = box_iou(torch.Tensor([box]), torch.Tensor([boxj]))
                        if iou > 0.8:
                            merge_lst.append([j, boxj, bbox_cls[keys[j]]])
                    if len(merge_lst) > 0:
                        coords = [box, ]
                        values = bbox_cls[keys[0]]
                        for j, boxj, valj in merge_lst:
                            coords.append(boxj)
                            values.extend(valj)
                            del bbox_cls[keys[j]]
                        # The merged box is the element-wise mean of all
                        # merged coordinates.
                        new_coord = torch.Tensor(coords).mean(dim=0).tolist()
                        new_key = ','.join(map(str, new_coord))
                        bbox_cls_copy[new_key] = values
                    else:
                        bbox_cls_copy[keys[0]] = bbox_cls[keys[0]]
                    del bbox_cls[keys[0]]
                # print('after merge: ', bbox_cls_copy)

                # Step 3: keep at most two classes per merged box.
                for key, value in bbox_cls_copy.items():
                    xyxy = list(map(float, key.split(',')))
                    value.sort(key=lambda x: x[1], reverse=True)  # based on conf
                    cls1, conf = value[0]
                    cls2 = -1  # -1 means "no secondary class"
                    # Scan candidates in descending confidence; accept the
                    # first one from the opposite class group and discard the
                    # others.
                    while len(value) > 1:
                        if (cls1 in [0, 1, 2] and value[1][0] in [3, 4, 5]) or \
                                (cls1 in [3, 4, 5] and value[1][0] in [0, 1, 2]):
                            cls2 = value[1][0]
                            break
                        del value[1]

                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.Tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls1, cls2, conf, *xywh) if opt.save_conf else (cls1, cls2, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line) + '\n') % line)

                    if save_img or view_img:  # Add bbox to image
                        # Labels show numeric class ids rather than names.
                        if cls2 == -1:
                            # cls_str = names[int(cls1)]
                            cls_str = str(cls1)
                        else:
                            # cls_str = ','.join([names[int(cls1)], names[int(cls2)]])
                            cls_str = ','.join(sorted([str(cls1), str(cls2)]))
                        label = cls_str + ' %.2f' % conf
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls1)], line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                # NOTE(review): other variants in this file compare against
                # 'image'; presumably this loader version reports 'images' -
                # confirm against LoadImages.
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        # NOTE(review): vid_cap is dereferenced unconditionally
                        # - assumes it is never None on this path; verify for
                        # streams.
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))

    print('Done. (%.3fs)' % (time.time() - t0))
def detect(save_img=False):
    """Run detection and overlay the most likely room on every frame with detections.

    Class names detected in the current batch are intersected with the
    per-room object lists returned by ``read_rooms()``; the room sharing the
    most distinct object names is written onto the frame and printed.
    Configuration is read from the module-level ``opt``.

    Args:
        save_img: Ignored; the flag is recomputed from ``opt.nosave`` and the
            source type.

    Note:
        When several rooms tie (including the case where no detected object
        belongs to any room) the first entry of the score table, 'Kitchen',
        is reported.
    """
    source, weights, view_img, save_txt, imgsz = \
        opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = not opt.nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by default)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() returns a key report, not the module, so the
        # device move / eval switch must be called on the module itself.
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        listOfObjectsDetected = []  # class names seen in this batch
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    listOfObjectsDetected.append(names[int(cls)])
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

                # Score each room by the number of distinct detected object
                # names that appear in its list.
                kitchenObjects, livingroomObjects, bathroomObjects, bedroomObjects = read_rooms()
                detected = set(listOfObjectsDetected)
                rooms = {
                    'Kitchen': len(set(kitchenObjects) & detected),
                    'Livingroom': len(set(livingroomObjects) & detected),
                    'Bathroom': len(set(bathroomObjects) & detected),
                    'Bedroom': len(set(bedroomObjects) & detected),
                }
                # max() returns the first key with the highest score, so ties
                # resolve in dict order.
                textToOverlay = 'Room Detected: ' + str(max(rooms, key=rooms.get))
                cv2.putText(im0, textToOverlay,
                            (int(im0.shape[1] * 0.7), int(im0.shape[0] * 0.05)),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 1, cv2.LINE_AA)
                print(textToOverlay)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
def detect(source,
           weights,
           conf_thres=0.25,
           imgsz=640,
           iou_thres=0.45,
           classes=None,
           device=''):
    """Detect objects in ``source`` and return the last annotated image.

    Inference always runs with test-time augmentation and class-agnostic
    NMS.  Boxes and "<name> <confidence>" labels are drawn directly onto the
    loaded image; nothing is written to disk.

    Args:
        source: Path handed to ``LoadImages``.
        weights: Model weights handed to ``attempt_load``.
        conf_thres: Confidence threshold for NMS.
        imgsz: Requested inference size, adjusted by ``check_img_size``.
        iou_thres: IoU threshold for NMS.
        classes: Optional class filter for NMS.
        device: Device string handed to ``select_device``.

    Returns:
        The annotated image of the last item processed.
    """
    # Initialize
    set_logging()
    device = select_device(device)
    use_fp16 = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())
    if use_fp16:
        model.half()

    # Second-stage classifier (switched off)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)
        state = torch.load('weights/resnet101.pt', map_location=device)['model']
        modelc.load_state_dict(state)
        modelc.to(device).eval()

    # Data source
    dataset = LoadImages(source, img_size=imgsz)

    # Class names and one random colour per class
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = []
    for _ in range(len(names)):
        colors.append([random.randint(0, 255) for _ in range(3)])

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # dummy input
    if device.type != 'cpu':
        # Warm-up pass on GPU.
        model(img.half() if use_fp16 else img)

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        if use_fp16:
            img = img.half()
        else:
            img = img.float()
        img /= 255.0  # scale to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)  # add batch dimension

        # Forward pass followed by NMS, timed together
        t1 = time_synchronized()
        raw = model(img, augment=True)[0]
        pred = non_max_suppression(raw, conf_thres, iou_thres, classes=classes, agnostic=True)
        t2 = time_synchronized()

        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Handle the detections of each image in the batch
        for det in pred:
            im0 = im0s
            s = '' + '%gx%g ' % img.shape[2:]  # status line
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            if det is not None and len(det):
                # Map boxes from the network input size back onto im0
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Per-class counts for the status line
                class_column = det[:, -1]
                for c in class_column.unique():
                    n = (class_column == c).sum()
                    s += '%g %ss, ' % (n, names[int(c)])

                # Draw every detection
                for *xyxy, conf, cls in reversed(det):
                    class_id = int(cls)
                    label = '%s %.2f' % (names[class_id], conf)
                    plot_one_box(xyxy, im0, label=label, color=colors[class_id], line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

    print('Done. (%.3fs)' % (time.time() - t0))
    return im0
def detect(save_img=False):
    """Run detection over pre-cut test videos and record a per-video alarm flag.

    Settings are read from the module-level ``opt`` namespace. Each directory
    under ``video_dir_path`` that appears in ``videopath_list`` is processed:
    its ``<dir>_video_cut.csv`` supplies four region values per video name,
    every frame is run through the model and annotated, and once more than 7
    of the last 10 frames counted more than one body or head inside the
    region the video is flagged. Flags are appended to a
    ``<dir>_video_result_<conf_thres>.txt`` file in the working directory.

    NOTE(review): the nesting below was reconstructed from a copy of this file
    whose line breaks and indentation were lost — confirm block boundaries
    (especially the per-frame alarm accounting and the result-file write)
    against the author's version.

    Args:
        save_img: Overwritten with ``True`` for every processed video; the
            passed value is only seen by the final summary ``if`` when no
            video is processed.
    """
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA
    half = False  # overrides the line above: FP16 is forced off

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    # Alternative (disabled) loading path left by the author:
    # model = Darknet('cfg/prune_0.8_yolov3-spp.cfg', (opt.img_size, opt.img_size)).to(device)
    # initialize_weights(model)
    # model.load_state_dict(torch.load('weights/prune_0.8_yolov3-spp-ultralytics.pt')['model'])
    # model.eval()
    # stride = [8, 16, 32]
    # imgsz = check_img_size(imgsz, s=max(stride))  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by the hard-coded flag)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    # The generic loader is disabled; a LoadImages instance is created per video below.
    vid_path, vid_writer = None, None
    # if webcam:
    #     view_img = True
    #     cudnn.benchmark = True  # set True to speed up constant image size inference
    #     dataset = LoadStreams(source, img_size=imgsz)
    # else:
    #     save_img = True
    #     dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    # names = ['1', '2']
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once

    # Directories to process; commented entries are currently switched off.
    videopath_list = (
        # '2020double_company',
        # '2020double_company_1',
        # 'child_79_company',
        # 'child_137_huaxia',
        # 'child_137_huaxia_1',
        # 'double_54_zhuhai',
        # 'double_54_zhuhai_1',
        # 'double_59_huaxiaxueyuan',
        # 'double_59_huaxiaxueyuan_1',
        # 'double_990_close_company',
        # 'double_beijing',
        # 'double_beijing_1',
        'single_28_huaxia',
        # 'single_28_huaxia_2',
        # 'single_897_yinchuan',
        # 'single_897_yinchuan_2',
        # 'single_1000_beijng_shoudu',
        # 'single_1000_guangzhjou',
        # 'single_1000_wuhan',
    )
    # Not read anywhere in this function.
    video_dir_pass = [
        'single_1000_beijng_shoudu_kuan',
        'single_1000_wuhan_kuan',
    ]
    # video_path='/home/lishuang/Disk/shengshi_data/video_test_split_all/single_1000_beijng_shoudu_test_frame'
    video_dir_path = '/home/lishuang/Disk/shengshi_data/video_test_split_all'
    video_paths = os.listdir(video_dir_path)
    for video_dir in video_paths:
        if video_dir not in videopath_list:
            print(video_dir, " pass")
            continue
        video_path = os.path.join(video_dir_path, video_dir)
        # if video_dir !='double_54_zhuhai':
        #     continue
        csv_path = os.path.join(video_dir_path, 'video_test_csv',
                                f'{video_dir}_video_cut.csv')
        # csv_path = os.path.join(os.path.join(videopath, ".."), f'{basedirname}_video_cut.csv')

        # Parse the CSV: the first line is skipped, column 1 is the video name
        # and columns 2-5 are stored as that video's region values.
        video_name = []
        video_name_dic = {}
        with open(csv_path) as f:
            lines = f.readlines()[1:]
            for line in lines:
                line = line.rstrip()
                items = line.split(',')
                video_name.append(items[1])
                video_name_dic[items[1]] = [
                    items[2], items[3], items[4], items[5]
                ]

        if os.path.isdir(video_path):
            video_files = os.listdir(video_path)
            alarmvideo_list = {}  # video name -> 0/1 alarm flag
            for video_file in video_files:
                # Only this single hard-coded file is processed; everything else is skipped.
                if video_file != '616643FEF1380C0E_2019-10-19-11-37-49-812_passenger_00000061_2.mp4':
                    continue
                # Skip files that have no row in the CSV (name compared without the 4-char extension).
                if video_file[:-4] not in video_name_dic:
                    continue
                videosource = os.path.join(video_path, video_file)
                # if len(os.listdir(videosource))==0:
                #     continue
                save_img = True
                view_img = True
                videodataset = LoadImages(videosource, img_size=imgsz)
                video_file, extension = os.path.splitext(video_file)
                alarmvideo_list[video_file] = 0
                # Ring buffer over the last 10 frames: 1 when the frame met the count rule.
                frame_record = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
                frame_num = 0
                outvideo = str(Path(out) / video_dir / video_file)

                # Region values from the CSV, converted to int; the ratios are fixed at 1.
                x1t, y1t, x2t, y2t = video_name_dic[video_file]
                ratio_width = 1
                ratio_height = 1
                x1t = int(x1t) * ratio_width
                x2t = int(x2t) * ratio_width
                y1t = int(y1t) * ratio_height
                y2t = int(y2t) * ratio_height

                if os.path.exists(outvideo):
                    shutil.rmtree(outvideo)  # delete output folder
                os.makedirs(outvideo)  # make new output folder

                for path, img, im0s, vid_cap in videodataset:  # one video
                    img = torch.from_numpy(img).to(device)
                    img = img.half() if half else img.float()  # uint8 to fp16/32
                    img /= 255.0  # 0 - 255 to 0.0 - 1.0
                    if img.ndimension() == 3:
                        img = img.unsqueeze(0)

                    # Inference
                    t1 = time_synchronized()
                    pred = model(img, augment=opt.augment)[0]

                    # Apply NMS
                    pred = non_max_suppression(pred,
                                               opt.conf_thres,
                                               opt.iou_thres,
                                               classes=opt.classes,
                                               agnostic=opt.agnostic_nms)
                    t2 = time_synchronized()

                    # Apply Classifier
                    if classify:
                        pred = apply_classifier(pred, modelc, img, im0s)

                    # Per-frame counters; boxnum is never updated below.
                    boxnum = 0
                    boxnumbody = 0
                    boxnumhead = 0

                    # Process detections
                    for i, det in enumerate(pred):  # detections per image
                        if webcam:  # batch_size >= 1
                            p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
                        else:
                            p, s, im0 = path, '', im0s

                        save_path = str(Path(out) / video_dir / Path(p).name)
                        # txt_path = str(Path(out) /video_dir/video_file/ Path(p).stem) + ('_%g' % videodataset.frame if videodataset.mode == 'video' else '')
                        # One label file per frame, named by the loader's frame counter.
                        txt_path = str(
                            Path(out) / video_dir / video_file /
                            str(videodataset.frame))
                        s += '%gx%g ' % img.shape[2:]  # print string
                        gn = torch.tensor(
                            im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
                        if det is not None and len(det):
                            # Rescale boxes from img_size to im0 size
                            det[:, :4] = scale_coords(img.shape[2:],
                                                      det[:, :4],
                                                      im0.shape).round()

                            # Print results
                            for c in det[:, -1].unique():
                                n = (det[:, -1] == c).sum()  # detections per class
                                s += '%g %ss, ' % (n, names[int(c)])  # add to string

                            # Write results
                            for *xyxy, conf, cls in det:
                                # NOTE(review): the body/head counters are only
                                # updated inside this save_txt branch, so the alarm
                                # rule appears inactive without that option — confirm
                                # this is intended.
                                if save_txt:  # Write to file
                                    xywh = (xyxy2xywh(
                                        torch.tensor(xyxy).view(1, 4)) /
                                            gn).view(-1).tolist()  # normalized xywh
                                    with open(txt_path + '.txt', 'a') as f:
                                        # Convert normalized centre/size back to
                                        # pixels, then to a top-left corner.
                                        x0, y0, w0, h0 = xywh
                                        h, w = im0.shape[:2]
                                        x0 *= w
                                        y0 *= h
                                        w0 *= w
                                        h0 *= h
                                        x1 = x0 - w0 / 2
                                        y1 = y0 - h0 / 2
                                        # Count only boxes whose centre passes
                                        # point_in_box against the CSV region.
                                        if point_in_box([x0, y0],
                                                        [x1t, y1t, x2t, y2t]):
                                            # ``1 * cls == 1`` parses as ``(1 * cls) == 1``,
                                            # i.e. a comparison result is added.
                                            boxnumhead += 1 * cls == 1
                                            boxnumbody += 1 * cls == 0
                                        f.write(('%s ' + '%.2g ' + '%d ' * 3 +
                                                 '%d' + '\n') %
                                                (names[int(cls)], conf, x1, y1,
                                                 w0, h0))  # label format
                                        # f.write(('%ss '+'%.2g ' * 5 + '\n') % (names[int(cls)], conf,*xywh))  # label format

                                if save_img or view_img:  # Add bbox to image
                                    label = '%s %.2f' % (names[int(cls)], conf)
                                    plot_one_box(xyxy,
                                                 im0,
                                                 label=label,
                                                 color=colors[int(cls)],
                                                 line_thickness=3)

                        # Print time (inference + NMS)
                        print('%sDone. (%.3fs)' % (s, t2 - t1))

                        # Stream results
                        if view_img:
                            cv2.imshow(p, im0)
                            if cv2.waitKey(1) == ord('q'):  # q to quit
                                raise StopIteration

                        # Save results (image with detections)
                        if save_img:
                            if videodataset.mode == 'images':
                                cv2.imwrite(save_path, im0)
                            else:
                                if vid_path != save_path:  # new video
                                    vid_path = save_path
                                    if isinstance(vid_writer, cv2.VideoWriter):
                                        vid_writer.release()  # release previous video writer

                                    fourcc = 'mp4v'  # output video codec
                                    fps = vid_cap.get(cv2.CAP_PROP_FPS)
                                    w = int(
                                        vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                                    h = int(
                                        vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                                    vid_writer = cv2.VideoWriter(
                                        save_path,
                                        cv2.VideoWriter_fourcc(*fourcc), fps,
                                        (w, h))
                                vid_writer.write(im0)
                                # Also dump the annotated frame as a numbered JPEG.
                                image_path = os.path.join(
                                    outvideo,
                                    str(videodataset.frame) + '.jpg')
                                cv2.imwrite(image_path, im0)

                    # Record whether this frame had more than one body or head counted.
                    if boxnumbody > 1 or boxnumhead > 1:
                        frame_record[frame_num % 10] = 1
                    else:
                        frame_record[frame_num % 10] = 0
                    frame_num += 1
                    # First time more than 7 of the last 10 frames qualify:
                    # flag the video and save the triggering frame.
                    if alarmvideo_list[video_file] == 0 and sum(
                            frame_record) > 7:
                        alarmvideo_list[video_file] = 1
                        image_path = os.path.join(
                            outvideo,
                            str(videodataset.frame) + '_alarmvideo.jpg')
                        cv2.imwrite(image_path, im0)

            # Append one "name, value: flag" line per processed video.
            file_data = ""
            for single_video in alarmvideo_list:
                file_data += str(single_video) + ', value: ' + str(
                    alarmvideo_list[single_video]) + '\n'
            with open(
                    f'{os.path.basename(video_path)}_video_result_{opt.conf_thres}.txt',
                    'a') as f:
                f.write(file_data)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        # NOTE(review): save_path is only bound once a frame has been processed,
        # and the last vid_writer is never released in this function — confirm.
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
def detect(self, app, weights, source='data/images', image_size=736, debug=False):
    """
    Detects the image or video of the given source by using the specified
    weights and pushes every processed frame to the GUI.

    NOTE(review): nesting was reconstructed from a copy of this file whose
    indentation was lost; the frame is assumed to be pushed to the GUI once
    per image, whether or not anything was detected — confirm.

    Parameters
    ----------
    app:
        GUI object; its ``image`` attribute is assigned and
        ``Label_Bild.setPixmap`` is called with each frame.
    weights: str
        Weights path.
    source: str
        Source of the detection. 0 for webcam.
    image_size: int
        Inference size (pixels).
    debug: bool
        Whether the debug mode is on. In debug mode boxes are drawn and the
        confidence is appended to the label.
    """
    print(f'Detecting using weights: {weights}')

    source = str(source)
    imgsz = image_size

    # Fixed inference settings (stand-ins for the usual command-line options).
    opt_device = ''
    opt_augment = False
    opt_conf_thres = 0.25
    opt_iou_thres = 0.01
    # NOTE(review): a bare 0 is falsy; if non_max_suppression tests
    # ``if classes:`` no class filtering happens — confirm whether [0] was meant.
    opt_classes = 0
    opt_agnostic_nms = True

    # Numeric ids, .txt stream lists and network URLs are treated as streams.
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))
    if webcam:
        print(source)

    # Initialize
    set_logging()
    device = select_device(opt_device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by the hard-coded flag)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() returns a key report, not the module, so the
        # device move must be a separate call on the module itself.
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    if webcam:
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
        print("webcam")  # debug
        logging.debug("webcam")
    else:
        dataset = LoadImages(source, img_size=imgsz)
        print("image")  # debug

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    if device.type != 'cpu':
        # Warm-up forward pass on a blank image; skipped on CPU.
        warmup = torch.zeros((1, 3, imgsz, imgsz), device=device)
        model(warmup.half() if half else warmup)

    logging.debug("start inference")
    for path, img, im0s, vid_cap in dataset:
        # The owner clears self.detection to stop a running stream.
        if self.detection is False and webcam is True:
            logging.debug("kill thread")
            dataset.kill_thread()
            break

        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt_augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt_conf_thres,
                                   opt_iou_thres,
                                   classes=opt_classes,
                                   agnostic=opt_agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                s, im0 = '%g: ' % i, im0s[i].copy()
            else:
                s, im0 = '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f'{n} {names[int(c)]}s, '  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    class_name = names[int(cls)]
                    logging.debug('Prediction: %s; Confidence %s', class_name, f'{conf:.2f}')
                    # Put a space before every character that is not lower-case.
                    label = ''.join(ch if ch.islower() else ' ' + ch for ch in class_name)
                    if debug:
                        label = f'{label} {conf:.2f}'
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)
                    plot_one_point(xyxy,
                                   im0,
                                   label=label,
                                   color=colors[int(cls)],
                                   point_thickness=None,
                                   r=10)

            # Hand the frame to the GUI. The copied buffer is tightly packed
            # (3 bytes per pixel), so the stride is passed explicitly; without
            # it Qt assumes 32-bit aligned scanlines and skews frames whose
            # width is not a multiple of 4.
            height, width = im0.shape[0], im0.shape[1]
            app.image = QImage(bytearray(im0), width, height, 3 * width,
                               QImage.Format_RGB888).rgbSwapped()
            app.Label_Bild.setPixmap(QPixmap(app.image))
            time.sleep(1 / 60)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

    logging.debug(f'Done. ({time.time() - t0:.3f}s)')
    print(f'Done. ({time.time() - t0:.3f}s)')
def detect(save_img=False):
    """Run rotated-box detection on ``opt.source`` and save annotated output.

    All settings are read from the module-level ``opt`` namespace. The output
    folder ``opt.output`` is deleted and recreated on every call. Each
    detection is passed to ``rbox2txt`` and drawn with
    ``plot_one_rotated_box``.

    NOTE(review): nesting was reconstructed from a copy of this file whose
    indentation was lost — confirm block boundaries.

    Args:
        save_img: Whether to save annotated results; forced to ``True`` for
            non-stream sources.

    Raises:
        StopIteration: When ``q`` is pressed in the preview window.
    """
    # Output folder, input source, weights and other options
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        ('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    # Remove the previous output folder and create a fresh one
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model (FP32) and let check_img_size adjust the requested size
    # against the model's maximum stride.
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by the hard-coded flag)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader: the loader depends on the kind of source
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors (one random colour per class)
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    if device.type != 'cpu':
        # Warm-up forward pass on a blank (1, 3, imgsz, imgsz) image; skipped on CPU.
        warmup = torch.zeros((1, 3, imgsz, imgsz), device=device)
        model(warmup.half() if half else warmup)

    # Loader items, per the original author's notes:
    #   path    - image/video path
    #   img     - image after resize + pad
    #   im0s    - image at its original size
    #   vid_cap - None for images, the capture object for videos
    for path, img, im0s, vid_cap in dataset:
        print(img.shape)
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)  # add the batch axis at dim 0

        # Inference
        # Per the original author's notes (not verifiable from this function):
        # the first model output has shape (batch, num_boxes, no) with
        # xywh, objectness, class scores, then angle-class scores.
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        # Author's note: each result row is [x, y, long side, short side, theta, conf, class id],
        # theta in [0, 179] — TODO confirm against rotate_non_max_suppression.
        pred = rotate_non_max_suppression(pred,
                                          opt.conf_thres,
                                          opt.iou_thres,
                                          classes=opt.classes,
                                          agnostic=opt.agnostic_nms,
                                          without_iouthres=False)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # i: image index, det: detections for that image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)  # output folder + file name
            s += '%gx%g ' % img.shape[2:]  # print string
            if det is not None and len(det):
                # Rescale the first five columns from img_size to im0 size
                det[:, :5] = scale_labels(img.shape[2:], det[:, :5], im0.shape).round()

                # Print results: one "<count> <class>s, " entry per distinct class id
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results, iterating the detections in reverse order;
                # rbox holds the five box values of one detection.
                for *rbox, conf, cls in reversed(det):
                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        classname = '%s' % names[int(cls)]
                        conf_str = '%.3f' % conf
                        rbox2txt(rbox, classname, conf_str,
                                 Path(p).stem,
                                 str(out + '/result_txt/result_before_merge'))
                        plot_one_rotated_box(rbox,
                                             im0,
                                             label=label,
                                             color=colors[int(cls)],
                                             line_thickness=1,
                                             pi_format=False)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results (live preview)
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    # Close the last video file explicitly so it is finalized on disk.
    if isinstance(vid_writer, cv2.VideoWriter):
        vid_writer.release()

    if save_txt or save_img:
        print(' Results saved to %s' % Path(out))

    print(' All Done. (%.3fs)' % (time.time() - t0))
def detect(save_img=False):
    """Detect objects in a video and un-annotate persons that carry a name tag.

    Settings are read from the module-level ``opt`` namespace. Every detection
    is drawn on the frame. When a ``name_tag`` box lies fully inside a
    ``person`` box, the tag crop is saved under ``./temp`` and the person's
    region is restored from an untouched copy of the frame. Frames are written
    to ``./inference/output/output.mp4``.

    NOTE(review): nesting was reconstructed from a copy of this file whose
    indentation was lost — confirm block boundaries. The writer setup reads
    ``vid_cap``, so the source is presumably always a video — confirm.

    Args:
        save_img: Ignored; saving is always enabled.
    """
    out, source, weights, imgsz = \
        opt.output, opt.source, opt.weights, opt.img_size

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    # cv2.imwrite does not create directories and only returns False on
    # failure, so make sure the crop folder exists up front.
    os.makedirs('./temp', exist_ok=True)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by the hard-coded flag)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    save_img = True
    dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    # Warm-up forward pass; unlike the usual variant this also runs on CPU.
    warmup = torch.zeros((1, 3, imgsz, imgsz), device=device)
    model(warmup.half() if half else warmup)  # run once

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for det in pred:  # detections per image
            p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            s += '%gx%g ' % img.shape[2:]  # print string
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Keep an untouched copy of the frame before boxes are drawn.
                clean_frame = im0.copy()
                person_boxes = []
                tag_boxes = []

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        class_name = names[int(cls)]
                        # The class name is forwarded to the drawing helper.
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3,
                                     classes=class_name)

                        # Remember integer [x1, y1, x2, y2] for persons and name tags.
                        if class_name == 'person':
                            person_boxes.append([int(v) for v in xyxy[:4]])
                        if class_name == 'name_tag':
                            tag_boxes.append([int(v) for v in xyxy[:4]])

                # For every name tag fully inside a person box: save the tag crop
                # and paint the clean person region back over the annotated frame.
                for tx1, ty1, tx2, ty2 in tag_boxes:
                    for px1, py1, px2, py2 in person_boxes:
                        inside = ty1 >= py1 and ty2 <= py2 and tx1 >= px1 and tx2 <= px2
                        if not inside:
                            continue
                        tag_crop = clean_frame[ty1:ty2, tx1:tx2]
                        if tag_crop.size:  # cv2.imwrite rejects empty images
                            cv2.imwrite(
                                "./temp/{0}_{1}_{2}_{3}.jpg".format(ty1, ty2, tx1, tx2),
                                tag_crop)
                        im0[py1:py2, px1:px2] = clean_frame[py1:py2, px1:px2]

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Save results (image with detections)
            if save_img:
                if vid_path != save_path:  # new video
                    vid_path = save_path
                    if isinstance(vid_writer, cv2.VideoWriter):
                        vid_writer.release()  # release previous video writer

                    fourcc = 'mp4v'  # output video codec
                    fps = vid_cap.get(cv2.CAP_PROP_FPS)
                    w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                    h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    vid_writer = cv2.VideoWriter(
                        './inference/output/output.mp4',
                        cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                vid_writer.write(im0)

    # Close the last video file explicitly so it is finalized on disk.
    if isinstance(vid_writer, cv2.VideoWriter):
        vid_writer.release()
def detect(save_img=False):
    """Detect masks, attach them to tracked persons and report compliance.

    Settings are read from the module-level ``opt`` namespace. Tracker output
    is loaded from ``/content/outputs.txt`` (space separated, 11 columns per
    row as implied by the reshape below). For every frame each detection is
    assigned to the tracked person with the highest ``Overlap`` value, and the
    detected class is appended to that person's history. After the run, every
    person with more than 4 recorded classes is judged compliant when
    odd-numbered classes are at least as frequent as even-numbered ones.

    NOTE(review): nesting was reconstructed from a copy of this file whose
    indentation was lost — confirm block boundaries.

    Args:
        save_img: Whether to save annotated results; forced to ``True`` for
            non-stream sources.
    """
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))

    print('reading in Deep Sort Readings')
    DSOutput = pd.read_csv('/content/outputs.txt', sep=' ', header=None)
    print('Successfully read in Deep Sort Reading')
    print(DSOutput)
    history = defaultdict(list)  # person id -> detected class per matched detection

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by the hard-coded flag)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() returns a key report, not the module, so the
        # device move must be a separate call on the module itself.
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    # frameNumber counts loader items from 0 and is matched against column 0
    # of the tracker output.
    for frameNumber, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
        print('pred shape', len(pred))

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            print('det shape', det.shape)
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Share of this frame's detections that fall in classes 1, 3, 5.
                num_cat = 6
                classes = det[:, -1].cpu().numpy().astype(int)
                one_hot_cats = np.eye(num_cat)[classes].reshape(-1, num_cat)
                counts_per_cat = one_hot_cats.sum(axis=0)
                score = counts_per_cat[[1, 3, 5]].sum() / len(det)

                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Tracker rows for this frame; column 1 is the person id and
                # columns 2-5 are passed to Overlap as the person box.
                person_coords = DSOutput[DSOutput.iloc[:, 0] == frameNumber].values.reshape(-1, 11)
                # -1 marks "no tracked person matched" (the previous
                # ``-1 * np.zeros(n)`` produced zeros, not -1).
                CurrentFrameDetection = np.full(len(det), -1.0)
                if len(person_coords):
                    for itemp, mask_coord in enumerate(det):
                        overlaps = [
                            Overlap(mask_coord[:4].cpu(), person_coord, 10000, 10000)
                            for person_coord in person_coords[:, 2:6]
                        ]
                        best_overlap = np.argmax(overlaps)
                        best_person = person_coords[best_overlap, 1]
                        history[best_person].append(mask_coord[-1].cpu().item())
                        CurrentFrameDetection[itemp] = best_person

                # Print results
                # Class ids: 0 no mask, 1 non-medical full, 2 non-medical partial,
                # 3 medical full, 4 medical partial, 5 face shield
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results; the id list is reversed to stay aligned with
                # the reversed detection order.
                CurrentFrameDetection = list(reversed(CurrentFrameDetection))
                for mask, (*xyxy, conf, cls) in enumerate(reversed(det)):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy,
                                     im0,
                                     score,
                                     label=label,
                                     color=colors[int(cls)],
                                     personid=CurrentFrameDetection[mask],
                                     line_thickness=3)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    # Close the last video file explicitly so it is finalized on disk.
    if isinstance(vid_writer, cv2.VideoWriter):
        vid_writer.release()

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    # Compliance summary: odd class ids (1, 3, 5) count as good frames,
    # even ids (0, 2, 4) as bad ones.
    compliance = 0
    total = 0
    for k, v in history.items():
        observed = np.array(v)
        good_frames = sum(observed % 2 == 1)
        bad_frames = sum(observed % 2 == 0)
        if len(v) > 4:  # ignore persons with too few observations
            total += 1
            if good_frames >= bad_frames:
                compliance += 1
                print('Person {} is compliant'.format(k))
            else:
                print('Person {} is not compliant'.format(k))

    if total:
        print('overall compliance', compliance / total)
    else:
        # No person had more than 4 observations; avoid dividing by zero.
        print('overall compliance', 'n/a')

    print(f'Done. ({time.time() - t0:.3f}s)')
def _sample_box_depth(depth_arr, x1, y1, x2, y2):
    """Return the box centre and the mean valid depth sampled inside a box.

    Four depth readings are taken halfway between the box centre and each of
    its corners.  Readings that are not strictly positive are treated as
    invalid and ignored.  When no reading is valid the depth is reported as
    0.0 instead of NaN (``np.mean`` of an empty list), which matches the
    "nothing measured" default used by ``detect``.

    Args:
        depth_arr: 2-D depth image indexed as ``depth_arr[row, col]``.
        x1, y1, x2, y2: box corners in pixel coordinates.

    Returns:
        Tuple ``(xc, yc, depth_avg)``.
    """
    xc, yc = (x1 + x2) / 2, (y1 + y2) / 2
    x1c, x2c = (x1 + xc) / 2, (x2 + xc) / 2
    y1c, y2c = (y1 + yc) / 2, (y2 + yc) / 2
    depth_4samples = [
        depth_arr[int(y1c), int(x1c)],
        depth_arr[int(y1c), int(x2c)],
        depth_arr[int(y2c), int(x1c)],
        depth_arr[int(y2c), int(x2c)],
    ]
    print('depth_4samples:', depth_4samples)
    depth_validsamples = [b for b in depth_4samples if b > 0]
    depth_avg = np.mean(depth_validsamples) if depth_validsamples else 0.0
    print('depth for grasping =', depth_avg)
    return xc, yc, depth_avg


def detect(save_img=False):
    """Run detection on a RealSense snapshot (or on ``opt.source``) with depth.

    A colour/depth pair is always grabbed from ``realsense`` first.  With the
    local switch ``realsense_once`` set (the default) only that snapshot is
    inferred; otherwise the snapshot is written to ``./inference/images`` and
    every item yielded by the regular data loader is processed.  For each
    detection the mean depth inside the box is sampled from the snapshot's
    depth image; in snapshot mode the box centre is additionally
    back-projected through the matrix ``K``.

    Args:
        save_img: overridden to True whenever the source is not a stream.

    Returns:
        Tuple ``(x1, y1, x2, y2, X, Y, Z, depth_avg)`` for the last processed
        detection; all zeros if nothing was detected.  ``X, Y, Z`` stay zero
        when ``realsense_once`` is disabled (unchanged from the original).

    Raises:
        StopIteration: when ``q`` is pressed in the preview window.
    """
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Grab one colour (RGB) + depth frame from the camera.
    img_arr, depth_arr = realsense.get_image(show=False)
    # Original note: depth is in mm, and [240, 320] is the centre of a
    # 480x640 frame -- TODO confirm against the camera configuration.
    print('central depth=', depth_arr[240, 320])

    x1, y1, x2, y2, depth_avg = 0, 0, 0, 0, 0
    xyz_obj = np.array([0, 0, 0])
    # NOTE(review): presumably the camera intrinsic matrix -- confirm.
    K = np.array([[609.674560546875, 0.0, 323.9862365722656],
                  [0.0, 608.5648193359375, 227.5126495361328],
                  [0.0, 0.0, 1.0]])
    K_inv = np.linalg.inv(K)

    # Letterbox the snapshot (in BGR), then go back to RGB, channel-first.
    img_pad = letterbox(img_arr[:, :, ::-1], new_shape=imgsz)[0]
    img_pad = img_pad[:, :, ::-1].transpose(2, 0, 1)
    img_pad = np.ascontiguousarray(img_pad)

    # 1: infer only the RealSense snapshot, without writing it to disk.
    # 0: write the snapshot and infer everything found under the source path.
    realsense_once = 1
    if realsense_once == 0:
        cv2.imwrite('./inference/images/snap.jpg',
                    cv2.cvtColor(img_arr, cv2.COLOR_RGB2BGR))  # OpenCV is BGR

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by default)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # speeds up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # warm-up run

    if realsense_once:
        # Single synthetic "dataset" item.  The BGR view is made contiguous
        # up front because OpenCV drawing rejects negative-stride arrays
        # (https://github.com/opencv/opencv/issues/18120).
        frames = [(
            '/home/hanwen/test_ros_ws/src/yolov5_test/scripts/inference/images/snap.jpg',
            img_pad,
            np.ascontiguousarray(img_arr[:, :, ::-1]),
            None,
        )]
    else:
        frames = dataset
    # The snapshot is a single image, never a batch, even for stream sources.
    batched = webcam and not realsense_once

    for path, img, im0s, vid_cap in frames:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if batched:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # gain whwh

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    x1, y1, x2, y2 = (xyxy[0].cpu().numpy(),
                                      xyxy[1].cpu().numpy(),
                                      xyxy[2].cpu().numpy(),
                                      xyxy[3].cpu().numpy())
                    print('x1,y1,x2,y2 = ', x1, y1, x2, y2)
                    xc, yc, depth_avg = _sample_box_depth(
                        depth_arr, x1, y1, x2, y2)

                    if realsense_once:
                        # Back-project the box centre through K
                        # (original note: object centre XYZ in mm).
                        xyz_obj = np.dot(K_inv,
                                         depth_avg * np.array([xc, yc, 1]))

                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') %
                                    (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                # Without a capture handle there is no video to append to;
                # this also covers the snapshot, which has no ``vid_cap``.
                if vid_cap is None or dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    # Flush the last video file, if any.
    if isinstance(vid_writer, cv2.VideoWriter):
        vid_writer.release()

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('All Done. (%.3fs)' % (time.time() - t0))
    return x1, y1, x2, y2, xyz_obj[0], xyz_obj[1], xyz_obj[2], depth_avg
def detect(save_img=False):
    """Detect licence plates, rectify them and read the characters.

    Pipeline per image: YOLO plate detection -> ``corner_detection`` to obtain
    a warped plate crop -> either (a) dump the crop plus its ground-truth
    string as CRNN training data, or (b) run ``recognition`` with the CRNN
    model, compare against the ground truth encoded in the file name, and
    draw the predicted string on the frame.  Mode (a)/(b) is selected by the
    local switch ``generate_crnn_trainset`` (False by default).

    Args:
        save_img: overridden to True whenever the source is not a stream.

    Raises:
        StopIteration: when ``q`` is pressed in the preview window.

    NOTE(review): this block was reconstructed from whitespace-collapsed
    text.  The line ``sim_pred = sim_pred[:2] + ' ' + sim_pred[2:]`` could not
    be told apart from a comment and is kept commented out -- confirm.
    """
    plate_detect_model = os.path.join(pwd, 'models', 'best.pt')
    corner_detect_model = os.path.join(
        pwd, 'models', 'corner_epoch_227_valoss_0.000117.pt')
    recognition_crnn_model = os.path.join(pwd, 'models',
                                          'checkpoint_123_acc_0.9940.pth')
    corner_model = cornernet()
    corner_model.eval()  # evaluation mode
    out, weights, view_img, save_txt, imgsz = \
        opt.output, opt.weights, opt.view_img, opt.save_txt, opt.img_size

    generate_crnn_trainset = False
    if generate_crnn_trainset:
        source = os.path.join(pwd, 'CCPD2019', 'ccpd_base')
        save_path_train = os.path.join(pwd, 'data', 'crnn', 'warpimg')
        save_path_test = os.path.join(pwd, 'data', 'crnn', 'test')
        ftrain = open(
            os.path.join(pwd, 'CRNN_Chinese_Characters_Rec', 'lib',
                         'train_own.txt'), 'w')
        ftest = open(
            os.path.join(pwd, 'CRNN_Chinese_Characters_Rec', 'lib',
                         'test_own.txt'), 'w')
        if not os.path.exists(save_path_train):
            os.makedirs(save_path_train)
        if not os.path.exists(save_path_test):
            os.makedirs(save_path_test)
        all_length = len(os.listdir(source))
    else:
        source = r'/data/CCPD2019/ccpd_challenge'
        print('image\' length is: ', len(os.listdir(source)))
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load models
    model = attempt_load(plate_detect_model,
                         map_location=device)  # load FP32 model
    corner_model_state = torch.load(corner_detect_model, map_location=device)
    corner_model.load_state_dict(corner_model_state['model_state_dict'])
    print('loaded the plate detected model:', plate_detect_model)
    print('loaded the corner detected model:', corner_detect_model)

    alphabet = '0123456789abcdefghjklmnpqrstuvwxyz云京冀吉宁川新晋桂沪津浙渝湘琼甘皖粤苏蒙西豫贵赣辽鄂闽陕青鲁黑'
    converter = utils.strLabelConverter(alphabet)
    nclass = len(alphabet) + 1
    if not generate_crnn_trainset:
        crnn_model = crnn.CRNN(32, 1, nclass, 256)
        # map_location keeps a GPU-saved checkpoint loadable on CPU-only
        # hosts, consistent with the other two torch.load calls above.
        checkpoint = torch.load(recognition_crnn_model, map_location=device)
        print('loaded the corner recognition model:', recognition_crnn_model)
        if 'state_dict' in checkpoint.keys():
            crnn_model.load_state_dict(checkpoint['state_dict'])
        else:
            crnn_model.load_state_dict(checkpoint)

    if torch.cuda.is_available():
        model = model.cuda()
        corner_model = corner_model.cuda()
        if not generate_crnn_trainset:
            crnn_model = crnn_model.cuda()
    if not generate_crnn_trainset:
        crnn_model.eval()
    corner_model.eval()
    model.eval()

    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by default)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # speeds up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # warm-up run

    count = 0  # images processed
    n_correct = 0  # plates whose prediction equals the ground truth
    false_img = []  # ground-truth strings of mis-read plates
    font = None  # loaded lazily, once, on the first plate that needs it
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        count += 1
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') %
                                    (cls, *xywh))  # label format

                    c1, c2 = (int(xyxy[0]), int(xyxy[1])), (int(xyxy[2]),
                                                            int(xyxy[3]))
                    # Plate crop taken before anything is drawn on im0.
                    imgple = im0[int(xyxy[1]):int(xyxy[3]),
                                 int(xyxy[0]):int(xyxy[2]), :]
                    if save_img or view_img:  # Add bbox to image
                        plot_one_box(xyxy,
                                     im0,
                                     label='',
                                     color=colors[int(cls)],
                                     line_thickness=3)

                    warp_img, im0 = corner_detection(corner_model, imgple,
                                                     im0, xyxy, colors, cls,
                                                     c1, c2)

                    if generate_crnn_trainset:
                        # Ground truth is encoded in the file name.
                        name = save_path.split(os.sep)[-1]
                        string = name.replace('.jpg', '').replace(
                            '.png', '').replace('&', '_').replace(
                                '-', '_').split('_')
                        platestring = chinesechar_reverse[int(string[15])]
                        for ij in string[16:-2]:
                            platestring += alphaenglish_reverse[int(ij)]
                        platestring = platestring.upper()
                        # The first images go to the test split.
                        if count < 3800:
                            ftest.write(
                                os.path.join(save_path_test, name) + ' ' +
                                platestring + '\n')
                            cv2.imwrite(os.path.join(save_path_test, name),
                                        warp_img)
                        else:
                            ftrain.write(
                                os.path.join(save_path_train, name) + ' ' +
                                platestring + '\n')
                            cv2.imwrite(os.path.join(save_path_train, name),
                                        warp_img)
                    else:
                        # Best-effort ground truth from the file name.  Reset
                        # to None on failure so a value left over from the
                        # previous image is never used for scoring.
                        platestring = None
                        try:
                            name = save_path.split(os.sep)[-1].replace(
                                '.jpg', '').replace('.png', '').replace(
                                    '&', '_').replace('-', '_').split('_')
                            platestring = chinesechar_reverse[int(name[15])]
                            for ij in name[16:-2]:
                                platestring += alphaenglish_reverse[int(ij)]
                            platestring = platestring.upper()
                        except (IndexError, KeyError, ValueError):
                            platestring = None

                        sim_pred = recognition(warp_img, crnn_model,
                                               converter)
                        sim_pred = sim_pred.upper()

                        if platestring is not None:
                            if sim_pred == platestring:
                                print()
                                n_correct += 1
                            else:
                                false_img.append(platestring)
                                try:
                                    cv2.imwrite(save_path, warp_img)
                                except cv2.error as err:
                                    print('could not save', save_path, err)

                        # sim_pred = sim_pred[:2] + ' ' + sim_pred[2:]
                        print('predict: ', sim_pred)

                        # Draw the text with PIL (font file supplies the
                        # glyphs for the non-ASCII plate characters).
                        if font is None:
                            font = ImageFont.truetype(
                                '/usr/share/fonts/truetype/wqy/wqy-microhei.ttc',
                                50)
                        img_PIL = Image.fromarray(im0).convert('RGB')
                        fillColor = (255, 0, 0)  # text colour
                        draw = ImageDraw.Draw(img_PIL)
                        draw.text((
                            c1[0],
                            c1[1] - 39,
                        ),
                                  sim_pred,
                                  font=font,
                                  fill=fillColor)
                        # np.array (not asarray) so the frame stays writable
                        # for boxes drawn for the next detection.
                        im0 = np.array(img_PIL)

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img and (not generate_crnn_trainset):
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    if generate_crnn_trainset:
        print('生成的CRNN训练集数量:{},测试集数量是:{}'.format(
            len(os.listdir(save_path_train)),
            len(os.listdir(save_path_test))))
    elif count:
        # The original format string had only two placeholders, so the
        # accuracy argument was silently dropped.
        print('n_correct: {},count: {},test accuray: {}'.format(
            n_correct, count, n_correct / count))
        print('false_image', false_img)

    if generate_crnn_trainset:
        ftrain.close()
        ftest.close()
    print('Done. (%.3fs)' % (time.time() - t0))
def detect(save_img=False):
    """Run detection and write a ``submission.csv`` of per-image class codes.

    For every processed image the detected class ids are ordered by the
    normalised x-centre of their boxes (left to right) and concatenated into
    one string.  File names and codes are written to
    ``<cwd>/<save_dir>/submission.csv``.

    Codes are only collected when ``opt.save_txt`` is set (unchanged from the
    original); otherwise every code is the empty string.  Two boxes with the
    exact same x-centre overwrite each other, as before.

    Args:
        save_img: ignored; recomputed from ``opt.nosave`` and the source.
    """
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = not opt.nosave and not source.endswith(
        '.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))
    file_name = []
    file_code = []

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    save_dir.mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by default)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict returns a key report, not the module, so the
        # device move / eval switch must be called on the model itself.
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # speeds up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # warm-up run
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
                ), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # gain whwh

            # x-centre -> class id.  Created for every image so an image with
            # no detections yields an empty code instead of a NameError or
            # the previous image's codes.
            x_value = {}
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "

                # Collect results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        x_value[xywh[0]] = int(cls)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            file_name.append(Path(p).name)
            file_code.append(''.join(
                str(x_value[key]) for key in sorted(x_value)))

    # Build the path with pathlib instead of hard-coded '\\' separators so it
    # is valid on every platform (and when save_dir is already absolute).
    save_csv_path = str(Path.cwd() / save_dir / 'submission.csv')
    print(save_csv_path)
    sub = pd.DataFrame({"file_name": file_name, 'file_code': file_code})
    sub.to_csv(save_csv_path, index=False)

    print(f'Done. ({time.time() - t0:.3f}s)')
def detect(save_img=False):
    """Run YOLO inference on ``opt.source`` and save/show annotated results.

    Reads all settings from the module-level ``opt`` namespace.  Annotated
    images or videos are written to an auto-incremented run directory under
    ``opt.project / opt.name``; with ``opt.save_txt`` normalised ``xywh``
    labels are appended to ``labels/<stem>.txt``.  With ``opt.fixed_colors``
    the class colours come from a seeded ``RandomState`` so they are the same
    on every run.

    Args:
        save_img: overridden to True whenever the source is not a stream.

    Raises:
        StopIteration: when ``q`` is pressed in the preview window.
    """
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by default)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict returns a key report, not the module, so the
        # device move / eval switch must be called on the model itself.
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # speeds up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    if opt.fixed_colors:
        rng = random.RandomState(seed=1825)  # reproducible palette
        colors = [[rng.randint(0, 255) for _ in range(3)] for _ in names]
    else:
        colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # warm-up run
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = Path(
                    path[i]), '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = Path(path), '', im0s, getattr(
                    dataset, 'frame', 0)

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f'{n} {names[int(c)]}s, '  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=opt.line_thickness)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    # Flush the last video file instead of relying on garbage collection.
    if isinstance(vid_writer, cv2.VideoWriter):
        vid_writer.release()

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
def detect(save_img=False):
    """Run detection over one source or a newline-separated list of sources.

    With ``opt.imlist`` set, ``opt.source`` is split on newlines and each
    entry gets its own ``LoadImages`` loader; entries that fail to load are
    reported and skipped.  For every detection a one-line normalised
    ``class x y w h`` record is written to
    ``result_dir + <image name without its first 4 and last 4 chars> + '.txt'``.
    The file is opened in ``'w'`` mode, so it ends up holding only the last
    detection written for that image (unchanged from the original).

    ``result_dir`` is a module-level name defined outside this function.

    Args:
        save_img: forced to False for non-stream sources.

    NOTE(review): reconstructed from whitespace-collapsed text; the result
    file is assumed to be written for every detection regardless of
    ``save_img``/``view_img`` -- confirm.
    """
    logging.basicConfig(filename='detect.log', level=logging.INFO)
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))
    imglist = opt.imlist
    source_list = source.split('\n')

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by default)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict returns a key report, not the module, so the
        # device move / eval switch must be called on the model itself.
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader(s).  A stream source always yields exactly one loader,
    # so combining it with --imlist no longer hits an unbound dataset_list.
    vid_path, vid_writer = None, None
    use_list = bool(imglist) and not webcam
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # speeds up constant image size inference
        datasets = [LoadStreams(source, img_size=imgsz, stride=stride)]
    else:
        save_img = False
        if use_list:
            datasets = []
            for src in source_list:
                print("source_list : ", src)
                try:
                    datasets.append(LoadImages(src, img_size=imgsz, stride=stride))
                except Exception:
                    # Best effort: report the bad entry and carry on.
                    print("error!!!!!: ", src)
        else:
            datasets = [LoadImages(source, img_size=imgsz, stride=stride)]

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # warm-up run
    t0 = time.time()

    if use_list:
        print("imglist True, j = ", len(datasets))
    else:
        print("imglist False, j = ", len(datasets))

    count = 0  # number of result records written
    for dataset in datasets:
        for path, img, im0s, vid_cap in dataset:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            t1 = time_synchronized()
            pred = model(img, augment=opt.augment)[0]

            # Apply NMS
            pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes,
                                       agnostic=opt.agnostic_nms)
            t2 = time_synchronized()

            # Apply Classifier
            if classify:
                pred = apply_classifier(pred, modelc, img, im0s)

            # Process detections
            for i, det in enumerate(pred):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
                else:
                    p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

                p = Path(p)  # to Path
                print("p.name=", p.name)
                save_path = str(save_dir / p.name)  # img.jpg
                txt_path = str(save_dir / 'labels' / p.stem) + (
                    '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
                s += '%gx%g ' % img.shape[2:]  # print string
                gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # gain whwh
                if len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                    # Write results
                    for *xyxy, conf, cls in reversed(det):
                        if save_txt:  # Write to file
                            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                            line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                            with open(txt_path + '.txt', 'a') as f:
                                f.write(('%g ' * len(line)).rstrip() % line + '\n')

                        if save_img or view_img:  # Add bbox to image
                            label = f'{names[int(cls)]} {conf:.2f}'
                            plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

                        os.makedirs(result_dir, exist_ok=True)
                        with open(result_dir + p.name[4:-4] + '.txt', 'w') as f:
                            xmin = int(xyxy[0])
                            ymin = int(xyxy[1])
                            xmax = int(xyxy[2])
                            ymax = int(xyxy[3])

                            # NOTE: the third dimension is the channel count;
                            # it is only printed under the label "bs".
                            h, w, bs = im0.shape
                            print("bs h w = ", bs, h, w)
                            absolute_x = xmin + 0.5 * (xmax - xmin)
                            absolute_y = ymin + 0.5 * (ymax - ymin)

                            absolute_width = xmax - xmin
                            absolute_height = ymax - ymin

                            # Normalise by the original image size.
                            x = str(absolute_x / w)
                            y = str(absolute_y / h)
                            width = str(absolute_width / w)
                            height = str(absolute_height / h)

                            f.write(str(int(cls)) + " " + x + " " + y + " " + width + " " + height)
                        count += 1

                # Print time (inference + NMS)
                print(f'{s}Done. ({t2 - t1:.3f}s)')

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')