def forward_once(self, x, profile=False):
    y, dt = [], []  # outputs
    for m in self.model:
        if m.f != -1:  # if not from previous layer
            x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers

        if profile:
            o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 if thop else 0  # FLOPS
            t = time_synchronized()
            for _ in range(10):
                _ = m(x)
            dt.append((time_synchronized() - t) * 100)
            if m == self.model[0]:
                logger.info(f"{'time (ms)':>10s} {'GFLOPS':>10s} {'params':>10s} {'module'}")
            logger.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}')

        x = m(x)  # run
        y.append(x if m.i in self.save else None)  # save output

    if profile:
        logger.info('%.1fms total' % sum(dt))
    return x
def forward_from(self, x, cut_layer=4, profile=False):
    y, dt = [], []  # outputs
    for m in self.model:
        if m.i < cut_layer:  # skip layers before the cut point, keep indices aligned
            y.append(None)
            continue
        if m.f != -1:  # if not from previous layer
            x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers

        if profile:
            o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 if thop else 0  # FLOPS
            t = time_synchronized()
            for _ in range(10):
                _ = m(x)
            dt.append((time_synchronized() - t) * 100)
            print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type))

        x = m(x)  # run
        y.append(x if m.i in self.save else None)  # save output

    if profile:
        print('%.1fms total' % sum(dt))
    return x
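# A hypothetical companion sketch, not part of the original class: forward_from
# resumes from `cut_layer`, so something has to produce the intermediate
# activation for layers 0..cut_layer-1 (e.g. on an edge device in a split-
# inference setup). Mirroring the loop in forward_once, and assuming the early
# layers are strictly sequential so no skipped saved outputs are referenced:
def forward_until(self, x, cut_layer=4):
    y = []  # saved outputs, indexed the same way as in forward_once
    for m in self.model:
        if m.i >= cut_layer:  # stop before the cut point
            break
        if m.f != -1:  # gather inputs from earlier layers
            x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]
        x = m(x)  # run layer
        y.append(x if m.i in self.save else None)
    return x  # hand this tensor to forward_from(x, cut_layer=cut_layer)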
def image_track(self, im0):
    """
    :param im0: original image, BGR format
    :return:
    """
    # preprocess ************************************************************
    # Padded resize
    img = letterbox(im0, new_shape=self.img_size)[0]

    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    img = np.ascontiguousarray(img)

    # numpy to tensor
    img = torch.from_numpy(img).to(self.device)
    img = img.half() if self.half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)
    s = '%gx%g ' % img.shape[2:]  # print string

    # Detection time *********************************************************
    # Inference
    t1 = time_synchronized()
    with torch.no_grad():
        pred = self.detector(img, augment=self.args.augment)[0]  # list: bz * [ (#obj, 6)]

    # Apply NMS and filter out objects other than person (cls:0)
    pred = non_max_suppression(pred, self.args.conf_thres, self.args.iou_thres,
                               classes=self.args.classes, agnostic=self.args.agnostic_nms)
    t2 = time_synchronized()

    # get all obj ************************************************************
    det = pred[0]  # for video, bz is 1
    if det is not None and len(det):  # det: (#obj, 6)  x1 y1 x2 y2 conf cls
        # Rescale boxes from img_size to original im0 size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

        # Print results. statistics of number of each obj
        for c in det[:, -1].unique():
            n = (det[:, -1] == c).sum()  # detections per class
            s += '%g %ss, ' % (n, self.names[int(c)])  # add to string

        bbox_xywh = xyxy2xywh(det[:, :4]).cpu()
        confs = det[:, 4:5].cpu()

        # ****************************** deepsort ****************************
        outputs = self.deepsort.update(bbox_xywh, confs, im0)  # (#ID, 5) x1,y1,x2,y2,track_ID
    else:
        outputs = torch.zeros((0, 5))

    t3 = time.time()
    return outputs, t2 - t1, t3 - t2
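# `letterbox` comes from the YOLOv5 utils and is used throughout these
# pipelines. A simplified sketch of what it does (aspect-preserving resize plus
# gray padding to the target shape); an illustration, not the exact library code:
def letterbox_sketch(im, new_shape=640, color=(114, 114, 114)):
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])  # scale ratio
    new_unpad = (int(round(shape[1] * r)), int(round(shape[0] * r)))  # (w, h)
    dw = (new_shape[1] - new_unpad[0]) / 2  # width padding, split between sides
    dh = (new_shape[0] - new_unpad[1]) / 2  # height padding
    im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return im, r, (dw, dh)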
def forward(self, imgs, size=640, augment=False, profile=False):
    # Inference from various sources. For height=720, width=1280, RGB images example inputs are:
    #   filename:   imgs = 'data/samples/zidane.jpg'
    #   URI:             = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
    #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(720,1280,3)
    #   PIL:             = Image.open('image.jpg')  # HWC x(720,1280,3)
    #   numpy:           = np.zeros((720,1280,3))  # HWC
    #   torch:           = torch.zeros(16,3,720,1280)  # BCHW
    #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

    t = [time_synchronized()]
    p = next(self.model.parameters())  # for device and type
    if isinstance(imgs, torch.Tensor):  # torch
        return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

    # Pre-process
    n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
    shape0, shape1, files = [], [], []  # image and inference shapes, filenames
    for i, im in enumerate(imgs):
        if isinstance(im, str):  # filename or uri
            im, f = Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im), im  # open
            im.filename = f  # for uri
        files.append(Path(im.filename).with_suffix('.jpg').name if isinstance(im, Image.Image) else f'image{i}.jpg')
        im = np.array(im)  # to numpy
        if im.shape[0] < 5:  # image in CHW
            im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
        im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3)  # enforce 3ch input
        s = im.shape[:2]  # HWC
        shape0.append(s)  # image shape
        g = (size / max(s))  # gain
        shape1.append([y * g for y in s])
        imgs[i] = im  # update
    shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
    x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
    x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
    x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
    x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
    t.append(time_synchronized())

    with torch.no_grad(), amp.autocast(enabled=p.device.type != 'cpu'):
        # Inference
        y = self.model(x, augment, profile)[0]  # forward
        t.append(time_synchronized())

        # Post-process
        y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
        for i in range(n):
            scale_coords(shape1, y[i][:, :4], shape0[i])
        t.append(time_synchronized())
        return Detections(imgs, y, files, t, self.names, x.shape)
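# Usage sketch (hedged): assuming this forward is bound to the autoshape-style
# wrapper that torch.hub attaches to pretrained YOLOv5 models, mixed input
# types can be batched in a single call.
if __name__ == '__main__':
    model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
    results = model('https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg', size=640)
    results.print()  # per-image detection summary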
def get_predictions(self, img):
    """Get predictions."""
    img = torch.from_numpy(img).to(self.device)
    img = img.half() if self.half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Inference
    t1 = time_synchronized()
    pred = self.model(img, augment=self.opt.augment)[0]

    # Apply NMS
    pred = non_max_suppression(pred, self.opt.conf_thres, self.opt.iou_thres,
                               classes=self.opt.classes, agnostic=self.opt.agnostic_nms)
    t2 = time_synchronized()
    prediction_time = t2 - t1

    return img, pred, prediction_time
def predict(self, img, im0s):
    img = torch.from_numpy(img).to(self._device)
    img = img.half() if self._half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Inference
    t1 = time_synchronized()
    pred = self._model(img)[0]

    # Apply NMS
    det = non_max_suppression(pred, self._conf_thres, self._iou_thres, classes=self._classes)[0]
    t2 = time_synchronized()
    logging.info('Inference time: {:.3f}s'.format(t2 - t1))

    if det is not None and len(det):
        # Rescale boxes from img_size to im0 size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0s.shape).round()
    return det
def get_yolo_roi(img_path, model, device, dataset_name):
    # keep only boxes whose area exceeds a dataset-specific threshold
    if dataset_name == "ped2":
        min_area_thr = 10 * 10
    elif dataset_name == "avenue":
        min_area_thr = 30 * 30
    elif dataset_name == "shanghaiTech":
        min_area_thr = 8 * 8
    else:
        raise NotImplementedError

    dataset = LoadImages(img_path, img_size=640)
    for path, img, im0s, vid_cap in dataset:
        p, s, im0 = Path(path), '', im0s
        # print(device)
        img = torch.from_numpy(img).to(device)
        img = img.float()
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img)[0]

        # Apply NMS
        pred = non_max_suppression(pred, 0.25, 0.45)

        # Process detections
        bboxs = []  # defined up front so an empty detection list still returns a list
        for i, det in enumerate(pred):  # detections per image
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class

                # results
                for *xyxy, conf, cls in reversed(det):
                    box = [int(x.cpu().item()) for x in xyxy]
                    if (box[3] - box[1] + 1) * (box[2] - box[0] + 1) > min_area_thr:
                        bboxs.append(tuple(box))

        return bboxs
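# Hypothetical usage of get_yolo_roi (the weight file and frame path are
# placeholders; assumes the YOLOv5 utils used above are importable):
if __name__ == '__main__':
    device = select_device('')  # '' picks CUDA if available, else CPU
    model = attempt_load('yolov5s.pt', map_location=device).eval()
    rois = get_yolo_roi('frames/001.jpg', model, device, 'ped2')
    print(len(rois), 'boxes above the ped2 area threshold')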
def detect(self, img, model, stride, device, imgsz):
    names = model.module.names if hasattr(model, 'module') else model.names
    # t0 = time.time()
    im0s = img.copy()
    img = letterbox(im0s, imgsz, stride=stride)[0]

    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    img = np.ascontiguousarray(img)

    img = torch.from_numpy(img).to(device)
    half = device.type != "cpu"  # half precision only supported on CUDA
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Inference
    # t1 = time_synchronized()
    pred = model(img, augment=True)[0]
    # print(pred)

    # Apply NMS
    pred = non_max_suppression(pred, 0.60, 0.5, classes=[0, 2, 3, 5, 7], agnostic=True)
    t2 = time_synchronized()

    xywhs, labels, xyxys, confs = [], [], [], []
    for i, det in enumerate(pred):
        im0 = im0s.copy()
        if len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
            for *xyxy, conf, cls in reversed(det):
                label = f'{names[int(cls)]}'
                xywh = self.bbox_rel(*xyxy)
                xyxys.append(xyxy)
                xywhs.append(xywh)
                labels.append(label)
                confs.append([conf.item()])
    # print(labels)
    return xyxys, xywhs, labels, confs, im0
def test(data,
         weights=None,
         batch_size=16,
         imgsz=640,
         conf_thres=0.001,
         iou_thres=0.6,  # for NMS
         save_json=False,
         single_cls=False,
         augment=False,
         verbose=False,
         model=None,
         dataloader=None,
         save_dir=Path(''),  # for saving images
         save_txt=False,  # for auto-labelling
         save_conf=False,
         plots=True):
    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device
    else:  # called directly
        set_logging()
        device = select_device(opt.device, batch_size=batch_size)
        save_txt = opt.save_txt  # save *.txt labels

        # Remove previous
        if os.path.exists(save_dir):
            shutil.rmtree(save_dir)  # delete dir
        os.makedirs(save_dir)  # make new dir

        if save_txt:
            out = save_dir / 'autolabels'
            if os.path.exists(out):
                shutil.rmtree(out)  # delete dir
            os.makedirs(out)  # make new dir

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    # Half
    half = device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    with open(data) as f:
        data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    check_dataset(data)  # check
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Dataloader
    if not training:
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
        path = data['test'] if opt.task == 'test' else data['val']  # path to val/test images
        dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt,
                                       hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0]

    seen = 0
    names = model.names if hasattr(model, 'names') else model.module.names
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        # Disable gradients
        with torch.no_grad():
            # Run model
            t = time_synchronized()
            inf_out, train_out = model(img, augment=augment)  # inference and training outputs
            t0 += time_synchronized() - t

            # Compute loss
            if training:  # if model has loss hyperparameters
                loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3]  # box, obj, cls

            # Run NMS
            t = time_synchronized()
            output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres)
            t1 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]]  # normalization gain whwh
                x = pred.clone()
                x[:, :4] = scale_coords(img[si].shape[1:], x[:, :4], shapes[si][0], shapes[si][1])  # to original
                for *xyxy, conf, cls in x:
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    line = (cls, conf, *xywh) if save_conf else (cls, *xywh)  # label format
                    with open(str(out / Path(paths[si]).stem) + '.txt', 'a') as f:
                        f.write(('%g ' * len(line) + '\n') % line)

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = Path(paths[si]).stem
                box = pred[:, :4].clone()  # xyxy
                scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({'image_id': int(image_id) if image_id.isnumeric() else image_id,
                                  'category_id': coco91class[int(p[5])],
                                  'bbox': [round(x, 3) for x in b],
                                  'score': round(p[4], 5)})

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        detected_set = set()
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d.item() not in detected_set:
                                detected_set.add(d.item())
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if plots and save_dir and batch_i < 1:
            f = save_dir / f'test_batch{batch_i}_gt.jpg'  # filename
            plot_images(img, targets, paths, str(f), names)  # ground truth
            f = save_dir / f'test_batch{batch_i}_pred.jpg'
            plot_images(img, output_to_target(output, width, height), paths, str(f), names)  # predictions

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots,
                                              fname=os.path.join(save_dir, 'precision-recall_curve.png'))
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)  # [P, R, mAP@0.5, mAP@0.5:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
    if not training:
        print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)

    # Save JSON
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ''  # weights
        file = save_dir / f"detections_val2017_{w}_results.json"  # predicted annotations file
        print('\nCOCO mAP with pycocotools... saving %s...' % file)
        with open(file, 'w') as f:
            json.dump(jdict, f)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files]
            cocoGt = COCO(glob.glob('../coco/annotations/instances_val*.json')[0])  # initialize COCO ground truth api
            cocoDt = cocoGt.loadRes(str(file))  # initialize COCO pred api
            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # image IDs to evaluate
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            map, map50 = cocoEval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print('ERROR: pycocotools unable to run: %s' % e)

    # Return results
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
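# Direct-call sketch (hedged): when invoked outside train.py, test() still reads
# the module-level `opt` (opt.device, opt.task, opt.save_txt), so a driver script
# normally builds `opt` with argparse first and then calls something like:
# results, maps, times = test('data/coco128.yaml', weights='yolov5s.pt',
#                             batch_size=16, imgsz=640, save_json=False)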
def mainFunc(args):
    # Set the main function flag
    print("Main Function Start...")

    # Check the GPU device
    print("Number of available GPUs: {}".format(torch.cuda.device_count()))

    # Check whether distributed running is used for the network
    is_distributed = initDistributed(args)
    master = True
    if is_distributed and os.environ["RANK"]:
        master = int(os.environ["RANK"]) == 0  # check whether this node is the master node

    # Configuration for device setting
    set_logging()
    if is_distributed:
        device = torch.device('cuda:{}'.format(args.local_rank))
    else:
        device = select_device(args.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load the configuration
    config = loadConfig(args.config)

    # CuDNN related setting
    if torch.cuda.is_available():
        cudnn.benchmark = config.DEVICE.CUDNN.BENCHMARK
        cudnn.deterministic = config.DEVICE.CUDNN.DETERMINISTIC
        cudnn.enabled = config.DEVICE.CUDNN.ENABLED

    # Configurations for directories
    save_img, save_dir, source, yolov5_weights, view_img, save_txt, imgsz = \
        False, Path(args.save_dir), args.source, args.weights, args.view_img, args.save_txt, args.img_size
    webcam = source.isnumeric() or source.startswith(
        ('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')
    if save_dir == Path('runs/detect'):  # if default
        os.makedirs('runs/detect', exist_ok=True)  # make base
        save_dir = Path(increment_dir(save_dir / 'exp', args.name))  # increment run
    os.makedirs(save_dir / 'labels' if save_txt else save_dir, exist_ok=True)  # make new dir

    # Load yolov5 model for human detection
    model_yolov5 = attempt_load(config.MODEL.PRETRAINED.YOLOV5, map_location=device)
    imgsz = check_img_size(imgsz, s=model_yolov5.stride.max())  # check img_size
    if half:
        model_yolov5.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        model_classifier = load_classifier(name='resnet101', n=2)  # initialize
        model_classifier.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        model_classifier.to(device).eval()

    # Load resnet model for human keypoints estimation
    model_resnet = eval('pose_models.' + config.MODEL.NAME.RESNET + '.get_pose_net')(config, is_train=False)
    if config.EVAL.RESNET.MODEL_FILE:
        print('=> loading model from {}'.format(config.EVAL.RESNET.MODEL_FILE))
        model_resnet.load_state_dict(torch.load(config.EVAL.RESNET.MODEL_FILE), strict=False)
    else:
        print('expected model defined in config at EVAL.RESNET.MODEL_FILE')
    model_resnet.to(device)
    model_resnet.eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)
    pose_transform = transforms.Compose([  # input transformation for 2d human pose estimation
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Get names and colors
    names = model_yolov5.module.names if hasattr(model_yolov5, 'module') else model_yolov5.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Construct filters for filtering 2D/3D human keypoints
    # filters_2d = constructFilters((1,16,2), freq=25, mincutoff=1, beta=0.01)  # for test
    # filters_3d = constructFilters((1,16,3), freq=25, mincutoff=1, beta=0.01)

    # Run the yolov5 and resnet for 2d human pose estimation
    # with torch.no_grad():
    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model_yolov5(img.half() if half else img) if device.type != 'cpu' else None  # run once

    # Process every video frame
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred_boxes = model_yolov5(img, augment=args.augment)[0]

        # Apply NMS
        pred_boxes = non_max_suppression(pred_boxes, args.conf_thres, args.iou_thres,
                                         classes=args.classes, agnostic=args.agnostic_nms)
        t2 = time_synchronized()

        # Cannot find people; move on to the next frame
        if pred_boxes[0] is None:
            # show the frame with no human detected
            cv2.namedWindow("2D Human Pose Estimation", cv2.WINDOW_NORMAL)
            cv2.imshow("2D Human Pose Estimation", im0s[0].copy())
            # wait manual operations
            # with kb.Listener(on_press=on_press) as listener:
            #     listener.join()
            #     return
            # if kb.is_pressed('t'):
            #     return
            print("No Human Detected and Move on.")
            print("-" * 30)
            continue

        # Print time (inference + NMS)
        detect_time = t2 - t1
        detect_fps = 1.0 / detect_time
        print("Human Detection Time: {}, Human Detection FPS: {}".format(detect_time, detect_fps))

        # Apply Classifier
        if classify:  # false
            pred_boxes = apply_classifier(pred_boxes, model_classifier, img, im0s)

        # Estimate 2d human pose (multiple persons)
        centers = []
        scales = []
        for id, boxes in enumerate(pred_boxes):
            if boxes is not None and len(boxes):
                boxes[:, :4] = scale_coords(img.shape[2:], boxes[:, :4], im0s[id].copy().shape).round()
                # convert tensor to list format
                boxes = np.delete(boxes.cpu().numpy(), [-2, -1], axis=1).tolist()
                for l in range(len(boxes)):
                    boxes[l] = [tuple(boxes[l][0:2]), tuple(boxes[l][2:4])]
                # convert box to center and scale
                for box in boxes:
                    center, scale = box_to_center_scale(box, imgsz, imgsz)
                    centers.append(center)
                    scales.append(scale)
        t3 = time_synchronized()
        pred_pose_2d = get_pose_estimation_prediction(config, model_resnet, im0s[0], centers, scales,
                                                      transform=pose_transform, device=device)
        t4 = time_synchronized()

        # Print time (2d human pose estimation)
        estimate_time = t4 - t3
        estimate_fps = 1.0 / estimate_time
        print("Pose Estimation Time: {}, Pose Estimation FPS: {}".format(estimate_time, estimate_fps))

        # Filter the predicted 2d human pose (multiple persons)
        t5 = time_synchronized()
        # if False:  # for test
        if config.EVAL.RESNET.USE_FILTERS_2D:
            # construct filters for every keypoint of every person in 2D
            filters_2d = constructFilters(pred_pose_2d.shape, freq=1, mincutoff=1, beta=0.01)
            print("Shape of filters_2d: ({}, {}, {})".format(
                len(filters_2d), len(filters_2d[0]), len(filters_2d[0][0])))  # for test
            for per in range(pred_pose_2d.shape[0]):
                for kp in range(pred_pose_2d.shape[1]):
                    for coord in range(pred_pose_2d.shape[2]):
                        pred_pose_2d[per][kp][coord] = filters_2d[per][kp][coord](pred_pose_2d[per][kp][coord])
        t6 = time_synchronized()

        # Print time (filter 2d human pose)
        filter_time_2d = t6 - t5
        filter_fps_2d = 1.0 / filter_time_2d
        print("Filter 2D Pose Time: {}, Filter 2D Pose FPS: {}".format(filter_time_2d, filter_fps_2d))

        # Process detections and estimations in 2D
        for i, box in enumerate(pred_boxes):
            if webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if box is not None and len(box):
                # Rescale boxes from img_size to im0 size
                box[:, :4] = scale_coords(img.shape[2:], box[:, :4], im0.shape).round()

                # Print results
                for c in box[:, -1].unique():
                    n = (box[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(box):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if args.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line) + '\n') % line)

                    # Add bbox to image
                    if save_img or view_img:
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

                # Draw joint keypoints, number orders and human skeletons for every detected person in 2D
                for person in pred_pose_2d:
                    # draw the human keypoints
                    for idx, coord in enumerate(person):
                        x_coord, y_coord = int(coord[0]), int(coord[1])
                        cv2.circle(im0, (x_coord, y_coord), 1, (0, 0, 255), 5)
                        cv2.putText(im0, str(idx), (x_coord, y_coord), cv2.FONT_HERSHEY_SIMPLEX,
                                    0.6, (255, 255, 255), 2, cv2.LINE_AA)

                    # draw the human skeletons in PACIFIC mode
                    for skeleton in PACIFIC_SKELETON_INDEXES:
                        cv2.line(im0, (int(person[skeleton[0]][0]), int(person[skeleton[0]][1])),
                                 (int(person[skeleton[1]][0]), int(person[skeleton[1]][1])), skeleton[2], 2)

            # Print time (inference + NMS + estimation)
            print('%sDone. (%.3fs)' % (s, t4 - t1))

            # Stream results
            if view_img:
                detect_text = "Detect FPS:{0:0>5.2f}/{1:0>6.2f}ms".format(detect_fps, detect_time * 1000)
                estimate_text = "Estimate FPS:{0:0>5.2f}/{1:0>6.2f}ms".format(estimate_fps, estimate_time * 1000)
                cv2.putText(im0, detect_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2, cv2.LINE_AA)
                cv2.putText(im0, estimate_text, (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2, cv2.LINE_AA)
                cv2.namedWindow("2D Human Pose Estimation", cv2.WINDOW_NORMAL)
                cv2.imshow("2D Human Pose Estimation", im0)
                if cv2.waitKey(1) & 0xFF == ord('q'):  # q to quit
                    return  # goto .mainFunc

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

        # Print time (inference + NMS + estimation + 2d filtering)
        all_process_time = t6 - t1
        all_process_fps = 1.0 / all_process_time
        print("All Process Time: {}, All Process FPS: {}".format(all_process_time, all_process_fps))
        print("-" * 30)

    # Goto label
    # label .mainFunc

    # Print saving results
    if save_txt or save_img:
        print('Results saved to %s' % save_dir)

    # Release video reader and writer, then destroy all opencv windows
    dataset.vid_cap.release()
    vid_writer.release()
    cv2.destroyAllWindows()
    print('Present 2D Human Pose Inference Done. Total Time:(%.3f seconds)' % (time.time() - t0))
def detect(agnostic_nms, augment, classes, conf_thres, device, iou_thres, half,
           model, webcam, names, deepsort, path, img, im0s):
    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Inference
    t1 = time_synchronized()
    pred = model(img, augment=augment)[0]

    # Apply NMS
    pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)
    t2 = time_synchronized()

    outputs = []
    # Process detections
    for i, det in enumerate(pred):  # detections per image
        if webcam:  # batch_size >= 1
            p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
        else:
            p, s, im0 = path, '', im0s
        im_org = im0.copy()

        s += '%gx%g ' % img.shape[2:]  # print string
        if det is not None and len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

            # Print results
            for c in det[:, -1].unique():
                n = (det[:, -1] == c).sum()  # detections per class
                s += '%g %ss, ' % (n, names[int(c)])  # add to string

            bbox_xywh = []
            confs = []

            # Adapt detections to deep sort input format
            for *xyxy, conf, cls in det:
                img_h, img_w, _ = im0.shape
                x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy)
                obj = [x_c, y_c, bbox_w, bbox_h]
                bbox_xywh.append(obj)
                confs.append([conf.item()])

            xywhs = torch.Tensor(bbox_xywh)
            confss = torch.Tensor(confs)

            # Pass detections to deepsort
            outputs = deepsort.update(xywhs, confss, im0)

            # draw boxes for visualization
            if len(outputs) > 0:
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, -1]
                draw_boxes(im0, bbox_xyxy, identities)

    return im0, outputs
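# Several of the detect() variants in this section call a module-level
# bbox_rel() helper that is not defined here. A sketch consistent with how it
# is used (xyxy corner tensors in, center-based xywh out; the image_width /
# image_height arguments are accepted but unused in this form, and some call
# sites below invoke it without them):
def bbox_rel(image_width, image_height, *xyxy):
    bbox_left = min(xyxy[0].item(), xyxy[2].item())
    bbox_top = min(xyxy[1].item(), xyxy[3].item())
    bbox_w = abs(xyxy[0].item() - xyxy[2].item())
    bbox_h = abs(xyxy[1].item() - xyxy[3].item())
    x_c = bbox_left + bbox_w / 2  # box center x
    y_c = bbox_top + bbox_h / 2  # box center y
    return x_c, y_c, bbox_w, bbox_h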
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')
    array_detected_object = []

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights, map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'
    peopleIn = 0
    peopleOut = 0
    detectedIds = []
    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    img_h, img_w, _ = im0.shape
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                outputs = deepsort.update(xywhs, confss, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)

                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        array_detected_object.append(identity)
                        array_detected_object = list(dict.fromkeys(array_detected_object))  # de-duplicate IDs

                        xas = 0
                        yas = 0
                        if identity >= 0:
                            xas = bbox_xyxy[0][0]
                            yas = bbox_xyxy[0][1]
                        if identity not in detectedIds and int(bbox_top) >= 10:
                            detectedIds.append(identity)
                            if int(bbox_top) >= 500 or (int(bbox_left) >= 800 and int(bbox_top) >= 80):
                                peopleOut += 1
                            if int(bbox_top) <= 100:
                                peopleIn += 1
                        # with open(txt_path, 'a') as f:
                        #     f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left,
                        #                                    bbox_top, bbox_w, bbox_h, -1, -1, -1, -1))  # label format
                        #     f.write(('%g ' * 3 + '\n') % (identity, bbox_left, bbox_top))  # label format
                        #     resultText = str(identity) + '-' + str(bbox_top)
                        #     f.write(resultText + '\n')  # label format
                        #     f.write(('%g ' * 4 + '\n') % (-1, frame_idx, -1, -1, str(xas), str(yas)))  # label format
                        #     f.write(str(identity))
                        #     f.write(('%g ' * 1 + '\n') % (identity))
                        #     f.write('\n')
                        #     f.write(str(bbox_xyxy))
                        #     f.write("Number people counted: " + str(len(array_detected_object)))
                # with open(txt_path, 'r') as fp:
                #     line = fp.readline()
                #     cnt = 1
                #     while line:
                #         identity = line.split("-")[0]
                #         infoCheck = line.split("-")[1]
                #         if identity not in detectedIds:
                #             detectedIds.append(identity)
                #             if int(infoCheck) > 680:
                #                 peopleOut += 1
                #             else:
                #                 peopleIn += 1
                #         print("Line {}: {}".format(cnt, line.strip().split("-")[0]))
                #         line = fp.readline()
                #         cnt += 1
                # print("All people counted: " + str(peopleIn + peopleOut))
                # print("Number people in: " + str(peopleIn))
                # print("Number people out: " + str(peopleOut))

            font = cv2.FONT_HERSHEY_DUPLEX
            # note: the last putText argument is the lineType; the original code
            # passed `font` here, which is not a valid line type
            cv2.putText(im0, "People in out counted: " + str(peopleIn + peopleOut),
                        (50, 100), font, 0.8, (0, 255, 255), 2, cv2.LINE_AA)
            cv2.putText(im0, "Number people in: " + str(peopleIn),
                        (50, 135), font, 0.8, (0, 255, 255), 2, cv2.LINE_AA)
            cv2.putText(im0, "Number people out: " + str(peopleOut),
                        (50, 170), font, 0.8, (0, 255, 255), 2, cv2.LINE_AA)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    txt_result = str(Path(out)) + '/result-counted.txt'
                    print("All people counted: " + str(peopleIn + peopleOut))
                    print("Number people in: " + str(peopleIn))
                    print("Number people out: " + str(peopleOut))
                    with open(txt_path, 'a') as f:
                        f.write("All people counted: " + str(peopleIn + peopleOut))
                        f.write("Number people in: " + str(peopleIn))
                        f.write("Number people out: " + str(peopleOut))
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                # cv2.imshow(p, im0)
                cv2.imwrite("C:/Users/lenovo/Desktop/server/output/camera.jpg", im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        # vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc('X', '2', '6', '4'), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
def detect(self, opt):
    print("before detect lock")
    self.qmut_1.lock()
    print("after detect lock")
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights, map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Set Dataloader
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    self.t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        if not self.Consuming:
            # dataset.stop_cap()
            raise StopIteration
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if not self.Consuming:
                # dataset.stop_cap()
                raise StopIteration
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                n = 0
                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # send the total person count of the current frame to the PyQt front-end
                self.sum_person.emit(n)
                self.msleep(30)

                bbox_xywh = []
                confs = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    img_h, img_w, _ = im0.shape
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                outputs = deepsort.update(xywhs, confss, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    self.bbox_id.emit([bbox_xyxy, identities])
                    self.msleep(30)
                    draw_boxes(im0, bbox_xyxy, identities)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                # self.detOut.emit(im0)
                self.queue.put(im0)
                # if self.queue.qsize() > 3:
                self.qmut_1.unlock()
                if self.queue.qsize() > 1:
                    self.queue.get(False)
                    self.queue.task_done()
                else:
                    self.msleep(30)

    print('Done. (%.3fs)' % (time.time() - self.t0))
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights, map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt',
                                          map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    save_path = str(Path(out))
    txt_path_raw = str(Path(out)) + '/results_raw.txt'
    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
            print(pred)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []
                clss = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])
                    clss.append(cls.item())

                cls_conf = confs
                cls_ids = clss
                # xywhs = torch.Tensor(bbox_xywh)
                # confss = torch.Tensor(confs)
                # if len(bbox_xywh) == 0:
                #     continue

                # filter cls id for tracking
                # mask = []
                # lst_move_life = [0, 1, 2]
                # for id in cls_ids:
                #     if id in lst_move_life:
                #         mask.append(True)
                #     else:
                #         mask.append(False)
                # bbox_xywh = list(compress(bbox_xywh, mask))
                # bbox dilation just in case bbox too small,
                # delete this line if using a better pedestrian detector
                # bbox_xywh[:, 3:] *= 1.2
                # cls_conf = list(compress(cls_conf, mask))

                bbox_xywh = torch.Tensor(bbox_xywh)
                cls_conf = torch.Tensor(cls_conf)

                # Pass detections to deepsort
                outputs = deepsort.update(bbox_xywh, cls_conf, im0, cls_ids)

                # TODO: add a counter here (counting per class)
                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, 4:5]
                    cls_id = outputs[:, -1]
                    # print(outputs[:, -1])  # --> problem found here
                    draw_boxes(im0, bbox_xyxy, cls_id, identities)

                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[4]
                        classname = output[5]
                        # first (raw) results produced by YOLOv5 and DeepSort
                        with open(txt_path_raw, 'a') as f:
                            f.write(('%g ' * 6 + '%g' + '%g ' * 3 + '\n') %
                                    (frame_idx, identity, bbox_left, bbox_top,
                                     bbox_w, bbox_h, classname, -1, -1, -1))  # label format
            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
def detect(opt, save_img=False): ct = CentroidTracker() out, source, weights, view_img, save_txt, imgsz = \ opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source == '0' or source.startswith( 'rtsp') or source.startswith('http') or source.endswith('.txt') # initialize deepsort cfg = get_config() cfg.merge_from_file(opt.config_deepsort) deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=True) # Initialize device = select_device(opt.device) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA now = datetime.datetime.now().strftime("%Y/%m/%d/%H:%M:%S") # current time # Load model model = torch.load(weights, map_location=device)[ 'model'].float() # load to FP32 model.to(device).eval() # ============================================================================= filepath_mask = 'D:/Internship Crime Detection/YOLOv5 person detection/AjnaTask/Mytracker/yolov5/weights/mask.pt' model_mask = torch.load(filepath_mask, map_location = device)['model'].float() model_mask.to(device).eval() if half: model_mask.half() names_m = model_mask.module.names if hasattr(model_mask, 'module') else model_mask.names # ============================================================================= if half: model.half() # to FP16 # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: view_img = False save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img # run once _ = model(img.half() if half else img) if device.type != 'cpu' else None save_path = str(Path(out)) txt_path = str(Path(out)) + '/results.txt' memory = {} people_counter = 0 in_people = 0 out_people = 0 people_mask = 0 people_none = 0 time_sum = 0 # now_time = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S') colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset): img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # ============================================================================= pred_mask = model_mask(img)[0] # ============================================================================= # Apply NMS pred = non_max_suppression( pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) # ============================================================================= pred_mask = non_max_suppression(pred_mask, 0.4, 0.5, classes = [0, 1, 2], agnostic = None) if pred_mask is None: continue classification = torch.cat(pred_mask)[:, -1] if len(classification) == 0: print("----",None) continue index = int(classification[0]) mask_class = names_m[index] print("MASK CLASS>>>>>>> 
\n", mask_class) # ============================================================================= # Create the haar cascade # cascPath = "D:/Internship Crime Detection/YOLOv5 person detection/AjnaTask/Mytracker/haarcascade_frontalface_alt2.xml" # faceCascade = cv2.CascadeClassifier(cascPath) t2 = time_synchronized() # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s s += '%gx%g ' % img.shape[2:] # print string save_path = str(Path(out) / Path(p).name) img_center_y = int(im0.shape[0]//2) # line = [(int(im0.shape[1]*0.258),int(img_center_y*1.3)),(int(im0.shape[1]*0.55),int(img_center_y*1.3))] # print("LINE>>>>>>>>>", line,"------------") # line = [(990, 672), (1072, 24)] line = [(1272, 892), (1800, 203)] # [(330, 468), (704, 468)] print("LINE>>>>>>>>>", line,"------------") cv2.line(im0,line[0],line[1],(0,0,255),5) # ============================================================================= # gray = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY) # # Detect faces in the image # faces = faceCascade.detectMultiScale( # gray, # scaleFactor=1.1, # minNeighbors=5, # minSize=(30, 30) # ) # # Draw a rectangle around the faces # for (x, y, w, h) in faces: # cv2.rectangle(im0, (x, y), (x+w, y+h), (0, 255, 0), 2) # text_x = x # text_y = y+h # cv2.putText(im0, mask_class, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL, # 1, (0, 0, 255), thickness=1, lineType=2) # ============================================================================= if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords( img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string bbox_xywh = [] confs = [] bbox_xyxy = [] rects = [] # Is it correct? 
# Adapt detections to deep sort input format for *xyxy, conf, cls in det: x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy) # label = f'{names[int(cls)]}' xyxy_list = torch.tensor(xyxy).view(1,4).view(-1).tolist() plot_one_box(xyxy, im0, label='person', color=colors[int(cls)], line_thickness=3) rects.append(xyxy_list) obj = [x_c, y_c, bbox_w, bbox_h,int(cls)] #cv2.circle(im0,(int(x_c),int(y_c)),color=(0,255,255),radius=12,thickness = 10) bbox_xywh.append(obj) # bbox_xyxy.append(rec) confs.append([conf.item()]) xywhs = torch.Tensor(bbox_xywh) confss = torch.Tensor(confs) # Pass detections to deepsort outputs = ct.update(rects) # xyxy # outputs = deepsort.update(xywhs, confss, im0) # deepsort index_id = [] previous = memory.copy() memory = {} boxes = [] names_ls = [] # draw boxes for visualization if len(outputs) > 0: # print('output len',len(outputs)) for id_,centroid in outputs.items(): # boxes.append([output[0],output[1],output[2],output[3]]) # index_id.append('{}-{}'.format(names_ls[-1],output[-2])) index_id.append(id_) boxes.append(centroid) memory[index_id[-1]] = boxes[-1] i = int(0) print(">>>>>>>",boxes) for box in boxes: # extract the bounding box coordinates # (x, y) = (int(box[0]), int(box[1])) # (w, h) = (int(box[2]), int(box[3])) x = int(box[0]) y = int(box[1]) # GGG if index_id[i] in previous: previous_box = previous[index_id[i]] (x2, y2) = (int(previous_box[0]), int(previous_box[1])) # (w2, h2) = (int(previous_box[2]), int(previous_box[3])) p0 = (x,y) p1 = (x2,y2) cv2.line(im0, p0, p1, (0,255,0), 3) # current frame obj center point - before frame obj center point if intersect(p0, p1, line[0], line[1]): people_counter += 1 print('==============================') print(p0,"---------------------------",p0[1]) print('==============================') print(line[1][1],'------------------',line[0][0],'-----------------', line[1][0],'-------------',line[0][1]) # if p0[1] <= line[1][1]: # in_people +=1 # else: # # if mask_class == 'mask': # # print("COUNTING MASK..", mask_class) # # people_mask += 1 # # if mask_class == 'none': # # people_none += 1 # out_people +=1 if p0[1] >= line[1][1]: in_people += 1 if mask_class == 'mask': people_mask += 1 else: people_none += 1 else: out_people += 1 i += 1 # Write MOT compliant results to file if save_txt and len(outputs) != 0: for j, output in enumerate(outputs): bbox_left = output[0] bbox_top = output[1] bbox_w = output[2] bbox_h = output[3] identity = output[-1] with open(txt_path, 'a') as f: f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left, bbox_top, bbox_w, bbox_h, -1, -1, -1, -1)) # label format else: deepsort.increment_ages() cv2.putText(im0, 'Person [down][up] : [{}][{}]'.format(out_people,in_people),(130,50),cv2.FONT_HERSHEY_COMPLEX,1.0,(0,0,255),3) cv2.putText(im0, 'Person [mask][no_mask] : [{}][{}]'.format(people_mask, people_none), (130,100),cv2.FONT_HERSHEY_COMPLEX,1.0,(0,0,255),3) # Print time (inference + NMS) print('%sDone. 
time_sum += t2 - t1 # Stream results if view_img: cv2.imshow(p, im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release() # release previous video writer fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: print('Results saved to %s' % (os.getcwd() + os.sep + out)) if platform == 'darwin': # MacOS os.system('open ' + save_path) print('Done. (%.3fs)' % (time.time() - t0))
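# The counter above relies on intersect(), which is defined elsewhere in this
# project. A minimal sketch, assuming the standard ccw (counter-clockwise
# orientation) segment test:
def ccw(A, B, C):
    # True when A, B, C are in counter-clockwise order
    return (C[1] - A[1]) * (B[0] - A[0]) > (B[1] - A[1]) * (C[0] - A[0])

def intersect(A, B, C, D):
    # True when segment AB crosses segment CD (collinear/degenerate cases ignored)
    return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)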
def detect(opt, save_img=False): out, source, weights, view_img, save_txt, imgsz = opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source == '0' or source.startswith('rtsp') or source.startswith( 'http') or source.endswith('.txt') global counter global features # initialize deepsort cfg = get_config() cfg.merge_from_file(opt.config_deepsort) deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=True) # Initialize device = select_device(opt.device) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA; faster computation at lower precision # Write headers into the csv file with open(str(Path(opt.output)) + '/results.csv', 'a') as f: f.write("Time,People Count Changed,TotalCount,ActivePerson,\n") # Initialize the scheduler to flush counts every 2 secs scheduler = BackgroundScheduler() scheduler.start() scheduler.add_job(func=write_csv, args=[opt.output], trigger=IntervalTrigger(seconds=2)) # Load model model = torch.load(weights, map_location=device)['model'].float() # load to FP32 model.to(device).eval() if half: model.half() # to FP16 # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: view_img = True save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once save_path = str(Path(out)) txt_path = str(Path(out)) + '/results.csv' for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset): img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s s += '%gx%g ' % img.shape[2:] # print string save_path = str(Path(out) / Path(p).name) if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string bbox_xywh = [] confs = [] # Adapt detections to deep sort input format for *xyxy, conf, cls in det: img_h, img_w, _ = im0.shape x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy) obj = [x_c, y_c, bbox_w, bbox_h] bbox_xywh.append(obj) confs.append([conf.item()]) xywhs = torch.Tensor(bbox_xywh) confss = torch.Tensor(confs) # Pass detections to deepsort outputs = deepsort.update(xywhs, confss, im0)
# draw boxes for visualization if len(outputs) > 0: bbox_xyxy = outputs[:, :4] identities = outputs[:, -1] draw_boxes(im0, bbox_xyxy, identities) features['identities'] = identities if save_txt and len(outputs) != 0: for j, output in enumerate(outputs): bbox_left = output[0] bbox_top = output[1] bbox_w = output[2] bbox_h = output[3] identity = output[-1] # per-frame rows are flushed to results.csv by the scheduled write_csv job # Print time (inference + NMS) print('%sDone. (%.3fs)' % (s, t2 - t1), end='\r') # Write Counter on img cv2.putText(im0, "Counter : " + str(counter), (10, 20), cv2.FONT_HERSHEY_PLAIN, 2, [1, 190, 200], 2) # Stream results # if view_img: # cv2.imshow(p, im0) # if cv2.waitKey(1) == ord('q'): # q to quit # raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release() # release previous video writer fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: print('Results saved to %s' % (os.getcwd() + os.sep + out)) print('Done. (%.3fs)' % (time.time() - t0))
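# bbox_rel() is called throughout these variants but not defined in this file.
# A minimal sketch of the (img_w, img_h, *xyxy) form used above; the no-size call
# in the mask-counting variant presumably binds a different local definition, and
# the width/height arguments are kept only for signature compatibility:
def bbox_rel(image_width, image_height, *xyxy):
    """Convert an (x1, y1, x2, y2) box to center-x, center-y, width, height."""
    bbox_left = min(xyxy[0].item(), xyxy[2].item())
    bbox_top = min(xyxy[1].item(), xyxy[3].item())
    bbox_w = abs(xyxy[0].item() - xyxy[2].item())
    bbox_h = abs(xyxy[1].item() - xyxy[3].item())
    x_c = bbox_left + bbox_w / 2
    y_c = bbox_top + bbox_h / 2
    return x_c, y_c, bbox_w, bbox_h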
def detect(save_img=False): out, source, weights, view_img, save_txt, imgsz = \ opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source.isnumeric() or source.startswith( 'rtsp') or source.startswith('http') or source.endswith('.txt') # Initialize set_logging() device = select_device(opt.device) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights modelc.to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] # Find index corresponding to a person idx_person = names.index("person") # SORT: initialize the tracker mot_tracker = sort_module.Sort(max_age=opt.max_age, min_hits=opt.min_hits, iou_threshold=opt.iou_threshold) # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s save_path = str(Path(out) / Path(p).name) txt_path = str(Path(out) / Path(p).stem) + ( '_%g' % dataset.frame if dataset.mode == 'video' else '') s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string # SORT: number of people detected idxs_ppl = ( det[:, -1] == idx_person ).nonzero(as_tuple=False).squeeze( dim=1) # 1. List of indices with 'person' class detections dets_ppl = det[idxs_ppl, :-1].to( "cpu") # 2. 
Torch.tensor with 'person' detections print('\n {} people were detected!'.format(len(idxs_ppl))) # SORT: feed detections to the tracker if len(dets_ppl) != 0: trackers = mot_tracker.update(dets_ppl) for d in trackers: plot_one_box(d[:-1], im0, label='ID' + str(int(d[-1])), color=colors[1], line_thickness=1) # Print time (inference + NMS) print('%sDone. (%.3fs)' % (s, t2 - t1)) # Stream results if view_img: cv2.imshow(p, im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: print('Results saved to %s' % Path(out)) if platform.system() == 'Darwin' and not opt.update: # MacOS os.system('open ' + save_path) print('Done. (%.3fs)' % (time.time() - t0))
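# Sketch of the SORT data contract assumed above: detections enter update() as an
# (N, 5) array of x1, y1, x2, y2, score, and tracks come back as (M, 5) rows ending
# in the track ID. The values below are illustrative only, not project output:
# import numpy as np
# dets_ppl = np.array([[100., 80., 180., 300., 0.92]])  # one 'person' detection
# trackers = mot_tracker.update(dets_ppl)               # -> [[x1, y1, x2, y2, ID]]
# for *box, track_id in trackers:
#     print('ID%d at %s' % (int(track_id), box))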
def detect(opt, device, save_img=False): out, source, weights, view_img, save_txt, imgsz = \ opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source == '0' or source.startswith('rtsp') or source.startswith( 'http') or source.endswith('.txt') colorOrder = ['red', 'purple', 'blue', 'green', 'yellow', 'orange'] frame_num = 0 framestr = 'Frame {frame}' fpses = [] frame_catch_pairs = [] ball_person_pairs = {} for color in colorDict: ball_person_pairs[color] = 0 # Read class names from the data yaml (overwritten below by the names stored in the checkpoint) with open(opt.data) as f: data_dict = yaml.load(f, Loader=yaml.FullLoader) names = data_dict['names'] # initialize deepsort cfg = get_config() cfg.merge_from_file(opt.config_deepsort) deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=True) # Initialize if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model stride = int(model.stride.max()) # model stride imgsz = check_img_size(imgsz, s=stride) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights modelc.to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = check_imshow() cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride) else: dataset = LoadImages(source, img_size=imgsz, stride=stride) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] # Run inference if device.type != 'cpu': model( torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( next(model.parameters()))) # run once t0 = time.time() for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s save_path = str(Path(out) / Path(p).name) txt_path = str(Path(out) / Path(p).stem) + ( '_%g' % dataset.frame if dataset.mode == 'video' else '') s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() bbox_xywh = [] confs = [] clses = [] # Write results for *xyxy, conf, cls in det: img_h, img_w, _ = im0.shape # get image shape x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy) obj = [x_c, y_c, bbox_w, bbox_h]
bbox_xywh.append(obj) confs.append([conf.item()]) clses.append([cls.item()]) xywhs = torch.Tensor(bbox_xywh) confss = torch.Tensor(confs) clses = torch.Tensor(clses) # Pass detections to deepsort outputs = [] global groundtruths_path if 'disable' not in groundtruths_path: groundtruths = solution.load_labels( groundtruths_path, img_w, img_h, frame_num) if (groundtruths.shape[0] == 0): outputs = deepsort.update(xywhs, confss, clses, im0) else: xywhs = groundtruths[:, 2:] tensor = torch.tensor((), dtype=torch.int32) confss = tensor.new_ones((groundtruths.shape[0], 1)) clses = groundtruths[:, 0:1] outputs = deepsort.update(xywhs, confss, clses, im0) if frame_num >= 2: for real_ID in groundtruths[:, 1:].tolist(): for DS_ID in xyxy2xywh(outputs[:, :5]): # match when all four coordinates agree within 0.5% of the frame size # (heights normalized by img_h; the original divided them by img_w) if (abs(DS_ID[0] - real_ID[1]) / img_w < 0.005) and (abs(DS_ID[1] - real_ID[2]) / img_h < 0.005) and (abs(DS_ID[2] - real_ID[3]) / img_w < 0.005) and (abs(DS_ID[3] - real_ID[4]) / img_h < 0.005): id_mapping[DS_ID[4]] = int(real_ID[0]) else: outputs = deepsort.update(xywhs, confss, clses, im0) # draw boxes for visualization if len(outputs) > 0: bbox_xyxy = outputs[:, :4] identities = outputs[:, 4] clses = outputs[:, 5] scores = outputs[:, 6] # Temporary workaround to map tracker IDs back to ground-truth IDs mapped_id_list = [] for ids in identities: if (ids in id_mapping): mapped_id_list.append(int(id_mapping[ids])) else: mapped_id_list.append(ids) ball_detect, frame_catch_pairs, ball_person_pairs = solution.detect_catches( im0, bbox_xyxy, clses, mapped_id_list, frame_num, colorDict, frame_catch_pairs, ball_person_pairs, colorOrder, save_img) t3 = time_synchronized() draw_boxes(im0, bbox_xyxy, [names[i] for i in clses], scores, ball_detect, identities) else: t3 = time_synchronized() # Draw frame number tmp = framestr.format(frame=frame_num) t_size = cv2.getTextSize(tmp, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0] cv2.putText(im0, tmp, (0, (t_size[1] + 10)), cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2) # Inference time fps = (1 / (t3 - t1)) fpses.append(fps) print('FPS=%.2f' % fps) # Stream results if view_img: cv2.imshow(p, im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release() # release previous video writer fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) vid_writer.write(im0) frame_num += 1 avgFps = (sum(fpses) / len(fpses)) print('Average FPS = %.2f' % avgFps) outpath = os.path.basename(source) outpath = outpath[:-4] outpath = out + '/' + outpath + '_out.csv' solution.write_catches(outpath, frame_catch_pairs, colorOrder) if save_txt or save_img: print('Results saved to %s' % (os.getcwd() + os.sep + out)) if platform == 'darwin': # MacOS os.system('open ' + save_path)
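# The nested loops above bind DeepSORT IDs to ground-truth IDs whenever all four
# box coordinates agree within 0.5% of the frame size. The same check as a
# stand-alone helper (the name and factoring are ours, not the project's):
def boxes_match(ds_box, gt_box, img_w, img_h, tol=0.005):
    # ds_box and gt_box are (cx, cy, w, h); x-like terms are normalized by the
    # frame width, y-like terms by the frame height
    return (abs(ds_box[0] - gt_box[0]) / img_w < tol
            and abs(ds_box[1] - gt_box[1]) / img_h < tol
            and abs(ds_box[2] - gt_box[2]) / img_w < tol
            and abs(ds_box[3] - gt_box[3]) / img_h < tol)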
def test( weights=None, data="yolov5/data/coco128.yaml", batch_size=32, image_size=640, conf_thres=0.001, iou_thres=0.6, # for NMS task="val", device="", single_cls=False, augment=False, verbose=False, save_txt=False, # for auto-labelling save_hybrid=False, # for hybrid auto-labelling save_conf=False, # save auto-label confidences save_json=False, project="runs/test", name="exp", exist_ok=False, model=None, dataloader=None, save_dir=Path(""), # for saving images plots=True, log_imgs=0, # number of logged images ): arguments = locals() # Initialize/load model and set device training = model is not None if training: # called by train.py device = next(model.parameters()).device # get model device else: # called directly set_logging() device = select_device(device, batch_size=batch_size) # Directories save_dir = Path(increment_path(Path(project) / name, exist_ok=exist_ok)) # increment run (save_dir / "labels" if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Load model model = attempt_load(weights, map_location=device) # load FP32 model image_size = check_img_size(image_size, s=model.stride.max()) # check img_size # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99 # if device.type != 'cpu' and torch.cuda.device_count() > 1: # model = nn.DataParallel(model) # Half half = device.type != "cpu" # half precision only supported on CUDA if half: model.half() # Configure model.eval() is_coco = data.endswith("coco.yaml") # is COCO dataset with open(data) as f: data = yaml.load(f, Loader=yaml.FullLoader) # model dict check_dataset(data) # check nc = 1 if single_cls else int(data["nc"]) # number of classes iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for [email protected]:0.95 niou = iouv.numel() # Logging log_imgs, wandb = min(log_imgs, 100), None # ceil try: import wandb # Weights & Biases except ImportError: log_imgs = 0 # Dataloader if not training: img = torch.zeros((1, 3, image_size, image_size), device=device) # init img _ = (model(img.half() if half else img) if device.type != "cpu" else None) # run once path = (data["test"] if task == "test" else data["val"] ) # path to val/test images opt = OptFactory(arguments) dataloader = create_dataloader(path, image_size, batch_size, model.stride.max(), opt, pad=0.5, rect=True)[0] seen = 0 confusion_matrix = ConfusionMatrix(nc=nc) names = { k: v for k, v in enumerate( model.names if hasattr(model, "names") else model.module.names) } coco91class = coco80_to_coco91_class() s = ("%20s" + "%12s" * 6) % ( "Class", "Images", "Targets", "P", "R", "[email protected]", "[email protected]:.95", ) p, r, f1, mp, mr, map50, map, t0, t1 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 loss = torch.zeros(3, device=device) jdict, stats, ap, ap_class, wandb_images = [], [], [], [], [] for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): img = img.to(device, non_blocking=True) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 targets = targets.to(device) nb, _, height, width = img.shape # batch size, channels, height, width with torch.no_grad(): # Run model t = time_synchronized() inf_out, train_out = model( img, augment=augment) # inference and training outputs t0 += time_synchronized() - t # Compute loss if training: loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3] # box, obj, cls # Run NMS targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device) # to pixels lb = 
([targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] ) # for autolabelling t = time_synchronized() output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb) t1 += time_synchronized() - t # Statistics per image for si, pred in enumerate(output): labels = targets[targets[:, 0] == si, 1:] nl = len(labels) tcls = labels[:, 0].tolist() if nl else [] # target class path = Path(paths[si]) seen += 1 if len(pred) == 0: if nl: stats.append(( torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls, )) continue # Predictions predn = pred.clone() scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1]) # native-space pred # Append to text file if save_txt: gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0 ]] # normalization gain whwh for *xyxy, conf, cls in predn.tolist(): xywh = ((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()) # normalized xywh line = ((cls, *xywh, conf) if save_conf else (cls, *xywh)) # label format with open(save_dir / "labels" / (path.stem + ".txt"), "a") as f: f.write(("%g " * len(line)).rstrip() % line + "\n") # W&B logging if plots and len(wandb_images) < log_imgs: box_data = [{ "position": { "minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3], }, "class_id": int(cls), "box_caption": "%s %.3f" % (names[cls], conf), "scores": { "class_score": conf }, "domain": "pixel", } for *xyxy, conf, cls in pred.tolist()] boxes = { "predictions": { "box_data": box_data, "class_labels": names } } # inference-space wandb_images.append( wandb.Image(img[si], boxes=boxes, caption=path.name)) # Append to pycocotools JSON dictionary if save_json: # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... 
image_id = int( path.stem) if path.stem.isnumeric() else path.stem box = xyxy2xywh(predn[:, :4]) # xywh box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner for p, b in zip(pred.tolist(), box.tolist()): jdict.append({ "image_id": image_id, "category_id": coco91class[int(p[5])] if is_coco else int(p[5]), "bbox": [round(x, 3) for x in b], "score": round(p[4], 5), }) # Assign all predictions as incorrect correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device) if nl: detected = [] # target indices tcls_tensor = labels[:, 0] # target boxes tbox = xywh2xyxy(labels[:, 1:5]) scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1]) # native-space labels if plots: confusion_matrix.process_batch( pred, torch.cat((labels[:, 0:1], tbox), 1)) # Per target class for cls in torch.unique(tcls_tensor): ti = ((cls == tcls_tensor).nonzero(as_tuple=False).view(-1) ) # prediction indices pi = ((cls == pred[:, 5]).nonzero(as_tuple=False).view(-1) ) # target indices # Search for detections if pi.shape[0]: # Prediction to target ious ious, i = box_iou(predn[pi, :4], tbox[ti]).max( 1) # best ious, indices # Append detections detected_set = set() for j in (ious > iouv[0]).nonzero(as_tuple=False): d = ti[i[j]] # detected target if d.item() not in detected_set: detected_set.add(d.item()) detected.append(d) correct[ pi[j]] = ious[j] > iouv # iou_thres is 1xn if (len(detected) == nl ): # all targets already located in image break # Append statistics (correct, conf, pcls, tcls) stats.append( (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) # Plot images if plots and batch_i < 3: f = save_dir / f"test_batch{batch_i}_labels.jpg" # labels Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start() f = save_dir / f"test_batch{batch_i}_pred.jpg" # predictions Thread( target=plot_images, args=(img, output_to_target(output), paths, f, names), daemon=True, ).start() # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names) p, r, ap50, ap = ( p[:, 0], r[:, 0], ap[:, 0], ap.mean(1), ) # [P, R, [email protected], [email protected]:0.95] mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class else: nt = torch.zeros(1) # Print results pf = "%20s" + "%12.3g" * 6 # print format print(pf % ("all", seen, nt.sum(), mp, mr, map50, map)) # Print results per class if verbose and nc > 1 and len(stats): for i, c in enumerate(ap_class): print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) # Print speeds t = tuple(x / seen * 1e3 for x in (t0, t1, t0 + t1)) + ( image_size, image_size, batch_size, ) # tuple if not training: print( "Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g" % t) # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) if wandb and wandb.run: wandb.log({"Images": wandb_images}) wandb.log({ "Validation": [ wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob("test*.jpg")) ] }) # Save JSON if save_json and len(jdict): w = (Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else "") # weights anno_json = "../coco/annotations/instances_val2017.json" # annotations json pred_json = str(save_dir / f"{w}_predictions.json") # predictions json print("\nEvaluating pycocotools mAP... saving %s..." 
% pred_json) with open(pred_json, "w") as f: json.dump(jdict, f) try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api eval = COCOeval(anno, pred, "bbox") if is_coco: eval.params.imgIds = [ int(Path(x).stem) for x in dataloader.dataset.img_files ] # image IDs to evaluate eval.evaluate() eval.accumulate() eval.summarize() map, map50 = eval.stats[: 2] # update results ([email protected]:0.95, [email protected]) except Exception as e: print(f"pycocotools unable to run: {e}") # Return results if not training: s = ( f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else "") print(f"Results saved to {save_dir}{s}") model.float() # for training maps = np.zeros(nc) + map for i, c in enumerate(ap_class): maps[c] = ap[i] return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
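# A hedged example of driving this test() directly; the weight and data paths are
# placeholders, not files this document guarantees to exist:
# results, maps, times = test(weights='yolov5s.pt',
#                             data='yolov5/data/coco128.yaml',
#                             batch_size=32, image_size=640, conf_thres=0.001)
# mp, mr, map50, map_, *losses = results  # precision, recall, mAP@.5, mAP@.5:.95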
def detect(self, Image=None): out, source, weights, view_img, save_txt, imgsz = \ self.output, self.source, self.weights, self.view_img, self.save_txt, self.img_size webcam = source == '0' or source.startswith( 'rtsp') or source.startswith('http') or source.endswith('.txt') # Initialize device = torch_utils.select_device(self.device) half = device.type != 'cpu' # half precision only supported on CUDA # Load model google_utils.attempt_download(weights) model = torch.load( weights, map_location=device)['model'].float() # load to FP32 model.to(device).eval() if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = torch_utils.load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights modelc.to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] # Run inference t0 = time.time() # Build the input frame from the incoming Image message (added by Prasanth Suresh) im0s = np.frombuffer(Image.data, dtype=np.uint8).reshape( Image.height, Image.width, -1).astype('float32') img = letterbox(im0s, new_shape=imgsz)[0] img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, HWC to CHW img = np.ascontiguousarray(img) vid_cap = None path = 'img.jpg' img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = torch_utils.time_synchronized() pred = model(img, augment=self.augment)[0] # Apply NMS pred = non_max_suppression(pred, self.conf_thres, self.iou_thres, classes=self.classes, agnostic=self.agnostic_nms) t2 = torch_utils.time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Sort the bounding boxes by ascending x values if pred[0] is not None: pred[0] = pred[0].cpu().numpy() pred[0] = pred[0][pred[0][:, 0].argsort()] pred[0] = torch.from_numpy(pred[0]) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s if det is not None: self.bounding_boxes.append(det) save_path = str(Path(out) / Path(p).name) s += '%gx%g ' % img.shape[2:] # print string # normalization gain whwh gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] if det is not None and len(det): # Rescale boxes from img_size to im0s size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string # Write results; track the box extremes (mins start at +inf so the first box always updates them) minx = float('inf') miny = float('inf')
maxx = 0 maxy = 0 for *xyxy, conf, cls in det: tlx, tly, brx, bry = int(xyxy[0]), int(xyxy[1]), int( xyxy[2]), int(xyxy[3]) if tlx < minx: minx = tlx if tly < miny: miny = tly if bry > maxy: maxy = bry if brx > maxx: maxx = brx if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh with open(save_path[:save_path.rfind('.')] + '.txt', 'a') as file: file.write(('%g ' * 5 + '\n') % (cls, *xywh)) # label format if save_img or view_img: # Add bbox to image label = '%s %.2f' % (names[int(cls)], conf) plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) # Stream results if view_img: cv2.imshow(p, im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release() # release previous video writer fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*self.fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: print('Results saved to %s' % (os.getcwd() + os.sep + out)) if platform == 'darwin': # MacOS os.system('open ' + save_path) ################################ END OF COMMENTED-OUT FOR LOOP; RESTORE INDENT LATER ################################ print('Done. (%.3fs)' % (time.time() - t0)) return self.bounding_boxes # if __name__ == '__main__': # parser = argparse.ArgumentParser() # parser.add_argument('--weights', type=str, # default='weights/yolov5s.pt', help='model.pt path') # # file/folder, 0 for webcam # parser.add_argument('--source', type=str, # default='inference/images', help='source') # parser.add_argument('--output', type=str, default='inference/output', # help='output folder') # output folder # parser.add_argument('--img-size', type=int, default=640, # help='inference size (pixels)') # parser.add_argument('--conf-thres', type=float, # default=0.4, help='object confidence threshold') # parser.add_argument('--iou-thres', type=float, # default=0.5, help='IOU threshold for NMS') # parser.add_argument('--fourcc', type=str, default='mp4v', # help='output video codec (verify ffmpeg support)') # parser.add_argument('--device', default='', # help='cuda device, i.e. 0 or 0,1,2,3 or cpu') # parser.add_argument('--view-img', action='store_true', # help='display results') # parser.add_argument('--save-txt', action='store_true', # help='save results to *.txt') # parser.add_argument('--classes', nargs='+', # type=int, help='filter by class') # parser.add_argument('--agnostic-nms', action='store_true', # help='class-agnostic NMS') # parser.add_argument('--augment', action='store_true', # help='augmented inference') # opt = parser.parse_args() # opt.img_size = check_img_size(opt.img_size) # print(opt) # with torch.no_grad(): # detect() # Update all models # for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']: # detect() # create_pretrained(opt.weights, opt.weights)
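# This method reads its frame from an Image-like message exposing .data, .height
# and .width (e.g. a ROS sensor_msgs/Image). A hedged stand-in for local testing;
# 'detector' and the SimpleNamespace message are illustrative assumptions:
# from types import SimpleNamespace
# import numpy as np
# frame = np.zeros((480, 640, 3), dtype=np.uint8)            # dummy BGR frame
# msg = SimpleNamespace(data=frame.tobytes(), height=480, width=640)
# boxes = detector.detect(Image=msg)                         # list of (N, 6) tensors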
def detect(opt, save_img=False): out, source, weights, view_img, save_txt, imgsz = \ opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source == '0' or source.startswith('rtsp') or source.startswith( 'http') or source.endswith('.txt') # initialize deepsort cfg = get_config() cfg.merge_from_file(opt.config_deepsort) deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=True) # Initialize device = select_device(opt.device) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA # Read Yaml with open(opt.data) as f: data_dict = yaml.load(f, Loader=yaml.FullLoader) names = data_dict['names'] print(names) # Load model #google_utils.attempt_download(weights) model = torch.load(weights, map_location=device)['model'].float() # load to FP32 #model = torch.save(torch.load(weights, map_location=device), weights) # update model if SourceChangeWarning # model.fuse() model.to(device).eval() if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = torch_utils.load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights modelc.to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] # Run inference t_fps = time_synchronized() frame_fps = 0 img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s save_path = str(Path(out) / Path(p).name) txt_path = str(Path(out) / Path(p).stem) + ( '_%g' % dataset.frame if dataset.mode == 'video' else '') s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results #print(det[:, -1].unique()) for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string 
bbox_xywh = [] confs = [] # Write results for *xyxy, conf, cls in det: img_h, img_w, _ = im0.shape # get image shape x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy) obj = [x_c, y_c, bbox_w, bbox_h] bbox_xywh.append(obj) confs.append([conf.item()]) if save_img or view_img: # Add bbox to image label = '%s %.2f' % (names[int(cls)], conf) plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) xywhs = torch.Tensor(bbox_xywh) confss = torch.Tensor(confs) # Pass detections to deepsort outputs = deepsort.update(xywhs, confss, im0) # draw boxes for visualization if len(outputs) > 0: bbox_xyxy = outputs[:, :4] identities = outputs[:, -1] draw_boxes(im0, bbox_xyxy, identities) # Print time (inference + NMS) print('%sDone.' % s) # Stream results if view_img: cv2.imshow(p, im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release() # release previous video writer fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) vid_writer.write(im0) # once-per-second FPS readout frame_fps += 1 if ((time_synchronized() - t_fps) >= 1): print('\nFPS=%.2f' % frame_fps) t_fps = time_synchronized() frame_fps = 0 if save_txt or save_img: print('Results saved to %s' % (os.getcwd() + os.sep + out)) if platform == 'darwin': # MacOS os.system('open ' + save_path)
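# The frame_fps/t_fps pair above prints an FPS reading roughly once per second.
# The same pattern as a small reusable helper (a sketch, not project code):
import time

class FPSMeter:
    def __init__(self):
        self.t0, self.frames = time.time(), 0

    def tick(self):
        # call once per processed frame; prints and resets every full second
        self.frames += 1
        if time.time() - self.t0 >= 1.0:
            print('FPS=%.2f' % self.frames)
            self.t0, self.frames = time.time(), 0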
def detect( weights="yolov5s.pt", source="yolov5/data/images", img_size=640, conf_thres=0.75, iou_thres=0.45, device="", view_img=False, save_txt=False, save_conf=False, classes=None, agnostic_nms=False, augment=False, update=False, project="runs/detect", name="exp", exist_ok=False, save_img=False, ): """ Args: weights: str model.pt path(s) source: str file/folder, 0 for webcam img_size: int inference size (pixels) conf_thres: float object confidence threshold iou_thres: float IOU threshold for NMS device: str cuda device, i.e. 0 or 0,1,2,3 or cpu view_img: bool display results save_txt: bool save results to *.txt save_conf: bool save confidences in save_txt labels classes: int filter by class: [0], or [0, 2, 3] agnostic-nms: bool class-agnostic NMS augment: bool augmented inference update: bool update all models project: str save results to project/name name: str save results to project/name exist_ok: bool existing project/name ok, do not increment """ source, weights, view_img, save_txt, imgsz = ( source, weights, view_img, save_txt, img_size, ) webcam = ( source.isnumeric() or source.endswith(".txt") or source.lower().startswith(("rtsp://", "rtmp://", "http://")) ) # Directories save_dir = Path( increment_path(Path(project) / name, exist_ok=exist_ok) ) # increment run (save_dir / "labels" if save_txt else save_dir).mkdir( parents=True, exist_ok=True ) # make dir # Initialize set_logging() device = select_device(device) half = device.type != "cpu" # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name="resnet101", n=2) # initialize modelc.load_state_dict( torch.load("weights/resnet101.pt", map_location=device)["model"] ).to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, "module") else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img) if device.type != "cpu" else None # run once for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=augment)[0] # Apply NMS pred = non_max_suppression( pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms, ) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0, frame = path[i], "%g: " % i, im0s[i].copy(), dataset.count else: p, s, im0, frame = path, "", im0s, getattr(dataset, "frame", 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # img.jpg txt_path = str(save_dir / "labels" / p.stem) + ( "" if dataset.mode == "image" else f"_{frame}" ) # img.txt s += "%gx%g " % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 
1, 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}s, " # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = ( (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn) .view(-1) .tolist() ) # normalized xywh line = ( (cls, *xywh, conf) if save_conf else (cls, *xywh) ) # label format with open(txt_path + ".txt", "a") as f: f.write(("%g " * len(line)).rstrip() % line + "\n") if save_img or view_img: # Add bbox to image label = f"{names[int(cls)]} {conf:.2f}" plot_one_box( xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3, ) # Print time (inference + NMS) print(f"{s}Done. ({t2 - t1:.3f}s)") # Stream results if view_img: cv2.imshow(str(p), im0) # Save results (image with detections) if save_img: if dataset.mode == "image": cv2.imwrite(save_path, im0) else: # 'video' if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release() # release previous video writer fourcc = "mp4v" # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h) ) vid_writer.write(im0) if save_txt or save_img: s = ( f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else "" ) print(f"Results saved to {save_dir}{s}") print(f"Done. ({time.time() - t0:.3f}s)")
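# Hedged example of invoking the keyword-argument detect() above; the weight and
# source paths are placeholders:
# with torch.no_grad():
#     detect(weights='yolov5s.pt', source='yolov5/data/images',
#            img_size=640, conf_thres=0.5, save_txt=True, project='runs/detect')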
def detect(opt, save_img=False): out, source, weights, view_img, save_txt, imgsz = \ opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source == '0' or source.startswith('rtsp') or source.startswith( 'http') or source.endswith('.txt') # initialize deepsort cfg = get_config() cfg.merge_from_file(opt.config_deepsort) deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=True) # Initialize device = select_device(opt.device) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = torch.load(weights, map_location=device)['model'].float() # load to FP32 model.to(device).eval() if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = torch_utils.load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights modelc.to(device).eval() # Set Dataloader vid_path, vid_writer = None, None flag = 0 # flag used when saving the video view_img = True save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once idx = -1 compare_dict = {} # create a new figure or activate an existing one fig = plt.figure() ax = fig.add_subplot(111, projection='polar') # 1x1 grid, first subplot for path, img, im0s, vid_cap in dataset: # Plot origin (the agent's start point; the origin represents the pedestrian) ax.plot(0, 0, color='black', marker='o', markersize=20, alpha=0.3) # Plot configuration ax.set_rticks([]) ax.set_rmax(1) ax.grid(False) ax.set_theta_zero_location("S") # put 0 degrees at the south ax.set_theta_direction(-1) # clockwise polar coordinates img = torch.from_numpy(img).to(device) # Crop the input frame, then wrap its first 32 columns onto the right edge img = img[:, 100:260, :] temp = img add_img = temp[:, :, :32] img = torch.cat((img, add_img), dim=2) # Crop the output frame the same way (to keep bounding boxes matched to objects)
im0s = im0s[200:520, :, :] temp = im0s add_im0s = temp[:, :64, :] im0s = np.concatenate((im0s, add_im0s), axis=1) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() idx += 1 if (idx % 10 != 0): # keep the video length: reuse the previous boxes on skipped frames if len(outputs) > 0: ori_im = draw_boxes(im0s, bbox_xyxy, identities, isCloser) # draw bboxes from the previous frame's info vid_writer.write(im0s) continue # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image p, s, im0 = path, '', im0s # we always take this branch; the console output before the file path should be trimmed in datasets.py save_path = str(Path(out) / Path(p).name) gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh # if any vehicle was detected if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results (count per class, e.g. '5 cars'; optional) for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class bbox_xywh = [] confs = [] # Adapt detections to deep sort input format for *xyxy, conf, cls in det: img_h, img_w, _ = im0.shape # size of the output frame x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy) # center coordinates, w, h obj = [x_c, y_c, bbox_w, bbox_h] bbox_xywh.append(obj) confs.append([conf.item()]) xywhs = torch.Tensor(bbox_xywh) confss = torch.Tensor(confs) # Pass detections to deepsort outputs = [] if len(bbox_xywh) != 0: # if anything was detected, send it to deepsort outputs, coors, frame, bbox_size = deepsort.update( xywhs, confss, im0, compare_dict, dataset.frame) # draw boxes for visualization if len(outputs) > 0: bbox_xyxy = outputs[:, :4] identities = outputs[:, 4] isCloser = outputs[:, -1] ori_im = draw_boxes(im0, bbox_xyxy, identities, isCloser) # draw bboxes alert.show_direction(ax, coors, bbox_size, isCloser) # display the approach direction on the polar chart plt.show(block=False) # save the plot chart: when idx is 10, save frames 1 through 9 as the same image as 10 ''' file = '/Users/wonyeong/Desktop/ewha/project/plotimgs/img%d.png' % idx plt.savefig(file) for j in range(9): file = '/Users/wonyeong/Desktop/ewha/project/plotimgs/img%d.png' % (j + idx + 1) plt.savefig(file) # draw only when a vehicle was detected..
''' plt.pause(0.01) plt.cla() # Stream results cv2.imshow('frame', im0) if cv2.waitKey(1) & 0xFF == ord('q'): break # Save results (image with detections) if save_img: if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: if (flag == 0): # open the video writer once, with the fixed wrapped-frame size vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release() # release previous video writer fps = vid_cap.get(cv2.CAP_PROP_FPS) w = 1344 h = 320 flag = 1 vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) else: vid_writer.write(im0) if save_txt or save_img: print('Results saved to %s' % (os.getcwd() + os.sep + out)) if platform == 'darwin': # MacOS os.system('open ' + save_path) print('Done. (%.3fs)' % (time.time() - t0))
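# The crop-and-concatenate above appears to treat the frame as a wrap-around
# (panoramic) strip: the leftmost columns are appended onto the right edge so
# objects straddling the seam stay contiguous. A minimal sketch of that idea,
# under that assumption (the function name is ours, not the project's):
import numpy as np

def wrap_panorama(frame, wrap_px):
    # append the leftmost wrap_px columns onto the right edge (HWC layout)
    return np.concatenate((frame, frame[:, :wrap_px]), axis=1)

# im0s = wrap_panorama(im0s[200:520], 64)  # crop rows 200:520, then wrap 64 columns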
def test(data,
         weights=None,  # model.pt path(s)
         batch_size=32,  # batch size
         imgsz=640,  # inference size (pixels)
         conf_thres=0.001,  # confidence threshold
         iou_thres=0.6,  # NMS IoU threshold
         task='val',  # train, val, test, speed or study
         device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
         single_cls=False,  # treat as single-class dataset
         augment=False,  # augmented inference
         verbose=False,  # verbose output
         save_txt=False,  # save results to *.txt
         save_hybrid=False,  # save label+prediction hybrid results to *.txt
         save_conf=False,  # save confidences in --save-txt labels
         save_json=False,  # save a cocoapi-compatible JSON results file
         project='runs/test',  # save to project/name
         name='exp',  # save to project/name
         exist_ok=False,  # existing project/name ok, do not increment
         half=True,  # use FP16 half-precision inference
         model=None,
         dataloader=None,
         save_dir=Path(''),
         plots=True,
         wandb_logger=None,
         compute_loss=None,
         ):
    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device
    else:  # called directly
        set_logging()
        device = select_device(device, batch_size=batch_size)

        # Directories
        save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
        (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        gs = max(int(model.stride.max()), 32)  # grid size (max stride)
        imgsz = check_img_size(imgsz, s=gs)  # check image size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    # Half
    half &= device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    if isinstance(data, str):
        with open(data) as f:
            data = yaml.safe_load(f)
    check_dataset(data)  # check
    is_coco = data['val'].endswith('coco/val2017.txt')  # COCO dataset
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Logging
    log_imgs = 0
    if wandb_logger and wandb_logger.wandb:
        log_imgs = min(wandb_logger.log_imgs, 100)

    # Dataloader
    if not training:
        if device.type != 'cpu':
            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
        task = task if task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
        dataloader = create_dataloader(data[task], imgsz, batch_size, gs, single_cls, pad=0.5, rect=True,
                                       prefix=colorstr(f'{task}: '))[0]

    seen = 0
    confusion_matrix = ConfusionMatrix(nc=nc)
    names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%11s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1, t2 = 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        t_ = time_synchronized()
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        t = time_synchronized()
        t0 += t - t_

        # Run model
        out, train_out = model(img, augment=augment)  # inference and training outputs
        t1 += time_synchronized() - t

        # Compute loss
        if compute_loss:
            loss += compute_loss([x.float() for x in train_out], targets)[1][:3]  # box, obj, cls

        # Run NMS
        targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
        lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
        t = time_synchronized()
        out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls)
        t2 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(out):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            path = Path(paths[si])
            seen += 1

            if len(pred) == 0:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Predictions
            if single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
            scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1])  # native-space pred

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]]  # normalization gain whwh
                for *xyxy, conf, cls in predn.tolist():
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                    with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f:
                        f.write(('%g ' * len(line)).rstrip() % line + '\n')

            # W&B logging - Media Panel plots
            if len(wandb_images) < log_imgs and wandb_logger.current_epoch > 0:  # Check for test operation
                if wandb_logger.current_epoch % wandb_logger.bbox_interval == 0:
                    box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
                                 "class_id": int(cls),
                                 "box_caption": "%s %.3f" % (names[cls], conf),
                                 "scores": {"class_score": conf},
                                 "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()]
                    boxes = {"predictions": {"box_data": box_data, "class_labels": names}}  # inference-space
                    wandb_images.append(wandb_logger.wandb.Image(img[si], boxes=boxes, caption=path.name))
            wandb_logger.log_training_progress(predn, path, names) if wandb_logger and wandb_logger.wandb_run else None

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(path.stem) if path.stem.isnumeric() else path.stem
                box = xyxy2xywh(predn[:, :4])  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({'image_id': image_id,
                                  'category_id': coco91class[int(p[5])] if is_coco else int(p[5]),
                                  'bbox': [round(x, 3) for x in b],
                                  'score': round(p[4], 5)})

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1])  # native-space labels
                if plots:
                    confusion_matrix.process_batch(predn, torch.cat((labels[:, 0:1], tbox), 1))

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(predn[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        detected_set = set()
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d.item() not in detected_set:
                                detected_set.add(d.item())
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if plots and batch_i < 3:
            f = save_dir / f'test_batch{batch_i}_labels.jpg'  # labels
            Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start()
            f = save_dir / f'test_batch{batch_i}_pred.jpg'  # predictions
            Thread(target=plot_images, args=(img, output_to_target(out), paths, f, names), daemon=True).start()

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
        ap50, ap = ap[:, 0], ap.mean(1)  # AP@0.5, AP@0.5:0.95
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%11i' * 2 + '%11.3g' * 4  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in (t0, t1, t2))  # speeds per image
    if not training:
        shape = (batch_size, 3, imgsz, imgsz)
        print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t)

    # Plots
    if plots:
        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
        if wandb_logger and wandb_logger.wandb:
            val_batches = [wandb_logger.wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob('test*.jpg'))]
            wandb_logger.log({"Validation": val_batches})
    if wandb_images:
        wandb_logger.log({"Bounding Box Debugger/Images": wandb_images})

    # Save JSON
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ''  # weights
        anno_json = '../coco/annotations/instances_val2017.json'  # annotations json
        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
        print('\nEvaluating pycocotools mAP... saving %s...' % pred_json)
        with open(pred_json, 'w') as f:
            json.dump(jdict, f)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            check_requirements(['pycocotools'])
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            anno = COCO(anno_json)  # init annotations api
            pred = anno.loadRes(pred_json)  # init predictions api
            eval = COCOeval(anno, pred, 'bbox')
            if is_coco:
                eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files]  # image IDs to evaluate
            eval.evaluate()
            eval.accumulate()
            eval.summarize()
            map, map50 = eval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print(f'pycocotools unable to run: {e}')

    # Return results
    model.float()  # for training
    if not training:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
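# Usage sketch (illustrative, not part of the original file): calling test()
# standalone on a dataset YAML. The 'data/coco128.yaml' and 'yolov5s.pt' paths
# are assumptions for the example; any compatible data/weights pair works.
if __name__ == '__main__':
    results, maps, times = test('data/coco128.yaml',
                                weights='yolov5s.pt',
                                batch_size=32,
                                imgsz=640,
                                conf_thres=0.001,
                                iou_thres=0.6,
                                task='val')
    mp, mr, map50, map_ = results[:4]  # mean P, mean R, mAP@.5, mAP@.5:.95
    print(f'P={mp:.3f} R={mr:.3f} mAP@.5={map50:.3f} mAP@.5:.95={map_:.3f}')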
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA
    now = datetime.datetime.now().strftime("%Y/%m/%d/%H:%M:%S")  # current time

    # Load model
    model = torch.load(weights, map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = False
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'
    url = 'sample_url'
    uid = 'bus1'
    os.system('shutdown -r 06:00')  # schedule a daily reboot at 06:00
    memory = {}
    people_counter = 0
    car_counter = 0
    in_people = 0
    out_people = 0
    time_sum = 0
    now_time = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')
    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            img_center_x = int(im0.shape[1] // 2)
            # line = [(0, img_center_y), (im0.shape[1], img_center_y)]
            line = [(int(img_center_x + 50), 0), (img_center_x + 50, int(im0.shape[0]))]
            line2 = [(int(img_center_x + 170), 0), (img_center_x + 170, int(im0.shape[0]))]
            cv2.line(im0, line[0], line[1], (0, 0, 255), 5)
            cv2.line(im0, line2[0], line2[1], (0, 255, 0), 5)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
                crop_xyxy = det[:, :4]
                det = det[crop_xyxy[:, 0] < img_center_x + 170]  # drop detections to the right of the second line
                if len(det) == 0:
                    pass
                else:
                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, names[int(c)])  # add to string

                    bbox_xywh = []
                    confs = []
                    bbox_xyxy = []

                    # Adapt detections to deep sort input format
                    for *xyxy, conf, cls in det:
                        x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                        obj = [x_c, y_c, bbox_w, bbox_h, int(cls)]
                        # cv2.circle(im0, (int(x_c), int(y_c)), color=(0, 255, 255), radius=12, thickness=10)
                        bbox_xywh.append(obj)
                        # bbox_xyxy.append(rec)
                        confs.append([conf.item()])

                    xywhs = torch.Tensor(bbox_xywh)
                    confss = torch.Tensor(confs)

                    # Pass detections to deepsort
                    outputs = deepsort.update(xywhs, confss, im0)

                    # deepsort
                    index_id = []
                    previous = memory.copy()
                    memory = {}
                    boxes = []
                    names_ls = []

                    # draw boxes for visualization
                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -2]
                        labels = outputs[:, -1]
                        dic = {0: 'person', 2: 'car'}
                        for i in labels:
                            names_ls.append(dic[i])
                        # print('output len', len(outputs))
                        for output in outputs:
                            boxes.append([output[0], output[1], output[2], output[3]])
                            index_id.append('{}-{}'.format(names_ls[-1], output[-2]))
                            memory[index_id[-1]] = boxes[-1]

                        if time_sum >= 60:
                            param = {'In_people': in_people, 'Out_people': out_people, 'uid': uid,
                                     'time': now_time + '~' + datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')}
                            response = requests.post(url, data=param)
                            response_text = response.text
                            with open('counting.txt', 'a') as f:
                                f.write('{}~{} IN : {}, Out : {} Response: {}\n'.format(
                                    now_time, datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S'),
                                    in_people, out_people, response_text))
                            people_counter, car_counter, in_people, out_people = 0, 0, 0, 0
                            time_sum = 0
                            now_time = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')

                        i = int(0)
                        for box in boxes:
                            # extract the bounding box coordinates
                            (x, y) = (int(box[0]), int(box[1]))
                            (w, h) = (int(box[2]), int(box[3]))
                            if index_id[i] in previous:
                                previous_box = previous[index_id[i]]
                                (x2, y2) = (int(previous_box[0]), int(previous_box[1]))
                                (w2, h2) = (int(previous_box[2]), int(previous_box[3]))
                                p0 = (int(x + (w - x) / 2), int(y + (h - y) / 2))
                                p1 = (int(x2 + (w2 - x2) / 2), int(y2 + (h2 - y2) / 2))
                                # current frame obj center point - previous frame obj center point
                                cv2.line(im0, p0, p1, (0, 255, 0), 3)
                                if intersect(p0, p1, line[0], line[1]) and index_id[i].split('-')[0] == 'person':
                                    people_counter += 1
                                    if p0[0] > line[1][0]:
                                        in_people += 1
                                    else:
                                        out_people += 1
                                if intersect(p0, p1, line[0], line[1]) and index_id[i].split('-')[0] == 'car':
                                    car_counter += 1
                            i += 1
                        draw_boxes(im0, bbox_xyxy, identities, labels)

                    # Write MOT compliant results to file
                    if save_txt and len(outputs) != 0:
                        for j, output in enumerate(outputs):
                            bbox_left = output[0]
                            bbox_top = output[1]
                            bbox_w = output[2]
                            bbox_h = output[3]
                            identity = output[-1]
                            with open(txt_path, 'a') as f:
                                f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left, bbox_top,
                                                               bbox_w, bbox_h, -1, -1, -1, -1))  # label format
            else:
                deepsort.increment_ages()

            cv2.putText(im0, 'In : {}, Out : {}'.format(in_people, out_people), (130, 50),
                        cv2.FONT_HERSHEY_COMPLEX, 1.0, (0, 0, 255), 3)
            cv2.putText(im0, 'Person : {}'.format(people_counter), (130, 100),
                        cv2.FONT_HERSHEY_COMPLEX, 1.0, (0, 0, 255), 3)

            # Print time (inference + NMS)
            if time_sum >= 60:
                param = {'In_people': in_people, 'Out_people': out_people, 'uid': uid,
                         'time': now_time + '~' + datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')}
                response = requests.post(url, data=param)
                response_text = response.text
                with open('counting.txt', 'a') as f:
                    f.write('{}~{} IN : {}, Out : {}, Response: {}\n'.format(
                        now_time, datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S'),
                        in_people, out_people, response_text))
                people_counter, car_counter, in_people, out_people = 0, 0, 0, 0
                time_sum = 0
                now_time = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')
            print('%sDone. (%.3fs)' % (s, t2 - t1))
            time_sum += t2 - t1

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    im0 = cv2.resize(im0, (0, 0), fx=0.5, fy=0.5, interpolation=cv2.INTER_LINEAR)
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    # post the final counts before exiting
    param = {'In_people': in_people, 'Out_people': out_people, 'uid': uid,
             'time': now_time + '~' + datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')}
    response = requests.post(url, data=param)
    response_text = response.text
    with open('counting.txt', 'a') as f:
        f.write('{}~{} IN : {}, Out : {}, Response: {}\n'.format(
            now_time, datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S'),
            in_people, out_people, response_text))

    print('Done. (%.3fs)' % (time.time() - t0))
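# The line-crossing counter above relies on an `intersect` helper that is not
# defined in this file. A minimal sketch, assuming the original used the
# conventional CCW (counter-clockwise) segment-intersection test:
def ccw(A, B, C):
    # True if the triplet (A, B, C) turns counter-clockwise
    return (C[1] - A[1]) * (B[0] - A[0]) > (B[1] - A[1]) * (C[0] - A[0])

def intersect(A, B, C, D):
    # Segments AB and CD cross iff C and D lie on opposite sides of AB
    # and A and B lie on opposite sides of CD
    return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)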
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    names = model.module.names if hasattr(model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                outputs = deepsort.update(xywhs, confss, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)

                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        with open(txt_path, 'a') as f:
                            f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left, bbox_top,
                                                           bbox_w, bbox_h, -1, -1, -1, -1))  # label format
            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
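# `bbox_rel` converts a corner-format box (x1, y1, x2, y2) into the
# center-format (x_c, y_c, w, h) that DeepSORT expects. It is not defined in
# this file; the sketch below mirrors the inline expansion used in the GUI
# update() method later in this document, so the signature is an assumption:
def bbox_rel(*xyxy):
    # xyxy: x1, y1, x2, y2 scalar tensors
    bbox_left = min([xyxy[0].item(), xyxy[2].item()])
    bbox_top = min([xyxy[1].item(), xyxy[3].item()])
    bbox_w = abs(xyxy[0].item() - xyxy[2].item())
    bbox_h = abs(xyxy[1].item() - xyxy[3].item())
    x_c = bbox_left + bbox_w / 2  # box center x
    y_c = bbox_top + bbox_h / 2  # box center y
    return x_c, y_c, bbox_w, bbox_h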
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith('.txt') or \
        source.lower().startswith(('rtsp://', 'rtmp://', 'http://')) or source.lower().startswith('intel')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    folder_main = out.split('/')[0]
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    folder_features = folder_main + '/features'
    if os.path.exists(folder_features):
        shutil.rmtree(folder_features)  # delete features output folder
    folder_crops = folder_main + '/image_crops'
    if os.path.exists(folder_crops):
        shutil.rmtree(folder_crops)  # delete output folder with object crops
    os.makedirs(out)  # make new output folder
    os.makedirs(folder_features)  # make new output folder
    os.makedirs(folder_crops)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights[0], map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        if source.lower().startswith('intel'):
            dataset = LoadRealSense2()
            save_img = True
        else:
            dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # frames per second
    # TODO if use intel or if use given footage
    fps = 30  # dataset.cap.get(cv2.CAP_PROP_FPS)
    critical_time_frames = opt.time * fps

    # COUNTER: initialization
    counter = VoteCounter(critical_time_frames, fps)
    print('CRITICAL TIME IS ', opt.time, 'sec, or ', counter.critical_time, ' frames')

    # Find index corresponding to a person
    idx_person = names.index("person")

    # Deep SORT: initialize the tracker
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # AlphaPose: initialization
    # args_p = update_config(opt.config_alphapose)
    # cfg_p = update_config(args_p.ALPHAPOSE.cfg)
    #
    # args_p.ALPHAPOSE.tracking = args_p.ALPHAPOSE.pose_track or args_p.ALPHAPOSE.pose_flow
    #
    # demo = SingleImageAlphaPose(args_p.ALPHAPOSE, cfg_p, device)
    # output_pose = opt.output.split('/')[0] + '/pose'
    # if not os.path.exists(output_pose):
    #     os.mkdir(output_pose)

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # TODO => COUNTER: draw queueing ROI
        # compute urn centroid (1st frame only) and plot a bounding box around it
        # if dataset.frame == 1:
        #     counter.read_urn_coordinates(opt.urn, im0s, opt.radius)
        # counter.plot_urn_bbox(im0s)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                if source.lower().startswith('intel'):
                    p, s, im0, frame = path, '%g: ' % i, im0s[i].copy(), dataset.count
                else:
                    p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            print(save_path)
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Deep SORT: person class only
                idxs_ppl = (det[:, -1] == idx_person).nonzero(as_tuple=False).squeeze(dim=1)  # 1. List of indices with 'person' class detections
                dets_ppl = det[idxs_ppl, :-1]  # 2. Torch.tensor with 'person' detections
                print('\n {} people were detected!'.format(len(idxs_ppl)))

                # Deep SORT: convert data into a proper format
                xywhs = xyxy2xywh(dets_ppl[:, :-1]).to("cpu")
                confs = dets_ppl[:, 4].to("cpu")

                # Deep SORT: feed detections to the tracker
                if len(dets_ppl) != 0:
                    trackers, features = deepsort.update(xywhs, confs, im0)
                    # tracks inside a critical sphere
                    trackers_inside = []
                    for i, d in enumerate(trackers):
                        plot_one_box(d[:-1], im0, label='ID' + str(int(d[-1])), color=colors[1], line_thickness=1)
                        # TODO: queue COUNTER
                        # d_include = counter.centroid_distance(d, im0, colors[1], dataset.frame)
                        # if d_include:
                        #     trackers_inside.append(d)

                    # ALPHAPOSE: show skeletons for bounding boxes inside the critical sphere
                    # if len(trackers_inside) > 0:
                    #     pose = demo.process('frame_' + str(dataset.frame), im0, trackers_inside)
                    #     im0 = demo.vis(im0, pose)
                    #     demo.writeJson([pose], output_pose, form=args_p.ALPHAPOSE.format, for_eval=args_p.ALPHAPOSE.eval)
                    #
                    #     counter.save_features_and_crops(im0, dataset.frame, trackers_inside, features, folder_main)

            cv2.putText(im0, 'Voted ' + str(len(counter.voters_count)), (50, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 2)

            print('NUM VOTERS', len(counter.voters))
            print(list(counter.voters.keys()))

            # COUNTER
            if len(counter.voters) > 0:
                counter.save_voter_trajectory(dataset.frame, folder_main)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        if type(vid_cap) is dict:  # RealSense source provides no cv2.VideoCapture
                            # TODO hard code
                            w, h, fps = 640, 480, 6
                        else:
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
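# Worked example (illustrative, not from the original file): the person-only
# filtering idiom used above, on a dummy detection tensor. Rows are
# x1, y1, x2, y2, conf, cls.
import torch
det = torch.tensor([[10., 10., 50., 90., 0.9, 0.],   # person
                    [20., 30., 80., 60., 0.8, 2.],   # car
                    [15., 12., 55., 95., 0.7, 0.]])  # person
idx_person = 0  # assumed class index of 'person'
idxs_ppl = (det[:, -1] == idx_person).nonzero(as_tuple=False).squeeze(dim=1)
dets_ppl = det[idxs_ppl, :-1]  # drop the class column -> x1 y1 x2 y2 conf
assert dets_ppl.shape == (2, 5)  # two person rows survive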
def update(self):
    f = 0
    start_time = datetime.datetime.now()
    today = datetime.date.today()
    date = today.strftime("%d/%m/%Y")  # dd/mm/YY
    current_time = start_time.strftime("%H:%M:%S")
    trackIds, position, speed_e, fps = [], {}, 0, 0.0
    two_w, three_w, four_w, truck, bus, total = 0, 0, 0, 0, 0, 0
    img = torch.zeros((1, 3, self.imgsz, self.imgsz), device=self.device)  # init img
    (grabbed, frame) = self.vs.read()
    path = "traffic3.mp4"
    img0 = frame
    names = self.model.module.names if hasattr(self.model, "module") else self.model.names
    if grabbed:
        img = letterbox(img0, new_shape=640)[0]

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)
        f = f + 1
        # count = self.count + 1
        img = torch.from_numpy(img).to(self.device)
        img = img.half() if self.half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = self.model(img, augment=self.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, self.conf_thres, self.iou_thres,
                                   classes=self.classes, agnostic=self.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if self.webcam:  # batch_size >= 1
                p, s, im0 = path[i], "%g: " % i, img0[i].copy()
            else:
                p, s, im0 = path, "", img0
            s += "%gx%g " % img.shape[2:]  # print string
            # save_path = str(Path(self.out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                bbox_xywh = []
                confs = []
                labels = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    label = f"{names[int(cls)]}"
                    bbox_left = min([xyxy[0].item(), xyxy[2].item()])
                    bbox_top = min([xyxy[1].item(), xyxy[3].item()])
                    bbox_w = abs(xyxy[0].item() - xyxy[2].item())
                    bbox_h = abs(xyxy[1].item() - xyxy[3].item())
                    x_c = bbox_left + bbox_w / 2
                    y_c = bbox_top + bbox_h / 2
                    # x_c, y_c, bbox_w, bbox_h = bbox_rel(self, *xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])
                    labels.append(label)

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                outputs = self.deepsort.update(xywhs, confss, im0)

                # draw line
                cv2.polylines(im0, [self.pts_arr], self.isClosed, (255, 0, 0), 2)
                cv2.rectangle(img0, (650, 0), (850, 170), color=(0, 0, 0), thickness=-1)

                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    offset = (0, 0)
                    counter = 0
                    for i, box in enumerate(bbox_xyxy):
                        if i < (len(labels[::-1]) - 1):
                            x1, y1, x2, y2 = [int(i) for i in box]
                            x1 += offset[0]
                            x2 += offset[0]
                            y1 += offset[1]
                            y2 += offset[1]

                            # box text and bar
                            id = int(identities[i]) if identities is not None else 0
                            label = "{}{:d}".format("", id)
                            cls = labels[::-1][i]

                            # Object counting
                            if cls == "motorcycle":
                                two_w, total = self.Obj_counting(id, label, trackIds, two_w, total)
                            elif cls == "auto":
                                three_w, total = self.Obj_counting(id, label, trackIds, three_w, total)
                            elif cls == "car":
                                four_w, total = self.Obj_counting(id, label, trackIds, four_w, total)
                            elif cls == "truck":
                                truck, total = self.Obj_counting(id, label, trackIds, truck, total)
                            elif cls == "bus":
                                bus, total = self.Obj_counting(id, label, trackIds, bus, total)
                            fps = self.calculate_fps(start_time, f)

                            # check if the center point of the object is inside the polygon
                            point = Point((int(x1 + (x2 - x1) / 2), int(y1 + (y2 - y1) / 2)))
                            polygon = Polygon(self.points)
                            if polygon.contains(point):
                                counter = counter + 1
                                t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
                                cv2.rectangle(im0, (x1, y1), (x2, y2), (0, 255, 0), 3)

                    if counter > 5:
                        flow = "High"
                    elif counter >= 2 and counter < 5:
                        flow = "Medium"
                    else:
                        flow = "Low"

                    cv2.putText(im0, "Occupancy - " + str(counter), (650, 30), cv2.FONT_HERSHEY_DUPLEX, .5, (255, 0, 0), 1)
                    cv2.putText(im0, "Date - " + str(date), (650, 60), cv2.FONT_HERSHEY_DUPLEX, .5, (255, 0, 0), 1)
                    cv2.putText(im0, "Time - " + str(current_time), (650, 90), cv2.FONT_HERSHEY_DUPLEX, .5, (255, 0, 0), 1)
                    cv2.putText(im0, "Speed - " + "N A", (650, 120), cv2.FONT_HERSHEY_DUPLEX, .5, (255, 0, 0), 1)
                    cv2.putText(im0, "Flow - " + str(flow), (650, 150), cv2.FONT_HERSHEY_DUPLEX, .5, (255, 0, 0), 1)

                    # img = cv2.resize(img, (650, 360))
                    # image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    image = PIL.Image.fromarray(img0)
                    image = PIL.ImageTk.PhotoImage(image)
                    font = ("Arial", 12)
                    self.canvas.configure(image=image)
                    self.canvas.image = image

                    result = tk.Label(self.counting_result, text="Counting Results", width=12,
                                      font=font, anchor="center", fg="blue")
                    result.grid(row=0, column=2, padx=2)
                    # result.pack(padx=10, pady=10)

                    if self.two_w is None:
                        self.two_w = tk.Label(self.counting_result, text=f"Two Wheeler\n\n{two_w}", width=13,
                                              font=font, anchor="center", bg="#8080c0", fg="white")
                        self.two_w.grid(row=1, column=0, padx=2)
                    else:
                        self.two_w.configure(text=f"Two Wheeler\n\n{two_w}")
                    if self.three_w is None:
                        self.three_w = tk.Label(self.counting_result, text=f"Three Wheeler\n\n{three_w}", width=13,
                                                font=font, anchor="center", bg="#8080c0", fg="white")
                        self.three_w.grid(row=1, column=1, padx=2)
                    else:
                        self.three_w.configure(text=f"Three Wheeler\n\n{three_w}")
                    if self.four_w is None:
                        self.four_w = tk.Label(self.counting_result, text=f"Four Wheeler\n\n{four_w}", width=13,
                                               font=font, anchor="center", bg="#8080c0", fg="white")
                        self.four_w.grid(row=1, column=2, padx=2)
                    else:
                        self.four_w.configure(text=f"Four Wheeler\n\n{four_w}")
                    if self.truck is None:
                        self.truck = tk.Label(self.counting_result, text=f"Truck\n\n{truck}", width=10,
                                              font=font, anchor="center", bg="#8080c0", fg="white")
                        self.truck.grid(row=1, column=3, padx=1)
                    else:
                        self.truck.configure(text=f"Truck\n\n{truck}")
                    if self.bus is None:
                        self.bus = tk.Label(self.counting_result, text=f"Bus\n\n{bus}", width=10,
                                            font=font, anchor="center", bg="#8080c0", fg="white")
                        self.bus.grid(row=1, column=4, padx=2)
                    else:
                        self.bus.configure(text=f"Bus\n\n{bus}")
                    if self.total is None:
                        self.total = tk.Label(self.counting_result, text=f"Total Vehicle\n\n{total}", width=10,
                                              font=font, anchor="center", bg="#8080c0", fg="white")
                        self.total.grid(row=1, column=5, pady=2)
                    else:
                        self.total.configure(text=f"Total Vehicle\n\n{total}")
                    if self.fps is None:
                        self.fps = tk.Label(self.counting_result, text=f"FPS\n\n{fps:.2f}", width=13,
                                            font=font, anchor="center", bg="#8080c0", fg="white")
                        self.fps.grid(row=2, column=0, pady=2)
                    else:
                        self.fps.configure(text=f"FPS\n\n{fps:.2f}")
            else:
                self.deepsort.increment_ages()

        # schedule the next frame
        self.root.after(self.delay, self.update)

        # Print time (inference + NMS)
        print("%sDone. (%.3fs)" % (s, t2 - t1))
    else:
        self.root.quit()
        print("***********************************************FINISHED***********************************************")
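# Worked example (illustrative, not from the original file): the occupancy
# check above runs shapely's point-in-polygon test on each track's box center.
# The ROI and box values below are hypothetical.
from shapely.geometry import Point, Polygon
roi = Polygon([(0, 0), (100, 0), (100, 100), (0, 100)])  # hypothetical ROI
x1, y1, x2, y2 = 40, 40, 60, 80  # a tracked box in xyxy format
center = Point(int(x1 + (x2 - x1) / 2), int(y1 + (y2 - y1) / 2))
print(roi.contains(center))  # True -> this vehicle counts toward occupancy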
def detect(opt, save_img=False):
    global bird_image
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize the ROI frame
    cv2.namedWindow("image")
    cv2.setMouseCallback("image", get_mouse_points)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights, map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # initialize moving average window
    movingAverageUpdater = movingAverage.movingAverage(5)

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'

    d = DynamicUpdate()
    d.on_launch()
    risk_factors = []
    frame_nums = []
    count = 0

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        if frame_idx == 0:
            while True:
                image = im0s
                cv2.imshow("image", image)
                cv2.waitKey(1)
                if len(mouse_pts) == 7:
                    cv2.destroyWindow("image")
                    break
            four_points = mouse_pts

            # Get perspective, M is the transformation matrix for bird's eye view
            M, Minv = get_camera_perspective(image, four_points[0:4])

            # Last two points in get_mouse_points: the threshold distance between points
            threshold_pts = src = np.float32(np.array([four_points[4:]]))

            # Convert distance to bird's eye view
            warped_threshold_pts = cv2.perspectiveTransform(threshold_pts, M)[0]

            # Get distance in pixels
            threshold_pixel_dist = np.sqrt(
                (warped_threshold_pts[0][0] - warped_threshold_pts[1][0]) ** 2 +
                (warped_threshold_pts[0][1] - warped_threshold_pts[1][1]) ** 2)

            # Draw the ROI on the output images
            ROI_pts = np.array([four_points[0], four_points[1], four_points[3], four_points[2]], np.int32)

            # initialize birdeye view video writer
            frame_h, frame_w, _ = image.shape
            bevw = birdeye_video_writer.birdeye_video_writer(frame_h, frame_w, M, threshold_pixel_dist)
        else:
            break

    t = time.time()
    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        print("Loop time: ", time.time() - t)
        t = time.time()
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        cv2.polylines(im0s, [ROI_pts], True, (0, 255, 255), thickness=4)

        # Inference
        tOther = time.time()
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()
        print("Non max suppression and inference: ", time.time() - tOther)
        print("Pre detection time: ", time.time() - t)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                bbox_xywh = []
                bbox_xyxy = []
                confs = []
                ROI_polygon = Polygon(ROI_pts)

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    img_h, img_w, _ = im0.shape
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    confs.append([conf.item()])
                    bbox_xyxy.append(xyxy)
                    bbox_xywh.append(obj)

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                deepsortTime = time.time()
                # outputs = deepsort.update(xywhs, confss, im0)
                print("Deepsort function call: ", (time.time() - deepsortTime))
                outputs = bbox_xyxy

                # draw boxes for visualization
                if len(outputs) > 0:
                    # filter deepsort output
                    outputs_in_ROI, ids_in_ROI = remove_points_outside_ROI(bbox_xyxy, ROI_polygon)
                    center_coords_in_ROI = xywh_to_center_coords(outputs_in_ROI)

                    warped_pts = birdeye_transformer.transform_center_coords_to_birdeye(center_coords_in_ROI, M)

                    clusters = DBSCAN(eps=threshold_pixel_dist, min_samples=1).fit(warped_pts)
                    print(clusters.labels_)
                    draw_boxes(im0, outputs_in_ROI, clusters.labels_)
                    risk_dict = Counter(clusters.labels_)
                    bird_image = bevw.create_birdeye_frame(warped_pts, clusters.labels_, risk_dict)

                    # movingAverageUpdater.updatePoints(warped_pts, ids_in_ROI)
                    #
                    # gettingAvgTime = time.time()
                    # movingAveragePairs = movingAverageUpdater.getCurrentAverage()
                    #
                    # movingAverageIds = [id for id, x_coord, y_coord in movingAveragePairs]
                    # movingAveragePts = [(x_coord, y_coord) for id, x_coord, y_coord in movingAveragePairs]

                    # embed the bird image into the video
                    # otherStuff = time.time()
                    # if len(movingAveragePairs) > 0:
                    #     movingAvgClusters = DBSCAN(eps=threshold_pixel_dist, min_samples=1).fit(movingAveragePts)
                    #     movingAvgClustersLables = movingAvgClusters.labels_
                    #     risk_dict = Counter(movingAvgClustersLables)
                    #     bird_image = bevw.create_birdeye_frame(movingAveragePts, movingAvgClustersLables, risk_dict)
                    #     bird_image = resize(bird_image, 20)
                    #     bv_height, bv_width, _ = bird_image.shape
                    #     frame_x_center, frame_y_center = frame_w // 2, frame_h // 2
                    #     x_offset = 20
                    #     im0[frame_y_center - bv_height // 2:frame_y_center + bv_height // 2,
                    #         x_offset:bv_width + x_offset] = bird_image
                    # else:
                    #     risk_dict = Counter(clusters.labels_)
                    #     bird_image = bevw.create_birdeye_frame(warped_pts, clusters.labels_, risk_dict)

                    bird_image = resize(bird_image, 20)
                    bv_height, bv_width, _ = bird_image.shape
                    frame_x_center, frame_y_center = frame_w // 2, frame_h // 2
                    x_offset = 20
                    im0[frame_y_center - bv_height // 2:frame_y_center + bv_height // 2,
                        x_offset:bv_width + x_offset] = bird_image
                    # print("Other stuff: ", time.time() - otherStuff)

                    # write the risk graph
                    risk_factors += [compute_frame_rf(risk_dict)]
                    frame_nums += [frame_idx]

                    graphTime = time.time()
                    if frame_idx > 100:
                        count += 1
                        frame_nums.pop(0)
                        risk_factors.pop(0)
                    if frame_idx % 10 == 0:
                        d.on_running(frame_nums, risk_factors, count, count + 100)
                    print("Graph Time: ", time.time() - graphTime)

                # Write MOT compliant results to file
                if save_txt and len(outputs_in_ROI) != 0:
                    for j, output in enumerate(outputs_in_ROI):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        with open(txt_path, 'a') as f:
                            f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left, bbox_top,
                                                           bbox_w, bbox_h, -1, -1, -1, -1))  # label format

            # Stream results
            if view_img:
                # cv2.imshow("bird_image", bird_image)
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, bird_image)
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
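# The ROI selection above depends on a `get_mouse_points` callback and a
# module-level `mouse_pts` list, neither defined in this file. A minimal
# sketch of the conventional OpenCV pattern (an assumption, not the original);
# the loop above waits for seven clicks, of which the first four define the
# perspective ROI and the next two the threshold distance:
import cv2
mouse_pts = []

def get_mouse_points(event, x, y, flags, param):
    # append a point on each left click; detect() polls len(mouse_pts)
    if event == cv2.EVENT_LBUTTONDOWN:
        mouse_pts.append((x, y))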