Example #1
0
def mainFunc(args):
    # Mark the start of the main function
    print("Main Function Start...")

    # Check the GPU device
    print("Number of available GPUs: {}".format(torch.cuda.device_count()))

    # Check whether distributed running is used for the network
    is_distributed = initDistributed(args)
    master = True
    if is_distributed and "RANK" in os.environ:
        master = int(
            os.environ["RANK"]) == 0  # check whether this node is the master node

    # Configuration for device setting
    set_logging()
    if is_distributed:
        device = torch.device('cuda:{}'.format(args.local_rank))
    else:
        device = select_device(args.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load the configuration
    config = loadConfig(args.config)

    # CuDNN related setting
    if torch.cuda.is_available():
        cudnn.benchmark = config.DEVICE.CUDNN.BENCHMARK
        cudnn.deterministic = config.DEVICE.CUDNN.DETERMINISTIC
        cudnn.enabled = config.DEVICE.CUDNN.ENABLED

    # Configuration for directories
    save_img, save_dir, source, yolov5_weights, view_img, save_txt, imgsz = \
        False, Path(args.save_dir), args.source, args.weights, args.view_img, args.save_txt, args.img_size
    webcam = source.isnumeric() or source.startswith(
        ('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')

    if save_dir == Path('runs/detect'):  # if default
        os.makedirs('runs/detect', exist_ok=True)  # make base
        save_dir = Path(increment_dir(save_dir / 'exp',
                                      args.name))  # increment run
    os.makedirs(save_dir / 'labels' if save_txt else save_dir,
                exist_ok=True)  # make new dir

    # Load yolov5 model for human detection
    model_yolov5 = attempt_load(config.MODEL.PRETRAINED.YOLOV5,
                                map_location=device)
    imgsz = check_img_size(imgsz,
                           s=model_yolov5.stride.max())  # check img_size
    if half:
        model_yolov5.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        model_classifier = load_classifier(name='resnet101', n=2)  # initialize
        model_classifier.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        model_classifier.to(device).eval()

    # Load resnet model for human keypoints estimation
    model_resnet = eval('pose_models.' + config.MODEL.NAME.RESNET +
                        '.get_pose_net')(config, is_train=False)
    if config.EVAL.RESNET.MODEL_FILE:
        print('=> loading model from {}'.format(config.EVAL.RESNET.MODEL_FILE))
        model_resnet.load_state_dict(torch.load(config.EVAL.RESNET.MODEL_FILE),
                                     strict=False)
    else:
        print('=> no model file specified in config at EVAL.RESNET.MODEL_FILE')
    model_resnet.to(device)
    model_resnet.eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)
    pose_transform = transforms.Compose(
        [  # input transformation for 2d human pose estimation
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

    # Get names and colors
    names = model_yolov5.module.names if hasattr(
        model_yolov5, 'module') else model_yolov5.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Construct filters for filtering 2D/3D human keypoints
    # filters_2d = constructFilters((1,16,2), freq=25, mincutoff=1, beta=0.01)  # for test
    # filters_3d = constructFilters((1,16,3), freq=25, mincutoff=1, beta=0.01)
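    # constructFilters is assumed to return a nested (persons x keypoints x coords)
    # structure of One Euro filters, one filter per coordinate; the filtering loop
    # further below therefore indexes filters_2d[per][kp][coord] and calls each
    # entry on the raw coordinate to smooth it over time.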

    # Run the yolov5 and resnet for 2d human pose estimation
    # with torch.no_grad():
    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model_yolov5(img.half() if half else img
                     ) if device.type != 'cpu' else None  # run once

    # Process every video frame
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred_boxes = model_yolov5(img, augment=args.augment)[0]

        # Apply NMS
        pred_boxes = non_max_suppression(pred_boxes,
                                         args.conf_thres,
                                         args.iou_thres,
                                         classes=args.classes,
                                         agnostic=args.agnostic_nms)
        t2 = time_synchronized()

        # No people detected in this frame; show it and move on to the next one
        if pred_boxes[0] is None:
            # show the frame with no human detected
            cv2.namedWindow("2D Human Pose Estimation", cv2.WINDOW_NORMAL)
            cv2.imshow("2D Human Pose Estimation", im0s[0].copy())
            cv2.waitKey(1)  # let the window refresh before skipping this frame
            # wait for manual operations
            # with kb.Listener(on_press=on_press) as listener:
            #     listener.join()
            #     return
            # if kb.is_pressed('t'):
            #     return
            print("No human detected; moving on.")
            print("-" * 30)
            continue

        # Print time (inference + NMS)
        detect_time = t2 - t1
        detect_fps = 1.0 / detect_time
        print("Human Detection Time: {}, Human Detection FPS: {}".format(
            detect_time, detect_fps))

        # Apply Classifier
        if classify:  # false
            pred_boxes = apply_classifier(pred_boxes, model_classifier, img,
                                          im0s)

        # Estimate 2d human pose (multiple people)
        centers = []
        scales = []
        for id, boxes in enumerate(pred_boxes):
            if boxes is not None and len(boxes):
                boxes[:, :4] = scale_coords(img.shape[2:], boxes[:, :4],
                                            im0s[id].copy().shape).round()
                # convert tensor to list format
                boxes = np.delete(boxes.cpu().numpy(), [-2, -1], axis=1).tolist()
                for l in range(len(boxes)):
                    boxes[l] = [tuple(boxes[l][0:2]), tuple(boxes[l][2:4])]
                # convert every box to the center/scale format of the pose model
                for box in boxes:
                    center, scale = box_to_center_scale(box, imgsz, imgsz)
                    centers.append(center)
                    scales.append(scale)
        t3 = time_synchronized()
        pred_pose_2d = get_pose_estimation_prediction(config,
                                                      model_resnet,
                                                      im0s[0],
                                                      centers,
                                                      scales,
                                                      transform=pose_transform,
                                                      device=device)
        t4 = time_synchronized()

        # Print time (2d human pose estimation)
        estimate_time = t4 - t3
        estimate_fps = 1.0 / estimate_time
        print("Pose Estimation Time: {}, Pose Estimation FPS: {}".format(
            estimate_time, estimate_fps))

        # Filter the predicted 2d human pose (multiple people)
        t5 = time_synchronized()
        # if False:  # for test
        if config.EVAL.RESNET.USE_FILTERS_2D:
            # construct filters for every keypoint of every person in 2D
            filters_2d = constructFilters(pred_pose_2d.shape,
                                          freq=1,
                                          mincutoff=1,
                                          beta=0.01)
            print("Shape of filters_2d: ({}, {}, {})".format(
                len(filters_2d), len(filters_2d[0]),
                len(filters_2d[0][0])))  # for test
            for per in range(pred_pose_2d.shape[0]):
                for kp in range(pred_pose_2d.shape[1]):
                    for coord in range(pred_pose_2d.shape[2]):
                        pred_pose_2d[per][kp][coord] = filters_2d[per][kp][
                            coord](pred_pose_2d[per][kp][coord])
        t6 = time_synchronized()

        # Print time (filter 2d human pose)
        filter_time_2d = t6 - t5
        filter_fps_2d = 1.0 / filter_time_2d
        print("Filter 2D Pose Time: {}, Filter 2D Pose FPS: {}".format(
            filter_time_2d, filter_fps_2d))

        # Process detections and estimations in 2D
        for i, box in enumerate(pred_boxes):
            if webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh

            if box is not None and len(box):
                # Rescale boxes from img_size to im0 size
                box[:, :4] = scale_coords(img.shape[2:], box[:, :4],
                                          im0.shape).round()

                # Print results
                for c in box[:, -1].unique():
                    n = (box[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(box):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if args.save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line) + '\n') % line)

                    # Add bbox to image
                    if save_img or view_img:
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

                # Draw joint keypoints, index numbers and human skeletons for every detected person in 2D
                for person in pred_pose_2d:
                    # draw the human keypoints
                    for idx, coord in enumerate(person):
                        x_coord, y_coord = int(coord[0]), int(coord[1])
                        cv2.circle(im0, (x_coord, y_coord), 1, (0, 0, 255), 5)
                        cv2.putText(im0, str(idx), (x_coord, y_coord),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                                    (255, 255, 255), 2, cv2.LINE_AA)

                    # draw the human skeletons in PACIFIC mode
                    for skeleton in PACIFIC_SKELETON_INDEXES:
                        cv2.line(im0, (int(person[skeleton[0]][0]),
                                       int(person[skeleton[0]][1])),
                                 (int(person[skeleton[1]][0]),
                                  int(person[skeleton[1]][1])), skeleton[2], 2)

            # Print time (inference + NMS + estimation)
            print('%sDone. (%.3fs)' % (s, t4 - t1))

            # Stream results
            if view_img:
                detect_text = "Detect FPS:{0:0>5.2f}/{1:0>6.2f}ms".format(
                    detect_fps, detect_time * 1000)
                estimate_text = "Estimate FPS:{0:0>5.2f}/{1:0>6.2f}ms".format(
                    estimate_fps, estimate_time * 1000)
                cv2.putText(im0, detect_text, (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2,
                            cv2.LINE_AA)
                cv2.putText(im0, estimate_text, (10, 60),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2,
                            cv2.LINE_AA)
                cv2.namedWindow("2D Human Pose Estimation", cv2.WINDOW_NORMAL)
                cv2.imshow("2D Human Pose Estimation", im0)
                if cv2.waitKey(1) & 0xFF == ord('q'):  # q to quit
                    return
                    # goto .mainFunc

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

        # Print time (inference + NMS + estimation + 2d filtering)
        all_process_time = t6 - t1
        all_process_fps = 1.0 / all_process_time
        print("All Process Time: {}, All Process FPS: {}".format(
            all_process_time, all_process_fps))
        print("-" * 30)

    # Goto label
    # label .mainFunc

    # Print saving results
    if save_txt or save_img:
        print('Results saved to %s' % save_dir)

    # Release the video reader and writer, then destroy all OpenCV windows
    if getattr(dataset, 'vid_cap', None) is not None:
        dataset.vid_cap.release()
    if isinstance(vid_writer, cv2.VideoWriter):
        vid_writer.release()
    cv2.destroyAllWindows()
    print('2D Human Pose Inference Done. Total Time: (%.3f seconds)' %
          (time.time() - t0))
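
A minimal sketch of what the box_to_center_scale helper used above is assumed to do, following the common SimpleBaseline/HRNet convention (pixel_std = 200, a 1.25 enlargement); the helper actually bundled with this project may differ in details:

import numpy as np

def box_to_center_scale(box, model_image_width, model_image_height, pixel_std=200):
    # box arrives as ((x1, y1), (x2, y2)), the format built in the loop above
    (x1, y1), (x2, y2) = box
    box_width, box_height = x2 - x1, y2 - y1
    center = np.array([x1 + box_width * 0.5, y1 + box_height * 0.5], dtype=np.float32)

    # pad the box to the model input aspect ratio so the crop is not distorted
    aspect_ratio = model_image_width / model_image_height
    if box_width > aspect_ratio * box_height:
        box_height = box_width / aspect_ratio
    else:
        box_width = box_height * aspect_ratio

    scale = np.array([box_width / pixel_std, box_height / pixel_std], dtype=np.float32)
    scale *= 1.25  # enlarge slightly so the whole person stays inside the crop
    return center, scale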
Example #2
0
def detect(
    weights="yolov5s.pt",
    source="yolov5/data/images",
    img_size=640,
    conf_thres=0.75,
    iou_thres=0.45,
    device="",
    view_img=False,
    save_txt=False,
    save_conf=False,
    classes=None,
    agnostic_nms=False,
    augment=False,
    update=False,
    project="runs/detect",
    name="exp",
    exist_ok=False,
    save_img=False,
):
    """
    Args:
        weights: str
            model.pt path(s)
        source: str
            file/folder, 0 for webcam
        img_size: int
            inference size (pixels)
        conf_thres: float
            object confidence threshold
        iou_thres: float
            IOU threshold for NMS
        device: str
            cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img: bool
            display results
        save_txt: bool
            save results to *.txt
        save_conf: bool
            save confidences in save_txt labels
        classes: list of int
            filter by class, e.g. [0] or [0, 2, 3]
        agnostic_nms: bool
            class-agnostic NMS
        augment: bool
            augmented inference
        update: bool
            update all models
        project: str
            save results to project/name
        name: str
            save results to project/name
        exist_ok: bool
            existing project/name ok, do not increment
        save_img: bool
            save annotated images/videos (enabled automatically for file sources)
    """
    imgsz = img_size  # the other arguments are used directly; only img_size gets an alias
    webcam = (
        source.isnumeric()
        or source.endswith(".txt")
        or source.lower().startswith(("rtsp://", "rtmp://", "http://"))
    )

    # Directories
    save_dir = Path(
        increment_path(Path(project) / name, exist_ok=exist_ok)
    )  # increment run
    (save_dir / "labels" if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True
    )  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half = device.type != "cpu"  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name="resnet101", n=2)  # initialize
        modelc.load_state_dict(
            torch.load("weights/resnet101.pt", map_location=device)["model"]
        )  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, "module") else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != "cpu" else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(
            pred,
            conf_thres,
            iou_thres,
            classes=classes,
            agnostic=agnostic_nms,
        )
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], "%g: " % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, "", im0s, getattr(dataset, "frame", 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / "labels" / p.stem) + (
                "" if dataset.mode == "image" else f"_{frame}"
            )  # img.txt
            s += "%gx%g " % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}s, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (
                            (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn)
                            .view(-1)
                            .tolist()
                        )  # normalized xywh
                        line = (
                            (cls, *xywh, conf) if save_conf else (cls, *xywh)
                        )  # label format
                        with open(txt_path + ".txt", "a") as f:
                            f.write(("%g " * len(line)).rstrip() % line + "\n")

                    if save_img or view_img:  # Add bbox to image
                        label = f"{names[int(cls)]} {conf:.2f}"
                        plot_one_box(
                            xyxy,
                            im0,
                            label=label,
                            color=colors[int(cls)],
                            line_thickness=3,
                        )

            # Print time (inference + NMS)
            print(f"{s}Done. ({t2 - t1:.3f}s)")

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)

            # Save results (image with detections)
            if save_img:
                if dataset.mode == "image":
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = "mp4v"  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)
                        )
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = (
            f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}"
            if save_txt
            else ""
        )
        print(f"Results saved to {save_dir}{s}")

    print(f"Done. ({time.time() - t0:.3f}s)")
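
For reference, a small standalone sketch of the label-line arithmetic used in the save_txt branch above (xyxy2xywh combined with the gn normalization gain): pixel corner coordinates become a center-based box normalized by the image width and height. This is only an illustration, not the project's own xyxy2xywh helper.

def xyxy_to_normalized_xywh(xyxy, img_w, img_h):
    x1, y1, x2, y2 = xyxy
    x_c = (x1 + x2) / 2 / img_w   # box center x, normalized
    y_c = (y1 + y2) / 2 / img_h   # box center y, normalized
    w = (x2 - x1) / img_w         # box width, normalized
    h = (y2 - y1) / img_h         # box height, normalized
    return x_c, y_c, w, h

# e.g. a 100x50 box in the top-left corner of a 640x480 frame:
# xyxy_to_normalized_xywh((0, 0, 100, 50), 640, 480) -> approximately (0.078, 0.052, 0.156, 0.104)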
Example #3
0
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(
            ('rtsp://', 'rtmp://',
             'http://')) or source.lower().startswith('intel')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    folder_main = out.split('/')[0]
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    folder_features = folder_main + '/features'
    if os.path.exists(folder_features):
        shutil.rmtree(folder_features)  # delete features output folder
    folder_crops = folder_main + '/image_crops'
    if os.path.exists(folder_crops):
        shutil.rmtree(folder_crops)  # delete output folder with object crops
    os.makedirs(out)  # make new output folder
    os.makedirs(folder_features)  # make new output folder
    os.makedirs(folder_crops)  # make new output folder

    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights[0],
                       map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        if source.lower().startswith('intel'):
            dataset = LoadRealSense2()
            save_img = True
        else:
            dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # frames per second
    # TODO if use intel or if use given footage
    fps = 30  # dataset.cap.get(cv2.CAP_PROP_FPS)
    critical_time_frames = opt.time * fps

    # COUNTER: initialization
    counter = VoteCounter(critical_time_frames, fps)
    print('CRITICAL TIME IS ', opt.time, 'sec, or ', counter.critical_time,
          ' frames')

    # Find index corresponding to a person
    idx_person = names.index("person")

    # Deep SORT: initialize the tracker
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # AlphaPose: initialization
    # args_p = update_config(opt.config_alphapose)
    # cfg_p = update_config(args_p.ALPHAPOSE.cfg)
    #
    # args_p.ALPHAPOSE.tracking = args_p.ALPHAPOSE.pose_track or args_p.ALPHAPOSE.pose_flow
    #
    # demo = SingleImageAlphaPose(args_p.ALPHAPOSE, cfg_p, device)
    # output_pose = opt.output.split('/')[0] + '/pose'
    # if not os.path.exists(output_pose):
    #     os.mkdir(output_pose)

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # TODO => COUNTER: draw queueing ROI
        #  compute urn centroid (1st frame only) and plot a bounding box around it
        # if dataset.frame == 1:
        #     counter.read_urn_coordinates(opt.urn, im0s, opt.radius)
        # counter.plot_urn_bbox(im0s)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                if source.lower().startswith('intel'):
                    p, s, im0, frame = path, '%g: ' % i, im0s[i].copy(
                    ), dataset.count
                else:
                    p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            print(save_path)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Deep SORT: person class only
                idxs_ppl = (
                    det[:, -1] == idx_person
                ).nonzero(as_tuple=False).squeeze(
                    dim=1)  # 1. List of indices with 'person' class detections
                dets_ppl = det[idxs_ppl, :
                               -1]  # 2. Torch.tensor with 'person' detections
                print('\n {} people were detected!'.format(len(idxs_ppl)))

                # Deep SORT: convert data into a proper format
                xywhs = xyxy2xywh(dets_ppl[:, :-1]).to("cpu")
                confs = dets_ppl[:, 4].to("cpu")

                # Deep SORT: feed detections to the tracker
                if len(dets_ppl) != 0:
                    trackers, features = deepsort.update(xywhs, confs, im0)
                    # tracks inside a critical sphere
                    trackers_inside = []
                    for i, d in enumerate(trackers):
                        plot_one_box(d[:-1],
                                     im0,
                                     label='ID' + str(int(d[-1])),
                                     color=colors[1],
                                     line_thickness=1)

                        # TODO: queue COUNTER
                        # d_include = counter.centroid_distance(d, im0, colors[1], dataset.frame)
                        # if d_include:
                        #     trackers_inside.append(d)

                    # ALPHAPOSE: show skeletons for bounding boxes inside the critical sphere
                    # if len(trackers_inside) > 0:
                    #     pose = demo.process('frame_'+str(dataset.frame), im0, trackers_inside)
                    #     im0 = demo.vis(im0, pose)
                    #     demo.writeJson([pose], output_pose, form=args_p.ALPHAPOSE.format, for_eval=args_p.ALPHAPOSE.eval)
                    #
                    #     counter.save_features_and_crops(im0, dataset.frame, trackers_inside, features, folder_main)

            cv2.putText(im0, 'Voted ' + str(len(counter.voters_count)),
                        (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255),
                        2)

            print('NUM VOTERS', len(counter.voters))
            print(list(counter.voters.keys()))

            # COUNTER
            if len(counter.voters) > 0:
                counter.save_voter_trajectory(dataset.frame, folder_main)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        if isinstance(vid_cap, dict):  # RealSense source has no cv2.VideoCapture
                            # TODO hard-coded stream parameters
                            w, h, fps = 640, 480, 6
                        else:
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
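
A condensed sketch of the Deep SORT hand-off performed in the loop above: keep only the 'person' rows of the detection tensor, convert corner boxes to center-based xywh, and move everything to the CPU before calling deepsort.update. The names below are illustrative, and the update() call simply mirrors the fork used in this example; other Deep SORT forks expect different arguments.

import torch

def person_dets_for_deepsort(det, idx_person):
    # det: Nx6 tensor of [x1, y1, x2, y2, conf, cls] in original image pixels
    keep = det[:, -1] == idx_person                 # person rows only
    boxes = det[keep, :4]
    xywhs = torch.zeros_like(boxes)
    xywhs[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2   # center x
    xywhs[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2   # center y
    xywhs[:, 2] = boxes[:, 2] - boxes[:, 0]         # width
    xywhs[:, 3] = boxes[:, 3] - boxes[:, 1]         # height
    confs = det[keep, 4]
    return xywhs.cpu(), confs.cpu()

# Hypothetical usage inside the detection loop above:
#   xywhs, confs = person_dets_for_deepsort(det, idx_person)
#   if len(xywhs):
#       trackers, features = deepsort.update(xywhs, confs, im0)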
Example #4
0
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') %
                                    (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                # cv2.imshow(p, im0)
                cv2.imwrite("C:/Users/lenovo/Desktop/server/output/camera.jpg",
                            im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        # vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                        vid_writer = cv2.VideoWriter(
                            save_path,
                            cv2.VideoWriter_fourcc('X', '2', '6', '4'), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
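
All of these examples call scale_coords before drawing or saving boxes. A sketch of what that helper is assumed to do, based on the widely used YOLOv5 implementation: undo the letterbox padding, divide by the resize gain, and clip to the original frame. The vendored copy in any particular repo may differ slightly, and the version below assumes boxes is a torch tensor modified in place.

def scale_coords_sketch(inference_shape, boxes, original_shape):
    # inference_shape: (h, w) of the network input; original_shape: im0.shape (h, w, c)
    gain = min(inference_shape[0] / original_shape[0],
               inference_shape[1] / original_shape[1])            # resize ratio
    pad_w = (inference_shape[1] - original_shape[1] * gain) / 2   # letterbox padding x
    pad_h = (inference_shape[0] - original_shape[0] * gain) / 2   # letterbox padding y
    boxes[:, [0, 2]] -= pad_w   # remove horizontal padding
    boxes[:, [1, 3]] -= pad_h   # remove vertical padding
    boxes[:, :4] /= gain        # rescale to the original resolution
    boxes[:, [0, 2]] = boxes[:, [0, 2]].clamp(0, original_shape[1])  # clip x to width
    boxes[:, [1, 3]] = boxes[:, [1, 3]].clamp(0, original_shape[0])  # clip y to height
    return boxes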
Example #5
0
def detect(opt, device, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    colorOrder = ['red', 'purple', 'blue', 'green', 'yellow', 'orange']
    frame_num = 0
    framestr = 'Frame {frame}'
    fpses = []
    frame_catch_pairs = []
    ball_person_pairs = {}

    for color in colorDict:
        ball_person_pairs[color] = 0

    # Read Class Name Yaml
    with open(opt.data) as f:
        data_dict = yaml.load(f, Loader=yaml.FullLoader)
    names = data_dict['names']

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()
        t3 = t2  # fallback so the FPS print below works even when nothing is detected

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                bbox_xywh = []
                confs = []
                clses = []

                # Write results
                for *xyxy, conf, cls in det:

                    img_h, img_w, _ = im0.shape  # get image shape
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])
                    clses.append([cls.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)
                clses = torch.Tensor(clses)
                # Pass detections to deepsort
                outputs = []
                global groundtruths_path
                if 'disable' not in groundtruths_path:
                    # print('\nenabled', groundtruths_path)
                    groundtruths = solution.load_labels(
                        groundtruths_path, img_w, img_h, frame_num)
                    if (groundtruths.shape[0] == 0):
                        outputs = deepsort.update(xywhs, confss, clses, im0)
                    else:
                        # print(groundtruths)
                        xywhs = groundtruths[:, 2:]
                        tensor = torch.tensor((), dtype=torch.int32)
                        confss = tensor.new_ones((groundtruths.shape[0], 1))
                        clses = groundtruths[:, 0:1]
                        outputs = deepsort.update(xywhs, confss, clses, im0)

                    if frame_num >= 2:
                        for real_ID in groundtruths[:, 1:].tolist():
                            for DS_ID in xyxy2xywh(outputs[:, :5]):
                                if (abs(DS_ID[0] - real_ID[1]) / img_w < 0.005
                                    ) and (abs(DS_ID[1] - real_ID[2]) / img_h <
                                           0.005) and (
                                               abs(DS_ID[2] - real_ID[3]) /
                                               img_w < 0.005) and (
                                                   abs(DS_ID[3] - real_ID[4]) /
                                                   img_w < 0.005):
                                    id_mapping[DS_ID[4]] = int(real_ID[0])
                else:
                    outputs = deepsort.update(xywhs, confss, clses, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, 4]
                    clses = outputs[:, 5]
                    scores = outputs[:, 6]

                    # Temporary workaround to map tracker IDs to the annotated IDs
                    mapped_id_list = []
                    for ids in identities:
                        if (ids in id_mapping):
                            mapped_id_list.append(int(id_mapping[ids]))
                        else:
                            mapped_id_list.append(ids)

                    ball_detect, frame_catch_pairs, ball_person_pairs = solution.detect_catches(
                        im0, bbox_xyxy, clses, mapped_id_list, frame_num,
                        colorDict, frame_catch_pairs, ball_person_pairs,
                        colorOrder, save_img)

                    t3 = time_synchronized()
                    draw_boxes(im0, bbox_xyxy, [names[i] for i in clses],
                               scores, ball_detect, identities)
                else:
                    t3 = time_synchronized()

            #Draw frame number
            tmp = framestr.format(frame=frame_num)
            t_size = cv2.getTextSize(tmp, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
            cv2.putText(im0, tmp, (0, (t_size[1] + 10)),
                        cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2)

            #Inference Time
            fps = (1 / (t3 - t1))
            fpses.append(fps)
            print('FPS=%.2f' % fps)

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)
            frame_num += 1

    #t4 = time_synchronized()
    avgFps = (sum(fpses) / len(fpses))
    print('Average FPS = %.2f' % avgFps)
    #print('Total Runtime = %.2f' % (t4 - t0))

    outpath = os.path.splitext(os.path.basename(source))[0]
    outpath = out + '/' + outpath + '_out.csv'
    solution.write_catches(outpath, frame_catch_pairs, colorOrder)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)
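
The nested abs(...) / img_w < 0.005 comparisons above are easier to read as a helper: a Deep SORT track is remapped to an annotated ID when every coordinate differs by less than 0.5% of the image size. This rewrite uses hypothetical names; the final divisor keeps img_w, mirroring the original code, which divides the height difference by the width as well.

def map_track_ids(tracks_xywh_id, groundtruths, img_w, img_h, id_mapping, tol=0.005):
    # tracks_xywh_id: rows of [x, y, w, h, track_id]  (Deep SORT output after xyxy2xywh)
    # groundtruths:   rows of [gt_id, x, y, w, h]     (annotations for the current frame)
    for gt_id, gx, gy, gw, gh in groundtruths:
        for tx, ty, tw, th, track_id in tracks_xywh_id:
            if (abs(tx - gx) / img_w < tol and abs(ty - gy) / img_h < tol
                    and abs(tw - gw) / img_w < tol
                    and abs(th - gh) / img_w < tol):  # img_w kept to match the original
                id_mapping[track_id] = int(gt_id)
    return id_mapping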
Example #6
0
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Find index corresponding to a person
    idx_person = names.index("person")

    # SORT: initialize the tracker
    mot_tracker = sort_module.Sort(max_age=opt.max_age,
                                   min_hits=opt.min_hits,
                                   iou_threshold=opt.iou_threshold)

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # SORT: number of people detected
                idxs_ppl = (
                    det[:, -1] == idx_person
                ).nonzero(as_tuple=False).squeeze(
                    dim=1)  # 1. List of indices with 'person' class detections
                dets_ppl = det[idxs_ppl, :-1].to(
                    "cpu")  # 2. Torch.tensor with 'person' detections
                print('\n {} people were detected!'.format(len(idxs_ppl)))

                # SORT: feed detections to the tracker
                if len(dets_ppl) != 0:
                    trackers = mot_tracker.update(dets_ppl)
                    for d in trackers:
                        plot_one_box(d[:-1],
                                     im0,
                                     label='ID' + str(int(d[-1])),
                                     color=colors[1],
                                     line_thickness=1)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
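
Every example draws detections with plot_one_box. A sketch of that utility as it appears in common YOLOv5 releases: draw the xyxy rectangle and, when a label is given, a filled background with the class name and confidence. The local copy bundled with any of these projects may differ in font handling or colors.

import cv2

def plot_one_box_sketch(xyxy, img, color=(0, 255, 0), label=None, line_thickness=3):
    p1 = (int(xyxy[0]), int(xyxy[1]))
    p2 = (int(xyxy[2]), int(xyxy[3]))
    cv2.rectangle(img, p1, p2, color, thickness=line_thickness, lineType=cv2.LINE_AA)
    if label:
        tf = max(line_thickness - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=line_thickness / 3, thickness=tf)[0]
        p2 = (p1[0] + t_size[0], p1[1] - t_size[1] - 3)
        cv2.rectangle(img, p1, p2, color, -1, cv2.LINE_AA)  # filled label background
        cv2.putText(img, label, (p1[0], p1[1] - 2), 0, line_thickness / 3,
                    (225, 255, 255), thickness=tf, lineType=cv2.LINE_AA)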
Example #7
0
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights, map_location=device)[
        'model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    save_path = str(Path(out))
    txt_path_raw = str(Path(out)) + '/results_raw.txt'

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(
            pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
            print(pred)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(
                    img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []
                clss = []
                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])
                    clss.append(cls.item())

                cls_conf = confs
                cls_ids = clss
                # xywhs = torch.Tensor(bbox_xywh)
                # confss = torch.Tensor(confs)
                # cls_ids = clss


                # if len(bbox_xywh) == 0:
                #     continue
                # print("detection cls_ids:", cls_ids)

                #filter cls id for tracking
                # print("cls_ids")
                # print(cls_ids)

                # # select class
                # mask = []
                # lst_move_life = [0,1,2]
                # # lst_for_track = []
                
                # for id in cls_ids:
                #     if id in lst_move_life:
                #         # lst_for_track.append(id)
                #         mask.append(True)
                #     else:
                #         mask.append()
                # # print("mask cls_ids:", mask)

                # # print(bbox_xywh)
                # bbox_xywh = list(compress(bbox_xywh,mask))
                # bbox dilation just in case bbox too small, delete this line if using a better pedestrian detector
                # bbox_xywh[:,3:] *= 1.2
                # cls_conf = list(compress(cls_conf,mask))
                # print(cls_conf)

                bbox_xywh = torch.Tensor(bbox_xywh)
                cls_conf = torch.Tensor(cls_conf)

                # Pass detections to deepsort
                outputs = deepsort.update(bbox_xywh, cls_conf, im0, cls_ids)
                '''
                TODO:
                add a counter here
                '''
                # counting num and class
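                # Illustrative sketch only (not part of the original source):
                # a cumulative per-class counter could be kept by remembering
                # which track ids were already seen, assuming each row of
                # `outputs` ends with [track_id, class_id] as used below, e.g.:
                # for *_, track_id, class_id in outputs:
                #     if int(track_id) not in seen_ids:        # seen_ids / class_counts would be
                #         seen_ids.add(int(track_id))          # initialised before the frame loop,
                #         class_counts[int(class_id)] += 1     # e.g. class_counts = defaultdict(int)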

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, 4:5]
                    cls_id = outputs[:,-1]
                    # print(outputs[:,-1])  # --> issue found here
                    # print("track res cls_id:", cls_id)
                    # cls_ids_show = [cls_ids[i] for i in cls_id]
                    draw_boxes(im0, bbox_xyxy, cls_id, identities)

                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[4]
                        classname = output[5]

                        with open(txt_path_raw, 'a') as f:  # first raw results produced by Yolov5 + DeepSort
                            f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left,
                                                           bbox_top, bbox_w, bbox_h, classname, -1, -1, -1))  # label format

            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                    vid_writer.write(im0)
                    

    if save_txt or save_img:
        print('Results saved to %s' % (os.getcwd() + os.sep + out))
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
Example #8
0
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights,
                       map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = torch_utils.load_classifier(name='resnet101',
                                             n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    flag = 0  # flag used when saving the video
    view_img = True
    save_img = True
    dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once

    idx = -1
    compare_dict = {}
    outputs = []  # reused on skipped frames; avoids a NameError if the first processed frame has no detections

    # create a new figure or activate an existing figure
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='polar')  # 1x1 grid, first subplot

    for path, img, im0s, vid_cap in dataset:
        #plt
        # Plot origin (agent's start point) - the origin is the pedestrian
        ax.plot(0, 0, color='black', marker='o', markersize=20, alpha=0.3)
        # Plot configuration
        ax.set_rticks([])
        ax.set_rmax(1)
        ax.grid(False)
        ax.set_theta_zero_location("S")  # put 0 degrees at the south
        ax.set_theta_direction(-1)  # clockwise polar coordinates

        img = torch.from_numpy(img).to(device)

        # Crop the input image frame
        img = img[:, 100:260, :]
        temp = img
        add_img = temp[:, :, :32]
        img = torch.cat((img, add_img), dim=2)
        # Crop the result frame as well (so the bounding boxes match the objects)
        im0s = im0s[200:520, :, :]
        temp = im0s
        add_im0s = temp[:, :64, :]
        print(add_im0s.shape)
        im0s = np.concatenate((im0s, add_im0s), axis=1)

        img = img.half() \
            if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        idx += 1
        if (idx % 10 != 0):  # keep the original video length
            if len(outputs) > 0:
                ori_im = draw_boxes(im0s, bbox_xyxy, identities,
                                    isCloser)  # redraw bboxes using the previous frame's info
            vid_writer.write(im0s)
            continue

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            p, s, im0 = path, '', im0s  # we always take this branch; the print up to the file path should be removed in datasets.py

            save_path = str(Path(out) / Path(p).name)
            #print(dataset.frame)  # frame number
            #s += '%gx%g ' % img.shape[2:]  # print string: image size (e.g. 640x320) - can be removed
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  #  normalization gain whwh

            # If a vehicle was detected
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results: count and class (e.g. 5 cars) - can be removed
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                #s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    img_h, img_w, _ = im0.shape  # size of the result frame
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h,
                                                        *xyxy)  # center coordinates, w, h
                    obj = [x_c, y_c, bbox_w, bbox_h]

                    bbox_xywh.append(obj)
                    confs.append([conf.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                outputs = []

                if len(bbox_xywh) != 0:  # if anything was detected, pass it to deepsort
                    outputs, coors, frame, bbox_size = deepsort.update(
                        xywhs, confss, im0, compare_dict, dataset.frame)

                # draw boxes for visualization
                if len(outputs) > 0:
                    print("!", outputs)
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, 4]
                    isCloser = outputs[:, -1]
                    print("isCloser:", isCloser)
                    print(compare_dict)
                    ori_im = draw_boxes(im0, bbox_xyxy, identities,
                                        isCloser)  # draw bboxes
                    alert.show_direction(ax, coors, bbox_size,
                                         isCloser)  # display the approach direction

            # Print time (inference + NMS)
            #print('%sDone. (%.3fs)' % (s, t2 - t1))

            plt.show(block=False)
            # Save the plot chart
            # If idx is 10, frames 1 through 9 are saved with the same image as frame 10
            '''
            file = '/Users/wonyeong/Desktop/ewha/project/plotimgs/img%d.png' % idx
            plt.savefig(file)
            for j in range(9):
                file = '/Users/wonyeong/Desktop/ewha/project/plotimgs/img%d.png' % (j + idx + 1)
                plt.savefig(file)

            # Only draw when a vehicle is detected..
            '''
            plt.pause(0.01)
            plt.cla()

            # Stream results
            cv2.imshow('frame', im0)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            print(im0s.shape)

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if (flag == 0):
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        #w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        # h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        w = 1344
                        h = 320
                        flag = 1
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    else:
                        vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % (os.getcwd() + os.sep + out))
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Read Yaml
    with open(opt.data) as f:
        data_dict = yaml.load(f, Loader=yaml.FullLoader)
    names = data_dict['names']
    print(names)

    # Load model
    #google_utils.attempt_download(weights)
    model = torch.load(weights,
                       map_location=device)['model'].float()  # load to FP32
    #model = torch.save(torch.load(weights, map_location=device), weights)  # update model if SourceChangeWarning
    # model.fuse()
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = torch_utils.load_classifier(name='resnet101',
                                             n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t_fps = time_synchronized()
    frame_fps = 0
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                #print(det[:, -1].unique())
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []
                #clses = []

                # Write results
                for *xyxy, conf, cls in det:

                    img_h, img_w, _ = im0.shape  # get image shape

                    x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])
                    #clses.append([cls.item()])
                    #outputs, clses2 = deepsort.update((torch.Tensor(bbox_xywh)), (torch.Tensor(confs)), (torch.Tensor(clses)), im0)
                    #outputs = deepsort.update((torch.Tensor(bbox_xywh)), (torch.Tensor(confs)), im0)

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)
                # Pass detections to deepsort
                outputs = deepsort.update(xywhs, confss, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_tlwh = []
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_boxes(im0, bbox_xyxy, identities)

            # Print time (inference + NMS)
            #print('%sDone. (%.3fs)' % (s, t2 - t1))
            print('%sDone.' % s)

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)

        frame_fps += 1

        if ((time_synchronized() - t_fps) >= 1):
            print('\n')
            print('FPS=%.2f' % (frame_fps))
            t_fps = time_synchronized()
            frame_fps = 0

        #print('FPS=%.2f' % (1/(time_synchronized() - t1)))

    if save_txt or save_img:
        print('Results saved to %s' % (os.getcwd() + os.sep + out))
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)
Example #10
0
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = not opt.nosave and not source.endswith(
        '.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
                ), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')
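                        # Example of a written label line (illustrative values only):
                        # "0 0.504 0.512 0.231 0.318" - class index then normalized xywh;
                        # the confidence is appended only when opt.save_conf is set.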

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
Example #11
0
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz, GCP_list = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, opt.GCP_list
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights,
                       map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    save_path = str(Path(out))
    txt_path_raw = str(Path(out)) + '/results_raw.txt'

    # Also write a separate results file with speed attached so the two can be compared (to be revised)
    txt_path_raw2 = str(Path(out)) + '/results_raw2.txt'

    # point load
    with open('./mapdata/point.yaml') as f:
        data = yaml.load(f.read(), Loader=yaml.FullLoader)
    frm_point = data['frm_point']
    geo_point = data['geo_point']

    Counter_1 = [(488, 589), (486, 859)]
    Counter_2 = [(3463, 795), (3487, 1093)]
    Counter_list = [Counter_1, Counter_2]

    datum_dist = []
    counter_dist = []

    line_fileName = './mapdata/Busan1_IC_Polyline_to_Vertex.txt'
    all_line = mapdata_load(line_fileName, frm_point, geo_point)

    percep_frame = 5
    from collections import deque
    pts = [deque(maxlen=percep_frame + 1) for _ in range(10000)]
    ptsSpeed = [deque(maxlen=1) for _ in range(10000)]

    frame_len = calc_dist(frm_point[1], frm_point[4])
    geo_len = calc_dist(geo_point[1], geo_point[4])

    # ----------------- fix val start
    fixcnt = 1
    # ----------------- fix val end

    # ----------------- counter val start
    memory_index = {}
    memory_id = {}

    cnt = np.zeros((len(Counter_list), 4))
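    # cnt[c] accumulates [total, class 0, class 1, class 2] crossings for counter
    # line c (layout inferred from the increments in the counting loop below)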
    # total_counter = 0  # a grand-total counter could be added here later

    # count_1_total = 0
    # count_1_veh_c0 = 0
    # count_1_veh_c1 = 0
    # count_1_veh_c2 = 0

    # count_2_total = 0
    # count_2_veh_c0 = 0
    # count_2_veh_c1 = 0
    # count_2_veh_c2 = 0
    # ----------------- counter val end

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):

        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
            print(pred)

        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []
                clss = []
                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])
                    clss.append(cls.item())

                cls_conf = confs
                cls_ids = clss
                # xywhs = torch.Tensor(bbox_xywh)
                # confss = torch.Tensor(confs)
                # cls_ids = clss

                # if len(bbox_xywh) == 0:
                #     continue
                # print("detection cls_ids:", cls_ids)

                #filter cls id for tracking
                # print("cls_ids")
                # print(cls_ids)

                # # select class
                # mask = []
                # lst_move_life = [0,1,2]
                # # lst_for_track = []

                # for id in cls_ids:
                #     if id in lst_move_life:
                #         # lst_for_track.append(id)
                #         mask.append(True)
                #     else:
                #         mask.append()
                # # print("mask cls_ids:", mask)

                # # print(bbox_xywh)
                # bbox_xywh = list(compress(bbox_xywh,mask))
                # bbox dilation just in case bbox too small, delete this line if using a better pedestrian detector
                # bbox_xywh[:,3:] *= 1.2
                # cls_conf = list(compress(cls_conf,mask))
                # print(cls_conf)

                bbox_xywh = torch.Tensor(bbox_xywh)
                cls_conf = torch.Tensor(cls_conf)

                # Pass detections to deepsort
                outputs = deepsort.update(bbox_xywh, cls_conf, im0, cls_ids)
                """
                # output 형식

                [[박스 좌측상단 x, 박스 좌측상단 y, 박스 우측하단 x, 박스 우측하단 y, 차량 id, 클래스 넘버],
                [박스 좌측상단 x, 박스 좌측상단 y, 박스 우측하단 x, 박스 우측하단 y, 차량 id, 클래스 넘버],
                [박스 좌측상단 x, 박스 좌측상단 y, 박스 우측하단 x, 박스 우측하단 y, 차량 id, 클래스 넘버],
                [박스 좌측상단 x, 박스 좌측상단 y, 박스 우측하단 x, 박스 우측하단 y, 차량 id, 클래스 넘버],
                ...]
                """

                # ------------------------------------------------------------------------------------------------------ img fix start
                t3 = time_synchronized()
                match_mid_point_list = matcher_BRISK_BF(im0, GCP_list)
                t4 = time_synchronized()
                # ---------------------------------------------------------------------------------------------------------------------- line start

                # Define and freeze the trilateration distances used to update the datum points
                if frame_idx == 0:
                    for pointNum in range(len(frm_point)):
                        for GCP_num in range(len(match_mid_point_list)):
                            datum_dist.append(
                                point_dist(match_mid_point_list[GCP_num],
                                           frm_point[pointNum]))
                    datum_dist = np.reshape(
                        datum_dist,
                        (len(frm_point), len(match_mid_point_list)))
                    for Ct_list in Counter_list:
                        for Ctpoint_num in range(len(Ct_list)):
                            for GCP_num in range(len(match_mid_point_list)):
                                counter_dist.append(
                                    point_dist(match_mid_point_list[GCP_num],
                                               Ct_list[Ctpoint_num]))
                    counter_dist = np.reshape(counter_dist,
                                              (len(Counter_list), len(Ct_list),
                                               len(match_mid_point_list)))
                t5 = time_synchronized()

                pre_P = (0, 0)

                for line_num, eachline in enumerate(all_line):
                    for newpoint in eachline['frmPoint']:
                        if line_num == 0:
                            im0 = cv2.circle(im0, newpoint, 5, (0, 0, 255),
                                             -1)  # lane: solid line
                            if calc_dist(pre_P, newpoint) < 390:
                                im0 = cv2.line(im0, pre_P, newpoint,
                                               (0, 0, 255), 2, -1)
                        elif line_num == 1:
                            im0 = cv2.circle(im0, newpoint, 5, (0, 255, 0),
                                             -1)  # road boundary
                            if calc_dist(pre_P, newpoint) < 420:
                                im0 = cv2.line(im0, pre_P, newpoint,
                                               (0, 255, 0), 2, -1)
                        elif line_num == 2:
                            im0 = cv2.circle(im0, newpoint, 5, (255, 0, 0),
                                             -1)  # lane: double line
                            if calc_dist(pre_P, newpoint) < 350:
                                im0 = cv2.line(im0, pre_P, newpoint,
                                               (255, 0, 0), 2, -1)
                        else:
                            im0 = cv2.circle(im0, newpoint, 5, (100, 100, 0),
                                             -1)  # lane: dashed line
                            if calc_dist(pre_P, newpoint) < 600:
                                im0 = cv2.line(im0, pre_P, newpoint,
                                               (100, 100, 0), 2, -1)
                        pre_P = newpoint

                t6 = time_synchronized()
                for pointNum in range(len(frm_point)):
                    im0 = cv2.circle(im0, frm_point[pointNum], 10, (0, 0, 0),
                                     -1)
                    newPoint = intersectionPoint(match_mid_point_list,
                                                 datum_dist[pointNum])
                    frm_point[pointNum] = newPoint

                t7 = time_synchronized()

                #---------------------------------------------------------------------------------------------------------------------- line end

                # ------------------------------------------------------------------------------------------------------ img fix end

                # ------------------------------------------------------------------------------------------------------ counting num and class start
                Counter_newpoint = []
                for Ct_num in range(len(Counter_list)):
                    Ct_list = Counter_list[Ct_num]
                    for Ctpoint_num in range(len(Ct_list)):
                        Counter_newpoint.append(
                            intersectionPoint(
                                match_mid_point_list,
                                counter_dist[Ct_num][Ctpoint_num]))
                Counter_newpoint = np.reshape(
                    Counter_newpoint, (len(Counter_list), len(Ct_list), 2))

                for CountNum in Counter_newpoint:
                    im0 = cv2.line(im0, tuple(CountNum[0]), tuple(CountNum[1]),
                                   (0, 0, 0), 5, -1)

                boxes = []
                indexIDs = []
                classIDs = []
                previous_index = memory_index.copy()
                previous_id = memory_id.copy()
                memory_index = {}
                memory_id = {}
                COLORS = np.random.randint(0,
                                           255,
                                           size=(200, 3),
                                           dtype="uint8")
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        boxes.append(
                            [output[0], output[1], output[2], output[3]])
                        indexIDs.append(int(output[4]))
                        classIDs.append(int(output[5]))
                        memory_index[indexIDs[-1]] = boxes[
                            -1]  # map index id to its box
                        memory_id[indexIDs[-1]] = classIDs[
                            -1]  # map index id to its class id

                        if len(pts[output[4]]) == 0:
                            pts[output[4]].append(frame_idx)
                        center = (int(((output[0]) + (output[2])) / 2),
                                  int(((output[1]) + (output[3])) / 2))
                        pts[output[4]].append(center)
                        if len(pts[output[4]]) == percep_frame + 1:
                            frmMove_len = np.sqrt(
                                pow(
                                    pts[output[4]][-1][0] -
                                    pts[output[4]][-percep_frame][0], 2) + pow(
                                        pts[output[4]][-1][1] -
                                        pts[output[4]][-percep_frame][1], 2))
                            geoMove_Len = geo_len * frmMove_len / frame_len
                            speed = geoMove_Len * vid_cap.get(
                                cv2.CAP_PROP_FPS) * 3.6 / (frame_idx -
                                                           pts[output[4]][0])
                            ptsSpeed[output[4]].append(speed)
                            pts[output[4]].clear()
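                            # Worked example (illustrative numbers only): a geo
                            # displacement of 2 m over 5 elapsed frames at 30 FPS
                            # gives speed = 2 * 30 * 3.6 / 5 = 43.2 km/h.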

                if len(boxes) > 0:
                    i = int(0)
                    for box in boxes:
                        # Compare the current and previous positions to check whether the object crossed a counter line
                        (x, y) = (int(box[0]), int(box[1]))  # output[0], output[1]
                        (w, h) = (int(box[2]), int(box[3]))  # same for output[2], output[3]
                        color = compute_color_for_labels(indexIDs[i])

                        if indexIDs[i] in previous_index:
                            previous_box = previous_index[indexIDs[i]]
                            # print()
                            # print('previous_box : ')
                            # print(previous_box)
                            (x2, y2) = (int(previous_box[0]),
                                        int(previous_box[1]))
                            (w2, h2) = (int(previous_box[2]),
                                        int(previous_box[3]))
                            p0 = (int(x + (w - x) / 2), int(y + (h - y) / 2)
                                  )  # current box center
                            p1 = (int(x2 + (w2 - x2) / 2),
                                  int(y2 + (h2 - y2) / 2))  # previous box center
                            cv2.line(
                                im0, p0, p1, color, 3
                            )  # compare with the previous frame: draw the segment between the previous and current centers

                            # Class lookup
                            previous_class_id = previous_id[
                                indexIDs[i]]  # the same index id is used, so a direct lookup works

                            # First results produced via Yolov5 and DeepSort (custom output)
                            # frame index, index id, class name, x coord, y coord, w, h, speed, null, null
                            # with open(txt_path_raw2, 'a') as f:
                            #     f.write(('%g ' * 10+ '\n') % (frame_idx, indexIDs[i], previous_class_id,
                            #                                 p0[0], p0[1], box[2], box[3], -1, -1))  # label format

                            for cntr in range(len(Counter_newpoint)):
                                if intersect(p0, p1, Counter_newpoint[cntr][0],
                                             Counter_newpoint[cntr]
                                             [1]):  # the actual crossing check
                                    if previous_class_id == 0:
                                        cnt[cntr][1] += 1
                                    elif previous_class_id == 1:
                                        cnt[cntr][2] += 1
                                    elif previous_class_id == 2:
                                        cnt[cntr][3] += 1
                                    cnt[cntr][0] += 1

                        i += 1  # move on to the next index

                # draw counter
                for cntr in range(len(Counter_newpoint)):
                    cv2.putText(im0, 'count_{}_total : {}'.format(
                        cntr + 1, cnt[cntr][0]), (100 + 400 * cntr, 110),
                                cv2.FONT_HERSHEY_DUPLEX, 1.0, (0, 0, 0),
                                2)  # display the running count
                    cv2.putText(im0, 'count_{}_{} : {}'.format(
                        cntr + 1, names[0],
                        cnt[cntr][1]), (100 + 400 * cntr, 140),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 0),
                                2)  # display the running count
                    cv2.putText(im0, 'count_{}_{} : {}'.format(
                        cntr + 1, names[1],
                        cnt[cntr][2]), (100 + 400 * cntr, 170),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 0),
                                2)  # display the running count
                    cv2.putText(im0, 'count_{}_{} : {}'.format(
                        cntr + 1, names[2],
                        cnt[cntr][3]), (100 + 400 * cntr, 200),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 0),
                                2)  # display the running count
                t8 = time_synchronized()
                # ---------------------------------------------------------------------------------------------------------------------- counter end

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, 4:5]
                    cls_id = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, cls_id, identities, names,
                               ptsSpeed)

                t9 = time_synchronized()
                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):  # one line per tracked object
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[4]
                        classname = output[5]

                        with open(
                                txt_path_raw, 'a'
                        ) as f:  # first raw results produced by Yolov5 + DeepSort
                            f.write(('%g ' * 10 + '\n') %
                                    (frame_idx, identity, bbox_left, bbox_top,
                                     bbox_w, bbox_h, classname, -1, -1,
                                     -1))  # label format

            # else:
            #     deepsort.increment_ages()
            t10 = time_synchronized()
            # Print time (inference + NMS + classify)
            #print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            t11 = time_synchronized()
            # Save results (image with detections)
            # dataset.mode = 'images'
            # save_path = './track_result/output/{}.jpg'.format(i)
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)
            t12 = time_synchronized()

            print('inference + NMS + classify (%.3fs)' % (t2 - t1))
            print('Yolo + DeepSORT (%.3fs)' % (t3 - t2))
            print('find mid point (%.3fs)' % (t4 - t3))
            print('datum distances for trilateration (%.3fs)' % (t5 - t4))
            print('draw line (%.3fs)' % (t6 - t5)
                  )  # currently computed for every vertex in the HD map; later only the points visible on screen should be processed
            print('GCP point update (%.3fs)' % (t7 - t6))
            print('Count & speed (%.3fs)' % (t8 - t7))
            print('draw per-vehicle boxes (%.3fs)' % (t9 - t8))
            print('save txt data (%.3fs)' % (t10 - t9))
            print('display on screen (%.3fs)' % (t11 - t10))
            print('save to video file (%.3fs)' % (t12 - t11))
            print('one frame done (%.3fs)' % (t12 - t1))

    if save_txt or save_img:
        print('Results saved to %s' % (os.getcwd() + os.sep + out))
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))