def main():
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cam = Camera(args)
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    cls_dict = get_cls_dict(args.category_num)
    yolo_dim = args.model.split('-')[-1]
    if 'x' in yolo_dim:
        dim_split = yolo_dim.split('x')
        if len(dim_split) != 2:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
        w, h = int(dim_split[0]), int(dim_split[1])
    else:
        h = w = int(yolo_dim)
    if h % 32 != 0 or w % 32 != 0:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num)

    open_window(WINDOW_NAME, 'Camera TensorRT YOLO Demo', 640, 480)

    vis = BBoxVisualization(cls_dict)
    loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis)

    cam.release()
    cv2.destroyAllWindows()
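
The width/height parsing above recurs in almost every example below. A minimal
standalone sketch of the same logic (the helper name is illustrative, not from
any of the original sources):

def parse_yolo_dim(model):
    """Derive (h, w) from a model name such as 'yolov4-416' or 'yolov4-608x352'."""
    yolo_dim = model.split('-')[-1]
    if 'x' in yolo_dim:
        dim_split = yolo_dim.split('x')
        if len(dim_split) != 2:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
        w, h = int(dim_split[0]), int(dim_split[1])
    else:
        h = w = int(yolo_dim)
    if h % 32 != 0 or w % 32 != 0:  # YOLO input dims must be multiples of 32
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
    return h, w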
def main():
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cam = Camera(args)
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    cls_dict = get_cls_dict(args.category_num)
    vis = BBoxVisualization(cls_dict)
    h, w = get_input_shape(args.model)
    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num, args.letter_box)

    mjpeg_server = MjpegServer(port=args.mjpeg_port)
    print('MJPEG server started...')
    try:
        loop_and_detect(cam,
                        trt_yolo,
                        conf_th=0.3,
                        vis=vis,
                        mjpeg_server=mjpeg_server)
    except Exception as e:
        print(e)
    finally:
        mjpeg_server.shutdown()
        cam.release()
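
Note that except Exception above does not catch KeyboardInterrupt, but the
finally block still runs, so the MJPEG server and camera are released on
Ctrl-C as well.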
Example #3
def main():
    args = parse_args()
    cam = Camera(args)
    cam.open()

    # import pdb
    # pdb.set_trace()

    if not cam.is_opened:
        sys.exit('[ERROR] Failed to open camera!')

    cls_dict = get_cls_dict('coco')
    yolo_dim = int(args.model.split('-')[-1])  # 416 or 608
    trt_yolov3 = TrtYOLOv3(args.model, (yolo_dim, yolo_dim))

    print('[INFO]  Camera: starting')
    cam.start()
    open_window(WINDOW_NAME, args.image_width, args.image_height,
                'TensorRT YOLOv3 Detector')
    vis = BBoxVisualization(cls_dict)
    loop_and_detect(cam,
                    args.runtime,
                    trt_yolov3,
                    conf_th=0.3,
                    vis=vis,
                    window_name=WINDOW_NAME)

    print('[INFO]  Program: stopped')
    cam.stop()
    cam.release()
    cv2.destroyAllWindows()
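
This example targets an older threaded Camera interface (open()/start()/
is_opened/stop()) and the TrtYOLOv3 wrapper, whereas most other examples in
this collection use the newer Camera(args) plus isOpened() API and TrtYOLO.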
Example #4
def main():
    print(f"{datetime.datetime.now().isoformat()} start!", flush=True)
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cam = Camera(args)
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    cls_dict = get_cls_dict(args.category_num)
    yolo_dim = args.model.split('-')[-1]
    if 'x' in yolo_dim:
        dim_split = yolo_dim.split('x')
        if len(dim_split) != 2:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
        w, h = int(dim_split[0]), int(dim_split[1])
    else:
        h = w = int(yolo_dim)
    if h % 32 != 0 or w % 32 != 0:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num)

    # open_window(
    #     WINDOW_NAME, 'Camera TensorRT YOLO Demo',
    #     cam.img_width, cam.img_height)
    vis = BBoxVisualization(cls_dict)
    loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis)

    cam.release()
Example #5
def main():
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cap = cv2.VideoCapture(args.video_name)
    if not cap.isOpened():
        raise SystemExit('ERROR: unable to read the source video feed!')

    cls_dict = get_cls_dict(args.category_num)
    yolo_dim = args.model.split('-')[-1]
    if 'x' in yolo_dim:
        dim_split = yolo_dim.split('x')
        if len(dim_split) != 2:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
        w, h = int(dim_split[0]), int(dim_split[1])
    else:
        h = w = int(yolo_dim)
    if h % 32 != 0 or w % 32 != 0:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num)
    vis = BBoxVisualization(cls_dict)
    loop_and_detect(cap, trt_yolo, args.result_video, conf_th=0.3, vis=vis)

    cv2.destroyAllWindows()
def run_detection():
    # args = parse_args()
    with open("cfg/detection_tracker_cfg.json") as detection_config:
        detect_config = json.load(detection_config)

    if detect_config["category_num"] <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' %
                         detect_config["category_num"])
    if not os.path.isfile('yolo/%s.trt' % detect_config["model"]):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' %
                         detect_config["model"])

    cap = cv2.VideoCapture(detect_config["source"])
    if not cap.isOpened():
        raise SystemExit('ERROR: failed to open the input video file!')
    frame_width, frame_height = int(cap.get(3)), int(cap.get(4))

    cls_dict = get_cls_dict(detect_config["category_num"])
    #vis = BBoxVisualization(cls_dict)
    h, w = get_input_shape(detect_config["model"])
    trt_yolo = TrtYOLO(detect_config["model"], (h, w),
                       detect_config["category_num"],
                       detect_config["letter_box"])
    ret, frame = cap.read()
    boxes, confs, clss = perform_detection(frame=frame,
                                           trt_yolo=trt_yolo,
                                           conf_th=0.3,
                                           vis=None)
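
The keys read from cfg/detection_tracker_cfg.json above imply a config file
shaped roughly like this (values are illustrative, not taken from the
original project):

{
    "category_num": 80,
    "model": "yolov4-416",
    "source": "videos/input.mp4",
    "letter_box": false
}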
Example #7
    def __init__(self, frame_info_pool=None, detection_result_pool=None, final_result_pool=None, model='yolov4-416', dataset='obstacle', score_threshold=0.5, nms_threshold=0.3):
        """
        :param model: model name
        :param category_num:
        """
        self.results = dict()
        self.model_name = model
        self.dataset = dataset
        self.frame_info_pool = frame_info_pool
        self.detection_result_pool = detection_result_pool
        self.final_result_pool = final_result_pool
        self.score_threshold = score_threshold
        self.nms_threshold = nms_threshold
        if dataset == "coco":
            category_num = 80
        else:
            category_num = 15
        self.cls_dict = get_cls_dict(category_num)

        yolo_dim = model.split('-')[-1]
        if 'x' in yolo_dim:
            dim_split = yolo_dim.split('x')
            if len(dim_split) != 2:
                raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
            w, h = int(dim_split[0]), int(dim_split[1])
        else:
            h = w = int(yolo_dim)
        if h % 32 != 0 or w % 32 != 0:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

        self.model = TrtYOLO(model, (h, w), category_num)

        PrintLog.i("Object detection model is loaded - {}\t{}".format(model, dataset))
Example #8
def main():
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cam = Camera(args)
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    cls_dict = get_cls_dict(args.category_num)
    vis = BBoxVisualization(cls_dict)
    h, w = get_input_shape(args.model)
    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num, args.letter_box)

    open_window(WINDOW_NAME, 'Camera TensorRT YOLO Demo', cam.img_width,
                cam.img_height)

    msg_queue = Queue(maxsize=100)

    # msg_queue.put("0,0,0,-1".encode())
    Thread(target=serArd, args=(msg_queue,), daemon=True).start()
    loop_and_detect(cam, trt_yolo, msg_queue, conf_th=0.7, vis=vis)
    # loop_and_detect() blocks until the user quits; clean up afterwards

    cam.release()
    cv2.destroyAllWindows()
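
serArd (the serial-port consumer fed by the Queue above) is not shown in this
example; a minimal stand-in that drains the same queue could look like this
(the name and message handling are assumptions):

def serard_stub(msg_queue):
    """Hypothetical stand-in for serArd: consume messages from loop_and_detect."""
    while True:
        msg = msg_queue.get()   # blocks until a detection message is queued
        if msg is None:         # a None sentinel could be used to stop the thread
            break
        print('would write to serial:', msg)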
Example #9
    def set_attribute(self, settings_attr):
        self.video_url = settings_attr["video_url"]
        self.analysis_fps = float(settings_attr["analysis_fps"])
        self.display_delay = int(settings_attr["display_delay"])
        self.server_url = settings_attr["server_url"]
        self.server_port = settings_attr["server_port"]
        self.dataset_index = settings_attr["dataset"]

        self.capture = cv2.VideoCapture(settings_attr["video_url"])
        self.video_fps = float(self.capture.get(cv2.CAP_PROP_FPS))

        if "://" in self.video_url:
            self.video_type = "streaming"
        else:
            self.video_type = "file"

        if self.dataset_index == 0:  # obstacle
            self.cls_dict = get_cls_dict(15)
        elif self.dataset_index == 1:  # mscoco
            self.cls_dict = get_cls_dict(80)

        self.bbox_visualization = BBoxVisualization(self.cls_dict)
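
The '://' check above is a simple heuristic: a source such as
'rtsp://host:8554/cam' is classified as streaming, while a plain path such as
'/data/video.mp4' is treated as a file. Note also that self.cls_dict is only
assigned for dataset_index 0 or 1, so any other index would leave it undefined.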
Example #10
def yolo_detection():
    # dev = cuda.Device(0)
    # ctx = dev.make_context()
    args = parse_args()
    print(args)
    """
    config  assert
    """
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/darknet/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/darknet/%s.trt) not found!' %
                         args.model)
    cls_dict = get_cls_dict(args.category_num)
    yolo_dim = args.model.split('-')[-1]

    if 'x' in yolo_dim:
        dim_split = yolo_dim.split('x')
        if len(dim_split) != 2:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
        w, h = int(dim_split[0]), int(dim_split[1])
    else:
        h = w = int(yolo_dim)
    if h % 32 != 0 or w % 32 != 0:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
    """
    capture the image
    """
    cam = Camera(args)
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')
    """
    deploy the yolo model
    """
    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num)

    # open_window(
    #     WINDOW_NAME, 'Camera TensorRT YOLO Demo',
    #     cam.img_width, cam.img_height)
    """
    detect the insulator using model
    """
    vis = BBoxVisualization(cls_dict)
    loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis)
    """
    release the image
    """

    cam.release()
Example #11
def main():
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit(f'ERROR: bad category_num ({args.category_num})!')
    if not os.path.isfile(args.model):
        raise SystemExit(f'ERROR: file {args.model} not found!')

    # Process valid coco json file
    process_valid_json(args.valid_coco)

    if args.write_images:
        if not os.path.exists(args.image_output):
            os.mkdir(args.image_output)

    # Create camera for video/image input
    cam = Camera(args)
    if not cam.get_is_opened():
        raise SystemExit('ERROR: failed to open camera!')

    class_dict = get_cls_dict(args.category_num)
    yolo_dim = (args.model.replace(".trt", "")).split('-')[-1]
    if 'x' in yolo_dim:
        dim_split = yolo_dim.split('x')
        if len(dim_split) != 2:
            raise SystemExit(f'ERROR: bad yolo_dim ({yolo_dim})!')
        w, h = int(dim_split[0]), int(dim_split[1])
    else:
        h = w = int(yolo_dim)
    if h % 32 != 0 or w % 32 != 0:
        raise SystemExit(f'ERROR: bad yolo_dim ({yolo_dim})!')

    # Create yolo
    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num)

    if args.activate_display:
        open_window(WINDOW_NAME, 'Camera TensorRT YOLO Demo', cam.img_width,
                    cam.img_height)
    visual = BBoxVisualization(class_dict)

    # Run detection
    loop_and_detect(cam,
                    trt_yolo,
                    args,
                    confidence_thresh=args.confidence_threshold,
                    visual=visual)

    # Clean up
    cam.release()
    if args.activate_display:
        cv2.destroyAllWindows()
Example #12
    def init_yolo(self):
        """ Initialises yolo parameters required for trt engine """

        if self.model.find('-') == -1:
            self.model = self.model + "-" + self.input_shape
        yolo_dim = self.model.split('-')[-1]

        if 'x' in yolo_dim:
            dim_split = yolo_dim.split('x')
            if len(dim_split) != 2:
                raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
            self.w, self.h = int(dim_split[0]), int(dim_split[1])
        else:
            self.h = self.w = int(yolo_dim)
        if self.h % 32 != 0 or self.w % 32 != 0:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

        cls_dict = get_cls_dict(self.category_num)
        self.vis = BBoxVisualization(cls_dict)
def main():
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    client = init_mqtt(args.host, args.port)

    cam = Camera(args)
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    cls_dict = get_cls_dict(args.category_num)
    print("cls_dict:", cls_dict)
    #print(cls_dict[3])
    yolo_dim = args.model.split('-')[-1]
    print("yolo_dim:", yolo_dim)
    if 'x' in yolo_dim:
        dim_split = yolo_dim.split('x')
        if len(dim_split) != 2:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
        w, h = int(dim_split[0]), int(dim_split[1])
    else:
        h = w = int(yolo_dim)
        print('w:{0}, h:{1}'.format(w, h))
    if h % 32 != 0 or w % 32 != 0:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num, args.letter_box)

    open_window(WINDOW_NAME, 'Camera TensorRT YOLO Demo', cam.img_width,
                cam.img_height)
    vis = BBoxVisualization(cls_dict)
    loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis,
                    cls_dict=cls_dict, client=client, topic=args.topic)

    cam.release()
    cv2.destroyAllWindows()

    client.disconnect()
def main():
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cam = Camera(args)
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    cls_dict = get_cls_dict(args.category_num)
    vis = BBoxVisualization(cls_dict)
    trt_yolo = TrtYOLO(args.model, args.category_num, args.letter_box)

    open_window(WINDOW_NAME, 'Camera TensorRT YOLO Demo', cam.img_width,
                cam.img_height)
    loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis)

    cam.release()
    cv2.destroyAllWindows()
def main():
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cap = cv2.VideoCapture(args.video)
    if not cap.isOpened():
        raise SystemExit('ERROR: failed to open the input video file!')
    frame_width, frame_height = int(cap.get(3)), int(cap.get(4))
    writer = cv2.VideoWriter(args.output, cv2.VideoWriter_fourcc(*'mp4v'), 30,
                             (frame_width, frame_height))

    cls_dict = get_cls_dict(args.category_num)
    vis = BBoxVisualization(cls_dict)
    trt_yolo = TrtYOLO(args.model, args.category_num, args.letter_box)

    loop_and_detect(cap, trt_yolo, conf_th=0.3, vis=vis, writer=writer)

    writer.release()
    cap.release()
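
A side note on cap.get(3) and cap.get(4), used here and in several examples
above: 3 and 4 are the numeric values of OpenCV's named capture properties,
which read more clearly:

frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))    # same as cap.get(3)
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))  # same as cap.get(4)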
Example #16
def main():
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cam = Camera(args)
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    cls_dict = get_cls_dict(args.category_num)
    yolo_dim = args.model.split('-')[-1]
    if 'x' in yolo_dim:
        dim_split = yolo_dim.split('x')
        if len(dim_split) != 2:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
        w, h = int(dim_split[0]), int(dim_split[1])
    else:
        h = w = int(yolo_dim)
    if h % 32 != 0 or w % 32 != 0:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num)

    vis = BBoxVisualization(cls_dict)
    mjpeg_server = MjpegServer(port=args.mjpeg_port)
    print('MJPEG server started...')
    try:
        loop_and_detect(cam,
                        trt_yolo,
                        conf_th=0.3,
                        vis=vis,
                        mjpeg_server=mjpeg_server)
    except Exception as e:
        print(e)
    finally:
        mjpeg_server.shutdown()
        cam.release()
Example #17
def detect_and_alarm():
    WINDOW_NAME = "Robot_YOLOv3_Detector_with_TensorRT"
    MODEL = "yolov3-tiny-416"
    IMAGE_WIDTH = 640
    IMAGE_HEIGHT = 480
    RUN_TIME = False
    global DETECT_TIME

    cam = Camera_for_Robot(video_dev=DETECT_ID,
                           image_width=IMAGE_WIDTH,
                           image_height=IMAGE_HEIGHT)
    #cam = Camera_for_Robot(video_dev='./test.mp4', image_width=IMAGE_WIDTH, image_height=IMAGE_HEIGHT)
    cam.open()

    if not cam.is_opened:
        print("Capture road opens failure")
        #return
    cam.start()

    open_window(WINDOW_NAME, IMAGE_WIDTH, IMAGE_HEIGHT,
                'TensorRT YOLOv3 Detector')
    cls_dict = get_cls_dict('coco')
    yolo_dim = int(MODEL.split('-')[-1])  # 416 or 608
    trt_yolov3 = TrtYOLOv3(model=MODEL, input_shape=(yolo_dim, yolo_dim))
    vis = BBoxVisualization(cls_dict)

    loop_and_detect(cam,
                    RUN_TIME,
                    trt_yolov3,
                    conf_th=0.3,
                    vis=vis,
                    window_name=WINDOW_NAME,
                    total_time=DETECT_TIME)
    cam.stop()
    cam.release()
    cv2.destroyAllWindows()
    print("Detection finishes successfully")
Example #18
def main():
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cam = Camera(args)
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    cls_dict = get_cls_dict(args.category_num)
    yolo_dim = args.model.split('-')[-1]
    if 'x' in yolo_dim:
        dim_split = yolo_dim.split('x')
        if len(dim_split) != 2:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
        w, h = int(dim_split[0]), int(dim_split[1])
    else:
        h = w = int(yolo_dim)
    if h % 32 != 0 or w % 32 != 0:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

    load_weight_start = time.time()
    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num)
    load_weights_time = datetime.timedelta(seconds=time.time() -
                                           load_weight_start)
    print('Load weights Time: %s' % (load_weights_time))
    open_window(WINDOW_NAME, 'Camera TensorRT YOLO Demo', cam.img_width,
                cam.img_height)
    vis = BBoxVisualization(cls_dict)

    loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis)
    reporter = MemReporter()
    reporter.report()
    cam.release()
    cv2.destroyAllWindows()
Example #19
               'format=(string)NV12, framerate=(fraction)30/1 ! '
               'nvvidconv flip-method=2 ! '
               'video/x-raw, width=(int){}, height=(int){}, '
               'format=(string)BGRx ! '
               'videoconvert ! appsink').format(width, height)
else:
    raise RuntimeError('onboard camera source not found!')

cap = VideoCapture(gst_str)

print("Start to load YoloV4 model")
trt_yolo = TrtYOLO('yolov4_my-416', (416, 416), 4)

print("YoloV4 model is loaded.")

cls_dict = get_cls_dict(4)
vis = BBoxVisualization(cls_dict)


def detect_center(bboxes):
    center_x = (bboxes[0][0] / 416 + bboxes[0][2] / 416) / 2.0 - 0.5
    center_y = (bboxes[0][1] / 416 + bboxes[0][3] / 416) / 2.0 - 0.5
    return (center_x, center_y)
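
detect_center() maps the first bbox into coordinates normalized by the
416x416 network input and shifted so that the frame center is (0, 0). A quick
sanity check:

# a box centered in the 416x416 frame maps to (0.0, 0.0)
print(detect_center([[108, 108, 308, 308]]))  # -> (0.0, 0.0)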


speed = 0.5
turn_gain = 0.3
center = None

bypass_number = 0
found_number = 0
def detect(config):
    COLOR_AROUND_DOOR = (48, 58, 221)
    COLOR_DOOR = (23, 158, 21)
    COLOR_LINE = (214, 4, 54)
    sent_videos = set()
    video_name = ""
    fpeses = []
    fps = 0

    # door_array = select_object()
    # door_array = [475, 69, 557, 258]
    global flag, vid_writer, lost_ids
    # initial parameters
    door_array = [611, 70, 663, 310]
    around_door_array = [507, 24, 724, 374]
    low_border = 225
    high_border = 342
    #
    door_c = find_centroid(door_array)
    rect_door = Rectangle(door_array[0], door_array[1], door_array[2],
                          door_array[3])
    rect_around_door = Rectangle(around_door_array[0], around_door_array[1],
                                 around_door_array[2], around_door_array[3])
    # socket
    HOST = "localhost"
    PORT = 8083
    # camera info
    save_img = True
    imgsz = (416, 416) if ONNX_EXPORT else config[
        "img_size"]  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img = config["output"], config["source"], config["weights"], \
                                           config["half"], config["view_img"]
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')
    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(config["config_deepsort"])
    # initial objects of classes
    counter = Counter()
    VideoHandler = Writer()
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)
    # Initialize device, weights etc.
    device = torch_utils.select_device(
        device='cpu' if ONNX_EXPORT else config["device"])
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    # Initialize colors
    names = load_classes(config["names"])
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    if config["category_num"] <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' %
                         config["category_num"])
    if not os.path.isfile('yolo/%s.trt' % config["model"]):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' %
                         config["model"])

    # cap = cv2.VideoCapture(config["source"])
    # if not cap.isOpened():
    #     raise SystemExit('ERROR: failed to open the input video file!')
    # frame_width, frame_height = int(cap.get(3)), int(cap.get(4))
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')
    if webcam:
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)
    img = torch.zeros((3, imgsz, imgsz), device=device)  # init img

    cls_dict = get_cls_dict(config["category_num"])
    #vis = BBoxVisualization(cls_dict)
    vis = None
    h, w = get_input_shape(config["model"])
    trt_yolo = TrtYOLO(config["model"], (h, w), config["category_num"],
                       config["letter_box"])

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.connect((HOST, PORT))
        img_shape = (256, 256)
        for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
            t0 = time.time()

            flag_move = False
            flag_anyone_in_door = False

            ratio_detection = 0
            # Process detections
            lost_ids = counter.return_lost_ids()
            if webcam:  # batch_size >= 1
                p, s, im0 = path[0], '%g: ' % 0, im0s[0].copy(
                )  # TODO mb needed in loop for detection
            else:
                p, s, im0 = path, '', im0s
            preds, confs, clss = perform_detection(
                frame=im0,
                trt_yolo=trt_yolo,
                conf_th=config["conf_thres"],
                vis=vis)
            scaled_pred = []
            scaled_conf = []
            detections = torch.Tensor()
            for i, (det, conf, cls) in enumerate(zip(preds, confs, clss)):
                if det is not None and len(det):
                    if names[int(cls)] not in config["needed_classes"]:
                        continue
                    det = xyxy_to_xywh(det)
                    # det = scale_coords(img_shape, det, im0.shape)
                    scaled_pred.append(det)
                    scaled_conf.append(conf)
                detections = torch.Tensor(scaled_pred)
                confidences = torch.Tensor(scaled_conf)
                # Pass detections to deepsort
            if len(detections) != 0:
                outputs = deepsort.update(detections, confidences, im0)
                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)
                    # print('bbox_xywh ', bbox_xywh, 'id', identities)
                    counter.update_identities(identities)
                    for bbox_tracked, id_tracked in zip(bbox_xyxy, identities):
                        ratio_initial = find_ratio_ofbboxes(
                            bbox=bbox_tracked, rect_compare=rect_around_door)
                        ratio_door = find_ratio_ofbboxes(
                            bbox=bbox_tracked, rect_compare=rect_door)
                        #  person is inside the door contour for the first time
                        if ratio_initial > 0.2:
                            if VideoHandler.counter_frames_indoor == 0:
                                #     flag marking the start of recording
                                VideoHandler.start_video(id_tracked)
                            flag_anyone_in_door = True
                        elif ratio_initial > 0.2 and id_tracked not in VideoHandler.id_inside_door_detected:
                            VideoHandler.continue_opened_video(id=id_tracked,
                                                               seconds=3)
                            flag_anyone_in_door = True
                        if id_tracked not in counter.people_init or counter.people_init[
                                id_tracked] == 0:
                            counter.obj_initialized(id_tracked)
                            if ratio_door >= 0.2 and low_border < bbox_tracked[
                                    3] < high_border:
                                #     was initialized in door, probably going out of office
                                counter.people_init[id_tracked] = 2
                            elif ratio_door < 0.2:
                                #     initialized in the corridor, mb going in
                                counter.people_init[id_tracked] = 1
                            # else:
                            #     # res is None, means that object is not in door contour
                            #     counter.people_init[id_tracked] = 1
                            counter.frame_age_counter[id_tracked] = 0
                            counter.people_bbox[id_tracked] = bbox_tracked
                        counter.cur_bbox[id_tracked] = bbox_tracked
            else:
                deepsort.increment_ages()
                if counter.need_to_clear():
                    counter.clear_all()
            # Stream results
            vals_to_del = []
            for val in counter.people_init.keys():
                # check bbox also
                cur_c = find_centroid(counter.cur_bbox[val])
                centroid_distance = np.sum(
                    np.array([(door_c[i] - cur_c[i])**2
                              for i in range(len(door_c))]))
                ratio = find_ratio_ofbboxes(bbox=counter.cur_bbox[val],
                                            rect_compare=rect_door)
                if val in lost_ids and counter.people_init[val] != -1:
                    # if vector_person < 0 then current coord is less than initialized, it means that man is going
                    # in the exit direction
                    if counter.people_init[val] == 2 \
                            and ratio < 0.4 and centroid_distance > 5000:
                        print('ratio out: {}\n centroids: {}\n'.format(
                            ratio, centroid_distance))
                        counter.get_out()
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(
                            action_occured="вышел из кабинета")  # "left the office"
                        vals_to_del.append(val)

                    elif counter.people_init[val] == 1 \
                            and ratio >= 0.4 and centroid_distance < 5000:
                        print('ratio in: {}\n centroids: {}\n'.format(
                            ratio, centroid_distance))
                        counter.get_in()
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(
                            action_occured="зашел внутрь")  # "went inside"
                        vals_to_del.append(val)
                    lost_ids.remove(val)

                # TODO maybe delete this condition
                elif counter.frame_age_counter.get(val, 0) >= counter.max_frame_age_counter \
                        and counter.people_init[val] == 2:

                    if ratio < 0.2 and centroid_distance > 10000:
                        counter.get_out()
                        print('ratio out max frames: ', ratio)
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(action_occured="вышел")
                        vals_to_del.append(val)
                    counter.age_counter[val] = 0

                counter.clear_lost_ids()

            for valtodel in vals_to_del:
                counter.delete_person_data(track_id=valtodel)

            ins, outs = counter.show_counter()
            cv2.rectangle(im0, (0, 0), (250, 50), (0, 0, 0), -1, 8)

            cv2.rectangle(im0, (int(door_array[0]), int(door_array[1])),
                          (int(door_array[2]), int(door_array[3])), COLOR_DOOR,
                          3)

            cv2.rectangle(
                im0, (int(around_door_array[0]), int(around_door_array[1])),
                (int(around_door_array[2]), int(around_door_array[3])),
                COLOR_AROUND_DOOR, 3)

            cv2.putText(im0, "in: {}, out: {} ".format(ins, outs), (10, 35), 0,
                        1e-3 * im0.shape[0], (255, 255, 255), 3)

            cv2.line(im0, (door_array[0], low_border), (680, low_border),
                     COLOR_LINE, 4)
            cv2.line(im0, (door_array[0], high_border), (680, high_border),
                     COLOR_LINE, 4)

            if VideoHandler.stop_writing(im0):
                # send_new_posts(video_name, action_occured)
                sock.sendall(
                    bytes(
                        VideoHandler.video_name + ":" +
                        VideoHandler.action_occured, "utf-8"))
                data = sock.recv(100)
                print('Received', repr(data.decode("utf-8")))
                sent_videos.add(VideoHandler.video_name)
                with open('data_files/logs2.txt', 'a',
                          encoding="utf-8-sig") as wr:
                    wr.write(
                        'video {}, action: {}, centroid: {}, ratio_init: {}, ratio_door: {}, ratio: {} \n'
                        .format(VideoHandler.video_name,
                                VideoHandler.action_occured, centroid_distance,
                                ratio_initial, ratio_door, ratio))

                print('_________________video was sent _________________')

                VideoHandler = Writer()
                VideoHandler.set_fps(fps)

            else:
                VideoHandler.continue_writing(im0, flag_anyone_in_door)
            if view_img:
                cv2.imshow('image', im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            delta_time = (time.time() - t0)
            # t2_ds = time.time()
            # print('%s Torch:. (%.3fs)' % (s, t2 - t1))
            # print('Full pipe. (%.3fs)' % (t2_ds - t0_ds))
            if len(fpeses) < 15:
                fpeses.append(round(1 / delta_time))
                print(delta_time)
            elif len(fpeses) == 15:
                # fps = round(np.median(np.array(fpeses)))
                median_fps = float(np.median(np.array(fpeses)))
                fps = round(median_fps, 2)
                print('max fps: ', fps)
                fps = 20
                VideoHandler.set_fps(fps)
                counter.set_fps(fps)
                fpeses.append(fps)
                motion_detection = True
            else:
                if VideoHandler.flag_writing_video:
                    print('\nwriting video')
                if VideoHandler.flag_stop_writing:
                    print('stop writing')
                if flag_anyone_in_door:
                    print('anyone in door')
                if VideoHandler.counter_frames_indoor:
                    print('counter frames indoor: {}'.format(
                        VideoHandler.counter_frames_indoor))
import cv2
from time import sleep
import time
import subprocess as sp
from utils.yolo_classes import get_cls_dict
from utils.visualization import BBoxVisualization
from utils.yolo_with_plugins import TrtYOLO
import pycuda.autoinit

cap = cv2.VideoCapture('rtsp://localhost:8554/stream_input')
cls_dict = get_cls_dict(80)
h = w = 288
model = 'yolov4-tiny-288'
trt_yolo = TrtYOLO(model, (h, w))
vis = BBoxVisualization(cls_dict)
rtsp_server_output = 'rtsp://localhost:8554/stream_output'
command = ['ffmpeg',
               '-re',
               # '-s', sizeStr,
               # '-r', str(fps),  # rtsp fps (from input server)
               # '-f', 'v4l2',
               '-i', '-',

               # You can change ffmpeg parameter after this item.
               # '-pix_fmt', 'yuv420p',
               # '-r', '30',  # output fps
               # '-g', '50',
               # '-c:v', 'libx264',
               # '-b:v', '2M',
               # '-bufsize', '64M',
               # '-maxrate', "4M",
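               # (The original snippet is truncated here. A plausible
               #  completion, an assumption rather than the original code,
               #  would close the argument list and target the output URL
               #  defined above:)
               '-f', 'rtsp',
               rtsp_server_output]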
def detect(config):
    sent_videos = set()
    video_name = ""
    fpeses = []
    fps = 0

    # door_array = select_object()
    # door_array = [475, 69, 557, 258]
    global flag, vid_writer, lost_ids
    # initial parameters
    # door_array = [528, 21, 581, 315]
    # door_array = [596, 76, 650, 295]  #  18 stream
    door_array = [611, 70, 663, 310]
    # around_door_array = [572, 79, 694, 306]  #
    # around_door_array = [470, 34, 722, 391]
    around_door_array = [507, 24, 724, 374]
    low_border = 225
    #
    door_c = find_centroid(door_array)
    rect_door = Rectangle(door_array[0], door_array[1], door_array[2], door_array[3])
    rect_around_door = Rectangle(around_door_array[0], around_door_array[1], around_door_array[2], around_door_array[3])
    # socket
    HOST = "localhost"
    PORT = 8084
    # camera info
    save_img = True
    imgsz = (416, 416) if ONNX_EXPORT else config[
        "img_size"]  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img = config["output"], config["source"], config["weights"], \
                                           config["half"], config["view_img"]
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')
    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(config["config_deepsort"])
    # initial objects of classes
    counter = Counter(counter_in=0, counter_out=0, track_id=0)
    VideoHandler = Writer()
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)
    # Initialize device, weights etc.
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    # Initialize colors
    names = load_classes(config["names"])
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    if config["category_num"] <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % config["category_num"])
    if not os.path.isfile('yolo/%s.trt' % config["model"]):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % config["model"])

    cap = cv2.VideoCapture(config["source"])
    if not cap.isOpened():
        raise SystemExit('ERROR: failed to open the input video file!')
    frame_width, frame_height = int(cap.get(3)), int(cap.get(4))

    cls_dict = get_cls_dict(config["category_num"])
    vis = BBoxVisualization(cls_dict)
    h, w = get_input_shape(config["model"])
    trt_yolo = TrtYOLO(config["model"], (h, w), config["category_num"], config["letter_box"])


    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.connect((HOST, PORT))
        img_shape = (288, 288)
        # for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        while True:
            ret, im0 = cap.read()
            if not ret:
                break
 
            preds, confs, clss = perform_detection(frame=im0, trt_yolo=trt_yolo, conf_th=config["conf_thres"], vis=vis)

            flag_move = False
            flag_anyone_in_door = False
            t0 = time.time()
            ratio_detection = 0

            # Process detections
            lost_ids = counter.return_lost_ids()
            for i, (det, conf, cls) in enumerate(zip(preds, confs, clss)):

                if det is not None and len(det):
                    # Rescale boxes from imgsz to im0 size
                    # det = scale_coords(img_shape, det, im0.shape).round()
                    if names[int(cls)] not in config["needed_classes"]:
                        continue
                    # bbox_xywh = []
                    # confs = []
                    # Write results
                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(det, im0, label=label, color=colors[int(cls)])

            detections = torch.Tensor(preds)
            confidences = torch.Tensor(confs)

            # Pass detections to deepsort
            if len(detections) == 0:
                continue
            outputs = deepsort.update(detections, confidences, im0)
            print('detections ', detections)
            print('outputs ', outputs)          

            # draw boxes for visualization
            if len(outputs) > 0:
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, -1]
                draw_boxes(im0, bbox_xyxy, identities)
                print('bbox_xyxy ', bbox_xyxy)
                counter.update_identities(identities)

                for bbox_tracked, id_tracked in zip(bbox_xyxy, identities):

                    rect_detection = Rectangle(bbox_tracked[0], bbox_tracked[1],
                                               bbox_tracked[2], bbox_tracked[3])
                    inter_detection = rect_detection & rect_around_door
                    if inter_detection:
                        inter_square_detection = rect_square(*inter_detection)
                        cur_square_detection = rect_square(*rect_detection)
                        try:
                            ratio_detection = inter_square_detection / cur_square_detection
                        except ZeroDivisionError:
                            ratio_detection = 0
                        #  person is inside the door contour for the first time
                    if ratio_detection > 0.2:
                        if VideoHandler.counter_frames_indoor == 0:
                            #     flag marking the start of recording
                            VideoHandler.start_video(id_tracked)
                        flag_anyone_in_door = True

                    elif ratio_detection > 0.2 and id_tracked not in VideoHandler.id_inside_door_detected:
                        VideoHandler.continue_opened_video(id=id_tracked, seconds=3)
                        flag_anyone_in_door = True

                    # elif ratio_detection > 0.6 and counter.people_init.get(id_tracked) == 1:
                    #     VideoHandler.continue_opened_video(id=id_tracked, seconds=0.005)

                    if id_tracked not in counter.people_init or counter.people_init[id_tracked] == 0:
                        counter.obj_initialized(id_tracked)
                        rect_head = Rectangle(bbox_tracked[0], bbox_tracked[1], bbox_tracked[2],
                                              bbox_tracked[3])
                        intersection = rect_head & rect_door
                        if intersection:
                            intersection_square = rect_square(*intersection)
                            head_square = rect_square(*rect_head)
                            rat = intersection_square / head_square
                            if rat >= 0.4 and bbox_tracked[3] > low_border:
                                #     was initialized in door, probably going out of office
                                counter.people_init[id_tracked] = 2
                            elif rat < 0.4:
                                #     initialized in the corridor, mb going in
                                counter.people_init[id_tracked] = 1
                        else:
                            # res is None, means that object is not in door contour
                            counter.people_init[id_tracked] = 1
                        counter.frame_age_counter[id_tracked] = 0

                        counter.people_bbox[id_tracked] = bbox_tracked

                    counter.cur_bbox[id_tracked] = bbox_tracked
            else:
                deepsort.increment_ages()
            # Print time (inference + NMS)
            t2 = torch_utils.time_synchronized()

                # Stream results
            vals_to_del = []
            for val in counter.people_init.keys():
                # check bbox also
                inter = 0
                cur_square = 0
                ratio = 0
                cur_c = find_centroid(counter.cur_bbox[val])
                centroid_distance = np.sum(np.array([(door_c[i] - cur_c[i]) ** 2 for i in range(len(door_c))]))

                # init_c = find_centroid(counter.people_bbox[val])
                # vector_person = (cur_c[0] - init_c[0],
                #                  cur_c[1] - init_c[1])

                rect_cur = Rectangle(counter.cur_bbox[val][0], counter.cur_bbox[val][1],
                                     counter.cur_bbox[val][2], counter.cur_bbox[val][3])
                inter = rect_cur & rect_door

                if val in lost_ids and counter.people_init[val] != -1:

                    if inter:
                        inter_square = rect_square(*inter)
                        cur_square = rect_square(*rect_cur)
                        try:
                            ratio = inter_square / cur_square

                        except ZeroDivisionError:
                            ratio = 0
                    # if vector_person < 0 then current coord is less than initialized, it means that man is going
                    # in the exit direction

                    if counter.people_init[val] == 2 \
                            and ratio < 0.4 and centroid_distance > 5000:
                        print('ratio out: {}\n centroids: {}\n'.format(ratio, centroid_distance))
                        counter.get_out()
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(action_occured="вышел из кабинета")

                        vals_to_del.append(val)

                    elif counter.people_init[val] == 1 \
                            and ratio >= 0.4 and centroid_distance < 1000:
                        print('ratio in: {}\n centroids: {}\n'.format(ratio, centroid_distance))
                        counter.get_in()
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(action_occured="зашел внутрь")
                        vals_to_del.append(val)

                    lost_ids.remove(val)

                # TODO maybe delete this condition
                elif counter.frame_age_counter.get(val, 0) >= counter.max_frame_age_counter \
                        and counter.people_init[val] == 2:
                    if inter:
                        inter_square = rect_square(*inter)
                        cur_square = rect_square(*rect_cur)
                        try:
                            ratio = inter_square / cur_square
                        except ZeroDivisionError:
                            ratio = 0

                    if ratio < 0.2 and centroid_distance > 10000:
                        counter.get_out()
                        print('ratio out max frames: ', ratio)
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(action_occured="вышел")
                        vals_to_del.append(val)
                    counter.age_counter[val] = 0

                counter.clear_lost_ids()

            for valtodel in vals_to_del:
                counter.delete_person_data(track_id=valtodel)

            ins, outs = counter.show_counter()
            cv2.rectangle(im0, (0, 0), (250, 50),
                          (0, 0, 0), -1, 8)

            cv2.rectangle(im0, (int(door_array[0]), int(door_array[1])),
                          (int(door_array[2]), int(door_array[3])),
                          (23, 158, 21), 3)

            cv2.rectangle(im0, (int(around_door_array[0]), int(around_door_array[1])),
                          (int(around_door_array[2]), int(around_door_array[3])),
                          (48, 58, 221), 3)

            cv2.putText(im0, "in: {}, out: {} ".format(ins, outs), (10, 35), 0,
                        1e-3 * im0.shape[0], (255, 255, 255), 3)

            cv2.line(im0, (door_array[0], low_border), (880, low_border), (214, 4, 54), 4)

            if VideoHandler.stop_writing(im0):
                # send_new_posts(video_name, action_occured)
                sock.sendall(bytes(VideoHandler.video_name + ":" + VideoHandler.action_occured, "utf-8"))
                data = sock.recv(100)
                print('Received', repr(data.decode("utf-8")))
                sent_videos.add(VideoHandler.video_name)
                with open('../data_files/logs2.txt', 'a', encoding="utf-8-sig") as wr:
                    wr.write('video {}, man {}, centroid {} '.format(VideoHandler.video_name, VideoHandler.action_occured, centroid_distance))

                VideoHandler = Writer()
                VideoHandler.set_fps(fps)

            else:
                VideoHandler.continue_writing(im0, flag_anyone_in_door)

            if view_img:
                cv2.imshow('image', im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            delta_time = (time.time() - t0)
            # t2_ds = time.time()
            # print('%s Torch:. (%.3fs)' % (s, t2 - t1))
            # print('Full pipe. (%.3fs)' % (t2_ds - t0_ds))
            if len(fpeses) < 30:
                fpeses.append(round(1 / delta_time))
            elif len(fpeses) == 30:
                # fps = round(np.median(np.array(fpeses)))
                fps = np.median(np.array(fpeses))
                # fps = 3
                print('fps set: ', fps)
                VideoHandler.set_fps(fps)
                counter.set_fps(fps)
                fpeses.append(fps)
                motion_detection = True
            else:
                print('\nflag writing video: ', VideoHandler.flag_writing_video)
                print('flag stop writing: ', VideoHandler.flag_stop_writing)
                print('flag anyone in door: ', flag_anyone_in_door)
                print('counter frames indoor: ', VideoHandler.counter_frames_indoor)
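
The fpeses warm-up above estimates the effective frame rate: it collects
per-frame rates for the first 30 frames, takes their median once, and pins
both VideoHandler and counter to that value for the rest of the run.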
Example #23
def run_detection(od_model, event_detectors, frame_dir, frame_path_list,
                  fram_bbox_dir, json_dir, bbox_video_path):
    fourcc = cv2.VideoWriter_fourcc(*'DIVX')
    video_writer = cv2.VideoWriter(bbox_video_path, fourcc, 20, (640, 360))

    frame_number = 0
    event_results = []
    cls_dict = get_cls_dict(15)
    bbox_visualization = BBoxVisualization(cls_dict)
    for i, frame_name in enumerate(frame_path_list):
        frame_number += 1
        frame = cv2.imread(os.path.join(frame_dir, frame_name))

        results = od_model.inference_by_image(frame)

        frame_bbox = bbox_visualization.draw_bboxes(frame, results)
        cv2.imwrite(os.path.join(fram_bbox_dir, frame_name), frame_bbox)
        video_writer.write(frame_bbox)

        dict_result = dict()
        dict_result["image_path"] = os.path.join(frame_dir, frame_name)
        dict_result["cam_address"] = video_path
        dict_result["module"] = od_model_name
        dict_result["frame_number"] = int(frame_number / extract_fps * fps)
        dict_result["timestamp"] = str(
            convert_framenumber2timestamp(frame_number / extract_fps * fps,
                                          fps))
        dict_result["results"] = []
        dict_result["results"].append({"detection_result": results})

        event_result = dict()
        event_result["cam_address"] = video_path
        event_result["frame_number"] = int(frame_number / extract_fps * fps)
        event_result["timestamp"] = str(
            convert_framenumber2timestamp(frame_number / extract_fps * fps,
                                          fps))
        event_result["event_result"] = dict()

        for event_detector in event_detectors:
            event_result["event_result"][
                event_detector.model_name] = event_detector.inference(
                    frame, dict_result)
        event_results.append(event_result)
        print(
            "\rframe number: {:>6}/{}\t/ extract frame number: {:>6}\t/ timestamp: {:>6}"
            .format(
                frame_number, len(frame_path_list),
                int(frame_number / extract_fps * fps),
                str(
                    convert_framenumber2timestamp(
                        frame_number / extract_fps * fps, fps))),
            end='')

        json_path = os.path.join(json_dir, frame_name.split(".jpg")[0] + ".json")
        with open(json_path, "w") as json_result_file:
            json.dump(dict_result, json_result_file, indent=4)
    video_writer.release()
    print()
    PrintLog.i("Extraction completed successfully (frame count: {})".format(
        frame_number))
    if os.path.exists(bbox_video_path):
        PrintLog.i("BBox video generated successfully (path: {})".format(
            bbox_video_path))
    else:
        PrintLog.i("Failed to generate BBox video.")
    return event_results
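
Note that video_path, od_model_name, extract_fps and fps are not parameters
of run_detection(); in the original code they have to come from the enclosing
module scope.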