def main():
    """Prepares data for the person recognition demo.

    Parses the command line, reads the Python configuration file, opens the
    input sources, builds the person detector and the re-identification
    model, and passes everything to ``run``.

    The process exits with status 1 when ``check_detectors`` does not return
    1 or when an empty ``--config`` value is given.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    # Adjacent string literals are used instead of backslash continuations so
    # that source indentation does not leak into the description/help text.
    parser = argparse.ArgumentParser(
        description='Multi camera multi person tracking live demo script')
    parser.add_argument('-i',
                        type=str,
                        nargs='+',
                        help='Input sources (indexes of cameras or paths to '
                             'video files)',
                        required=True)
    parser.add_argument('--config',
                        type=str,
                        default=os.path.join(current_dir, 'config.py'),
                        required=False,
                        help='Configuration file')

    parser.add_argument('--detections',
                        type=str,
                        help='JSON file with bounding boxes')

    parser.add_argument('-m',
                        '--m_detector',
                        type=str,
                        required=False,
                        help='Path to the person detection model')
    parser.add_argument('--t_detector',
                        type=float,
                        default=0.6,
                        help='Threshold for the person detection model')

    parser.add_argument('--m_segmentation',
                        type=str,
                        required=False,
                        help='Path to the person instance segmentation model')
    parser.add_argument(
        '--t_segmentation',
        type=float,
        default=0.6,
        help='Threshold for person instance segmentation model')

    parser.add_argument('--m_reid',
                        type=str,
                        required=True,
                        help='Path to the person re-identification model')

    parser.add_argument('--output_video',
                        type=str,
                        default='',
                        required=False,
                        help='Optional. Path to output video')
    parser.add_argument(
        '--history_file',
        type=str,
        default='',
        required=False,
        help='Optional. Path to file in JSON format to save results of the demo'
    )
    parser.add_argument(
        '--save_detections',
        type=str,
        default='',
        required=False,
        help='Optional. Path to file in JSON format to save bounding boxes')
    parser.add_argument("--no_show",
                        help="Optional. Don't show output",
                        action='store_true')

    parser.add_argument('-d', '--device', type=str, default='CPU')
    parser.add_argument('-l',
                        '--cpu_extension',
                        help='MKLDNN (CPU)-targeted custom layers.Absolute '
                             'path to a shared library with the kernels impl.',
                        type=str,
                        default=None)
    parser.add_argument('-u',
                        '--utilization_monitors',
                        default='',
                        type=str,
                        help='Optional. List of monitors to show initially.')

    args = parser.parse_args()
    if check_detectors(args) != 1:
        sys.exit(1)

    # An explicitly empty --config value is the only way to get here, since
    # the option has a non-empty default.
    if not args.config:
        log.error(
            'No configuration file specified. Please specify parameter \'--config\''
        )
        sys.exit(1)
    log.info('Reading configuration file {}'.format(args.config))
    config = read_py_config(args.config)

    random.seed(config['random_seed'])
    capture = MulticamCapture(args.i)

    log.info("Creating Inference Engine")
    ie = IECore()

    # Detection source priority: precomputed boxes from a file, then the
    # instance segmentation model, then the plain detector.
    if args.detections:
        person_detector = DetectionsFromFileReader(args.detections,
                                                   args.t_detector)
    elif args.m_segmentation:
        person_detector = MaskRCNN(ie, args.m_segmentation,
                                   args.t_segmentation,
                                   args.device, args.cpu_extension,
                                   capture.get_num_sources())
    else:
        person_detector = Detector(ie, args.m_detector, args.t_detector,
                                   args.device, args.cpu_extension,
                                   capture.get_num_sources())

    # --m_reid is required, so this is falsy only for an explicit empty value.
    if args.m_reid:
        person_recognizer = VectorCNN(ie, args.m_reid, args.device,
                                      args.cpu_extension)
    else:
        person_recognizer = None

    run(args, config, capture, person_detector, person_recognizer)
    log.info('Demo finished successfully')
def main():
    """Run the whiteboard inpainting demo on a video file or camera.

    Each frame is segmented with either the instance or the semantic
    segmentation model (exactly one must be given). Pixels covered by the
    combined detection mask keep their previous value in the accumulated
    board image; all other pixels are refreshed from the
    background-removed frame. The input frame and the board are stacked
    vertically, optionally written to ``--output`` and optionally shown in a
    window.

    Keys handled while the window is shown: Esc stops the demo, ``i`` toggles
    between white-board and black-board mode (inverting the accumulated
    image), any other key is forwarded to the monitors presenter.

    Raises:
        RuntimeError: if the input is not a video/camera, the first frame
            cannot be read, or the video writer cannot be opened.
        ValueError: if both or neither of the segmentation models are set.
    """
    parser = argparse.ArgumentParser(description='Whiteboard inpainting demo')
    parser.add_argument('-i', '--input', required=True,
                        help='Required. Path to a video file or a device node of a web-camera.')
    parser.add_argument('--loop', default=False, action='store_true',
                        help='Optional. Enable reading the input in a loop.')
    parser.add_argument('-o', '--output', required=False,
                        help='Optional. Name of the output file(s) to save.')
    parser.add_argument('-limit', '--output_limit', required=False, default=1000, type=int,
                        help='Optional. Number of frames to store in output. '
                             'If 0 is set, all frames are stored.')
    parser.add_argument('-m_i', '--m_instance_segmentation', type=str, required=False,
                        help='Required. Path to the instance segmentation model.')
    parser.add_argument('-m_s', '--m_semantic_segmentation', type=str, required=False,
                        help='Required. Path to the semantic segmentation model.')
    parser.add_argument('-t', '--threshold', type=float, default=0.6,
                        help='Optional. Threshold for person instance segmentation model.')
    parser.add_argument('--no_show', help="Optional. Don't show output.", action='store_true')
    parser.add_argument('-d', '--device', type=str, default='CPU',
                        help='Optional. Specify a target device to infer on. CPU, GPU, HDDL or MYRIAD is '
                             'acceptable. The demo will look for a suitable plugin for the device specified.')
    # Adjacent literals instead of a backslash continuation keep source
    # indentation out of the help text.
    parser.add_argument('-l', '--cpu_extension', type=str, default=None,
                        help='MKLDNN (CPU)-targeted custom layers. Absolute '
                             'path to a shared library with the kernels impl.')
    parser.add_argument('-u', '--utilization_monitors', default='', type=str,
                        help='Optional. List of monitors to show initially.')
    args = parser.parse_args()

    cap = open_images_capture(args.input, args.loop)
    if cap.get_type() not in ('VIDEO', 'CAMERA'):
        raise RuntimeError("The input should be a video file or a numeric camera ID")

    # Both set or both unset is an error: exactly one model drives the demo.
    if bool(args.m_instance_segmentation) == bool(args.m_semantic_segmentation):
        raise ValueError('Set up exactly one of segmentation models: '
                         '--m_instance_segmentation or --m_semantic_segmentation')

    labels_dir = Path(__file__).resolve().parents[3] / 'data/dataset_classes'
    mouse = MouseClick()
    if not args.no_show:
        cv2.namedWindow(WINNAME)
        cv2.setMouseCallback(WINNAME, mouse.get_points)

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    model_path = args.m_instance_segmentation if args.m_instance_segmentation else args.m_semantic_segmentation
    log.info('Reading model {}'.format(model_path))
    if args.m_instance_segmentation:
        labels_file = str(labels_dir / 'coco_80cl_bkgr.txt')
        segmentation = MaskRCNN(core, args.m_instance_segmentation, labels_file,
                                args.threshold, args.device, args.cpu_extension)
    else:
        # The check above guarantees the semantic model is set here.
        labels_file = str(labels_dir / 'cityscapes_19cl_bkgr.txt')
        segmentation = SemanticSegmentation(core, args.m_semantic_segmentation, labels_file,
                                            args.threshold, args.device, args.cpu_extension)
    log.info('The model {} is loaded to {}'.format(model_path, args.device))

    metrics = PerformanceMetrics()
    video_writer = cv2.VideoWriter()
    black_board = False
    frame_number = 0
    key = -1

    start_time = perf_counter()
    frame = cap.read()
    if frame is None:
        raise RuntimeError("Can't read an image from the input")

    # The output stacks the input frame on top of the board: same width,
    # double height.
    out_frame_size = (frame.shape[1], frame.shape[0] * 2)
    output_frame = np.full((frame.shape[0], frame.shape[1], 3), 255, dtype='uint8')
    presenter = monitors.Presenter(args.utilization_monitors, 20,
                                   (out_frame_size[0] // 4, out_frame_size[1] // 16))
    if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                             cap.fps(), out_frame_size):
        raise RuntimeError("Can't open video writer")

    try:
        while frame is not None:
            detections = segmentation.get_detections([frame])
            expand_mask(detections, frame.shape[1] // 27)

            # Union of all detection masks for this frame (element [2] of
            # each detection), or None when nothing was detected.
            mask = None
            for detection in detections[0]:
                detection_mask = detection[2]
                mask = detection_mask if mask is None else cv2.bitwise_or(mask, detection_mask)

            if mask is not None:
                mask = np.stack([mask, mask, mask], axis=-1)
            else:
                mask = np.zeros(frame.shape, dtype='uint8')

            clear_frame = remove_background(frame, invert_colors=not black_board)

            # Where the mask is non-zero keep the previously accumulated board
            # pixel; elsewhere take the freshly cleaned frame.
            output_frame = np.where(mask, output_frame, clear_frame)
            merged_frame = np.vstack([frame, output_frame])
            merged_frame = cv2.resize(merged_frame, out_frame_size)

            metrics.update(start_time, merged_frame)

            # A non-positive limit means "store every frame".
            within_limit = args.output_limit <= 0 or frame_number < args.output_limit
            if video_writer.isOpened() and within_limit:
                video_writer.write(merged_frame)

            presenter.drawGraphs(merged_frame)
            if not args.no_show:
                cv2.imshow(WINNAME, merged_frame)
                key = check_pressed_keys(key)
                if key == 27:  # 'Esc'
                    break
                if key == ord('i'):  # catch pressing of key 'i'
                    black_board = not black_board
                    output_frame = 255 - output_frame
                else:
                    presenter.handleKey(key)

            if mouse.crop_available:
                # Normalise the two selected points into a top-left /
                # bottom-right rectangle clamped to the board size.
                (px0, py0), (px1, py1) = mouse.points[0][:2], mouse.points[1][:2]
                x0, x1 = min(px0, px1), max(px0, px1)
                y0, y1 = min(py0, py1), max(py0, py1)
                x1 = min(x1, output_frame.shape[1] - 1)
                y1 = min(y1, output_frame.shape[0] - 1)
                board = output_frame[y0: y1, x0: x1, :]
                if board.shape[0] > 0 and board.shape[1] > 0:
                    cv2.namedWindow('Board', cv2.WINDOW_KEEPRATIO)
                    cv2.imshow('Board', board)

            frame_number += 1
            start_time = perf_counter()
            frame = cap.read()
    finally:
        # Finalise the output file even if processing fails mid-stream;
        # releasing a writer that was never opened is harmless.
        video_writer.release()

    metrics.log_total()
    for rep in presenter.reportMeans():
        log.info(rep)
# Example #3
# 0
def main():
    """Prepares data for the object tracking demo.

    Parses the command line, reads the Python configuration file, opens the
    input sources, builds the object detector and the re-identification
    model, and passes everything to ``run``.

    The process exits with status 1 when ``check_detectors`` does not return
    1 or when an empty ``--config`` value is given.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    # Adjacent string literals would be used for any wrapped text; a
    # backslash continuation would leak source indentation into the string.
    parser = argparse.ArgumentParser(
        description='Multi camera multi object tracking live demo script')
    parser.add_argument(
        '-i',
        '--input',
        required=True,
        nargs='+',
        help=
        'Required. Input sources (indexes of cameras or paths to video files)')
    parser.add_argument('--loop',
                        default=False,
                        action='store_true',
                        help='Optional. Enable reading the input in a loop')
    parser.add_argument('--config',
                        type=str,
                        default=os.path.join(current_dir, 'configs/person.py'),
                        required=False,
                        help='Configuration file')

    parser.add_argument('--detections',
                        type=str,
                        help='JSON file with bounding boxes')

    parser.add_argument('-m',
                        '--m_detector',
                        type=str,
                        required=False,
                        help='Path to the object detection model')
    parser.add_argument('--t_detector',
                        type=float,
                        default=0.6,
                        help='Threshold for the object detection model')

    parser.add_argument('--m_segmentation',
                        type=str,
                        required=False,
                        help='Path to the object instance segmentation model')
    parser.add_argument(
        '--t_segmentation',
        type=float,
        default=0.6,
        help='Threshold for object instance segmentation model')

    parser.add_argument(
        '--m_reid',
        type=str,
        required=True,
        help='Required. Path to the object re-identification model')

    parser.add_argument('--output_video',
                        type=str,
                        default='',
                        required=False,
                        help='Optional. Path to output video')
    parser.add_argument(
        '--history_file',
        type=str,
        default='',
        required=False,
        help='Optional. Path to file in JSON format to save results of the demo'
    )
    parser.add_argument(
        '--save_detections',
        type=str,
        default='',
        required=False,
        help='Optional. Path to file in JSON format to save bounding boxes')
    parser.add_argument("--no_show",
                        help="Optional. Don't show output",
                        action='store_true')

    parser.add_argument('-d', '--device', type=str, default='CPU')
    parser.add_argument('-u',
                        '--utilization_monitors',
                        default='',
                        type=str,
                        help='Optional. List of monitors to show initially.')

    args = parser.parse_args()
    if check_detectors(args) != 1:
        sys.exit(1)

    # An explicitly empty --config value is the only way to get here, since
    # the option has a non-empty default.
    if not args.config:
        log.error(
            'No configuration file specified. Please specify parameter \'--config\''
        )
        sys.exit(1)
    log.debug('Reading config from {}'.format(args.config))
    config = read_py_config(args.config)

    random.seed(config.random_seed)
    capture = MulticamCapture(args.input, args.loop)

    log.info('OpenVINO Runtime')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    # Detection source priority: precomputed boxes from a file, then the
    # instance segmentation model, then the plain detector.
    if args.detections:
        object_detector = DetectionsFromFileReader(args.detections,
                                                   args.t_detector)
    elif args.m_segmentation:
        object_detector = MaskRCNN(core, args.m_segmentation,
                                   config.obj_segm.trg_classes,
                                   args.t_segmentation, args.device,
                                   capture.get_num_sources())
    else:
        object_detector = Detector(core, args.m_detector,
                                   config.obj_det.trg_classes, args.t_detector,
                                   args.device, capture.get_num_sources())

    # --m_reid is required, so this is falsy only for an explicit empty value.
    if args.m_reid:
        object_recognizer = VectorCNN(core, args.m_reid, args.device)
    else:
        object_recognizer = None

    run(args, config, capture, object_detector, object_recognizer)
def main():
    """Run the whiteboard inpainting demo on a single camera or video file.

    Each frame is segmented with either the instance or the semantic
    segmentation model (exactly one must be given). Pixels covered by the
    combined detection mask keep their previous value in the accumulated
    board image; all other pixels are refreshed from the
    background-removed frame. The input frame and the board are stacked
    vertically, optionally written to ``--output_video`` and optionally shown
    in a window. Per-frame progress and FPS are printed to stdout.

    Keys handled while the window is shown: Esc stops the demo, ``i`` toggles
    between white-board and black-board mode (inverting the accumulated
    image), any other key is forwarded to the monitors presenter.

    Raises:
        ValueError: if both or neither of the segmentation models are set.
    """
    parser = argparse.ArgumentParser(description='Whiteboard inpainting demo')
    # Adjacent literals instead of backslash continuations keep source
    # indentation out of the help text.
    parser.add_argument('-i', type=str,
                        help='Input sources (index of camera '
                             'or path to a video file)',
                        required=True)
    parser.add_argument('-m_i', '--m_instance_segmentation', type=str, required=False,
                        help='Path to the instance segmentation model')
    parser.add_argument('-m_s', '--m_semantic_segmentation', type=str, required=False,
                        help='Path to the semantic segmentation model')
    parser.add_argument('-t', '--threshold', type=float, default=0.6,
                        help='Threshold for person instance segmentation model')
    parser.add_argument('--output_video', type=str, default='', required=False,
                        help='Optional. Path to output video')
    parser.add_argument("--no_show", help="Optional. Don't show output", action='store_true')

    parser.add_argument('-d', '--device', type=str, default='CPU',
                        help='Optional. Specify a target device to infer on. CPU, GPU, FPGA, HDDL or MYRIAD is '
                             'acceptable. The demo will look for a suitable plugin for the device specified')
    parser.add_argument('-l', '--cpu_extension', type=str, default=None,
                        help='MKLDNN (CPU)-targeted custom layers.Absolute '
                             'path to a shared library with the kernels impl.')
    parser.add_argument('-u', '--utilization_monitors', default='', type=str,
                        help='Optional. List of monitors to show initially')
    args = parser.parse_args()

    capture = VideoCapture(args.i)

    # Both set or both unset is an error: exactly one model drives the demo.
    if bool(args.m_instance_segmentation) == bool(args.m_semantic_segmentation):
        raise ValueError('Set up exactly one of segmentation models: '
                         '--m_instance_segmentation or --m_semantic_segmentation')

    frame_size, fps = capture.get_source_parameters()
    # The output stacks the input frame on top of the board: same width,
    # double height.
    out_frame_size = (int(frame_size[0]), int(frame_size[1] * 2))
    presenter = monitors.Presenter(args.utilization_monitors, 20,
                                   (out_frame_size[0] // 4, out_frame_size[1] // 16))

    root_dir = osp.dirname(osp.abspath(__file__))

    mouse = MouseClick()
    if not args.no_show:
        cv2.namedWindow(WINNAME)
        cv2.setMouseCallback(WINNAME, mouse.get_points)

    if args.output_video:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        output_video = cv2.VideoWriter(args.output_video, fourcc, fps, out_frame_size)
    else:
        output_video = None

    try:
        log.info("Initializing Inference Engine")
        ie = IECore()
        if args.m_instance_segmentation:
            labels_file = osp.join(root_dir, 'coco_labels.txt')
            segmentation = MaskRCNN(ie, args.m_instance_segmentation, labels_file,
                                    args.threshold, args.device, args.cpu_extension)
        else:
            # The check above guarantees the semantic model is set here.
            labels_file = osp.join(root_dir, 'cityscapes_labels.txt')
            segmentation = SemanticSegmentation(ie, args.m_semantic_segmentation, labels_file,
                                                args.threshold, args.device, args.cpu_extension)

        black_board = False
        # NOTE(review): assumes frame_size holds integers (width, height);
        # np.full rejects float dimensions - confirm against VideoCapture.
        output_frame = np.full((frame_size[1], frame_size[0], 3), 255, dtype='uint8')
        frame_number = 0
        key = -1

        while True:
            # perf_counter is monotonic and high-resolution, unlike time.time.
            start = time.perf_counter()
            _, frame = capture.get_frame()
            if frame is None:
                break

            detections = segmentation.get_detections([frame])
            expand_mask(detections, frame_size[0] // 27)

            # Union of all detection masks for this frame (element [2] of
            # each detection), or None when nothing was detected.
            mask = None
            for detection in detections[0]:
                detection_mask = detection[2]
                mask = detection_mask if mask is None else cv2.bitwise_or(mask, detection_mask)

            if mask is not None:
                mask = np.stack([mask, mask, mask], axis=-1)
            else:
                mask = np.zeros(frame.shape, dtype='uint8')

            clear_frame = remove_background(frame, invert_colors=not black_board)

            # Where the mask is non-zero keep the previously accumulated board
            # pixel; elsewhere take the freshly cleaned frame.
            output_frame = np.where(mask, output_frame, clear_frame)
            merged_frame = np.vstack([frame, output_frame])
            merged_frame = cv2.resize(merged_frame, out_frame_size)

            if output_video is not None:
                output_video.write(merged_frame)

            presenter.drawGraphs(merged_frame)
            if not args.no_show:
                cv2.imshow(WINNAME, merged_frame)
                key = check_pressed_keys(key)
                if key == 27:  # 'Esc'
                    break
                if key == ord('i'):  # catch pressing of key 'i'
                    black_board = not black_board
                    output_frame = 255 - output_frame
                else:
                    presenter.handleKey(key)

            if mouse.crop_available:
                # Normalise the two selected points into a top-left /
                # bottom-right rectangle clamped to the board size.
                (px0, py0), (px1, py1) = mouse.points[0][:2], mouse.points[1][:2]
                x0, x1 = min(px0, px1), max(px0, px1)
                y0, y1 = min(py0, py1), max(py0, py1)
                x1 = min(x1, output_frame.shape[1] - 1)
                y1 = min(y1, output_frame.shape[0] - 1)
                board = output_frame[y0: y1, x0: x1, :]
                if board.shape[0] > 0 and board.shape[1] > 0:
                    cv2.namedWindow('Board', cv2.WINDOW_KEEPRATIO)
                    cv2.imshow('Board', board)

            elapsed = time.perf_counter() - start
            # Guard against a zero interval so the progress line never raises.
            current_fps = 1. / elapsed if elapsed > 0 else float('inf')
            print('\rProcessing frame: {}, fps = {:.3}'
                  .format(frame_number, current_fps), end="")
            frame_number += 1
        print('')

        log.info(presenter.reportMeans())
    finally:
        # Finalise the output file even if processing fails mid-stream.
        if output_video is not None:
            output_video.release()