# Inference Engine (IECore) variant: reads input via ImageReader/VideoReader and prints keypoints to stdout.
def run_demo(args):
    ie = IECore()
    detector_person = Detector(ie, path_to_model_xml=args.model_od,
                               device=args.device, label_class=args.person_label)
    single_human_pose_estimator = HumanPoseEstimator(ie, path_to_model_xml=args.model_hpe,
                                                     device=args.device)

    if args.input != '':
        img = cv2.imread(args.input[0], cv2.IMREAD_COLOR)
        # If the first input cannot be read as an image, treat the input as a video stream.
        frames_reader, delay = (VideoReader(args.input), 1) if img is None else (ImageReader(args.input), 0)
    else:
        raise ValueError('--input has to be set')

    for frame in frames_reader:
        # Detect people, then run single-person pose estimation on every detected box.
        bboxes = detector_person.detect(frame)
        human_poses = [single_human_pose_estimator.estimate(frame, bbox) for bbox in bboxes]

        # One BGR color per keypoint (17 keypoints).
        colors = [(0, 0, 255),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0)]

        for pose, bbox in zip(human_poses, bboxes):
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[0] + bbox[2], bbox[1] + bbox[3]), (255, 0, 0), 2)
            print("{} keypoints".format(len(pose)))
            for id_kpt, kpt in enumerate(pose):
                print("Position: {} id:{}".format((int(kpt[0]), int(kpt[1])), id_kpt))
                cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 6, colors[id_kpt], -1)

        # Overlay per-frame FPS statistics for the whole pipeline and for each model.
        cv2.putText(frame,
                    'summary: {:.1f} FPS (estimation: {:.1f} FPS / detection: {:.1f} FPS)'.format(
                        float(1 / (detector_person.infer_time + single_human_pose_estimator.infer_time * len(human_poses))),
                        float(1 / single_human_pose_estimator.infer_time),
                        float(1 / detector_person.infer_time)),
                    (5, 15), cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 200))

        if args.no_show:
            continue

        cv2.imshow('Human Pose Estimation Demo', frame)
        key = cv2.waitKey(delay)
        if key == 27:  # Esc
            return
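# The variant above iterates over a frames_reader built from ImageReader or VideoReader, which
# are defined elsewhere in the demo script and are not shown here. The sketch below is only an
# assumption of what such readers could look like, matching how they are called above
# (args.input is treated as a list of paths, and a non-image input falls back to
# cv2.VideoCapture); it is illustrative, not the demo's actual implementation.
import cv2


class ImageReader:
    """Yields each image file from a list of paths as a BGR array."""
    def __init__(self, file_names):
        self.file_names = file_names

    def __iter__(self):
        for name in self.file_names:
            img = cv2.imread(name, cv2.IMREAD_COLOR)
            if img is None:
                raise IOError('Image {} cannot be read'.format(name))
            yield img


class VideoReader:
    """Yields frames from a video file until the stream ends."""
    def __init__(self, file_names):
        self.capture = cv2.VideoCapture(file_names[0])
        if not self.capture.isOpened():
            raise IOError('Video {} cannot be opened'.format(file_names[0]))

    def __iter__(self):
        while True:
            has_frame, frame = self.capture.read()
            if not has_frame:
                return
            yield frame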
# IECore variant using open_images_capture, monitors.Presenter, and optional video output.
def run_demo(args):
    ie = IECore()
    detector_person = Detector(ie, path_to_model_xml=args.model_od,
                               device=args.device, label_class=args.person_label)
    single_human_pose_estimator = HumanPoseEstimator(ie, path_to_model_xml=args.model_hpe,
                                                     device=args.device)

    cap = open_images_capture(args.input, args.loop)
    frame = cap.read()
    if frame is None:
        raise RuntimeError("Can't read an image from the input")
    # waitKey delay: 1 ms for video/camera streams, 0 (wait for a key press) for still images.
    delay = int(cap.get_type() in ('VIDEO', 'CAMERA'))

    video_writer = cv2.VideoWriter()
    if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                             cap.fps(), (frame.shape[1], frame.shape[0])):
        raise RuntimeError("Can't open video writer")

    frames_processed = 0
    presenter = monitors.Presenter(args.utilization_monitors, 25)

    while frame is not None:
        bboxes = detector_person.detect(frame)
        human_poses = [single_human_pose_estimator.estimate(frame, bbox) for bbox in bboxes]

        presenter.drawGraphs(frame)

        # One BGR color per keypoint (17 keypoints).
        colors = [(0, 0, 255),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0)]

        for pose, bbox in zip(human_poses, bboxes):
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[0] + bbox[2], bbox[1] + bbox[3]), (255, 0, 0), 2)
            for id_kpt, kpt in enumerate(pose):
                cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 3, colors[id_kpt], -1)

        cv2.putText(frame,
                    'summary: {:.1f} FPS (estimation: {:.1f} FPS / detection: {:.1f} FPS)'.format(
                        float(1 / (detector_person.infer_time + single_human_pose_estimator.infer_time * len(human_poses))),
                        float(1 / single_human_pose_estimator.infer_time),
                        float(1 / detector_person.infer_time)),
                    (5, 15), cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 200))

        frames_processed += 1
        if video_writer.isOpened() and (args.output_limit <= 0 or frames_processed <= args.output_limit):
            video_writer.write(frame)

        if not args.no_show:
            cv2.imshow('Human Pose Estimation Demo', frame)
            key = cv2.waitKey(delay)
            if key == 27:  # Esc
                break
            presenter.handleKey(key)

        frame = cap.read()

    print(presenter.reportMeans())
# OpenVINO Runtime (Core) variant with logging and PerformanceMetrics-based FPS reporting.
def run_demo(args):
    cap = open_images_capture(args.input, args.loop)

    log.info('OpenVINO Runtime')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    log.info('Reading Object Detection model {}'.format(args.model_od))
    detector_person = Detector(core, args.model_od, device=args.device, label_class=args.person_label)
    log.info('The Object Detection model {} is loaded to {}'.format(args.model_od, args.device))

    log.info('Reading Human Pose Estimation model {}'.format(args.model_hpe))
    single_human_pose_estimator = HumanPoseEstimator(core, args.model_hpe, device=args.device)
    log.info('The Human Pose Estimation model {} is loaded to {}'.format(args.model_hpe, args.device))

    # waitKey delay: 1 ms for video/camera streams, 0 (wait for a key press) for still images.
    delay = int(cap.get_type() in ('VIDEO', 'CAMERA'))
    video_writer = cv2.VideoWriter()
    frames_processed = 0
    presenter = monitors.Presenter(args.utilization_monitors, 25)
    metrics = PerformanceMetrics()

    start_time = perf_counter()
    frame = cap.read()
    if frame is None:
        raise RuntimeError("Can't read an image from the input")

    if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                             cap.fps(), (frame.shape[1], frame.shape[0])):
        raise RuntimeError("Can't open video writer")

    while frame is not None:
        bboxes = detector_person.detect(frame)
        human_poses = [single_human_pose_estimator.estimate(frame, bbox) for bbox in bboxes]

        presenter.drawGraphs(frame)

        # One BGR color per keypoint (17 keypoints).
        colors = [(0, 0, 255),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0)]

        for pose, bbox in zip(human_poses, bboxes):
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[0] + bbox[2], bbox[1] + bbox[3]), (255, 0, 0), 2)
            for id_kpt, kpt in enumerate(pose):
                cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 3, colors[id_kpt], -1)

        metrics.update(start_time, frame)

        frames_processed += 1
        if video_writer.isOpened() and (args.output_limit <= 0 or frames_processed <= args.output_limit):
            video_writer.write(frame)

        if not args.no_show:
            cv2.imshow('Human Pose Estimation Demo', frame)
            key = cv2.waitKey(delay)
            if key == 27:  # Esc
                break
            presenter.handleKey(key)

        start_time = perf_counter()
        frame = cap.read()

    metrics.log_total()
    for rep in presenter.reportMeans():
        log.info(rep)
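# A minimal sketch of the argument parser these run_demo variants assume. The flag names mirror
# the attributes accessed above (args.model_od, args.model_hpe, args.input, ...); the short
# option names, defaults, and help strings are illustrative assumptions, not the demo's actual
# CLI. The first variant indexes args.input as a list, while the later variants pass it to
# open_images_capture as a single value; this sketch follows the later variants.
from argparse import ArgumentParser


def build_argparser():
    parser = ArgumentParser(description='Single human pose estimation demo (sketch)')
    parser.add_argument('--model_od', required=True,
                        help='Path to the person detection model (.xml)')
    parser.add_argument('--model_hpe', required=True,
                        help='Path to the single human pose estimation model (.xml)')
    parser.add_argument('-i', '--input', required=True,
                        help='Image, folder with images, video file, or camera id')
    parser.add_argument('--loop', action='store_true',
                        help='Loop the input stream')
    parser.add_argument('-o', '--output', default='',
                        help='Optional path to the output video file')
    parser.add_argument('--output_limit', type=int, default=1000,
                        help='Frames to store in the output; values <= 0 keep all frames')
    parser.add_argument('-d', '--device', default='CPU',
                        help='Device to run inference on (e.g. CPU, GPU)')
    parser.add_argument('--person_label', type=int, default=1,
                        help='Label of the person class in the detection model output')
    parser.add_argument('--no_show', action='store_true',
                        help='Do not display the demo window')
    parser.add_argument('-u', '--utilization_monitors', default='',
                        help='Resource utilization graphs to display')
    return parser


if __name__ == '__main__':
    # With a parser like this, the demo entry point would simply be:
    run_demo(build_argparser().parse_args())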