def __init__(self, env, Q):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    self.Q = Q

    # Performance metrics --------
    self.performance = PerformanceMetrics()
    self.iter = 0
    self.score = 0
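# A minimal usage sketch (assumptions, not from the original project): it uses a dict-based
# Q-table keyed by (state, action) pairs and a hypothetical `env` object compatible with the
# base Agent class; the real project wires this up through its own Environment machinery.
from collections import defaultdict

def make_learning_agent(env):
    Q = defaultdict(float)        # Q[(state, action)] -> estimated return, defaults to 0.0
    return LearningAgent(env, Q)  # matches the constructor signature shown above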
def main():
    args = build_argparser().parse_args()

    log.info('Initializing Inference Engine...')
    ie = IECore()

    plugin_config = get_plugin_configs(args.device, args.num_streams, args.num_threads)

    log.info('Loading network...')
    model = get_model(ie, args)
    has_landmarks = args.architecture_type == 'retina'

    detector_pipeline = AsyncPipeline(ie, model, plugin_config,
                                      device=args.device, max_num_requests=args.num_infer_requests)

    try:
        input_stream = int(args.input)
    except ValueError:
        input_stream = args.input
    cap = cv2.VideoCapture(input_stream)
    if not cap.isOpened():
        log.error('OpenCV: Failed to open capture: ' + str(input_stream))
        sys.exit(1)

    next_frame_id = 0
    next_frame_id_to_show = 0

    log.info('Starting inference...')
    print("To close the application, press 'CTRL+C' here or switch to the output window and press ESC key")

    palette = ColorPalette(len(model.labels) if model.labels else 100)
    presenter = monitors.Presenter(args.utilization_monitors, 55,
                                   (round(cap.get(cv2.CAP_PROP_FRAME_WIDTH) / 4),
                                    round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) / 8)))
    metrics = PerformanceMetrics()

    while cap.isOpened():
        if detector_pipeline.callback_exceptions:
            raise detector_pipeline.callback_exceptions[0]
        # Process all completed requests
        results = detector_pipeline.get_result(next_frame_id_to_show)
        if results:
            objects, frame_meta = results
            frame = frame_meta['frame']
            start_time = frame_meta['start_time']

            if len(objects) and args.raw_output_message:
                print_raw_results(frame.shape[:2], objects, model.labels, args.prob_threshold)

            presenter.drawGraphs(frame)
            frame = draw_detections(frame, objects, palette, model.labels, args.prob_threshold, has_landmarks)
            metrics.update(start_time, frame)

            if not args.no_show:
                cv2.imshow('Detection Results', frame)
                key = cv2.waitKey(1)

                ESC_KEY = 27
                # Quit.
                if key in {ord('q'), ord('Q'), ESC_KEY}:
                    break
                presenter.handleKey(key)
            next_frame_id_to_show += 1
            continue

        if detector_pipeline.is_ready():
            # Get new image/frame
            start_time = perf_counter()
            ret, frame = cap.read()
            if not ret:
                if args.loop:
                    cap.open(input_stream)
                else:
                    cap.release()
                continue

            # Submit for inference
            detector_pipeline.submit_data(frame, next_frame_id, {'frame': frame, 'start_time': start_time})
            next_frame_id += 1
        else:
            # Wait for empty request
            detector_pipeline.await_any()

    detector_pipeline.await_all()
    # Process completed requests
    while detector_pipeline.has_completed_request():
        results = detector_pipeline.get_result(next_frame_id_to_show)
        if results:
            objects, frame_meta = results
            frame = frame_meta['frame']
            start_time = frame_meta['start_time']

            if len(objects) and args.raw_output_message:
                print_raw_results(frame.shape[:2], objects, model.labels, args.prob_threshold)

            presenter.drawGraphs(frame)
            frame = draw_detections(frame, objects, palette, model.labels, args.prob_threshold, has_landmarks)
            metrics.update(start_time, frame)

            if not args.no_show:
                cv2.imshow('Detection Results', frame)
                key = cv2.waitKey(1)

                ESC_KEY = 27
                # Quit.
                if key in {ord('q'), ord('Q'), ESC_KEY}:
                    break
                presenter.handleKey(key)
            next_frame_id_to_show += 1
        else:
            break

    metrics.print_total()
    print(presenter.reportMeans())
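# `get_plugin_configs` above comes from the demo's shared helpers and is not shown here. A rough
# sketch of the idea (an assumption, not the actual helper): translate the optional
# num_streams/num_threads arguments into the plugin config keys also used by the YOLO demo
# further below (CPU_THREADS_NUM, CPU_THROUGHPUT_STREAMS, GPU_THROUGHPUT_STREAMS).
def get_plugin_configs(device, num_streams, num_threads):
    config = {}
    if 'CPU' in device:
        if num_threads:
            config['CPU_THREADS_NUM'] = str(num_threads)
        if num_streams:
            config['CPU_THROUGHPUT_STREAMS'] = str(num_streams)
    if 'GPU' in device and num_streams:
        config['GPU_THROUGHPUT_STREAMS'] = str(num_streams)
    return config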
def main():
    args = build_argparser().parse_args()

    log.info('Initializing Inference Engine...')
    ie = IECore()

    plugin_config = get_user_config(args.device, args.num_streams, args.num_threads)

    cap = open_images_capture(args.input, args.loop)
    start_time = perf_counter()
    frame = cap.read()
    if frame is None:
        raise RuntimeError("Can't read an image from the input")

    log.info('Loading network...')
    model = Deblurring(ie, args.model, frame.shape)

    pipeline = AsyncPipeline(ie, model, plugin_config,
                             device=args.device, max_num_requests=args.num_infer_requests)

    log.info('Starting inference...')
    print("To close the application, press 'CTRL+C' here or switch to the output window and press ESC key")

    pipeline.submit_data(frame, 0, {'frame': frame, 'start_time': start_time})
    next_frame_id = 1
    next_frame_id_to_show = 0

    metrics = PerformanceMetrics()
    presenter = monitors.Presenter(args.utilization_monitors, 55,
                                   (round(frame.shape[1] / 4), round(frame.shape[0] / 8)))
    video_writer = cv2.VideoWriter()
    if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                             cap.fps(), (2 * frame.shape[1], frame.shape[0])):
        raise RuntimeError("Can't open video writer")

    while True:
        if pipeline.is_ready():
            # Get new image/frame
            start_time = perf_counter()
            frame = cap.read()
            if frame is None:
                break
            # Submit for inference
            pipeline.submit_data(frame, next_frame_id, {'frame': frame, 'start_time': start_time})
            next_frame_id += 1
        else:
            # Wait for empty request
            pipeline.await_any()

        if pipeline.callback_exceptions:
            raise pipeline.callback_exceptions[0]
        # Process all completed requests
        results = pipeline.get_result(next_frame_id_to_show)
        if results:
            result_frame, frame_meta = results
            input_frame = frame_meta['frame']
            start_time = frame_meta['start_time']

            if input_frame.shape != result_frame.shape:
                input_frame = cv2.resize(input_frame, (result_frame.shape[1], result_frame.shape[0]))
            final_image = cv2.hconcat([input_frame, result_frame])

            presenter.drawGraphs(final_image)
            metrics.update(start_time, final_image)

            if video_writer.isOpened() and (args.output_limit <= 0 or
                                            next_frame_id_to_show <= args.output_limit - 1):
                video_writer.write(final_image)

            if not args.no_show:
                cv2.imshow('Deblurring Results', final_image)
                key = cv2.waitKey(1)
                # cv2.waitKey returns an int, so compare against key codes rather than strings
                if key in {ord('q'), ord('Q'), 27}:  # 27 is ESC
                    break
                presenter.handleKey(key)
            next_frame_id_to_show += 1

    pipeline.await_all()
    # Process completed requests
    while pipeline.has_completed_request():
        results = pipeline.get_result(next_frame_id_to_show)
        if results:
            result_frame, frame_meta = results
            input_frame = frame_meta['frame']
            start_time = frame_meta['start_time']

            if input_frame.shape != result_frame.shape:
                input_frame = cv2.resize(input_frame, (result_frame.shape[1], result_frame.shape[0]))
            final_image = cv2.hconcat([input_frame, result_frame])

            presenter.drawGraphs(final_image)
            metrics.update(start_time, final_image)

            if video_writer.isOpened() and (args.output_limit <= 0 or
                                            next_frame_id_to_show <= args.output_limit - 1):
                video_writer.write(final_image)

            if not args.no_show:
                cv2.imshow('Deblurring Results', final_image)
                key = cv2.waitKey(1)
            next_frame_id_to_show += 1
        else:
            break

    metrics.print_total()
    print(presenter.reportMeans())
def main():
    args = build_argparser().parse_args()

    log.info('Initializing Inference Engine...')
    ie = IECore()

    plugin_config = get_plugin_configs(args.device, args.num_streams, args.num_threads)

    log.info('Loading network...')
    model = get_model(ie, args)

    detector_pipeline = AsyncPipeline(ie, model, plugin_config,
                                      device=args.device, max_num_requests=args.num_infer_requests)

    cap = open_images_capture(args.input, args.loop)

    next_frame_id = 0
    next_frame_id_to_show = 0

    log.info('Starting inference...')
    print("To close the application, press 'CTRL+C' here or switch to the output window and press ESC key")

    palette = ColorPalette(len(model.labels) if model.labels else 100)
    metrics = PerformanceMetrics()
    presenter = None
    video_writer = cv2.VideoWriter()

    while True:
        if detector_pipeline.callback_exceptions:
            raise detector_pipeline.callback_exceptions[0]
        # Process all completed requests
        results = detector_pipeline.get_result(next_frame_id_to_show)
        if results:
            objects, frame_meta = results
            frame = frame_meta['frame']
            start_time = frame_meta['start_time']

            if len(objects) and args.raw_output_message:
                print_raw_results(frame.shape[:2], objects, model.labels, args.prob_threshold)

            presenter.drawGraphs(frame)
            frame = draw_detections(frame, objects, palette, model.labels, args.prob_threshold)
            metrics.update(start_time, frame)

            if video_writer.isOpened() and (args.output_limit <= 0 or
                                            next_frame_id_to_show <= args.output_limit - 1):
                video_writer.write(frame)

            if not args.no_show:
                cv2.imshow('Detection Results', frame)
                key = cv2.waitKey(1)

                ESC_KEY = 27
                # Quit.
                if key in {ord('q'), ord('Q'), ESC_KEY}:
                    break
                presenter.handleKey(key)
            next_frame_id_to_show += 1
            continue

        if detector_pipeline.is_ready():
            # Get new image/frame
            start_time = perf_counter()
            frame = cap.read()
            if frame is None:
                if next_frame_id == 0:
                    raise ValueError("Can't read an image from the input")
                break
            if next_frame_id == 0:
                presenter = monitors.Presenter(args.utilization_monitors, 55,
                                               (round(frame.shape[1] / 4), round(frame.shape[0] / 8)))
                if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                                         cap.fps(), (frame.shape[1], frame.shape[0])):
                    raise RuntimeError("Can't open video writer")

            # Submit for inference
            detector_pipeline.submit_data(frame, next_frame_id, {'frame': frame, 'start_time': start_time})
            next_frame_id += 1
        else:
            # Wait for empty request
            detector_pipeline.await_any()

    detector_pipeline.await_all()
    # Process completed requests
    while detector_pipeline.has_completed_request():
        results = detector_pipeline.get_result(next_frame_id_to_show)
        if results:
            objects, frame_meta = results
            frame = frame_meta['frame']
            start_time = frame_meta['start_time']

            if len(objects) and args.raw_output_message:
                print_raw_results(frame.shape[:2], objects, model.labels, args.prob_threshold)

            presenter.drawGraphs(frame)
            frame = draw_detections(frame, objects, palette, model.labels, args.prob_threshold)
            metrics.update(start_time, frame)

            if video_writer.isOpened() and (args.output_limit <= 0 or
                                            next_frame_id_to_show <= args.output_limit - 1):
                video_writer.write(frame)

            if not args.no_show:
                cv2.imshow('Detection Results', frame)
                key = cv2.waitKey(1)

                ESC_KEY = 27
                # Quit.
                if key in {ord('q'), ord('Q'), ESC_KEY}:
                    break
                presenter.handleKey(key)
            next_frame_id_to_show += 1
        else:
            break

    metrics.print_total()
    print(presenter.reportMeans())
def main():
    args = build_argparser().parse_args()

    log.info('Initializing Inference Engine...')
    ie = IECore()

    plugin_config = get_plugin_configs(args.device, args.num_streams, args.num_threads)

    log.info('Loading network...')
    model = get_model(ie, args)

    detector_pipeline = AsyncPipeline(ie, model, plugin_config,
                                      device=args.device, max_num_requests=args.num_infer_requests)

    ### READ TIME ###
    read_time_start = time.time()
    cap = open_images_capture(args.input, args.loop)
    read_time_end = time.time()

    next_frame_id = 0
    next_frame_id_to_show = 0
    image_id = 0

    log.info('Starting inference...')
    print("To close the application, press 'CTRL+C' here or switch to the output window and press ESC key")

    palette = ColorPalette(len(model.labels) if model.labels else 100)
    metrics = PerformanceMetrics()
    presenter = None
    video_writer = cv2.VideoWriter()

    results_list = []
    detection_ids = [1, 3, 4]
    all_starts = 0

    # NOTE: `images`, `yolo_to_ssd_classes`, `detect_time_list`, `pre_process_list`,
    # `post_process_list` and `post_process_list_two` are expected to be defined at module scope.
    while True:
        print('NEXT FRAME ID', next_frame_id)
        id = images[image_id]
        if next_frame_id == 5000:
            break
        if detector_pipeline.callback_exceptions:
            raise detector_pipeline.callback_exceptions[0]

        # Process all completed requests
        #### DETECTION TIME ####
        detect_time_start = time.time()
        results = detector_pipeline.get_result(next_frame_id_to_show)
        detect_time_end = time.time()
        detect_time_list.append(detect_time_end - detect_time_start)
        if results:
            objects, frame_meta = results
            for detection in objects:
                x = float(detection.xmin)
                y = float(detection.ymin)
                w = float(detection.xmax - detection.xmin)
                h = float(detection.ymax - detection.ymin)
                cls = detection.id
                cls = yolo_to_ssd_classes[cls]
                id = str(id.lstrip('0').split('.')[0])
                conf = detection.score
                # if cls in detection_ids:
                results_list.append({
                    'image_id': int(id),
                    'category_id': cls,
                    'bbox': [x, y, w, h],
                    'score': float(conf)
                })

            frame = frame_meta['frame']
            post_process_start = time.time()
            start_time = frame_meta['start_time']
            all_starts += start_time

            if len(objects) and args.raw_output_message:
                print_raw_results(frame.shape[:2], objects, model.labels,
                                  args.prob_threshold, images[image_id])

            presenter.drawGraphs(frame)
            frame = draw_detections(frame, objects, palette, model.labels,
                                    args.prob_threshold, images[image_id])
            metrics.update(start_time, frame)
            post_process_end = time.time()
            post_process_list.append(post_process_end - post_process_start)

            if video_writer.isOpened() and (args.output_limit <= 0 or
                                            next_frame_id_to_show <= args.output_limit - 1):
                video_writer.write(frame)

            if not args.no_show:
                # cv2.imshow('Detection Results', frame)
                cv2.imwrite(
                    f"/home/sovit/my_data/Data_Science/Projects/openvino_experiments/model_quantization/data/images/image_{image_id}.jpg",
                    frame)
                # key = cv2.waitKey(1)
                ESC_KEY = 27
                # Quit.
                # if key in {ord('q'), ord('Q'), ESC_KEY}:
                #     break
                # presenter.handleKey(key)
            next_frame_id_to_show += 1
            image_id += 1
            continue

        if detector_pipeline.is_ready():
            # Get new image/frame
            pre_process_start = time.time()
            start_time = perf_counter()
            frame = cap.read()
            if frame is None:
                if next_frame_id == 0:
                    raise ValueError("Can't read an image from the input")
                break
            if next_frame_id == 0:
                presenter = monitors.Presenter(args.utilization_monitors, 55,
                                               (round(frame.shape[1] / 4), round(frame.shape[0] / 8)))
                if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                                         cap.fps(), (frame.shape[1], frame.shape[0])):
                    raise RuntimeError("Can't open video writer")

            # Submit for inference
            detector_pipeline.submit_data(frame, next_frame_id, {'frame': frame, 'start_time': start_time})
            pre_process_end = time.time()
            pre_process_list.append(pre_process_end - pre_process_start)
            next_frame_id += 1
        else:
            # Wait for empty request
            detector_pipeline.await_any()

    results_file = 'results.json'
    with open(results_file, 'w') as f:
        f.write(json.dumps(results_list, indent=4))

    detector_pipeline.await_all()
    # Process completed requests
    while detector_pipeline.has_completed_request():
        results = detector_pipeline.get_result(next_frame_id_to_show)
        if results:
            objects, frame_meta = results
            frame = frame_meta['frame']
            post_process_two_start = time.time()
            start_time = frame_meta['start_time']

            if len(objects) and args.raw_output_message:
                print()
                # print_raw_results(frame.shape[:2], objects, model.labels, args.prob_threshold)

            presenter.drawGraphs(frame)
            # frame = draw_detections(frame, objects, palette, model.labels, args.prob_threshold)
            metrics.update(start_time, frame)
            post_process_two_end = time.time()
            post_process_list_two.append(post_process_two_end - post_process_two_start)

            if video_writer.isOpened() and (args.output_limit <= 0 or
                                            next_frame_id_to_show <= args.output_limit - 1):
                video_writer.write(frame)

            if not args.no_show:
                # cv2.imshow('Detection Results', frame)
                cv2.imwrite(
                    f"/home/sovit/my_data/Data_Science/Projects/openvino_experiments/model_quantization/data/images/image_{image_id}.jpg",
                    frame)
                # key = cv2.waitKey(1)
                ESC_KEY = 27
                # Quit.
                # if key in {ord('q'), ord('Q'), ESC_KEY}:
                #     break
                # presenter.handleKey(key)
            next_frame_id_to_show += 1
        else:
            break

    metrics.print_total()
    print("Presenter", presenter.reportMeans())
def main():
    args = build_argparser().parse_args()

    cap = open_images_capture(args.input, args.loop)

    frame_processor = FrameProcessor(args)

    log.info('Starting inference...')
    print("To close the application, press 'CTRL+C' here or switch to the output window and press ESC key")

    frame_num = 0
    metrics = PerformanceMetrics()
    print_perf_stats = args.perf_stats
    presenter = None
    output_transform = None
    input_crop = None
    if args.crop_size[0] > 0 and args.crop_size[1] > 0:
        input_crop = np.array(args.crop_size)
    elif not (args.crop_size[0] == 0 and args.crop_size[1] == 0):
        raise ValueError('Both crop height and width should be positive')
    video_writer = cv2.VideoWriter()

    while True:
        start_time = perf_counter()
        frame = cap.read()
        if frame is None:
            if frame_num == 0:
                raise ValueError("Can't read an image from the input")
            break
        # input_crop is a numpy array, so test against None instead of relying on its truth value
        if input_crop is not None:
            frame = center_crop(frame, input_crop)
        if frame_num == 0:
            output_transform = OutputTransform(frame.shape[:2], args.output_resolution)
            if args.output_resolution:
                output_resolution = output_transform.new_resolution
            else:
                output_resolution = (frame.shape[1], frame.shape[0])
            presenter = monitors.Presenter(args.utilization_monitors, 55,
                                           (round(output_resolution[0] / 4), round(output_resolution[1] / 8)))
            if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                                     cap.fps(), output_resolution):
                raise RuntimeError("Can't open video writer")

        detections = frame_processor.process(frame)
        presenter.drawGraphs(frame)
        frame = draw_detections(frame, frame_processor, detections, output_transform)
        metrics.update(start_time, frame)

        frame_num += 1
        if video_writer.isOpened() and (args.output_limit <= 0 or frame_num <= args.output_limit):
            video_writer.write(frame)

        if print_perf_stats:
            log.info('Performance stats:')
            log.info(frame_processor.get_performance_stats())

        if not args.no_show:
            cv2.imshow('Face recognition demo', frame)
            key = cv2.waitKey(1)
            # Quit
            if key in {ord('q'), ord('Q'), 27}:
                break
            presenter.handleKey(key)

    metrics.print_total()
    print(presenter.reportMeans())
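# `center_crop` above is a small helper that is not shown. A minimal sketch under the assumption
# that `crop_size` holds a (width, height) pair no larger than the frame (the real helper may
# order or validate the arguments differently):
def center_crop(frame, crop_size):
    fh, fw = frame.shape[:2]
    cw, ch = crop_size
    # take the centered cw x ch window of the frame
    return frame[(fh - ch) // 2:(fh + ch) // 2,
                 (fw - cw) // 2:(fw + cw) // 2,
                 :]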
def main():
    metrics = PerformanceMetrics()
    args = build_argparser().parse_args()

    log.info('Initializing Inference Engine...')
    ie = IECore()

    plugin_config = get_user_config(args.device, args.num_streams, args.num_threads)

    log.info('Loading network...')
    model, visualizer = get_model(ie, args)

    pipeline = AsyncPipeline(ie, model, plugin_config,
                             device=args.device, max_num_requests=args.num_infer_requests)

    cap = open_images_capture(args.input, args.loop)

    next_frame_id = 0
    next_frame_id_to_show = 0

    log.info('Starting inference...')
    print("To close the application, press 'CTRL+C' here or switch to the output window and press ESC key")

    presenter = None
    output_transform = None
    video_writer = cv2.VideoWriter()
    only_masks = args.only_masks

    while True:
        if pipeline.is_ready():
            # Get new image/frame
            start_time = perf_counter()
            frame = cap.read()
            if frame is None:
                if next_frame_id == 0:
                    raise ValueError("Can't read an image from the input")
                break
            if next_frame_id == 0:
                output_transform = OutputTransform(frame.shape[:2], args.output_resolution)
                if args.output_resolution:
                    output_resolution = output_transform.new_resolution
                else:
                    output_resolution = (frame.shape[1], frame.shape[0])
                presenter = monitors.Presenter(args.utilization_monitors, 55,
                                               (round(output_resolution[0] / 4), round(output_resolution[1] / 8)))
                if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                                         cap.fps(), output_resolution):
                    raise RuntimeError("Can't open video writer")

            # Submit for inference
            pipeline.submit_data(frame, next_frame_id, {'frame': frame, 'start_time': start_time})
            next_frame_id += 1
        else:
            # Wait for empty request
            pipeline.await_any()

        if pipeline.callback_exceptions:
            raise pipeline.callback_exceptions[0]
        # Process all completed requests
        results = pipeline.get_result(next_frame_id_to_show)
        if results:
            objects, frame_meta = results
            frame = frame_meta['frame']
            start_time = frame_meta['start_time']
            frame = render_segmentation(frame, objects, visualizer, output_transform, only_masks)
            presenter.drawGraphs(frame)
            metrics.update(start_time, frame)

            if video_writer.isOpened() and (args.output_limit <= 0 or
                                            next_frame_id_to_show <= args.output_limit - 1):
                video_writer.write(frame)
            next_frame_id_to_show += 1

            if not args.no_show:
                cv2.imshow('Segmentation Results', frame)
                key = cv2.waitKey(1)
                # cv2.waitKey returns an int, so compare against key codes rather than strings
                if key in {ord('q'), ord('Q'), 27}:  # 27 is ESC
                    break
                if key == 9:  # Tab key toggles mask-only rendering
                    only_masks = not only_masks
                presenter.handleKey(key)

    pipeline.await_all()
    # Process completed requests
    for next_frame_id_to_show in range(next_frame_id_to_show, next_frame_id):
        results = pipeline.get_result(next_frame_id_to_show)
        while results is None:
            results = pipeline.get_result(next_frame_id_to_show)

        objects, frame_meta = results
        frame = frame_meta['frame']
        start_time = frame_meta['start_time']
        frame = render_segmentation(frame, objects, visualizer, output_transform, only_masks)
        presenter.drawGraphs(frame)
        metrics.update(start_time, frame)

        if video_writer.isOpened() and (args.output_limit <= 0 or
                                        next_frame_id_to_show <= args.output_limit - 1):
            video_writer.write(frame)

        if not args.no_show:
            cv2.imshow('Segmentation Results', frame)
            key = cv2.waitKey(1)

    metrics.print_total()
    print(presenter.reportMeans())
def main():
    args = build_argparser().parse_args()

    # ------------- 1. Plugin initialization for specified device and load extensions library if specified -------------
    log.info("Creating Inference Engine...")
    ie = IECore()

    config_user_specified = {}
    config_min_latency = {}

    devices_nstreams = {}
    if args.num_streams:
        devices_nstreams = {device: args.num_streams for device in ['CPU', 'GPU'] if device in args.device} \
                           if args.num_streams.isdigit() \
                           else dict([device.split(':') for device in args.num_streams.split(',')])

    if 'CPU' in args.device:
        if args.cpu_extension:
            ie.add_extension(args.cpu_extension, 'CPU')
        if args.number_threads is not None:
            config_user_specified['CPU_THREADS_NUM'] = str(args.number_threads)
        if 'CPU' in devices_nstreams:
            config_user_specified['CPU_THROUGHPUT_STREAMS'] = devices_nstreams['CPU'] \
                                                              if int(devices_nstreams['CPU']) > 0 \
                                                              else 'CPU_THROUGHPUT_AUTO'
        config_min_latency['CPU_THROUGHPUT_STREAMS'] = '1'

    if 'GPU' in args.device:
        if 'GPU' in devices_nstreams:
            config_user_specified['GPU_THROUGHPUT_STREAMS'] = devices_nstreams['GPU'] \
                                                              if int(devices_nstreams['GPU']) > 0 \
                                                              else 'GPU_THROUGHPUT_AUTO'
        config_min_latency['GPU_THROUGHPUT_STREAMS'] = '1'

    # -------------------- 2. Reading the IR generated by the Model Optimizer (.xml and .bin files) --------------------
    log.info("Loading network")
    net = ie.read_network(args.model, os.path.splitext(args.model)[0] + ".bin")

    output_info = get_output_info(net)

    assert len(net.input_info) == 1, "Sample supports only YOLO V3 based single input topologies"

    # ---------------------------------------------- 3. Preparing inputs -----------------------------------------------
    log.info("Preparing inputs")
    input_blob = next(iter(net.input_info))

    # Read and pre-process input images
    if net.input_info[input_blob].input_data.shape[1] == 3:
        input_height, input_width = net.input_info[input_blob].input_data.shape[2:]
        nchw_shape = True
    else:
        input_height, input_width = net.input_info[input_blob].input_data.shape[1:3]
        nchw_shape = False

    if args.labels:
        with open(args.labels, 'r') as f:
            labels_map = [x.strip() for x in f]
    else:
        labels_map = None

    input_stream = 0 if args.input == "cam" else args.input

    mode = Mode(Modes.USER_SPECIFIED)
    cap = cv2.VideoCapture(input_stream)
    wait_key_time = 1

    # ----------------------------------------- 4. Loading model to the plugin -----------------------------------------
    log.info("Loading model to the plugin")
    exec_nets = {}

    exec_nets[Modes.USER_SPECIFIED] = ie.load_network(network=net, device_name=args.device,
                                                      config=config_user_specified,
                                                      num_requests=args.num_infer_requests)
    exec_nets[Modes.MIN_LATENCY] = ie.load_network(network=net,
                                                   device_name=args.device.split(":")[-1].split(",")[0],
                                                   config=config_min_latency,
                                                   num_requests=1)

    empty_requests = deque(exec_nets[mode.current].requests)
    completed_request_results = {}
    next_frame_id = 0
    next_frame_id_to_show = 0
    mode_metrics = {mode.current: PerformanceMetrics()}
    prev_mode_active_request_count = 0
    event = threading.Event()
    callback_exceptions = []

    # ----------------------------------------------- 5. Doing inference -----------------------------------------------
    log.info("Starting inference...")
    print("To close the application, press 'CTRL+C' here or switch to the output window and press ESC key")
    print("To switch between min_latency/user_specified modes, press TAB key in the output window")

    presenter = monitors.Presenter(args.utilization_monitors, 55,
                                   (round(cap.get(cv2.CAP_PROP_FRAME_WIDTH) / 4),
                                    round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) / 8)))

    while (cap.isOpened()
           or completed_request_results
           or len(empty_requests) < len(exec_nets[mode.current].requests)) \
            and not callback_exceptions:
        if next_frame_id_to_show in completed_request_results:
            frame, output, start_time, is_same_mode = completed_request_results.pop(next_frame_id_to_show)
            next_frame_id_to_show += 1

            objects = get_objects(output, output_info, (input_height, input_width),
                                  frame.shape[:-1], args.prob_threshold, args.keep_aspect_ratio)
            objects = filter_objects(objects, args.iou_threshold, args.prob_threshold)

            if len(objects) and args.raw_output_message:
                log.info(" Class ID | Confidence | XMIN | YMIN | XMAX | YMAX | COLOR ")

            origin_im_size = frame.shape[:-1]
            presenter.drawGraphs(frame)

            for obj in objects:
                # Validation bbox of detected object
                obj['xmax'] = min(obj['xmax'], origin_im_size[1])
                obj['ymax'] = min(obj['ymax'], origin_im_size[0])
                obj['xmin'] = max(obj['xmin'], 0)
                obj['ymin'] = max(obj['ymin'], 0)
                color = (min(obj['class_id'] * 12.5, 255),
                         min(obj['class_id'] * 7, 255),
                         min(obj['class_id'] * 5, 255))
                det_label = labels_map[obj['class_id']] if labels_map and len(labels_map) >= obj['class_id'] else \
                            str(obj['class_id'])

                if args.raw_output_message:
                    log.info("{:^9} | {:10f} | {:4} | {:4} | {:4} | {:4} | {} ".format(
                        det_label, obj['confidence'], obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax'], color))

                cv2.rectangle(frame, (obj['xmin'], obj['ymin']), (obj['xmax'], obj['ymax']), color, 2)
                cv2.putText(frame,
                            "#" + det_label + ' ' + str(round(obj['confidence'] * 100, 1)) + ' %',
                            (obj['xmin'], obj['ymin'] - 7), cv2.FONT_HERSHEY_COMPLEX, 0.6, color, 1)

            helpers.put_highlighted_text(frame, "{} mode".format(mode.current.name),
                                         (10, int(origin_im_size[0] - 20)),
                                         cv2.FONT_HERSHEY_COMPLEX, 0.75, (10, 10, 200), 2)

            if is_same_mode and prev_mode_active_request_count == 0:
                mode_metrics[mode.current].update(start_time, frame)
            else:
                mode_metrics[mode.get_other()].update(start_time, frame)
                prev_mode_active_request_count -= 1
                helpers.put_highlighted_text(frame, "Switching modes, please wait...",
                                             (10, int(origin_im_size[0] - 50)),
                                             cv2.FONT_HERSHEY_COMPLEX, 0.75, (10, 200, 10), 2)

            if not args.no_show:
                cv2.imshow("Detection Results", frame)
                key = cv2.waitKey(wait_key_time)

                if key in {ord("q"), ord("Q"), 27}:  # ESC key
                    break
                if key == 9:  # Tab key
                    if prev_mode_active_request_count == 0:
                        prev_mode = mode.current
                        mode.switch()

                        prev_mode_active_request_count = len(exec_nets[prev_mode].requests) - len(empty_requests)
                        empty_requests.clear()
                        empty_requests.extend(exec_nets[mode.current].requests)

                        mode_metrics[mode.current] = PerformanceMetrics()
                else:
                    presenter.handleKey(key)

        elif empty_requests and prev_mode_active_request_count == 0 and cap.isOpened():
            start_time = perf_counter()
            ret, frame = cap.read()
            if not ret:
                if args.loop:
                    cap.open(input_stream)
                else:
                    cap.release()
                continue

            request = empty_requests.popleft()

            # resize input_frame to network size
            in_frame = preprocess_frame(frame, input_height, input_width, nchw_shape, args.keep_aspect_ratio)

            # Start inference
            request.set_completion_callback(py_callback=async_callback,
                                            py_data=(request, next_frame_id, mode.current, frame, start_time,
                                                     completed_request_results, empty_requests, mode, event,
                                                     callback_exceptions))
            request.async_infer(inputs={input_blob: in_frame})
            next_frame_id += 1

        else:
            event.wait()
            event.clear()

    if callback_exceptions:
        raise callback_exceptions[0]

    for mode, metrics in mode_metrics.items():
        print("\nMode: {}".format(mode.name))
        metrics.print_total()
    print(presenter.reportMeans())

    for exec_net in exec_nets.values():
        await_requests_completion(exec_net.requests)
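# The demo above ends by calling `await_requests_completion(exec_net.requests)`, a helper that is
# not shown here. A minimal sketch of what such a helper might look like (an assumption, not the
# demo's actual implementation): block until every in-flight InferRequest has finished.
def await_requests_completion(requests):
    for request in requests:
        request.wait()  # blocks until the asynchronous inference on this request completes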
def main():
    metrics = PerformanceMetrics()
    args = build_argparser().parse_args()

    # Plugin initialization for specified device and load extensions library if specified
    log.info("Creating Inference Engine")
    ie = IECore()

    # Read IR
    log.info("Loading network files:\n\t{}".format(args.model_pnet))
    p_net = ie.read_network(args.model_pnet)
    assert len(p_net.input_info.keys()) == 1, "Pnet supports only single input topologies"
    assert len(p_net.outputs) == 2, "Pnet supports two output topologies"

    log.info("Loading network files:\n\t{}".format(args.model_rnet))
    r_net = ie.read_network(args.model_rnet)
    assert len(r_net.input_info.keys()) == 1, "Rnet supports only single input topologies"
    assert len(r_net.outputs) == 2, "Rnet supports two output topologies"

    log.info("Loading network files:\n\t{}".format(args.model_onet))
    o_net = ie.read_network(args.model_onet)
    assert len(o_net.input_info.keys()) == 1, "Onet supports only single input topologies"
    assert len(o_net.outputs) == 3, "Onet supports three output topologies"

    log.info("Preparing input blobs")
    pnet_input_blob = next(iter(p_net.input_info))
    rnet_input_blob = next(iter(r_net.input_info))
    onet_input_blob = next(iter(o_net.input_info))

    log.info("Preparing output blobs")
    for name, blob in p_net.outputs.items():
        if blob.shape[1] == 2:
            pnet_cls_name = name
        elif blob.shape[1] == 4:
            pnet_roi_name = name
        else:
            raise RuntimeError("Unsupported output layer for Pnet")

    for name, blob in r_net.outputs.items():
        if blob.shape[1] == 2:
            rnet_cls_name = name
        elif blob.shape[1] == 4:
            rnet_roi_name = name
        else:
            raise RuntimeError("Unsupported output layer for Rnet")

    for name, blob in o_net.outputs.items():
        if blob.shape[1] == 2:
            onet_cls_name = name
        elif blob.shape[1] == 4:
            onet_roi_name = name
        elif blob.shape[1] == 10:
            onet_pts_name = name
        else:
            raise RuntimeError("Unsupported output layer for Onet")

    cap = open_images_capture(args.input, args.loop)

    next_frame_id = 0

    log.info('Starting inference...')
    print("To close the application, press 'CTRL+C' here or switch to the output window and press ESC key")

    presenter = None
    video_writer = cv2.VideoWriter()

    while True:
        start_time = perf_counter()
        origin_image = cap.read()
        if origin_image is None:
            if next_frame_id == 0:
                raise ValueError("Can't read an image from the input")
            break
        if next_frame_id == 0:
            presenter = monitors.Presenter(args.utilization_monitors, 55,
                                           (round(origin_image.shape[1] / 4), round(origin_image.shape[0] / 8)))
            if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                                     cap.fps(), (origin_image.shape[1], origin_image.shape[0])):
                raise RuntimeError("Can't open video writer")
        next_frame_id += 1

        rgb_image = cv2.cvtColor(origin_image, cv2.COLOR_BGR2RGB)
        oh, ow, _ = rgb_image.shape

        scales = utils.calculate_scales(rgb_image)

        # *************************************
        # Pnet stage
        # *************************************
        log.info("Loading Pnet model to the plugin")
        t0 = cv2.getTickCount()
        pnet_res = []
        for scale in scales:
            hs = int(oh * scale)
            ws = int(ow * scale)
            image = preprocess_image(rgb_image, ws, hs)

            p_net.reshape({pnet_input_blob: [1, 3, ws, hs]})  # Change width and height of input blob
            exec_pnet = ie.load_network(network=p_net, device_name=args.device)

            p_res = exec_pnet.infer(inputs={pnet_input_blob: image})
            pnet_res.append(p_res)

        image_num = len(scales)
        rectangles = []
        for i in range(image_num):
            roi = pnet_res[i][pnet_roi_name]
            cls = pnet_res[i][pnet_cls_name]
            _, _, out_h, out_w = cls.shape
            out_side = max(out_h, out_w)
            rectangle = utils.detect_face_12net(cls[0][1], roi[0], out_side, 1 / scales[i], ow, oh,
                                                score_threshold[0], iou_threshold[0])
            rectangles.extend(rectangle)
        rectangles = utils.NMS(rectangles, iou_threshold[1], 'iou')

        # Rnet stage
        if len(rectangles) > 0:
            log.info("Loading Rnet model to the plugin")
            r_net.reshape({rnet_input_blob: [len(rectangles), 3, 24, 24]})  # Change batch size of input blob
            exec_rnet = ie.load_network(network=r_net, device_name=args.device)

            rnet_input = []
            for rectangle in rectangles:
                crop_img = rgb_image[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]
                crop_img = preprocess_image(crop_img, 24, 24)
                rnet_input.extend(crop_img)

            rnet_res = exec_rnet.infer(inputs={rnet_input_blob: rnet_input})
            roi = rnet_res[rnet_roi_name]
            cls = rnet_res[rnet_cls_name]
            rectangles = utils.filter_face_24net(cls, roi, rectangles, ow, oh,
                                                 score_threshold[1], iou_threshold[2])

        # Onet stage
        if len(rectangles) > 0:
            log.info("Loading Onet model to the plugin")
            o_net.reshape({onet_input_blob: [len(rectangles), 3, 48, 48]})  # Change batch size of input blob
            exec_onet = ie.load_network(network=o_net, device_name=args.device)

            onet_input = []
            for rectangle in rectangles:
                crop_img = rgb_image[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]
                crop_img = preprocess_image(crop_img, 48, 48)
                onet_input.extend(crop_img)

            onet_res = exec_onet.infer(inputs={onet_input_blob: onet_input})
            roi = onet_res[onet_roi_name]
            cls = onet_res[onet_cls_name]
            pts = onet_res[onet_pts_name]
            rectangles = utils.filter_face_48net(cls, roi, pts, rectangles, ow, oh,
                                                 score_threshold[2], iou_threshold[3])

        # display results
        for rectangle in rectangles:
            # Draw detected boxes
            cv2.putText(origin_image, 'confidence: {:.2f}'.format(rectangle[4]),
                        (int(rectangle[0]), int(rectangle[1])), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0))
            cv2.rectangle(origin_image, (int(rectangle[0]), int(rectangle[1])),
                          (int(rectangle[2]), int(rectangle[3])), (255, 0, 0), 1)
            # Draw landmarks
            for i in range(5, 15, 2):
                cv2.circle(origin_image, (int(rectangle[i + 0]), int(rectangle[i + 1])), 2, (0, 255, 0))

        infer_time = (cv2.getTickCount() - t0) / cv2.getTickFrequency()  # Record infer time
        cv2.putText(origin_image, 'summary: {:.1f} FPS'.format(1.0 / infer_time),
                    (5, 15), cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 200))

        if video_writer.isOpened() and (args.output_limit <= 0 or next_frame_id <= args.output_limit - 1):
            video_writer.write(origin_image)

        if not args.no_show:
            cv2.imshow('MTCNN Results', origin_image)
            key = cv2.waitKey(1)
            if key in {ord('q'), ord('Q'), 27}:
                break
            presenter.handleKey(key)
        metrics.update(start_time, origin_image)

    metrics.print_total()
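# The three stages above rely on a `preprocess_image` helper that is not shown. A rough sketch
# of what it plausibly does (an assumption, not this demo's exact code): resize the crop to the
# requested width/height, normalize pixel values, and reorder to an NCHW blob with a batch dim.
import numpy as np

def preprocess_image(image, width, height):
    resized = cv2.resize(image, (width, height))
    normalized = (resized.astype(np.float32) - 127.5) * 0.0078125  # common MTCNN scaling; may differ here
    return np.expand_dims(normalized.transpose((2, 0, 1)), axis=0)  # HWC -> NCHW with batch dimension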
def main():
    args = build_argparser().parse_args()
    metrics = PerformanceMetrics()

    log.info('Initializing Inference Engine...')
    ie = IECore()

    plugin_config = get_user_config(args.device, args.num_streams, args.num_threads)

    cap = open_images_capture(args.input, args.loop)
    start_time = perf_counter()
    frame = cap.read()
    if frame is None:
        raise RuntimeError("Can't read an image from the input")

    log.info('Loading network...')
    model = get_model(ie, args, frame.shape[1] / frame.shape[0])

    hpe_pipeline = AsyncPipeline(ie, model, plugin_config,
                                 device=args.device, max_num_requests=args.num_infer_requests)

    log.info('Starting inference...')
    hpe_pipeline.submit_data(frame, 0, {'frame': frame, 'start_time': start_time})
    next_frame_id = 1
    next_frame_id_to_show = 0

    output_transform = models.OutputTransform(frame.shape[:2], args.output_resolution)
    if args.output_resolution:
        output_resolution = output_transform.new_resolution
    else:
        output_resolution = (frame.shape[1], frame.shape[0])
    presenter = monitors.Presenter(args.utilization_monitors, 55,
                                   (round(output_resolution[0] / 4), round(output_resolution[1] / 8)))
    video_writer = cv2.VideoWriter()
    if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                             cap.fps(), output_resolution):
        raise RuntimeError("Can't open video writer")

    print("To close the application, press 'CTRL+C' here or switch to the output window and press ESC key")

    while True:
        if hpe_pipeline.callback_exceptions:
            raise hpe_pipeline.callback_exceptions[0]
        # Process all completed requests
        results = hpe_pipeline.get_result(next_frame_id_to_show)
        if results:
            (poses, scores), frame_meta = results
            frame = frame_meta['frame']
            start_time = frame_meta['start_time']

            if len(poses) and args.raw_output_message:
                print_raw_results(poses, scores)

            presenter.drawGraphs(frame)
            frame = draw_poses(frame, poses, args.prob_threshold, output_transform)
            metrics.update(start_time, frame)

            if video_writer.isOpened() and (args.output_limit <= 0 or
                                            next_frame_id_to_show <= args.output_limit - 1):
                video_writer.write(frame)
            next_frame_id_to_show += 1

            if not args.no_show:
                cv2.imshow('Pose estimation results', frame)
                key = cv2.waitKey(1)

                ESC_KEY = 27
                # Quit.
                if key in {ord('q'), ord('Q'), ESC_KEY}:
                    break
                presenter.handleKey(key)
            continue

        if hpe_pipeline.is_ready():
            # Get new image/frame
            start_time = perf_counter()
            frame = cap.read()
            if frame is None:
                break

            # Submit for inference
            hpe_pipeline.submit_data(frame, next_frame_id, {'frame': frame, 'start_time': start_time})
            next_frame_id += 1
        else:
            # Wait for empty request
            hpe_pipeline.await_any()

    hpe_pipeline.await_all()
    # Process completed requests
    for next_frame_id_to_show in range(next_frame_id_to_show, next_frame_id):
        results = hpe_pipeline.get_result(next_frame_id_to_show)
        while results is None:
            results = hpe_pipeline.get_result(next_frame_id_to_show)

        (poses, scores), frame_meta = results
        frame = frame_meta['frame']
        start_time = frame_meta['start_time']

        if len(poses) and args.raw_output_message:
            print_raw_results(poses, scores)

        presenter.drawGraphs(frame)
        frame = draw_poses(frame, poses, args.prob_threshold, output_transform)
        metrics.update(start_time, frame)

        if video_writer.isOpened() and (args.output_limit <= 0 or
                                        next_frame_id_to_show <= args.output_limit - 1):
            video_writer.write(frame)

        if not args.no_show:
            cv2.imshow('Pose estimation results', frame)
            key = cv2.waitKey(1)

            ESC_KEY = 27
            # Quit.
            if key in {ord('q'), ord('Q'), ESC_KEY}:
                break
            presenter.handleKey(key)

    metrics.print_total()
    print(presenter.reportMeans())