def run(args):
    statistics = None
    try:
        if args.number_streams is None:
            logger.warning(" -nstreams default value is determined automatically for a device. "
                           "Although the automatic selection usually provides reasonable performance, "
                           "it may still be non-optimal for some cases; for more information look at README. ")

        command_line_arguments = get_command_line_arguments(sys.argv)
        if args.report_type:
            statistics = StatisticsReport(StatisticsReport.Config(args.report_type, args.report_folder))
            statistics.add_parameters(StatisticsReport.Category.COMMAND_LINE_PARAMETERS, command_line_arguments)

        def is_flag_set_in_command_line(flag):
            return any(x.strip('-') == flag for x, y in command_line_arguments)

        device_name = args.target_device
        devices = parse_devices(device_name)
        device_number_streams = parse_nstreams_value_per_device(devices, args.number_streams)

        config = {}
        if args.load_config:
            load_config(args.load_config, config)

        is_network_compiled = False
        _, ext = os.path.splitext(args.path_to_model)
        if ext == BLOB_EXTENSION:
            is_network_compiled = True
            print("Model is compiled")

        # ------------------------------ 2. Loading OpenVINO ---------------------------------------------------
        next_step(step_id=2)

        benchmark = Benchmark(args.target_device, args.number_infer_requests,
                              args.number_iterations, args.time, args.api_type, args.inference_only)

        ## CPU (MKLDNN) extensions
        if CPU_DEVICE_NAME in device_name and args.path_to_extension:
            benchmark.add_extension(path_to_extension=args.path_to_extension)

        ## GPU (clDNN) Extensions
        if GPU_DEVICE_NAME in device_name and args.path_to_cldnn_config:
            if GPU_DEVICE_NAME not in config.keys():
                config[GPU_DEVICE_NAME] = {}
            config[GPU_DEVICE_NAME]['CONFIG_FILE'] = args.path_to_cldnn_config

        if GPU_DEVICE_NAME in config.keys() and 'CONFIG_FILE' in config[GPU_DEVICE_NAME].keys():
            cldnn_config = config[GPU_DEVICE_NAME]['CONFIG_FILE']
            benchmark.add_extension(path_to_cldnn_config=cldnn_config)

        if not args.perf_hint:
            for device in devices:
                supported_config_keys = benchmark.core.get_property(device, 'SUPPORTED_CONFIG_KEYS')
                if 'PERFORMANCE_HINT' in supported_config_keys:
                    logger.warning(f"-hint default value is determined as 'THROUGHPUT' automatically for {device} device. " +
                                   "For more detailed information look at README.")
                    args.perf_hint = "throughput"

        version = benchmark.get_version_info()
        logger.info(version)

        # --------------------- 3. Setting device configuration --------------------------------------------------------
        next_step()

        def get_device_type_from_name(name):
            new_name = str(name)
            new_name = new_name.split(".", 1)[0]
            new_name = new_name.split("(", 1)[0]
            return new_name

        ## Set default values from dumped config
        default_devices = set()
        for device in devices:
            device_type = get_device_type_from_name(device)
            if device_type in config and device not in config:
                config[device] = config[device_type].copy()
                default_devices.add(device_type)

        for def_device in default_devices:
            config.pop(def_device)

        perf_counts = False
        for device in devices:
            if device not in config.keys():
                config[device] = {}

            ## Set performance counter
            if is_flag_set_in_command_line('pc'):
                ## set to user defined value
                config[device]['PERF_COUNT'] = 'YES' if args.perf_counts else 'NO'
            elif 'PERF_COUNT' in config[device].keys() and config[device]['PERF_COUNT'] == 'YES':
                logger.warning(f"Performance counters for {device} device are turned on. " +
                               "To print results use -pc option.")
            elif args.report_type in [averageCntReport, detailedCntReport]:
                logger.warning(f"Turn on performance counters for {device} device " +
                               f"since report type is {args.report_type}.")
                config[device]['PERF_COUNT'] = 'YES'
            elif args.exec_graph_path is not None:
                logger.warning(f"Turn on performance counters for {device} device " +
                               "due to execution graph dumping.")
                config[device]['PERF_COUNT'] = 'YES'
            else:
                ## set to default value
                config[device]['PERF_COUNT'] = 'YES' if args.perf_counts else 'NO'
            perf_counts = True if config[device]['PERF_COUNT'] == 'YES' else perf_counts

            ## high-level performance hints
            if is_flag_set_in_command_line('hint') or args.perf_hint:
                config[device]['PERFORMANCE_HINT'] = args.perf_hint.upper()
                if is_flag_set_in_command_line('nireq'):
                    config[device]['PERFORMANCE_HINT_NUM_REQUESTS'] = str(args.number_infer_requests)

            ## the rest are individual per-device settings (overriding the values the device will deduce from perf hint)
            def set_throughput_streams():
                key = get_device_type_from_name(device) + "_THROUGHPUT_STREAMS"
                if device in device_number_streams.keys():
                    ## set to user defined value
                    supported_config_keys = benchmark.core.get_property(device, 'SUPPORTED_CONFIG_KEYS')
                    if key not in supported_config_keys:
                        raise Exception(f"Device {device} doesn't support config key '{key}'! " +
                                        "Please specify -nstreams for correct devices in format <dev1>:<nstreams1>,<dev2>:<nstreams2>")
                    config[device][key] = device_number_streams[device]
                elif key not in config[device].keys() and args.api_type == "async" and not is_flag_set_in_command_line('hint'):
                    ## set the _AUTO value for the #streams
                    logger.warning(f"-nstreams default value is determined automatically for {device} device. " +
                                   "Although the automatic selection usually provides reasonable performance, "
                                   "it may still be non-optimal for some cases; for more information look at README.")
                    if device != MYRIAD_DEVICE_NAME:  ## MYRIAD sets the default number of streams implicitly
                        config[device][key] = get_device_type_from_name(device) + "_THROUGHPUT_AUTO"
                if key in config[device].keys():
                    device_number_streams[device] = config[device][key]

            if CPU_DEVICE_NAME in device:  # CPU supports few special performance-oriented keys
                # limit threading for CPU portion of inference
                if args.number_threads and is_flag_set_in_command_line("nthreads"):
                    config[device]['CPU_THREADS_NUM'] = str(args.number_threads)

                if is_flag_set_in_command_line("enforcebf16") or is_flag_set_in_command_line("enforce_bfloat16"):
                    config[device]['ENFORCE_BF16'] = 'YES' if args.enforce_bfloat16 else 'NO'

                if is_flag_set_in_command_line('pin'):
                    ## set to user defined value
                    config[device]['CPU_BIND_THREAD'] = args.infer_threads_pinning
                elif 'CPU_BIND_THREAD' not in config[device].keys():
                    if MULTI_DEVICE_NAME in device_name and GPU_DEVICE_NAME in device_name:
                        logger.warning(f"Turn off threads pinning for {device} " +
                                       "device since multi-scenario with GPU device is used.")
                        config[device]['CPU_BIND_THREAD'] = 'NO'

                ## for CPU execution, more throughput-oriented execution via streams
                set_throughput_streams()
            elif GPU_DEVICE_NAME in device:
                ## for GPU execution, more throughput-oriented execution via streams
                set_throughput_streams()

                if MULTI_DEVICE_NAME in device_name and CPU_DEVICE_NAME in device_name:
                    logger.warning("Turn on GPU throttling. Multi-device execution with the CPU + GPU performs best with GPU throttling hint, " +
                                   "which releases another CPU thread (that is otherwise used by the GPU driver for active polling)")
                    config[device]['GPU_PLUGIN_THROTTLE'] = '1'
            elif MYRIAD_DEVICE_NAME in device:
                set_throughput_streams()
                config[device]['LOG_LEVEL'] = 'LOG_INFO'
            elif GNA_DEVICE_NAME in device:
                if is_flag_set_in_command_line('qb'):
                    if args.qb == 8:
                        config[device]['GNA_PRECISION'] = 'I8'
                    else:
                        config[device]['GNA_PRECISION'] = 'I16'
            else:
                supported_config_keys = benchmark.core.get_property(device, 'SUPPORTED_CONFIG_KEYS')
                if 'CPU_THREADS_NUM' in supported_config_keys and args.number_threads and is_flag_set_in_command_line("nthreads"):
                    config[device]['CPU_THREADS_NUM'] = str(args.number_threads)
                if 'CPU_THROUGHPUT_STREAMS' in supported_config_keys and args.number_streams and is_flag_set_in_command_line("streams"):
                    config[device]['CPU_THROUGHPUT_STREAMS'] = args.number_streams
                if 'CPU_BIND_THREAD' in supported_config_keys and args.infer_threads_pinning and is_flag_set_in_command_line("pin"):
                    config[device]['CPU_BIND_THREAD'] = args.infer_threads_pinning
        perf_counts = perf_counts

        benchmark.set_config(config)
        if args.cache_dir:
            benchmark.set_cache_dir(args.cache_dir)

        topology_name = ""
        load_from_file_enabled = is_flag_set_in_command_line('load_from_file') or is_flag_set_in_command_line('lfile')
        if load_from_file_enabled and not is_network_compiled:
            next_step()
            print("Skipping the step for loading model from file")
            next_step()
            print("Skipping the step for loading model from file")
            next_step()
            print("Skipping the step for loading model from file")

            # --------------------- 7. Loading the model to the device -------------------------------------------------
            next_step()
            start_time = datetime.utcnow()
            compiled_model = benchmark.core.compile_model(args.path_to_model, benchmark.device)
            duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
            logger.info(f"Compile model took {duration_ms} ms")
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [('load network time (ms)', duration_ms)])
            app_inputs_info, _ = get_inputs_info(args.shape, args.data_shape, args.layout, args.batch_size,
                                                 args.input_scale, args.input_mean, compiled_model.inputs)
            batch_size = get_network_batch_size(app_inputs_info)
        elif not is_network_compiled:
            # --------------------- 4. Read the Intermediate Representation of the network -----------------------------
            next_step()

            start_time = datetime.utcnow()
            model = benchmark.read_model(args.path_to_model)
            topology_name = model.get_name()
            duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
            logger.info(f"Read model took {duration_ms} ms")
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [('read network time (ms)', duration_ms)])

            # --------------------- 5. Resizing network to match image sizes and given batch ---------------------------
            next_step()

            app_inputs_info, reshape = get_inputs_info(args.shape, args.data_shape, args.layout, args.batch_size,
                                                       args.input_scale, args.input_mean, model.inputs)
            if reshape:
                start_time = datetime.utcnow()
                shapes = {info.name: info.partial_shape for info in app_inputs_info}
                logger.info('Reshaping model: {}'.format(', '.join("'{}': {}".format(k, str(v)) for k, v in shapes.items())))
                model.reshape(shapes)
                duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
                logger.info(f"Reshape model took {duration_ms} ms")
                if statistics:
                    statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                              [('reshape network time (ms)', duration_ms)])

            # use batch size according to provided layout and shapes
            batch_size = get_network_batch_size(app_inputs_info)
            logger.info(f'Network batch size: {batch_size}')

            # --------------------- 6. Configuring inputs and outputs of the model --------------------------------------------------
            next_step()

            pre_post_processing(model, app_inputs_info, args.input_precision, args.output_precision, args.input_output_precision)
            print_inputs_and_outputs_info(model)

            # --------------------- 7. Loading the model to the device -------------------------------------------------
            next_step()

            start_time = datetime.utcnow()
            compiled_model = benchmark.core.compile_model(model, benchmark.device)
            duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
            logger.info(f"Compile model took {duration_ms} ms")
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [('load network time (ms)', duration_ms)])
        else:
            next_step()
            print("Skipping the step for compiled network")
            next_step()
            print("Skipping the step for compiled network")
            next_step()
            print("Skipping the step for compiled network")

            # --------------------- 7. Loading the model to the device -------------------------------------------------
            next_step()
            start_time = datetime.utcnow()
            compiled_model = benchmark.core.import_model(args.path_to_model)
            duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
            logger.info(f"Import model took {duration_ms} ms")
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [('import network time (ms)', duration_ms)])
            app_inputs_info, _ = get_inputs_info(args.shape, args.data_shape, args.layout, args.batch_size,
                                                 args.input_scale, args.input_mean, compiled_model.inputs)
            batch_size = get_network_batch_size(app_inputs_info)

        # --------------------- 8. Querying optimal runtime parameters --------------------------------------------------
        next_step()
        ## actual device-deduced settings
        for device in devices:
            keys = benchmark.core.get_property(device, 'SUPPORTED_CONFIG_KEYS')
            logger.info(f'DEVICE: {device}')
            for k in keys:
                try:
                    logger.info(f' {k} , {benchmark.core.get_property(device, k)}')
                except:
                    pass

        # Update number of streams
        for device in device_number_streams.keys():
            key = get_device_type_from_name(device) + '_THROUGHPUT_STREAMS'
            device_number_streams[device] = benchmark.core.get_property(device, key)

        # ------------------------------------ 9. Creating infer requests and preparing input data ----------------------
        next_step()

        # Create infer requests
        start_time = datetime.utcnow()
        requests = benchmark.create_infer_requests(compiled_model)
        duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
        logger.info(f"Create {benchmark.nireq} infer requests took {duration_ms} ms")
        if statistics:
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [('create infer requests time (ms)', duration_ms)])

        # Prepare input data
        paths_to_input = list()
        if args.paths_to_input:
            for path in args.paths_to_input:
                if ":" in next(iter(path), ""):
                    paths_to_input.extend(path)
                else:
                    paths_to_input.append(os.path.abspath(*path))

        data_queue = get_input_data(paths_to_input, app_inputs_info)

        static_mode = check_for_static(app_inputs_info)
        allow_inference_only_or_sync = can_measure_as_static(app_inputs_info)
        if not allow_inference_only_or_sync and benchmark.api_type == 'sync':
            raise Exception("Benchmarking of the model with dynamic shapes is available for async API only. "
                            "Please use -api async -nstreams 1 -nireq 1 to emulate sync behavior.")

        if benchmark.inference_only is None:
            if static_mode:
                benchmark.inference_only = True
            else:
                benchmark.inference_only = False
        elif benchmark.inference_only and not allow_inference_only_or_sync:
            raise Exception("Benchmarking dynamic model available with input filling in measurement loop only!")

        if benchmark.inference_only:
            logger.info("Benchmarking in inference only mode (inputs filling are not included in measurement loop).")
        else:
            logger.info("Benchmarking in full mode (inputs filling are included in measurement loop).")

        # update batch size in case dynamic network with one data_shape
        if benchmark.inference_only and batch_size.is_dynamic:
            batch_size = Dimension(data_queue.batch_sizes[data_queue.current_group_id])

        benchmark.latency_groups = get_latency_groups(app_inputs_info)

        if len(benchmark.latency_groups) > 1:
            logger.info(f"Defined {len(benchmark.latency_groups)} tensor groups:")
            for group in benchmark.latency_groups:
                print(f"\t{str(group)}")

        # Iteration limit
        benchmark.niter = get_number_iterations(benchmark.niter, benchmark.nireq,
                                                max(len(info.shapes) for info in app_inputs_info), benchmark.api_type)

        # Set input tensors before first inference
        for request in requests:
            data_tensors = data_queue.get_next_input()
            for port, data_tensor in data_tensors.items():
                input_tensor = request.get_input_tensor(port)
                if not static_mode:
                    input_tensor.shape = data_tensor.shape
                input_tensor.data[:] = data_tensor.data

        if statistics:
            statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG,
                                      [
                                          ('topology', topology_name),
                                          ('target device', device_name),
                                          ('API', args.api_type),
                                          ('inference_only', benchmark.inference_only),
                                          ('precision', "UNSPECIFIED"),
                                          ('batch size', str(batch_size)),
                                          ('number of iterations', str(benchmark.niter)),
                                          ('number of parallel infer requests', str(benchmark.nireq)),
                                          ('duration (ms)', str(get_duration_in_milliseconds(benchmark.duration_seconds))),
                                      ])

            for nstreams in device_number_streams.items():
                statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG,
                                          [(f"number of {nstreams[0]} streams", str(nstreams[1]))])

        # ------------------------------------ 10. Measuring performance -----------------------------------------------

        output_string = process_help_inference_string(benchmark, device_number_streams)

        next_step(additional_info=output_string)
        progress_bar_total_count = 10000
        if benchmark.niter and not benchmark.duration_seconds:
            progress_bar_total_count = benchmark.niter

        progress_bar = ProgressBar(progress_bar_total_count, args.stream_output, args.progress) if args.progress else None

        duration_ms = f"{benchmark.first_infer(requests):.2f}"
        logger.info(f"First inference took {duration_ms} ms")
        if statistics:
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [('first inference time (ms)', duration_ms)])

        pcseq = args.pcseq
        if static_mode or len(benchmark.latency_groups) == 1:
            pcseq = False

        fps, median_latency_ms, avg_latency_ms, min_latency_ms, max_latency_ms, total_duration_sec, iteration = \
            benchmark.main_loop(requests, data_queue, batch_size, args.latency_percentile, progress_bar, pcseq)

        # ------------------------------------ 11. Dumping statistics report -------------------------------------------
        next_step()

        if args.dump_config:
            dump_config(args.dump_config, config)
            logger.info(f"OpenVINO configuration settings were dumped to {args.dump_config}")

        if args.exec_graph_path:
            dump_exec_graph(compiled_model, args.exec_graph_path)

        if perf_counts:
            perfs_count_list = []
            for request in requests:
                perfs_count_list.append(request.profiling_info)
            if args.perf_counts:
                print_perf_counters(perfs_count_list)
            if statistics:
                statistics.dump_performance_counters(perfs_count_list)

        if statistics:
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [
                                          ('total execution time (ms)', f'{get_duration_in_milliseconds(total_duration_sec):.2f}'),
                                          ('total number of iterations', str(iteration)),
                                      ])
            if MULTI_DEVICE_NAME not in device_name:
                latency_prefix = None
                if args.latency_percentile == 50 and static_mode:
                    #latency_prefix = 'median latency (ms)'
                    latency_prefix = 'latency (ms)'
                elif args.latency_percentile != 50:
                    latency_prefix = 'latency (' + str(args.latency_percentile) + ' percentile) (ms)'
                if latency_prefix:
                    statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                              [(latency_prefix, f'{median_latency_ms:.2f}')])
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [("avg latency", f'{avg_latency_ms:.2f}')])
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [("min latency", f'{min_latency_ms:.2f}')])
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [("max latency", f'{max_latency_ms:.2f}')])
                if pcseq:
                    for group in benchmark.latency_groups:
                        statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                                  [("group", str(group))])
                        statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                                  [("avg latency", f'{group.avg:.2f}')])
                        statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                                  [("min latency", f'{group.min:.2f}')])
                        statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                                  [("max latency", f'{group.max:.2f}')])
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [('throughput', f'{fps:.2f}')])
            statistics.dump()

        print(f'Count: {iteration} iterations')
        print(f'Duration: {get_duration_in_milliseconds(total_duration_sec):.2f} ms')
        if MULTI_DEVICE_NAME not in device_name:
            print('Latency:')
            if args.latency_percentile == 50 and static_mode:
                print(f' Median: {median_latency_ms:.2f} ms')
            elif args.latency_percentile != 50:
                print(f' ({args.latency_percentile} percentile): {median_latency_ms:.2f} ms')
            print(f' AVG: {avg_latency_ms:.2f} ms')
            print(f' MIN: {min_latency_ms:.2f} ms')
            print(f' MAX: {max_latency_ms:.2f} ms')
            if pcseq:
                print("Latency for each data shape group: ")
                for group in benchmark.latency_groups:
                    print(f" {str(group)}")
                    print(f' AVG: {group.avg:.2f} ms')
                    print(f' MIN: {group.min:.2f} ms')
                    print(f' MAX: {group.max:.2f} ms')
        print(f'Throughput: {fps:.2f} FPS')

        del compiled_model

        next_step.step_id = 0
    except Exception as e:
        logger.exception(e)

        if statistics:
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [('error', str(e))])
            statistics.dump()
        sys.exit(1)
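
# Note on the per-device `config` dictionary built by run(): it maps each target
# device to a flat dict of string-valued plugin options before it is handed to
# benchmark.set_config(). A minimal illustrative sketch (hypothetical values,
# assuming a CPU-only run with counters off and automatic stream selection):
#
#     config = {
#         'CPU': {
#             'PERF_COUNT': 'NO',
#             'CPU_BIND_THREAD': 'YES',
#             'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO',
#         },
#     }
#
# This nested layout is also what -load_config fills in and -dump_config writes out.
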
def run(args):
    statistics = None
    try:
        if args.number_streams is None:
            logger.warning(" -nstreams default value is determined automatically for a device. "
                           "Although the automatic selection usually provides reasonable performance, "
                           "it may still be non-optimal for some cases; for more information look at README. ")

        command_line_arguments = get_command_line_arguments(sys.argv)
        if args.report_type:
            statistics = StatisticsReport(StatisticsReport.Config(args.report_type, args.report_folder))
            statistics.add_parameters(StatisticsReport.Category.COMMAND_LINE_PARAMETERS, command_line_arguments)

        def is_flag_set_in_command_line(flag):
            return any(x.strip('-') == flag for x, y in command_line_arguments)

        device_name = args.target_device
        devices = parse_devices(device_name)
        device_number_streams = parse_nstreams_value_per_device(devices, args.number_streams)

        config = {}
        if args.load_config:
            load_config(args.load_config, config)

        is_network_compiled = False
        _, ext = os.path.splitext(args.path_to_model)
        if ext == BLOB_EXTENSION:
            is_network_compiled = True
            print("Network is compiled")

        # ------------------------------ 2. Loading Inference Engine ---------------------------------------------------
        next_step(step_id=2)

        benchmark = Benchmark(args.target_device, args.number_infer_requests,
                              args.number_iterations, args.time, args.api_type)

        ## CPU (MKLDNN) extensions
        if CPU_DEVICE_NAME in device_name and args.path_to_extension:
            benchmark.add_extension(path_to_extension=args.path_to_extension)

        ## GPU (clDNN) Extensions
        if GPU_DEVICE_NAME in device_name and args.path_to_cldnn_config:
            if GPU_DEVICE_NAME not in config.keys():
                config[GPU_DEVICE_NAME] = {}
            config[GPU_DEVICE_NAME]['CONFIG_FILE'] = args.path_to_cldnn_config

        if GPU_DEVICE_NAME in config.keys() and 'CONFIG_FILE' in config[GPU_DEVICE_NAME].keys():
            cldnn_config = config[GPU_DEVICE_NAME]['CONFIG_FILE']
            benchmark.add_extension(path_to_cldnn_config=cldnn_config)

        version = benchmark.get_version_info()
        logger.info(version)

        # --------------------- 3. Setting device configuration --------------------------------------------------------
        next_step()

        perf_counts = False
        for device in devices:
            if device not in config.keys():
                config[device] = {}

            ## Set performance counter
            if is_flag_set_in_command_line('pc'):
                ## set to user defined value
                config[device]['PERF_COUNT'] = 'YES' if args.perf_counts else 'NO'
            elif 'PERF_COUNT' in config[device].keys() and config[device]['PERF_COUNT'] == 'YES':
                logger.warning("Performance counters for {} device are turned on. ".format(device) +
                               "To print results use -pc option.")
            elif args.report_type in [averageCntReport, detailedCntReport]:
                logger.warning("Turn on performance counters for {} device ".format(device) +
                               "since report type is {}.".format(args.report_type))
                config[device]['PERF_COUNT'] = 'YES'
            elif args.exec_graph_path is not None:
                logger.warning("Turn on performance counters for {} device ".format(device) +
                               "due to execution graph dumping.")
                config[device]['PERF_COUNT'] = 'YES'
            else:
                ## set to default value
                config[device]['PERF_COUNT'] = 'YES' if args.perf_counts else 'NO'
            perf_counts = True if config[device]['PERF_COUNT'] == 'YES' else perf_counts

            def set_throughput_streams():
                key = device + "_THROUGHPUT_STREAMS"
                if device in device_number_streams.keys():
                    ## set to user defined value
                    supported_config_keys = benchmark.ie.get_metric(device, 'SUPPORTED_CONFIG_KEYS')
                    if key not in supported_config_keys:
                        raise Exception("Device {} doesn't support config key '{}'! ".format(device, key) +
                                        "Please specify -nstreams for correct devices in format <dev1>:<nstreams1>,<dev2>:<nstreams2>")
                    config[device][key] = device_number_streams[device]
                elif key not in config[device].keys() and args.api_type == "async":
                    logger.warning("-nstreams default value is determined automatically for {} device. ".format(device) +
                                   "Although the automatic selection usually provides reasonable performance, "
                                   "it may still be non-optimal for some cases; for more information look at README.")
                    config[device][key] = device + "_THROUGHPUT_AUTO"
                if key in config[device].keys():
                    device_number_streams[device] = config[device][key]

            if device == CPU_DEVICE_NAME:  # CPU supports few special performance-oriented keys
                # limit threading for CPU portion of inference
                if args.number_threads and is_flag_set_in_command_line("nthreads"):
                    config[device]['CPU_THREADS_NUM'] = str(args.number_threads)

                if is_flag_set_in_command_line("enforcebf16") or is_flag_set_in_command_line("enforce_bfloat16"):
                    config[device]['ENFORCE_BF16'] = 'YES' if args.enforce_bfloat16 else 'NO'

                if is_flag_set_in_command_line('pin'):
                    ## set to user defined value
                    config[device]['CPU_BIND_THREAD'] = args.infer_threads_pinning
                elif 'CPU_BIND_THREAD' not in config[device].keys():
                    if MULTI_DEVICE_NAME in device_name and GPU_DEVICE_NAME in device_name:
                        logger.warning("Turn off threads pinning for {} ".format(device) +
                                       "device since multi-scenario with GPU device is used.")
                        config[device]['CPU_BIND_THREAD'] = 'NO'
                    else:
                        ## set to default value
                        config[device]['CPU_BIND_THREAD'] = args.infer_threads_pinning

                ## for CPU execution, more throughput-oriented execution via streams
                set_throughput_streams()
            elif device == GPU_DEVICE_NAME:
                ## for GPU execution, more throughput-oriented execution via streams
                set_throughput_streams()

                if MULTI_DEVICE_NAME in device_name and CPU_DEVICE_NAME in device_name:
                    logger.warning("Turn on GPU throttling. Multi-device execution with the CPU + GPU performs best with GPU throttling hint, " +
                                   "which releases another CPU thread (that is otherwise used by the GPU driver for active polling)")
                    config[device]['CLDNN_PLUGIN_THROTTLE'] = '1'
            elif device == MYRIAD_DEVICE_NAME:
                config[device]['LOG_LEVEL'] = 'LOG_INFO'
            elif device == GNA_DEVICE_NAME:
                if is_flag_set_in_command_line('qb'):
                    if args.qb == 8:
                        config[device]['GNA_PRECISION'] = 'I8'
                    else:
                        config[device]['GNA_PRECISION'] = 'I16'
                if args.number_threads and is_flag_set_in_command_line("nthreads"):
                    config[device]['GNA_LIB_N_THREADS'] = str(args.number_threads)
        perf_counts = perf_counts

        benchmark.set_config(config)

        batch_size = args.batch_size
        if not is_network_compiled:
            # --------------------- 4. Read the Intermediate Representation of the network -----------------------------
            next_step()

            start_time = datetime.utcnow()
            ie_network = benchmark.read_network(args.path_to_model)
            duration_ms = "{:.2f}".format((datetime.utcnow() - start_time).total_seconds() * 1000)
            logger.info("Read network took {} ms".format(duration_ms))
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [('read network time (ms)', duration_ms)])

            # --------------------- 5. Resizing network to match image sizes and given batch ---------------------------
            next_step()

            shapes = {k: v.input_data.shape.copy() for k, v in ie_network.input_info.items()}
            reshape = False
            if args.shape:
                reshape |= update_shapes(shapes, args.shape, ie_network.input_info)
            if args.batch_size and args.batch_size != ie_network.batch_size:
                reshape |= adjust_shapes_batch(shapes, args.batch_size, ie_network.input_info)

            if reshape:
                start_time = datetime.utcnow()
                logger.info('Reshaping network: {}'.format(', '.join("'{}': {}".format(k, v) for k, v in shapes.items())))
                ie_network.reshape(shapes)
                duration_ms = "{:.2f}".format((datetime.utcnow() - start_time).total_seconds() * 1000)
                logger.info("Reshape network took {} ms".format(duration_ms))
                if statistics:
                    statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                              [('reshape network time (ms)', duration_ms)])

            batch_size = ie_network.batch_size
            logger.info('Network batch size: {}'.format(ie_network.batch_size))

            # --------------------- 6. Configuring input of the model --------------------------------------------------
            next_step()

            config_network_inputs(ie_network)

            # --------------------- 7. Loading the model to the device -------------------------------------------------
            next_step()

            start_time = datetime.utcnow()
            exe_network = benchmark.load_network(ie_network)
            duration_ms = "{:.2f}".format((datetime.utcnow() - start_time).total_seconds() * 1000)
            logger.info("Load network took {} ms".format(duration_ms))
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [('load network time (ms)', duration_ms)])
        else:
            next_step()
            print("Skipping the step for compiled network")
            next_step()
            print("Skipping the step for compiled network")
            next_step()
            print("Skipping the step for compiled network")

            # --------------------- 7. Loading the model to the device -------------------------------------------------
            next_step()

            start_time = datetime.utcnow()
            exe_network = benchmark.import_network(args.path_to_model)
            duration_ms = "{:.2f}".format((datetime.utcnow() - start_time).total_seconds() * 1000)
            logger.info("Import network took {} ms".format(duration_ms))
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [('import network time (ms)', duration_ms)])
            if batch_size == 0:
                batch_size = 1

        # --------------------- 8. Setting optimal runtime parameters --------------------------------------------------
        next_step()

        # Update number of streams
        for device in device_number_streams.keys():
            key = device + '_THROUGHPUT_STREAMS'
            device_number_streams[device] = benchmark.ie.get_config(device, key)

        # Number of requests
        infer_requests = exe_network.requests

        # Iteration limit
        benchmark.niter = get_number_iterations(benchmark.niter, benchmark.nireq, args.api_type)

        # ------------------------------------ 9. Creating infer requests and filling input blobs ----------------------
        next_step()

        paths_to_input = list()
        if args.paths_to_input:
            for path in args.paths_to_input:
                paths_to_input.append(os.path.abspath(*path) if args.paths_to_input else None)
        set_inputs(paths_to_input, batch_size, exe_network.input_info, infer_requests)

        if statistics:
            statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG,
                                      [
                                          ('topology', ie_network.name),
                                          ('target device', device_name),
                                          ('API', args.api_type),
                                          ('precision', "UNSPECIFIED"),
                                          ('batch size', str(batch_size)),
                                          ('number of iterations', str(benchmark.niter) if benchmark.niter else "0"),
                                          ('number of parallel infer requests', str(benchmark.nireq)),
                                          ('duration (ms)', str(get_duration_in_milliseconds(benchmark.duration_seconds))),
                                      ])

            for nstreams in device_number_streams.items():
                statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG,
                                          [("number of {} streams".format(nstreams[0]), str(nstreams[1]))])

        # ------------------------------------ 10. Measuring performance -----------------------------------------------

        output_string = process_help_inference_string(benchmark)

        next_step(additional_info=output_string)
        progress_bar_total_count = 10000
        if benchmark.niter and not benchmark.duration_seconds:
            progress_bar_total_count = benchmark.niter

        progress_bar = ProgressBar(progress_bar_total_count, args.stream_output, args.progress) if args.progress else None

        fps, latency_ms, total_duration_sec, iteration = benchmark.infer(exe_network, batch_size, progress_bar)

        # ------------------------------------ 11. Dumping statistics report -------------------------------------------
        next_step()

        if args.dump_config:
            dump_config(args.dump_config, config)
            logger.info("Inference Engine configuration settings were dumped to {}".format(args.dump_config))

        if args.exec_graph_path:
            dump_exec_graph(exe_network, args.exec_graph_path)

        if perf_counts:
            perfs_count_list = []
            for ni in range(int(benchmark.nireq)):
                perfs_count_list.append(exe_network.requests[ni].get_perf_counts())
            if args.perf_counts:
                print_perf_counters(perfs_count_list)
            if statistics:
                statistics.dump_performance_counters(perfs_count_list)

        if statistics:
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [
                                          ('total execution time (ms)', '{:.2f}'.format(get_duration_in_milliseconds(total_duration_sec))),
                                          ('total number of iterations', str(iteration)),
                                      ])
            if MULTI_DEVICE_NAME not in device_name:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [('latency (ms)', '{:.2f}'.format(latency_ms))])
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [('throughput', '{:.2f}'.format(fps))])

        if statistics:
            statistics.dump()

        print('Count: {} iterations'.format(iteration))
        print('Duration: {:.2f} ms'.format(get_duration_in_milliseconds(total_duration_sec)))
        if MULTI_DEVICE_NAME not in device_name:
            print('Latency: {:.2f} ms'.format(latency_ms))
        print('Throughput: {:.2f} FPS'.format(fps))

        del exe_network

        next_step.step_id = 0
    except Exception as e:
        logger.exception(e)

        if statistics:
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [('error', str(e))])
            statistics.dump()
        sys.exit(1)
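
# Note on performance counters in the device-configuration loop above: PERF_COUNT is
# switched to 'YES' either explicitly via -pc, implicitly when an average/detailed
# counters report or an execution-graph dump is requested, or when a loaded config
# already enables it. The `perf_counts` flag then only records whether any device
# ended up with counters enabled, so the report/print logic in step 11 knows to
# collect profiling data from the infer requests.
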
def main(args):
    statistics = None
    try:
        if args.number_streams is None:
            logger.warning(" -nstreams default value is determined automatically for a device. "
                           "Although the automatic selection usually provides reasonable performance, "
                           "it may still be non-optimal for some cases; for more information look at README. ")

        if args.report_type:
            statistics = StatisticsReport(StatisticsReport.Config(args.report_type, args.report_folder))
            statistics.add_parameters(StatisticsReport.Category.COMMAND_LINE_PARAMETERS, get_command_line_arguments(sys.argv))

        # ------------------------------ 2. Loading Inference Engine ---------------------------------------------------
        next_step()

        device_name = args.target_device.upper()

        benchmark = Benchmark(args.target_device, args.number_infer_requests,
                              args.number_iterations, args.time, args.api_type)

        benchmark.add_extension(args.path_to_extension, args.path_to_cldnn_config)

        version = benchmark.get_version_info()
        logger.info(version)

        # --------------------- 3. Read the Intermediate Representation of the network ---------------------------------
        next_step()

        start_time = datetime.now()
        ie_network = read_network(args.path_to_model)
        duration_ms = "{:.2f}".format((datetime.now() - start_time).total_seconds() * 1000)
        if statistics:
            logger.info("Read network took {} ms".format(duration_ms))
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [('read network time (ms)', duration_ms)])

        # --------------------- 4. Resizing network to match image sizes and given batch -------------------------------
        next_step()

        if args.batch_size and args.batch_size != ie_network.batch_size:
            benchmark.reshape(ie_network, args.batch_size)
        batch_size = ie_network.batch_size
        logger.info('Network batch size: {}, precision: {}'.format(ie_network.batch_size, ie_network.precision))

        # --------------------- 5. Configuring input of the model ------------------------------------------------------
        next_step()

        config_network_inputs(ie_network)

        # --------------------- 6. Setting device configuration --------------------------------------------------------
        next_step()

        benchmark.set_config(args.number_streams, args.api_type, args.number_threads, args.infer_threads_pinning)

        # --------------------- 7. Loading the model to the device -----------------------------------------------------
        next_step()

        start_time = datetime.now()
        perf_counts = True if args.perf_counts or \
                              args.report_type in [averageCntReport, detailedCntReport] or \
                              args.exec_graph_path else False
        exe_network = benchmark.load_network(ie_network, perf_counts, args.number_infer_requests)
        duration_ms = "{:.2f}".format((datetime.now() - start_time).total_seconds() * 1000)
        if statistics:
            logger.info("Load network took {} ms".format(duration_ms))
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [('load network time (ms)', duration_ms)])

        # --------------------- 8. Setting optimal runtime parameters --------------------------------------------------
        next_step()

        # Number of requests
        infer_requests = exe_network.requests
        benchmark.nireq = len(infer_requests)

        # Iteration limit
        benchmark.niter = get_number_iterations(benchmark.niter, len(exe_network.requests), args.api_type)

        # ------------------------------------ 9. Creating infer requests and filling input blobs ----------------------
        next_step()

        request_queue = InferRequestsQueue(infer_requests)

        path_to_input = os.path.abspath(args.path_to_input) if args.path_to_input else None
        requests_input_data = get_inputs(path_to_input, batch_size, ie_network.inputs, infer_requests)

        if statistics:
            statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG,
                                      [
                                          ('topology', ie_network.name),
                                          ('target device', device_name),
                                          ('API', args.api_type),
                                          ('precision', str(ie_network.precision)),
                                          ('batch size', str(ie_network.batch_size)),
                                          ('number of iterations', str(benchmark.niter) if benchmark.niter else "0"),
                                          ('number of parallel infer requests', str(benchmark.nireq)),
                                          ('duration (ms)', str(get_duration_in_milliseconds(benchmark.duration_seconds))),
                                      ])

            for nstreams in benchmark.device_number_streams.items():
                statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG,
                                          [("number of {} streams".format(nstreams[0]), str(nstreams[1]))])

        # ------------------------------------ 10. Measuring performance -----------------------------------------------

        output_string = process_help_inference_string(benchmark)

        next_step(output_string)
        progress_bar_total_count = 10000
        if benchmark.niter and not benchmark.duration_seconds:
            progress_bar_total_count = benchmark.niter

        progress_bar = ProgressBar(progress_bar_total_count, args.stream_output, args.progress)

        fps, latency_ms, total_duration_sec, iteration = benchmark.infer(request_queue, requests_input_data, batch_size, progress_bar)

        # ------------------------------------ 11. Dumping statistics report -------------------------------------------
        next_step()

        if args.exec_graph_path:
            dump_exec_graph(exe_network, args.exec_graph_path)

        if perf_counts:
            perfs_count_list = []
            for ni in range(int(benchmark.nireq)):
                perfs_count_list.append(exe_network.requests[ni].get_perf_counts())
            if args.perf_counts:
                print_perf_counters(perfs_count_list)
            if statistics:
                statistics.dump_performance_counters(perfs_count_list)

        if statistics:
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [
                                          ('total execution time (ms)', '{:.2f}'.format(get_duration_in_milliseconds(total_duration_sec))),
                                          ('total number of iterations', str(iteration)),
                                      ])
            if MULTI_DEVICE_NAME not in device_name:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [('latency (ms)', '{:.2f}'.format(latency_ms))])
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [('throughput', '{:.2f}'.format(fps))])

        if statistics:
            statistics.dump()

        print('Count: {} iterations'.format(iteration))
        print('Duration: {:.2f} ms'.format(get_duration_in_milliseconds(total_duration_sec)))
        if MULTI_DEVICE_NAME not in device_name:
            print('Latency: {:.2f} ms'.format(latency_ms))
        print('Throughput: {:.2f} FPS'.format(fps))

        del exe_network

        next_step.step_id = 0
    except Exception as e:
        logger.exception(e)

        if statistics:
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [('error', str(e))])
            statistics.dump()
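
# Note on the -nstreams argument parsed by parse_nstreams_value_per_device(): it is
# split into a per-device mapping. An illustrative sketch (hypothetical values),
# based on the "<dev1>:<nstreams1>,<dev2>:<nstreams2>" format mentioned in the
# error message used in the functions above and below:
#
#     # -nstreams "CPU:4,GPU:2"  ->  {'CPU': '4', 'GPU': '2'}
#     # -nstreams "4"            ->  presumably the same value for every listed device
#
# The resulting dict is later used to fill the per-device <DEVICE>_THROUGHPUT_STREAMS keys.
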
def run(args):
    statistics = None
    try:
        if args.number_streams is None:
            logger.warning(" -nstreams default value is determined automatically for a device. "
                           "Although the automatic selection usually provides reasonable performance, "
                           "it may still be non-optimal for some cases; for more information look at README. ")

        command_line_arguments = get_command_line_arguments(sys.argv)
        if args.report_type:
            statistics = StatisticsReport(StatisticsReport.Config(args.report_type, args.report_folder))
            statistics.add_parameters(StatisticsReport.Category.COMMAND_LINE_PARAMETERS, command_line_arguments)

        def is_flag_set_in_command_line(flag):
            return any(x.strip('-') == flag for x, y in command_line_arguments)

        device_name = args.target_device
        devices = parse_devices(device_name)
        device_number_streams = parse_nstreams_value_per_device(devices, args.number_streams)

        config = {}
        if args.load_config:
            load_config(args.load_config, config)

        is_network_compiled = False
        _, ext = os.path.splitext(args.path_to_model)
        if ext == BLOB_EXTENSION:
            is_network_compiled = True
            print("Network is compiled")

        # ------------------------------ 2. Loading Inference Engine ---------------------------------------------------
        next_step(step_id=2)

        benchmark = Benchmark(args.target_device, args.number_infer_requests,
                              args.number_iterations, args.time, args.api_type)

        ## CPU (MKLDNN) extensions
        if CPU_DEVICE_NAME in device_name and args.path_to_extension:
            benchmark.add_extension(path_to_extension=args.path_to_extension)

        ## GPU (clDNN) Extensions
        if GPU_DEVICE_NAME in device_name and args.path_to_cldnn_config:
            if GPU_DEVICE_NAME not in config.keys():
                config[GPU_DEVICE_NAME] = {}
            config[GPU_DEVICE_NAME]['CONFIG_FILE'] = args.path_to_cldnn_config

        if GPU_DEVICE_NAME in config.keys() and 'CONFIG_FILE' in config[GPU_DEVICE_NAME].keys():
            cldnn_config = config[GPU_DEVICE_NAME]['CONFIG_FILE']
            benchmark.add_extension(path_to_cldnn_config=cldnn_config)

        version = benchmark.get_version_info()
        logger.info(version)

        # --------------------- 3. Setting device configuration --------------------------------------------------------
        next_step()

        def get_device_type_from_name(name):
            new_name = str(name)
            new_name = new_name.split(".", 1)[0]
            new_name = new_name.split("(", 1)[0]
            return new_name

        ## Set default values from dumped config
        default_devices = set()
        for device in devices:
            device_type = get_device_type_from_name(device)
            if device_type in config and device not in config:
                config[device] = config[device_type].copy()
                default_devices.add(device_type)

        for def_device in default_devices:
            config.pop(def_device)

        perf_counts = False
        for device in devices:
            if device not in config.keys():
                config[device] = {}

            ## Set performance counter
            if is_flag_set_in_command_line('pc'):
                ## set to user defined value
                config[device]['PERF_COUNT'] = 'YES' if args.perf_counts else 'NO'
            elif 'PERF_COUNT' in config[device].keys() and config[device]['PERF_COUNT'] == 'YES':
                logger.warning(f"Performance counters for {device} device are turned on. " +
                               "To print results use -pc option.")
            elif args.report_type in [averageCntReport, detailedCntReport]:
                logger.warning(f"Turn on performance counters for {device} device " +
                               f"since report type is {args.report_type}.")
                config[device]['PERF_COUNT'] = 'YES'
            elif args.exec_graph_path is not None:
                logger.warning(f"Turn on performance counters for {device} device " +
                               "due to execution graph dumping.")
                config[device]['PERF_COUNT'] = 'YES'
            else:
                ## set to default value
                config[device]['PERF_COUNT'] = 'YES' if args.perf_counts else 'NO'
            perf_counts = True if config[device]['PERF_COUNT'] == 'YES' else perf_counts

            ## high-level performance hints
            if is_flag_set_in_command_line('hint'):
                config[device]['PERFORMANCE_HINT'] = args.perf_hint.upper()
                if is_flag_set_in_command_line('nireq'):
                    config[device]['PERFORMANCE_HINT_NUM_REQUESTS'] = str(args.number_infer_requests)

            ## the rest are individual per-device settings (overriding the values the device will deduce from perf hint)
            def set_throughput_streams():
                key = get_device_type_from_name(device) + "_THROUGHPUT_STREAMS"
                if device in device_number_streams.keys():
                    ## set to user defined value
                    supported_config_keys = benchmark.ie.get_metric(device, 'SUPPORTED_CONFIG_KEYS')
                    if key not in supported_config_keys:
                        raise Exception(f"Device {device} doesn't support config key '{key}'! " +
                                        "Please specify -nstreams for correct devices in format <dev1>:<nstreams1>,<dev2>:<nstreams2>")
                    config[device][key] = device_number_streams[device]
                elif key not in config[device].keys() and args.api_type == "async" and not is_flag_set_in_command_line('hint'):
                    ## set the _AUTO value for the #streams
                    logger.warning(f"-nstreams default value is determined automatically for {device} device. " +
                                   "Although the automatic selection usually provides reasonable performance, "
                                   "it may still be non-optimal for some cases; for more information look at README.")
                    if device != MYRIAD_DEVICE_NAME:  ## MYRIAD sets the default number of streams implicitly
                        config[device][key] = get_device_type_from_name(device) + "_THROUGHPUT_AUTO"
                if key in config[device].keys():
                    device_number_streams[device] = config[device][key]

            if CPU_DEVICE_NAME in device:  # CPU supports few special performance-oriented keys
                # limit threading for CPU portion of inference
                if args.number_threads and is_flag_set_in_command_line("nthreads"):
                    config[device]['CPU_THREADS_NUM'] = str(args.number_threads)

                if is_flag_set_in_command_line("enforcebf16") or is_flag_set_in_command_line("enforce_bfloat16"):
                    config[device]['ENFORCE_BF16'] = 'YES' if args.enforce_bfloat16 else 'NO'

                if is_flag_set_in_command_line('pin'):
                    ## set to user defined value
                    config[device]['CPU_BIND_THREAD'] = args.infer_threads_pinning
                elif 'CPU_BIND_THREAD' not in config[device].keys():
                    if MULTI_DEVICE_NAME in device_name and GPU_DEVICE_NAME in device_name:
                        logger.warning(f"Turn off threads pinning for {device} " +
                                       "device since multi-scenario with GPU device is used.")
                        config[device]['CPU_BIND_THREAD'] = 'NO'

                ## for CPU execution, more throughput-oriented execution via streams
                set_throughput_streams()
            elif GPU_DEVICE_NAME in device:
                ## for GPU execution, more throughput-oriented execution via streams
                set_throughput_streams()

                if MULTI_DEVICE_NAME in device_name and CPU_DEVICE_NAME in device_name:
                    logger.warning("Turn on GPU throttling. Multi-device execution with the CPU + GPU performs best with GPU throttling hint, " +
                                   "which releases another CPU thread (that is otherwise used by the GPU driver for active polling)")
                    config[device]['GPU_PLUGIN_THROTTLE'] = '1'
            elif MYRIAD_DEVICE_NAME in device:
                set_throughput_streams()
                config[device]['LOG_LEVEL'] = 'LOG_INFO'
            elif GNA_DEVICE_NAME in device:
                if is_flag_set_in_command_line('qb'):
                    if args.qb == 8:
                        config[device]['GNA_PRECISION'] = 'I8'
                    else:
                        config[device]['GNA_PRECISION'] = 'I16'
                if args.number_threads and is_flag_set_in_command_line("nthreads"):
                    config[device]['GNA_LIB_N_THREADS'] = str(args.number_threads)
            else:
                supported_config_keys = benchmark.ie.get_metric(device, 'SUPPORTED_CONFIG_KEYS')
                if 'CPU_THREADS_NUM' in supported_config_keys and args.number_threads and is_flag_set_in_command_line("nthreads"):
                    config[device]['CPU_THREADS_NUM'] = str(args.number_threads)
                if 'CPU_THROUGHPUT_STREAMS' in supported_config_keys and args.number_streams and is_flag_set_in_command_line("streams"):
                    config[device]['CPU_THROUGHPUT_STREAMS'] = args.number_streams
                if 'CPU_BIND_THREAD' in supported_config_keys and args.infer_threads_pinning and is_flag_set_in_command_line("pin"):
                    config[device]['CPU_BIND_THREAD'] = args.infer_threads_pinning
        perf_counts = perf_counts

        benchmark.set_config(config)
        batch_size = args.batch_size
        if args.cache_dir:
            benchmark.set_cache_dir(args.cache_dir)

        topology_name = ""
        load_from_file_enabled = is_flag_set_in_command_line('load_from_file') or is_flag_set_in_command_line('lfile')
        if load_from_file_enabled and not is_network_compiled:
            next_step()
            print("Skipping the step for loading network from file")
            next_step()
            print("Skipping the step for loading network from file")
            next_step()
            print("Skipping the step for loading network from file")

            # --------------------- 7. Loading the model to the device -------------------------------------------------
            next_step()
            start_time = datetime.utcnow()
            exe_network = benchmark.load_network(args.path_to_model)
            duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
            logger.info(f"Load network took {duration_ms} ms")
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [('load network time (ms)', duration_ms)])
            app_inputs_info, _ = get_inputs_info(args.shape, args.layout, args.batch_size,
                                                 args.input_scale, args.input_mean, exe_network.input_info)
            if batch_size == 0:
                batch_size = 1
        elif not is_network_compiled:
            # --------------------- 4. Read the Intermediate Representation of the network -----------------------------
            next_step()

            start_time = datetime.utcnow()
            ie_network = benchmark.read_network(args.path_to_model)
            topology_name = ie_network.name
            duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
            logger.info(f"Read network took {duration_ms} ms")
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [('read network time (ms)', duration_ms)])

            # --------------------- 5. Resizing network to match image sizes and given batch ---------------------------
            next_step()

            app_inputs_info, reshape = get_inputs_info(args.shape, args.layout, args.batch_size,
                                                       args.input_scale, args.input_mean, ie_network.input_info)
            if reshape:
                start_time = datetime.utcnow()
                shapes = {k: v.shape for k, v in app_inputs_info.items()}
                logger.info('Reshaping network: {}'.format(', '.join("'{}': {}".format(k, v) for k, v in shapes.items())))
                ie_network.reshape(shapes)
                duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
                logger.info(f"Reshape network took {duration_ms} ms")
                if statistics:
                    statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                              [('reshape network time (ms)', duration_ms)])

            # use batch size according to provided layout and shapes
            batch_size = get_batch_size(app_inputs_info) if args.layout else ie_network.batch_size
            logger.info(f'Network batch size: {batch_size}')

            # --------------------- 6. Configuring inputs and outputs of the model --------------------------------------------------
            next_step()

            process_precision(ie_network, app_inputs_info, args.input_precision, args.output_precision, args.input_output_precision)
            print_inputs_and_outputs_info(ie_network)

            # --------------------- 7. Loading the model to the device -------------------------------------------------
            next_step()

            start_time = datetime.utcnow()
            exe_network = benchmark.load_network(ie_network)
            duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
            logger.info(f"Load network took {duration_ms} ms")
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [('load network time (ms)', duration_ms)])
        else:
            next_step()
            print("Skipping the step for compiled network")
            next_step()
            print("Skipping the step for compiled network")
            next_step()
            print("Skipping the step for compiled network")

            # --------------------- 7. Loading the model to the device -------------------------------------------------
            next_step()
            start_time = datetime.utcnow()
            exe_network = benchmark.import_network(args.path_to_model)
            duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
            logger.info(f"Import network took {duration_ms} ms")
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [('import network time (ms)', duration_ms)])
            app_inputs_info, _ = get_inputs_info(args.shape, args.layout, args.batch_size,
                                                 args.input_scale, args.input_mean, exe_network.input_info)
            if batch_size == 0:
                batch_size = 1

        # --------------------- 8. Querying optimal runtime parameters --------------------------------------------------
        next_step()
        if is_flag_set_in_command_line('hint'):
            ## actual device-deduced settings for the hint
            for device in devices:
                keys = benchmark.ie.get_metric(device, 'SUPPORTED_CONFIG_KEYS')
                logger.info(f'DEVICE: {device}')
                for k in keys:
                    logger.info(f' {k} , {exe_network.get_config(k)}')

        # Update number of streams
        for device in device_number_streams.keys():
            key = get_device_type_from_name(device) + '_THROUGHPUT_STREAMS'
            device_number_streams[device] = benchmark.ie.get_config(device, key)

        # Number of requests
        infer_requests = exe_network.requests

        # Iteration limit
        benchmark.niter = get_number_iterations(benchmark.niter, benchmark.nireq, args.api_type)

        # ------------------------------------ 9. Creating infer requests and filling input blobs ----------------------
        next_step()

        paths_to_input = list()
        if args.paths_to_input:
            for path in args.paths_to_input:
                if ":" in next(iter(path), ""):
                    paths_to_input.extend(path)
                else:
                    paths_to_input.append(os.path.abspath(*path))
        set_inputs(paths_to_input, batch_size, app_inputs_info, infer_requests)

        if statistics:
            statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG,
                                      [
                                          ('topology', topology_name),
                                          ('target device', device_name),
                                          ('API', args.api_type),
                                          ('precision', "UNSPECIFIED"),
                                          ('batch size', str(batch_size)),
                                          ('number of iterations', str(benchmark.niter) if benchmark.niter else "0"),
                                          ('number of parallel infer requests', str(benchmark.nireq)),
                                          ('duration (ms)', str(get_duration_in_milliseconds(benchmark.duration_seconds))),
                                      ])

            for nstreams in device_number_streams.items():
                statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG,
                                          [(f"number of {nstreams[0]} streams", str(nstreams[1]))])

        # ------------------------------------ 10. Measuring performance -----------------------------------------------

        output_string = process_help_inference_string(benchmark, device_number_streams)

        next_step(additional_info=output_string)
        progress_bar_total_count = 10000
        if benchmark.niter and not benchmark.duration_seconds:
            progress_bar_total_count = benchmark.niter

        progress_bar = ProgressBar(progress_bar_total_count, args.stream_output, args.progress) if args.progress else None

        duration_ms = f"{benchmark.first_infer(exe_network):.2f}"
        logger.info(f"First inference took {duration_ms} ms")
        if statistics:
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [('first inference time (ms)', duration_ms)])

        fps, latency_ms, total_duration_sec, iteration = benchmark.infer(exe_network, batch_size, args.latency_percentile, progress_bar)

        # ------------------------------------ 11. Dumping statistics report -------------------------------------------
        next_step()

        if args.dump_config:
            dump_config(args.dump_config, config)
            logger.info(f"Inference Engine configuration settings were dumped to {args.dump_config}")

        if args.exec_graph_path:
            dump_exec_graph(exe_network, args.exec_graph_path)

        if perf_counts:
            perfs_count_list = []
            for ni in range(int(benchmark.nireq)):
                perfs_count_list.append(exe_network.requests[ni].get_perf_counts())
            if args.perf_counts:
                print_perf_counters(perfs_count_list)
            if statistics:
                statistics.dump_performance_counters(perfs_count_list)

        if statistics:
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [
                                          ('total execution time (ms)', f'{get_duration_in_milliseconds(total_duration_sec):.2f}'),
                                          ('total number of iterations', str(iteration)),
                                      ])
            if MULTI_DEVICE_NAME not in device_name:
                if args.latency_percentile == 50:
                    latency_prefix = 'latency (ms)'
                else:
                    latency_prefix = 'latency (' + str(args.latency_percentile) + ' percentile) (ms)'
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [(latency_prefix, f'{latency_ms:.2f}')])
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [('throughput', f'{fps:.2f}')])

        if statistics:
            statistics.dump()

        print(f'Count: {iteration} iterations')
        print(f'Duration: {get_duration_in_milliseconds(total_duration_sec):.2f} ms')
        if MULTI_DEVICE_NAME not in device_name:
            if args.latency_percentile == 50:
                print(f'Latency: {latency_ms:.2f} ms')
            else:
                print(f'Latency ({args.latency_percentile} percentile): {latency_ms:.2f} ms')
        print(f'Throughput: {fps:.2f} FPS')

        del exe_network

        next_step.step_id = 0
    except Exception as e:
        logger.exception(e)

        if statistics:
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [('error', str(e))])
            statistics.dump()
        sys.exit(1)
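
# A minimal sketch of how run() is typically driven (assuming a parse_args()
# helper exists alongside this module, as in the benchmark_app CLI; the names
# below are illustrative and not part of this file):
#
#     if __name__ == '__main__':
#         run(parse_args())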