def test_set_batch_default_batch_size():
    param1 = ops.parameter(Shape([2, 1]), dtype=np.float32, name="data1")
    param2 = ops.parameter(Shape([2, 1]), dtype=np.float32, name="data2")
    add = ops.add(param1, param2)
    func = Model(add, [param1, param2], "TestFunction")
    func_param1 = func.get_parameters()[0]
    func_param1.set_layout(Layout("NC"))
    set_batch(func)
    assert func.is_dynamic()
def create_infer_requests(self, model, path, batch_sizes=None):
    if batch_sizes is not None:
        requests = []
        for parameter in model.get_parameters():
            parameter.set_layout(Layout("BC"))
        for b_s in batch_sizes:
            set_batch(model, b_s)
            compiled_model = self.ie.compile_model(model, device_name=self.device)
            requests.append(compiled_model.create_infer_request())
    else:
        compiled_model = self.ie.compile_model(model, device_name=self.device)
        requests = compiled_model.create_infer_request()
    log.info('The WaveRNN model {} is loaded to {}'.format(path, self.device))
    return requests
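# A minimal usage sketch for create_infer_requests(); the wrapper name, its
# constructor, and the model path are illustrative assumptions only:
#   synthesizer = WaveRNNIE(...)  # hypothetical class holding `ie` (a Core) and `device`
#   model = synthesizer.ie.read_model("wavernn.xml")  # hypothetical path
#   requests = synthesizer.create_infer_requests(model, "wavernn.xml", batch_sizes=[1, 2, 4])
# With batch_sizes given, `requests` holds one InferRequest per batch size, so the
# caller can pick the request whose batch matches the current chunk of input.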
def test_set_batch_int():
    model = create_test_model()
    model_param1 = model.get_parameters()[0]
    model_param2 = model.get_parameters()[1]
    # batch == 2
    model_param1.set_layout(Layout("NC"))
    assert get_batch(model) == 2
    # set batch to 1
    set_batch(model, 1)
    assert get_batch(model) == 1
    # check if shape of param 1 has changed
    assert model_param1.get_output_shape(0) == PartialShape([1, 1])
    # check if shape of param 2 has not changed
    assert model_param2.get_output_shape(0) == PartialShape([2, 1])
def test_set_batch_int():
    param1 = ops.parameter(Shape([2, 1]), dtype=np.float32, name="data1")
    param2 = ops.parameter(Shape([2, 1]), dtype=np.float32, name="data2")
    add = ops.add(param1, param2)
    func = Model(add, [param1, param2], "TestFunction")
    func_param1 = func.get_parameters()[0]
    func_param2 = func.get_parameters()[1]
    # batch == 2
    func_param1.set_layout(Layout("NC"))
    assert get_batch(func) == 2
    # set batch to 1
    set_batch(func, 1)
    assert get_batch(func) == 1
    # check if shape of param 1 has changed
    assert func_param1.get_output_shape(0) == PartialShape([1, 1])
    # check if shape of param 2 has not changed
    assert func_param2.get_output_shape(0) == PartialShape([2, 1])
def main():
    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)

    # Parsing and validation of input arguments
    if len(sys.argv) != 3:
        log.info(f'Usage: {sys.argv[0]} <path_to_model> <device_name>')
        return 1

    model_path = sys.argv[1]
    device_name = sys.argv[2]

    labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    number_top = 1

    # ---------------------------Step 1. Initialize OpenVINO Runtime Core----------------------------------------------
    log.info('Creating OpenVINO Runtime Core')
    core = Core()

    # ---------------------------Step 2. Read a model in OpenVINO Intermediate Representation--------------------------
    log.info(f'Loading the model using ngraph function with weights from {model_path}')
    model = create_ngraph_function(model_path)

    # ---------------------------Step 3. Apply preprocessing-----------------------------------------------------------
    ppp = PrePostProcessor(model)
    # 1) Set input tensor information:
    # - input() provides information about a single model input
    # - precision of tensor is supposed to be 'u8'
    # - layout of data is 'NHWC'
    ppp.input().tensor() \
        .set_element_type(Type.u8) \
        .set_layout(Layout('NHWC'))  # noqa: N400
    # 2) Here we suppose model has 'NCHW' layout for input
    ppp.input().model().set_layout(Layout('NCHW'))
    # 3) Set output tensor information:
    # - precision of tensor is supposed to be 'f32'
    ppp.output().tensor().set_element_type(Type.f32)
    # 4) Apply preprocessing, modifying the original 'model'
    model = ppp.build()

    # Set a batch size equal to the number of input images
    set_batch(model, digits.shape[0])

    # ---------------------------Step 4. Loading model to the device---------------------------------------------------
    log.info('Loading the model to the plugin')
    compiled_model = core.compile_model(model, device_name)

    # ---------------------------Step 5. Prepare input-----------------------------------------------------------------
    n, c, h, w = model.input().shape
    input_data = np.ndarray(shape=(n, c, h, w))
    for i in range(n):
        image = digits[i].reshape(28, 28)
        image = image[:, :, np.newaxis]
        input_data[i] = image

    # ---------------------------Step 6. Do inference------------------------------------------------------------------
    log.info('Starting inference in synchronous mode')
    results = compiled_model.infer_new_request({0: input_data})

    # ---------------------------Step 7. Process output----------------------------------------------------------------
    predictions = next(iter(results.values()))

    log.info(f'Top {number_top} results: ')
    for i in range(n):
        probs = predictions[i]
        # Get an array of number_top class IDs in descending order of probability
        top_n_indexes = np.argsort(probs)[-number_top:][::-1]

        header = 'classid probability'
        header = header + ' label' if labels else header

        log.info(f'Image {i}')
        log.info('')
        log.info(header)
        log.info('-' * len(header))

        for class_id in top_n_indexes:
            probability_indent = ' ' * (len('classid') - len(str(class_id)) + 1)
            label_indent = ' ' * (len('probability') - 8) if labels else ''
            label = labels[class_id] if labels else ''
            log.info(f'{class_id}{probability_indent}{probs[class_id]:.7f}{label_indent}{label}')
        log.info('')

    # ------------------------------------------------------------------------------------------------------------------
    log.info('This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool\n')
    return 0
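# `digits` is assumed to be defined at module level as a uint8 array of images
# flattened to 784 values each (the model consumes 28x28 single-channel inputs);
# a random stand-in, for illustration only:
import numpy as np
digits = np.random.randint(0, 255, size=(4, 784), dtype=np.uint8)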
def test_set_batch_default_batch_size():
    model = create_test_model()
    model_param1 = model.get_parameters()[0]
    model_param1.set_layout(Layout("NC"))
    set_batch(model)
    assert model.is_dynamic()
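# The create_test_model() helper used above is not shown; a minimal sketch that is
# consistent with the hand-built variants in this file (two [2, 1] float32
# parameters feeding an Add node):
def create_test_model():
    param1 = ops.parameter(Shape([2, 1]), dtype=np.float32, name="data1")
    param2 = ops.parameter(Shape([2, 1]), dtype=np.float32, name="data2")
    add = ops.add(param1, param2)
    return Model(add, [param1, param2], "TestFunction")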
def main():
    args = parse_args()

    # --------------------------- Step 1. Initialize OpenVINO Runtime Core ------------------------------------------------
    log.info('Creating OpenVINO Runtime Core')
    core = Core()

    # --------------------------- Step 2. Read a model --------------------------------------------------------------------
    if args.model:
        log.info(f'Reading the model: {args.model}')
        # (.xml and .bin files) or (.onnx file)
        model = core.read_model(args.model)

        # --------------------------- Step 3. Apply preprocessing ---------------------------------------------------------
        if args.output_layers:
            output_layer_names, output_layer_ports = parse_outputs_from_args(args)
            model.add_outputs(list(zip(output_layer_names, output_layer_ports)))

        if args.layout:
            layouts = parse_input_layouts(args, model.inputs)

        ppp = PrePostProcessor(model)

        for i in range(len(model.inputs)):
            ppp.input(i).tensor().set_element_type(Type.f32)
            input_name = model.input(i).get_any_name()
            if args.layout and input_name in layouts.keys():
                ppp.input(i).tensor().set_layout(Layout(layouts[input_name]))
                ppp.input(i).model().set_layout(Layout(layouts[input_name]))

        for i in range(len(model.outputs)):
            ppp.output(i).tensor().set_element_type(Type.f32)

        model = ppp.build()

        if args.batch_size:
            batch_size = args.batch_size if args.context_window_left == args.context_window_right == 0 else 1
            if any([not _input.node.layout.empty for _input in model.inputs]):
                set_batch(model, batch_size)
            else:
                log.warning('Layout is not set for any input, so custom batch size is not set')

    # ---------------------------Step 4. Configure plugin ---------------------------------------------------------
    devices = args.device.replace('HETERO:', '').split(',')
    plugin_config = {}

    if 'GNA' in args.device:
        gna_device_mode = devices[0] if '_' in devices[0] else 'GNA_AUTO'
        devices[0] = 'GNA'

        plugin_config['GNA_DEVICE_MODE'] = gna_device_mode
        plugin_config['GNA_PRECISION'] = f'I{args.quantization_bits}'
        plugin_config['GNA_EXEC_TARGET'] = args.exec_target
        plugin_config['GNA_PWL_MAX_ERROR_PERCENT'] = str(args.pwl_me)

        # Set a GNA scale factor
        if args.import_gna_model:
            if args.scale_factor:
                log.warning(f'Custom scale factor will be used for imported GNA model: {args.import_gna_model}')
                set_scale_factors(plugin_config, parse_scale_factors(args))
            else:
                log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}')
        else:
            if args.scale_factor:
                set_scale_factors(plugin_config, parse_scale_factors(args))
            else:
                scale_factors = []
                for file_name in re.split(', |,', args.input):
                    _, utterances = read_utterance_file(file_name)
                    scale_factors.append(get_scale_factor(utterances[0]))
                log.info('Using scale factor(s) calculated from first utterance')
                set_scale_factors(plugin_config, scale_factors)

        if args.export_embedded_gna_model:
            plugin_config['GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model
            plugin_config['GNA_FIRMWARE_MODEL_IMAGE_GENERATION'] = args.embedded_gna_configuration

        if args.performance_counter:
            plugin_config['PERF_COUNT'] = 'YES'

    device_str = f'HETERO:{",".join(devices)}' if 'HETERO' in args.device else devices[0]

    # --------------------------- Step 5. Loading model to the device -----------------------------------------------------
    log.info('Loading the model to the plugin')
    if args.model:
        compiled_model = core.compile_model(model, device_str, plugin_config)
    else:
        with open(args.import_gna_model, 'rb') as f:
            buf = BytesIO(f.read())
            compiled_model = core.import_model(buf, device_str, plugin_config)

    # --------------------------- Exporting GNA model using InferenceEngine AOT API ---------------------------------------
    if args.export_gna_model:
        log.info(f'Writing GNA Model to {args.export_gna_model}')
        user_stream = compiled_model.export_model()
        with open(args.export_gna_model, 'wb') as f:
            f.write(user_stream)
        return 0

    if args.export_embedded_gna_model:
        log.info(f'Exported GNA embedded model to file {args.export_embedded_gna_model}')
        log.info(f'GNA embedded model export done for GNA generation {args.embedded_gna_configuration}')
        return 0

    # --------------------------- Step 6. Set up input --------------------------------------------------------------------
    if args.input_layers:
        input_layer_names = re.split(', |,', args.input_layers)
    else:
        input_layer_names = [_input.any_name for _input in compiled_model.inputs]

    input_file_names = re.split(', |,', args.input)

    if len(input_layer_names) != len(input_file_names):
        log.error(f'Number of model inputs ({len(compiled_model.inputs)}) is not equal '
                  f'to number of ark files ({len(input_file_names)})')
        sys.exit(-3)

    input_file_data = [read_utterance_file(file_name) for file_name in input_file_names]

    infer_data = [
        {
            input_layer_names[j]: input_file_data[j].utterances[i]
            for j in range(len(input_layer_names))
        }
        for i in range(len(input_file_data[0].utterances))
    ]

    if args.output_layers:
        output_layer_names, output_layer_ports = parse_outputs_from_args(args)
        # If a name of output layer contains a port number then concatenate output_layer_names and output_layer_ports
        if ':' in compiled_model.outputs[0].any_name:
            output_layer_names = [
                f'{output_layer_names[i]}:{output_layer_ports[i]}'
                for i in range(len(output_layer_names))
            ]
    else:
        output_layer_names = [compiled_model.outputs[0].any_name]

    if args.output:
        output_file_names = re.split(', |,', args.output)

        if len(output_layer_names) != len(output_file_names):
            log.error('The number of output files is not equal to the number of model outputs.')
            sys.exit(-6)

    if args.reference:
        reference_file_names = re.split(', |,', args.reference)

        if len(output_layer_names) != len(reference_file_names):
            log.error('The number of reference files is not equal to the number of model outputs.')
            sys.exit(-5)

        reference_file_data = [read_utterance_file(file_name) for file_name in reference_file_names]

        references = [
            {
                output_layer_names[j]: reference_file_data[j].utterances[i]
                for j in range(len(output_layer_names))
            }
            for i in range(len(input_file_data[0].utterances))
        ]

    # --------------------------- Step 7. Create infer request ------------------------------------------------------------
    infer_request = compiled_model.create_infer_request()

    # --------------------------- Step 8. Do inference --------------------------------------------------------------------
    log.info('Starting inference in synchronous mode')
    results = []
    total_infer_time = 0

    for i in range(len(infer_data)):
        start_infer_time = default_timer()

        # Reset states between utterance inferences to remove a memory impact
        for state in infer_request.query_state():
            state.reset()

        results.append(do_inference(
            infer_data[i],
            infer_request,
            args.context_window_left,
            args.context_window_right,
        ))

        infer_time = default_timer() - start_infer_time
        total_infer_time += infer_time
        num_of_frames = infer_data[i][input_layer_names[0]].shape[0]
        avg_infer_time_per_frame = infer_time / num_of_frames

        # --------------------------- Step 9. Process output --------------------------------------------------------------
        log.info('')
        log.info(f'Utterance {i}:')
        log.info(f'Total time in Infer (HW and SW): {infer_time * 1000:.2f}ms')
        log.info(f'Frames in utterance: {num_of_frames}')
        log.info(f'Average Infer time per frame: {avg_infer_time_per_frame * 1000:.2f}ms')

        for name in output_layer_names:
            log.info('')
            log.info(f'Output blob name: {name}')
            log.info(f'Number scores per frame: {results[i][name].shape[1]}')

            if args.reference:
                log.info('')
                compare_with_reference(results[i][name], references[i][name])

        if args.performance_counter:
            if 'GNA' in args.device:
                total_cycles = infer_request.profiling_info[0].real_time.total_seconds()
                stall_cycles = infer_request.profiling_info[1].real_time.total_seconds()
                active_cycles = total_cycles - stall_cycles
                frequency = 10**6
                if args.arch == 'CORE':
                    frequency *= GNA_CORE_FREQUENCY
                else:
                    frequency *= GNA_ATOM_FREQUENCY
                total_inference_time = total_cycles / frequency
                active_time = active_cycles / frequency
                stall_time = stall_cycles / frequency
                log.info('')
                log.info('Performance Statistics of GNA Hardware')
                log.info(f'   Total Inference Time: {(total_inference_time * 1000):.4f} ms')
                log.info(f'   Active Time: {(active_time * 1000):.4f} ms')
                log.info(f'   Stall Time: {(stall_time * 1000):.4f} ms')

    log.info('')
    log.info(f'Total sample time: {total_infer_time * 1000:.2f}ms')

    if args.output:
        for i, name in enumerate(output_layer_names):
            # Collect the i-th output across all utterances (the inner loop variable
            # is renamed to `u` to avoid shadowing the enumerate index)
            data = [results[u][name] for u in range(len(input_file_data[0].utterances))]
            write_utterance_file(output_file_names[i], input_file_data[0].keys, data)
            log.info(f'File {output_file_names[i]} was created!')

    # ----------------------------------------------------------------------------------------------------------------------
    log.info('This sample is an API example, '
             'for any performance measurements please use the dedicated benchmark_app tool\n')
    return 0
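# Example invocation (the flag names are assumptions based on the argument parser,
# which is not shown here; paths are placeholders):
#   python speech_sample.py -m wsj_dnn5b.xml -i dev93_10.ark -o scores.ark -d GNA_AUTO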
# Explicit preprocessing steps. Layout conversion will be done automatically as the last step
ppp.input().preprocess() \
    .convert_element_type() \
    .convert_color(ColorFormat.RGB) \
    .resize(ResizeAlgorithm.RESIZE_LINEAR) \
    .mean([123.675, 116.28, 103.53]) \
    .scale([58.624, 57.12, 57.375])

# Dump preprocessor
print(f'Dump preprocessor: {ppp}')
model = ppp.build()

# ======== Step 2: Change batch size ================
# In this example we also want to change the batch size to increase throughput
set_batch(model, 2)

# ======== Step 3: Save the model ================
serialize(model, '/path/to/some_model_saved.xml', '/path/to/some_model_saved.bin')
# ! [ov:preprocess:save]

# ! [ov:preprocess:save_load]
core = Core()
core.set_property({'CACHE_DIR': '/path/to/cache/dir'})

# In case no preprocessing is needed anymore, we can load the model on the target device directly
# With a cached model available, this also saves time on reading the original model
compiled_model = core.compile_model('/path/to/some_model_saved.xml', 'CPU')
# ! [ov:preprocess:save_load]
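# Optional sanity check, a sketch assuming the paths above exist: reading the
# serialized model back shows the u8/NHWC input tensor baked in by the preprocessor
# and the batch size of 2 set before serialization.
reloaded = core.read_model('/path/to/some_model_saved.xml')
print(reloaded.input().element_type, reloaded.input().partial_shape)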
# Copyright (C) 2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

#! [import]
from openvino.runtime import Core, Layout, set_batch
from openvino.preprocess import PrePostProcessor
#! [import]

model_path = "model.xml"
batch_size = 8

#! [ov_gna_read_model]
core = Core()
model = core.read_model(model=model_path)
#! [ov_gna_read_model]

#! [ov_gna_set_nc_layout]
ppp = PrePostProcessor(model)
for i in range(len(model.inputs)):
    input_name = model.input(i).get_any_name()
    ppp.input(i).model().set_layout(Layout("N?"))
model = ppp.build()
#! [ov_gna_set_nc_layout]

#! [ov_gna_set_batch_size]
set_batch(model, batch_size)
#! [ov_gna_set_batch_size]
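# Sanity check (a sketch): after set_batch, the dimension marked 'N' in each
# input layout should now equal batch_size.
for model_input in model.inputs:
    print(model_input.get_any_name(), model_input.get_partial_shape())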
# Copyright (C) 2018-2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

from openvino.runtime import Core, Layout, set_batch

ov = Core()
model = ov.read_model("path/to/model")

#! [picture_snippet]
model.reshape([8, 3, 448, 448])
#! [picture_snippet]

#! [set_batch]
model.get_parameters()[0].set_layout(Layout("N..."))
set_batch(model, 5)
#! [set_batch]

#! [simple_spatials_change]
from cv2 import imread
image = imread("path/to/image")
model.reshape([1, 3, image.shape[0], image.shape[1]])
#! [simple_spatials_change]

#! [obj_to_shape]
port_to_shape = dict()
for input_obj in model.inputs:
    shape = input_obj.get_partial_shape()
    # modify shape to fit your needs
    # ...
    port_to_shape[input_obj] = shape
model.reshape(port_to_shape)
#! [obj_to_shape]
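# Complementary sketch: get_batch (the counterpart of set_batch in
# openvino.runtime) reads the batch dimension back through the layout set above.
from openvino.runtime import get_batch
print(get_batch(model))  # prints 5 after the set_batch(model, 5) call above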