def attrs_to_dict(attrs):
    """
    Converts a sequence of ONNX attributes into an ordered name -> value mapping
    whose values are plain, printable Python objects.

    Attributes whose type is unknown or unsupported are skipped with a warning.

    Args:
        attrs (Iterable): The attributes to convert. Each element must expose
                `type` and `name`, plus the value field named by ONNX_PYTHON_ATTR_MAPPING.

    Returns:
        OrderedDict[str, object]: The converted attributes, in input order.
    """
    def convert(attr, attr_str):
        # Pull the raw protobuf value out of the field that corresponds to this type.
        raw = getattr(attr, ONNX_PYTHON_ATTR_MAPPING[attr_str])

        if attr_str == "STRING":
            return raw.decode()
        if attr_str == "TENSOR":
            summary = "Tensor: [dtype={:}, shape={:}]".format(get_dtype(raw), get_shape(raw))
            if mode == "full":
                summary += " | Values:\n" + misc.indent_block(str(get_values(raw)))
            return summary
        if attr_str == "GRAPH":
            # Subgraphs are rendered recursively, indented further than the parent.
            return "\n" + str_from_onnx_graph(raw, mode, tensors, indent_level=indent_level + 2)
        if attr_str in ("FLOATS", "INTS"):
            # Protobuf repeated field -> normal Python list
            return list(raw)
        if attr_str == "STRINGS":
            return [elem.decode() for elem in raw]
        return raw

    attr_dict = OrderedDict()
    for attr in attrs:
        if attr.type not in ATTR_TYPE_MAPPING:
            G_LOGGER.warning("Attribute type: {:} was not recognized. Was the graph generated with a newer IR "
                             "version than the installed `onnx` package? Skipping attribute.".format(attr.type))
            continue

        attr_str = ATTR_TYPE_MAPPING[attr.type]
        if attr_str not in ONNX_PYTHON_ATTR_MAPPING:
            G_LOGGER.warning("Attribute of type {:} is currently unsupported. Skipping attribute.".format(attr_str))
            continue

        attr_dict[attr.name] = convert(attr, attr_str)
    return attr_dict
def __init__(self, sess, timeline_dir=None, name=None):
    """
    Args:
        sess (Callable() -> Tuple[tf.Session, Sequence[str]]):
                A callable that supplies a tuple of a TensorFlow session and its output names.

        timeline_dir (str):
                Directory in which to write a TensorFlow timeline.
                Providing it turns on full tracing, which may affect execution time.
        name (str):
                The human-readable name prefix to use for this runner.
                A runner count and timestamp will be appended to this prefix.
    """
    super().__init__(name=name, prefix="tf-runner")

    self._sess = sess
    self.timeline_dir = timeline_dir
    self.num_inferences = 0

    # Profiling is only active when a timeline directory was requested.
    profiling = self.timeline_dir is not None
    if profiling:
        G_LOGGER.warning("Profiling is enabled. This will impact performance")
    self.run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) if profiling else None
    self.run_metadata = tf.RunMetadata() if profiling else None
def infer_impl(self, feed_dict):
    """
    Runs the session once on `feed_dict` and returns the outputs keyed by name.

    Side effects: records the wall-clock time in `self.inference_time`, increments
    `self.num_inferences`, and, when `self.timeline_dir` is set, writes a
    Chrome-trace timeline for this run.

    Args:
        feed_dict: The inputs, passed to `sess.run` unchanged.

    Returns:
        OrderedDict[str, object]: Outputs in the order of `self.output_names`.
    """
    G_LOGGER.extra_verbose("Received feed_dict: {:}".format(feed_dict))

    t_begin = time.time()
    results = self.sess.run(self.output_names,
                            feed_dict=feed_dict,
                            options=self.run_options,
                            run_metadata=self.run_metadata)
    t_end = time.time()

    out_dict = OrderedDict(zip(self.output_names, results))
    self.inference_time = t_end - t_begin

    if self.timeline_dir is not None:
        def generate_timeline():
            # Imported lazily so the timeline module is only needed when profiling.
            from tensorflow.python.client import timeline
            return timeline.Timeline(self.run_metadata.step_stats).generate_chrome_trace_format()

        trace_path = os.path.join(self.timeline_dir, "run-{:}".format(self.num_inferences))
        misc.lazy_write(contents=generate_timeline, path=trace_path, mode="w")

    self.num_inferences += 1
    return out_dict
def mark_layerwise(network):
    """
    Marks the outputs of every eligible layer in `network` as network outputs.

    Layers between a loop-start layer and a loop-end layer are skipped, as are
    SHAPE and CONSTANT layers.

    Args:
        network: The network whose layers are iterated; passed on to `mark_outputs`.
    """
    def existing_layer_types(names):
        # Only keep the layer types that exist in the installed TensorRT version.
        return [getattr(trt.LayerType, name) for name in names if hasattr(trt.LayerType, name)]

    # Layers within loops cannot be marked as network outputs.
    loop_start_layers = existing_layer_types(["TRIP_LIMIT", "ITERATOR", "RECURRENCE"])
    loop_end_layers = existing_layer_types(["LOOP_OUTPUT"])
    excluded_layers = [trt.LayerType.SHAPE, trt.LayerType.CONSTANT]

    outputs = []
    inside_loop = False
    for layer in network:
        if layer.type in loop_start_layers:
            G_LOGGER.warning("Loop detected. Please ensure the network is topologically sorted so that layers within "
                             "the loop body are not marked as network outputs in layerwise mode", mode=LogMode.ONCE)
            inside_loop = True
        elif layer.type in loop_end_layers:
            inside_loop = False

        if inside_loop or layer.type in excluded_layers:
            continue
        outputs.extend(layer.get_output(index).name for index in range(layer.num_outputs))

    G_LOGGER.verbose("Marking {:} tensors as outputs".format(len(outputs)))
    mark_outputs(network, outputs)
def build_default_profile(builder, network, default_shape_value=None):
    """
    Builds an optimization profile whose min, opt and max shapes are identical
    for every network input.

    Dynamic dimensions are replaced by `default_shape_value`; shape-tensor inputs
    get `default_shape_value` repeated once per element.

    Args:
        builder: The builder used to create the optimization profile.
        network: The network whose inputs are inspected.
        default_shape_value (int): Replacement for dynamic dimensions.
                Defaults to DEFAULT_SHAPE_VALUE.

    Returns:
        The result of `check_profile` on the populated profile.
    """
    fill_value = misc.default_value(default_shape_value, DEFAULT_SHAPE_VALUE)

    def make_static(shape):
        return tuple(fill_value if misc.is_dimension_dynamic(dim) else dim for dim in shape)

    trt_profile = builder.create_optimization_profile()
    for idx in range(network.num_inputs):
        inp = network.get_input(idx)

        with G_LOGGER.verbosity(G_LOGGER.CRITICAL):  # WAR for spam from TRT
            is_shape_tensor = inp.is_shape_tensor

        if not is_shape_tensor:
            shape = make_static(inp.shape)
            if shape != inp.shape:
                G_LOGGER.warning("Input tensor: {:24} | Will use shape: {:} in profile (tensor shape is: {:}).\n"
                                 "If this is incorrect, please provide a profile "
                                 "that sets the shape for this input tensor.".format(inp.name, shape, inp.shape),
                                 mode=LogMode.ONCE)
            trt_profile.set_shape(inp.name, shape, shape, shape)
            continue

        # For a shape tensor, the first dimension of its own shape is the number of values it holds.
        rank = inp.shape[0]
        shape = (fill_value, ) * rank
        G_LOGGER.warning("Input shape-tensor: {:24} | Will use input values: {:} in profile.\n"
                         "If this is incorrect, please provide a profile "
                         "that sets the values for this input shape-tensor.".format(inp.name, shape),
                         mode=LogMode.ONCE)
        trt_profile.set_shape_input(inp.name, shape, shape, shape)

    return check_profile(trt_profile)
def mark_outputs(network, outputs):
    """
    Makes the named tensors the network's outputs, unmarking any tensors that
    were previously marked as outputs.

    Args:
        network (trt.INetworkDefinition): The network in which to mark outputs.
        outputs (Sequence[str]): The names of tensors to mark as outputs.
    """
    requested = set(outputs)

    def layer_output_tensors():
        for layer in network:
            for index in range(layer.num_outputs):
                yield layer.get_output(index)

    all_outputs = []
    for tensor in layer_output_tensors():
        all_outputs.append(tensor.name)

        # Clear all old outputs
        if tensor.is_network_output:
            network.unmark_output(tensor)

        # Re-check the flag: the same tensor may have just been unmarked above.
        if tensor.name in requested and not tensor.is_network_output:
            G_LOGGER.ultra_verbose("Marking {:} as an output".format(tensor.name))
            network.mark_output(tensor)

    not_found = requested - set(_get_network_outputs(network))
    check_outputs_not_found(not_found, all_outputs)
def register_callback():
    """
    Registers a G_LOGGER callback that maps the logger's severity onto TensorFlow's
    Python logging verbosity and the `TF_CPP_MIN_LOG_LEVEL` environment variable.
    """
    from polygraphy.logger.logger import G_LOGGER

    def set_tf_logging_level(sev):
        import os
        import tensorflow as tf

        tf_logging = tf.compat.v1.logging
        # (exclusive lower bound on `sev`, TF Python verbosity, TF C++ min log level),
        # ordered from least to most verbose; the first match wins.
        thresholds = [
            (G_LOGGER.WARNING, tf_logging.ERROR, "3"),
            (G_LOGGER.INFO, tf_logging.WARN, "2"),
            (G_LOGGER.VERBOSE, tf_logging.INFO, "1"),
        ]

        tf_sev, tf_logging_level = tf_logging.DEBUG, "0"
        for lower_bound, candidate_sev, candidate_level in thresholds:
            if sev > lower_bound:
                tf_sev, tf_logging_level = candidate_sev, candidate_level
                break

        tf_logging.set_verbosity(tf_sev)
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = tf_logging_level

    # Will be registered when this runner is imported.
    G_LOGGER.register_callback(set_tf_logging_level)
def add_onnx_loader(self, script, disable_outputs=None, suffix=None):
    """
    Adds to `script` the chain of loaders that produces an ONNX model and returns
    the name of the last loader in that chain.

    The chain is: load from path (or convert via the tf2onnx loader arguments),
    then an optional modify step, then an optional save step.

    Args:
        script: The script being generated.
        disable_outputs: Forwarded to `_get_modify_onnx_str`.
        suffix: Forwarded to `script.add_loader` for the ONNX-from-path loader.

    Returns:
        The name of the final loader added to the script.
    """
    ONNX_BACKEND = "polygraphy.backend.onnx"

    if self.model_args.model_type != "onnx":
        if self.tf2onnx_loader_args is None:
            G_LOGGER.critical("Could not load: {:}. Is it an ONNX model?".format(self.model_args.model_file))
        loader_name = self.tf2onnx_loader_args.add_to_script(script)
    else:
        script.add_import(imports=["OnnxFromPath"], frm=ONNX_BACKEND)
        from_path_str = Script.invoke("OnnxFromPath", self.model_args.model_file)
        loader_name = script.add_loader(from_path_str, "load_onnx", suffix=suffix)

    modify_onnx_str = self._get_modify_onnx_str(script, loader_name, disable_outputs=disable_outputs)
    if modify_onnx_str is not None:
        loader_name = script.add_loader(modify_onnx_str, "modify_onnx")

    SAVE_ONNX = "SaveOnnx"
    with_path = Script.invoke(SAVE_ONNX, loader_name, path=self.save_onnx)
    without_path = Script.invoke(SAVE_ONNX, loader_name)
    # The save loader is only worth adding if the path argument changed the invocation.
    if with_path != without_path:
        script.add_import(imports=[SAVE_ONNX], frm=ONNX_BACKEND)
        loader_name = script.add_loader(with_path, "save_onnx")

    return loader_name
def _check_type(self, elem):
    """
    Checks that `elem` is an instance of this container's element type and
    reports a critical error through G_LOGGER if it is not.

    Args:
        elem: The element to validate against `self.elem_type`.
    """
    if isinstance(elem, self.elem_type):
        return

    # The original message repeated the word "type" ("element type type in").
    G_LOGGER.critical(
        "Unsupported element type in {:}. Element: {:} is type: {:} but type: {:} was expected".format(
            type(self).__name__, repr(elem), type(elem).__name__, self.elem_type.__name__))
def determine_format(shape):
    """
    Guesses the data format of a given shape.

    Args:
        shape (Tuple[int]): The shape, including batch dimension.

    Returns:
        DataFormat: The determined data format, or DataFormat.UNKNOWN (after a
                warning) when the shape does not have 2, 3 or 4 dimensions.
    """
    # The smaller this ratio, the closer a and b are.
    def minmax_ratio(a, b):
        smaller, larger = min(a, b), max(a, b)
        if smaller == 0:
            # Avoid ZeroDivisionError for zero-sized dimensions: two zeros are
            # as close as possible, otherwise they are infinitely far apart.
            return 1.0 if larger == 0 else float("inf")
        return abs(larger / smaller)

    rank = len(shape)

    # Assume all shapes include batch dimension
    if rank == 4:
        # Typically, H and W are quite close, so if the first two non-batch dimensions are
        # further apart than the last two, then we assume CHW.
        if minmax_ratio(shape[1], shape[2]) > minmax_ratio(shape[2], shape[3]):
            return DataFormat.NCHW
        return DataFormat.NHWC
    if rank == 3:
        return DataFormat.NHW
    if rank == 2:
        return DataFormat.NW

    G_LOGGER.warning("Cannot determine format for " + str(shape) +
                     ". Currently only implemented for input_buffers with 1-3 non-batch dimensions. Please update this function!")
    return DataFormat.UNKNOWN
def infer_impl(self, feed_dict):
    """
    Copies the inputs to the device, executes the context asynchronously, copies
    the outputs back, and synchronizes the stream.

    The elapsed wall-clock time is stored in `self.inference_time`.

    Args:
        feed_dict: Mapping of input name to host buffer.

    Returns:
        The `self.host_output_buffers` mapping, refreshed with this run's outputs.
    """
    start_binding, _ = self.set_shapes_from_feed_dict(feed_dict)

    t_begin = time.time()

    for name, host_input in feed_dict.items():
        self.device_buffers[name].copy_from(host_input, self.stream)

    # Need to offset bindings in case the active profile is not 0.
    padding = [0] * start_binding
    addresses = [buf.address() for buf in self.device_buffers.values()]
    status = self.context.execute_async_v2(bindings=padding + addresses,
                                           stream_handle=self.stream.address())
    if not status:
        G_LOGGER.critical("Model execution failed. Please see the log messages above for details")

    # Only values are replaced, so updating the mapping while iterating its keys is safe.
    for name in self.host_output_buffers:
        self.host_output_buffers[name] = self.device_buffers[name].copy_to(
            self.host_output_buffers[name], self.stream)

    self.stream.synchronize()
    t_end = time.time()

    self.inference_time = t_end - t_begin
    return self.host_output_buffers
def generate_buffer(name, dtype, shape):
    """
    Generates one input buffer of the given dtype and shape.

    Shape tensors are filled with the values of `shape` itself. Integer and
    floating-point buffers are drawn from `rng` within `self.int_range` and
    `self.float_range` respectively; boolean buffers are drawn from {0, 1}.

    Args:
        name (str): The input name.
        dtype: The NumPy data type of the buffer.
        shape: The buffer shape (or, for shape tensors, the values to use).

    Returns:
        np.ndarray: The generated buffer.
    """
    if is_shape_tensor(name, dtype):
        values = np.array(shape, dtype=dtype)
        G_LOGGER.info("Assuming {:} is a shape tensor. Setting input values to: {:}. If this is not correct, "
                      "please set it correctly in 'input_metadata' or by providing --input-shapes".format(name, values),
                      mode=LogMode.ONCE)
    elif np.issubdtype(dtype, np.integer):
        int_low, int_high = self.int_range[0], self.int_range[1]
        # high is 1 greater than the max int drawn
        values = rng.randint(low=int_low, high=int_high + 1, size=shape, dtype=dtype)
    elif np.issubdtype(dtype, np.bool_):
        values = rng.randint(low=0, high=2, size=shape).astype(dtype)
    else:
        scaled = rng.random_sample(size=shape) * (self.float_range[1] - self.float_range[0])
        values = (scaled + self.float_range[0]).astype(dtype)

    # To handle scalars, since the above functions return a float if shape is ().
    return np.array(values)
def write_calibration_cache(self, cache):
    """
    Stores the calibration cache in memory and, if a cache destination was
    configured, persists it there on a best-effort basis.

    `self._cache` may be None (nothing is persisted), a file-like object
    (rewound if seekable, then written and flushed), or anything else, which is
    then treated as a path and opened for binary writing.

    Args:
        cache: The cache contents; must provide `tobytes()`.
    """
    self.cache_contents = cache.tobytes()
    self.has_cached_scales = True

    if self._cache is None:
        return

    try:
        if self._cache.seekable():
            self._cache.seek(0)
        bytes_written = self._cache.write(self.cache_contents)
        if bytes_written != len(self.cache_contents):
            G_LOGGER.warning("Could not write entire cache. Note: cache contains {:} bytes, but only "
                             "{:} bytes were written".format(len(self.cache_contents), bytes_written))
    except AttributeError:
        # Not a file-like object, so treat it as a path.
        G_LOGGER.info("Writing calibration cache to: {:}".format(self._cache))
        with open(self._cache, "wb") as f:
            f.write(self.cache_contents)
    except Exception:
        # Cache is not writable. `Exception` rather than a bare `except` so that
        # KeyboardInterrupt and SystemExit still propagate.
        return
    else:
        self._cache.flush()
def __init__(self, max_workspace_size=None, tf32=None, fp16=None, int8=None, profiles=None, calibrator=None, strict_types=None):
    """
    Functor that creates a TensorRT IBuilderConfig.

    Args:
        max_workspace_size (int):
                The maximum workspace size, in bytes, when building the engine.
                Defaults to 1 << 24.
        tf32 (bool): Whether to build the engine with TF32 precision enabled. Defaults to False.
        fp16 (bool): Whether to build the engine with FP16 precision enabled. Defaults to False.
        int8 (bool): Whether to build the engine with INT8 precision enabled. Defaults to False.
        profiles (List[Profile]):
                A list of optimization profiles to add to the configuration. Only needed for
                networks with dynamic input shapes. If this is omitted for a network with
                dynamic shapes, a default profile is created, where dynamic dimensions are
                replaced with Polygraphy's DEFAULT_SHAPE_VALUE (defined in util/constants.py).
                See `Profile` for details.
        calibrator (trt.IInt8Calibrator):
                An int8 calibrator. Only required in int8 mode when
                the network does not have explicit precision. For networks with
                dynamic shapes, the last profile provided (or default profile if
                no profiles are provided) is used during calibration.
        strict_types (bool): Stored as `self.strict_types`. Defaults to False.
    """
    # Boolean precision/behavior switches all default to off.
    self.tf32 = misc.default_value(tf32, False)
    self.fp16 = misc.default_value(fp16, False)
    self.int8 = misc.default_value(int8, False)
    self.strict_types = misc.default_value(strict_types, False)

    self.max_workspace_size = misc.default_value(max_workspace_size, 1 << 24)
    self.profiles = misc.default_value(profiles, [])
    self.calibrator = calibrator

    calibrator_without_int8 = self.calibrator is not None and not self.int8
    if calibrator_without_int8:
        G_LOGGER.warning("A calibrator was provided to `CreateConfig`, but int8 mode was not enabled. "
                         "Did you mean to set `int8=True` to enable building with int8 precision?")
def infer(self, feed_dict):
    """
    Runs one inference with the engine's maximum batch size.

    Inputs are copied to the device, the context is executed asynchronously,
    outputs are copied back, and the stream is synchronized. The elapsed
    wall-clock time is stored in `self.inference_time`.

    Args:
        feed_dict: Mapping of input name to host buffer.

    Returns:
        OrderedDict[str, object]: Host output buffers keyed by output name.
    """
    t_begin = time.time()

    for name, host_input in feed_dict.items():
        self.input_buffers[name].device.copy_from(host_input, self.stream)

    # Bindings are all inputs followed by all outputs.
    bindings = [buf.device.address() for buf in self.input_buffers.values()]
    bindings += [buf.device.address() for buf in self.output_buffers.values()]

    # We will not run with smaller batch sizes than whatever the builder chose.
    status = self.context.execute_async(batch_size=self.context.engine.max_batch_size,
                                        bindings=bindings,
                                        stream_handle=self.stream.address())
    if not status:
        G_LOGGER.critical("Model execution failed. Please see the log messages above for details")

    for out in self.output_buffers.values():
        out.host = out.device.copy_to(out.host, self.stream)

    self.stream.synchronize()
    t_end = time.time()

    out_dict = OrderedDict((name, out.host) for name, out in self.output_buffers.items())
    self.inference_time = t_end - t_begin
    return out_dict
def read_calibration_cache(self):
    """
    Returns the calibration cache contents, loading them from `self._cache`
    the first time scales are requested.

    `self._cache` may be None, a file-like object, or anything else, which is
    then treated as a path. Once non-empty contents have been loaded (or
    written), they are returned directly from memory.

    Returns:
        The cache contents, or None if no usable cache is available.
    """
    def load_from_cache():
        if self._cache is None:
            return None

        try:
            if self._cache.seekable():
                self._cache.seek(0)
            return self._cache.read()
        except AttributeError:
            # Not a file-like object, so treat it as a path.
            if os.path.exists(self._cache):
                G_LOGGER.info("Reading calibration cache from: {:}".format(self._cache), mode=LogMode.ONCE)
                with open(self._cache, "rb") as f:
                    return f.read()
            return None
        except Exception:
            # Cache is not readable. `Exception` rather than a bare `except` so that
            # KeyboardInterrupt and SystemExit still propagate.
            return None

    if not self.has_cached_scales:
        self.cache_contents = load_from_cache()
        if self.cache_contents:
            self.has_cached_scales = True
        else:
            # None means there was nothing to read (no cache configured, missing
            # path, unreadable object). Only warn when a cache was actually read
            # and turned out to be empty.
            if self.cache_contents is not None:
                G_LOGGER.warning("Calibration cache was provided, but is empty. "
                                 "Will regenerate scales by running calibration.", mode=LogMode.ONCE)
            self.cache_contents = None

    return self.cache_contents
def fix_graph(graph, model):
    """
    Fix the graph so it is valid ONNX.

    Fills in missing shapes/dtypes on graph inputs and missing dtypes on graph
    outputs using `layerwise(model)`, then freezes any input-less Variable that
    is not a graph input into a constant and folds constants if any were frozen.

    Args:
        graph: The graph to fix; it is modified in place.
        model: The model handed to `layerwise` to obtain tensor metadata and values.

    Returns:
        The fixed graph.
    """
    def fix_tensor_metadata(tensors, fix_shape=True):
        for tensor in tensors:
            if not tensor.shape and fix_shape:
                tensor.shape = layerwise(model)[tensor.name].shape
            if not tensor.dtype:
                tensor.dtype = layerwise(model)[tensor.name].dtype

    fix_tensor_metadata(graph.inputs)
    fix_tensor_metadata(graph.outputs, fix_shape=False)

    # If we're marking inputs, there may be cases where some other inputs are required - for
    # example, if the model is branchy. If, after cleanup(), there are any Variable tensors in
    # the graph without inputs, we'll replace them with constants and fold them away.
    needs_const_fold = False
    for tensor in graph.tensors().values():
        is_dangling_variable = isinstance(tensor, gs.Variable) and not tensor.inputs and tensor not in graph.inputs
        if not is_dangling_variable:
            continue

        needs_const_fold = True
        G_LOGGER.info("Freezing model input: {:}".format(tensor))
        tensor.to_constant(layerwise(model, include_data=True)[tensor.name])

    if needs_const_fold:
        G_LOGGER.info("Folding constants to remove extraneous subgraphs")
        graph.fold_constants().cleanup()

    return graph
def __call__(self):
    """
    Writes out artifacts from a TensorFlow Graph.

    The serialized graph is written to `self.path`; if `self.tensorboard_dir` is
    set, the graph is added to a summary writer there; if `self.engine_dir` is
    set, the serialized segment of every `TRTEngineOp` node is written into it.

    Returns:
        Tuple[tf.Graph, Sequence[str]]: The TensorFlow graph, and the names of its outputs.
    """
    (graph, outputs), _ = misc.try_call(self._graph)

    # Serialization is deferred to the write helper via a callable.
    misc.lazy_write(contents=lambda: graph.as_graph_def().SerializeToString(), path=self.path)

    if self.tensorboard_dir:
        G_LOGGER.info("Writing tensorboard events to {:}".format(self.tensorboard_dir))
        train_writer = tf.compat.v1.summary.FileWriter(self.tensorboard_dir)
        train_writer.add_graph(graph)

    if self.engine_dir is not None:
        engine_nodes = (node for node in graph.as_graph_def().node if node.op == "TRTEngineOp")
        # Segments are numbered in the order their nodes appear in the graph.
        for segment_number, node in enumerate(engine_nodes):
            engine = node.attr["serialized_segment"].s
            segment_path = os.path.join(self.engine_dir, "segment-{:}".format(segment_number))
            misc.lazy_write(contents=engine, path=segment_path)

    return graph, outputs
def add_to_script(self, script):
    """
    Adds a `TrtLegacyRunner`, together with the network loader it needs, to `script`.

    The loader depends on the model type: "onnx" uses the legacy ONNX parser,
    "caffe" uses the Caffe loader, "uff" loads a UFF file, and every other type
    goes through the TensorFlow loader followed by UFF conversion.

    Args:
        script: The script being generated.

    Returns:
        The name of the runner that was added to the script.
    """
    LEGACY_BACKEND = "polygraphy.backend.trt_legacy"

    def import_legacy(name):
        script.add_import(imports=[name], frm=LEGACY_BACKEND)

    import_legacy("TrtLegacyRunner")
    G_LOGGER.warning("Legacy TensorRT runner only supports implicit batch TensorFlow/UFF, ONNX, and Caffe models")

    model_type = self.model_args.model_type
    if model_type == "onnx":
        import_legacy("ParseNetworkFromOnnxLegacy")
        onnx_loader = self.onnx_loader_args.add_onnx_loader(script, disable_outputs=True)
        parse_str = Script.format_str("ParseNetworkFromOnnxLegacy({:})", onnx_loader)
        loader_name = script.add_loader(parse_str, "parse_network_from_onnx_legacy")
    elif model_type == "caffe":
        import_legacy("LoadNetworkFromCaffe")
        caffe_str = Script.format_str("LoadNetworkFromCaffe({:}, {:}, {:}, {:})",
                                      self.model_args.model_file, self.caffe_model,
                                      self.trt_outputs, self.batch_size)
        loader_name = script.add_loader(caffe_str, "parse_network_from_caffe")
    else:
        import_legacy("LoadNetworkFromUff")
        if model_type == "uff":
            import_legacy("LoadUffFile")
            # Only shapes are needed here; the first element of each entry is ignored.
            shapes = {name: shape for name, (_, shape) in self.trt_loader_args.input_shapes.items()}
            uff_str = Script.format_str("LoadUffFile({:}, {:}, {:})",
                                        self.model_args.model_file,
                                        misc.default_value(shapes, {}),
                                        self.trt_outputs)
            loader_name = script.add_loader(uff_str, "load_uff_file")
        else:
            import_legacy("ConvertToUff")
            tf_loader = self.tf_loader_args.add_to_script(script)
            convert_str = Script.format_str("ConvertToUff({:}, save_uff={:}, preprocessor={:})",
                                            tf_loader, self.save_uff, self.preprocessor)
            loader_name = script.add_loader(convert_str, "convert_to_uff")

        # Both UFF paths are wrapped in the UFF network loader.
        network_str = Script.format_str("LoadNetworkFromUff({:}, uff_order={:})", loader_name, self.uff_order)
        loader_name = script.add_loader(network_str, "uff_network_loader")

    load_engine = self.model_args.model_file if self.model_args.model_type == "engine" else None
    layerwise = self.trt_outputs == constants.MARK_ALL
    runner_str = Script.format_str(
        "TrtLegacyRunner({:}, {:}, {:}, fp16={:}, tf32={:}, load_engine={:}, save_engine={:}, layerwise={:}, plugins={:})",
        loader_name,
        self.trt_loader_args.workspace,
        self.batch_size,
        self.trt_loader_args.fp16,
        self.trt_loader_args.tf32,
        load_engine,
        self.trt_runner_args.save_engine,
        layerwise,
        self.trt_loader_args.plugins)

    runner_name = script.add_loader(runner_str, "trt_legacy_runner")
    script.add_runner(runner_name)
    return runner_name
def __call__(self):
    """
    Converts a TensorFlow model into ONNX.

    When `self.optimize` is set, the graph is first run through
    `tf2onnx.tfonnx.tf_optimize` (folding constants if `self.fold_constant` is
    set) and the resulting ONNX graph is run through the tf2onnx optimizer.

    Returns:
        onnx.ModelProto: The ONNX model.
    """
    import tensorflow as tf
    import tf2onnx
    from polygraphy.backend.tf import util as tf_util

    misc.log_module_info(tf2onnx)

    (graph, output_names), _ = misc.try_call(self._graph)
    input_names = list(tf_util.get_input_metadata(graph).keys())

    graphdef = graph.as_graph_def()
    if self.optimize:
        # Constants are only folded as part of tf_optimize, so only announce
        # folding when that step actually runs.
        if self.fold_constant:
            G_LOGGER.info("Folding constants in graph using tf2onnx.tfonnx.tf_optimize")
        graphdef = tf2onnx.tfonnx.tf_optimize(input_names, output_names, graphdef,
                                              fold_constant=self.fold_constant)

    # Import into a fresh graph, with a session active for the duration of the conversion.
    with tf.Graph().as_default() as graph, tf.compat.v1.Session(graph=graph):
        tf.import_graph_def(graphdef, name="")

        onnx_graph = tf2onnx.tfonnx.process_tf_graph(graph, input_names=input_names,
                                                     output_names=output_names, opset=self.opset)
        if self.optimize:
            onnx_graph = tf2onnx.optimizer.optimize_graph(onnx_graph)
        return onnx_util.check_model(onnx_graph.make_model("model"))
def add_to_script(self, script, data_loader_name):
    """
    Appends the `Comparator.run` invocation, and optionally a step that saves
    its results, to `script`.

    Args:
        script: The script being generated.
        data_loader_name: The name of the data loader to pass to `Comparator.run`.

    Returns:
        The inline name of the variable that holds the run results in the script.
    """
    script.add_import(imports=["Comparator"], frm="polygraphy.comparator")
    script.add_import(imports=["sys"])

    RESULTS_VAR_NAME = Inline("results")

    run_invocation = Script.invoke("Comparator.run",
                                   script.get_runners(),
                                   warm_up=self.warm_up,
                                   data_loader=data_loader_name,
                                   use_subprocess=self.use_subprocess,
                                   save_inputs_path=self.save_inputs)
    run_stmt = Script.format_str("\n# Runner Execution\n{results} = {:}",
                                 Inline(run_invocation), results=RESULTS_VAR_NAME)
    script.append_suffix(run_stmt)

    if not self.save_results:
        return RESULTS_VAR_NAME

    G_LOGGER.verbose("Will save runner results to: {:}".format(self.save_results))
    script.add_import(imports=["misc"], frm="polygraphy.util")
    save_stmt = Script.format_str("\n# Save results\nmisc.pickle_save({:}, {results})",
                                  self.save_results, results=RESULTS_VAR_NAME)
    script.append_suffix(save_stmt)

    return RESULTS_VAR_NAME
def allocate_buffers(engine):
    """
    Allocates a device buffer for every binding of `engine`, plus a host buffer
    for every output binding, each sized for the engine's maximum batch size.

    Args:
        engine: The engine whose bindings are iterated.

    Returns:
        Tuple[OrderedDict, OrderedDict, cuda.Stream]:
                Input buffers and output buffers keyed by binding name, and a new stream.
    """
    input_buffers, output_buffers = OrderedDict(), OrderedDict()
    stream = cuda.Stream()

    G_LOGGER.verbose("Using batch size: " + str(engine.max_batch_size) + " during buffer allocation")
    for binding in engine:
        # The binding shape excludes the batch dimension, so prepend it.
        shape = (engine.max_batch_size, ) + tuple(engine.get_binding_shape(binding))
        np_dtype = trt.nptype(engine.get_binding_dtype(binding))

        device_mem = cuda.DeviceBuffer(shape=shape, dtype=np_dtype)
        G_LOGGER.extra_verbose("Tensor: "
                               "{:40} | Allocated: {:}".format(binding, device_mem))

        if engine.binding_is_input(binding):
            # Inputs get no host buffer here.
            input_buffers[binding] = TrtLegacyRunner.HostDeviceMem(None, device_mem)
            continue

        host_mem = np.empty(shape=shape, dtype=np_dtype)
        output_buffers[binding] = TrtLegacyRunner.HostDeviceMem(host_mem, device_mem)

    return input_buffers, output_buffers, stream
def is_output_node(node):
    """
    Determines whether `node` can be treated as a graph output.

    A node qualifies when nothing consumes it, it has at least one input, its op
    contains none of the excluded op names, and its name contains none of the
    excluded namespaces.

    Args:
        node: The node to check; must expose `name`, `op` and `input`.

    Returns:
        bool: Whether the node is a valid output node.
    """
    # Make sure that we're not using hanging nodes as outputs - must have at least one input.
    is_consumed = len(node_output_map[node.name]) != 0
    if is_consumed or len(node.input) == 0:
        return False

    # Tensors with no shape cannot be outputs and TensorFlow doesn't like certain ops as outputs.
    EXCLUDE_OPS = ["Switch", "FusedBatchNorm", "Assert", "NextIteration", "Enter", "LoopCond", "Exit",
                   "Print", "Assign", "NoOp", "ReadVariableOp", "VarIsInitializedOp", "Const"]
    # Additionally, we sometimes need to exclude entire namespaces e.g. while loops.
    EXCLUDE_NAMESPACES = ["while", "Assert"]

    # Both checks are substring matches, not exact matches.
    op_excluded = any(ex_op in node.op for ex_op in EXCLUDE_OPS)
    if op_excluded or any(ns in node.name for ns in EXCLUDE_NAMESPACES):
        G_LOGGER.extra_verbose("Excluding {:}, op {:} is not a valid output op or is part of an excluded namespace "
                               "(Note: excluded namespaces: {:})".format(node.name, node.op, EXCLUDE_NAMESPACES))
        return False

    return True
def parse_dtype(dtype):
    """
    Translates a data type name into the corresponding entry of `misc.NP_TYPE_FROM_STR`.

    Args:
        dtype (str): The data type name, or None.

    Returns:
        The mapped data type, or None if `dtype` is None.
    """
    if dtype is None:
        return dtype

    if dtype not in misc.NP_TYPE_FROM_STR:
        G_LOGGER.critical("Could not understand data type: {:}. Please use one of: {:} or `auto`".format(
            dtype, list(misc.NP_TYPE_FROM_STR.keys())))
    return misc.NP_TYPE_FROM_STR[dtype]
def get_input_metadata(graph):
    """
    Collects metadata for the inputs of a TensorFlow graph.

    Inputs are the first output (":0") of every `Placeholder` and `FIFOQueue` node.

    Args:
        graph: The TensorFlow graph to inspect.

    Returns:
        The result of `get_tensor_metadata` on the input tensors.
    """
    input_nodes = find_nodes_by_ops(graph.as_graph_def(), ["Placeholder", "FIFOQueue"])
    described_nodes = ["{:}: {:}".format(node.name, node.op) for node in input_nodes]
    G_LOGGER.verbose("Found input tensors: {:}".format(described_nodes))

    input_tensors = [graph.get_tensor_by_name(node.name + ":0") for node in input_nodes]
    G_LOGGER.verbose("Retrieved TensorFlow input_tensors: {:}".format(input_tensors))

    return get_tensor_metadata(input_tensors)
def layerwise(model, include_data=False): nonlocal _layerwise_outputs, _layerwise_meta if _layerwise_outputs is None or _layerwise_meta is None: G_LOGGER.info( "Running inference with ONNX-Runtime to determine metadata for intermediate tensors.\n" "This will cause intermediate models to have static shapes." ) _layerwise_outputs, _layerwise_meta = self.arg_groups[OnnxShapeInferenceArgs].fallback_inference(model) return _layerwise_outputs if include_data else _layerwise_meta
def parse_profile_shapes(default_shapes, min_args, opt_args, max_args):
    """
    Parses TensorRT profile options from command-line arguments.

    Args:
        default_shapes (TensorMetadata): The inference input shapes.
        min_args (List): Per-profile minimum shape arguments, each parsed by `parse_meta`.
        opt_args (List): Per-profile optimum shape arguments, each parsed by `parse_meta`.
        max_args (List): Per-profile maximum shape arguments, each parsed by `parse_meta`.

    Returns:
        List[Tuple[OrderedDict[str, Shape]]]:
            A list of profiles with each profile comprised of three dictionaries
            (min, opt, max) mapping input names to shapes.
    """
    def get_shapes(lst, idx):
        # NOTE: `default_shapes` is deliberately rebound in the enclosing scope, so
        # shapes parsed for one call carry over as the defaults of later calls.
        nonlocal default_shapes
        default_shapes = copy.copy(default_shapes)
        if idx < len(lst):
            default_shapes.update(parse_meta(lst[idx], includes_dtype=False))

        # Don't care about dtype, and need to override dynamic dimensions
        shapes = {name: misc.override_dynamic_shape(shape) for name, (_, shape) in default_shapes.items()}

        # Compare each overridden shape against the shape it came from. The original
        # loop iterated over `shapes` itself, so it compared every shape with itself
        # and the warning could never fire.
        for name, (_, shape) in default_shapes.items():
            if tuple(shapes[name]) != tuple(shape):
                G_LOGGER.warning("Input tensor: {:} | For TensorRT profile, overriding shape: {:} to: {:}".format(
                    name, shape, shapes[name]), mode=LogMode.ONCE)

        return shapes

    num_profiles = max(len(min_args), len(opt_args), len(max_args))

    # For cases where input shapes are provided, we have to generate a profile
    if not num_profiles and default_shapes:
        num_profiles = 1

    profiles = []
    for idx in range(num_profiles):
        min_shapes = get_shapes(min_args, idx)
        opt_shapes = get_shapes(opt_args, idx)
        max_shapes = get_shapes(max_args, idx)

        # All three dictionaries of a profile must describe the same inputs.
        if sorted(min_shapes.keys()) != sorted(opt_shapes.keys()):
            G_LOGGER.critical("Mismatch in input names between minimum shapes ({:}) and optimum shapes "
                              "({:})".format(list(min_shapes.keys()), list(opt_shapes.keys())))
        elif sorted(opt_shapes.keys()) != sorted(max_shapes.keys()):
            G_LOGGER.critical("Mismatch in input names between optimum shapes ({:}) and maximum shapes "
                              "({:})".format(list(opt_shapes.keys()), list(max_shapes.keys())))

        profiles.append((min_shapes, opt_shapes, max_shapes))
    return profiles
def __call__(self, builder, network):
    """
    Creates a TensorRT IBuilderConfig that can be used by the EngineFromNetwork.

    Args:
        builder (trt.Builder):
                The TensorRT builder to use to create the configuration.
        network (trt.INetworkDefinition):
                The TensorRT network for which to create the config. The network is used to
                automatically create a default optimization profile if none are provided.

    Returns:
        trt.IBuilderConfig: The TensorRT builder configuration.
    """
    with misc.FreeOnException([builder.create_builder_config()]) as (config, ):
        # After this block, `calibration_profile` is the last profile added
        # (or the default profile when none were provided).
        calibration_profile = None
        if self.profiles:
            for profile in self.profiles:
                calibration_profile = trt_util.build_profile(builder, network, profile)
                config.add_optimization_profile(calibration_profile)
            G_LOGGER.info("Configuring with profiles: {:}".format(self.profiles))
        else:
            calibration_profile = trt_util.build_default_profile(builder, network)
            config.add_optimization_profile(calibration_profile)

        config.max_workspace_size = int(self.max_workspace_size)

        if self.strict_types:
            config.set_flag(trt.BuilderFlag.STRICT_TYPES)

        if not self.tf32:
            # The TF32 flag is not available in every TensorRT version.
            with contextlib.suppress(AttributeError):
                config.clear_flag(trt.BuilderFlag.TF32)

        if self.fp16:
            config.set_flag(trt.BuilderFlag.FP16)

        if self.int8:
            config.set_flag(trt.BuilderFlag.INT8)
            if not network.has_explicit_precision:
                if self.calibrator is None:
                    G_LOGGER.warning("Network does not have explicit precision and no calibrator was provided. Please ensure "
                                     "that tensors in the network have dynamic ranges set, or provide a calibrator in order to use int8 mode.")
                else:
                    input_metadata = trt_util.get_input_metadata_from_profile(calibration_profile, network)
                    # Not every calibrator provides `reset`.
                    with contextlib.suppress(AttributeError):
                        self.calibrator.reset(input_metadata)
                    config.int8_calibrator = self.calibrator
                    with contextlib.suppress(AttributeError):
                        config.set_calibration_profile(calibration_profile)
        return config
def run(self, command):
    """
    Runs `command` in a subprocess from `self.path`, with PYTHONPATH set to
    ROOT_DIR and ROOT_DIR's `bin` directory prepended to PATH.

    Args:
        command (str): The command line, split on single spaces. Empty arguments
                and escaped newlines are discarded.

    Returns:
        subprocess.CompletedProcess: The finished process, with captured text output.

    Raises:
        AssertionError: If the command exits with a non-zero return code.
    """
    G_LOGGER.info("Running: {:} from cwd: {:}".format(command, self.path))

    # `copy.copy(os.environ)` would produce an object that shares the live
    # environment, so the assignments below would leak into this process and PATH
    # would grow on every call. `os.environ.copy()` returns an independent dict.
    env = os.environ.copy()
    env["PYTHONPATH"] = ROOT_DIR
    env["PATH"] = os.path.join(ROOT_DIR, "bin") + os.path.pathsep + env["PATH"]

    # Remove whitespace args and escaped newlines
    command = [arg for arg in command.strip().split(" ") if arg.strip() and arg != "\\\n"]

    status = sp.run(command, cwd=self.path, env=env, stdout=sp.PIPE, stderr=sp.PIPE, universal_newlines=True)
    assert status.returncode == 0, status.stdout + "\n" + status.stderr
    return status
def __call__(self):
    """
    Builds a TensorRT engine.

    The builder, network, optional parser, and the builder configuration are
    freed on exit only when they were produced by callables, i.e. when this
    loader owns them.

    Returns:
        trt.ICudaEngine: The engine that was created.
    """
    # If network is a callable, then we own its return value
    ret, owns_network = misc.try_call(self._network)
    builder, network, parser = misc.unpack_args(ret, num=3)

    with contextlib.ExitStack() as stack:
        if not owns_network:
            provided = "Builder and Network" if parser is None else "Builder, Network, and Parser"
            G_LOGGER.verbose("{:} were provided directly instead of via a Callable. This loader will not assume ownership. "
                             "Please ensure that they are freed.".format(provided))
        else:
            stack.enter_context(builder)
            stack.enter_context(network)
            if parser is not None:
                stack.enter_context(parser)

        config, owns_config = misc.try_call(self._config, builder, network)
        if not owns_config:
            G_LOGGER.verbose("Builder configuration was provided directly instead of via a Callable. This loader will not assume "
                             "ownership. Please ensure it is freed.")
        else:
            stack.enter_context(config)

        if G_LOGGER.severity <= G_LOGGER.ULTRA_VERBOSE:
            network_log_mode = "full"
        else:
            network_log_mode = "attrs"
        # Passed as a callable so the network string is built by the logger, not here.
        G_LOGGER.super_verbose(lambda: ("Displaying TensorRT Network:\n" +
                                        trt_util.str_from_network(network, mode=network_log_mode)))

        G_LOGGER.info("Building engine with configuration: {:}".format(trt_util.str_from_config(config)))

        uses_legacy_build = misc.version(trt.__version__) < misc.version("7.3")
        if uses_legacy_build:
            engine = builder.build_engine(network, config)
        else:
            serialized = builder.build_serialized_network(network, config)
            engine = func.invoke(EngineFromBytes(serialized))

        if hasattr(config.int8_calibrator, "free"):
            # Must go before engine check to ensure calibrator is freed on failures too.
            config.int8_calibrator.free()

        if not engine:
            G_LOGGER.critical("Invalid Engine. Please ensure the engine was built correctly")
        return engine