def convert(model_dir: Path, spec: ModelSpec):
    """Convert the frozen graph in ``model_dir`` into a serialized TensorRT engine.

    Reads ``frozen_inference_graph.pb`` from ``model_dir``, passes it through
    ``add_plugin``, writes an intermediate ``temp.uff`` and finally stores the
    serialized engine as ``trt_model.bin``; all files live in ``model_dir``.

    Args:
        model_dir: Directory holding the frozen graph; outputs are written here.
        spec: Model description forwarded to ``add_plugin``; ``spec.input_dim``
            is registered as the shape of the ``Input`` tensor.

    Raises:
        RuntimeError: If the UFF file cannot be parsed or the engine build fails.
    """
    uff_temp_file = str(model_dir / "temp.uff")

    trt_logger = Logger(Logger.INFO)
    init_libnvinfer_plugins(trt_logger, "")

    dynamic_graph = add_plugin(
        DynamicGraph(str(model_dir / "frozen_inference_graph.pb")), spec)
    uff.from_tensorflow(
        dynamic_graph.as_graph_def(),
        output_nodes=["NMS"],
        output_filename=uff_temp_file,
        text=True,
        debug_mode=False,
    )

    with Builder(trt_logger) as builder, builder.create_network(
    ) as network, UffParser() as parser:
        builder.max_workspace_size = 1 << 32
        builder.max_batch_size = 1
        builder.fp16_mode = True

        parser.register_input("Input", spec.input_dim)
        parser.register_output("MarkOutput_0")
        # parse() reports failure through its return value, not an exception.
        if not parser.parse(uff_temp_file, network):
            raise RuntimeError(
                "Failed to parse UFF file: {}".format(uff_temp_file))

        engine = builder.build_cuda_engine(network)
        # A failed build yields None; fail loudly instead of crashing on
        # ``None.serialize()``.
        if engine is None:
            raise RuntimeError("TensorRT failed to build the CUDA engine")
        (model_dir / "trt_model.bin").write_bytes(engine.serialize())
def main(args):
    """Convert a TensorFlow model to a UFF file.

    Loads a TensorFlow frozen graph (.pb), a Keras model (.hdf5/.h5) or a
    SavedModel folder, optimizes the graph for inference, converts variables
    to constants and writes the result as UFF.

    Arguments:
        args: the parsed command line arguments. Reads ``input_model_path``,
            ``out``, ``input_node_name`` and ``output_node_name``; ``out`` is
            filled in (next to the input, with a ``.uff`` suffix) when empty.
    """
    model_path = args.input_model_path

    # Load the model from args.input_model_path into graph_def.
    if model_path.endswith(".pb"):
        graph_def = tf.GraphDef()
        with open(model_path, 'rb') as f:
            graph_def.ParseFromString(f.read())
    elif model_path.endswith((".hdf5", ".h5")):
        K.backend.set_learning_phase(0)
        # Load the file the user asked for (this branch is selected by its
        # extension), not an unrelated module-level path.
        K.models.load_model(model_path)
        session = K.backend.get_session()
        graph_def = session.graph.as_graph_def()
    else:
        with tf.Session(graph=tf.Graph()) as session:
            tf.saved_model.loader.load(
                session, [tf.saved_model.tag_constants.SERVING],
                model_path, strip_default_attrs=True)
            graph_def = session.graph.as_graph_def()

    if not args.out:
        args.out = os.path.splitext(model_path)[0] + ".uff"

    # Attempt to deduce input/output nodes when they were not given.
    input_node_names, output_node_names = args.input_node_name, args.output_node_name
    if not args.input_node_name:
        print("No input node names provided, assuming (input.*) search pattern in the graph...")
        input_node_names = [n.name for n in graph_def.node if n.name.startswith("input")]
    if not args.output_node_name:
        print("No output node names provided, assuming (output.*) search pattern in the graph...")
        output_node_names = [n.name for n in graph_def.node if n.name.startswith("output")]
    print("Using %s as input nodes, %s as output nodes." % (input_node_names, output_node_names))
    if not input_node_names or not output_node_names:
        sys.exit('Failed to identify input or output nodes in the graph. Exiting.')

    # Attempt to optimize the graph for inference (rm placeholder, etc).
    graph_def = optimize_for_inference_lib.optimize_for_inference(
        graph_def, input_node_names, output_node_names,
        tf.float32.as_datatype_enum)

    # Convert variables to constants.
    # NOTE(review): this opens a fresh session on the default graph; for the
    # SavedModel branch the variables lived in a different, already closed
    # session - confirm that freezing works for that input type.
    with tf.Session() as session:
        graph_def = tf.graph_util.convert_variables_to_constants(
            session, graph_def, output_node_names)

    # Convert the inference graph to UFF.
    uff.from_tensorflow(graph_def, output_node_names, output_filename=args.out)
def model_to_uff(model_path):
    """Convert the graph at ``model_path`` to a text-enabled UFF file.

    The graph is loaded with graphsurgeon, namespaces are collapsed according
    to ``prepare_namespace_plugin_map()`` and the result is written to the
    path given by ``model_path_to_uff_path(model_path)``.

    Returns:
        The path of the UFF file that was written.
    """
    # Swap unsupported TensorFlow operations for TensorRT plugin nodes.
    graph = gs.DynamicGraph(model_path)
    plugin_map = prepare_namespace_plugin_map()
    graph.collapse_namespaces(plugin_map)

    # Serialize the rewritten graph to UFF.
    uff_path = model_path_to_uff_path(model_path)
    uff.from_tensorflow(
        graph.as_graph_def(),
        [ModelData.OUTPUT_NAME],
        output_filename=uff_path,
        text=True,
    )
    return uff_path
def model_to_uff(model_path, uff_model_path):
    """Write a UFF version of ``model_path`` unless ``uff_model_path`` exists.

    The graph is always loaded and passed through
    ``ModelParser.convert_unsupported_nodes_to_plugins``; the UFF file is only
    written when nothing exists at ``uff_model_path`` yet.
    """
    print("model_path:", model_path)
    graph = ModelParser.convert_unsupported_nodes_to_plugins(
        gs.DynamicGraph(model_path))

    # An existing UFF file is reused as-is.
    if os.path.exists(uff_model_path):
        return

    uff.from_tensorflow(
        graph.as_graph_def(),
        [DetectionModel.output_name],
        output_filename=uff_model_path,
        text=True,
    )
def model_to_uff(model_path, output_uff_path, silent=False):
    """Takes frozen .pb graph, converts it to .uff and saves it to file.

    Args:
        model_path (str): .pb model path
        output_uff_path (str): .uff path where the UFF file will be saved
        silent (bool): if False, the UFF converter writes progress messages
            to stdout; if True, they are suppressed
    """
    # Replace operations TensorRT cannot handle with plugin nodes.
    dynamic_graph = gs.DynamicGraph(model_path)
    dynamic_graph = ssd_unsupported_nodes_to_plugin_nodes(dynamic_graph)

    uff.from_tensorflow(
        dynamic_graph.as_graph_def(),
        [ModelData.OUTPUT_NAME],
        output_filename=output_uff_path,
        text=True,
        # Honor the ``silent`` flag, which was previously accepted but ignored.
        quiet=silent,
    )
def main():
    """Train LeNet5, build a TensorRT engine from it and print predictions.

    The model returned by ``lenet5.learn()`` is converted to UFF, turned into
    an engine with ``fc2/Relu`` as output, and then ``ITERATIONS`` test cases
    are run through ``infer`` with their labels printed next to the argmax of
    the engine output.

    Raises:
        RuntimeError: If the TensorRT engine could not be created.
    """
    tf_model = lenet5.learn()
    uff_model = uff.from_tensorflow(tf_model, ["fc2/Relu"])

    # Convert Tensorflow model to TensorRT model
    parser = uffparser.create_uff_parser()
    parser.register_input("Placeholder", (1, 28, 28), 0)
    parser.register_output("fc2/Relu")

    engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser,
                                         MAX_BATCHSIZE, MAX_WORKSPACE)
    # Explicit check: an ``assert`` would be stripped under ``python -O``.
    if not engine:
        raise RuntimeError("Failed to create the TensorRT engine")

    context = engine.create_execution_context()

    print("\n| TEST CASE | PREDICTION |")
    for _ in range(ITERATIONS):
        img, label = lenet5.get_testcase()
        img = img[0]
        label = label[0]
        out = infer(context, img, 1)
        print("|-----------|------------|")
        print("| " + str(label) + " | " + str(np.argmax(out)) + " |")
def convert_uff_from_tensorflow(sess,
                                graph_def,
                                model_output,
                                dest_path='/tmp',
                                dest_name='converted.uff'):
    """Convert Session GraphDef from TensorFlow to UFF format model

    This function is for converting directly from TensorFlow's Session
    GraphDef and Session object. The session should be built with graph and
    the variables are also restored from check-point files already.

    Args:
        sess: Session object
        graph_def: GraphDef object is from either pbtxt file or Python's
            model source code
        model_output: Output node names; used both for freezing the graph and
            as the UFF output nodes
        dest_path: The destination path of the output UFF file
        dest_name: The name of the UFF file

    Returns:
        None
    """
    frozen_graph = tf.graph_util.convert_variables_to_constants(
        sess, graph_def, model_output)
    frozen_graph = tf.graph_util.remove_training_nodes(frozen_graph)

    # Create UFF model and dump it on disk. The context manager closes the
    # file even when the write fails.
    uff_model = uff.from_tensorflow(frozen_graph, model_output)
    with open(os.path.join(dest_path, dest_name), 'wb') as dump:
        dump.write(uff_model)
def mk_TensorRT_engine(self):
    """Build a TensorRT engine from the trained TensorFlow model.

    Trains the model via ``self.fit()`` when no checkpoint state is found in
    ``save_dir``, restores ``save/model.ckpt``, freezes the graph with
    ``inference/softmax`` as output, converts it to UFF and builds the engine.

    Returns:
        The engine produced by ``trt.utils.uff_to_trt_engine``.
    """
    # Train first when no model exists. get_checkpoint_state() expects the
    # checkpoint *directory*, not the checkpoint file prefix.
    # NOTE(review): the restore below uses the literal "save/model.ckpt";
    # confirm that ``save_dir`` refers to the same directory.
    if not tf.train.get_checkpoint_state(save_dir):
        self.fit()

    # Load the trained model and freeze it.
    with tf.Session() as sess:
        saver = tf.train.Saver(tf.global_variables())
        saver.restore(sess, "save/model.ckpt")
        # ``graph_def`` is a property of the session, not a method.
        graph_def = sess.graph_def
        frozen_graph = tf.graph_util.convert_variables_to_constants(
            sess, graph_def, ["inference/softmax"])
        tf_model = tf.graph_util.remove_training_nodes(frozen_graph)

    # Convert from the TensorFlow model format to UFF.
    uff_model = uff.from_tensorflow(tf_model, ["inference/softmax"])

    # Logger used while building the TensorRT engine from the UFF stream.
    G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)

    # Create the UFF parser and describe the model inputs and outputs.
    parser = uffparser.create_uff_parser()
    # (channel, im_size, im_size)
    parser.register_input("Placeholder", (1, 28, 28), 0)
    parser.register_output("inference/softmax")

    # Build the engine with the utility function (the last two arguments are
    # the max batch size and the max workspace size).
    engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser,
                                         MAX_BATCH_SIZE, MAX_WORKSPACE_SIZE)
    parser.destroy()
    return engine
def main():
    """Compile the ``ssd_mobilenet_v2_coco`` model into a TensorRT engine.

    The intermediate UFF file (``spec['tmp_uff']``) is generated only when it
    does not exist yet; the serialized engine goes to ``spec['output_bin']``.
    """
    logger = trt.Logger(trt.Logger.INFO)
    trt.init_libnvinfer_plugins(logger, '')

    # compile the model into TensorRT engine
    spec = MODEL_SPECS['ssd_mobilenet_v2_coco']
    uff_path = spec['tmp_uff']

    if not os.path.exists(uff_path):
        graph = add_plugin(gs.DynamicGraph(spec['input_pb']), spec)
        uff.from_tensorflow(graph.as_graph_def(),
                            output_nodes=['NMS'],
                            output_filename=uff_path,
                            text=True,
                            debug_mode=DEBUG_UFF)

    with trt.Builder(logger) as builder:
        with builder.create_network() as network:
            with trt.UffParser() as parser:
                builder.max_workspace_size = 1 << 28
                builder.max_batch_size = 1
                builder.fp16_mode = True

                parser.register_input('Input', INPUT_DIMS)
                parser.register_output('MarkOutput_0')
                parser.parse(uff_path, network)

                print("Building Tensorrt engine. This may take a few minutes.")
                engine = builder.build_cuda_engine(network)

                serialized = engine.serialize()
                with open(spec['output_bin'], 'wb') as out_file:
                    out_file.write(serialized)
                print("Save engine.")
def __call__(self):
    """Convert the graph provided by ``self.tf_loader`` to a UFF model.

    When ``self.uff_path`` is truthy the converter also writes ``out.uff``
    together with the text and preprocessed variants.

    Returns:
        A tuple ``(uff_model, input_names, input_shapes, output_names)``.
    """
    from polygraphy.backend.tf import util as tf_util
    import uff

    misc.log_module_info(uff)

    graph, raw_output_names = self.tf_loader()
    # Drop the ":<index>" tensor suffix to obtain node names.
    output_names = [name.partition(":")[0] for name in raw_output_names]

    # GraphDefs don't have names, so we have to name it something generic.
    output_filename = "out.uff" if self.uff_path else None

    # Generate the UFF model and get information about the inputs.
    graphdef = graph.as_graph_def()
    be_quiet = G_LOGGER.severity > G_LOGGER.VERBOSE
    debug = G_LOGGER.severity == G_LOGGER.EXTRA_VERBOSE
    uff_model, input_nodes, _ = uff.from_tensorflow(
        graphdef,
        return_graph_info=True,
        quiet=be_quiet,
        debug_mode=debug,
        text=self.uff_path,
        save_preprocessed=self.uff_path,
        output_filename=output_filename,
        preprocessor=self.preprocessor,
    )

    input_names = []
    input_shapes = []
    for node in input_nodes:
        input_names.append(node.name)
        input_shapes.append(
            tuple(int(dim.size) for dim in node.attr["shape"].shape.dim))
    return uff_model, input_names, input_shapes, output_names
def create_trt_model_bin():
    """Build and store the TensorRT engine unless it already exists.

    Loads the shared library at ``LIB_FLATTEN_PATH`` and initializes the
    TensorRT plugins unconditionally; the conversion itself only runs when
    ``MODEL_TRT_BIN_PATH`` is not a file yet.
    """
    ctypes.CDLL(LIB_FLATTEN_PATH)

    # initialize
    logger = trt.Logger(trt.Logger.INFO)
    trt.init_libnvinfer_plugins(logger, '')

    # Nothing to do when the engine has been built before.
    if os.path.isfile(MODEL_TRT_BIN_PATH):
        return

    # compile model into TensorRT
    graph = model.add_plugin(gs.DynamicGraph(MODEL_PATH))
    uff.from_tensorflow(graph.as_graph_def(),
                        model.output_name,
                        output_filename='tmp.uff')

    with trt.Builder(logger) as builder:
        with builder.create_network() as network:
            with trt.UffParser() as parser:
                builder.max_workspace_size = 1 << 28
                builder.max_batch_size = 1
                builder.fp16_mode = True

                parser.register_input('Input', model.dims)
                parser.register_output('MarkOutput_0')
                parser.parse('tmp.uff', network)
                engine = builder.build_cuda_engine(network)

                serialized = engine.serialize()
                with open(MODEL_TRT_BIN_PATH, 'wb') as out_file:
                    out_file.write(serialized)
def main():
    """Compile the model named on the command line into a TensorRT engine.

    The positional ``model`` argument must be a key of ``MODEL_SPECS``. The
    intermediate UFF file is written to ``spec['tmp_uff']`` and the serialized
    engine to ``spec['output_bin']``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('model', type=str, choices=list(MODEL_SPECS.keys()))
    args = parser.parse_args()

    # initialize
    # Compare the major version numerically: comparing only the first
    # character would treat TensorRT 10+ ("1...") as older than 7.
    if int(trt.__version__.split('.')[0]) < 7:
        ctypes.CDLL(LIB_FILE)
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')

    # compile the model into TensorRT engine
    model = args.model
    spec = MODEL_SPECS[model]
    dynamic_graph = add_plugin(gs.DynamicGraph(spec['input_pb']), model, spec)
    uff.from_tensorflow(dynamic_graph.as_graph_def(),
                        output_nodes=['NMS'],
                        output_filename=spec['tmp_uff'],
                        text=True,
                        debug_mode=DEBUG_UFF)

    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
    ) as network, trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True

        parser.register_input('Input', INPUT_DIMS)
        parser.register_output('MarkOutput_0')
        parser.parse(spec['tmp_uff'], network)
        engine = builder.build_cuda_engine(network)

        buf = engine.serialize()
        with open(spec['output_bin'], 'wb') as f:
            f.write(buf)
def export_trt(pb_file, output_dir, num_classes=90, neuralet_adaptive_model=1):
    """
    Exports the Tensorflow pb models to TensorRT engines.

    The UFF file and the engine are written to ``output_dir`` and named after
    the input file (``<name>.uff`` and ``<name>.bin``).

    Args:
        pb_file: The path of input pb file
        output_dir: A directory to store the output files; created if missing
        num_classes: Detector's number of classes
        neuralet_adaptive_model: Forwarded unchanged to
            ``plugin.add_plugin_and_preprocess``

    Raises:
        FileNotFoundError: If ``pb_file`` does not exist.
    """
    lib_flatten_concat_file = "exporters/libflattenconcat.so.6"

    # initialize
    # Compare the major version numerically: comparing only the first
    # character would treat TensorRT 10+ ("1...") as older than 7.
    if int(trt.__version__.split('.')[0]) < 7:
        ctypes.CDLL(lib_flatten_concat_file)
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')

    # compile the model into TensorRT engine
    model = "ssd_mobilenet_v2_coco"

    if not os.path.isfile(pb_file):
        raise FileNotFoundError(
            'model does not exist under: {}'.format(pb_file))

    if not os.path.isdir(output_dir):
        print("the provided output directory : {0} is not exist".format(
            output_dir))
        print("creating output directory : {0}".format(output_dir))
        os.makedirs(output_dir, exist_ok=True)

    dynamic_graph = plugin.add_plugin_and_preprocess(gs.DynamicGraph(pb_file),
                                                     model, num_classes,
                                                     neuralet_adaptive_model)
    # Base name without directory and final extension; unlike manual
    # splitting this also works for names without any extension.
    model_file_name = os.path.splitext(os.path.basename(pb_file))[0]
    uff_path = os.path.join(output_dir, model_file_name + ".uff")
    uff.from_tensorflow(dynamic_graph.as_graph_def(),
                        output_nodes=['NMS'],
                        output_filename=uff_path,
                        text=True,
                        debug_mode=False)

    input_dims = (3, 300, 300)
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
    ) as network, builder.create_builder_config(
    ) as builder_config, trt.UffParser() as parser:
        builder_config.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder_config.set_flag(trt.BuilderFlag.FP16)

        parser.register_input('Input', input_dims)
        parser.register_output('MarkOutput_0')
        parser.parse(uff_path, network)
        engine = builder.build_engine(network, builder_config)

        buf = engine.serialize()
        engine_path = os.path.join(output_dir, model_file_name + ".bin")
        with open(engine_path, 'wb') as f:
            f.write(buf)
        print(
            "your model has been converted to trt engine successfully under : {}"
            .format(engine_path))
def main():
    """Run pose estimation on the given images through a TensorRT engine.

    Reads the comma separated ``args.images``, builds the model selected by
    ``args.base_model``, loads weights from ``args.path_to_npz``, converts the
    frozen graph to UFF, builds an engine and plots the detected humans for
    every image. Most steps are timed through ``measure``.
    """
    args = parse_args()
    height, width, channel = 368, 432, 3

    # channels_first is required for tensorRT
    images = [
        read_imgfile(name, width, height, 'channels_first')
        for name in args.images.split(',')
    ]

    model_inputs, model_outputs = _get_model_func(args.base_model)()
    # Strip the two-character tensor suffix from each tensor name.
    input_names = [tensor.name[:-2] for tensor in model_inputs]
    output_names = [tensor.name[:-2] for tensor in model_outputs]

    print('input names: %s' % ','.join(input_names))
    print('output names: %s' %
          ','.join(output_names))  # outputs/conf,outputs/paf

    sess = tf.InteractiveSession()
    measure(lambda: tl.files.load_and_assign_npz_dict(args.path_to_npz, sess),
            'load npz')

    tf_model = tf.graph_util.remove_training_nodes(
        tf.graph_util.convert_variables_to_constants(sess, sess.graph_def,
                                                     output_names))
    uff_model = measure(lambda: uff.from_tensorflow(tf_model, output_names),
                        'uff.from_tensorflow')
    print('uff model created')

    parser = uffparser.create_uff_parser()
    # NCHW, https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/c_api/_nv_uff_parser_8h_source.html
    input_order = 0
    parser.register_input(input_names[0], (channel, height, width),
                          input_order)
    for output_name in output_names:
        parser.register_output(output_name)

    G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.INFO)
    max_batch_size = 1
    max_workspace_size = 1 << 30
    engine = measure(
        lambda: trt.utils.uff_to_trt_engine(
            G_LOGGER, uff_model, parser, max_batch_size, max_workspace_size),
        'trt.utils.uff_to_trt_engine')
    print('engine created')

    # TODO: derive from model_outputs
    f_height = height / 8
    f_width = width / 8
    post_process = PostProcessor((height, width), (f_height, f_width),
                                 'channels_first')

    for number, x in enumerate(images, start=1):
        conf, paf = measure(lambda: infer(engine, x, 1), 'infer')
        humans, heat_up, paf_up = measure(lambda: post_process(conf, paf),
                                          'post_process')
        print('got %d humans' % (len(humans)))
        plot_humans(x.transpose([1, 2, 0]), heat_up, paf_up, humans,
                    '%02d' % number)
def model_to_uff(model_path, output_uff_path, silent=False):
    """Takes frozen .pb graph, converts it to .uff and saves it to file.

    Args:
        model_path (str): .pb model path
        output_uff_path (str): .uff path where the UFF file will be saved
        silent (bool): if False, the UFF converter writes progress messages
            to stdout; if True, they are suppressed
    """
    # Load the graph; DynamicGraph can search and modify a TensorFlow GraphDef.
    dynamic_graph = gs.DynamicGraph(model_path)
    # Rewrite the graph so that layers TensorRT does not support are replaced
    # by custom plugin nodes.
    dynamic_graph = ssd_unsupported_nodes_to_plugin_nodes(dynamic_graph)

    # Perform the conversion.
    uff.from_tensorflow(
        dynamic_graph.as_graph_def(),
        [ModelData.OUTPUT_NAME],
        output_filename=output_uff_path,
        text=True,
        # Honor the ``silent`` flag, which was previously accepted but ignored.
        quiet=silent,
    )
def optimize_pb_graph(graph_def, output_nodes, output_name, sess):
    """Convert ``graph_def`` to UFF and write it to disk.

    :param graph_def: graph definition handed to ``uff.from_tensorflow``
    :param output_nodes: output nodes handed to ``uff.from_tensorflow``
    :param output_name: name of output file; its final extension (if any) is
        replaced by ``.uff``
    :param sess: unused; kept for interface compatibility
    :return: None; the UFF file is written as a side effect
    """
    import os

    # Strip only the final extension. Cutting at the first dot would mangle
    # paths such as "./out/model.uff" or names such as "model.v2.uff".
    base_name = os.path.splitext(output_name)[0]
    uff.from_tensorflow(
        graph_def,
        output_nodes,
        output_filename="%s.uff" % base_name,
    )
def convert_to_tensorrt(args, input_dims, graph_chars=None):
    """Convert the graph at ``args.input`` to UFF and then to a TensorRT engine.

    Writes ``<args.output_dir>.uff`` and, unless ``args.no_cuda`` is set,
    ``<args.output_dir>_tensorrt.bin``.

    Args:
        args: Parsed arguments; reads ``input``, ``output_dir``, ``debug`` and
            ``no_cuda``.
        input_dims: Four-element shape; elements 3, 1 and 2 (in that order)
            are registered as the ``Input`` dimensions.
        graph_chars: Forwarded unchanged to ``add_plugin``.

    Raises:
        EnvironmentError: If the installed graphsurgeon needs the documented
            patch.
        TypeError: Any other ``TypeError`` raised by the UFF conversion.
        SystemExit: With code 0 after the UFF step when ``args.no_cuda`` is set.
    """
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')

    input_dims_corrected = (input_dims[3], input_dims[1], input_dims[2])

    graph = add_plugin(gs.DynamicGraph(args.input),
                       input_dims_corrected,
                       graph_chars=graph_chars)
    print(graph.find_nodes_by_name("image_tensor"))

    try:
        uff.from_tensorflow(graph.as_graph_def(),
                            output_nodes=['NMS'],
                            output_filename=(args.output_dir + ".uff"),
                            text=args.debug,
                            write_preprocessed=args.debug,
                            debug_mode=args.debug)
    except TypeError as e:
        if str(e) == "Cannot convert value 0 to a TensorFlow DType.":
            raise EnvironmentError(
                "Please modify your graphsurgeon package according to the following:\n"
                "https://github.com/AastaNV/TRT_object_detection#update-graphsurgeon-converter"
            ) from e
        # Any other TypeError is a genuine failure; do not swallow it and
        # continue with a missing or partial UFF file.
        raise

    if args.no_cuda:
        raise SystemExit(0)

    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
    ) as network, trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True

        parser.register_input('Input', input_dims_corrected)
        parser.register_output('MarkOutput_0')
        parser.parse(args.output_dir + ".uff", network)
        engine = builder.build_cuda_engine(network)

        buf = engine.serialize()
        with open(args.output_dir + '_tensorrt.bin', 'wb') as f:
            f.write(buf)
def _main_(args):
    """Freeze the Keras training model and export it as a UFF file.

    The model is created via ``create_model``, optionally loaded with
    ``args.weights``, its graph is dumped to ``logs/laneseg_graph`` and the
    frozen graph is written to ``TensorRT/uff/<args.output>.uff``.
    """
    weights_path = args.weights
    output_uff_fpath = os.path.join('TensorRT/uff', args.output + '.uff')

    session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(
        per_process_gpu_memory_fraction=0.4, allow_growth=True))
    K.set_session(tf.Session(config=session_config))
    K.set_learning_phase(0)

    input_shape = (config.NETWORK_INPUT_H, config.NETWORK_INPUT_W,
                   config.NETWORK_INPUT_C)
    train_model, _ = create_model(input_shape=input_shape, lr=1e-4)
    print_summary(train_model)

    if weights_path:
        train_model.load_weights(weights_path)

    # Node names are the tensor names without the ":<index>" suffix.
    model_inputs = [train_model.input.name.split(':')[0]]
    model_outputs = [train_model.output.name.split(':')[0]]
    print(model_inputs)
    print(model_outputs)

    with K.get_session() as sess:
        graphdef = sess.graph.as_graph_def()

        # Start from an empty log directory for the graph summary.
        log_dir = os.path.join('logs', 'laneseg_graph')
        shutil.rmtree(log_dir, ignore_errors=True)
        makedirs(log_dir)
        tf.summary.FileWriter(log_dir, sess.graph).close()

        frozen_graph = tf.graph_util.remove_training_nodes(
            tf.graph_util.convert_variables_to_constants(
                sess, graphdef, model_outputs))

        uff.from_tensorflow(frozen_graph,
                            model_outputs,
                            output_filename=output_uff_fpath)
def main():
    """Compile the model described by an INI config file into a TensorRT engine.

    The file passed via ``--config`` provides ``LIBFLATTENCONCAT.Path`` and the
    ``MODEL`` keys ``Name``, ``Input``, ``DownloadPath``, ``TmpUff``,
    ``InputDims`` and ``OutputBin``. A missing input model is downloaded from
    ``DownloadPath`` first.
    """
    config = configparser.ConfigParser()
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', required=True)
    args = parser.parse_args()
    config.read(args.config)
    lib_flatten_concat_file = config['LIBFLATTENCONCAT']['Path']

    # initialize
    # Compare the major version numerically: comparing only the first
    # character would treat TensorRT 10+ ("1...") as older than 7.
    if int(trt.__version__.split('.')[0]) < 7:
        ctypes.CDLL(lib_flatten_concat_file)
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')

    # compile the model into TensorRT engine
    model = config['MODEL']['Name']
    model_path = config['MODEL']['Input']
    url = config['MODEL']['DownloadPath']

    if not os.path.isfile(model_path):
        print('model does not exist under: ', model_path, 'downloading from ',
              url)
        wget.download(url, model_path)

    dynamic_graph = plugin.add_plugin_and_preprocess(
        gs.DynamicGraph(config['MODEL']['Input']), model, config)

    uff.from_tensorflow(dynamic_graph.as_graph_def(),
                        output_nodes=['NMS'],
                        output_filename=config['MODEL']['TmpUff'],
                        text=True,
                        debug_mode=False)

    input_dims = tuple(
        int(x) for x in config['MODEL']['InputDims'].split(','))
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
    ) as network, trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True

        parser.register_input('Input', input_dims)
        parser.register_output('MarkOutput_0')
        parser.parse(config['MODEL']['TmpUff'], network)
        engine = builder.build_cuda_engine(network)

        buf = engine.serialize()
        with open(config['MODEL']['OutputBin'], 'wb') as f:
            f.write(buf)
def __init__(self, file_path, input_shape):
    """Load a Keras model, freeze it and convert it to UFF.

    The UFF model is written next to the source file (``.h5`` replaced by
    ``.uff``) and also handed to the parent constructor as a buffer.

    Args:
        file_path: Path of the Keras model file.
        input_shape: Forwarded unchanged to the parent constructor.
    """
    # Stop TF from occupying the whole GPU for nothing.
    sess = tf.Session(config=tf.ConfigProto(device_count={'GPU': 0}))
    K.set_session(sess)
    K.set_learning_phase(0)

    model = load_model(file_path, compile=False)
    K.set_learning_phase(0)

    output_name = model.output.op.name
    input_name = model.input.op.name

    frozen_graph = tf.graph_util.remove_training_nodes(
        tf.graph_util.convert_variables_to_constants(
            sess, sess.graph.as_graph_def(), [output_name]))

    # Convert Tensorflow frozen graph to UFF file. The converter's keyword is
    # ``output_filename`` (an unknown ``output_file`` keyword never produces a
    # file), and the output node is named explicitly instead of being deduced.
    uff_buffer = uff.from_tensorflow(
        frozen_graph,
        [output_name],
        output_filename=file_path.replace('.h5', '.uff'))

    super().__init__(file_path, input_name, input_shape, output_name,
                     uff_buffer)
def __init__(self, model, batch_size):
    """Build a TensorRT engine for a Keras model and allocate device buffers.

    The model is applied to a new ``image_tensor`` placeholder, frozen,
    converted to UFF and turned into an engine. Device memory for binding 0
    (``self.d_src``) and binding 1 (``self.d_dst``) is allocated for
    ``batch_size`` float32 samples.
    """
    # get Tensorflow graph object from Keras
    with K.get_session() as sess:
        image_batch_t = tf.placeholder(tf.float32,
                                       shape=(None, 1, 28, 28),
                                       name='image_tensor')
        K.set_learning_phase(0)
        conf_t = model(image_batch_t)
        # Drop the two-character tensor suffix to get the node name.
        output_names = [conf_t.name[:-2]]
        frozen_graph = tf.graph_util.remove_training_nodes(
            tf.graph_util.convert_variables_to_constants(
                sess, sess.graph.as_graph_def(), output_names))

    # convert TensorRT UFF object
    uff_model = uff.from_tensorflow(frozen_graph, output_names)
    G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)

    parser = uffparser.create_uff_parser()
    parser.register_input("image_tensor", (1, 28, 28), 0)
    parser.register_output(output_names[0])

    # create TensorRT inference engine
    engine = trt.utils.uff_to_trt_engine(G_LOGGER,
                                         stream=uff_model,
                                         parser=parser,
                                         max_batch_size=batch_size,
                                         max_workspace_size=1 << 25)
    parser.destroy()

    def binding_nbytes(index):
        # Bytes needed for ``batch_size`` float32 samples of one binding.
        dims = engine.get_binding_dimensions(index).to_DimsCHW()
        return batch_size * dims.C() * dims.H() * dims.W() * np.dtype(
            np.float32).itemsize

    # allocate needed device buffers
    self.d_src = cuda.mem_alloc(binding_nbytes(0))
    self.d_dst = cuda.mem_alloc(binding_nbytes(1))

    self.engine = engine
    self.ctx = engine.create_execution_context()
    self.batch_size = batch_size
def prepare_model(model=InceptionV2,
                  trt_engine_datatype=trt.DataType.FLOAT,
                  batch_size=1,
                  calib_dataset=Path(__file__).parent / 'VOCdevkit' /
                  'VOC2007' / 'JPEGImages'):
    """Build and save the TensorRT engine for ``model`` if it is missing.

    Nothing happens when ``model.PATH`` already exists. Otherwise the graph at
    ``model.TF_PATH`` is converted through a temporary ``tmp.uff`` and the
    serialized engine is written to ``model.PATH``.

    Args:
        model: Model description providing ``PATH``, ``TF_PATH``,
            ``OUTPUT_NAME``, ``INPUT_SHAPE`` and ``add_plugin``.
        trt_engine_datatype: ``HALF`` enables FP16; ``INT8`` enables FP16 and
            INT8 with an entropy calibrator.
        batch_size: Maximum batch size of the engine.
        calib_dataset: Image directory used by the INT8 calibrator.
    """
    import uff
    from . import calibrator

    if model.PATH.exists():
        return

    # initialize
    logger = trt.Logger(trt.Logger.INFO)
    trt.init_libnvinfer_plugins(logger, '')
    runtime = trt.Runtime(logger)

    # compile model into TensorRT
    graph = model.add_plugin(gs.DynamicGraph(str(model.TF_PATH)))
    uff.from_tensorflow(graph.as_graph_def(),
                        model.OUTPUT_NAME,
                        output_filename='tmp.uff')

    with trt.Builder(logger) as builder:
        with builder.create_network() as network:
            with trt.UffParser() as parser:
                builder.max_workspace_size = 1 << 30
                builder.max_batch_size = batch_size

                if trt_engine_datatype == trt.DataType.HALF:
                    builder.fp16_mode = True
                elif trt_engine_datatype == trt.DataType.INT8:
                    # TODO: download data if it doesn't exist
                    # TODO: use DLA
                    builder.fp16_mode = True
                    builder.int8_mode = True
                    cache_file = Path(__file__).parent / 'INT8CacheFile'
                    builder.int8_calibrator = calibrator.SSDEntropyCalibrator(
                        data_dir=calib_dataset, cache_file=cache_file)

                parser.register_input('Input', model.INPUT_SHAPE)
                parser.register_output('MarkOutput_0')
                parser.parse('tmp.uff', network)
                engine = builder.build_cuda_engine(network)

                # save engine
                serialized = engine.serialize()
                with open(model.PATH, 'wb') as out_file:
                    out_file.write(serialized)

    # The intermediate UFF file is no longer needed.
    Path('tmp.uff').unlink()
def build_engine(cls,
                 trt_logger,
                 batch_size,
                 calib_dataset=Path.home() / 'VOCdevkit' / 'VOC2007' /
                 'JPEGImages'):
    """Build the TensorRT engine for ``cls`` and save it to ``cls.ENGINE_PATH``.

    FP16 and INT8 are enabled when the builder reports fast support for them.

    Args:
        trt_logger: Logger handed to ``trt.Builder``.
        batch_size: Maximum batch size of the engine.
        calib_dataset: Image directory used by the INT8 calibrator.

    Returns:
        The built engine, or ``None`` when the build failed (nothing is
        written in that case).
    """
    import graphsurgeon as gs
    import uff
    from . import calibrator

    # compile model into TensorRT
    graph = cls.add_plugin(gs.DynamicGraph(str(cls.MODEL_PATH)))
    uff_model = uff.from_tensorflow(graph.as_graph_def(), [cls.OUTPUT_NAME],
                                    quiet=True)

    with trt.Builder(trt_logger) as builder:
        with builder.create_network() as network:
            with trt.UffParser() as parser:
                builder.max_workspace_size = 1 << 30
                builder.max_batch_size = batch_size
                logging.info('Building engine with batch size: %d',
                             batch_size)
                logging.info('This may take a while...')

                if builder.platform_has_fast_fp16:
                    builder.fp16_mode = True
                if builder.platform_has_fast_int8:
                    builder.int8_mode = True
                    cache_file = (Path(__file__).parent /
                                  f'{cls.__name__}_calib_cache')
                    builder.int8_calibrator = calibrator.SSDEntropyCalibrator(
                        cls.INPUT_SHAPE,
                        data_dir=calib_dataset,
                        cache_file=cache_file)

                parser.register_input('Input', cls.INPUT_SHAPE)
                parser.register_output('MarkOutput_0')
                parser.parse_buffer(uff_model, network)
                engine = builder.build_cuda_engine(network)
                if engine is None:
                    return None
                logging.info("Completed creating Engine")

                serialized = engine.serialize()
                with open(cls.ENGINE_PATH, 'wb') as engine_file:
                    engine_file.write(serialized)
                return engine
def convert_keras_to_uff_model(model, uff_model_path):
    """Freeze a Keras model and write it to ``uff_model_path`` as UFF.

    Args:
        model: Keras model to convert.
        uff_model_path: Destination path of the UFF file.

    Returns:
        A tuple ``(model_input_name, model_output_name)`` holding the input
        and output tensor names without the ``:<index>`` suffix.
    """
    # have to make BatchNorm layers untrainable since they're not yet
    # supported by tensorrt. The match is done on the layer *name*; a layer
    # object itself does not support the ``in`` test.
    for entry in model.layers:
        if 'bn' in entry.name:
            entry.trainable = False

    # Cut the ":<index>" suffix. ``str.strip(':0')`` would remove a *set of
    # characters* and mangle names that end in "0" (e.g. "conv10:0").
    model_input_name = model.input.name.split(':')[0]
    model_output_name = model.output.name.split(':')[0]

    input_size = model.input.shape
    print(input_size)

    graph = tf.get_default_graph().as_graph_def()
    init = tf.global_variables_initializer()

    sess = K.get_session()
    # NOTE(review): running the global initializer on the Keras session looks
    # like it could reset already loaded weights - confirm this is intended.
    sess.run(init)

    frozen_graph = tf.graph_util.convert_variables_to_constants(
        sess, graph, [model_output_name])
    frozen_graph = tf.graph_util.remove_training_nodes(frozen_graph)

    uff_model = uff.from_tensorflow(frozen_graph, [model_output_name])
    with open(uff_model_path, 'wb') as dump:
        dump.write(uff_model)

    return model_input_name, model_output_name
def convert_to_uff(model, frozen_filename, uff_filename):
    """Freeze ``model``, dump debug artifacts and write a UFF file.

    The frozen graph goes to ``frozen_filename``, several text dumps are
    written to a fixed debug directory, plugin nodes are created on the graph
    via ``create_plugin_node`` and the UFF model is written to
    ``uff_filename``.
    """
    debug_dir = ('/home/codesteller/workspace/ml_workspace/trt_ws/'
                 'trt-custom-plugin/saved_model/frozen_model')

    # First freeze the graph and remove training nodes.
    output_name = model.output.op.name
    session = get_session()
    frozen_graph = tf.graph_util.remove_training_nodes(
        tf.graph_util.convert_variables_to_constants(
            session, session.graph.as_graph_def(), [output_name]))

    # Save the model
    with open(frozen_filename, "wb") as frozen_file:
        frozen_file.write(frozen_graph.SerializeToString())

    tf.io.write_graph(session.graph_def,
                      debug_dir,
                      'train.pbtxt',
                      as_text=True)
    print_graphdef(tf.get_default_graph().as_graph_def(),
                   debug_dir + '/train.txt')

    # Transform graph using graphsurgeon to map unsupported TensorFlow
    # operations to appropriate TensorRT custom layer plugins
    dynamic_graph = gs.DynamicGraph(frozen_graph)
    create_plugin_node(dynamic_graph)
    print_dynamic_graph(dynamic_graph,
                        filename=debug_dir + '/final_node_graph.txt')

    uff_model = uff.from_tensorflow(dynamic_graph, [output_name])
    with open(uff_filename, "wb") as uff_file:
        uff_file.write(uff_model)
def convert(self):
    """Convert the model described by ``self.spec`` into a TensorRT engine.

    Writes the intermediate UFF file to ``self.spec['tmp_uff']`` and the
    serialized engine to ``self.spec['output_bin']``.
    """
    spec = self.spec
    graph = self.add_plugin(gs.DynamicGraph(spec['input_pb']), spec)
    uff.from_tensorflow(graph.as_graph_def(),
                        output_nodes=['NMS'],
                        output_filename=self.spec['tmp_uff'],
                        text=True,
                        debug_mode=DEBUG_UFF)

    with trt.Builder(TRT_LOGGER) as builder:
        with builder.create_network() as network:
            with trt.UffParser() as parser:
                builder.max_workspace_size = 1 << 28
                builder.max_batch_size = 1
                builder.fp16_mode = True

                parser.register_input('Input', self.inputDims)
                parser.register_output('MarkOutput_0')
                parser.parse(self.spec['tmp_uff'], network)
                engine = builder.build_cuda_engine(network)

                serialized = engine.serialize()
                with open(self.spec['output_bin'], 'wb') as out_file:
                    out_file.write(serialized)
labels_placeholder, data_sets.validation, summary) test_writer.add_summary(log, step) graphdef = tf.get_default_graph().as_graph_def() frozen_graph = tf.graph_util.convert_variables_to_constants( sess, graphdef, OUTPUT_NAMES) return tf.graph_util.remove_training_nodes(frozen_graph) # MNIST_DATASETS = input_data.read_data_sets('/tmp/tensorflow/mnist/input_data') tf_model = run_training(MNIST_DATASETS) tf.train.write_graph(tf_model, './', 'model.pb', as_text=False) # print(type(tf_model)) uff_model = uff.from_tensorflow(tf_model, ["fc2/Relu"]) print(type(uff_model)) # G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR) parser = uffparser.create_uff_parser() parser.register_input("Placeholder", (1, 28, 28), 0) parser.register_output("fc2/Relu") engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser, 1, 1 << 20) #host_mem = parser.hidden_plugin_memory() parser.destroy() # input data img, label = MNIST_DATASETS.test.next_batch(1) img = img[0] #convert input data to Float32 img = img.astype(np.float32) label = label[0]
checkpoint_file = os.path.join("/tmp/tensorflow/mnist/log", "model.ckpt") saver.save(sess, checkpoint_file, global_step=step) print('Validation Data Eval:') log = do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_sets.validation, summary) test_writer.add_summary(log, step) #return sess graphdef = tf.get_default_graph().as_graph_def() frozen_graph = tf.graph_util.convert_variables_to_constants( sess, graphdef, OUTPUT_NAMES) return tf.graph_util.remove_training_nodes(frozen_graph) def learn(): return run_training(MNIST_DATASETS) def get_testcase(): return MNIST_DATASETS.test.next_batch(1) if __name__ == "__main__": frozen_graph = run_training(MNIST_DATASETS) uff.from_tensorflow(graphdef=frozen_graph, output_filename=UFF_OUTPUT_FILENAME, output_nodes=OUTPUT_NAMES, text=True)
def save_uff(sess, names, filename):
    """Freeze the graph of ``sess`` and write it to ``filename`` as UFF.

    ``names`` is used both as the list of nodes to keep while freezing and as
    the UFF output nodes.
    """
    import uff

    inference_graph = tf.graph_util.remove_training_nodes(
        tf.graph_util.convert_variables_to_constants(sess, sess.graph_def,
                                                     names))
    uff.from_tensorflow(inference_graph, names, output_filename=filename)
def ssd_pipeline_to_uff(checkpoint_path, config_path,
                        tmp_dir='exported_model'):
    """Export an SSD object detection pipeline to a UFF buffer.

    The checkpoint is exported to a frozen graph inside ``tmp_dir``; the graph
    is then rewritten with graphsurgeon so that input handling, anchor
    generation, concatenation and NMS are performed by TensorRT plugin nodes.

    Args:
        checkpoint_path: Checkpoint handed to the object detection exporter.
        config_path: Pipeline config file; provides input size, anchor
            generator, box coder and NMS settings.
        tmp_dir: Directory receiving the exported frozen graph; created if
            missing.

    Returns:
        The serialized UFF model returned by ``uff.from_tensorflow``.
    """
    import graphsurgeon as gs
    from object_detection import exporter
    import tensorflow as tf
    import uff

    # TODO(@jwelsh): Implement by extending model builders with
    # TensorRT plugin stubs.  Currently, this method uses pattern
    # matching which is a bit hacky and subject to fail when TF
    # object detection API exporter changes.  We should add object
    # detection as submodule to avoid versioning incompatibilities.

    config = _load_config(config_path)
    frozen_graph_path = os.path.join(tmp_dir, FROZEN_GRAPH_NAME)

    # get input shape
    channels = 3
    height = config.model.ssd.image_resizer.fixed_shape_resizer.height
    width = config.model.ssd.image_resizer.fixed_shape_resizer.width

    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True

    # export checkpoint and config to frozen graph
    with tf.Session(config=tf_config):
        with tf.Graph().as_default():
            # Portable replacement for shelling out to ``mkdir -p``.
            os.makedirs(tmp_dir, exist_ok=True)
            exporter.export_inference_graph('image_tensor',
                                            config,
                                            checkpoint_path,
                                            tmp_dir,
                                            input_shape=[1, None, None, 3])

    dynamic_graph = gs.DynamicGraph(frozen_graph_path)

    # forward all identity nodes
    all_identity_nodes = dynamic_graph.find_nodes_by_op("Identity")
    dynamic_graph.forward_inputs(all_identity_nodes)

    # create input plugin
    input_plugin = gs.create_plugin_node(name=TRT_INPUT_NAME,
                                         op="Placeholder",
                                         dtype=tf.float32,
                                         shape=[1, height, width, channels])

    # create anchor box generator
    anchor_generator_config = config.model.ssd.anchor_generator.ssd_anchor_generator
    box_coder_config = config.model.ssd.box_coder.faster_rcnn_box_coder
    priorbox_plugin = gs.create_plugin_node(
        name="priorbox",
        op="GridAnchor_TRT",
        minSize=anchor_generator_config.min_scale,
        maxSize=anchor_generator_config.max_scale,
        aspectRatios=list(anchor_generator_config.aspect_ratios),
        variance=[
            1.0 / box_coder_config.y_scale, 1.0 / box_coder_config.x_scale,
            1.0 / box_coder_config.height_scale,
            1.0 / box_coder_config.width_scale
        ],
        featureMapShapes=_get_feature_map_shape(config),
        numLayers=config.model.ssd.anchor_generator.ssd_anchor_generator.
        num_layers)

    # create nms plugin
    nms_config = config.model.ssd.post_processing.batch_non_max_suppression
    nms_plugin = gs.create_plugin_node(
        name=TRT_OUTPUT_NAME,
        op="NMS_TRT",
        shareLocation=1,
        varianceEncodedInTarget=0,
        backgroundLabelId=0,
        confidenceThreshold=nms_config.score_threshold,
        nmsThreshold=nms_config.iou_threshold,
        topK=nms_config.max_detections_per_class,
        keepTopK=nms_config.max_total_detections,
        numClasses=config.model.ssd.num_classes + 1,  # add background
        inputOrder=[1, 2, 0],
        confSigmoid=1,
        isNormalized=1,
        scoreConverter="SIGMOID",
        codeType=3)

    priorbox_concat_plugin = gs.create_node("priorbox_concat",
                                            op="ConcatV2",
                                            dtype=tf.float32,
                                            axis=2)

    boxloc_concat_plugin = gs.create_plugin_node(
        "boxloc_concat",
        op="FlattenConcat_TRT_jetbot",
        dtype=tf.float32,
    )

    boxconf_concat_plugin = gs.create_plugin_node(
        "boxconf_concat",
        op="FlattenConcat_TRT_jetbot",
        dtype=tf.float32,
    )

    namespace_plugin_map = {
        "MultipleGridAnchorGenerator": priorbox_plugin,
        "Postprocessor": nms_plugin,
        "Preprocessor": input_plugin,
        "ToFloat": input_plugin,
        "image_tensor": input_plugin,
        "Concatenate": priorbox_concat_plugin,
        "concat": boxloc_concat_plugin,
        "concat_1": boxconf_concat_plugin
    }

    dynamic_graph.collapse_namespaces(namespace_plugin_map)

    # fix name: drop every NMS input that refers to the input placeholder.
    # Walk the indices backwards so that removing an entry does not shift the
    # entries still to be inspected (popping while enumerating skips items).
    nms_inputs = dynamic_graph.find_nodes_by_op('NMS_TRT')[0].input
    for i in reversed(range(len(nms_inputs))):
        if TRT_INPUT_NAME in nms_inputs[i]:
            nms_inputs.pop(i)

    dynamic_graph.remove(dynamic_graph.graph_outputs,
                         remove_exclusive_dependencies=False)

    uff_buffer = uff.from_tensorflow(dynamic_graph.as_graph_def(),
                                     [TRT_OUTPUT_NAME])

    return uff_buffer