import os

from google.protobuf import text_format

# model_config_pb2 ships with the Triton python client; _convert_dtype and
# _add_model_param are helpers defined elsewhere in this module.
import tritonclient.grpc.model_config_pb2 as model_config


def export_tensorflow_model(model, name, output_path, version=1):
    """Exports a TensorFlow model for serving with Triton

    Parameters
    ----------
    model:
        The tensorflow model that should be served
    name:
        The name of the triton model to export
    output_path:
        The path to write the exported model to
    version:
        The version number of the model to export
    """
    tf_model_path = os.path.join(output_path, str(version), "model.savedmodel")
    model.save(tf_model_path)

    config = model_config.ModelConfig(name=name,
                                      backend="tensorflow",
                                      platform="tensorflow_savedmodel")

    for col in model.inputs:
        config.input.append(
            model_config.ModelInput(name=col.name,
                                    data_type=_convert_dtype(col.dtype),
                                    dims=[-1, 1]))

    for col in model.outputs:
        config.output.append(
            model_config.ModelOutput(name=col.name.split("/")[0],
                                     data_type=_convert_dtype(col.dtype),
                                     dims=[-1, 1]))

    with open(os.path.join(output_path, "config.pbtxt"), "w") as o:
        text_format.PrintMessage(config, o)
    return config
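
# Illustrative usage (the model, name, and paths are hypothetical): the export
# writes the SavedModel under a numbered version directory with config.pbtxt
# at the model root, which is the layout Triton's model repository expects:
#
#     import tensorflow as tf
#
#     demo = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
#     export_tensorflow_model(demo, "demo_tf", "/models/demo_tf", version=1)
#
#     # resulting layout:
#     # /models/demo_tf/config.pbtxt
#     # /models/demo_tf/1/model.savedmodel/
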
def _generate_pytorch_config(name, output_path, model_info, max_batch_size=None):
    """Given the input/output info for a PyTorch model exported to ONNX,
    generates the Triton model config proto object describing its inputs
    and outputs"""
    config = model_config.ModelConfig(name=name,
                                      platform="onnxruntime_onnx",
                                      max_batch_size=max_batch_size)

    for col, val in model_info["input"].items():
        config.input.append(
            model_config.ModelInput(name=col,
                                    data_type=_convert_dtype(val["dtype"]),
                                    dims=[-1, len(val["columns"])]))

    for col, val in model_info["output"].items():
        # single-column outputs are exposed as 1D tensors
        if len(val["columns"]) == 1:
            dims = [-1]
        else:
            dims = [-1, len(val["columns"])]
        config.output.append(
            model_config.ModelOutput(name=col,
                                     data_type=_convert_dtype(val["dtype"]),
                                     dims=dims))

    with open(os.path.join(output_path, "config.pbtxt"), "w") as o:
        text_format.PrintMessage(config, o)
    return config
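
# From the lookups above, model_info is assumed to be a dict of the form
# sketched below, where "columns" lists the workflow columns feeding each ONNX
# tensor; the tensor names and numpy dtypes here are purely illustrative:
#
#     import numpy as np
#
#     model_info = {
#         "input": {"input__0": {"dtype": np.dtype("float32"), "columns": ["x", "y"]}},
#         "output": {"output__0": {"dtype": np.dtype("float32"), "columns": ["pred"]}},
#     }
#     _generate_pytorch_config("demo_pt", "/models/demo_pt", model_info)
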
def export_tensorflow_model(model, name, output_path, version=1):
    """Exports a TensorFlow model for serving with Triton

    Parameters
    ----------
    model:
        The tensorflow model that should be served
    name:
        The name of the triton model to export
    output_path:
        The path to write the exported model to
    version:
        The version number of the model to export
    """
    tf_model_path = os.path.join(output_path, str(version), "model.savedmodel")
    model.save(tf_model_path, include_optimizer=False)

    config = model_config.ModelConfig(name=name,
                                      backend="tensorflow",
                                      platform="tensorflow_savedmodel")

    inputs, outputs = model.inputs, model.outputs
    if not inputs or not outputs:
        signatures = getattr(model, "signatures", {}) or {}
        default_signature = signatures.get("serving_default")
        if not default_signature:
            # roundtrip the saved model through disk to generate the
            # serving_default signature if it doesn't exist yet
            import tensorflow as tf

            reloaded = tf.keras.models.load_model(tf_model_path)
            default_signature = reloaded.signatures["serving_default"]

        inputs = list(default_signature.structured_input_signature[1].values())
        outputs = list(default_signature.structured_outputs.values())

    for col in inputs:
        config.input.append(
            model_config.ModelInput(name=col.name,
                                    data_type=_convert_dtype(col.dtype),
                                    dims=[-1, col.shape[1]]))

    for col in outputs:
        # this assumes the list columns are 1D tensors both for cats and conts
        config.output.append(
            model_config.ModelOutput(name=col.name.split("/")[0],
                                     data_type=_convert_dtype(col.dtype),
                                     dims=[-1, col.shape[1]]))

    with open(os.path.join(output_path, "config.pbtxt"), "w") as o:
        text_format.PrintMessage(config, o)
    return config
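
# Note: the signature fallback above matters for subclassed models, whose
# model.inputs / model.outputs lists are empty; their tensor specs are
# recovered from the SavedModel's serving_default signature instead,
# reloading from disk if the signature has not been traced yet. The
# [-1, col.shape[1]] dims also assume every tensor is at least 2D
# (batch, features); rank-1 tensors would need their own handling.
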
def _generate_nvtabular_config(workflow,
                               name,
                               output_path,
                               output_model=None,
                               max_batch_size=None,
                               cats=None,
                               conts=None):
    """Given a workflow, generates the Triton model config proto object
    describing the inputs and outputs to that workflow"""
    config = model_config.ModelConfig(name=name,
                                      backend="python",
                                      max_batch_size=max_batch_size)

    if output_model == "hugectr":
        for column in workflow.column_group.input_column_names:
            dtype = workflow.input_dtypes[column]
            config.input.append(
                model_config.ModelInput(name=column,
                                        data_type=_convert_dtype(dtype),
                                        dims=[-1]))

        config.output.append(
            model_config.ModelOutput(name="DES",
                                     data_type=model_config.TYPE_FP32,
                                     dims=[-1]))
        config.output.append(
            model_config.ModelOutput(name="CATCOLUMN",
                                     data_type=model_config.TYPE_INT64,
                                     dims=[-1]))
        config.output.append(
            model_config.ModelOutput(name="ROWINDEX",
                                     data_type=model_config.TYPE_INT32,
                                     dims=[-1]))
    else:
        for column, dtype in workflow.input_dtypes.items():
            _add_model_param(column, dtype, model_config.ModelInput, config.input)

        for column, dtype in workflow.output_dtypes.items():
            _add_model_param(column, dtype, model_config.ModelOutput, config.output)

    with open(os.path.join(output_path, "config.pbtxt"), "w") as o:
        text_format.PrintMessage(config, o)
    return config
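
# For output_model="hugectr" the generated config.pbtxt looks roughly like the
# sketch below (input columns come from the workflow; "day" is hypothetical):
#
#     name: "demo_nvt"
#     backend: "python"
#     input {
#       name: "day"
#       data_type: TYPE_INT64
#       dims: -1
#     }
#     output {
#       name: "DES"
#       data_type: TYPE_FP32
#       dims: -1
#     }
#     # ...CATCOLUMN (TYPE_INT64) and ROWINDEX (TYPE_INT32) follow the same pattern
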
def _generate_model_config(workflow, name, output_path):
    """Given a workflow, generates the Triton model config proto object
    describing the inputs and outputs to that workflow"""
    config = model_config.ModelConfig(name=name, backend="python")

    for column in workflow.column_group.input_column_names:
        dtype = workflow.input_dtypes[column]
        config.input.append(
            model_config.ModelInput(name=column,
                                    data_type=_convert_dtype(dtype),
                                    dims=[-1]))

    for column, dtype in workflow.output_dtypes.items():
        config.output.append(
            model_config.ModelOutput(name=column,
                                     data_type=_convert_dtype(dtype),
                                     dims=[-1]))

    with open(os.path.join(output_path, "config.pbtxt"), "w") as o:
        text_format.PrintMessage(config, o)
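
# Minimal illustrative call; the workflow is assumed to be an already-fit
# nvtabular Workflow exposing the input_dtypes/output_dtypes used above:
#
#     workflow = ...  # an already-fit nvtabular Workflow
#     _generate_model_config(workflow, "demo_workflow", "/models/demo_workflow")
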
def _generate_tensorflow_config(model, name, output_path):
    """Given a TensorFlow model, generates the Triton model config proto
    object describing its inputs and outputs"""
    config = model_config.ModelConfig(name=name,
                                      backend="tensorflow",
                                      platform="tensorflow_savedmodel")

    for col in model.inputs:
        config.input.append(
            model_config.ModelInput(name=col.name,
                                    data_type=_convert_dtype(col.dtype),
                                    dims=[-1, 1]))

    for col in model.outputs:
        config.output.append(
            model_config.ModelOutput(name=col.name.split("/")[0],
                                     data_type=_convert_dtype(col.dtype),
                                     dims=[-1, 1]))

    with open(os.path.join(output_path, "config.pbtxt"), "w") as o:
        text_format.PrintMessage(config, o)
    return config
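
# The split("/")[0] above strips the op suffix from Keras symbolic output
# names: a tensor named e.g. "dense/Sigmoid:0" (illustrative) is registered
# under "dense", matching the layer-level output name exposed by the
# SavedModel signature.
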
def _generate_hugectr_config(name, output_path, hugectr_params, max_batch_size=None):
    """Generates the Triton model config proto object for a HugeCTR model and
    writes it to output_path/config.pbtxt"""
    config = model_config.ModelConfig(name=name,
                                      backend="hugectr",
                                      max_batch_size=max_batch_size)

    config.input.append(
        model_config.ModelInput(name="DES",
                                data_type=model_config.TYPE_FP32,
                                dims=[-1]))
    config.input.append(
        model_config.ModelInput(name="CATCOLUMN",
                                data_type=model_config.TYPE_INT64,
                                dims=[-1]))
    config.input.append(
        model_config.ModelInput(name="ROWINDEX",
                                data_type=model_config.TYPE_INT32,
                                dims=[-1]))

    for i in range(hugectr_params["n_outputs"]):
        config.output.append(
            model_config.ModelOutput(name="OUTPUT" + str(i),
                                     data_type=model_config.TYPE_FP32,
                                     dims=[-1]))

    # kind=1 corresponds to KIND_GPU in the Triton model config proto
    config.instance_group.append(
        model_config.ModelInstanceGroup(gpus=[0], count=1, kind=1))

    config_hugectr = model_config.ModelParameter(
        string_value=hugectr_params["config"])
    config.parameters["config"].CopyFrom(config_hugectr)

    gpucache_val = hugectr_params.get("gpucache", "true")
    gpucache = model_config.ModelParameter(string_value=gpucache_val)
    config.parameters["gpucache"].CopyFrom(gpucache)

    gpucacheper_val = str(hugectr_params.get("gpucacheper_val", "0.5"))
    gpucacheper = model_config.ModelParameter(string_value=gpucacheper_val)
    config.parameters["gpucacheper"].CopyFrom(gpucacheper)

    label_dim = model_config.ModelParameter(
        string_value=str(hugectr_params["label_dim"]))
    config.parameters["label_dim"].CopyFrom(label_dim)

    slots = model_config.ModelParameter(
        string_value=str(hugectr_params["slots"]))
    config.parameters["slots"].CopyFrom(slots)

    des_feature_num = model_config.ModelParameter(
        string_value=str(hugectr_params["des_feature_num"]))
    config.parameters["des_feature_num"].CopyFrom(des_feature_num)

    cat_feature_num = model_config.ModelParameter(
        string_value=str(hugectr_params["cat_feature_num"]))
    config.parameters["cat_feature_num"].CopyFrom(cat_feature_num)

    max_nnz = model_config.ModelParameter(
        string_value=str(hugectr_params["max_nnz"]))
    config.parameters["max_nnz"].CopyFrom(max_nnz)

    embedding_vector_size = model_config.ModelParameter(
        string_value=str(hugectr_params["embedding_vector_size"]))
    config.parameters["embedding_vector_size"].CopyFrom(embedding_vector_size)

    embeddingkey_long_type_val = hugectr_params.get("embeddingkey_long_type", "true")
    embeddingkey_long_type = model_config.ModelParameter(
        string_value=embeddingkey_long_type_val)
    config.parameters["embeddingkey_long_type"].CopyFrom(embeddingkey_long_type)

    with open(os.path.join(output_path, "config.pbtxt"), "w") as o:
        text_format.PrintMessage(config, o)
    return config
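
# A sketch of the hugectr_params dict consumed above; every key is read by
# the function, but the values here are purely illustrative:
#
#     hugectr_params = {
#         "config": "/models/demo_hugectr/1/demo.json",  # hypothetical path
#         "n_outputs": 1,
#         "label_dim": 1,
#         "slots": 26,
#         "des_feature_num": 13,
#         "cat_feature_num": 26,
#         "max_nnz": 2,
#         "embedding_vector_size": 16,
#         # optional keys, with the defaults shown: "gpucache": "true",
#         # "gpucacheper_val": "0.5", "embeddingkey_long_type": "true"
#     }
#     _generate_hugectr_config("demo_hugectr", "/models/demo_hugectr", hugectr_params)
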
def _generate_nvtabular_config(
    workflow,
    name,
    output_path,
    output_model=None,
    max_batch_size=None,
    cats=None,
    conts=None,
    output_info=None,
    backend="python",
):
    """Given a workflow, generates the Triton model config proto object
    describing the inputs and outputs to that workflow"""
    config = model_config.ModelConfig(name=name,
                                      backend=backend,
                                      max_batch_size=max_batch_size)

    config.parameters["python_module"].string_value = "nvtabular.inference.triton.model"
    config.parameters["output_model"].string_value = output_model if output_model else ""

    if output_model == "hugectr":
        # kind=2 corresponds to KIND_CPU in the Triton model config proto
        config.instance_group.append(model_config.ModelInstanceGroup(kind=2))

        for column in workflow.column_group.input_column_names:
            dtype = workflow.input_dtypes[column]
            config.input.append(
                model_config.ModelInput(name=column,
                                        data_type=_convert_dtype(dtype),
                                        dims=[-1]))

        config.output.append(
            model_config.ModelOutput(name="DES",
                                     data_type=model_config.TYPE_FP32,
                                     dims=[-1]))
        config.output.append(
            model_config.ModelOutput(name="CATCOLUMN",
                                     data_type=model_config.TYPE_INT64,
                                     dims=[-1]))
        config.output.append(
            model_config.ModelOutput(name="ROWINDEX",
                                     data_type=model_config.TYPE_INT32,
                                     dims=[-1]))
    elif output_model == "pytorch":
        for column, dtype in workflow.input_dtypes.items():
            _add_model_param(column, dtype, model_config.ModelInput, config.input)

        for col, val in output_info.items():
            _add_model_param(
                col,
                val["dtype"],
                model_config.ModelOutput,
                config.output,
                [-1, len(val["columns"])],
            )
    else:
        for column, dtype in workflow.input_dtypes.items():
            _add_model_param(column, dtype, model_config.ModelInput, config.input)

        for column, dtype in workflow.output_dtypes.items():
            _add_model_param(column, dtype, model_config.ModelOutput, config.output)

    with open(os.path.join(output_path, "config.pbtxt"), "w") as o:
        text_format.PrintMessage(config, o)
    return config
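
# Illustrative call for the output_model="pytorch" branch; output_info has the
# same shape as the "output" section of the model_info dict consumed by
# _generate_pytorch_config above (the names and dtypes are hypothetical):
#
#     import numpy as np
#
#     output_info = {"output__0": {"dtype": np.dtype("float32"), "columns": ["pred"]}}
#     _generate_nvtabular_config(workflow, "demo_nvt", "/models/demo_nvt",
#                                output_model="pytorch", output_info=output_info)
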