def mlir_quantize(input_data_str,
                  disable_per_channel=False,
                  fully_quantize=False,
                  inference_type=_types_pb2.INT8,
                  enable_numeric_verify=False):
  """Quantize `input_data_str` with calibration results.

  Args:
    input_data_str: Input data in serialized form (e.g. a TFLITE model with
      calibration results).
    disable_per_channel: Bool indicating whether to do per-channel or
      per-tensor quantization.
    fully_quantize: Bool indicating whether to fully quantize the model.
      Besides model body, the input/output will be quantized as well.
    inference_type: Data type for the activations. The default value is int8.
    enable_numeric_verify: Experimental. Subject to change. Bool indicating
      whether to add NumericVerify ops into the debug mode quantized model.

  Returns:
    Quantized model in serialized form (e.g. a TFLITE model) with
    floating-point inputs and outputs.
  """
  # Delegate directly to the MLIR-based quantizer exposed via the TOCO wrapper.
  return wrap_toco.wrapped_experimental_mlir_quantize(input_data_str,
                                                      disable_per_channel,
                                                      fully_quantize,
                                                      inference_type,
                                                      enable_numeric_verify)
def mlir_quantize(input_data_str):
  """Quantize `input_data_str` with calibration results.

  Args:
    input_data_str: Input data in serialized form (e.g. a TFLITE model with
      calibration results).

  Returns:
    Quantized model in serialized form (e.g. a TFLITE model) with
    floating-point inputs and outputs.
  """
  # Thin pass-through to the MLIR quantizer; all policy lives in the wrapper.
  return wrap_toco.wrapped_experimental_mlir_quantize(input_data_str)
def mlir_quantize(input_data_str, disable_per_channel=False):
  """Quantize `input_data_str` with calibration results.

  Args:
    input_data_str: Input data in serialized form (e.g. a TFLITE model with
      calibration results).
    disable_per_channel: Bool indicating whether to do per-channel or
      per-tensor quantization.

  Returns:
    Quantized model in serialized form (e.g. a TFLITE model) with
    floating-point inputs and outputs.
  """
  # Forward both arguments positionally to the native MLIR quantize entry point.
  return wrap_toco.wrapped_experimental_mlir_quantize(input_data_str,
                                                      disable_per_channel)
def mlir_quantize(input_data_str,
                  disable_per_channel=False,
                  inference_type=_types_pb2.INT8):
  """Quantize `input_data_str` with calibration results.

  Args:
    input_data_str: Input data in serialized form (e.g. a TFLITE model with
      calibration results).
    disable_per_channel: Bool indicating whether to do per-channel or
      per-tensor quantization.
    inference_type: Data type for the activations. The default value is int8.

  Returns:
    Quantized model in serialized form (e.g. a TFLITE model) with
    floating-point inputs and outputs.
  """
  # Hand all options straight to the MLIR quantizer behind the TOCO wrapper.
  return wrap_toco.wrapped_experimental_mlir_quantize(input_data_str,
                                                      disable_per_channel,
                                                      inference_type)
def mlir_quantize(input_data_str,
                  disable_per_channel=False,
                  fully_quantize=False,
                  inference_type=_types_pb2.QUANTIZED_INT8,
                  input_data_type=dtypes.float32,
                  output_data_type=dtypes.float32,
                  enable_numeric_verify=False,
                  enable_whole_model_verify=False,
                  denylisted_ops=None,
                  denylisted_nodes=None):
  """Quantize `input_data_str` with calibration results.

  Args:
    input_data_str: Input data in serialized form (e.g. a TFLITE model with
      calibration results).
    disable_per_channel: Bool indicating whether to do per-channel or
      per-tensor quantization.
    fully_quantize: Bool indicating whether to fully quantize the model.
      Besides model body, the input/output will be quantized as well.
    inference_type: Data type for the activations. The default value is int8.
    input_data_type: Data type for the inputs. The default value is float32.
    output_data_type: Data type for the outputs. The default value is float32.
    enable_numeric_verify: Experimental. Subject to change. Bool indicating
      whether to add NumericVerify ops into the debug mode quantized model.
    enable_whole_model_verify: Experimental. Subject to change. Bool indicating
      whether to add verification for layer by layer, or on whole model. When
      disabled (per-layer) float and quantized ops will be run from same input
      (output of previous quantized layer). When enabled, float and quantized
      ops will run with respective float and quantized output of previous ops.
    denylisted_ops: Experimental. Subject to change. Set of ops to denylist.
    denylisted_nodes: Experimental. Subject to change. Set of nodes to
      denylist.

  Returns:
    Quantized model in serialized form (e.g. a TFLITE model) with
    floating-point inputs and outputs.
  """
  # The wrapper speaks TFLite dtypes, so translate the TF dtypes at the
  # boundary before crossing into native code.
  tflite_input_type = convert_tensor_tf_type_to_tflite_type(input_data_type)
  tflite_output_type = convert_tensor_tf_type_to_tflite_type(output_data_type)
  return wrap_toco.wrapped_experimental_mlir_quantize(
      input_data_str, disable_per_channel, fully_quantize, inference_type,
      tflite_input_type, tflite_output_type, enable_numeric_verify,
      enable_whole_model_verify, denylisted_ops, denylisted_nodes)