def test_empty_calibrator_gen(self):
  model_path = resource_loader.get_path_to_datafile(
      'test_data/mobilenet_like_model.bin')
  float_model = open(model_path, 'rb').read()
  quantizer = _calibrator.Calibrator(float_model)

  def empty_input_gen():
    for i in ():
      yield i

  with self.assertRaises(RuntimeError):
    quantizer.calibrate_and_quantize(empty_input_gen, constants.FLOAT,
                                     constants.FLOAT, False)
def test_invalid_type_calibrator_gen(self):
  model_path = resource_loader.get_path_to_datafile(
      'test_data/mobilenet_like_model.bin')
  float_model = open(model_path, 'rb').read()
  quantizer = _calibrator.Calibrator(float_model)

  # Input generator with incorrect dtype (int32 instead of float32).
  def input_gen():
    for _ in range(10):
      yield np.ones(shape=(1, 5, 5, 3), dtype=np.int32)

  with self.assertRaises(ValueError):
    quantizer.calibrate_and_quantize(input_gen)
def test_calibration_with_quantization(self):
  model_path = resource_loader.get_path_to_datafile(
      'test_data/mobilenet_like_model.bin')
  float_model = open(model_path, 'rb').read()
  quantizer = _calibrator.Calibrator(float_model)

  # Input generator for the model.
  def input_gen():
    for _ in range(10):
      yield [np.ones(shape=(1, 5, 5, 3), dtype=np.float32)]

  quantized_model = quantizer.calibrate_and_quantize(input_gen)
  self.assertIsNotNone(quantized_model)
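# A minimal standalone sketch of the calibrate-and-quantize flow the tests
# above exercise. The model path below is a placeholder, not part of the test
# data; any float TFLite flatbuffer with a single (1, 5, 5, 3) float32 input
# would work the same way.
import numpy as np
from tensorflow.lite.python.optimize import calibrator as _calibrator

with open('/tmp/float_model.tflite', 'rb') as fp:  # hypothetical path
  float_model = fp.read()
quantizer = _calibrator.Calibrator(float_model)

def representative_gen():
  # Each yielded element is a list with one ndarray per model input.
  for _ in range(10):
    yield [np.ones(shape=(1, 5, 5, 3), dtype=np.float32)]

quantized_model = quantizer.calibrate_and_quantize(representative_gen)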
def test_calibration_with_string_input(self):
  model_path = resource_loader.get_path_to_datafile(
      'test_data/string_input_flex_model.bin')
  with open(model_path, 'rb') as fp:
    model_with_string_input = fp.read()
  quantizer = _calibrator.Calibrator(model_with_string_input)

  # Input generator for the model.
  def input_gen():
    for i in range(10):
      yield [np.array(u'Test' + str(i))]

  quantized_model = quantizer.calibrate_and_quantize_single(
      input_gen, dtypes.float32, dtypes.float32, True, 'Identity')
  self.assertIsNotNone(quantized_model)
def test_invalid_shape_calibrator_gen(self, enable_mlir):
  model_path = resource_loader.get_path_to_datafile(
      'test_data/mobilenet_like_model.bin')
  float_model = open(model_path, 'rb').read()
  quantizer = _calibrator.Calibrator(float_model)

  # Input generator with incorrect shape.
  def input_gen():
    for _ in range(10):
      yield [np.ones(shape=(1, 2, 2, 3), dtype=np.float32)]

  with self.assertRaisesRegex(ValueError, 'Size mismatch'):
    quantizer.calibrate_and_quantize(input_gen, constants.FLOAT,
                                     constants.FLOAT, False, enable_mlir)
def test_calibration_with_quantization_multiple_inputs(self):
  # Load multi add model from test data.
  # This model has 4 inputs of size (1, 8, 8, 3).
  model_path = resource_loader.get_path_to_datafile(
      '../../testdata/multi_add.bin')
  float_model = open(model_path, 'rb').read()
  quantizer = _calibrator.Calibrator(float_model)

  # Input generator for the model.
  def input_gen():
    for _ in range(10):
      yield [
          np.ones(shape=(1, 8, 8, 3), dtype=np.float32) for _ in range(4)
      ]

  quantized_model = quantizer.calibrate_and_quantize(input_gen,
                                                     constants.FLOAT,
                                                     constants.FLOAT, False)
  self.assertIsNotNone(quantized_model)
def run(args):
  """Calibrates and quantizes a float TFLite model."""
  import numpy as np
  import tensorflow as tf
  from tensorflow.lite.python.optimize import calibrator

  with open(args.image_file_list_path) as f:
    image_file_list = [l.split()[0] for l in f.readlines()]
  model_preprocessor_fn = preprocessors()[args.model_preprocessor]

  def _input_gen():
    """Yields one preprocessed batch of images at a time.

    Yields:
      A batch of one image, with shape exactly
      [1, model_input_size, model_input_size, num_channels].
    """
    import cv2
    for i, image_file in enumerate(image_file_list):
      log.info('Image %d of %d: %s' % (i + 1, len(image_file_list),
                                       image_file))
      cv2_image = cv2.imread(
          os.path.join(args.dataset_split_path, image_file))
      preprocessed_image = model_preprocessor_fn(cv2_image)
      image = np.array(preprocessed_image)
      yield [[image]]

  float32_tflite_model = open(args.input_tflite_model_path, 'rb').read()
  quantizer = calibrator.Calibrator(float32_tflite_model)
  tflite_quantized_model = quantizer.calibrate_and_quantize(
      _input_gen,
      allow_float=False,
      input_type=tf.int8,
      output_type=tf.int8,
  )
  with open(args.output_tflite_model_path, 'wb') as outfile:
    outfile.write(tflite_quantized_model)
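# A sketch of how run(args) might be wired up from the command line. The flag
# names simply mirror the attributes that run() reads; they are assumptions,
# not flags defined elsewhere in this file.
import argparse

if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument('--image_file_list_path', required=True)
  parser.add_argument('--dataset_split_path', required=True)
  parser.add_argument('--model_preprocessor', required=True)
  parser.add_argument('--input_tflite_model_path', required=True)
  parser.add_argument('--output_tflite_model_path', required=True)
  run(parser.parse_args())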
def test_invalid_model_buffer(self):
  float_model = b'\0' * 100
  with self.assertRaisesWithRegexpMatch(ValueError,
                                        'Failed to parse the model'):
    _calibrator.Calibrator(float_model)
def convert(self):
  """Converts a TensorFlow GraphDef based on instance variables.

  Returns:
    The converted data in serialized format. Either a TFLite Flatbuffer or a
    Graphviz graph depending on value in `output_format`.

  Raises:
    ValueError:
      Input shape is not specified.
      None value for dimension in input_tensor.
  """
  # Checks dimensions in input tensor.
  if self._has_valid_tensors():
    for tensor in self._input_tensors:
      shape = tensor.get_shape()
      if not shape:
        raise ValueError("Provide an input shape for input array "
                         "'{0}'.".format(_tensor_name(tensor)))
      # Note that shape_list might be empty for scalar shapes.
      shape_list = shape.as_list()
      if None in shape_list[1:]:
        raise ValueError(
            "None is only supported in the 1st dimension. Tensor '{0}' has "
            "invalid shape '{1}'.".format(_tensor_name(tensor), shape_list))
      elif shape_list and shape_list[0] is None:
        self._set_batch_size(batch_size=1)

  # Get quantization stats. Ensures there is one stat per name if the stats
  # are specified.
  if self.quantized_input_stats:
    quantized_stats = []
    invalid_stats = []
    for name in self.get_input_arrays():
      if name in self.quantized_input_stats:
        quantized_stats.append(self.quantized_input_stats[name])
      else:
        invalid_stats.append(name)
    if invalid_stats:
      raise ValueError(
          "Quantization input stats are not available for input "
          "tensors '{0}'.".format(",".join(invalid_stats)))
  else:
    quantized_stats = None

  if self.representative_dataset:
    if not isinstance(self.representative_dataset, RepresentativeDataset):
      raise TypeError("representative_dataset must be an instance of "
                      "RepresentativeDataset")
    if self.representative_dataset.input_gen is None:
      raise ValueError(
          "Provide an input generator for representative_dataset")

  # TODO(shashishekhar): For now, the order of the optimizations is ignored.
  # Both size and latency optimizations decide whether to apply post
  # training optimizations.
  post_training_optimize = bool(
      len(
          set(self.optimizations)
          & set([Optimize.OPTIMIZE_FOR_LATENCY, Optimize.OPTIMIZE_FOR_SIZE])))
  # Do weights-only quantization if there is no dataset for calibration.
  weights_only_quantize_flag = (
      post_training_optimize and (self.representative_dataset is None))

  converter_kwargs = {
      "inference_type": self.inference_type,
      "inference_input_type": self.inference_input_type,
      "input_format": constants.TENSORFLOW_GRAPHDEF,
      "output_format": self.output_format,
      "quantized_input_stats": quantized_stats,
      "default_ranges_stats": self.default_ranges_stats,
      "drop_control_dependency": self.drop_control_dependency,
      "reorder_across_fake_quant": self.reorder_across_fake_quant,
      "change_concat_input_ranges": self.change_concat_input_ranges,
      "allow_custom_ops": self.allow_custom_ops,
      "post_training_quantize": weights_only_quantize_flag,
      "target_ops": self.target_ops,
      "dump_graphviz_dir": self.dump_graphviz_dir,
      "dump_graphviz_video": self.dump_graphviz_video
  }

  optimized_graph = None
  if self.inference_type == constants.QUANTIZED_UINT8:
    optimized_graph = self._graph_def
  else:
    try:
      optimized_graph = _run_graph_optimizations(
          self._graph_def, self._input_tensors, self._output_tensors)
    except Exception:
      optimized_graph = self._graph_def

  # Converts model.
  if self._has_valid_tensors():
    result = _toco_convert_impl(
        input_data=optimized_graph,
        input_tensors=self._input_tensors,
        output_tensors=self._output_tensors,
        **converter_kwargs)
  else:
    result = _toco_convert_graph_def(
        input_data=optimized_graph,
        input_arrays_with_shape=self._input_arrays_with_shape,
        output_arrays=self._output_arrays,
        **converter_kwargs)

  if self.representative_dataset and post_training_optimize:
    calibrate_quantize = _calibrator.Calibrator(result)
    result = calibrate_quantize.calibrate_and_quantize(
        self.representative_dataset.input_gen)

  return result
def convert(self):
  """Converts a TensorFlow GraphDef based on instance variables.

  Returns:
    The converted data in serialized format.

  Raises:
    ValueError:
      Input shape is not specified.
      None value for dimension in input_tensor.
  """
  graph_def = _convert_to_constants.convert_variables_to_constants_v2(
      self._func)
  input_tensors = [
      tensor for tensor in self._func.inputs
      if tensor.dtype != _dtypes.resource
  ]
  output_tensors = self._func.outputs

  # Run a Grappler pass.
  graph_def = _run_graph_optimizations(graph_def, input_tensors,
                                       output_tensors, self._func.graph)

  # Checks dimensions in input tensor.
  for tensor in input_tensors:
    # Note that shape_list might be empty for scalar shapes.
    shape_list = tensor.get_shape().as_list()
    if None in shape_list[1:]:
      raise ValueError(
          "None is only supported in the 1st dimension. Tensor '{0}' has "
          "invalid shape '{1}'.".format(_tensor_name(tensor), shape_list))
    elif shape_list and shape_list[0] is None:
      self._set_batch_size(batch_size=1)

  if self.representative_dataset:
    if not isinstance(self.representative_dataset, RepresentativeDataset):
      raise TypeError("representative_dataset must be an instance of "
                      "RepresentativeDataset")
    if self.representative_dataset.input_gen is None:
      raise ValueError(
          "Provide an input generator for representative_dataset")

  # TODO(shashishekhar): For now, the order of the optimizations is ignored.
  # Both size and latency optimizations decide whether to apply post
  # training optimizations.
  post_training_optimize = bool(
      len(
          set(self.optimizations)
          & set([Optimize.OPTIMIZE_FOR_LATENCY, Optimize.OPTIMIZE_FOR_SIZE])))
  # Do weights-only quantization if there is no dataset for calibration.
  weights_only_quantize_flag = (
      post_training_optimize and (self.representative_dataset is None))

  converter_kwargs = {
      "input_format": constants.TENSORFLOW_GRAPHDEF,
      "allow_custom_ops": self.allow_custom_ops,
      "post_training_quantize": weights_only_quantize_flag,
      "target_ops": self.target_ops,
  }

  # Converts model.
  result = _toco_convert_impl(
      input_data=graph_def,
      input_tensors=input_tensors,
      output_tensors=output_tensors,
      **converter_kwargs)

  if self.representative_dataset and post_training_optimize:
    calibrate_quantize = _calibrator.Calibrator(result)
    result = calibrate_quantize.calibrate_and_quantize(
        self.representative_dataset.input_gen)

  return result
def convert(self):
  """Converts a TensorFlow GraphDef based on instance variables.

  Returns:
    The converted data in serialized format. Either a TFLite Flatbuffer or a
    Graphviz graph depending on value in `output_format`.

  Raises:
    ValueError:
      Input shape is not specified.
      None value for dimension in input_tensor.
  """
  # Checks dimensions in input tensor.
  if self._has_valid_tensors():
    for tensor in self._input_tensors:
      shape = tensor.shape
      if not shape:
        raise ValueError("Provide an input shape for input array "
                         "'{0}'.".format(_get_tensor_name(tensor)))
      # Note that shape_list might be empty for scalar shapes.
      shape_list = shape.as_list()
      if None in shape_list[1:]:
        raise ValueError(
            "None is only supported in the 1st dimension. Tensor '{0}' has "
            "invalid shape '{1}'.".format(
                _get_tensor_name(tensor), shape_list))
      elif shape_list and shape_list[0] is None:
        self._set_batch_size(batch_size=1)

  # Get quantization stats. Ensures there is one stat per name if the stats
  # are specified.
  if self.quantized_input_stats:
    quantized_stats = []
    invalid_stats = []
    for name in self.get_input_arrays():
      if name in self.quantized_input_stats:
        quantized_stats.append(self.quantized_input_stats[name])
      else:
        invalid_stats.append(name)
    if invalid_stats:
      raise ValueError("Quantization input stats are not available for "
                       "input tensors '{0}'.".format(",".join(invalid_stats)))
  else:
    quantized_stats = None

  if self.representative_dataset:
    if not isinstance(self.representative_dataset, RepresentativeDataset):
      raise TypeError("representative_dataset must be an instance of "
                      "RepresentativeDataset")
    if self.representative_dataset.input_gen is None:
      raise ValueError(
          "Provide an input generator for representative_dataset")

  post_training_optimize = bool(
      len(
          set(self.optimizations)
          & set([Optimize.OPTIMIZE_FOR_LATENCY, Optimize.OPTIMIZE_FOR_SIZE])))
  # Do weights-only quantization if there is no dataset for calibration.
  weights_only_quantize_flag = (
      post_training_optimize and (self.representative_dataset is None))

  toco_inference_input_type = self.inference_input_type
  inference_input_type = self.inference_input_type
  inference_output_type = self.inference_output_type
  if post_training_optimize:
    # Post training optimizations require that TOCO outputs a float model.
    if self.inference_type != constants.FLOAT:
      raise ValueError(
          "`optimizations` require that `inference_type` is set to float.")
    toco_inference_input_type = constants.FLOAT
    # Set up default values.
    if inference_input_type is None:
      inference_input_type = constants.FLOAT
    if inference_output_type is None:
      inference_output_type = constants.FLOAT

  if weights_only_quantize_flag:
    # Currently, weights-only quantization requires float inputs and outputs.
    if (inference_input_type != constants.FLOAT or
        inference_output_type != constants.FLOAT):
      raise ValueError(
          "Provide an inference_input_type and inference_output_type of type "
          "tf.float32.")

  if not post_training_optimize and self.inference_output_type is not None:
    raise ValueError(
        "inference_output_type is currently not supported if optimizations "
        "are not enabled.")

  converter_kwargs = {
      "inference_type": self.inference_type,
      "inference_input_type": toco_inference_input_type,
      "input_format": constants.TENSORFLOW_GRAPHDEF,
      "output_format": self.output_format,
      "quantized_input_stats": quantized_stats,
      "default_ranges_stats": self.default_ranges_stats,
      "drop_control_dependency": self.drop_control_dependency,
      "reorder_across_fake_quant": self.reorder_across_fake_quant,
      "change_concat_input_ranges": self.change_concat_input_ranges,
      "allow_custom_ops": self.allow_custom_ops,
      "post_training_quantize": weights_only_quantize_flag,
      "target_ops": self.target_ops,
      "dump_graphviz_dir": self.dump_graphviz_dir,
      "dump_graphviz_video": self.dump_graphviz_video
  }

  optimized_graph = None
  if self.inference_type == constants.QUANTIZED_UINT8:
    optimized_graph = self._graph_def
  else:
    try:
      is_only_flex_enabled = set([OpsSet.SELECT_TF_OPS]) == self.target_ops
      config = _get_grappler_config(
          enable_layout_optimizer=is_only_flex_enabled)
      optimized_graph = _run_graph_optimizations(
          self._graph_def, self._input_tensors, self._output_tensors, config)
    except Exception:
      optimized_graph = self._graph_def

  # Converts model.
  if self._has_valid_tensors():
    result = _toco_convert_impl(
        input_data=optimized_graph,
        input_tensors=self._input_tensors,
        output_tensors=self._output_tensors,
        **converter_kwargs)
  else:
    result = _toco_convert_graph_def(
        input_data=optimized_graph,
        input_arrays_with_shape=self._input_arrays_with_shape,
        output_arrays=self._output_arrays,
        **converter_kwargs)

  if self.representative_dataset and post_training_optimize:
    calibrate_quantize = _calibrator.Calibrator(result)
    result = calibrate_quantize.calibrate_and_quantize(
        self.representative_dataset.input_gen, inference_input_type,
        inference_output_type)

  return result
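# For context, a sketch of the user-facing flow that reaches the calibration
# branch at the end of convert(): enabling a post-training optimization and
# attaching a representative dataset. The saved-model path, input shape, and
# generator body are placeholders.
import numpy as np
import tensorflow as tf

def representative_gen():
  for _ in range(100):
    yield [np.ones(shape=(1, 224, 224, 3), dtype=np.float32)]

converter = tf.lite.TFLiteConverter.from_saved_model('/tmp/saved_model')
converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
converter.representative_dataset = tf.lite.RepresentativeDataset(
    representative_gen)
tflite_model = converter.convert()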
def convert(self):
  """Converts a TensorFlow GraphDef based on instance variables.

  Returns:
    The converted data in serialized format.

  Raises:
    ValueError:
      Multiple concrete functions are specified.
      Input shape is not specified.
      Invalid quantization parameters.
  """
  # TODO(b/130297984): Add support for converting multiple functions.
  if len(self._funcs) != 1:
    raise ValueError("This converter can only convert a single "
                     "ConcreteFunction. Converting multiple functions is "
                     "under development.")

  frozen_func = _convert_to_constants.convert_variables_to_constants_v2(
      self._funcs[0])
  input_tensors = [
      tensor for tensor in frozen_func.inputs
      if tensor.dtype != _dtypes.resource
  ]
  output_tensors = frozen_func.outputs

  # Run a Grappler pass.
  is_only_flex_enabled = set(
      [OpsSet.SELECT_TF_OPS]) == self.target_spec.supported_ops
  config = _get_grappler_config(enable_layout_optimizer=is_only_flex_enabled)
  graph_def = _run_graph_optimizations(
      frozen_func.graph.as_graph_def(),
      input_tensors,
      output_tensors,
      config,
      graph=frozen_func.graph)

  # Checks dimensions in input tensor.
  for tensor in input_tensors:
    # Note that shape_list might be empty for scalar shapes.
    shape_list = tensor.shape.as_list()
    if None in shape_list[1:]:
      raise ValueError(
          "None is only supported in the 1st dimension. Tensor '{0}' has "
          "invalid shape '{1}'.".format(_get_tensor_name(tensor), shape_list))
    elif shape_list and shape_list[0] is None:
      # Set the batch size to 1 if undefined.
      shape = tensor.shape.as_list()
      shape[0] = 1
      tensor.set_shape(shape)

  if self.representative_dataset:
    if not isinstance(self.representative_dataset, RepresentativeDataset):
      raise TypeError("`representative_dataset` must be an instance of "
                      "`RepresentativeDataset`")
    if self.representative_dataset.input_gen is None:
      raise ValueError(
          "Provide an input generator for `representative_dataset`")

  # TODO(shashishekhar): For now, the order of the optimizations is ignored.
  # Both size and latency optimizations decide whether to apply post
  # training optimizations.
  post_training_optimize = bool(
      len(
          set(self.optimizations)
          & set([Optimize.OPTIMIZE_FOR_LATENCY, Optimize.OPTIMIZE_FOR_SIZE])))
  # Do weights-only quantization if there is no dataset for calibration.
  weights_only_quantize_flag = (
      post_training_optimize and (self.representative_dataset is None))

  converter_kwargs = {
      "input_format": constants.TENSORFLOW_GRAPHDEF,
      "allow_custom_ops": self.allow_custom_ops,
      "post_training_quantize": weights_only_quantize_flag,
      "target_ops": self.target_spec.supported_ops,
  }

  # Converts model.
  result = _toco_convert_impl(
      input_data=graph_def,
      input_tensors=input_tensors,
      output_tensors=output_tensors,
      **converter_kwargs)

  if self.representative_dataset and post_training_optimize:
    calibrate_quantize = _calibrator.Calibrator(result)
    result = calibrate_quantize.calibrate_and_quantize(
        self.representative_dataset.input_gen)

  return result
def test_invalid_model_buffer(self, enable_mlir):
  float_model = b'\0' * 100
  with self.assertRaisesRegex(ValueError, 'Failed to parse the model'):
    _calibrator.Calibrator(float_model)
def _calibrate_quantize_model(self, result, inference_input_type,
                              inference_output_type):
  calibrate_quantize = _calibrator.Calibrator(result)
  return calibrate_quantize.calibrate_and_quantize(
      self.representative_dataset.input_gen, inference_input_type,
      inference_output_type)
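# A standalone sketch of what the helper above does, assuming `float_model`
# is a float TFLite flatbuffer and `input_gen` is a representative-dataset
# generator like those in the tests. The lite_constants import mirrors the
# `constants` alias used throughout these snippets and is an assumption here.
from tensorflow.lite.python import lite_constants as constants
from tensorflow.lite.python.optimize import calibrator as _calibrator

def calibrate_quantize_model(float_model, input_gen):
  quantizer = _calibrator.Calibrator(float_model)
  return quantizer.calibrate_and_quantize(input_gen, constants.FLOAT,
                                          constants.FLOAT)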