def quantize_annotate(to_quantize, **kwargs):
  """Specify a layer or model to be quantized.

  This function does not actually quantize tensors. It merely wraps the keras
  layer (or each layer in the model) with `QuantizeAnnotate` to note which
  layers need to be quantized.

  Args:
    to_quantize: Keras layer or model to be quantized.
    **kwargs: Additional keyword arguments to be passed to the keras layer.

  Returns:
    Keras layer wrapped with `QuantizeAnnotate` if layer is passed. Else,
    a new keras model with each layer in the model wrapped with
    `QuantizeAnnotate`.

  Raises:
    ValueError: if `to_quantize` is neither a keras layer nor a keras model.
  """

  def _add_quant_wrapper(layer):
    # Already annotated layer. No need to wrap.
    if isinstance(layer, quantize_annotate_mod.QuantizeAnnotate):
      return layer
    return quantize_annotate_mod.QuantizeAnnotate(layer)

  if isinstance(to_quantize, keras.Model):
    return keras.models.clone_model(
        to_quantize, input_tensors=None, clone_function=_add_quant_wrapper)
  elif isinstance(to_quantize, keras.layers.Layer):
    # TODO(pulkitb): Consider removing support for annotating a single layer.
    # Parameters for annotating a layer are different from annotating a model.
    # This creates a discrepancy. It'll be better to just have separate APIs
    # for layer vs model.
    return quantize_annotate_mod.QuantizeAnnotate(
        layer=to_quantize, quantize_provider=None, **kwargs)
  else:
    # Previously an unsupported type fell through and returned None silently;
    # fail loudly instead so callers see the problem at the call site.
    raise ValueError(
        '`to_quantize` can only be a keras layer or model. You passed an '
        'instance of type: {}.'.format(to_quantize.__class__.__name__))
def quantize_annotate_layer(to_annotate, quantize_config=None, **kwargs):
  """Annotate a layer to be quantized.

  This function does not actually quantize anything. It is merely to specify
  that the layer needs to be quantized.

  Annotate a layer:

  ```python
  model = keras.Sequential([
      layers.Dense(10, activation='relu', input_shape=(100,)),
      quantize_annotate_layer(layers.Dense(2, activation='sigmoid'))
  ])
  ```

  Args:
    to_annotate: tf.keras layer to annotate to be quantized.
    quantize_config: `QuantizeConfig` to quantize layer.
    **kwargs: Additional keyword arguments to be passed to the keras layer.

  Returns:
    tf.keras layer wrapped with `QuantizeAnnotate`.

  Raises:
    ValueError: if `to_annotate` is None or not a tf.keras layer.
  """
  # Guard against None up front so the error is specific, not a generic
  # type-check failure.
  if to_annotate is None:
    raise ValueError('`to_annotate` cannot be None')

  # Check against keras.Model since it is an instance of keras.layers.Layer.
  if not isinstance(to_annotate, keras.layers.Layer) or isinstance(
      to_annotate, keras.Model):
    raise ValueError('`to_annotate` can only be a tf.keras `layer` instance.')

  return quantize_annotate_mod.QuantizeAnnotate(
      layer=to_annotate, quantize_config=quantize_config, **kwargs)
def quantize_annotate(to_quantize, **kwargs):  # pylint: disable=invalid-name
  """Specify a layer or model to be quantized.

  This function does not apply any quantization emulation operations. It merely
  wraps the keras layer (or each layer in the model) with `QuantizeAnnotate`
  to note which layers need to be quantized.

  Args:
    to_quantize: Keras layer or model to be quantized.
    **kwargs: Additional keyword arguments to be passed to the keras layer.

  Returns:
    Keras layer wrapped with `QuantizeAnnotate` if layer is passed. Else,
    a new keras model with each layer in the model wrapped with
    `QuantizeAnnotate`.

  Raises:
    ValueError: if `to_quantize` is neither a keras layer nor a keras model.
  """

  def _add_quant_wrapper(layer):
    # Already annotated layers are left untouched to avoid double wrapping.
    if isinstance(layer, quant_annotate.QuantizeAnnotate):
      return layer
    return quant_annotate.QuantizeAnnotate(layer)

  if isinstance(to_quantize, keras.Model):
    return keras.models.clone_model(
        to_quantize, input_tensors=None, clone_function=_add_quant_wrapper)
  elif isinstance(to_quantize, keras.layers.Layer):
    # TODO(pulkitb): Since annotation for model and layer have different
    # parameters, we should likely remove support for layers here.
    return quant_annotate.QuantizeAnnotate(to_quantize, **kwargs)
  else:
    # Previously fell through and returned None silently for other types.
    raise ValueError(
        '`to_quantize` can only be a keras layer or model. You passed an '
        'instance of type: {}.'.format(to_quantize.__class__.__name__))
def testRaisesErrorForUnsupportedLayer(self):
  """QuantizeAnnotate rejects layer types it does not know how to quantize."""

  class UnknownCustomLayer(keras.layers.Dense):
    pass

  unsupported = UnknownCustomLayer(10)
  with self.assertRaises(ValueError):
    quantize_annotate.QuantizeAnnotate(unsupported, **self.quant_params)
def testQuantizeAnnotateModel_HasAnnotatedLayers(self):
  """Annotating a model wraps every layer and keeps existing annotations."""

  class TestQuantizeProvider(quantize_provider_mod.QuantizeProvider):
    """Minimal no-op provider, used purely as a marker object."""

    def get_weights_and_quantizers(self, layer):
      pass

    def get_activations_and_quantizers(self, layer):
      pass

    def set_quantize_weights(self, layer, quantize_weights):
      pass

    def set_quantize_activations(self, layer, quantize_activations):
      pass

    def get_config(self):
      pass

  provider = TestQuantizeProvider()
  model = keras.Sequential([
      keras.layers.Dense(10, input_shape=(5,)),
      quant_annotate.QuantizeAnnotate(
          keras.layers.Dense(5), quantize_provider=provider)
  ])
  annotated_model = quantize_annotate(model)

  self._assertWrappedLayer(annotated_model.layers[0])
  self._assertWrappedLayer(annotated_model.layers[1], provider)
  # A layer that was already annotated must not be wrapped a second time.
  self.assertIsInstance(annotated_model.layers[1].layer, keras.layers.Dense)

  # Annotation is metadata only: predictions must be unchanged.
  inputs = np.random.rand(1, 5)
  self.assertAllEqual(model.predict(inputs), annotated_model.predict(inputs))
def _add_quant_wrapper(layer):
  """Wrap a single layer with `QuantizeAnnotate`, if it needs wrapping."""
  # An annotated layer is returned unchanged to avoid double wrapping.
  if isinstance(layer, quantize_annotate_mod.QuantizeAnnotate):
    return layer
  # Nested models cannot be annotated layer-by-layer here.
  if isinstance(layer, tf.keras.Model):
    raise ValueError(
        'Quantizing a tf.keras Model inside another tf.keras Model is not supported.'
    )
  return quantize_annotate_mod.QuantizeAnnotate(layer)
def testAppliesWrapperToAllClasses(self):
  """Wrapping a layer must not change its numerical output."""
  base_layer = keras.layers.Dense(5, activation='relu', input_shape=(10,))
  plain_model = keras.Sequential([base_layer])
  wrapped_model = keras.Sequential([
      quantize_annotate.QuantizeAnnotate(
          base_layer, num_bits=8, input_shape=(10,))
  ])

  sample = np.random.rand(10, 10)
  self.assertAllEqual(
      plain_model.predict(sample), wrapped_model.predict(sample))
def testAnnotateLayerCallPassesTraningBoolean(self):
  """`QuantizeAnnotate.call` forwards the `training` flag to the inner layer."""

  class MockLayer(tf.keras.layers.Layer):
    """Records the `training` value it was last called with."""

    def __init__(self):
      super(MockLayer, self).__init__()
      # Bug fix: the original wrote `self.training = None` directly in the
      # class body, which raises NameError (`self` is undefined there).
      self.training = None

    def call(self, training=None):
      self.training = training

  layer = MockLayer()
  wrapper = quantize_annotate.QuantizeAnnotate(layer=layer)

  wrapper.call(training=True)
  self.assertTrue(layer.training)
  wrapper.call(training=False)
  self.assertFalse(layer.training)
def quantize_annotate_layer(to_annotate, quantize_config=None):
  """Annotate a `tf.keras` layer to be quantized.

  This function does not actually quantize the layer. It is merely used to
  specify that the layer should be quantized. The layer then gets quantized
  accordingly when `quantize_apply` is used.

  This method should be used when the user wants to quantize only certain
  layers of the model, or change the default behavior of how a layer is
  quantized.

  Annotate a layer:

  ```python
  model = keras.Sequential([
      layers.Dense(10, activation='relu', input_shape=(100,)),
      quantize_annotate_layer(layers.Dense(2, activation='sigmoid'))
  ])

  # Only the second Dense layer is quantized.
  quantized_model = quantize_apply(model)
  ```

  Args:
    to_annotate: `tf.keras` layer which needs to be quantized.
    quantize_config: optional `QuantizeConfig` which controls how the layer is
      quantized. In its absence, the default behavior for the layer is used.

  Returns:
    `tf.keras` layer wrapped with `QuantizeAnnotate`.

  Raises:
    ValueError: if `to_annotate` is not a layer, or `quantize_config` is not
      a `QuantizeConfig` instance.
  """
  if to_annotate is None:
    raise ValueError('`to_annotate` cannot be None')

  # Check against keras.Model since it is an instance of keras.layers.Layer.
  if not isinstance(to_annotate, keras.layers.Layer) or isinstance(
      to_annotate, keras.Model):
    raise ValueError(
        '`to_annotate` can only be a `tf.keras.layers.Layer` instance. '
        'You passed an instance of type: {input}.'.format(
            input=to_annotate.__class__.__name__))

  if quantize_config is not None and not isinstance(
      quantize_config, quantize_config_mod.QuantizeConfig):
    # Bug fix: the two concatenated literals were missing a separating space,
    # producing "...instance.You passed...".
    raise ValueError(
        '`quantize_config` can only be a `tfmot.quantization.keras.QuantizeConfig` instance. '
        'You passed an instance of type: {input}.'.format(
            input=quantize_config.__class__.__name__))

  return quantize_annotate_mod.QuantizeAnnotate(
      layer=to_annotate, quantize_config=quantize_config)
def testAnnotatesCustomQuantizableLayer(self):
  """A user-defined quantizable layer can be annotated directly."""

  class CustomLayerQuantizable(keras.layers.Dense, QuantizeEmulatableLayer):
    """Dense subclass implementing the quantizable-layer interface."""

    def get_quantizable_weights(self):
      return [self.kernel]

    def set_quantizable_weights(self, weights):
      self.kernel = weights[0]

  wrapped = quantize_annotate.QuantizeAnnotate(
      CustomLayerQuantizable(10), **self.quant_params)

  self.assertIsInstance(wrapped.layer, CustomLayerQuantizable)
  self.assertEqual(self.quant_params, wrapped.get_quantize_params())
def testAnnotatesKerasLayer(self):
  """Annotation keeps the wrapped layer, provider, and its computation."""
  inner_layer = keras.layers.Dense(5, activation='relu', input_shape=(10,))
  reference_model = keras.Sequential([inner_layer])

  provider = self.TestQuantizeProvider()
  annotated_model = keras.Sequential([
      quantize_annotate.QuantizeAnnotate(
          inner_layer, quantize_provider=provider, input_shape=(10,))
  ])

  wrapper = annotated_model.layers[0]
  self.assertEqual(inner_layer, wrapper.layer)
  self.assertEqual(provider, wrapper.quantize_provider)

  # Annotated model should not affect computation. Returns same results.
  sample = np.random.rand(10, 10)
  self.assertAllEqual(
      reference_model.predict(sample), annotated_model.predict(sample))
def quantize_annotate(to_quantize, **kwargs):
  """Specify a layer or model to be quantized.

  This function does not actually quantize anything. It merely wraps the
  tf.keras layer (or each layer in the model) with `QuantizeAnnotate` to note
  which layers need to be quantized.

  Annotate a layer:

  ```python
  model = keras.Sequential([
      layers.Dense(10, activation='relu', input_shape=(100,)),
      quantize_annotate(layers.Dense(2, activation='sigmoid'))
  ])
  ```

  Note that this function removes the optimizer from the original model.

  Args:
    to_quantize: tf.keras layer to be quantized.
    **kwargs: Additional keyword arguments to be passed to the keras layer.

  Returns:
    tf.keras layer wrapped with `QuantizeAnnotate` if layer is passed. Else,
    a new tf.keras model with each layer in the model wrapped with
    `QuantizeAnnotate`.

  Raises:
    ValueError: if `to_quantize` is neither a keras layer nor a keras model.
  """

  def _add_quant_wrapper(layer):
    # Already annotated layer. No need to wrap.
    if isinstance(layer, quantize_annotate_mod.QuantizeAnnotate):
      return layer
    return quantize_annotate_mod.QuantizeAnnotate(layer)

  if isinstance(to_quantize, keras.Model):
    return keras.models.clone_model(
        to_quantize, input_tensors=None, clone_function=_add_quant_wrapper)
  elif isinstance(to_quantize, keras.layers.Layer):
    # TODO(pulkitb): Consider removing support for annotating a single layer.
    # Parameters for annotating a layer are different from annotating a model.
    # This creates a discrepancy. It'll be better to just have separate APIs
    # for layer vs model.
    return quantize_annotate_mod.QuantizeAnnotate(
        layer=to_quantize, quantize_provider=None, **kwargs)
  else:
    # Previously returned None silently for unsupported types; fail loudly.
    raise ValueError(
        '`to_quantize` can only be a keras layer or model. You passed an '
        'instance of type: {}.'.format(to_quantize.__class__.__name__))
def quantize_annotate(
    to_quantize, num_bits, narrow_range=True, symmetric=True, **kwargs):  # pylint: disable=invalid-name
  """Specify a layer or model to be quantized.

  This function does not apply any quantization emulation operations. It merely
  wraps the keras layer (or each layer in the model) with `QuantizeAnnotate`
  to note which layers need to be quantized.

  Args:
    to_quantize: Keras layer or model to be quantized.
    num_bits: Number of bits for quantization
    narrow_range: Whether to use the narrow quantization range
      [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].
    symmetric: If true, use symmetric quantization limits instead of training
      the minimum and maximum of each quantization range separately.
    **kwargs: Additional keyword arguments to be passed to the keras layer.

  Returns:
    Keras layer wrapped with `QuantizeAnnotate` if layer is passed. Else,
    a new keras model with each layer in the model wrapped with
    `QuantizeAnnotate`.

  Raises:
    ValueError: if `to_quantize` is neither a keras layer nor a keras model.
  """
  quant_params = {
      'num_bits': num_bits,
      'narrow_range': narrow_range,
      'symmetric': symmetric
  }

  def _add_quant_wrapper(layer):
    # Skip layers the user already annotated explicitly.
    if isinstance(layer, quant_annotate.QuantizeAnnotate):
      return layer
    return quant_annotate.QuantizeAnnotate(layer, **quant_params)

  if isinstance(to_quantize, keras.Model):
    return keras.models.clone_model(
        to_quantize, input_tensors=None, clone_function=_add_quant_wrapper)
  elif isinstance(to_quantize, keras.layers.Layer):
    quant_params.update(**kwargs)
    return quant_annotate.QuantizeAnnotate(to_quantize, **quant_params)
  else:
    # Previously fell through and returned None silently for other types.
    raise ValueError(
        '`to_quantize` can only be a keras layer or model. You passed an '
        'instance of type: {}.'.format(to_quantize.__class__.__name__))
def testSerializationQuantizeAnnotate(self):
  """QuantizeAnnotate round-trips through Keras (de)serialization."""
  wrapper = quantize_annotate.QuantizeAnnotate(
      layer=keras.layers.Dense(3),
      quantize_provider=self.TestQuantizeProvider(),
      input_shape=(2,))

  serialized_wrapper = serialize_layer(wrapper)

  # Deserialization needs the custom classes in scope to reconstruct them.
  scope = {
      'QuantizeAnnotate': quantize_annotate.QuantizeAnnotate,
      'TestQuantizeProvider': self.TestQuantizeProvider,
  }
  with keras.utils.custom_object_scope(scope):
    revived = deserialize_layer(serialized_wrapper)

  self.assertEqual(revived.get_config(), wrapper.get_config())
def quantize_annotate_layer(to_annotate, quantize_config=None):
  """Annotate a layer to be quantized.

  This function does not actually quantize anything. It is merely to specify
  that the layer needs to be quantized.

  Annotate a layer:

  ```python
  model = keras.Sequential([
      layers.Dense(10, activation='relu', input_shape=(100,)),
      quantize_annotate_layer(layers.Dense(2, activation='sigmoid'))
  ])
  ```

  Args:
    to_annotate: tf.keras layer to annotate to be quantized.
    quantize_config: `QuantizeConfig` to quantize layer.

  Returns:
    tf.keras layer wrapped with `QuantizeAnnotate`.

  Raises:
    ValueError: if `to_annotate` is not a layer, or `quantize_config` is not
      a `QuantizeConfig` instance.
  """
  if to_annotate is None:
    raise ValueError('`to_annotate` cannot be None')

  # Check against keras.Model since it is an instance of keras.layers.Layer.
  if not isinstance(to_annotate, keras.layers.Layer) or isinstance(
      to_annotate, keras.Model):
    raise ValueError(
        '`to_annotate` can only be a `tf.keras.layers.Layer` instance. '
        'You passed an instance of type: {input}.'.format(
            input=to_annotate.__class__.__name__))

  if quantize_config is not None and not isinstance(
      quantize_config, quantize_config_mod.QuantizeConfig):
    # Bug fix: the two concatenated literals were missing a separating space,
    # producing "...instance.You passed...".
    raise ValueError(
        '`quantize_config` can only be a `tfmot.quantization.keras.QuantizeConfig` instance. '
        'You passed an instance of type: {input}.'.format(
            input=quantize_config.__class__.__name__))

  return quantize_annotate_mod.QuantizeAnnotate(
      layer=to_annotate, quantize_config=quantize_config)
def testAnnotatesKerasLayer(self):
  """Annotation preserves the wrapped layer, params, and its computation."""
  inner_layer = keras.layers.Dense(5, activation='relu', input_shape=(10,))
  reference_model = keras.Sequential([inner_layer])

  annotated_model = keras.Sequential([
      quantize_annotate.QuantizeAnnotate(
          inner_layer, input_shape=(10,), **self.quant_params)
  ])

  wrapper = annotated_model.layers[0]
  self.assertIsInstance(wrapper.layer, keras.layers.Dense)
  self.assertEqual(self.quant_params, wrapper.get_quantize_params())

  # Annotated model should not affect computation. Returns same results.
  sample = np.random.rand(10, 10)
  self.assertAllEqual(
      reference_model.predict(sample), annotated_model.predict(sample))
def testQuantizeAnnotateModel_HasAnnotatedLayers(self):
  """Annotating a model wraps every layer and keeps existing annotations."""
  provider = _TestQuantizeProvider()
  model = keras.Sequential([
      keras.layers.Dense(10, input_shape=(5,)),
      quantize_annotate_mod.QuantizeAnnotate(
          keras.layers.Dense(5), quantize_provider=provider)
  ])
  annotated_model = quantize_annotate(model)

  self._assertWrappedLayer(annotated_model.layers[0])
  self._assertWrappedLayer(annotated_model.layers[1], provider)
  # A layer that was already annotated must not be wrapped a second time.
  self.assertIsInstance(annotated_model.layers[1].layer, keras.layers.Dense)

  # Annotation is metadata only: predictions must be unchanged.
  inputs = np.random.rand(1, 5)
  self.assertAllEqual(model.predict(inputs), annotated_model.predict(inputs))
def _add_quant_wrapper(layer):
  """Wrap `layer` with `QuantizeAnnotate` unless it is already annotated."""
  already_annotated = isinstance(layer, quant_annotate.QuantizeAnnotate)
  # NOTE: `quant_params` is captured from the enclosing scope.
  return layer if already_annotated else quant_annotate.QuantizeAnnotate(
      layer, **quant_params)
def testQuantizeAnnotate_FailsWithModel(self):
  """Passing a whole model to QuantizeAnnotate must raise ValueError."""
  dense = keras.layers.Dense(5, activation='relu', input_shape=(10,))
  whole_model = keras.Sequential([dense])

  with self.assertRaises(ValueError):
    quantize_annotate.QuantizeAnnotate(whole_model)
def _add_quant_wrapper(layer):
  """Return `layer` wrapped with `QuantizeAnnotate`; idempotent on wrappers."""
  already_wrapped = isinstance(layer, quantize_annotate_mod.QuantizeAnnotate)
  return layer if already_wrapped else quantize_annotate_mod.QuantizeAnnotate(
      layer)