Example #1
def apply_quantization(model):
    # Helper function uses `quantize_annotate_layer` to annotate that only the
    # layer types in `kernelization_map` (currently just Conv2D) should be quantized.
    def add_quantize_annotation(layer):
        kernelization_map = [
            # tf.keras.layers.Dense,
            tf.keras.layers.Conv2D
        ]

        for layer_type in kernelization_map:
            if isinstance(layer, layer_type):
                quantize_config = SLCQuantizeConfig()

                log.info(
                    "**Kernelization annotation added to layer {} of type {} with {}".format(layer.name, layer_type,
                                                                                             quantize_config))

                quantized_layer = quantize_annotate_layer(to_annotate=layer, quantize_config=quantize_config)
                return quantized_layer
        log.info("**Kernelization annotation not added to layer {} of type {}".format(layer.name, type(layer)))

        return layer

    # Use `tf.keras.models.clone_model` to apply `add_quantize_annotation`
    # to the layers of the model.
    log.info("Annotating model {}".format(model.name))
    annotated_model = tf.keras.models.clone_model(
        model,
        clone_function=add_quantize_annotation,
    )

    with quantize_scope({
        'SLCQuantizeConfig': SLCQuantizeConfig,
        "SLCWeightGenerator": SLCWeightGenerator,
        "SLCRegularizer": SLCRegularizer
    }):
        # Use `quantize_apply` to actually make the model kernelization aware.
        quant_aware_model = quantize_apply(annotated_model)

        original_size = 0
        compressed_size = 0
        for layer in quant_aware_model.layers:
            try:
                original_size += layer.original_size
                if layer.compressed:
                    compressed_size += layer.compressed_size
                else:
                    compressed_size += layer.original_size
            except AttributeError:
                # Layers that do not expose these size attributes are skipped.
                pass
        try:
            ratio = compressed_size * 100.0 / original_size
            log.info(
                "Model original size: {}, compressed size: {}, ratio: {:.2f}%".format(original_size, compressed_size, ratio))
        except ZeroDivisionError:
            log.info(
                "Model original size is 0 (no layer reported a size); compressed size: {}".format(compressed_size))

        return quant_aware_model
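Note: SLCQuantizeConfig, SLCWeightGenerator and SLCRegularizer above are project-specific classes that are not included in this listing. As a rough, illustrative sketch (not the SLC implementation), a custom QuantizeConfig that quantizes only the conv kernel with tfmot's built-in 8-bit quantizer could look like this:

import tensorflow_model_optimization as tfmot

class MinimalConvQuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
    """Illustrative stand-in for a custom config such as SLCQuantizeConfig."""

    def get_weights_and_quantizers(self, layer):
        # Quantize only the kernel of the wrapped layer.
        return [(layer.kernel,
                 tfmot.quantization.keras.quantizers.LastValueQuantizer(
                     num_bits=8, symmetric=True, narrow_range=False,
                     per_axis=False))]

    def get_activations_and_quantizers(self, layer):
        return []  # activations left untouched in this sketch

    def set_quantize_weights(self, layer, quantize_weights):
        layer.kernel = quantize_weights[0]

    def set_quantize_activations(self, layer, quantize_activations):
        pass

    def get_output_quantizers(self, layer):
        return []

    def get_config(self):
        return {}

Such a config is then passed to quantize_annotate_layer exactly as SLCQuantizeConfig is in the helper above.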
Example #2
    def testEndToEndClusterPreserve(self):
        """Runs CQAT end to end and whole model is quantized."""
        original_model = tf.keras.Sequential(
            [layers.Dense(5, activation='softmax', input_shape=(10, ))])
        clustered_model = cluster.cluster_weights(original_model,
                                                  **self.cluster_params)
        self.compile_and_fit(clustered_model)
        clustered_model = cluster.strip_clustering(clustered_model)
        num_of_unique_weights_clustering = self._get_number_of_unique_weights(
            clustered_model, 0, 'kernel')

        quant_aware_annotate_model = (
            quantize.quantize_annotate_model(clustered_model))

        quant_aware_model = quantize.quantize_apply(
            quant_aware_annotate_model,
            scheme=default_8bit_cluster_preserve_quantize_scheme.
            Default8BitClusterPreserveQuantizeScheme())

        self.compile_and_fit(quant_aware_model)
        stripped_cqat_model = strip_clustering_cqat(quant_aware_model)

        # Check the unique weights of a certain layer of
        # clustered_model and the stripped CQAT model
        num_of_unique_weights_cqat = self._get_number_of_unique_weights(
            stripped_cqat_model, 1, 'kernel')
        self.assertAllEqual(num_of_unique_weights_clustering,
                            num_of_unique_weights_cqat)
Example #3
    def testPassingNonPrunedModelToPCQAT(self):
        """Runs PCQAT as CQAT if the input model is not pruned."""
        preserve_sparsity = False
        clustered_model = self._get_clustered_model(preserve_sparsity)

        clustered_model = cluster.strip_clustering(clustered_model)
        nr_of_unique_weights_after = self._get_number_of_unique_weights(
            clustered_model, 0, 'kernel')

        # Check after plain clustering, if there are no zero weights,
        # PCQAT falls back to CQAT
        quant_aware_annotate_model = (
            quantize.quantize_annotate_model(clustered_model))

        quant_aware_model = quantize.quantize_apply(
            quant_aware_annotate_model,
            scheme=default_8bit_cluster_preserve_quantize_scheme.
            Default8BitClusterPreserveQuantizeScheme(True))

        self.compile_and_fit(quant_aware_model)
        stripped_pcqat_model = strip_clustering_cqat(quant_aware_model)

        # Check the unique weights of clustered_model and pcqat_model
        num_of_unique_weights_pcqat = self._get_number_of_unique_weights(
            stripped_pcqat_model, 1, 'kernel')
        self.assertAllEqual(nr_of_unique_weights_after,
                            num_of_unique_weights_pcqat)
Example #4
def measure_sparsity(model):
    assert quantize_base.SET_CUSTOM_TNH_FLAG, (
        "TFMOD needs to be modified with quantizer disabled for proper "
        "running")

    # Helper function uses `quantize_annotate_layer` to annotate every layer
    # with a SparsityMeter config (no layer-type filtering is applied here).
    def add_sparsity_annotation(layer):
        quantize_config = SparsityMeter()
        log.info(
            "**Sparsity Measure annotation added to layer {} with {}".format(
                layer.name, quantize_config))
        quantized_layer = quantize_annotate_layer(
            to_annotate=layer, quantize_config=quantize_config)
        return quantized_layer

    log.info("Annotating model {}".format(model.name))
    tf.keras.backend.clear_session()
    annotated_model = tf.keras.models.clone_model(
        model, clone_function=add_sparsity_annotation)

    with quantize_scope({
            'SparsityMeter': SparsityMeter,
            "ActivSparsityMeasure ": ActivSparsityMeasure,
            "WeightsSparsityMeasure ": WeightsSparsityMeasure
    }):
        # Use `quantize_apply` to actually make the model Sparsity Measure aware.
        quant_aware_model = quantize_apply(annotated_model)

        return quant_aware_model
Example #5
def cluster_preserve_quantize_model(clustered_model, train_images,
                                    train_labels):
    """Cluster-preserve QAT model."""
    quant_aware_annotate_model = (
        quantize.quantize_annotate_model(clustered_model))
    quant_aware_model = quantize.quantize_apply(
        quant_aware_annotate_model,
        scheme=default_8bit_cluster_preserve_quantize_scheme.
        Default8BitClusterPreserveQuantizeScheme())
    quant_aware_model.summary()
    compile_and_fit(quant_aware_model, train_images, train_labels, 1)

    return quant_aware_model
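Several of the examples above and below call a compile_and_fit helper that is not shown. A plausible minimal version (hypothetical; the real helper differs between the source projects, which is why some calls pass an epoch count and others pass compile_kwargs/fit_kwargs) might be:

def compile_and_fit(model, train_images, train_labels, epochs=1,
                    compile_kwargs=None, fit_kwargs=None):
    """Hypothetical helper: compile the (quantization-aware) model and train it briefly."""
    compile_kwargs = dict(compile_kwargs or {})
    fit_kwargs = dict(fit_kwargs or {})
    compile_kwargs.setdefault('loss', 'sparse_categorical_crossentropy')
    compile_kwargs.setdefault('optimizer', 'adam')
    compile_kwargs.setdefault('metrics', ['accuracy'])
    fit_kwargs.setdefault('epochs', epochs)
    model.compile(**compile_kwargs)
    model.fit(train_images, train_labels, **fit_kwargs)
    return model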
Example #6
def apply_quantization(model):
    assert quantize_base.SET_CUSTOM_TNH_FLAG, (
        "TFMOD needs to be modified with quantizer disabled for proper "
        "running")

    # Helper function uses `quantize_annotate_layer` to annotate that only the
    # layer types in `quantization_map` (currently just Conv2D) should be quantized.
    def add_quantize_annotation(layer):
        # create new layer to break link with old model
        layer = layer.__class__.from_config(layer.get_config())

        quantization_map = [
            # tf.keras.layers.Dense,
            tf.keras.layers.Conv2D
            # tf.keras.layers.Input: BFPInputQuantizerConfig()
        ]

        for layer_type in quantization_map:

            if isinstance(layer, layer_type):
                quantize_config = SLGQuantizeConfig()

                log.info(
                    "**SLG annotation added to layer {} of type {} with {}".
                    format(layer.name, layer_type, quantize_config))

                quantized_layer = quantize_annotate_layer(
                    to_annotate=layer, quantize_config=quantize_config)
                return quantized_layer
        log.info("**SLG annotation not added to layer {} of type {}".format(
            layer.name, type(layer)))

        return layer

    # Use `tf.keras.models.clone_model` to apply `add_quantize_annotation`
    # to the layers of the model.
    log.info("Annotating model {}".format(model.name))

    tf.keras.backend.clear_session()
    annotated_model = tf.keras.models.clone_model(
        model,
        clone_function=add_quantize_annotation,
    )

    with quantize_scope({
            "SLGWeightGenerator": SLGWeightGenerator,
            "SLGQuantizeConfig": SLGQuantizeConfig,
    }):
        # Use `quantize_apply` to actually make the model quantization aware.
        quant_aware_model = quantize_apply(annotated_model)
        return quant_aware_model
Example #7
  def testCustomWeightQuantizers_Run(self, quantizer_type):
    init_params = self._get_quant_params(quantizer_type)

    # Additional test that the same quantizer object can be shared
    # between Configs, though we don't explicitly promote this
    # anywhere in the documentation.
    quantizer = quantizer_type(**init_params)

    class DenseQuantizeConfig(QuantizeConfig):
      """Custom QuantizeConfig for Dense layer."""

      def get_weights_and_quantizers(self, layer):
        return [(layer.kernel, quantizer)]

      def get_activations_and_quantizers(self, layer):
        # Defaults.
        return [(layer.activation,
                 MovingAverageQuantizer(
                     num_bits=8,
                     per_axis=False,
                     symmetric=False,
                     narrow_range=False))]

      def set_quantize_weights(self, layer, quantize_weights):
        layer.kernel = quantize_weights[0]

      def set_quantize_activations(self, layer, quantize_activations):
        return

      def get_output_quantizers(self, layer):
        return []

      def get_config(self):
        return {}

    annotated_model = tf.keras.Sequential([
        quantize.quantize_annotate_layer(
            l.Dense(8, input_shape=(10,)), DenseQuantizeConfig()),
        quantize.quantize_annotate_layer(
            l.Dense(5), DenseQuantizeConfig())
    ])

    with quantize.quantize_scope(
        {'DenseQuantizeConfig': DenseQuantizeConfig}):
      quant_model = quantize.quantize_apply(annotated_model)

    # Check no error happens.
    self._train_model(quant_model)
Example #8
def prune_cluster_preserve_quantize_model(clustered_model, preserve_sparsity):
    """Prune_cluster_preserve QAT model."""

    pcqat_epoch = 1
    quant_aware_annotate_model = quantize.quantize_annotate_model(
        clustered_model)
    quant_aware_model = quantize.quantize_apply(
        quant_aware_annotate_model,
        scheme=default_8bit_cluster_preserve_quantize_scheme.
        Default8BitClusterPreserveQuantizeScheme(preserve_sparsity))

    callbacks = []
    quant_aware_model = _train_model(quant_aware_model, callbacks, pcqat_epoch)
    pcqat_stripped = cluster_utils.strip_clustering_cqat(quant_aware_model)

    return quant_aware_model, pcqat_stripped
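The clustered_model passed into this function is expected to have been clustered and trained beforehand. A rough sketch of producing one with tfmot's standard clustering API, assuming a hypothetical base_model and training data (clustering parameters are illustrative):

import tensorflow_model_optimization as tfmot

cluster_weights = tfmot.clustering.keras.cluster_weights
CentroidInitialization = tfmot.clustering.keras.CentroidInitialization

clustering_params = {
    'number_of_clusters': 8,
    'cluster_centroids_init': CentroidInitialization.KMEANS_PLUS_PLUS,
}
clustered_model = cluster_weights(base_model, **clustering_params)
clustered_model.compile(loss='sparse_categorical_crossentropy',
                        optimizer='adam', metrics=['accuracy'])
clustered_model.fit(train_images, train_labels, epochs=2)
# Remove the clustering wrappers before handing the model to CQAT/PCQAT.
clustered_model = tfmot.clustering.keras.strip_clustering(clustered_model)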
Example #9
def apply_quantization(model):
    # Helper function uses `quantize_annotate_layer` to annotate that the
    # Dense and Conv2D layers should be quantized.
    def add_quantize_annotation(layer):
        # create new layer to break link with old model
        layer = layer.__class__.from_config(layer.get_config())

        quantization_map = {
            tf.keras.layers.Dense: BFPQuantizeConfig(),
            tf.keras.layers.Conv2D: BFPQuantizeConfig()
        }

        for layer_type, quantize_config in quantization_map.items():
            if isinstance(layer, layer_type):
                print(
                    "**Quantization annotation added to layer {} of type {} with {}"
                    .format(layer.name, layer_type, quantize_config))

                quantized_layer = quantize_annotate_layer(
                    to_annotate=layer, quantize_config=quantize_config)
                return quantized_layer
        print("**Quantization annotation not added to layer {} of type {}".
              format(layer.name, type(layer)))

        return layer

    # Use `tf.keras.models.clone_model` to apply `add_quantize_annotation`
    # to the layers of the model.
    print("Annotating model {}".format(model.name))

    annotated_model = tf.keras.models.clone_model(
        model,
        clone_function=add_quantize_annotation,
    )

    with quantize_scope({
            'BFPQuantizeConfig': BFPQuantizeConfig,
            "BFPActivQuantizer": BFPActivQuantizer,
            "BFPWeightQuantizer": BFPWeightQuantizer,
            "BFPBiasQuantizer": BFPBiasQuantizer,
            "PolynomialDecay": PolynomialDecay
    }):
        # Use `quantize_apply` to actually make the model quantization aware.
        quant_aware_model = quantize_apply(annotated_model)
        return quant_aware_model
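The names registered in quantize_scope are needed again whenever the saved quantization-aware model is deserialized. A sketch of that round trip, using the BFP classes from this example (the save path and HDF5 format are illustrative):

quant_aware_model = apply_quantization(model)
quant_aware_model.save('/tmp/bfp_qat_model.h5')

with quantize_scope({
        'BFPQuantizeConfig': BFPQuantizeConfig,
        'BFPActivQuantizer': BFPActivQuantizer,
        'BFPWeightQuantizer': BFPWeightQuantizer,
        'BFPBiasQuantizer': BFPBiasQuantizer,
        'PolynomialDecay': PolynomialDecay
}):
    reloaded_model = tf.keras.models.load_model('/tmp/bfp_qat_model.h5')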
Example #10
    def _pcqat_training(self, preserve_sparsity, quant_aware_annotate_model):
        """PCQAT training on the input model."""
        quant_aware_model = quantize.quantize_apply(
            quant_aware_annotate_model,
            scheme=default_8bit_cluster_preserve_quantize_scheme.
            Default8BitClusterPreserveQuantizeScheme(preserve_sparsity))

        self.compile_and_fit(quant_aware_model)

        stripped_pcqat_model = strip_clustering_cqat(quant_aware_model)

        # Check the unique weights of clustered_model and pcqat_model
        # layer 0 is the quantize_layer
        num_of_unique_weights_pcqat = self._get_number_of_unique_weights(
            stripped_pcqat_model, 1, 'kernel')

        sparsity_pcqat = self._get_sparsity(stripped_pcqat_model)

        return sparsity_pcqat, num_of_unique_weights_pcqat
Example #11
    def testEndToEndClusterPreserveOneLayer(self):
        """Runs CQAT end to end and model is quantized only for a single layer."""
        original_model = tf.keras.Sequential([
            layers.Dense(5, activation='relu', input_shape=(10, )),
            layers.Dense(5,
                         activation='softmax',
                         input_shape=(10, ),
                         name='qat')
        ])
        clustered_model = cluster.cluster_weights(original_model,
                                                  **self.cluster_params)
        self.compile_and_fit(clustered_model)
        clustered_model = cluster.strip_clustering(clustered_model)
        num_of_unique_weights_clustering = self._get_number_of_unique_weights(
            clustered_model, 1, 'kernel')

        def apply_quantization_to_dense(layer):
            if isinstance(layer, tf.keras.layers.Dense):
                if layer.name == 'qat':
                    return quantize.quantize_annotate_layer(layer)
            return layer

        quant_aware_annotate_model = tf.keras.models.clone_model(
            clustered_model,
            clone_function=apply_quantization_to_dense,
        )

        quant_aware_model = quantize.quantize_apply(
            quant_aware_annotate_model,
            scheme=default_8bit_cluster_preserve_quantize_scheme.
            Default8BitClusterPreserveQuantizeScheme())

        self.compile_and_fit(quant_aware_model)

        stripped_cqat_model = strip_clustering_cqat(quant_aware_model)

        # Check the unique weights of a certain layer of
        # clustered_model and the stripped CQAT model
        num_of_unique_weights_cqat = self._get_number_of_unique_weights(
            stripped_cqat_model, 1, 'kernel')
        self.assertAllEqual(num_of_unique_weights_clustering,
                            num_of_unique_weights_cqat)
Example #12
    def testModelEndToEnd(self, model_type):
        # 1. Check whether quantized model graph can be constructed.
        model = self._get_model(model_type)
        model = quantize.quantize_apply(quantize.quantize_annotate(model))

        # 2. Sanity check to ensure basic training on random data works.
        x_train, y_train = self._create_test_data(model)
        model.compile(loss='categorical_crossentropy',
                      optimizer='sgd',
                      metrics=['accuracy'])
        model.fit(x_train, y_train)

        # 3. Ensure conversion to TFLite works.
        _, tflite_file = tempfile.mkstemp('.tflite')
        print('TFLite File: ', tflite_file)
        with quantize.quantize_scope():
            utils.convert_keras_to_tflite(
                model,
                tflite_file,
                inference_input_type=tf.lite.constants.FLOAT)

        # 4. Verify input runs on converted model.
        self._verify_tflite(tflite_file, x_train, y_train)
Example #13
def prune_preserve_quantize_model(pruned_model, train_images, train_labels):
  batch_size = 256
  epochs = 5

  pruned_model = prune.strip_pruning(pruned_model)
  # Prune preserve QAT model
  quant_aware_annotate_model = quantize.quantize_annotate_model(pruned_model)
  quant_aware_model = quantize.quantize_apply(
      quant_aware_annotate_model,
      scheme=default_8bit_prune_preserve_quantize_scheme
      .Default8BitPrunePreserveQuantizeScheme())
  quant_aware_model.summary()

  fit_kwargs = {
      'batch_size': batch_size,
      'epochs': epochs,
  }
  compile_and_fit(quant_aware_model,
                  train_images,
                  train_labels,
                  compile_kwargs={},
                  fit_kwargs=fit_kwargs)

  return quant_aware_model
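Example #13 expects a pruned_model that still carries its pruning wrappers (strip_pruning is called inside the function). A hedged sketch of producing such a model with tfmot's pruning API, assuming a hypothetical base_model and training data (schedule values are illustrative):

import tensorflow_model_optimization as tfmot

pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
        initial_sparsity=0.0, final_sparsity=0.5,
        begin_step=0, end_step=1000),
}
pruned_model = tfmot.sparsity.keras.prune_low_magnitude(base_model, **pruning_params)
pruned_model.compile(loss='sparse_categorical_crossentropy',
                     optimizer='adam', metrics=['accuracy'])
# UpdatePruningStep is required when fitting a prune-wrapped model.
pruned_model.fit(train_images, train_labels, epochs=2,
                 callbacks=[tfmot.sparsity.keras.UpdatePruningStep()])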
Example #14
model = tf.keras.Sequential([
    quantize.quantize_annotate_layer(
        l.Conv2D(32, 5, padding='same', activation='relu'),
        input_shape=input_shape),
    l.MaxPooling2D((2, 2), (2, 2), padding='same'),
    quantize.quantize_annotate_layer(
        l.Conv2D(64, 5, padding='same', activation='relu')),
    l.MaxPooling2D((2, 2), (2, 2), padding='same'),
    l.Flatten(),
    quantize.quantize_annotate_layer(l.Dense(1024, activation='relu')),
    l.Dropout(0.4),
    quantize.quantize_annotate_layer(l.Dense(num_classes)),
    # TODO(alanchiao): fuse softmax once we've handled it.
    l.Softmax(),
])

model = quantize.quantize_apply(model)

# Dump graph to /tmp for verification on tensorboard.
graph_def = tf.get_default_graph().as_graph_def()
with open('/tmp/mnist_model.pbtxt', 'w') as f:
    f.write(str(graph_def))

model.compile(loss=tf.keras.losses.categorical_crossentropy,
              optimizer=tf.keras.optimizers.Adadelta(),
              metrics=['accuracy'])

model.fit(x_train,
          y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1)
Example #15
    def testTrainableWeightsBehaveCorrectlyDuringPCQAT(self):
        """PCQAT zero centroid masks stay the same and trainable variables are updating between epochs."""
        preserve_sparsity = True
        clustered_model = self._get_clustered_model(preserve_sparsity)
        clustered_model = cluster.strip_clustering(clustered_model)

        # Apply PCQAT
        quant_aware_annotate_model = (
            quantize.quantize_annotate_model(clustered_model))

        quant_aware_model = quantize.quantize_apply(
            quant_aware_annotate_model,
            scheme=default_8bit_cluster_preserve_quantize_scheme.
            Default8BitClusterPreserveQuantizeScheme(True))

        quant_aware_model.compile(
            loss=tf.keras.losses.categorical_crossentropy,
            optimizer='adam',
            metrics=['accuracy'],
        )

        class CheckCentroidsAndTrainableVarsCallback(
                tf.keras.callbacks.Callback):
            """Check the updates of trainable variables and centroid masks."""
            def on_epoch_begin(self, batch, logs=None):
                # Check cluster centroids have the zero in the right position
                vars_dictionary = self.model.layers[1]._weight_vars[0][2]
                self.centroid_mask = vars_dictionary['centroids_mask']
                self.zero_centroid_index_begin = np.where(
                    self.centroid_mask == 0)[0]

                # Check trainable weights before training
                self.layer_kernel = (self.model.layers[1].weights[3].numpy())
                self.original_weight = vars_dictionary[
                    'ori_weights_vars_tf'].numpy()
                self.centroids = vars_dictionary['cluster_centroids_tf'].numpy(
                )

            def on_epoch_end(self, batch, logs=None):
                # Check the index of the zero centroids are not changed after training
                vars_dictionary = self.model.layers[1]._weight_vars[0][2]
                self.zero_centroid_index_end = np.where(
                    vars_dictionary['centroids_mask'] == 0)[0]
                assert np.array_equal(self.zero_centroid_index_begin,
                                      self.zero_centroid_index_end)

                # Check trainable variables after training are updated
                assert not np.array_equal(
                    self.layer_kernel, self.model.layers[1].weights[3].numpy())
                assert not np.array_equal(
                    self.original_weight,
                    vars_dictionary['ori_weights_vars_tf'].numpy())
                assert not np.array_equal(
                    self.centroids,
                    vars_dictionary['cluster_centroids_tf'].numpy())

        # Use many epochs to verify layer's kernel weights are updating because
        # they can stay the same after being trained using only the first batch
        # of data for instance
        quant_aware_model.fit(
            np.random.rand(20, 10),
            tf.keras.utils.to_categorical(np.random.randint(5, size=(20, 1)),
                                          5),
            steps_per_epoch=5,
            epochs=3,
            callbacks=[CheckCentroidsAndTrainableVarsCallback()])
Example #16
def apply_quantization(model,
                       pruning_policy=None,
                       weight_precision=None,
                       activation_precision=None,
                       activation_margin=None):
    # assert quantize_base.SET_CUSTOM_TNH_FLAG, log.info("TFMOD needs to be modified with quantizer disabled for proper "
    #                                                    "running")

    if weight_precision is not None:
        global _WEIGHTS_NUM_BITS  # need to declare when you want to change the value
        _WEIGHTS_NUM_BITS = weight_precision

    if activation_precision is not None:
        global _ACTIV_NUM_BITS
        _ACTIV_NUM_BITS = activation_precision

    if activation_margin is not None:
        global _ACTIV_MARGIN
        _ACTIV_MARGIN = activation_margin

    log.info(
        "Weights num bits: {} - Activ num bits: {} - Activ margin: {}".format(
            _WEIGHTS_NUM_BITS, _ACTIV_NUM_BITS, _ACTIV_MARGIN))

    # Helper function uses `quantize_annotate_layer` to annotate that only the
    # layer types listed in `quantization_map` should be quantized.
    def add_quantize_annotation(layer):
        # create new layer to break link with old model
        try:
            layer = layer.__class__.from_config(layer.get_config())
        except Exception:
            # Fall back to the original layer if it cannot be rebuilt from its config.
            pass

        for layer_type in quantization_map:

            if isinstance(layer, layer_type):

                if isinstance(pruning_policy, float) or pruning_policy is None:
                    layer_pruning = pruning_policy
                elif isinstance(pruning_policy, dict):
                    layer_pruning = pruning_policy[layer.name]
                else:
                    raise ValueError("Illegal layer pruning policy {}".format(
                        pruning_policy))

                quantize_config = BFPQuantizeConfig(
                    pruning_policy=layer_pruning)

                log.info(
                    "**Quantization annotation added to layer {} of type {} with {}"
                    .format(layer.name, layer_type, quantize_config))

                quantized_layer = quantize_annotate_layer(
                    to_annotate=layer, quantize_config=quantize_config)
                return quantized_layer
        log.info("**Quantization annotation not added to layer {} of type {}".
                 format(layer.name, type(layer)))

        return layer

    # Use `tf.keras.models.clone_model` to apply `add_quantize_annotation`
    # to the layers of the model.
    log.info("Annotating model {}".format(model.name))

    tf.keras.backend.clear_session()
    annotated_model = tf.keras.models.clone_model(
        model,
        clone_function=add_quantize_annotation,
    )

    with quantize_scope({
            'BFPQuantizeConfig': BFPQuantizeConfig,
            "BFPActivQuantizer": BFPActivQuantizer,
            "BFPWeightQuantizer": BFPWeightQuantizer,
            "BFPBiasQuantizer": BFPBiasQuantizer,
            "PolynomialDecay": PolynomialDecay
    }):
        # Use `quantize_apply` to actually make the model quantization aware.
        quant_aware_model = quantize_apply(annotated_model)

    # Copy the kernel (and bias, when present) from the original model back into
    # the corresponding quantize-wrapped layers.
    for q_layer in quant_aware_model.layers:
        if isinstance(q_layer, QuantizeWrapper):
            for quant_type in quantization_map:
                if isinstance(q_layer.layer, quant_type):
                    original_name = q_layer.name.replace("quant_", "")
                    old_layer = model.get_layer(original_name)

                    q_weights = q_layer.get_weights()
                    orig_weights = old_layer.get_weights()

                    q_weights[0] = orig_weights[0]
                    try:
                        q_weights[1] = orig_weights[1]
                    except IndexError:
                        pass
                    q_layer.set_weights(q_weights)

    return quant_aware_model
Example #17
    def quantize(self):
        model = self.get_model()

        def apply_quantization_to_dense(layer):
            if isinstance(layer, (tf.keras.layers.Dense, tf.keras.layers.Conv2D)):
                return quantize_annotate_layer(layer)
            return layer

        # Use `tf.keras.models.clone_model` to apply `apply_quantization_to_dense`
        # to the layers of the model.
        annotated_model = tf.keras.models.clone_model(
            model,
            clone_function=apply_quantization_to_dense,
        )

        # Now that the Dense and Conv2D layers are annotated,
        # `quantize_apply` actually makes the model quantization aware.
        quant_aware_model = quantize_apply(annotated_model)
        quant_aware_model.summary()
        (train_images,
         train_labels), (test_images,
                         test_labels) = self.get_dataset().load_data()
        self.compile_model(quant_aware_model)
        self.compile_model(model)
        CLASS_NAMES = [
            'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog',
            'horse', 'ship', 'truck'
        ]

        for x, y in tf.data.Dataset.from_tensor_slices(
            (test_images, test_labels)).map(self.process_images).shuffle(
                buffer_size=1024).take(10).batch(1):
            plt.title(CLASS_NAMES[y[0][0]])
            plt.imshow(x[0])
            print([
                CLASS_NAMES[i]
                for i in tf.argsort(quant_aware_model.predict(x))[0]
            ])
            print([CLASS_NAMES[i] for i in tf.argsort(model.predict(x))[0]])
            plt.show()
        exit()  # NOTE: debugging early exit; the TFLite conversion below never runs while this is here
        converter = tf.lite.TFLiteConverter.from_keras_model(model)
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        (train_images,
         train_labels), (test_images,
                         test_labels) = self.get_dataset().load_data()
        train_images, train_labels = train_images[5000:], train_labels[5000:]

        def representative_dataset():
            for data, label in tf.data.Dataset.from_tensor_slices(
                (train_images,
                 train_labels)).map(self.process_images).batch(1).take(100):
                yield [tf.cast(data, tf.float32)]

        converter.representative_dataset = representative_dataset
        converter.target_spec.supported_ops = [
            tf.lite.OpsSet.TFLITE_BUILTINS_INT8
        ]
        converter.inference_input_type = tf.int8  # or tf.uint8
        converter.inference_output_type = tf.int8  # or tf.uint8
        tflite_quant_model = converter.convert()
        logger.info('Model quantized for tensorflow lite successfully')
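Once converter.convert() has produced the int8 flatbuffer, the result can be sanity-checked with the TFLite interpreter. A minimal sketch (the zero-valued placeholder input and the manual int8 quantization of the input are illustrative):

import numpy as np

interpreter = tf.lite.Interpreter(model_content=tflite_quant_model)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()[0]
output_details = interpreter.get_output_details()[0]

# Quantize a float image into the int8 range the converted model expects.
scale, zero_point = input_details['quantization']
image = np.zeros(input_details['shape'], dtype=np.float32)  # placeholder input
int8_image = np.round(image / scale + zero_point).astype(np.int8)

interpreter.set_tensor(input_details['index'], int8_image)
interpreter.invoke()
logits = interpreter.get_tensor(output_details['index'])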