Example #1
    def test_quantize_simple_rnn_export(self):
        """ Test model export for recurrent models """
        tf.reset_default_graph()

        sess = tf.Session()
        np.random.seed(0)
        tf.set_random_seed(0)

        with sess.graph.as_default():
            inputs = tf.keras.Input(shape=(3, 100))

            # Add an RNN layer with 10 internal units.
            x = tf.keras.layers.SimpleRNN(10,
                                          name='rnn1',
                                          return_sequences=True)(inputs)
            x = tf.keras.layers.SimpleRNN(10, name='rnn2')(x)

            _ = tf.keras.layers.Dense(10, activation=tf.nn.softmax,
                                      name="fc")(x)

            init = tf.global_variables_initializer()
            sess.run(init)

        sim = QuantizationSimModel(sess, ['input_1'], ['fc/Softmax'],
                                   use_cuda=False)

        def dummy_forward_pass(sess, args):
            model_output = sess.graph.get_tensor_by_name('fc/Softmax:0')
            model_input = sess.graph.get_tensor_by_name('input_1:0')
            dummy_input = np.random.randn(1, 3, 100)
            sess.run(model_output, feed_dict={model_input: dummy_input})

        sim.compute_encodings(dummy_forward_pass, None)
        sim.export('./data', 'rnn_quantsim')

        new_sess = load_model_from_meta('./data/rnn_quantsim.meta')

        dummy_forward_pass(new_sess, None)

        all_op_types = [op.type for op in new_sess.graph.get_operations()]
        self.assertNotIn('QcQuantize', all_op_types)
        self.assertNotIn('QcQuantizeRecurrentParam', all_op_types)

        # Load the encodings file to check if the encodings were exported correctly
        with open("./data/rnn_quantsim.encodings", "r") as encodings_file:
            encodings = json.load(encodings_file)
            self.assertEqual(8, len(encodings['activation_encodings']))
            self.assertEqual(5, len(encodings['param_encodings']))

        # close tf sessions
        sess.close()
        sim.session.close()
        del sim
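For quick reference, the export flow in the example above condenses to the sketch below. This is a minimal sketch, not AIMET's canonical recipe: the QuantizationSimModel import path is an assumption, and calibrating on a single random batch only mirrors what the test does.

from aimet_tensorflow.quantsim import QuantizationSimModel  # import path assumed

def quantize_and_export(sess, input_name, output_name, dummy_input,
                        export_dir='./data', prefix='quantsim_model'):
    """Sketch: build a quantsim over an existing tf.compat.v1 session, calibrate, export."""
    sim = QuantizationSimModel(sess, [input_name], [output_name], use_cuda=False)

    def forward_pass(session, _):
        # One dummy batch is enough to populate the encodings, as in the test above
        out = session.graph.get_tensor_by_name(output_name + ':0')
        inp = session.graph.get_tensor_by_name(input_name + ':0')
        session.run(out, feed_dict={inp: dummy_input})

    sim.compute_encodings(forward_pass, None)
    sim.export(export_dir, prefix)  # writes <prefix>.meta and <prefix>.encodings
    return sim

# Usage with the names from Example #1:
# quantize_and_export(sess, 'input_1', 'fc/Softmax', np.random.randn(1, 3, 100))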
Example #2
    def test_quantize_simple_rnn_save_and_load_checkpoint(self):
        """ Test model export for recurrent models """
        tf.reset_default_graph()

        sess = tf.Session()
        np.random.seed(0)
        tf.set_random_seed(0)

        with sess.graph.as_default():
            inputs = tf.keras.Input(shape=(3, 100))

            # Add an RNN layer with 10 internal units.
            x = tf.keras.layers.SimpleRNN(10,
                                          name='rnn1',
                                          return_sequences=True)(inputs)
            x = tf.keras.layers.SimpleRNN(10, name='rnn2')(x)

            _ = tf.keras.layers.Dense(10, activation=tf.nn.softmax,
                                      name="fc")(x)

            init = tf.global_variables_initializer()
            sess.run(init)

        sim = QuantizationSimModel(sess, ['input_1'], ['fc/Softmax'],
                                   use_cuda=False)

        def eval(sess, input_tensor):
            model_output = sess.graph.get_tensor_by_name('fc/Softmax:0')
            model_input = sess.graph.get_tensor_by_name('input_1:0')
            out = sess.run(model_output, feed_dict={model_input: input_tensor})
            return out

        def dummy_forward_pass(sess, args):
            dummy_input = np.random.randn(1, 3, 100)
            eval(sess, dummy_input)

        sim.compute_encodings(dummy_forward_pass, None)
        random_tensor = np.random.randn(1, 3, 100)
        old_out = eval(sim.session, random_tensor)

        save_checkpoint(sim, './data/', 'simple_rnn_save')
        new_sim = load_checkpoint('./data', 'simple_rnn_save')

        # Check that inference through the new sim produces exactly the same output as the old sim.
        # This verifies that the quantization parameters were restored correctly and that the
        # quantize-dequantize ops in the new session (and their underlying pymo objects) can still be invoked.
        new_out = eval(new_sim.session, random_tensor)
        self.assertTrue(np.allclose(old_out, new_out))
        sim.session.close()
        del sim
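The save/restore check in this example can be factored into a small helper. A sketch, assuming save_checkpoint and load_checkpoint are importable from the same quantsim module the tests use (verify the import path for your AIMET version):

import numpy as np
from aimet_tensorflow.quantsim import save_checkpoint, load_checkpoint  # import path assumed

def checkpoint_roundtrip_matches(sim, eval_fn, sample, ckpt_dir='./data', name='quantsim_ckpt'):
    """Sketch: save a calibrated quantsim, reload it, and verify identical outputs."""
    before = eval_fn(sim.session, sample)
    save_checkpoint(sim, ckpt_dir, name)
    restored = load_checkpoint(ckpt_dir, name)
    after = eval_fn(restored.session, sample)
    restored.session.close()
    return np.allclose(before, after)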
Example #3
    def test_compute_encodings(self):
        """ Test that ops not evaluated during compute encodings are set to passThrough mode. """
        tf.compat.v1.reset_default_graph()
        sess = tf.compat.v1.Session()
        test_inp = np.ndarray((1, 32, 32, 3))

        def dummy_forward_func(sess, _):
            input_tensor = sess.graph.get_tensor_by_name('input_1:0')
            output_tensor = sess.graph.get_tensor_by_name('flatten/Reshape:0')
            sess.run(output_tensor, feed_dict={input_tensor: test_inp})

        with sess.as_default():
            _ = keras_model_functional()
            init = tf.compat.v1.global_variables_initializer()
            sess.run(init)
            sim = QuantizationSimModel(sess, ['input_1'],
                                       ['keras_model_functional/Softmax'])
            sim.compute_encodings(dummy_forward_func, None)

            for name, quant_info in sim._activation_quantizers.items():
                if name in [
                        'keras_model_functional/Softmax_quantized',
                        'keras_model_functional/BiasAdd_quantized'
                ]:
                    # Quantizers downstream of the ops evaluated during compute_encodings should be in passThrough (3) mode
                    self.assertEqual(quant_info.get_op_mode(), 3)
                    self.assertFalse(
                        quant_info.tensor_quantizer.isEncodingValid)
                elif name in ['scope_1/conv2d_3/BiasAdd_quantized']:
                    # Check that passThrough quantizers remain as passThrough (3)
                    self.assertEqual(quant_info.get_op_mode(), 3)
                    self.assertFalse(
                        quant_info.tensor_quantizer.isEncodingValid)
                else:
                    # Check that all other quantizers are in quantizeDequantize (2) mode
                    self.assertEqual(quant_info.get_op_mode(), 2)
                    self.assertTrue(
                        quant_info.tensor_quantizer.isEncodingValid)

            input_tensor = sim.session.graph.get_tensor_by_name('input_1:0')
            output_tensor = sim.session.graph.get_tensor_by_name(
                'keras_model_functional/Softmax:0')
            sim.session.run(output_tensor, feed_dict={input_tensor: test_inp})
            sim.session.close()
            del sim
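The literal op-mode values asserted above (2 and 3) correspond to members of libpymo.TensorQuantizerOpMode, which Examples #8 and #9 use by name. A sketch of the mapping; the integer values are taken from the comments in these examples and should be re-checked against your build:

import libpymo  # pymo bindings, imported the same way as elsewhere in these tests

# Op modes referenced by the hard-coded assertions above
OP_MODE_ONE_SHOT_QDQ = int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize)  # 1
OP_MODE_QUANTIZE_DEQUANTIZE = int(libpymo.TensorQuantizerOpMode.quantizeDequantize)  # 2
OP_MODE_PASS_THROUGH = int(libpymo.TensorQuantizerOpMode.passThrough)                # 3

print(OP_MODE_ONE_SHOT_QDQ, OP_MODE_QUANTIZE_DEQUANTIZE, OP_MODE_PASS_THROUGH)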
Example #4
    def run_inference(self,
                      ckpt_path,
                      image_files,
                      labels,
                      enable_ema=True,
                      export_ckpt=None):
        """Build and run inference on the target images and labels."""
        label_offset = 1 if self.include_background_label else 0
        with tf.Graph().as_default():
            sess = tf.Session()
            images, labels = self.build_dataset(image_files, labels, False)
            probs = self.build_model(images, is_training=False)
            if isinstance(probs, tuple):
                probs = probs[0]

            if not self.ckpt_bn_folded:
                saver = tf.train.Saver()
                saver.restore(sess, ckpt_path)
            else:
                sess.run(tf.global_variables_initializer())

        # Fold all BatchNorms before QuantSim
        sess, folded_pairs = fold_all_batch_norms(sess, ['IteratorGetNext'],
                                                  ['logits'])

        if self.ckpt_bn_folded:
            with sess.graph.as_default():
                checkpoint = ckpt_path
                saver = tf.train.Saver()
                saver.restore(sess, checkpoint)

        sess.run('MakeIterator')

        # Define an eval function to use during compute encodings
        def eval_func(sess, iterations):
            sess.run('MakeIterator')
            for _ in range(iterations):
                out_probs = sess.run('Squeeze:0')

        # Select the right quant_scheme
        if self.quant_scheme == 'range_learning_tf':
            quant_scheme = aimet_common.defs.QuantScheme.training_range_learning_with_tf_init
        elif self.quant_scheme == 'range_learning_tf_enhanced':
            quant_scheme = aimet_common.defs.QuantScheme.training_range_learning_with_tf_enhanced_init
        elif self.quant_scheme == 'tf':
            quant_scheme = aimet_common.defs.QuantScheme.post_training_tf
        elif self.quant_scheme == 'tf_enhanced':
            quant_scheme = aimet_common.defs.QuantScheme.post_training_tf_enhanced
        else:
            raise ValueError("Got unrecognized quant_scheme: " +
                             self.quant_scheme)

        # Create QuantizationSimModel
        sim = QuantizationSimModel(
            session=sess,
            starting_op_names=['IteratorGetNext'],
            output_op_names=['logits'],
            quant_scheme=quant_scheme,
            rounding_mode=self.round_mode,
            default_output_bw=self.default_output_bw,
            default_param_bw=self.default_param_bw,
            config_file=self.quantsim_config_file,
        )

        # Run compute_encodings
        sim.compute_encodings(eval_func, forward_pass_callback_args=500)

        # Run final evaluation
        sess = sim.session
        sess.run('MakeIterator')
        prediction_idx = []
        prediction_prob = []
        for _ in range(len(image_files) // self.batch_size):
            out_probs = sess.run('Squeeze:0')
            idx = np.argsort(out_probs)[::-1]
            prediction_idx.append(idx[:5] - label_offset)
            prediction_prob.append([out_probs[pid] for pid in idx[:5]])

        # Return the top 5 predictions (idx and prob) for each image.
        return prediction_idx, prediction_prob
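The string-to-QuantScheme ladder above reappears verbatim in Example #12; it can also be written as a lookup table. A sketch using the same enum members:

import aimet_common.defs

# Table-driven equivalent of the if/elif chain above (same QuantScheme members)
_QUANT_SCHEMES = {
    'range_learning_tf': aimet_common.defs.QuantScheme.training_range_learning_with_tf_init,
    'range_learning_tf_enhanced': aimet_common.defs.QuantScheme.training_range_learning_with_tf_enhanced_init,
    'tf': aimet_common.defs.QuantScheme.post_training_tf,
    'tf_enhanced': aimet_common.defs.QuantScheme.post_training_tf_enhanced,
}

def resolve_quant_scheme(name):
    try:
        return _QUANT_SCHEMES[name]
    except KeyError:
        raise ValueError("Got unrecognized quant_scheme: " + name)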
Example #5
    def test_save_load_ckpt_after_compute_encoding_on_orig_object(self):
        """
        Create QuantSim for a CPU model, test save and load on a quantsim model
        when encodings have been computed on the original quantsim object
        """
        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            model = tf.keras.Sequential()
            model.add(
                tf.keras.layers.Conv2D(32,
                                       kernel_size=3,
                                       input_shape=(28, 28, 3),
                                       activation='relu'))
            model.add(tf.keras.layers.MaxPooling2D((2, 2)))
            model.add(
                tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
            model.summary()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, [model.input.op.name],
                                   [model.output.op.name],
                                   use_cuda=False)

        def dummy_forward_pass(n_sess, args):
            model_output = n_sess.graph.get_tensor_by_name(model.output.name)
            model_output = model_output.consumers()[0].outputs[0]
            model_input = n_sess.graph.get_tensor_by_name(model.input.name)
            dummy_input = np.random.randn(20, 28, 28, 3)
            n_sess.run(model_output, feed_dict={model_input: dummy_input})

        sim.compute_encodings(dummy_forward_pass, None)

        # save quantsim model
        save_checkpoint(sim, './test_3', 'orig_quantsim_model')

        new_quantsim = load_checkpoint('./test_3', 'orig_quantsim_model')

        # validations
        assert (sim is not new_quantsim)

        # Since compute_encodings() was performed on the saved quantsim object, the encoding-valid flags
        # and encoding min/max values in the loaded quantsim object must match those of the original
        for quantize_op in new_quantsim._param_quantizers:
            new_quant = new_quantsim._param_quantizers[quantize_op]
            old_quant = sim._param_quantizers[quantize_op]
            self.assertEqual(new_quant.tensor_quantizer.isEncodingValid,
                             old_quant.tensor_quantizer.isEncodingValid)
            self.assertEqual(
                new_quant.get_variable_from_op(QuantizeOpIndices.encoding_min),
                old_quant.get_variable_from_op(QuantizeOpIndices.encoding_min))
            self.assertEqual(
                new_quant.get_variable_from_op(QuantizeOpIndices.encoding_max),
                old_quant.get_variable_from_op(QuantizeOpIndices.encoding_max))

        for quantize_op in new_quantsim._activation_quantizers:
            new_quant = new_quantsim._activation_quantizers[quantize_op]
            old_quant = sim._activation_quantizers[quantize_op]
            self.assertEqual(new_quant.tensor_quantizer.isEncodingValid,
                             old_quant.tensor_quantizer.isEncodingValid)
            self.assertEqual(
                new_quant.get_variable_from_op(QuantizeOpIndices.encoding_min),
                old_quant.get_variable_from_op(QuantizeOpIndices.encoding_min))
            self.assertEqual(
                new_quant.get_variable_from_op(QuantizeOpIndices.encoding_max),
                old_quant.get_variable_from_op(QuantizeOpIndices.encoding_max))

        # delete temp folder created and close sessions
        shutil.rmtree('./test_3')
        sess.close()
        sim.session.close()
        new_quantsim.session.close()
        del sim
        del new_quantsim
Example #6
    def test_export_cpu_model(self):
        """
        Create QuantSim for a CPU model, compute encodings and export the resulting model
        """
        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            model = tf.keras.Sequential()
            model.add(
                tf.keras.layers.Conv2D(32,
                                       kernel_size=3,
                                       input_shape=(28, 28, 3),
                                       activation='relu'))
            model.add(tf.keras.layers.MaxPooling2D((2, 2)))
            model.add(
                tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
            model.summary()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, [model.input.op.name],
                                   [model.output.op.name],
                                   use_cuda=False)

        def dummy_forward_pass(sess, args):
            model_output = sess.graph.get_tensor_by_name(model.output.name)
            model_output = model_output.consumers()[0].outputs[0]
            model_input = sess.graph.get_tensor_by_name(model.input.name)
            dummy_input = np.random.randn(20, 28, 28, 3)
            sess.run(model_output, feed_dict={model_input: dummy_input})

        sim.compute_encodings(dummy_forward_pass, None)

        # Make some changes to model parameters to see if they are part of the exported model
        with sim.session.graph.as_default():
            first_bias_tensor = sim.session.graph.get_tensor_by_name(
                'conv2d/BiasAdd/ReadVariableOp:0')
            first_bias_tensor_val = sim.session.run(first_bias_tensor)
            self.assertTrue(np.any(first_bias_tensor_val == 0))
            first_bias_tensor_var = [
                var for var in tf.compat.v1.global_variables()
                if var.name == 'conv2d/bias:0'
            ][0]
            first_bias_tensor_var.load(np.ones(32), sim.session)

        all_op_types = [op.type for op in sim.session.graph.get_operations()]
        self.assertIn('QcQuantize', all_op_types)

        sim.export('/tmp', 'quant_sim_model')

        with open('/tmp/quant_sim_model.encodings') as json_file:
            encoding_data = json.load(json_file)
        activation_keys = list(encoding_data["activation_encodings"].keys())
        self.assertEqual("conv2d/Relu:0", activation_keys[0])
        self.assertIsInstance(
            encoding_data["activation_encodings"]["conv2d/Relu:0"], list)
        act_encoding_keys = encoding_data["activation_encodings"][
            "conv2d/Relu:0"][0].keys()
        self.assertIn("bitwidth", act_encoding_keys)
        self.assertIn("is_symmetric", act_encoding_keys)
        self.assertIn("max", act_encoding_keys)
        self.assertIn("min", act_encoding_keys)
        self.assertIn("offset", act_encoding_keys)
        self.assertIn("scale", act_encoding_keys)

        param_keys = list(encoding_data["param_encodings"].keys())
        self.assertEqual("conv2d/Conv2D/ReadVariableOp:0", param_keys[0])
        self.assertIsInstance(
            encoding_data["param_encodings"]["conv2d/Conv2D/ReadVariableOp:0"],
            list)
        param_encoding_keys = encoding_data["param_encodings"][
            "conv2d/Conv2D/ReadVariableOp:0"][0].keys()
        self.assertIn("bitwidth", param_encoding_keys)
        self.assertIn("is_symmetric", param_encoding_keys)
        self.assertIn("max", param_encoding_keys)
        self.assertIn("min", param_encoding_keys)
        self.assertIn("offset", param_encoding_keys)
        self.assertIn("scale", param_encoding_keys)

        new_sess = load_model_from_meta('/tmp/quant_sim_model.meta')
        first_bias_tensor = new_sess.graph.get_tensor_by_name(
            'conv2d/BiasAdd/ReadVariableOp:0')
        first_bias_tensor_val = new_sess.run(first_bias_tensor)
        self.assertTrue(np.any(first_bias_tensor_val == 1))

        all_op_types = [op.type for op in new_sess.graph.get_operations()]
        self.assertNotIn('QcQuantize', all_op_types)
        sess.close()
        sim.session.close()
        del sim
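This example pins down the layout of the exported .encodings file: each section maps a tensor name to a list of encoding dicts with bitwidth, is_symmetric, max, min, offset and scale. A small sketch that walks that layout (the file path is a placeholder):

import json

def summarize_encodings(path):
    """Sketch: print one line per encoding entry of an exported AIMET encodings file."""
    with open(path) as f:
        data = json.load(f)
    for section in ('activation_encodings', 'param_encodings'):
        for tensor_name, entries in data.get(section, {}).items():
            for enc in entries:
                print('{} {} bw={} min={} max={} scale={} offset={} symmetric={}'.format(
                    section, tensor_name, enc['bitwidth'], enc['min'], enc['max'],
                    enc['scale'], enc['offset'], enc['is_symmetric']))

# e.g. summarize_encodings('/tmp/quant_sim_model.encodings')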
Example #7
    def test_compute_encodings_quant_scheme_update(self):
        """
        Create QuantSim model and update quantScheme using property interface
        """

        tf.compat.v1.reset_default_graph()
        np.random.seed(0)
        tf.compat.v1.set_random_seed(0)

        with tf.device('/gpu:0'):
            model = tf.keras.Sequential()
            model.add(
                tf.keras.layers.Conv2D(32,
                                       kernel_size=3,
                                       input_shape=(28, 28, 3),
                                       activation='relu'))
            model.add(tf.keras.layers.MaxPooling2D((2, 2)))
            model.add(
                tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
            model.summary()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, ['conv2d_input'], ['conv2d_1/Relu'],
                                   use_cuda=True)

        # Check that op-mode is set correctly
        conv2d_weight_quant_op = sim.session.graph.get_operation_by_name(
            'conv2d/Conv2D/ReadVariableOp_quantized')

        self.assertEqual(
            int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
            sim.session.run(conv2d_weight_quant_op.inputs[1]))

        def dummy_forward_pass(sess, args):
            np.random.seed(0)
            tf.compat.v1.set_random_seed(0)
            model_output = sess.graph.get_tensor_by_name(
                'conv2d_1/Relu_quantized:0')
            model_input = sess.graph.get_tensor_by_name('conv2d_input:0')
            dummy_input = np.random.randn(20, 28, 28, 3)
            sess.run(model_output, feed_dict={model_input: dummy_input})

        sim.compute_encodings(dummy_forward_pass, None)

        p_quantizer = sim.quantizer_config(
            'conv2d/Conv2D/ReadVariableOp_quantized')
        old_p_encoding_min = p_quantizer.get_variable_from_op(
            QuantizeOpIndices.encoding_min)
        old_p_encoding_max = p_quantizer.get_variable_from_op(
            QuantizeOpIndices.encoding_max)

        self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED,
                         p_quantizer.quant_scheme)
        p_quantizer.quant_scheme = QuantScheme.post_training_tf
        self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF,
                         p_quantizer.quant_scheme)

        # invoke compute encoding after quantScheme update
        sim.compute_encodings(dummy_forward_pass, None)
        new_p_encoding_min = p_quantizer.get_variable_from_op(
            QuantizeOpIndices.encoding_min)
        new_p_encoding_max = p_quantizer.get_variable_from_op(
            QuantizeOpIndices.encoding_max)

        # validate
        self.assertNotEqual(old_p_encoding_min, new_p_encoding_min)
        self.assertNotEqual(old_p_encoding_max, new_p_encoding_max)

        sess.close()
        sim.session.close()
        del sim
Example #8
    def test_compute_encodings_gpu_model(self):
        """
        Create QuantSim for a GPU model and test that activation encodings are computed
        """

        tf.compat.v1.reset_default_graph()
        with tf.device('/gpu:0'):
            model = tf.keras.Sequential()
            model.add(
                tf.keras.layers.Conv2D(32,
                                       kernel_size=3,
                                       input_shape=(28, 28, 3),
                                       activation='relu'))
            model.add(tf.keras.layers.MaxPooling2D((2, 2)))
            model.add(
                tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
            model.summary()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, ['conv2d_input'], ['conv2d_1/Relu'],
                                   use_cuda=True)

        # Check that op-mode is set correctly
        conv2d_weight_quant_op = sim.session.graph.get_operation_by_name(
            'conv2d/Conv2D/ReadVariableOp_quantized')
        conv2d_output_quant_op = sim.session.graph.get_operation_by_name(
            'conv2d/Relu_quantized')
        self.assertEqual(
            int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
            sim.session.run(conv2d_weight_quant_op.inputs[1]))
        self.assertEqual(int(libpymo.TensorQuantizerOpMode.updateStats),
                         sim.session.run(conv2d_output_quant_op.inputs[1]))

        def dummy_forward_pass(sess, args):
            model_output = sess.graph.get_tensor_by_name(
                'conv2d_1/Relu_quantized:0')
            model_input = sess.graph.get_tensor_by_name('conv2d_input:0')
            dummy_input = np.random.randn(20, 28, 28, 3)
            sess.run(model_output, feed_dict={model_input: dummy_input})

        sim.compute_encodings(dummy_forward_pass, None)

        # Check if encodings have been calculated
        deactivated_quantizers = [
            'conv2d_input_quantized', 'conv2d/BiasAdd_quantized',
            'conv2d_1/BiasAdd_quantized'
        ]
        for name, quantizer in sim._activation_quantizers.items():
            if name in deactivated_quantizers:
                self.assertEqual(int(libpymo.TensorQuantizerOpMode.passThrough),
                                 sim.session.run(name + '_op_mode/read:0'))
            else:
                self.assertTrue(
                    quantizer.tensor_quantizer.isEncodingValid,
                    "quantizer: {} does not have a valid encoding".format(
                        name))

        # Check that op-mode is set correctly
        # Check that quantized ops got added for all params
        conv2d_weight_quant_op = sim.session.graph.get_operation_by_name(
            'conv2d/Conv2D/ReadVariableOp_quantized')
        conv2d_output_quant_op = sim.session.graph.get_operation_by_name(
            'conv2d/Relu_quantized')

        self.assertEqual(
            int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
            sim.session.run(conv2d_weight_quant_op.inputs[1]))
        self.assertEqual(int(libpymo.TensorQuantizerOpMode.quantizeDequantize),
                         sim.session.run(conv2d_output_quant_op.inputs[1]))

        sess.close()
        sim.session.close()
        del sim
Example #9
    def _save_to_keras_common_test_code(self, use_cuda):
        tf.compat.v1.reset_default_graph()
        if not use_cuda:
            model = tf.keras.Sequential()
            model.add(
                tf.keras.layers.Conv2D(32,
                                       kernel_size=3,
                                       input_shape=(28, 28, 3),
                                       activation='relu'))
            model.add(tf.keras.layers.MaxPooling2D((2, 2)))
            model.add(
                tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
            model.summary()
        else:
            with tf.device('/cpu:0'):
                model = tf.keras.Sequential()
                model.add(
                    tf.keras.layers.Conv2D(32,
                                           kernel_size=3,
                                           input_shape=(28, 28, 3),
                                           activation='relu'))
                model.add(tf.keras.layers.MaxPooling2D((2, 2)))
                model.add(
                    tf.keras.layers.Conv2D(64,
                                           kernel_size=3,
                                           activation='relu'))
                model.summary()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, ['conv2d_input'], ['conv2d_1/Relu'],
                                   use_cuda=use_cuda)

        # Check that op-mode is set correctly
        conv2d_weight_quant_op = sim.session.graph.get_operation_by_name(
            'conv2d/Conv2D/ReadVariableOp_quantized')
        conv2d_output_quant_op = sim.session.graph.get_operation_by_name(
            'conv2d/Relu_quantized')
        self.assertEqual(
            int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
            sim.session.run(conv2d_weight_quant_op.inputs[1]))
        self.assertEqual(int(libpymo.TensorQuantizerOpMode.updateStats),
                         sim.session.run(conv2d_output_quant_op.inputs[1]))

        def dummy_forward_pass(sess, eval_tensor_name):
            model_output = sess.graph.get_tensor_by_name(eval_tensor_name)
            model_input = sess.graph.get_tensor_by_name('conv2d_input:0')
            dummy_input = np.random.randn(20, 28, 28, 3)
            sess.run(model_output, feed_dict={model_input: dummy_input})

        sim.compute_encodings(dummy_forward_pass, 'conv2d_1/Relu_quantized:0')
        mod_sess = sim.save_to_keras()

        # Check 1: The new graph is well formed. Try forward pass through the graph.
        dummy_forward_pass(mod_sess, 'conv2d_1/Relu_quantized_static:0')

        # Check 2: All the QcQuantize nodes have no consumers, meaning they are disconnected from the main graph
        op_count = 0
        for op in mod_sess.graph.get_operations():
            if op.type == "QcQuantize":
                op_count += 1
                self.assertFalse(op.outputs[0].consumers())

        # Check 3: One QcQuantizeStatic for each QcQuantize op
        static_op_count = 0
        for op in mod_sess.graph.get_operations():
            if op.type == "QcQuantizeStatic":
                static_op_count += 1
        self.assertEqual(op_count, static_op_count)

        # Check 4: Make sure the attributes are set correctly
        op = mod_sess.graph.get_operation_by_name(
            "conv2d/Conv2D/ReadVariableOp_quantized_static")
        self.assertEqual(8, op.get_attr("bitwidth"))
        self.assertEqual(1, op.get_attr("quant_scheme"))  # TF-Enhanced
        self.assertEqual(1,
                         op.get_attr("op_mode"))  # oneShotQuantizeDequantize

        op = mod_sess.graph.get_operation_by_name(
            "conv2d/BiasAdd_quantized_static")
        self.assertEqual(3, op.get_attr("op_mode"))  # passThrough

        op = mod_sess.graph.get_operation_by_name(
            "conv2d/Relu_quantized_static")
        self.assertEqual(8, op.get_attr("bitwidth"))
        self.assertEqual(1, op.get_attr("quant_scheme"))  # TF-Enhanced
        self.assertEqual(2, op.get_attr("op_mode"))  # quantizeDequantize

        sess.close()
        sim.session.close()
        del sim
Example #10
    def test_backward_pass_time_taken_lstm(self,
                                           is_quantized=True,
                                           iterations=1):
        """ perform backward pass with quantized lstm block"""

        tf.reset_default_graph()

        sess = tf.Session()
        np.random.seed(0)
        tf.set_random_seed(0)
        timesteps = 5
        with sess.graph.as_default():
            inputs = tf.keras.Input(shape=(timesteps, 100))

            # Add an LSTM layer with 12 internal units.
            x = tf.keras.layers.LSTM(12)(inputs)

            _ = tf.keras.layers.Dense(10,
                                      activation=tf.nn.softmax,
                                      name="lstm_model")(x)

            init = tf.global_variables_initializer()
            sess.run(init)
        curr_sess = sess
        if is_quantized:
            sim = QuantizationSimModel(sess, ['input_1'],
                                       ['lstm_model/Softmax'],
                                       use_cuda=False)

            def dummy_forward_pass(sess, args):
                model_output = sess.graph.get_tensor_by_name(
                    'lstm_model/Softmax:0')
                model_input = sess.graph.get_tensor_by_name('input_1:0')
                dummy_input = np.random.randn(32, 5, 100)  # time_steps = 5
                sess.run(model_output, feed_dict={model_input: dummy_input})

            sim.compute_encodings(dummy_forward_pass, None)

            curr_sess = sim.session

        inp_tensor = curr_sess.graph.get_tensor_by_name('input_1:0')
        np.random.seed(0)
        w_shape = inp_tensor.shape
        batches = 32
        inp_data = np.random.rand(batches, w_shape[1], w_shape[2])
        logits = curr_sess.graph.get_tensor_by_name('lstm_model/MatMul:0')

        labels = np.random.randint(10, size=batches)
        one_hot_labels = np.eye(10)[labels]

        with curr_sess.graph.as_default():
            var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            labels_placeholder = tf.placeholder(tf.float32, [None, 10],
                                                name='labels')
            loss = tf.losses.softmax_cross_entropy(
                onehot_labels=labels_placeholder, logits=logits)

            update_ops = []
            global_step = tf.train.create_global_step()
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-3)
            gradients = optimizer.compute_gradients(loss, var_list)

            init_global = tf.global_variables_initializer()
            init_local = tf.local_variables_initializer()
            init = tf.group(init_global, init_local)
            curr_sess.run(init)

            grad_updates = optimizer.apply_gradients(gradients,
                                                     global_step=global_step)
            update_ops.append(grad_updates)
            update_op = tf.group(*update_ops)

            with tf.control_dependencies([update_op]):
                train_op = tf.identity(loss, name='train_op')

            # start training
            time_taken_by_default_grad = 0
            for i in range(iterations):
                start_time = time.perf_counter()
                _ = curr_sess.run(train_op,
                                  feed_dict={
                                      inp_tensor: inp_data,
                                      labels_placeholder: one_hot_labels
                                  })
                exec_time = time.perf_counter() - start_time
                time_taken_by_default_grad = time_taken_by_default_grad + exec_time

            default_grad_avg_time = time_taken_by_default_grad / iterations

        # close session
        sess.close()
        if is_quantized:
            sim.session.close()
            del sim

        return default_grad_avg_time
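Because the method above returns the average backward-pass time, a with/without comparison is straightforward when called from inside the same test class. A sketch (the iteration count is arbitrary):

# Sketch: relative backward-pass cost of the quantized LSTM graph vs. the unquantized one
quantized_time = self.test_backward_pass_time_taken_lstm(is_quantized=True, iterations=10)
baseline_time = self.test_backward_pass_time_taken_lstm(is_quantized=False, iterations=10)
print('quantized / baseline backward-pass time: {:.2f}x'.format(quantized_time / baseline_time))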
Example #11
    def validate_simple_rnn_auto_insertion_and_forward_pass(self, sess):
        """
        Common API to validate automatic quant-node insertion and the forward pass for a SimpleRNN layer
        :param sess: TensorFlow session
        :return:
        """

        np.random.seed(0)
        tf.set_random_seed(0)

        ops = sess.graph.get_operations()
        matmul_param_quant_op_inside_while_block_name = "simple_rnn/while/MatMul/ReadVariableOp_quantized"
        self.assertNotIn(matmul_param_quant_op_inside_while_block_name,
                         [op.name for op in ops])
        # _ = tf.summary.FileWriter('./test_simple_rnn_keras', sess.graph)
        # construct a quantization sim model
        sim = QuantizationSimModel(sess, ['input_1'],
                                   ['simplernn_model/Softmax'],
                                   use_cuda=False)

        # params that must have quantizers
        matmul_2_param_quant_op_inside_while_block_name = "simple_rnn/while/MatMul_1/ReadVariableOp_quantized"
        # check biasadd param quantizers are disabled
        param_quantizers = sim._param_quantizers
        for p_quantizer in param_quantizers.keys():
            if 'BiasAdd' in p_quantizer:
                p_quant_config = sim.quantizer_config(p_quantizer)
                self.assertFalse(p_quant_config.enabled)

        # activations with quantizers
        activation_bias_add_op_inside_while_block_name = "simple_rnn/while/BiasAdd_quantized"
        add_op_inside_while_block_name = "simple_rnn/while/add_quantized"

        # these should not have activation quantizers
        activation_matmul_op_inside_while_block_name = "simple_rnn/while/MatMul_quantized"
        activation_matmul_2_op_inside_while_block_name = "simple_rnn/while/MatMul_1_quantized"

        # get ops and make sure we have a quantized op added to the conditional block
        quantized_graph_op_names = self._get_quant_ops_from_tf_graph(
            sim.session.graph)

        # while block ops
        # bias and kernel quantizers
        self.assertIn(matmul_param_quant_op_inside_while_block_name,
                      quantized_graph_op_names)
        self.assertIn(matmul_2_param_quant_op_inside_while_block_name,
                      quantized_graph_op_names)

        # output quantizers
        self.assertNotIn(activation_bias_add_op_inside_while_block_name,
                         quantized_graph_op_names)
        self.assertNotIn(add_op_inside_while_block_name,
                         quantized_graph_op_names)

        self.assertNotIn(activation_matmul_op_inside_while_block_name,
                         quantized_graph_op_names)
        self.assertNotIn(activation_matmul_2_op_inside_while_block_name,
                         quantized_graph_op_names)

        # check for input quantizers
        input_matmul_op_inside_while_block_name = "simple_rnn/while/TensorArrayReadV3_quantized"
        input_matmul_2_op_inside_while_block_name = "simple_rnn/while/Identity_2_quantized"
        self.assertIn(input_matmul_op_inside_while_block_name,
                      quantized_graph_op_names)
        self.assertIn(input_matmul_2_op_inside_while_block_name,
                      quantized_graph_op_names)

        # validate encodings
        def dummy_forward_pass(sess, args):
            model_output = sess.graph.get_tensor_by_name(
                'simplernn_model/Softmax:0')
            model_input = sess.graph.get_tensor_by_name('input_1:0')
            dummy_input = np.random.randn(16, 3, 100)
            sess.run(model_output, feed_dict={model_input: dummy_input})

        def eval(sess, input_tensor):
            model_output = sess.graph.get_tensor_by_name(
                'simplernn_model/Softmax:0')
            model_input = sess.graph.get_tensor_by_name('input_1:0')
            out = sess.run(model_output, feed_dict={model_input: input_tensor})
            return out

        sim.compute_encodings(dummy_forward_pass, None)
        random_tensor = np.random.randn(16, 3, 100)
        orig_out = eval(sess, random_tensor)

        sim.compute_encodings(dummy_forward_pass, None)

        # evaluate the quantized model after encodings have been computed
        with sim.session.graph.as_default():
            quantized_out = eval(sim.session, random_tensor)

        # check quantized output with orig output
        self.assertFalse(np.allclose(orig_out, quantized_out))

        # close tf sessions
        sess.close()
        sim.session.close()
        del sim
Example #12
def run_evaluation(args):
    # Build graph definition
    with tf.Graph().as_default():
        # Create iterator
        tf_records = glob(args.dataset_dir + '/validation*')
        preprocessing_fn = preprocessing_factory.get_preprocessing(
            args.model_name, is_training=False)
        parse_function = wrap_preprocessing(preprocessing_fn,
                                            height=args.image_size,
                                            width=args.image_size,
                                            num_classes=(1001 -
                                                         args.labels_offset),
                                            labels_offset=args.labels_offset)

        dataset = tf.data.TFRecordDataset(tf_records).repeat(1)
        dataset = dataset.map(parse_function, num_parallel_calls=1).apply(
            tf.contrib.data.batch_and_drop_remainder(args.batch_size))
        iterator = dataset.make_initializable_iterator()
        images, labels = iterator.get_next()

        network_fn = nets_factory.get_network_fn(
            args.model_name,
            num_classes=(1001 - args.labels_offset),
            is_training=False)
        with tf.device('/cpu:0'):
            images = tf.placeholder_with_default(images,
                                                 shape=(None, args.image_size,
                                                        args.image_size, 3),
                                                 name='input')
            labels = tf.placeholder_with_default(labels,
                                                 shape=(None, 1001 -
                                                        args.labels_offset),
                                                 name='labels')
        logits, end_points = network_fn(images)
        confidences = tf.nn.softmax(logits, axis=1, name='confidences')
        categorical_preds = tf.argmax(confidences,
                                      axis=1,
                                      name='categorical_preds')
        categorical_labels = tf.argmax(labels,
                                       axis=1,
                                       name='categorical_labels')
        correct_predictions = tf.equal(categorical_labels, categorical_preds)
        top1_acc = tf.reduce_mean(tf.cast(correct_predictions, tf.float32),
                                  name='top1-acc')
        top5_acc = tf.reduce_mean(tf.cast(
            tf.nn.in_top_k(predictions=confidences,
                           targets=tf.cast(categorical_labels, tf.int32),
                           k=5), tf.float32),
                                  name='top5-acc')

        saver = tf.train.Saver()
        sess = tf.Session()

        # Load model from checkpoint
        if not args.ckpt_bn_folded:
            saver.restore(sess, args.checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

    # Fold all BatchNorms before QuantSim
    sess, folded_pairs = fold_all_batch_norms(sess, ['IteratorGetNext'],
                                              [logits.name[:-2]])

    if args.ckpt_bn_folded:
        with sess.graph.as_default():
            saver = tf.train.Saver()
            saver.restore(sess, args.checkpoint_path)
    else:
        # Do Cross Layer Equalization and Bias Correction if not loading from a batchnorm folded checkpoint
        sess = equalize_model(sess, ['input'], [logits.op.name])
        conv_bn_dict = BiasCorrection.find_all_convs_bn_with_activation(
            sess, ['input'], [logits.op.name])
        quant_params = QuantParams(quant_mode=args.quant_scheme)
        bias_correction_dataset = tf.data.TFRecordDataset(tf_records).repeat(1)
        bias_correction_dataset = bias_correction_dataset.map(
            lambda x: parse_function(x)[0], num_parallel_calls=1).apply(
                tf.contrib.data.batch_and_drop_remainder(args.batch_size))
        bias_correction_params = BiasCorrectionParams(
            batch_size=args.batch_size,
            num_quant_samples=10,
            num_bias_correct_samples=512,
            input_op_names=['input'],
            output_op_names=[logits.op.name])

        sess = BiasCorrection.correct_bias(
            reference_model=sess,
            bias_correct_params=bias_correction_params,
            quant_params=quant_params,
            data_set=bias_correction_dataset,
            conv_bn_dict=conv_bn_dict,
            perform_only_empirical_bias_corr=True)

    # Define eval_func to use for compute encodings in QuantSim
    def eval_func(session, iterations):
        cnt = 0
        avg_acc_top1 = 0
        session.run('MakeIterator')
        while cnt < iterations or iterations == -1:
            try:
                avg_acc_top1 += session.run('top1-acc:0')
                cnt += 1
            except tf.errors.OutOfRangeError:
                # Dataset exhausted before reaching the requested iteration count
                return avg_acc_top1 / cnt

        return avg_acc_top1 / cnt

    # Select the right quant_scheme
    if args.quant_scheme == 'range_learning_tf':
        quant_scheme = aimet_common.defs.QuantScheme.training_range_learning_with_tf_init
    elif args.quant_scheme == 'range_learning_tf_enhanced':
        quant_scheme = aimet_common.defs.QuantScheme.training_range_learning_with_tf_enhanced_init
    elif args.quant_scheme == 'tf':
        quant_scheme = aimet_common.defs.QuantScheme.post_training_tf
    elif args.quant_scheme == 'tf_enhanced':
        quant_scheme = aimet_common.defs.QuantScheme.post_training_tf_enhanced
    else:
        raise ValueError("Got unrecognized quant_scheme: " + args.quant_scheme)

    # Create QuantizationSimModel
    sim = QuantizationSimModel(
        session=sess,
        starting_op_names=['IteratorGetNext'],
        output_op_names=[logits.name[:-2]],
        quant_scheme=quant_scheme,
        rounding_mode=args.round_mode,
        default_output_bw=args.default_output_bw,
        default_param_bw=args.default_param_bw,
        config_file=args.quantsim_config_file,
    )

    # Run compute_encodings
    sim.compute_encodings(eval_func,
                          forward_pass_callback_args=args.encodings_iterations)

    # Run final evaluation
    sess = sim.session

    top1_acc = eval_func(sess, -1)
    print('Avg accuracy Top 1: {}'.format(top1_acc))
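
run_evaluation only reads a fixed set of attributes from args, so it can be driven with a plain argparse.Namespace. In the sketch below only the attribute names come from the code above; every value is a placeholder to adapt to your setup:

from argparse import Namespace

args = Namespace(
    dataset_dir='/path/to/imagenet/tfrecords',   # directory containing validation* TFRecords
    model_name='mobilenet_v2',                   # any name known to nets_factory / preprocessing_factory
    image_size=224,
    labels_offset=0,
    batch_size=32,
    checkpoint_path='/path/to/model.ckpt',
    ckpt_bn_folded=False,
    quant_scheme='tf_enhanced',                  # one of the strings handled above
    round_mode='nearest',
    default_output_bw=8,
    default_param_bw=8,
    quantsim_config_file=None,
    encodings_iterations=500,
)
run_evaluation(args)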