def training_helper(sim, generator):
    """A Helper function to fine-tune MNIST model

    Adds a fresh Adam optimizer to the quantized graph held by ``sim`` and runs a
    short training loop, printing evaluation accuracy every 10 iterations.

    :param sim: QuantizationSimModel-like object exposing a ``session`` attribute
    :param generator: data generator passed through to graph_eval.evaluate_graph
    """
    g = sim.session.graph
    sess = sim.session
    with g.as_default():
        # Fetch the training placeholders / tensors by their well-known names in the MNIST graph
        x = sim.session.graph.get_tensor_by_name("reshape_input:0")
        y = g.get_tensor_by_name("labels:0")
        fc1_w = g.get_tensor_by_name("dense_1/MatMul/ReadVariableOp:0")
        ce = g.get_tensor_by_name("xent:0")
        # Using Adam optimizer
        train_step = tf.compat.v1.train.AdamOptimizer(1e-3, name="TempAdam").minimize(ce)
        # Only the optimizer's new slot variables need initializing; existing weights are kept
        graph_eval.initialize_uninitialized_vars(sess)
        # Input data for MNIST
        mnist = input_data.read_data_sets('./data', one_hot=True)
        # Using 100 iterations and batch of size 50
        for i in range(100):
            batch = mnist.train.next_batch(50)
            sess.run([train_step, fc1_w], feed_dict={x: batch[0], y: batch[1]})
            if i % 10 == 0:
                # Find accuracy of model every 10 iterations
                perf = graph_eval.evaluate_graph(sess, generator, ['accuracy'],
                                                 graph_eval.default_eval_func, 1)
                print('Quantized performance: ' + str(perf * 100))
    # close session
    sess.close()
def test_construction_cpu_model(self):
    """
    Create QuantSim for a CPU model and check that quantizers have been added to the graph
    """
    tf.compat.v1.reset_default_graph()
    with tf.device('/cpu:0'):
        # Small two-conv Keras model used as the quantization target
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(32, kernel_size=3, input_shape=(28, 28, 3), activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
        model.summary()

    sess = tf.compat.v1.Session()
    initialize_uninitialized_vars(sess)
    sim = QuantizationSimModel(sess, ['conv2d_input'], ['conv2d_1/Relu'], use_cuda=False)

    # One run through the model to check if the ops got added correctly
    model_output = sess.graph.get_tensor_by_name('conv2d_1/BiasAdd_quantized:0')
    model_input = sess.graph.get_tensor_by_name('conv2d_input:0')
    dummy_input = np.random.randn(20, 28, 28, 3)
    sess.run(model_output, feed_dict={model_input: dummy_input})

    # Check that quantized ops got added for all params
    quant_ops = [op for op in sess.graph.get_operations() if op.type == 'QcQuantize']
    for op in quant_ops:
        print(op.name)
    self.assertEqual(10, len(quant_ops))

    # Check that the quant ops are correctly connected in the graph
    self.assertEqual('Conv2D', quant_ops[0].outputs[0].consumers()[0].type)
    self.assertEqual('BiasAdd', quant_ops[1].outputs[0].consumers()[0].type)
    # Op feeding BiasAdd should be left in passThrough mode (input index 1 holds the op-mode)
    self.assertEqual(int(libpymo.TensorQuantizerOpMode.passThrough),
                     sess.run(quant_ops[1].inputs[1]))

    # Check that op-mode is set correctly
    self.assertEqual(int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
                     sess.run(quant_ops[0].inputs[1]))

    sess.close()
    sim.session.close()
    del sim
def test_skip_quantizing_dtype_int(self):
    """
    Test that op with dtype int32 is skipped during quantization

    Builds the helper int-dtype model, constructs a QuantizationSimModel over it,
    and verifies the int32 input did not receive an activation quantizer while
    the float input did.
    """
    tf.compat.v1.reset_default_graph()
    with tf.compat.v1.Session() as sess:
        _ = model_with_dtype_int()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, ['input_1', 'input_2'],
                                   ['model_with_dtype_int/Softmax'], use_cuda=False)

        # Six activation quantizers expected in total
        self.assertEqual(6, len(sim._activation_quantizers))
        # Idiom fix: assertIn/assertNotIn instead of assertTrue(x in y) — same check,
        # but unittest reports the container contents on failure.
        self.assertNotIn('input_1_quantized', sim._activation_quantizers)
        self.assertIn('input_2_quantized', sim._activation_quantizers)

        sim.session.close()
        del sim
def _load_graph(graph, meta_graph, checkpoint):
    """ Load a TF graph given the meta and checkpoint files

    :param graph: Graph to load into
    :param meta_graph: Meta file
    :param checkpoint: Checkpoint file
    :return: Newly created tf.compat.v1.Session
    """
    _log.info('Loading graph: %s', meta_graph)

    # Session bound to the caller-supplied graph; import the meta graph into it,
    # then restore trained parameters from the checkpoint.
    session = tf.compat.v1.Session(graph=graph)
    restorer = tf.compat.v1.train.import_meta_graph(meta_graph)
    restorer.restore(session, checkpoint)

    # Any variables the checkpoint did not cover still need initialization
    graph_eval.initialize_uninitialized_vars(session)

    return session, restorer
def prune_model(self, layer_db: LayerDatabase, layer_comp_ratio_list: List[LayerCompRatioPair],
                cost_metric: CostMetric, trainer):
    """
    Winnow input channels of every layer whose compression ratio is < 1, then reconstruct
    the pruned layers' weights and return a new layer database over the pruned model.

    :param layer_db: database of layers in the original model
    :param layer_comp_ratio_list: per-layer compression ratio pairs to apply
    :param cost_metric: cost metric (accepted but not used in this method body — TODO confirm)
    :param trainer: trainer object (accepted but not used in this method body — TODO confirm)
    :return: deep-copied, updated layer database for the pruned model
    """
    # sort all the layers in layer_comp_ratio_list based on occurrence
    layer_comp_ratio_list = self._sort_on_occurrence(layer_db.model, layer_comp_ratio_list)

    # Copy the db
    comp_layer_db = copy.deepcopy(layer_db)
    current_sess = comp_layer_db.model

    # Dictionary to map original layer name to list of most recent pruned layer name and output mask.
    # Masks remain at the original length and specify channels winnowed after each round of winnower.
    orig_layer_name_to_pruned_name_and_mask_dict = {}
    # Dictionary to map most recent pruned layer name to the original layer name
    pruned_name_to_orig_name_dict = {}
    # List to hold original layers to reconstruct
    layers_to_reconstruct = []
    detached_op_names = set()

    # Prune layers which have comp ratios less than 1
    for layer_comp_ratio in layer_comp_ratio_list:
        orig_layer = layer_db.find_layer_by_name(layer_comp_ratio.layer.name)
        if layer_comp_ratio.comp_ratio is not None and layer_comp_ratio.comp_ratio < 1.0:
            # 1) channel selection
            prune_indices = self._select_inp_channels(orig_layer, layer_comp_ratio.comp_ratio)
            if not prune_indices:
                # Nothing selected for this layer; skip it
                continue

            # 2) Winnowing the model
            current_sess, ordered_modules_list = winnow.winnow_tf_model(
                current_sess, self._input_op_names, self._output_op_names,
                [(orig_layer.module, prune_indices)],
                reshape=self._allow_custom_downsample_ops,
                in_place=True, verbose=False)
            if not ordered_modules_list:
                # Winnower made no change for this layer
                continue

            layers_to_reconstruct.append(orig_layer)
            # Update dictionaries with new info about pruned ops and new masks
            self._update_pruned_ops_and_masks_info(ordered_modules_list,
                                                   orig_layer_name_to_pruned_name_and_mask_dict,
                                                   pruned_name_to_orig_name_dict,
                                                   detached_op_names)

    # Save and reload modified graph to allow changes to take effect
    # Need to initialize uninitialized variables first since only newly winnowed conv ops are initialized during
    # winnow_tf_model, and all other newly winnowed ops are not.
    with current_sess.graph.as_default():
        initialize_uninitialized_vars(current_sess)
        current_sess = save_and_load_graph('./saver', current_sess)

    comp_layer_db.update_database(current_sess, detached_op_names, update_model=True)

    # Perform reconstruction
    self._reconstruct_layers(layers_to_reconstruct, orig_layer_name_to_pruned_name_and_mask_dict,
                             layer_db, comp_layer_db)

    return comp_layer_db
def test_spatial_svd_compress_auto_with_finetuning(self):
    """
    End to end test with MNIST model following fine tuning
    :return:
    """
    tf.compat.v1.set_random_seed(10)
    AimetLogger.set_level_for_all_areas(logging.DEBUG)

    # load the meta file
    meta_path = os.path.join('models', 'mnist_save.meta')
    sess = aimet_tensorflow.utils.graph_saver.load_model_from_meta(meta_path)

    # ignore first Conv2D op
    conv2d = sess.graph.get_operation_by_name('conv1/Conv2D')
    modules_to_ignore = [conv2d]

    # Auto-mode greedy compression-ratio selection targeting 50% MAC compression
    greedy_params = aimet_common.defs.GreedySelectionParameters(target_comp_ratio=Decimal(0.5),
                                                                num_comp_ratio_candidates=10,
                                                                use_monotonic_fit=True,
                                                                saved_eval_scores_dict=None)
    auto_params = aimet_tensorflow.defs.SpatialSvdParameters.AutoModeParams(
        greedy_select_params=greedy_params, modules_to_ignore=modules_to_ignore)
    params = aimet_tensorflow.defs.SpatialSvdParameters(
        input_op_names=['reshape_input'], output_op_names=['dense_1/BiasAdd'],
        mode=aimet_tensorflow.defs.SpatialSvdParameters.Mode.auto,
        params=auto_params, multiplicity=8)

    input_shape = (1, 1, 28, 28)

    compr_model_sess, stats = ModelCompressor.compress_model(
        sess=sess, working_dir=None, eval_callback=evaluate, eval_iterations=5,
        input_shape=input_shape,
        compress_scheme=aimet_common.defs.CompressionScheme.spatial_svd,
        cost_metric=aimet_common.defs.CostMetric.mac, parameters=params)

    print(stats)

    # Reported compressed accuracy must match a fresh evaluation of the compressed session
    self.assertEqual(evaluate(compr_model_sess, 1, True), float(stats.compressed_model_accuracy))

    # Spatial SVD splits each non-ignored conv into two; expect 4 Conv2D ops total
    all_ops = compr_model_sess.graph.get_operations()
    conv_ops = [op for op in all_ops if op.type == 'Conv2D']
    self.assertEqual(len(conv_ops), 4)
    self.assertTrue(math.isclose(float(stats.mac_compression_ratio), 0.5, abs_tol=0.1))

    # snapshot the weights BEFORE fine tuning for later comparison
    conv2d_1_a_op = compr_model_sess.graph.get_operation_by_name('conv2_a/Conv2D')
    conv2d_1_a_op_weights_before = conv2d_1_a_op.inputs[1].eval(session=compr_model_sess)

    # fine tune the model
    # get the input and validation place holders
    x = compr_model_sess.graph.get_tensor_by_name('reshape_input:0')
    y = compr_model_sess.graph.get_tensor_by_name('labels:0')
    cross_entropy = compr_model_sess.graph.get_tensor_by_name('xent:0')

    with compr_model_sess.graph.as_default():
        # new optimizer and back propagation Op
        optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=1e-3, name='Adam_new')
        train_step = optimizer.minimize(loss=cross_entropy, name='train_step_new')
        # initialize only uninitialized variables
        # only needed when fine tuning, because we are adding new optimizer
        graph_eval.initialize_uninitialized_vars(compr_model_sess)

    mnist = input_data.read_data_sets(os.path.join(str('./'), 'data'), one_hot=True)

    # Single training iteration is enough to perturb the weights
    for i in range(1):
        batch = mnist.train.next_batch(batch_size=32, shuffle=True)
        _, loss_val = compr_model_sess.run([train_step, cross_entropy],
                                           feed_dict={x: batch[0], y: batch[1]})

    # get the weights after fine tuning
    conv2d_1_a_op = compr_model_sess.graph.get_operation_by_name('conv2_a/Conv2D')
    conv2d_1_a_op_weights_after = conv2d_1_a_op.inputs[1].eval(session=compr_model_sess)

    # weight should be different after one iteration
    self.assertFalse(np.allclose(conv2d_1_a_op_weights_before, conv2d_1_a_op_weights_after))

    # close original session
    sess.close()
    # close compressed model session
    compr_model_sess.close()

    # delete temp directory
    shutil.rmtree(str('./temp_meta/'))
def test_manual_quantize(self):
    """ Test quantizing a model by manually specifying ops to quantize

    Monkey-patches QuantizationSimModel's op-selection hooks so that exactly one
    activation and one param quantizer are inserted, then verifies the counts.
    """
    def get_manual_activations(_graph, _starting_ops, _ending_ops):
        """
        Overriding function for getting a list of ops to insert activation quantizers for
        :param _graph: Unused argument
        :param _starting_ops: Unused argument
        :param _ending_ops: Unused argument
        :return: List of ops to insert activation quantizers for, None for placeholder
        """
        return ['conv2d/Relu'], None

    def get_manual_params(_graph, _starting_ops, _ending_ops):
        """
        Overriding function for getting a list of ops to insert param quantizers for
        :param _graph: Unused argument
        :param _starting_ops: Unused argument
        :param _ending_ops: Unused argument
        :return: List of ops to insert param quantizers for, and list of param indices for these ops
        """
        return ['conv2d_1/Conv2D'], [1]

    def configure_quantization_ops(self, _conn_graph, _ops_with_param_names, _indices,
                                   _activation_op_names, _config_file):
        """
        Overriding function for configuring quantization ops inserted by QuantizationSimModel
        :param self: Self refers to QuantizationSimModel object
        :param _conn_graph: Unused argument
        :param _ops_with_param_names: Unused argument
        :param _indices: Unused argument
        :param _activation_op_names: Unused argument
        :param _config_file: Unused argument
        """
        conv2d_relu_quant_info = self._activation_quantizers['conv2d/Relu_quantized']
        conv2d_relu_quant_info.enabled = False
        conv2d_relu_quant_info.enabled = True
        conv2d_1_weight_quant_info = self._param_quantizers['conv2d_1/Conv2D/ReadVariableOp_quantized']
        conv2d_1_weight_quant_info.enabled = False
        conv2d_1_weight_quant_info.enabled = True

    tf.compat.v1.reset_default_graph()
    with tf.device('/cpu:0'):
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(32, kernel_size=3, input_shape=(28, 28, 3), activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
        model.summary()

    sess = tf.compat.v1.Session()
    initialize_uninitialized_vars(sess)

    # Save originals so the class-level monkey-patches can always be reverted
    orig_get_ops_to_quantize_activations_for = QuantizationSimModel._get_ops_to_quantize_activations_for
    orig_get_ops_to_quantize_weights_for = QuantizationSimModel._get_ops_to_quantize_params_for
    orig_configure_quantization_ops = QuantizationSimModel.configure_quantization_ops

    QuantizationSimModel._get_ops_to_quantize_activations_for = get_manual_activations
    QuantizationSimModel._get_ops_to_quantize_params_for = get_manual_params
    QuantizationSimModel.configure_quantization_ops = configure_quantization_ops
    try:
        # Bug fix: constructing the sim inside try/finally guarantees the patched
        # class methods are restored even if construction raises, so later tests
        # are not polluted by the overrides.
        sim = QuantizationSimModel(sess, ['conv2d_input'], ['conv2d_1/Relu'], use_cuda=False)
    finally:
        QuantizationSimModel._get_ops_to_quantize_activations_for = orig_get_ops_to_quantize_activations_for
        QuantizationSimModel._get_ops_to_quantize_params_for = orig_get_ops_to_quantize_weights_for
        QuantizationSimModel.configure_quantization_ops = orig_configure_quantization_ops

    # Exactly the manually-specified quantizers should exist
    self.assertEqual(1, len(sim._activation_quantizers))
    self.assertEqual(1, len(sim._param_quantizers))

    # Close each session once (original code closed sim.session twice)
    sess.close()
    sim.session.close()
    del sim
def test_set_get_quantizer_params_using_properties(self):
    """
    Create QuantSim for a CPU model, test param read and write using properties
    """
    tf.compat.v1.reset_default_graph()
    with tf.device('/cpu:0'):
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(32, kernel_size=3, input_shape=(28, 28, 3), activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
        model.summary()

    sess = tf.compat.v1.Session()
    initialize_uninitialized_vars(sess)
    sim = QuantizationSimModel(sess, [model.input.op.name], [model.output.op.name], use_cuda=False)

    # One param quantizer, one activation (output) quantizer, one bias quantizer
    p_quantizer = sim.quantizer_config('conv2d/Conv2D/ReadVariableOp_quantized')
    o_quantizer = sim.quantizer_config('conv2d/Relu_quantized')
    bias_quantizer = sim.quantizer_config('conv2d/BiasAdd/ReadVariableOp_quantized')

    # check if __str__ can print the object info
    print(p_quantizer)

    # bitwidth round-trip on the param quantizer (default 8 -> set 6)
    bitwidth = p_quantizer.bitwidth
    self.assertEqual(8, bitwidth)
    p_quantizer.bitwidth = 6
    bitwidth = p_quantizer.bitwidth
    self.assertEqual(6, bitwidth)

    # bitwidth round-trip on the output quantizer
    bitwidth = o_quantizer.bitwidth
    self.assertEqual(8, bitwidth)
    o_quantizer.bitwidth = 6
    bitwidth = o_quantizer.bitwidth
    self.assertEqual(6, bitwidth)

    # symmetric-encoding flag round-trip on the bias quantizer
    sym_encoding = bias_quantizer.use_symmetric_encoding
    self.assertFalse(sym_encoding)
    bias_quantizer.use_symmetric_encoding = True
    sym_encoding = bias_quantizer.use_symmetric_encoding
    self.assertTrue(sym_encoding)

    # rounding-mode round-trip (nearest -> stochastic)
    rounding_mode = o_quantizer.rounding_mode
    self.assertEqual(libpymo.RoundingMode.ROUND_NEAREST, rounding_mode)
    o_quantizer.rounding_mode = libpymo.RoundingMode.ROUND_STOCHASTIC
    rounding_mode = o_quantizer.rounding_mode
    self.assertEqual(libpymo.RoundingMode.ROUND_STOCHASTIC, rounding_mode)

    # quant-scheme round-trip (TF-enhanced -> TF)
    quant_scheme = o_quantizer.quant_scheme
    self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED, quant_scheme)
    o_quantizer.quant_scheme = QuantScheme.post_training_tf
    quant_scheme = o_quantizer.quant_scheme
    self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF, quant_scheme)
    # encoding should not be valid after the scheme change
    self.assertFalse(o_quantizer.tensor_quantizer.isEncodingValid)

    # enabled-flag round-trip on the param quantizer
    is_enabled = p_quantizer.enabled
    self.assertTrue(is_enabled)
    p_quantizer.enabled = False
    is_enabled = p_quantizer.enabled
    self.assertFalse(is_enabled)

    sim.session.close()
    del sim
def test_save_load_ckpt_after_compute_encoding_on_orig_object(self):
    """
    Create QuantSim for a CPU model, test save and load on a quantsim model
    when encodings have been computed on original quantsim object
    """
    tf.compat.v1.reset_default_graph()
    with tf.device('/cpu:0'):
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(32, kernel_size=3, input_shape=(28, 28, 3), activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
        model.summary()

    sess = tf.compat.v1.Session()
    initialize_uninitialized_vars(sess)
    sim = QuantizationSimModel(sess, [model.input.op.name], [model.output.op.name], use_cuda=False)

    def dummy_forward_pass(n_sess, args):
        # Drive one batch through the (quantized) model so encodings can be computed;
        # consumers()[0] steps past the model output to its quantize op
        model_output = n_sess.graph.get_tensor_by_name(model.output.name)
        model_output = model_output.consumers()[0].outputs[0]
        model_input = n_sess.graph.get_tensor_by_name(model.input.name)
        dummy_input = np.random.randn(20, 28, 28, 3)
        n_sess.run(model_output, feed_dict={model_input: dummy_input})

    sim.compute_encodings(dummy_forward_pass, None)

    # save quantsim model
    save_checkpoint(sim, './test_3', 'orig_quantsim_model')

    new_quantsim = load_checkpoint('./test_3', 'orig_quantsim_model')

    # validations
    assert (sim is not new_quantsim)

    # as we have performed computeEncodings() on saved quantsim object, these must be set to True/False
    # in loaded quantsim object as on orig model
    for quantize_op in new_quantsim._param_quantizers:
        self.assertTrue(new_quantsim._param_quantizers[quantize_op].tensor_quantizer.isEncodingValid ==
                        sim._param_quantizers[quantize_op].tensor_quantizer.isEncodingValid)
        self.assertTrue(new_quantsim._param_quantizers[quantize_op].get_variable_from_op(QuantizeOpIndices.encoding_min) ==
                        sim._param_quantizers[quantize_op].get_variable_from_op(QuantizeOpIndices.encoding_min))
        self.assertTrue(new_quantsim._param_quantizers[quantize_op].get_variable_from_op(QuantizeOpIndices.encoding_max) ==
                        sim._param_quantizers[quantize_op].get_variable_from_op(QuantizeOpIndices.encoding_max))

    # Same encoding-state checks for every activation quantizer
    for quantize_op in new_quantsim._activation_quantizers:
        self.assertTrue(new_quantsim._activation_quantizers[quantize_op].tensor_quantizer.isEncodingValid ==
                        sim._activation_quantizers[quantize_op].tensor_quantizer.isEncodingValid)
        self.assertTrue(new_quantsim._activation_quantizers[quantize_op].get_variable_from_op(QuantizeOpIndices.encoding_min) ==
                        sim._activation_quantizers[quantize_op].get_variable_from_op(QuantizeOpIndices.encoding_min))
        self.assertTrue(new_quantsim._activation_quantizers[quantize_op].get_variable_from_op(QuantizeOpIndices.encoding_max) ==
                        sim._activation_quantizers[quantize_op].get_variable_from_op(QuantizeOpIndices.encoding_max))

    # delete temp folder created and close sessions
    shutil.rmtree('./test_3')
    sess.close()
    sim.session.close()
    new_quantsim.session.close()
    del sim
    del new_quantsim
def test_save_load_ckpt_cpu_model(self):
    """
    Create QuantSim for a CPU model, test save and load on a quantsim model.
    """
    tf.compat.v1.reset_default_graph()
    with tf.device('/cpu:0'):
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(32, kernel_size=3, input_shape=(28, 28, 3), activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
        model.summary()

    sess = tf.compat.v1.Session()
    initialize_uninitialized_vars(sess)
    sim = QuantizationSimModel(sess, [model.input.op.name], [model.output.op.name], use_cuda=False)

    # save quantsim model
    save_checkpoint(sim, './test_3', 'orig_quantsim_model')

    new_quantsim = load_checkpoint('./test_3', 'orig_quantsim_model')

    # validations
    assert (sim is not new_quantsim)
    # Loaded object must mirror the original's configuration
    self.assertTrue(new_quantsim.session is not None)
    self.assertTrue(new_quantsim._quant_scheme == sim._quant_scheme)
    self.assertTrue(new_quantsim._rounding_mode == sim._rounding_mode)
    self.assertTrue(new_quantsim._use_cuda == sim._use_cuda)
    self.assertTrue(len(new_quantsim._param_quantizers) == len(sim._param_quantizers))
    self.assertTrue(len(new_quantsim._activation_quantizers) == len(sim._activation_quantizers))

    # Per-quantizer checks for params: distinct sessions, same scheme/rounding,
    # and (since compute_encodings was never run) no valid encodings on either side
    for quantize_op in new_quantsim._param_quantizers:
        self.assertFalse(sim._param_quantizers[quantize_op].session ==
                         new_quantsim._param_quantizers[quantize_op].session)
        self.assertTrue(sim._param_quantizers[quantize_op].tensor_quantizer.getQuantScheme() ==
                        new_quantsim._param_quantizers[quantize_op].tensor_quantizer.getQuantScheme())
        self.assertTrue(sim._param_quantizers[quantize_op].tensor_quantizer.roundingMode ==
                        new_quantsim._param_quantizers[quantize_op].tensor_quantizer.roundingMode)
        self.assertFalse(sim._param_quantizers[quantize_op].tensor_quantizer.isEncodingValid)
        self.assertFalse(new_quantsim._param_quantizers[quantize_op].tensor_quantizer.isEncodingValid)

    # Same per-quantizer checks for activations
    for quantize_op in new_quantsim._activation_quantizers:
        self.assertFalse(sim._activation_quantizers[quantize_op].session ==
                         new_quantsim._activation_quantizers[quantize_op].session)
        self.assertTrue(sim._activation_quantizers[quantize_op].tensor_quantizer.getQuantScheme() ==
                        new_quantsim._activation_quantizers[quantize_op].tensor_quantizer.getQuantScheme())
        self.assertTrue(sim._activation_quantizers[quantize_op].tensor_quantizer.roundingMode ==
                        new_quantsim._activation_quantizers[quantize_op].tensor_quantizer.roundingMode)
        self.assertFalse(sim._activation_quantizers[quantize_op].tensor_quantizer.isEncodingValid)
        self.assertFalse(new_quantsim._activation_quantizers[quantize_op].tensor_quantizer.isEncodingValid)

    # remove the old quant sim reference and session
    # to test that everything is loaded correctly on new quantsim including tensor quantizer references
    sim.session.close()
    del sim

    # delete temp folder created and close sessions
    shutil.rmtree('./test_3')
    sess.close()
    new_quantsim.session.close()
    del new_quantsim
def test_export_cpu_model(self):
    """
    Create QuantSim for a CPU model, compute encodings and export out a resulting model
    """
    tf.compat.v1.reset_default_graph()
    with tf.device('/cpu:0'):
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(32, kernel_size=3, input_shape=(28, 28, 3), activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
        model.summary()

    sess = tf.compat.v1.Session()
    initialize_uninitialized_vars(sess)
    sim = QuantizationSimModel(sess, [model.input.op.name], [model.output.op.name], use_cuda=False)

    def dummy_forward_pass(sess, args):
        # Run one batch so encodings can be computed; consumers()[0] steps past
        # the model output to its quantize op
        model_output = sess.graph.get_tensor_by_name(model.output.name)
        model_output = model_output.consumers()[0].outputs[0]
        model_input = sess.graph.get_tensor_by_name(model.input.name)
        dummy_input = np.random.randn(20, 28, 28, 3)
        sess.run(model_output, feed_dict={model_input: dummy_input})

    sim.compute_encodings(dummy_forward_pass, None)

    # Make some changes to model parameters to see if they are part of the exported model
    with sim.session.graph.as_default():
        first_bias_tensor = sim.session.graph.get_tensor_by_name('conv2d/BiasAdd/ReadVariableOp:0')
        first_bias_tensor_val = sim.session.run(first_bias_tensor)
        self.assertTrue(np.any(first_bias_tensor_val == 0))
        first_bias_tensor_var = [var for var in tf.compat.v1.global_variables()
                                 if var.name == 'conv2d/bias:0'][0]
        # Overwrite the bias with ones so the exported model can be checked for it
        first_bias_tensor_var.load(np.ones(32), sim.session)

    # Quantize ops must exist in the sim graph before export
    all_op_types = [op.type for op in sim.session.graph.get_operations()]
    self.assertIn('QcQuantize', all_op_types)

    sim.export('/tmp', 'quant_sim_model')

    # Validate the exported encodings JSON structure
    with open('/tmp/quant_sim_model.encodings') as json_file:
        encoding_data = json.load(json_file)

    activation_keys = list(encoding_data["activation_encodings"].keys())
    self.assertTrue(activation_keys[0] == "conv2d/Relu:0")
    self.assertTrue(isinstance(encoding_data["activation_encodings"]["conv2d/Relu:0"], list))
    act_encoding_keys = encoding_data["activation_encodings"]["conv2d/Relu:0"][0].keys()
    self.assertTrue("bitwidth" in act_encoding_keys)
    self.assertTrue("is_symmetric" in act_encoding_keys)
    self.assertTrue("max" in act_encoding_keys)
    self.assertTrue("min" in act_encoding_keys)
    self.assertTrue("offset" in act_encoding_keys)
    self.assertTrue("scale" in act_encoding_keys)

    param_keys = list(encoding_data["param_encodings"].keys())
    self.assertTrue(param_keys[0] == "conv2d/Conv2D/ReadVariableOp:0")
    self.assertTrue(isinstance(encoding_data["param_encodings"]["conv2d/Conv2D/ReadVariableOp:0"], list))
    param_encoding_keys = encoding_data["param_encodings"]["conv2d/Conv2D/ReadVariableOp:0"][0].keys()
    self.assertTrue("bitwidth" in param_encoding_keys)
    self.assertTrue("is_symmetric" in param_encoding_keys)
    self.assertTrue("max" in param_encoding_keys)
    self.assertTrue("min" in param_encoding_keys)
    self.assertTrue("offset" in param_encoding_keys)
    self.assertTrue("scale" in param_encoding_keys)

    # Reload the exported meta graph: modified bias must persist, quantize ops must be stripped
    new_sess = load_model_from_meta('/tmp/quant_sim_model.meta')
    first_bias_tensor = new_sess.graph.get_tensor_by_name('conv2d/BiasAdd/ReadVariableOp:0')
    first_bias_tensor_val = new_sess.run(first_bias_tensor)
    self.assertTrue(np.any(first_bias_tensor_val == 1))

    all_op_types = [op.type for op in new_sess.graph.get_operations()]
    self.assertNotIn('QcQuantize', all_op_types)

    sess.close()
    sim.session.close()
    del sim
def test_compute_encodings_quant_scheme_update(self):
    """
    Create QuantSim model and update quantScheme using property interface
    """
    tf.compat.v1.reset_default_graph()
    np.random.seed(0)
    tf.compat.v1.set_random_seed(0)

    with tf.device('/gpu:0'):
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(32, kernel_size=3, input_shape=(28, 28, 3), activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
        model.summary()

    sess = tf.compat.v1.Session()
    initialize_uninitialized_vars(sess)
    sim = QuantizationSimModel(sess, ['conv2d_input'], ['conv2d_1/Relu'], use_cuda=True)

    # Check that op-mode is set correctly
    conv2d_weight_quant_op = sim.session.graph.get_operation_by_name('conv2d/Conv2D/ReadVariableOp_quantized')
    self.assertEqual(int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
                     sim.session.run(conv2d_weight_quant_op.inputs[1]))

    def dummy_forward_pass(sess, args):
        # Deterministic forward pass (fixed seeds) so encodings are reproducible
        np.random.seed(0)
        tf.compat.v1.set_random_seed(0)
        model_output = sess.graph.get_tensor_by_name('conv2d_1/Relu_quantized:0')
        model_input = sess.graph.get_tensor_by_name('conv2d_input:0')
        dummy_input = np.random.randn(20, 28, 28, 3)
        sess.run(model_output, feed_dict={model_input: dummy_input})

    sim.compute_encodings(dummy_forward_pass, None)

    # Snapshot encodings computed under the default (TF-enhanced) scheme
    p_quantizer = sim.quantizer_config('conv2d/Conv2D/ReadVariableOp_quantized')
    old_p_encoding_min = p_quantizer.get_variable_from_op(QuantizeOpIndices.encoding_min)
    old_p_encoding_max = p_quantizer.get_variable_from_op(QuantizeOpIndices.encoding_max)

    # Switch the quant scheme via the property interface
    self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED, p_quantizer.quant_scheme)
    p_quantizer.quant_scheme = QuantScheme.post_training_tf
    self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF, p_quantizer.quant_scheme)

    # invoke compute encoding after quantScheme update
    sim.compute_encodings(dummy_forward_pass, None)
    new_p_encoding_min = p_quantizer.get_variable_from_op(QuantizeOpIndices.encoding_min)
    new_p_encoding_max = p_quantizer.get_variable_from_op(QuantizeOpIndices.encoding_max)

    # validate
    self.assertNotEqual(old_p_encoding_min, new_p_encoding_min)
    self.assertNotEqual(old_p_encoding_max, new_p_encoding_max)

    sess.close()
    sim.session.close()
    del sim
def test_compute_encodings_gpu_model(self):
    """
    Create QuantSim for a GPU model and test that activation encodings are computed
    """
    tf.compat.v1.reset_default_graph()
    with tf.device('/gpu:0'):
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(32, kernel_size=3, input_shape=(28, 28, 3), activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
        model.summary()

    sess = tf.compat.v1.Session()
    initialize_uninitialized_vars(sess)
    sim = QuantizationSimModel(sess, ['conv2d_input'], ['conv2d_1/Relu'], use_cuda=True)

    # Check that op-mode is set correctly before compute_encodings:
    # weights one-shot quantize-dequantize, activations collecting stats
    conv2d_weight_quant_op = sim.session.graph.get_operation_by_name('conv2d/Conv2D/ReadVariableOp_quantized')
    conv2d_output_quant_op = sim.session.graph.get_operation_by_name('conv2d/Relu_quantized')
    self.assertEqual(int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
                     sim.session.run(conv2d_weight_quant_op.inputs[1]))
    self.assertEqual(int(libpymo.TensorQuantizerOpMode.updateStats),
                     sim.session.run(conv2d_output_quant_op.inputs[1]))

    def dummy_forward_pass(sess, args):
        # One batch through the quantized graph so activation stats are gathered
        model_output = sess.graph.get_tensor_by_name('conv2d_1/Relu_quantized:0')
        model_input = sess.graph.get_tensor_by_name('conv2d_input:0')
        dummy_input = np.random.randn(20, 28, 28, 3)
        sess.run(model_output, feed_dict={model_input: dummy_input})

    sim.compute_encodings(dummy_forward_pass, None)

    # Check if encodings have been calculated
    deactivated_quantizers = ['conv2d_input_quantized', 'conv2d/BiasAdd_quantized',
                              'conv2d_1/BiasAdd_quantized']
    for name, quantizer in sim._activation_quantizers.items():
        if name in deactivated_quantizers:
            # Bug fix: the original used self.assertTrue(a, b), which treats the
            # session-run result as a failure *message* and always passes because
            # int(passThrough) is truthy. assertEqual actually verifies the
            # deactivated quantizer stayed in passThrough mode.
            self.assertEqual(int(libpymo.TensorQuantizerOpMode.passThrough),
                             sim.session.run(name + '_op_mode/read:0'))
        else:
            self.assertTrue(quantizer.tensor_quantizer.isEncodingValid,
                            "quantizer: {} does not have a valid encoding".format(name))

    # Check that op-mode is set correctly after compute_encodings:
    # activations switch to quantize-dequantize
    conv2d_weight_quant_op = sim.session.graph.get_operation_by_name('conv2d/Conv2D/ReadVariableOp_quantized')
    conv2d_output_quant_op = sim.session.graph.get_operation_by_name('conv2d/Relu_quantized')
    self.assertEqual(int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
                     sim.session.run(conv2d_weight_quant_op.inputs[1]))
    self.assertEqual(int(libpymo.TensorQuantizerOpMode.quantizeDequantize),
                     sim.session.run(conv2d_output_quant_op.inputs[1]))

    sess.close()
    sim.session.close()
    del sim
def _save_to_keras_common_test_code(self, use_cuda):
    """
    Shared body for save_to_keras tests: builds a small conv model, quantizes it,
    computes encodings, converts to a Keras-compatible graph via save_to_keras(),
    and validates the converted graph's static quantize ops.

    :param use_cuda: passed through to QuantizationSimModel; also selects model-build branch
    """
    tf.compat.v1.reset_default_graph()
    # NOTE(review): the device branches look inverted — the use_cuda=True branch pins
    # model construction to '/cpu:0' while use_cuda=False uses the default device;
    # confirm this is intentional.
    if not use_cuda:
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(32, kernel_size=3, input_shape=(28, 28, 3), activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
        model.summary()
    else:
        with tf.device('/cpu:0'):
            model = tf.keras.Sequential()
            model.add(tf.keras.layers.Conv2D(32, kernel_size=3, input_shape=(28, 28, 3), activation='relu'))
            model.add(tf.keras.layers.MaxPooling2D((2, 2)))
            model.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
            model.summary()

    sess = tf.compat.v1.Session()
    initialize_uninitialized_vars(sess)
    sim = QuantizationSimModel(sess, ['conv2d_input'], ['conv2d_1/Relu'], use_cuda=use_cuda)

    # Check that op-mode is set correctly
    conv2d_weight_quant_op = sim.session.graph.get_operation_by_name('conv2d/Conv2D/ReadVariableOp_quantized')
    conv2d_output_quant_op = sim.session.graph.get_operation_by_name('conv2d/Relu_quantized')
    self.assertEqual(int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
                     sim.session.run(conv2d_weight_quant_op.inputs[1]))
    self.assertEqual(int(libpymo.TensorQuantizerOpMode.updateStats),
                     sim.session.run(conv2d_output_quant_op.inputs[1]))

    def dummy_forward_pass(sess, eval_tensor_name):
        # Forward pass used both for compute_encodings and to exercise the converted graph
        model_output = sess.graph.get_tensor_by_name(eval_tensor_name)
        model_input = sess.graph.get_tensor_by_name('conv2d_input:0')
        dummy_input = np.random.randn(20, 28, 28, 3)
        sess.run(model_output, feed_dict={model_input: dummy_input})

    sim.compute_encodings(dummy_forward_pass, 'conv2d_1/Relu_quantized:0')

    mod_sess = sim.save_to_keras()

    # Check 1: The new graph is well formed. Try forward pass through the graph.
    dummy_forward_pass(mod_sess, 'conv2d_1/Relu_quantized_static:0')

    # Check 2: All the QcQuantizeOp nodes have no output - meaning are disconnected from the main graph
    op_count = 0
    for op in mod_sess.graph.get_operations():
        if op.type == "QcQuantize":
            op_count += 1
            self.assertFalse(op.outputs[0].consumers())

    # Check 3: One QcQuantizeStatic for each QcQuantize op
    static_op_count = 0
    for op in mod_sess.graph.get_operations():
        if op.type == "QcQuantizeStatic":
            static_op_count += 1
    self.assertEqual(op_count, static_op_count)

    # Check 4: Make sure the attributes are set correctly
    op = mod_sess.graph.get_operation_by_name("conv2d/Conv2D/ReadVariableOp_quantized_static")
    self.assertEqual(8, op.get_attr("bitwidth"))
    self.assertEqual(1, op.get_attr("quant_scheme"))  # TF-Enhanced
    self.assertEqual(1, op.get_attr("op_mode"))  # oneShotQuantizeDequantize

    op = mod_sess.graph.get_operation_by_name("conv2d/BiasAdd_quantized_static")
    self.assertEqual(3, op.get_attr("op_mode"))  # passThrough

    op = mod_sess.graph.get_operation_by_name("conv2d/Relu_quantized_static")
    self.assertEqual(8, op.get_attr("bitwidth"))
    self.assertEqual(1, op.get_attr("quant_scheme"))  # TF-Enhanced
    self.assertEqual(2, op.get_attr("op_mode"))  # quantizeDequantize

    sess.close()
    sim.session.close()
    del sim
def test_reducing_inceptionV3(self):
    """Winnow input channels of four Conv2D ops inside InceptionV3 and
    verify that the reduced graph has the expected tensor shapes and still
    runs end to end."""
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()
    _ = InceptionV3(weights=None)
    sess.run(tf.compat.v1.global_variables_initializer())

    # (conv op name, input channels to winnow away)
    winnow_spec = [
        ("conv2d_12/Conv2D", [0, 1, 64, 128, 224]),
        ("conv2d_13/Conv2D", [0, 64, 65, 66, 128, 224]),
        ("conv2d_15/Conv2D", [0, 64, 128, 129, 130, 131, 224]),
        ("conv2d_18/Conv2D", [0, 64, 128, 224, 225, 226, 227, 228]),
    ]
    graph = tf.compat.v1.get_default_graph()
    module_zero_channels_list = [
        (graph.get_operation_by_name(op_name), channels)
        for op_name, channels in winnow_spec
    ]

    new_sess, ordered_modules_list = winnow.winnow_tf_model(
        sess, ["input_1"], ['predictions/Softmax'],
        module_zero_channels_list, reshape=True, in_place=True,
        verbose=True)

    # Save and reload the modified graph to allow the changes to take
    # effect. winnow_tf_model only initializes the newly winnowed conv
    # ops, so the remaining new variables must be initialized first.
    with new_sess.graph.as_default():
        initialize_uninitialized_vars(new_sess)
    new_sess = save_and_load_graph('./saver', new_sess)

    with new_sess.graph.as_default():
        inp_array = tf.random.uniform(
            shape=(1, 299, 299, 3)).eval(session=new_sess)
        model_input = new_sess.graph.get_tensor_by_name("input_1:0")
        model_output = new_sess.graph.get_tensor_by_name(
            "predictions/Softmax:0")

        def input_channels(op_name):
            # Trailing dim of the conv's input tensor after winnowing.
            reduced_op = new_sess.graph.get_operation_by_name(op_name)
            return reduced_op.inputs[0].shape.as_list()[-1]

        def output_channels(tensor_name):
            # Trailing dim of the named conv output tensor after winnowing.
            tensor = new_sess.graph.get_tensor_by_name(tensor_name)
            return tensor.shape.as_list()[-1]

        # Downstream convs lost exactly the winnowed input channels.
        self.assertEqual(251, input_channels("reduced_conv2d_12/Conv2D"))
        self.assertEqual(250, input_channels("reduced_conv2d_13/Conv2D"))
        self.assertEqual(249, input_channels("reduced_conv2d_15/Conv2D"))
        self.assertEqual(248, input_channels("reduced_conv2d_18/Conv2D"))
        # Upstream convs were reduced on the output side to match.
        self.assertEqual(63, output_channels("reduced_conv2d_5/Conv2D:0"))
        self.assertEqual(63, output_channels("reduced_conv2d_7/Conv2D:0"))
        self.assertEqual(95, output_channels("reduced_conv2d_10/Conv2D:0"))
        self.assertEqual(31, output_channels("reduced_conv2d_11/Conv2D:0"))
        self.assertEqual(17, len(ordered_modules_list))

        # Run through the entire model to check no error is produced.
        _ = new_sess.run(model_output, feed_dict={model_input: inp_array})

    new_sess.close()
    sess.close()
def test_reducing_resnet_50(self):
    """Winnow input channels of four Conv2D ops inside ResNet50 and verify
    that the reduced graph has the expected tensor shapes and still runs
    end to end."""
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()
    _ = ResNet50(weights=None)
    sess.run(tf.compat.v1.global_variables_initializer())

    # (conv op name, input channels to winnow away)
    winnow_spec = [
        ("conv2_block1_1_conv/Conv2D", [3, 5, 7]),
        ("conv2_block1_0_conv/Conv2D", [3, 5, 7, 8]),
        ("conv3_block1_1_conv/Conv2D", [3, 5, 7]),
        ("conv3_block1_0_conv/Conv2D", [3, 5, 7, 8]),
    ]
    graph = tf.compat.v1.get_default_graph()
    module_zero_channels_list = [
        (graph.get_operation_by_name(op_name), channels)
        for op_name, channels in winnow_spec
    ]

    new_sess, ordered_modules_list = winnow.winnow_tf_model(
        sess, ["input_1"], ['probs/Softmax'], module_zero_channels_list,
        reshape=True, in_place=True, verbose=True)

    # Save and reload the modified graph to allow the changes to take
    # effect. winnow_tf_model only initializes the newly winnowed conv
    # ops, so the remaining new variables must be initialized first.
    with new_sess.graph.as_default():
        initialize_uninitialized_vars(new_sess)
    new_sess = save_and_load_graph('./saver', new_sess)

    with new_sess.graph.as_default():
        inp_array = tf.random.uniform(
            shape=(1, 224, 224, 3)).eval(session=new_sess)
        model_input = new_sess.graph.get_tensor_by_name("input_1:0")
        model_output = new_sess.graph.get_tensor_by_name("probs/Softmax:0")

        def input_channels(op_name):
            # Trailing dim of the conv's input tensor after winnowing.
            reduced_op = new_sess.graph.get_operation_by_name(op_name)
            return reduced_op.inputs[0].shape.as_list()[-1]

        def output_channels(tensor_name):
            # Trailing dim of the named conv output tensor after winnowing.
            tensor = new_sess.graph.get_tensor_by_name(tensor_name)
            return tensor.shape.as_list()[-1]

        # Check that reduced tensor shapes are as expected.
        self.assertEqual(
            253, input_channels("reduced_conv3_block1_1_conv/Conv2D"))
        self.assertEqual(
            252, input_channels("reduced_conv3_block1_0_conv/Conv2D"))
        self.assertEqual(
            253, output_channels("reduced_conv2_block3_3_conv/Conv2D:0"))
        self.assertEqual(
            61, input_channels("reduced_conv2_block1_1_conv/Conv2D"))
        self.assertEqual(
            60, input_channels("reduced_conv2_block1_0_conv/Conv2D"))
        self.assertEqual(
            61, output_channels("reduced_conv1_conv/Conv2D:0"))

        # Run through the entire model to check no error is produced.
        _ = new_sess.run(model_output, feed_dict={model_input: inp_array})

        self.assertEqual(11, len(ordered_modules_list))

    new_sess.close()
    sess.close()