def conv(self, input_tensor, filter_tensor):
    """Builds a fake-quantized conv2d subgraph.

    Fake-quantizes the input and filter, runs a 'SAME'-padded NHWC conv2d
    with strides [1, 1, 2, 1], optionally adds bias and an activation, and
    fake-quantizes the result.

    NOTE(review): `has_bias` and `activation_fn` are not parameters of this
    function and not attributes of `self` — presumably they are captured
    from an enclosing scope (this looks like it was originally a closure).
    Confirm against the caller before refactoring.

    Args:
      input_tensor: float tensor to be used as the conv input.
      filter_tensor: float tensor to be used as the conv filter.

    Returns:
      A dict with key 'output' mapping to the fake-quantized conv result.
    """
    # Quantize the input to 8 bits over [-0.1, 0.2].
    q_input = array_ops.fake_quant_with_min_max_args(
        input_tensor, min=-0.1, max=0.2, num_bits=8, narrow_range=False)
    # Quantize the filter to 8 bits over [-1.0, 2.0].
    q_filters = array_ops.fake_quant_with_min_max_args(
        filter_tensor, min=-1.0, max=2.0, num_bits=8, narrow_range=False)
    # Two-channel zero bias; only applied when `has_bias` is truthy below.
    bias = array_ops.constant([0, 0], dtype=dtypes.float32)
    out = nn_ops.conv2d(
        q_input, q_filters,
        strides=[1, 1, 2, 1],
        dilations=[1, 1, 1, 1],
        padding='SAME',
        data_format='NHWC')
    if has_bias:
        out = nn_ops.bias_add(out, bias, data_format='NHWC')
    if activation_fn is not None:
        out = activation_fn(out)
    # Quantize the output to 8 bits over [-0.3, 0.4].
    q_out = array_ops.fake_quant_with_min_max_args(
        out, min=-0.3, max=0.4, num_bits=8, narrow_range=False)
    return {'output': q_out}
def add(self, x, y):
    """Returns a fake-quantized sum of x and y alongside the float sum.

    Args:
      x: first float tensor operand.
      y: second float tensor operand.

    Returns:
      A dict with 'output' (fake-quantized add) and 'float_output'
      (unquantized add) entries.
    """
    # Compute the unquantized sum first, from the original operands.
    unquantized_sum = math_ops.add(x, y)
    quant_x = array_ops.fake_quant_with_min_max_args(
        x, min=-0.1, max=0.2, num_bits=8, narrow_range=False)
    quant_y = array_ops.fake_quant_with_min_max_args(
        y, min=-0.3, max=0.4, num_bits=8, narrow_range=False)
    quantized_sum = array_ops.fake_quant_with_min_max_args(
        math_ops.add(quant_x, quant_y),
        min=-0.4, max=0.6, num_bits=8, narrow_range=False)
    return {'output': quantized_sum, 'float_output': unquantized_sum}
def quantize(graph, quantize_info):
    """Quantize the graph with quantize_info.

    Args:
      graph: Graph to be modified.
      quantize_info: Quantization info in dictionary format.

    Raises:
      ValueError: When quantization fails.
    """
    for tensor_name, min_max in quantize_info.items():
        tensor = graph.get_tensor_by_name(tensor_name)
        op_name = tensor_name.split(':')[0]
        consumers = tensor.consumers()
        quant = array_ops.fake_quant_with_min_max_args(
            tensor,
            min=min_max[0],
            max=min_max[1],
            name=op_name + '/fakequant')
        if not consumers:
            continue
        rerouted = common.RerouteTensor(quant, tensor, can_modify=consumers)
        # Some operations can have multiple output tensors going to the same
        # consumer. Since `consumers` is a set, every member must have been
        # rewired at least once for the quantization to have taken effect.
        if rerouted < len(consumers):
            raise ValueError(
                'No inputs quantized for ops: [%s]' %
                ', '.join([consumer.name for consumer in consumers]))
def testQATFrozenGraphDefInt8(self):
    """Converts a 16-bit fake-quant QAT graph with INT8 inference type."""
    with ops.Graph().as_default():
        input_a = array_ops.placeholder(
            shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputA')
        input_b = array_ops.placeholder(
            shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputB')
        _ = array_ops.fake_quant_with_min_max_args(
            input_a + input_b, min=0., max=1., name='output',
            num_bits=16)  # INT8 inference type works for 16 bits fake quant.
        sess = session.Session()

        # Serialize the graph so the command-line converter can read it.
        pb_path = self._getFilepath('model.pb')
        write_graph(sess.graph_def, '', pb_path, False)
        sess.close()

        flags_str = ('--inference_type=INT8 --std_dev_values=128,128 '
                     '--mean_values=128,128 '
                     '--graph_def_file={0} --input_arrays={1},{2} '
                     '--output_arrays={3}'.format(pb_path, 'inputA', 'inputB',
                                                  'output'))
        self._run(flags_str, should_succeed=True)
        os.remove(pb_path)
def testQATFrozenGraphDefUInt8(self):
    """Converts a fake-quant QAT graph with UINT8 inference type."""
    with ops.Graph().as_default():
        input_a = array_ops.placeholder(
            shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputA')
        input_b = array_ops.placeholder(
            shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputB')
        _ = array_ops.fake_quant_with_min_max_args(
            input_a + input_b, min=0., max=1., name='output')
        sess = session.Session()

        # Write graph to file.
        pb_path = self._getFilepath('model.pb')
        write_graph(sess.graph_def, '', pb_path, False)
        sess.close()

        # Define converter flags
        base_flags = ('--std_dev_values=128,128 --mean_values=128,128 '
                      '--graph_def_file={0} --input_arrays={1} '
                      '--output_arrays={2}'.format(pb_path, 'inputA,inputB',
                                                   'output'))

        # Set inference_type UINT8 and (default) inference_input_type UINT8
        uint8_flags = base_flags + ' --inference_type=UINT8'
        self._run(uint8_flags, should_succeed=True)

        # Set inference_type UINT8 and inference_input_type FLOAT
        float_input_flags = uint8_flags + ' --inference_input_type=FLOAT'
        self._run(float_input_flags, should_succeed=True)

        os.remove(pb_path)
def testQuantizationInvalid(self):
    """Missing stats for one input must raise a descriptive ValueError."""
    input_a = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputA')
    input_b = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputB')
    output = array_ops.fake_quant_with_min_max_args(
        input_a + input_b, min=0., max=1., name='output')
    sess = session.Session()

    # Convert model and ensure model is not None.
    converter = lite.TocoConverter.from_session(sess, [input_a, input_b],
                                                [output])
    converter.inference_type = lite_constants.QUANTIZED_UINT8
    # Stats only for 'inputA'; 'inputB' is deliberately missing.
    converter.quantized_input_stats = {'inputA': (0., 1.)}  # mean, std_dev
    with self.assertRaises(ValueError) as error:
        converter.convert()
    self.assertEqual(
        'Quantization input stats are not available for input tensors '
        '\'inputB\'.', str(error.exception))
def testQuantizationInvalid(self):
    """toco_convert without input stats raises for quantized inference."""
    with ops.Graph().as_default():
        in_tensor = array_ops.placeholder(
            shape=[1, 16, 16, 3], dtype=dtypes.float32)
        out_tensor = array_ops.fake_quant_with_min_max_args(
            in_tensor + in_tensor, min=0., max=1.)
        sess = session.Session()

        expected_message = (
            "std_dev and mean must be defined when inference_type or "
            "inference_input_type is QUANTIZED_UINT8 or INT8.")

        # Quantized inference type, no stats supplied.
        with self.assertRaises(ValueError) as error:
            convert.toco_convert(
                sess.graph_def, [in_tensor], [out_tensor],
                inference_type=lite_constants.QUANTIZED_UINT8)
        self.assertEqual(expected_message, str(error.exception))

        # Same, but with an explicit float input type — still requires stats.
        with self.assertRaises(ValueError) as error:
            convert.toco_convert(
                sess.graph_def, [in_tensor], [out_tensor],
                inference_type=lite_constants.QUANTIZED_UINT8,
                inference_input_type=lite_constants.FLOAT)
        self.assertEqual(expected_message, str(error.exception))
def testGraphDefQuantizationInvalid(self):
    """Graph-def conversion without stats raises for quantized uint8."""
    input_a = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32, name="inputA")
    input_b = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32, name="inputB")
    _ = array_ops.fake_quant_with_min_max_args(
        input_a + input_b, min=0., max=1., name="output")
    sess = session.Session()

    input_shapes = [("inputA", [1, 16, 16, 3]), ("inputB", [1, 16, 16, 3])]
    with self.assertRaises(ValueError) as error:
        convert.toco_convert_graph_def(
            sess.graph_def, input_shapes, ["output"],
            inference_type=lite_constants.QUANTIZED_UINT8)
    self.assertEqual(
        "std_dev and mean must be defined when inference_input_type is "
        "QUANTIZED_UINT8.", str(error.exception))
def testQuantization(self):
    """Quantized conversion yields uint8 I/O with the expected quant params."""
    in_tensor = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32, name='input')
    out_tensor = array_ops.fake_quant_with_min_max_args(
        in_tensor + in_tensor, min=0., max=1., name='output')
    sess = session.Session()

    # Convert model and ensure model is not None.
    converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor])
    converter.inference_type = lite_constants.QUANTIZED_UINT8
    converter.quantized_input_stats = [(0., 1.)]  # mean, std_dev
    tflite_model = converter.convert()
    self.assertTrue(tflite_model)

    # Check values from converted model.
    interpreter = Interpreter(model_content=tflite_model)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    self.assertEqual(1, len(input_details))
    self.assertEqual('input', input_details[0]['name'])
    self.assertEqual(np.uint8, input_details[0]['dtype'])
    self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all())
    self.assertEqual((1., 0.),
                     input_details[0]['quantization'])  # scale, zero_point

    output_details = interpreter.get_output_details()
    self.assertEqual(1, len(output_details))
    self.assertEqual('output', output_details[0]['name'])
    self.assertEqual(np.uint8, output_details[0]['dtype'])
    self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all())
    # assertGreater reports the actual value on failure, unlike
    # assertTrue(x > 0), and matches the sibling quantization tests.
    self.assertGreater(output_details[0]['quantization'][0], 0)  # scale
def testGraphDefQuantizationInvalid(self):
    """uint8 inference without quantized_input_stats must raise."""
    with ops.Graph().as_default():
        input_a = array_ops.placeholder(
            shape=[1, 16, 16, 3], dtype=dtypes.float32, name="inputA")
        input_b = array_ops.placeholder(
            shape=[1, 16, 16, 3], dtype=dtypes.float32, name="inputB")
        _ = array_ops.fake_quant_with_min_max_args(
            input_a + input_b, min=0., max=1., name="output")
        sess = session.Session()

        input_shapes = [("inputA", [1, 16, 16, 3]),
                        ("inputB", [1, 16, 16, 3])]
        with self.assertRaises(ValueError) as error:
            convert.toco_convert_graph_def(
                sess.graph_def,
                input_shapes, ["output"],
                enable_mlir_converter=False,
                inference_type=dtypes.uint8)
        self.assertEqual(
            "The `quantized_input_stats` flag must be defined when either "
            "`inference_type` flag or `inference_input_type` flag is set to "
            "tf.int8 or tf.uint8.", str(error.exception))
def testQuantization(self):
    """toco_convert succeeds for quantized uint8 with input stats."""
    in_tensor = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32)
    out_tensor = array_ops.fake_quant_with_min_max_args(
        in_tensor + in_tensor, min=0., max=1.)
    sess = session.Session()

    result = lite.toco_convert(
        sess.graph_def, [in_tensor], [out_tensor],
        inference_type=lite.QUANTIZED_UINT8,
        quantized_input_stats=[(0., 1.)])  # mean, std_dev
    self.assertTrue(result)
def testQuantization(self):
    """convert.toco_convert succeeds for quantized uint8 with input stats."""
    tensor_in = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32)
    tensor_out = array_ops.fake_quant_with_min_max_args(
        tensor_in + tensor_in, min=0., max=1.)
    sess = session.Session()

    tflite_model = convert.toco_convert(
        sess.graph_def, [tensor_in], [tensor_out],
        inference_type=lite_constants.QUANTIZED_UINT8,
        quantized_input_stats=[(0., 1.)])  # mean, std_dev
    self.assertTrue(tflite_model)
def testGraphDefQuantization(self):
    """End-to-end graph-def quantized conversion with stats for both inputs."""
    with ops.Graph().as_default():
        input_a = array_ops.placeholder(
            shape=[1, 16, 16, 3], dtype=dtypes.float32, name="inputA")
        input_b = array_ops.placeholder(
            shape=[1, 16, 16, 3], dtype=dtypes.float32, name="inputB")
        _ = array_ops.fake_quant_with_min_max_args(
            input_a + input_b, min=0., max=1., name="output")
        sess = session.Session()

        input_shapes = [("inputA", [1, 16, 16, 3]),
                        ("inputB", [1, 16, 16, 3])]
        tflite_model = convert.toco_convert_graph_def(
            sess.graph_def,
            input_shapes, ["output"],
            enable_mlir_converter=False,
            control_output_arrays=None,
            inference_type=dtypes.uint8,
            quantized_input_stats=[(0., 1.), (0., 1.)])
        self.assertTrue(tflite_model)

        # Check values from converted model.
        interpreter = Interpreter(model_content=tflite_model)
        interpreter.allocate_tensors()

        input_details = interpreter.get_input_details()
        self.assertEqual(2, len(input_details))
        # Both inputs share shape, dtype, and quantization parameters.
        for detail, expected_name in zip(input_details,
                                         ("inputA", "inputB")):
            self.assertEqual(expected_name, detail["name"])
            self.assertEqual(np.uint8, detail["dtype"])
            self.assertTrue(([1, 16, 16, 3] == detail["shape"]).all())
            self.assertEqual((1., 0.),
                             detail["quantization"])  # scale, zero_point

        output_details = interpreter.get_output_details()
        self.assertEqual(1, len(output_details))
        self.assertEqual("output", output_details[0]["name"])
        self.assertEqual(np.uint8, output_details[0]["dtype"])
        self.assertTrue(([1, 16, 16, 3] == output_details[0]["shape"]).all())
        self.assertGreater(output_details[0]["quantization"][0], 0)  # scale
def testQuantization(self):
    """MLIR-converter quantized conversion with per-input stats."""
    input_a = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputA')
    input_b = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputB')
    out_tensor = array_ops.fake_quant_with_min_max_args(
        input_a + input_b, min=0., max=1., name='output')
    sess = session.Session()

    # Convert model and ensure model is not None.
    converter = lite.TFLiteConverter.from_session(sess, [input_a, input_b],
                                                  [out_tensor])
    converter.experimental_enable_mlir_converter = True
    converter.inference_type = lite_constants.QUANTIZED_UINT8
    converter.quantized_input_stats = {
        'inputA': (0., 1.),
        'inputB': (0., 1.)
    }  # mean, std_dev
    tflite_model = mlir_convert_and_check_for_unsupported(self, converter)
    if tflite_model is None:
        return

    # Check values from converted model.
    interpreter = Interpreter(model_content=tflite_model)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    self.assertEqual(2, len(input_details))
    # Both inputs share shape, dtype, and quantization parameters.
    for detail, expected_name in zip(input_details, ('inputA', 'inputB')):
        self.assertEqual(expected_name, detail['name'])
        self.assertEqual(np.uint8, detail['dtype'])
        self.assertTrue(([1, 16, 16, 3] == detail['shape']).all())
        self.assertEqual((1., 0.),
                         detail['quantization'])  # scale, zero_point

    output_details = interpreter.get_output_details()
    self.assertEqual(1, len(output_details))
    # The expected output name here is 'add' (the producing op), not 'output'.
    self.assertEqual('add', output_details[0]['name'])
    self.assertEqual(np.uint8, output_details[0]['dtype'])
    self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all())
    self.assertGreater(output_details[0]['quantization'][0], 0)  # scale
def FixedQuantize(inputs, init_min=-6.0, init_max=6.0, scope=None):
    """Adds a fake quantize layer with fixed quantization interval.

    Args:
      inputs: a tensor containing values to be quantized.
      init_min: the lower end of quantization interval.
      init_max: the upper end of quantization interval.
      scope: Optional scope for name_scope.

    Returns:
      a tensor containing quantized values.
    """
    with ops.name_scope(scope, 'FixedQuantize', values=[inputs]):
        quantized = array_ops.fake_quant_with_min_max_args(
            inputs, min=init_min, max=init_max)
    return quantized
def FixedQuantize(inputs, init_min=-6.0, init_max=6.0, scope=None):
    """Fake-quantizes `inputs` over the fixed interval [init_min, init_max].

    Args:
      inputs: a tensor containing values to be quantized.
      init_min: the lower end of quantization interval.
      init_max: the upper end of quantization interval.
      scope: Optional scope for name_scope.

    Returns:
      a tensor containing quantized values.
    """
    # The op is created inside the name scope so it is grouped under it.
    with ops.name_scope(scope, 'FixedQuantize', values=[inputs]):
        return array_ops.fake_quant_with_min_max_args(
            inputs, max=init_max, min=init_min)
def testQuantizationInvalid(self):
    """Quantized conversion without input stats raises ValueError."""
    tensor_in = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32)
    tensor_out = array_ops.fake_quant_with_min_max_args(
        tensor_in + tensor_in, min=0., max=1.)
    sess = session.Session()

    with self.assertRaises(ValueError) as error:
        convert.toco_convert(
            sess.graph_def, [tensor_in], [tensor_out],
            inference_type=lite_constants.QUANTIZED_UINT8)
    expected = ("std_dev and mean must be defined when inference_input_type "
                "is QUANTIZED_UINT8.")
    self.assertEqual(expected, str(error.exception))
def testQuantization(self):
    """convert_graphdef succeeds with uint8 inference and input stats."""
    with ops.Graph().as_default():
        tensor_in = array_ops.placeholder(
            shape=[1, 16, 16, 3], dtype=dtypes.float32)
        tensor_out = array_ops.fake_quant_with_min_max_args(
            tensor_in + tensor_in, min=0., max=1.)
        sess = session.Session()

        tflite_model = convert.convert_graphdef(
            sess.graph_def,
            input_tensors=[tensor_in],
            output_tensors=[tensor_out],
            inference_type=dtypes.uint8,
            quantized_input_stats=[(0., 1.)])  # mean, std_dev
        self.assertTrue(tflite_model)
def testGraphDefQuantization(self):
    """Graph-def quantized conversion yields uint8 I/O with expected stats."""
    in_tensor_1 = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32, name="inputA")
    in_tensor_2 = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32, name="inputB")
    _ = array_ops.fake_quant_with_min_max_args(
        in_tensor_1 + in_tensor_2, min=0., max=1., name="output")
    sess = session.Session()

    input_arrays_map = [("inputA", [1, 16, 16, 3]), ("inputB", [1, 16, 16, 3])]
    output_arrays = ["output"]
    tflite_model = convert.toco_convert_graph_def(
        sess.graph_def,
        input_arrays_map,
        output_arrays,
        inference_type=lite_constants.QUANTIZED_UINT8,
        quantized_input_stats=[(0., 1.), (0., 1.)])
    self.assertTrue(tflite_model)

    # Check values from converted model.
    interpreter = Interpreter(model_content=tflite_model)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    self.assertEqual(2, len(input_details))
    self.assertEqual("inputA", input_details[0]["name"])
    self.assertEqual(np.uint8, input_details[0]["dtype"])
    self.assertTrue(([1, 16, 16, 3] == input_details[0]["shape"]).all())
    self.assertEqual((1., 0.),
                     input_details[0]["quantization"])  # scale, zero_point

    self.assertEqual("inputB", input_details[1]["name"])
    self.assertEqual(np.uint8, input_details[1]["dtype"])
    self.assertTrue(([1, 16, 16, 3] == input_details[1]["shape"]).all())
    self.assertEqual((1., 0.),
                     input_details[1]["quantization"])  # scale, zero_point

    output_details = interpreter.get_output_details()
    self.assertEqual(1, len(output_details))
    self.assertEqual("output", output_details[0]["name"])
    self.assertEqual(np.uint8, output_details[0]["dtype"])
    self.assertTrue(([1, 16, 16, 3] == output_details[0]["shape"]).all())
    # assertGreater reports the actual value on failure, unlike
    # assertTrue(x > 0), and matches the sibling quantization tests.
    self.assertGreater(output_details[0]["quantization"][0], 0)  # scale
def testQuantizationInvalid(self):
    """Missing stats for 'inputB' must raise a descriptive ValueError."""
    input_a = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputA')
    input_b = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputB')
    out_tensor = array_ops.fake_quant_with_min_max_args(
        input_a + input_b, min=0., max=1., name='output')
    sess = session.Session()

    # Convert model and ensure model is not None.
    converter = lite.TFLiteConverter.from_session(sess, [input_a, input_b],
                                                  [out_tensor])
    converter.inference_type = lite_constants.QUANTIZED_UINT8
    # Stats only for 'inputA'; 'inputB' is deliberately missing.
    converter.quantized_input_stats = {'inputA': (0., 1.)}  # mean, std_dev
    with self.assertRaises(ValueError) as error:
        converter.convert()
    self.assertEqual(
        'Quantization input stats are not available for input tensors '
        '\'inputB\'.', str(error.exception))
def _TestOp(self, input_min, input_max, num_bits, narrow_range,
            expected_nudged_input_min, expected_nudged_input_max,
            expected_step):
    """Checks fake_quant_with_min_max_args against nudged expectations.

    Feeds values just outside, on, and just inside the nudged clamp
    boundaries and verifies each is quantized to the expected level.
    """
    lo = expected_nudged_input_min
    hi = expected_nudged_input_max
    step = expected_step
    inputs = np.array([
        lo - step, lo - 0.01, lo, lo + 0.01,
        lo + step - 0.01, lo + step, lo + step + 0.01,
        hi - 0.01, hi, hi + 0.01, hi + step
    ], dtype=np.float32)
    expected = np.array([
        lo, lo, lo, lo,
        lo + step, lo + step, lo + step,
        hi, hi, hi, hi
    ], dtype=np.float32)

    with self.test_session() as session:
        with self.test_scope():
            input_placeholder = array_ops.placeholder(
                dtypes.float32, inputs.shape, name="inputs")
            outputs = array_ops.fake_quant_with_min_max_args(
                input_placeholder,
                min=input_min,
                max=input_max,
                num_bits=num_bits,
                narrow_range=narrow_range)
            result = session.run(outputs, {input_placeholder: inputs})
            self.assertAllCloseAccordingToType(
                result, expected, rtol=1e-3, atol=1e-5, bfloat16_rtol=0.03)
def testGraphDefQuantizationInvalid(self):
    """toco_convert_graph_def without stats raises for quantized uint8."""
    input_a = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32, name="inputA")
    input_b = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32, name="inputB")
    _ = array_ops.fake_quant_with_min_max_args(
        input_a + input_b, min=0., max=1., name="output")
    sess = session.Session()

    input_shapes = [("inputA", [1, 16, 16, 3]), ("inputB", [1, 16, 16, 3])]
    with self.assertRaises(ValueError) as error:
        convert.toco_convert_graph_def(
            sess.graph_def, input_shapes, ["output"],
            inference_type=lite_constants.QUANTIZED_UINT8)
    self.assertEqual(
        "std_dev and mean must be defined when inference_input_type is "
        "QUANTIZED_UINT8.", str(error.exception))
def _TestOp(self, input_min, input_max, num_bits, narrow_range,
            expected_nudged_input_min, expected_nudged_input_max,
            expected_step):
    """Checks fake_quant_with_min_max_args against nudged expectations.

    Samples just outside, on, and just inside the nudged clamp boundaries
    and verifies each value lands on the expected quantization level.
    """
    lo = expected_nudged_input_min
    hi = expected_nudged_input_max
    step = expected_step
    inputs = np.array([
        lo - step, lo - 0.01, lo, lo + 0.01,
        lo + step - 0.01, lo + step, lo + step + 0.01,
        hi - 0.01, hi, hi + 0.01, hi + step
    ], dtype=np.float32)
    expected = np.array([
        lo, lo, lo, lo,
        lo + step, lo + step, lo + step,
        hi, hi, hi, hi
    ], dtype=np.float32)

    with self.cached_session() as session:
        with self.test_scope():
            input_placeholder = array_ops.placeholder(
                dtypes.float32, inputs.shape, name="inputs")
            outputs = array_ops.fake_quant_with_min_max_args(
                input_placeholder,
                min=input_min,
                max=input_max,
                num_bits=num_bits,
                narrow_range=narrow_range)
            result = session.run(outputs, {input_placeholder: inputs})
            self.assertAllCloseAccordingToType(
                result, expected, rtol=1e-3, atol=1e-5, bfloat16_rtol=0.03)
def testQuantization(self):
    """Quantized conversion yields uint8 I/O with the expected quant params."""
    in_tensor = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32, name='input')
    out_tensor = array_ops.fake_quant_with_min_max_args(
        in_tensor + in_tensor, min=0., max=1., name='output')
    sess = session.Session()

    # Convert model and ensure model is not None.
    converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor])
    converter.inference_type = lite_constants.QUANTIZED_UINT8
    converter.quantized_input_stats = [(0., 1.)]  # mean, std_dev
    tflite_model = converter.convert()
    self.assertTrue(tflite_model)

    # Check values from converted model.
    interpreter = Interpreter(model_content=tflite_model)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    self.assertEqual(1, len(input_details))
    self.assertEqual('input', input_details[0]['name'])
    self.assertEqual(np.uint8, input_details[0]['dtype'])
    self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all())
    self.assertEqual((1., 0.),
                     input_details[0]['quantization'])  # scale, zero_point

    output_details = interpreter.get_output_details()
    self.assertEqual(1, len(output_details))
    self.assertEqual('output', output_details[0]['name'])
    self.assertEqual(np.uint8, output_details[0]['dtype'])
    self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all())
    # assertGreater reports the actual value on failure, unlike
    # assertTrue(x > 0), and matches the sibling quantization tests.
    self.assertGreater(output_details[0]['quantization'][0], 0)  # scale